inst/Scripts/peakContext.R

source("/home/dsarkar/svn/Projects/eboxes/context.R")

data(geneMouse)

gregions.500 <- genomic_regions(genes = geneMouse, proximal = 500)
gregions.2000 <- genomic_regions(genes = geneMouse, proximal = 2000)

gregions <- gregions.2000
gregions$gene <- as.character(gregions$gene)

library(lattice) # for make.groups
library(Matrix)
library(IRanges)

load("peakSummary.rda")


countHits <- function(subject, query)
{
    sum(query %over% subject)
}


doPeakSet <- function(peaks, gregions)
{
    query <- with(peaks, IRanges(start, end))
    irangeByType <-
        function(type = c("promoter", "threeprime",
                          "upstream", "downstream", "gene"))
        {
            type <- match.arg(type)
            istarts <- sprintf("%s.start", type)
            iends <- sprintf("%s.end", type)
            keep <- !duplicated(gregions[[istarts]]) ## what's the right thing to do here???
            IRanges(start = gregions[[istarts]][keep],
                    end = gregions[[iends]][keep])
        }
    subject <-
        list(promoter = irangeByType("promoter"),
             threeprime = irangeByType("threeprime"),
             upstream = irangeByType("upstream"),
             downstream = irangeByType("downstream"),
             gene = irangeByType("gene"))
    c(total = length(query), sapply(subject, countHits, query = query))
}

doChromosome <- function(chr)
{
    gregions.sub <- subset(gregions, chrom == chr)
    tube.peaks <-
        subset(peakSummary.blasts.wrt.tubes,
               chromosome == chr)
    blast.peaks <-
        subset(peakSummary.tubes.wrt.blasts,
               chromosome == chr)
    ans <-
        as.data.frame(rbind(tube = doPeakSet(tube.peaks, gregions.sub),
                            blast = doPeakSet(blast.peaks, gregions.sub),
                            tube.only = doPeakSet(subset(tube.peaks, resids < -2), gregions.sub),
                            blast.only = doPeakSet(subset(blast.peaks, resids < -2), gregions.sub)))
    ans <- cbind(type = factor(rownames(ans), levels = unique(rownames(ans))), ans)
    ans
}

## all.chroms <- levels(peakSummary.blasts.wrt.tubes$chromosome)

doAll <- function(chroms = paste("chr", 1:19, sep = ""))
{
    ans <- do.call(make.groups, sapply(chroms, doChromosome, simplify = FALSE))
    names(ans)[names(ans) == "which"] <- "chromosome"
    rownames(ans) <- NULL
    ans
}

ans <- doAll()
       
sumtab <- 
    as.data.frame(rbind(total = xtabs(total ~ type, ans),
                        promoter = xtabs(promoter ~ type, ans),
                        threeprime = xtabs(threeprime ~ type, ans),
                        upstream = xtabs(upstream ~ type, ans),
                        downstream = xtabs(downstream ~ type, ans),
                        gene = xtabs(gene ~ type, ans)))


sumtab <-
    within(sumtab,
       {
           `tube/blast` <- tube / blast
           `tube.only/blast.only` <- tube.only / blast.only
       })

sumtab[c("tube", "blast", "tube/blast", "tube.only", "blast.only", "tube.only/blast.only")]
Bioconductor/chipseq documentation built on Oct. 29, 2023, 5:04 p.m.