R/browseMOWChIP.R

# borrowed from erma for shinyapp in november 

genemodel = function(key, keytype="SYMBOL", annoResource=Homo.sapiens,
    keepStandardChromosomes=TRUE) {
#
# purpose is to get exon addresses given a symbol
# propagate seqinfo from annotation resource
#
if (class(annoResource)=="OrganismDb") {
  oblig=c("EXONCHROM", "EXONSTART", "EXONEND", "EXONSTRAND", "EXONID")
  addrs = AnnotationDbi::select(annoResource, keys=key, keytype=keytype, columns=oblig)
  ans = GRanges(addrs$EXONCHROM, IRanges(addrs$EXONSTART, addrs$EXONEND),
      strand=addrs$EXONSTRAND, exon_id=addrs$EXONID)
  }
else if (class(annoResource)=="EnsDb") {
  oblig = c("SEQNAME", "EXONSEQSTART", "EXONSEQEND", "SEQSTRAND",
        "EXONIDX")
  addrs = AnnotationDbi::select(annoResource, keys=key, keytype=keytype, columns=oblig)
  ans = GRanges(addrs$SEQNAME,
        IRanges(addrs$EXONSEQSTART, addrs$EXONSEQEND),
        strand = addrs$SEQSTRAND, exon_id = addrs$EXONIDX)
  }
  else stop("annoResource must be of class OrganismDb or EnsDb")
  mcols(ans)[[keytype]] = key
  useq = unique(as.character(seqnames(ans)))
  si = seqinfo(annoResource)
  seqinfo(ans) = si[useq,]
  if (keepStandardChromosomes)
       return(keepStandardChromosomes(ans, pruning.mode="coarse"))
  ans
}

#' create HTTP paths to S3 bucket holding selected MOWChIP bigwig files
#' @examples
#' mowChInS3()
#' @export
mowChInS3 = function() {
#c("http://s3.amazonaws.com/bcfound-mow/GSM1599147_GM12878_K4me3_10K_Rep1.bw",
#"http://s3.amazonaws.com/bcfound-mow/GSM1599149_GM12878_K4me3_1K_Rep1.bw",
#"http://s3.amazonaws.com/bcfound-mow/GSM1599151_GM12878_K4me3_600_Rep1.bw",
#"http://s3.amazonaws.com/bcfound-mow/GSM1599153_GM12878_K4me3_100_Rep1.bw")
k4n = c("GSM1599147_GM12878_K4me3_10K_Rep1.bw",
"GSM1599149_GM12878_K4me3_1K_Rep1.bw",
"GSM1599151_GM12878_K4me3_600_Rep1.bw",
"GSM1599153_GM12878_K4me3_100_Rep1.bw")
acn = c("GSM1599155_GM12878_K27ac_10K_Rep1.bw",
"GSM1599157_GM12878_K27ac_1K_Rep1.bw",
"GSM1599159_GM12878_K27ac_600_Rep1.bw",
"GSM1599161_GM12878_K27ac_100_Rep1.bw")
sprintf("http://s3.amazonaws.com/bcfound-mow/%s", c(k4n,acn))
}

#' @importFrom GenomeInfoDb "seqlevelsStyle<-"
filterToGene = function( gf, geneSymbol, 
     radius, anno=EnsDb.Hsapiens.v75, 
     slStyle = "UCSC", stdSeqs=TRUE ) {
  mod = try(genemodel(geneSymbol, annoResource=anno))
  if (inherits(mod, "try-error")) stop(sprintf("genemodel fails for %s with given annoResource", geneSymbol))
  gm = range(genemodel(geneSymbol,
        annoResource=anno))+radius
  gm = keepStandardChromosomes(gm, pruning.mode="coarse")
  seqlevelsStyle(gm) = slStyle
  rowRanges(gf) = gm
  reduceByRange(gf, MAP=function(r,f)
            import.bw(f, which=r))
}


#' multimodal Gviz-based visualization of MOWChIP tracks
#' @import Gviz
#' @import EnsDb.Hsapiens.v75 
#' @import GenomicFiles 
#' @import ensembldb 
#' @import GenomicRanges
#' @import IRanges
#' @param gf GenomicFiles instance giving paths to .bw files
#' @param sym gene symbol
#' @param radius number of bp around coding region to display
#' @param gstr a GRanges with gene annotation information, e.g., genes(EnsDb.Hsapiens.v75)
#' @param namegen a function of one argument assumed to be a row of \code{colData(gf)} that will be used as the \code{name} parameter for the
#' \code{\link[Gviz]{DataTrack}} derived from each sample
#' @param \dots passed to \code{\link[Gviz]{plotTracks}} exclusive of type which is set to "polygon"
#' @examples
#' data(caoChIP)
#' S3paths = mowChInS3()
#' if (interactive()) {
#'  remoteMowK4me3 = GenomicFiles(files=S3paths[1:4], 
#'     colData=colData(caoChIP[,c(1,3,5,7)]))
#'  remoteMowK27ac = GenomicFiles(files=S3paths[5:8], 
#'     colData=colData(caoChIP[,c(9,11,13,15)]))
#'  require(EnsDb.Hsapiens.v75)
#'  gg = genes(EnsDb.Hsapiens.v75)
#'  require(GenomeInfoDb)
#'  seqlevelsStyle(gg) = "UCSC"
#'  viewByGene(gf=remoteMowK4me3, sym="IGHA2", radius=80000, gstr=gg)
#'  viewByGene(gf=remoteMowK27ac, sym="SPI1", radius=80000, gstr=gg)
#'  }
#' @export
viewByGene = function(gf=caoChIP[,1:6], sym="IGHA2",
    radius=100000, gstr, namegen = function(x)x$numCells[1], ...) {
  jj = try(filterToGene(gf, sym, radius)[[1]])
  if (inherits(jj, "try-error")) stop("filterToGene failed")
  nms = vapply(1:length(jj), function(i)
                  namegen(colData(gf)[i,]), character(1))
  zz = lapply(1:length(jj), function(i) DataTrack(jj[[i]], name=nms[i]))
  sn = as.character(seqnames(jj[[1]])[1])
  grt = genesInRegion(sn, start=min(start(jj[[1]])), # jj[[1]] already has radius
          end=max(end(jj[[1]])), gstr=gstr, radius=0)
  plotTracks(c(zz, grt, GenomeAxisTrack(littleTicks=TRUE)), type="polygon", ...)
}

biotypes_ig = function() sprintf("IG_%s_gene", c("C", "D", "V", "J"))

genesInRegion = function(chr, start, end, gstr, radius=0,
   biotypes = c("protein_coding", biotypes_ig()),
   asGRT = TRUE) {
  ans = subsetByOverlaps(gstr, GRanges(chr, IRanges(start,end))+radius)
  ans = ans[which(ans$gene_biotype %in% biotypes)]
  seqlevels(ans) = chr
  if (asGRT) return(GeneRegionTrack(ans, showId=TRUE, name=chr[1]))
  ans
}


getStandardModel = function(sym) {
  mymod = genemodel(sym, anno=EnsDb.Hsapiens.v75)
  seqlevelsStyle(mymod) = "UCSC"
  keepStandardChromosomes(mymod, pruning.mode="coarse")
}
getGRT = function(syms) {
  m = lapply(syms, getStandardModel)
  do.call(GeneRegionTrack, m)
}
vjcitn/mowChIP documentation built on May 8, 2019, 2:34 a.m.