#' bin CpG/CpH coverage to simplify & improve CNA "sketching" (e.g. for E-M)
#'
#' FIXME: turn this into a generic for bsseq objects, for bigWigs, for [...]
#' FIXME: figure out how to estimate the most likely GCbias ~ DNAme linkage
#'
#' @param x a bsseq object, maybe HDF5-backed, for getCoverage()
#' @param bins bins to summarize over (from tileGenome or QDNAseq.xxYY)
#' @param which limit to specific regions? (functions as in import())
#' @param QDNAseq return a QDNAseqReadCounts? (if FALSE, return a GRanges)
#' @param readLen correction factor for coverage: read length in bp (100)
#'
#' @return binned read counts
#'
#' @import GenomicRanges
#' @import Mus.musculus
#' @import Homo.sapiens
#' @import Biobase
#' @import QDNAseq
#'
#' @export
binCoverage <- function(x, bins, which=NULL, QDNAseq=TRUE, readLen=100) {
# turn QDNAseq bins into an annotated GRanges object
if (is(bins, "AnnotatedDataFrame") & # QDNAseq bins
!is.null(attr(bins, "QDNAseq")$build)) {
message("Converting QDNAseq bins to GRanges for coverage")
gr <- makeGRangesFromDataFrame(pData(bins), keep=TRUE)
genome(gr) <- attr(bins, "QDNAseq")$build
} else if (is(bins, "GenomicRanges")) {
message("You really should use QDNAseq bins if you can.")
gr <- bins
} else {
stop("Don't know what to do with bins of class ", class(bins), "!")
}
origStyle <- seqlevelsStyle(gr)
if (!is.null(which)) {
seqlevelsStyle(gr) <- seqlevelsStyle(which)
gr <- subsetByOverlaps(gr, which)
}
seqlevelsStyle(gr) <- seqlevelsStyle(x)
names(gr) <- as(granges(gr), "character")
summed <- getCoverage(x, gr, what="perRegionTotal", withDimnames=TRUE)
summed <- round(summed/readLen) # mostly so plot titles don't look insane
gc(,TRUE) # cautious
gr$score <- summed
attr(gr, "binned") <- TRUE
if (QDNAseq) {
seqlevelsStyle(gr) <- origStyle
names(gr) <- as(granges(gr), "character")
phenodata <- data.frame(name=colnames(gr$score),
row.names=colnames(gr$score),
stringsAsFactors=FALSE)
phenodata$total.reads <- colSums(score(gr), na.rm=TRUE)
phenodata$used.reads <- colSums(score(subset(gr, use)), na.rm=TRUE)
object <- new("QDNAseqReadCounts",
bins=subset(bins, featureNames(bins) %in% names(gr)),
counts=gr$score, phenodata=phenodata)
object$expected.variance <- QDNAseq:::expectedVariance(object)
annotation(object) <- paste("generated by biscuiteer",
packageVersion("biscuiteer"))
return(object)
} else {
return(subset(gr, rowSums(is.na(gr$score)) < ncol(x) & use == TRUE))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.