#' helper function for compartment inference (shrink-by-smoothing logit frac5mC)
#'
#' We want something with nominally Gaussian error for compartment inference, so
#' this function grabs suitable (default >= 3 reads in >=2 sample) measurements
#' and turns them into lightly moderated, logit-transformed methylated-fraction
#' estimates (also known, unfortunately, as M-values) for compartment calling,
#' by performing Dirichlet smoothing (adding `k` reads to M and U support).
#'
#' @param x a BSseq object with methylated and total reads
#' @param minCov minimum read coverage for landmarking samples (3)
#' @param minSamp minimum landmark samples with >= minCov (2)
#' @param k pseudoreads for smoothing (0.1)
#' @param r regions to collapse over (default is NULL, do it by CpG)
#'
#' @return smoothed logit(M/Cov) matrix with coordinates as row names
#'
#' @aliases getMvals
#'
#' @import gtools
#' @import bsseq
#'
#' @export
getLogitFracMeth <- function(x, minCov=3, minSamp=2, k=0.1, r=NULL) {
# do any loci/regions have enough read coverage in enough samples?
if (!is.null(r) && is(r, "GenomicRanges")) {
covgs <- getCoverage(x, sort(r), type="Cov", what="perRegionTotal")
} else {
covgs <- getCoverage(x, type="Cov", what="perBase")
}
usable <- DelayedMatrixStats::rowSums2(covgs >= minCov) >= minSamp
if (!any(usable)) stop("No usable loci/regions ( >= minCov in >= minSamp )!")
# construct a subset of the overall BSseq object with smoothed mvalues
if (!is.null(r) && is(r, "GenomicRanges")) {
getSmoothedLogitFrac(x, k=k, minCov=minCov, r=subset(sort(r), usable))
} else {
getSmoothedLogitFrac(subset(x, usable), k=k, minCov=minCov)
}
}
# helper fn
getSmoothedLogitFrac <- function(x, k=0.1, minCov=3, maxFrac=0.5, r=NULL) {
if (!is.null(r) && is(r, "GenomicRanges")) {
M <- getCoverage(x, sort(r), type="M", what="perRegionTotal")
U <- getCoverage(x, sort(r), type="Cov", what="perRegionTotal") - M
rnames <- as.character(sort(r))
} else {
M <- getCoverage(x, type="M", what="perBase")
U <- getCoverage(x, type="Cov", what="perBase") - M
rnames <- as.character(granges(x))
}
res <- logit((M + k) / ((M + k) + (U + k)))
rownames(res) <- rnames
makeNA <- ((M + U) < minCov)
maxPct <- paste0(100 * maxFrac, "%")
tooManyNAs <- (DelayedMatrixStats::colSums2(makeNA)/nrow(x)) > maxFrac
if (any(tooManyNAs)) {
message(paste(colnames(x)[tooManyNAs],collapse=", ")," are >",maxPct," NA!")
}
res[ makeNA ] <- NA
return(res)
}
# alias
getMvals <- getLogitFracMeth
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.