Nothing
#' @title Estimate A/B compartments from methylation array data
#'
#' @description
#' \code{getArrayABsignal} returns estimated A/B compartments from methylation array data.
#'
#' @details This function is modified from the minfi::compartments to infer A/B compartments from array data
#'
#'
#' @param obj Input GenomicRatioSet object
#' @param res Compartment resolution (in bp)
#' @param parallel Should the inference be done in parallel?
#' @param allchrs Whether all autosomes should be used for A/B inference
#' @param chr Specific chromosomes to analyze
#' @param targets Specify samples to use as shrinkage targets
#' @param ... Additional arguments
#'
#' @return A p x n matrix (samples as columns and compartments as rows) of compartments
#' @import minfi
#' @import Homo.sapiens
#' @export
getArrayABsignal <- function(obj, res=1e6, parallel=FALSE, allchrs=FALSE, chr = NULL, targets = NULL, ...) {
globalMeanSet <- .getMeanGrSet(obj, targets)
columns <- colnames(obj)
names(columns) <- columns
getComp <- .getPairedArray
if (allchrs == TRUE) getComp <- .getPairedAllChrsArray
if (parallel) {
options(mc.cores=detectCores()/2) # RAM blows up otherwise
do.call(cbind,
mclapply(columns,getComp,grSet=obj,globalMeanSet=globalMeanSet,chr=chr,targets=targets,res=res))
} else {
do.call(cbind,
lapply(columns,getComp,grSet=obj,globalMeanSet=globalMeanSet,chr=chr,targets=targets,res=res))
}
}
#Helper function to get compartments as in minfi::compartments
.arraycompartments <- function(obj, resolution = 1e6, what="OpenSea",
chr = "chr22", method = c("pearson", "spearman"),
keep = FALSE) {
if (length(chr) > 1) stop("Expected a single chromosome internally and got more than one...")
if (!(chr %in% seqlevels(obj))) stop("The supplied chromosome is not found in the seqlevels of the object...")
method <- match.arg(method)
gr <- .createCorMatrix(
obj = obj,
resolution = resolution,
what = what,
chr = chr,
method = method)
gr <- .extractAB(gr, keep = keep)
gr$compartment <- .extractOpenClosedArray(gr)
gr
}
#Helper function to perform mean imputation of missing values
.imputeMatrix <- function(matrix) {
matrix[is.infinite(matrix) & matrix > 0] <- max(matrix[is.finite(matrix)])
matrix[is.infinite(matrix) & matrix < 0] <- min(matrix[is.finite(matrix)])
# Imputation of the missing values:
# Probably should be using knn instead of mean imputation
# Will leave for now but should be updated later
missing <- which(is.na(matrix), arr.ind = TRUE)
if (length(missing) != 0) {
for (j in seq_len(nrow(missing))) {
mean <- mean(
x = matrix[missing[j, 1L], ][
is.finite(matrix[missing[j, 1L], ])],
na.rm = TRUE)
matrix[missing[j, 1L], missing[j, 2L]] <- mean
}
}
matrix
}
#Helper function to return binned M-values
#this cannot possibly be optimal (two nested loops?!)
.returnBinnedMatrix <- function(gr.unbinnedCor, resolution){
bin2D <- function(matrix, ids, n) {
unique.ids <- sort(unique(ids))
bin.matrix <- matrix(0, n, n)
m <- length(unique.ids)
for (i in seq_len(n)) {
for (j in seq_len(n)) {
# TODO: We should be able to speed this one up a lot
indices1 <- which(ids == unique.ids[i])
indices2 <- which(ids == unique.ids[j])
bin.matrix[unique.ids[i],unique.ids[j]] <- median(
x = matrix[indices1, indices2],
na.rm = TRUE)
}
}
# TODO: This line should not be necessary
bin.matrix[is.na(bin.matrix)] <- 1
bin.matrix
}
# TODO: Use Bioconductor infrastructure to get chromosome lengths and
# don't directly call structure() to create object
# chr.lengths <- structure(
# c(249250621L, 243199373L, 198022430L, 191154276L, 180915260L,
# 171115067L, 159138663L, 146364022L, 141213431L, 135534747L,
# 135006516L, 133851895L, 115169878L, 107349540L, 102531392L,
# 90354753L, 81195210L, 78077248L, 59128983L, 63025520L, 48129895L,
# 51304566L, 155270560L, 59373566L),
# .Names = c("chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7",
# "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14",
# "chr15", "chr16", "chr17", "chr18", "chr19", "chr20",
# "chr21", "chr22", "chrX", "chrY"))
chr.lengths <- seqlengths(Homo.sapiens)[standardChromosomes(Homo.sapiens)]
seqlengths(gr.unbinnedCor) <- chr.lengths[seqlevels(gr.unbinnedCor)]
#Split this out and return a more informative error...
if (length(seqlevels(gr.unbinnedCor)) > 1) stop("The number of seqlevels returned greater than one...")
if (is.na(seqlengths(gr.unbinnedCor))) stop("The seqlengths returned are NA... is this human data?")
if (!("cor.matrix" %in% names(mcols(gr.unbinnedCor)))) stop("The correlation matrix was not found in the GRanges object...")
gr.binnedCor <- tileGenome(
seqlengths = seqlengths(gr.unbinnedCor),
tilewidth = resolution,
cut.last.tile.in.chrom = TRUE)
ids <- subjectHits(findOverlaps(gr.unbinnedCor, gr.binnedCor))
gr.binnedCor$cor.matrix <- bin2D(
matrix = gr.unbinnedCor$cor.matrix,
ids = ids,
n = length(gr.binnedCor))
gr.binnedCor
}
#Helper function to remove empty bins
.removeBadBins <- function(gr) {
good.bins <- !colAlls(gr$cor.matrix, value = 0)
if (length(good.bins) < nrow(gr$cor.matrix)) {
gr <- gr[good.bins]
gr$cor.matrix <- gr$cor.matrix[, good.bins]
}
gr
}
#Helper function to extract whether the compartment is open or closed
.extractOpenClosedArray <- function(gr, cutoff = 0){
pc <- gr$pc
ifelse(pc < cutoff, "open", "closed")
}
#Helper function to get the first principal component
.getFirstPCarray <- function(matrix, method){
# Centre the matrix
center <- rowMeans2(matrix, na.rm = TRUE)
matrix <- sweep(matrix, 1L, center, check.margin = FALSE)
.fsvd(matrix, k = 1, method = method)$u
}
#Helper function to use a moving average smooth
.meanSmootherArray <- function(x, k = 1L, iter = 2L, na.rm = TRUE) {
meanSmoother.internal <- function(x, k=1, na.rm=TRUE){
if (k < 1) stop("k needs to be greater than or equal to 1...")
if (length(x) < k) stop("Cannot smooth. Too few bins...")
n <- length(x)
y <- rep(NA,n)
window.mean <- function(x, j, k, na.rm=na.rm){
if (k>=1){
return(mean(x[(j-(k+1)):(j+k)], na.rm=na.rm))
} else {
return(x[j])
}
}
for (i in (k+1):(n-k)){
y[i] <- window.mean(x,i,k, na.rm)
}
for (i in 1:k){
y[i] <- window.mean(x,i,i-1, na.rm)
}
for (i in (n-k+1):n){
y[i] <- window.mean(x,i,n-i,na.rm)
}
y
}
for (i in 1:iter){
x <- meanSmoother.internal(x, k=k, na.rm=na.rm)
}
x
}
#Helper function to unitarize the A/B estimates
.unitarize <- function(x, medianCenter = TRUE) {
if (medianCenter) x <- x - median(x, na.rm = TRUE)
bad <- is.na(x)
x[!bad] <- x[!bad] / sqrt(sum(x[!bad]^2))
n.bad <- sum(bad)
if (n.bad > 0) {
message(
sprintf("[.unitarize] %i missing values were ignored.\n", n.bad))
}
x
}
#Helper function to perform fsvd instead of the nipals method from mixOmics package
#This needs to be resolved since nipals is required for ATAC but not here
#We are already supporting nipals so why do we need fsvd too - or the inverse?
.fsvd <- function(A, k, i = 1, p = 2, method = c("qr", "svd", "exact")) {
method <- match.arg(method)
l <- k + p
n <- ncol(A)
m <- nrow(A)
tall <- TRUE
# NOTE: In the case a WIDE matrix is provided:
if (m < n) {
A <- t(A) # This is potentially expensive
n <- ncol(A)
m <- nrow(A)
tall <- FALSE
}
if (l > ncol(A) && method != "exact") {
stop("(k+p) is greater than the number of columns. Please decrease ",
"the value of k.")
}
# Construct G to be n x l and Gaussian
G <- matrix(rnorm(n * l, 0, 1), nrow = n, ncol = l)
if (method == "svd") {
# Power method:
H <- A %*% G # m x l matrix
for (j in seq_len(i)) {
H <- A %*% (crossprod(A, H))
}
# NOTE: We use a SVD to find an othogonal basis Q:
# TODO: Remove this commented line if not needed
# H = FF %*% Omega %*% t(S)
svd <- svd(crossprod(H))
FF <- svd$u # l x l
omega <- diag(1/sqrt(svd$d)) # l x l
S <- H %*% FF %*% omega # m x l
# Define the orthogonal basis:
Q <- S[, seq_len(k), drop = FALSE] # m x k
# TODO: Remove these commented lines if not needed
# TT <- t(A) %*% Q # n x k
# TT <- t(TT)
TT <- crossprod(Q, A)
} else if (method == "qr") {
# NOTE: Need to create a list of H matrices
h.list <- vector("list", i + 1L)
h.list[[1]] <- A %*% G
for (j in seq(2, i + 1L)) {
h.list[[j]] <- A %*% (crossprod(A, h.list[[j - 1L]]))
}
H <- do.call("cbind", h.list) # n x [(1+1)l] matrix
# QR algorithm
Q <- qr.Q(qr(H, 0))
# TODO: Remove these commented lines if not needed
# TT <- t(A)%*%Q # n x [(i+1)l]
# TT <- t(TT)
TT <- crossprod(Q, A)
}
if (method == "svd" || method == "qr") {
svd <- svd(TT)
u <- Q %*% svd$u[,seq_len(k), drop = FALSE]
v <- svd$v[, seq_len(k), drop = FALSE]
d <- svd$d[seq_len(k)]
} else {
# Exact SVD
svd <- svd(A)
u <- svd$u[, seq_len(k), drop = FALSE]
v <- svd$v[, seq_len(k), drop = FALSE]
d <- svd$d[seq_len(k)]
}
if (!tall) {
uu <- v
v <- u
u <- uu
}
list(u = u, v = v, d = d)
}
#Helper function to create a correlation matrix
.createCorMatrix <- function(obj, resolution = 100 * 1000, what = "OpenSea",
chr = "chr22", method = c("pearson", "spearman")) {
if (length(chr) > 1) stop("Expected a single chromosome internally and got more than one...")
if (!(chr %in% seqlevels(obj))) stop("The supplied chromosome is not found in the seqlevels of the object...")
method <- match.arg(method)
if (is(obj, "GenomicMethylSet")) {
obj <- ratioConvert(obj, what = "M", keepCN = FALSE)
}
assay(obj, "M") <- .imputeMatrix(getM(obj))
# Next we subset to a chromosome, keep OpenSea probes and remove SNPs
seqlevels(obj, pruning.mode = "coarse") <- chr
obj <- obj[getIslandStatus(obj) %in% what,]
obj <- dropLociWithSnps(obj, snps = c("CpG", "SBE"), maf = 0.01)
gr.unbinnedCor <- granges(obj)
gr.unbinnedCor$cor.matrix <- suppressWarnings(cor(t(getM(obj)), method = method))
gr.cor <- .returnBinnedMatrix(gr.unbinnedCor, resolution = resolution)
gr.cor <- .removeBadBins(gr.cor)
#Fisher's Z to ensure MVN
gr.cor$cor.matrix <- fisherZ(gr.cor$cor.matrix)
gr.cor
}
#Helper function to extract A/B compartments
.extractAB <- function(gr, keep = TRUE, svdMethod = "qr"){
if (!(is(gr, "GRanges") && "cor.matrix" %in% names(mcols(gr)))) {
stop("'gr' must be an object created by createCorMatrix")
}
pc <- .getFirstPCarray(gr$cor.matrix, method = svdMethod)
pc <- .meanSmootherArray(pc)
pc <- .unitarize(pc)
# Fix sign of eigenvector
# Also check for NA correlation between colsums and the PC
# Not sure if we should just dump all of these to NAs if the correlation fails
# Explicitly implies that many of the eigenvalues are [close to] zero
# This check ensures positive values are associated with the closed compartment
# Currently we leave it as is...
if (!is.na(cor(colSums2(gr$cor.matrix), pc)) & cor(colSums2(gr$cor.matrix), pc) < 0 ) {
pc <- -pc
}
pc <- pc * sqrt(length(pc))
gr$pc <- pc
if (!keep) gr$cor.matrix <- NULL
gr
}
.getMeanGrSet <- function(grSet, targets = NULL) {
if (!is.null(targets)) {
stargets <- .getShrinkageTargets(grSet, targets)
message("Using ", paste(shQuote(targets), collapse = ", "), " as shrinkage targets...")
meanBeta <- matrix(rowMeans(getBeta(stargets), na.rm=TRUE), ncol=1)
}
else meanBeta <- matrix(rowMeans(getBeta(grSet), na.rm=TRUE), ncol=1)
subGrSet <- grSet[,1]
assays(subGrSet)$Beta <- meanBeta
sampleNames(subGrSet) <- "globalMean"
return(subGrSet)
}
.getPairedArray <- function(column, grSet, globalMeanSet=NULL, res=1e6, targets = NULL, ...) {
message("Computing shrunken compartment eigenscores for ", column, "...")
if(is.null(globalMeanSet)) globalMeanSet <- .getMeanGrSet(grSet, targets)
res <- .arraycompartments(cbind(grSet[,column], globalMeanSet), keep=FALSE, resolution=res, ...)
pc <- res$pc
names(pc) <- as.character(res)
return(pc)
}
.getPairedAllChrsArray <- function(column, grSet, globalMeanSet=NULL, res=1e6, targets = NULL, ...) {
if (is.null(globalMeanSet)) globalMeanSet <- .getMeanGrSet(grSet, targets)
chrs <- intersect(paste0("chr", seq_along(1:22)), seqlevels(grSet))
names(chrs) <- chrs
getPairedChr <- function(chr) {
message("Computing shrunken eigenscores for ", column, " on ", chr, "...")
res <- .arraycompartments(cbind(grSet[,column],globalMeanSet),keep=FALSE,resolution=res,chr=chr)
pc <- res$pc
names(pc) <- as.character(res)
return(pc)
}
unlist(lapply(chrs, getPairedChr))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.