compartmentalize <- function(data, centers=2, dist.correct=TRUE,
cov.correct=TRUE, robust.cov=5, ...)
# Computes compartments for every chromosome, using the intra-chromosomal
# contact maps that have been corrected for distance effects.
#
# written by Aaron Lun
# created 26 May 2015
# last modified 8 December 2015
{
.check_StrictGI(data)
is.intra <- intrachr(data)
data <- data[is.intra,]
if (dist.correct) {
trended <- filterTrended(data)
contacts <- trended$abundance - trended$threshold
dist2trend <- approxfun(x=trended$log.distance, y=trended$threshold, rule=2)
} else {
contacts <- scaledAverage(data)
dist2trend <- function(x) { 0 } # Trend correction function does nothing if no distance correction is requested.
}
# Going chromosome-by-chromosome.
chrs <- seqlevelsInUse(regions(data))
stored <- vector('list', length(chrs))
names(stored) <- chrs
for (chr in chrs) {
mat <- inflate(data, rows=chr, columns=chr, fill=contacts)
stored[[chr]] <- .compartChr(mat, data, dist2trend, robust.cov, cov.correct, centers, ...)
}
# Not sensible to use the entire thing, as clustering will probably be dominated by chromosome, not compartment.
# } else {
# # Using the entirety of the interaction space.
# mat <- as.matrix(data, fill=contacts)
# stored <- .compartChr(mat, data, dist2trend, robust.cov, cov.correct, centers, ...)
# }
return(stored)
}
.compartChr <- function(cm, data, dist2trend, robust.cov, cov.correct, centers, ...) {
all.a <- anchors(cm, type="row", id=TRUE)
mat <- as.matrix(cm)
colnames(mat) <- rownames(mat) <- all.a
# Filling NA's (i.e., zero's). Using mid-distance, interpolating to get the trend.
lost.ind <- Matrix::which(is.na(mat))
lost <- arrayInd(lost.ind, dim(mat))
seq.id <- as.integer(seqnames(regions(data)))
mid.pts <- mid(ranges(regions(data)))
lost.dist <- abs(mid.pts[lost[,1]] - mid.pts[lost[,2]])
lost.dist <- log10(lost.dist + metadata(data)$width)
mat[lost.ind] <- .makeEmpty(data) - dist2trend(lost.dist)
# Correcting for coverage biases, by subtracting half the average coverage from both rows
# and columns. This is equivalent to dividing by square root of coverage, which works pretty
# well in place of a more rigorous iterative approach (check out Rao's supplementaries).
rwm <- log2(Matrix::rowMeans(2^mat))
if (cov.correct) {
mat <- mat - rwm/2
mat <- t(t(mat) - rwm/2)
}
# Robustifying.
if (!is.na(robust.cov)) {
rwm.med <- median(rwm)
rwm.mad <- mad(rwm, center=rwm.med)
keep <- (rwm <= rwm.med + robust.cov*rwm.mad) & (rwm >= rwm.med - robust.cov*rwm.mad)
temp.mat <- mat
mat <- mat[keep,keep,drop=FALSE]
}
# K-means.
if (nrow(mat) > centers) {
out <- kmeans(mat, centers=centers, ...)
comp <- out$cluster
} else {
comp <- seq_len(nrow(mat))
}
# Filling the robustified bins back in.
if (!is.na(robust.cov)) {
mat <- temp.mat
temp <- integer(length(keep))
temp[keep] <- comp
temp[!keep] <- NA
comp <- temp
}
names(comp) <- all.a
as.matrix(cm) <- mat
return(list(compartment=comp, matrix=cm))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.