#' Generates meta cells for each cluster
#'
#' @param counts.mat Matrix of filtered counts (genes X samples).
#' @param dist.mat Distance matrix for this data.
#' @param clust.vec Clustering vector. If not specified, entire matrix will be used.
#' @param num.neighbors Number of neighbors to use in metacells. Default of 5.
#' @param subset Number of cells to subset to. Default of 250. No subsetting if set equal to NULL.
#' @param min.samps Minimum number of samples in a cluster required for meta cells. Default of 500.
#' @return A list of meta cell matrices for all clusters with enough samples.
#' @export
make_metacells <- function(counts.mat, dist.mat, clust.vec, num.neighbors = 5, subset = 250, min.samps = 500) {
# ensure matrix format
counts.mat <- as.matrix(counts.mat)
dist.mat <- as.matrix(dist.mat)
# dummy clustering vector if not specified
if (missing(clust.vec)) {
clust.vec <- rep(1, ncol(counts.mat))
names(clust.vec) <- colnames(counts.mat)
}
clust.labels <- sort(unique(clust.vec))
clust.labels <- as.character(clust.labels)
# make metacell matrix for each cluster
meta.mats <- list()
for (cl in clust.labels) {
clust.samps <- names(clust.vec)[which(clust.vec == cl)]
if (length(clust.samps) > min.samps) {
print(paste("Making metacell matrix for cluster ", cl, "...", sep = ''))
# get cluster objects
clust.counts <- counts.mat[,clust.samps]
clust.dist <- dist.mat[clust.samps, clust.samps]
knn.mat <- knn(clust.dist, k = num.neighbors)
if (is.null(subset)) {
sub.samps <- clust.samps
sub.size <- length(sub.samps)
} else {
sub.size <- min(length(clust.samps), subset)
sub.samps <- sample(clust.samps, sub.size)
}
# impute matrix
imp.mat <- matrix(0L, nrow = nrow(clust.counts), ncol = sub.size)
rownames(imp.mat) <- rownames(counts.mat); colnames(imp.mat) <- sub.samps
for (ss in sub.samps) {
neighbor.vect <- c(ss, rownames(knn.mat)[knn.mat[ss,]])
ss.mat <- clust.counts[, neighbor.vect]
imp.mat[,ss] <- rowSums(ss.mat)
}
print(dim(imp.mat))
# normalize
imp.mat <- pflpf_norm(imp.mat)
meta.mats[[as.character(cl)]] <- imp.mat
}
}
return(meta.mats)
}
#' Generates a KNN matrix.
#'
#' @param dist.mat Distance matrix.
#' @param k Number of neighbors. Default of 5.
#' @return A naighbor matrix with samples in rows and neighbors in columns.
#' @export
knn <- function(dist.mat, k = 5){
dist.mat <- as.matrix(dist.mat)
n <- nrow(dist.mat)
neighbor.mat <- matrix(0L, nrow = n, ncol = k)
for (i in 1:n) {
neighbor.mat[i,] <- order(dist.mat[i,])[2:(k + 1)]
}
rownames(neighbor.mat) <- colnames(dist.mat)
return(neighbor.mat)
}
#' Consolidates networks into a single integrated network
#'
#' @param network.list List of networks generated by ARACNe3.
#' @param network.weights Weights for each network. Set to 1 for each network if not specified.
#' @param seed.val Optional seed value to ensure reproducibility. Default of 343.
#' @return A single integrated network in the regulon-list format.
#' @export
network_integrate <- function(network.list, network.weights, max.reg.size = 500, seed.val = 343) {
set.seed(seed.val)
## set weights if missing
if (missing(network.weights)) {
network.weights <- rep(1, length(network.list))
}
## set names if missing
if (is.null(names(network.list))) {
names(network.list) <- paste('net', 1:length(network.list), sep = '.')
}
## identify all regulators across the network list
regulator.names <- unique(unlist(sapply(network.list, names)))
## integration for each regulator
integrated.net <- list()
for (reg.name in regulator.names) {
# get all targets for this regulator across all networks
reg.aw <- lapply(network.list, function(x) {x[[reg.name]]$aw})
reg.am <- lapply(network.list, function(x) {x[[reg.name]]$am})
reg.targets <- unique(unlist(sapply(reg.aw, names)))
# build matrices
aw.mat <- matrix(0L, nrow = length(reg.targets), ncol = length(network.list))
rownames(aw.mat) <- reg.targets; colnames(aw.mat) <- names(network.list)
am.mat <- matrix(0L, nrow = length(reg.targets), ncol = length(network.list))
rownames(am.mat) <- reg.targets; colnames(am.mat) <- names(network.list)
for (net.name in names(network.list)) {
aw.mat[names(reg.aw[[net.name]]), net.name] <- reg.aw[[net.name]]
am.mat[names(reg.am[[net.name]]), net.name] <- reg.am[[net.name]]
}
# apply weights
aw.mat <- t(t(aw.mat) * network.weights)
am.mat <- t(t(am.mat) * network.weights)
# get mean vectors
mean.aw <- apply(aw.mat, 1, mean)
am.mat[which(am.mat == 0)] <- NA
mean.am <- apply(am.mat, 1, mean, na.rm = TRUE)
# find final set of targets
target.set <- names(sort(mean.aw, decreasing = TRUE))[1:min(length(mean.aw), max.reg.size)]
aw.vec <- rank(mean.aw[target.set], ties.method = "random") / (1 + length(target.set))
am.vec <- mean.am[target.set]
# add to list
integrated.net[[reg.name]] <- list('aw' = aw.vec, 'am' = am.vec)
}
# remove nans from integrated network
final.net <- lapply(integrated.net, function(x) {
good.targets <- intersect(which(!is.nan(x$aw)), which(!is.nan(x$am)))
return(list('aw' = x$aw[good.targets],
'am' = x$am[good.targets]))
})
return(final.net)
}
#' Saves a matrix in a format for input to ARACNe
#'
#' @param dat.mat Matrix of data (genes X samples).
#' @param out.file Output file where matrix will be saved.
#' @param subset Switch for subsetting the matrix to 500 samples. Default TRUE.
#' @export
ARACNeTable <- function(dat.mat, out.file, subset = TRUE) {
dat.mat <- dat.mat[!duplicated(rownames(dat.mat)), ]
if (subset) {
dat.mat <- dat.mat[, sample(colnames(dat.mat), min(ncol(dat.mat), 500)) ]
}
sample.names <- colnames(dat.mat)
gene.ids <- rownames(dat.mat)
m <- dat.mat
mm <- rbind( c("gene", sample.names), cbind(gene.ids, m))
write.table( x = mm , file = out.file ,
sep="\t", quote = F , row.names = F , col.names = F )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.