PISCES: Protein Activity Inferecne in Single Cells

Documented in ARACNeTable knn make_metacells

#' Generates meta cells for each cluster
#' 
#' @param counts.mat Matrix of filtered counts (genes X samples).
#' @param dist.mat Distance matrix for this data.
#' @param clust.vec Clustering vector. If not specified, entire matrix will be used.
#' @param num.neighbors Number of neighbors to use in metacells. Default of 5.
#' @param subset Number of cells to subset to. Default of 250. No subsetting if set equal to NULL.
#' @param min.samps Minimum number of samples in a cluster required for meta cells. Default of 500. 
#' @return A list of meta cell matrices for all clusters with enough samples.
#' @export
make_metacells <- function(counts.mat, dist.mat, clust.vec, num.neighbors = 5, subset = 250, min.samps = 500) {
  # ensure matrix format
  counts.mat <- as.matrix(counts.mat)
  dist.mat <- as.matrix(dist.mat)
  # dummy clustering vector if not specified
  if (missing(clust.vec)) {
    clust.vec <- rep(1, ncol(counts.mat))
    names(clust.vec) <- colnames(counts.mat)
  }
  clust.labels <- sort(unique(clust.vec))
  clust.labels <- as.character(clust.labels)
  # make metacell matrix for each cluster
  meta.mats <- list()
  for (cl in clust.labels) {
    clust.samps <- names(clust.vec)[which(clust.vec == cl)]
    if (length(clust.samps) > min.samps) {
      print(paste("Making metacell matrix for cluster ", cl, "...", sep = ''))
      # get cluster objects
      clust.counts <- counts.mat[,clust.samps]
      clust.dist <- dist.mat[clust.samps, clust.samps]
      knn.mat <- knn(clust.dist, k = num.neighbors)
      if (is.null(subset)) {
        sub.samps <- clust.samps
        sub.size <- length(sub.samps)
      } else {
        sub.size <- min(length(clust.samps), subset)
        sub.samps <- sample(clust.samps, sub.size)
      }
      # impute matrix
      imp.mat <- matrix(0L, nrow = nrow(clust.counts), ncol = sub.size)
      rownames(imp.mat) <- rownames(counts.mat); colnames(imp.mat) <- sub.samps
      for (ss in sub.samps) {
        neighbor.vect <- c(ss, rownames(knn.mat)[knn.mat[ss,]])
        ss.mat <- clust.counts[, neighbor.vect]
        imp.mat[,ss] <- rowSums(ss.mat)
      }
      print(dim(imp.mat))
      # normalize
      imp.mat <- pflpf_norm(imp.mat)
      meta.mats[[as.character(cl)]] <- imp.mat
    }
  }
  return(meta.mats)
}

#' Generates a KNN matrix.
#'
#' @param dist.mat Distance matrix.
#' @param k Number of neighbors. Default of 5.
#' @return A naighbor matrix with samples in rows and neighbors in columns.
#' @export
knn <- function(dist.mat, k = 5){
  dist.mat <- as.matrix(dist.mat)
  n <- nrow(dist.mat)
  neighbor.mat <- matrix(0L, nrow = n, ncol = k)
  for (i in 1:n) {
    neighbor.mat[i,] <- order(dist.mat[i,])[2:(k + 1)]
  }
  rownames(neighbor.mat) <- colnames(dist.mat)
  return(neighbor.mat)
}

#' Consolidates networks into a single integrated network
#' 
#' @param network.list List of networks generated by ARACNe3.
#' @param network.weights Weights for each network. Set to 1 for each network if not specified.
#' @param seed.val Optional seed value to ensure reproducibility. Default of 343.
#' @return A single integrated network in the regulon-list format.
#' @export
network_integrate <- function(network.list, network.weights, max.reg.size = 500, seed.val = 343) {
  set.seed(seed.val)
  ## set weights if missing
  if (missing(network.weights)) {
    network.weights <- rep(1, length(network.list))
  }
  ## set names if missing
  if (is.null(names(network.list))) {
    names(network.list) <- paste('net', 1:length(network.list), sep = '.')
  }
  
  ## identify all regulators across the network list
  regulator.names <- unique(unlist(sapply(network.list, names)))
  
  ## integration for each regulator
  integrated.net <- list()
  for (reg.name in regulator.names) {
    # get all targets for this regulator across all networks
    reg.aw <- lapply(network.list, function(x) {x[[reg.name]]$aw})
    reg.am <- lapply(network.list, function(x) {x[[reg.name]]$am})
    reg.targets <- unique(unlist(sapply(reg.aw, names)))
    # build matrices
    aw.mat <- matrix(0L, nrow = length(reg.targets), ncol = length(network.list))
    rownames(aw.mat) <- reg.targets; colnames(aw.mat) <- names(network.list)
    am.mat <- matrix(0L, nrow = length(reg.targets), ncol = length(network.list))
    rownames(am.mat) <- reg.targets; colnames(am.mat) <- names(network.list)
    for (net.name in names(network.list)) {
      aw.mat[names(reg.aw[[net.name]]), net.name] <- reg.aw[[net.name]]
      am.mat[names(reg.am[[net.name]]), net.name] <- reg.am[[net.name]]
    }
    # apply weights
    aw.mat <- t(t(aw.mat) * network.weights)
    am.mat <- t(t(am.mat) * network.weights)
    # get mean vectors
    mean.aw <- apply(aw.mat, 1, mean)
    am.mat[which(am.mat == 0)] <- NA
    mean.am <- apply(am.mat, 1, mean, na.rm = TRUE)
    # find final set of targets
    target.set <- names(sort(mean.aw, decreasing = TRUE))[1:min(length(mean.aw), max.reg.size)]
    aw.vec <- rank(mean.aw[target.set], ties.method = "random") / (1 + length(target.set))
    am.vec <- mean.am[target.set]
    # add to list
    integrated.net[[reg.name]] <- list('aw' = aw.vec, 'am' = am.vec)
  }
  # remove nans from integrated network
  final.net <- lapply(integrated.net, function(x) {
    good.targets <- intersect(which(!is.nan(x$aw)), which(!is.nan(x$am)))
    return(list('aw' = x$aw[good.targets],
                'am' = x$am[good.targets]))
  })
  return(final.net)
}

#' Saves a matrix in a format for input to ARACNe
#'
#' @param dat.mat Matrix of data (genes X samples).
#' @param out.file Output file where matrix will be saved.
#' @param subset Switch for subsetting the matrix to 500 samples. Default TRUE.
#' @export
ARACNeTable <- function(dat.mat, out.file, subset = TRUE) {
  dat.mat <- dat.mat[!duplicated(rownames(dat.mat)), ]
  if (subset) {
    dat.mat <- dat.mat[, sample(colnames(dat.mat), min(ncol(dat.mat), 500)) ]
  }
  sample.names <- colnames(dat.mat)
  gene.ids <- rownames(dat.mat)
  m <- dat.mat
  mm <- rbind( c("gene", sample.names), cbind(gene.ids, m))
  write.table( x = mm , file = out.file ,
               sep="\t", quote = F , row.names = F , col.names = F )
}