R/narnea_funcs.R

Defines functions combine_nes nes_covariance undirected_nes directed_nes matrix_narnea weighted_integration unweighted_integration narnea_combine meta_narnea stouffer_narnea network_match_narnea

Documented in combine_nes directed_nes matrix_narnea meta_narnea narnea_combine nes_covariance network_match_narnea stouffer_narnea undirected_nes unweighted_integration weighted_integration

#' Performs NaRnEA on multiple samples using network matching. Those samples with matched networks
#' will have protein activity generated with one network. Those without will be analyzed using MetaNaRnEA.
#' 
#' @param ges.mat GES matrix (genes X samples).
#' @param net.list List of networks appropriately parametrized for NaRnEA.
#' @param net.match.vec Named vector of the same length as the number of samples, with either the name of a matched network or NA
#' @return NaRnEA object
#' @export
network_match_narnea <- function(ges.mat, net.list, net.match.vec) {
  narnea.list <- list()
  # run narnea with a single network for each network name
  for (net.name in names(net.list)) {
    cat(paste("Analyzing samples matched with the '", net.name, "' network...\n", sep = ''))
    # get the matched samples and run narnea
    match.samps <- names(net.match.vec)[which(net.match.vec == net.name)]
    if (length(match.samps) == 0) {next}
    match.ges <- ges.mat[,match.samps]
    match.narnea <- matrix_narnea(match.ges, net.list[[net.name]])
    # add to list
    narnea.list[[net.name]] <- match.narnea
  }
  # run narnea for the unmatched samples
  unmatch.samps <- names(net.match.vec)[which(!(net.match.vec %in% names(net.list)))] 
  print(length(unmatch.samps))
  if (length(unmatch.samps) > 0) {
    unmatch.ges <- ges.mat[, unmatch.samps]
    unmatch.narnea <- meta_narnea(unmatch.ges, net.list)
    narnea.list[['unmatch']] <- list('NES' = unmatch.narnea$NES, 'PES' = unmatch.narnea$PES)
  }
  # create unified narnea matrices
  full.prot.list <- unique(unlist(sapply(narnea.list, function(x) {rownames(x$PES)})))
  padded.narnea.list <- lapply(narnea.list, function(x) {
    missing.prots <- setdiff(full.prot.list, rownames(x$NES))
    pad.mat <- matrix(0L, nrow = length(missing.prots), ncol = ncol(x$NES))
    pad.nes <- rbind(x$NES, pad.mat)
    pad.pes <- rbind(x$PES, pad.mat)
    rownames(pad.nes) <- c(rownames(x$NES), missing.prots)
    rownames(pad.pes) <- c(rownames(x$PES), missing.prots)
    return(list('NES' = pad.nes[full.prot.list,], 'PES' = pad.pes[full.prot.list,]))
  })
  combine.nes <- Reduce('cbind', lapply(padded.narnea.list, function(x) {x$NES}))
  combine.pes <- Reduce('cbind', lapply(padded.narnea.list, function(x) {x$PES}))
  # return final list
  return(list('NES' = combine.nes, 'PES' = combine.pes, 'unmatch.weights' = unmatch.narnea$weights))
}

#' Returns cluster-integrated NaRnEA results using Stouffer's method.
#' PES values are averaged across groups, while NES values are Stouffer integrated.
#' 
#' @param narnea.res NaRnEA result object with `PES` and `NES` members.
#' @param clust.vec Vector of categorical labels.
#' @return Integrated NaRnEA results; list with both `PES` and `NES` members.
#' @export
stouffer_narnea <- function(narnea.res, clust.vec) {
  clust.names <- sort(unique(clust.vec))
  
  pes.mat <- Reduce(cbind, lapply(clust.names, function(x) {
    rowMeans(narnea.res$PES[, which(clust.vec == x), drop = FALSE])
  }))
  nes.mat <- Reduce(cbind, lapply(clust.names, function(x) {
    rowSums(narnea.res$NES[, which(clust.vec == x), drop = FALSE]) / length(which(clust.vec == x))
  }))
  colnames(pes.mat) <- clust.names; colnames(nes.mat) <- clust.names
  
  return(list('PES' = pes.mat, 'NES' = nes.mat))
}

#' Runs MetaNaRnEA using the given ges matrix and network list. 
#' 
#' @param ges.mat GES matrix (features X samples).
#' @param net.list List of A3 networks.
#' @param sample.weights Flag to perform weighted integration. Default of TRUE.
#' WARNING: Running w/ sample.weights = TRUE may be very slow for large numbers of samples.
#' @return NaRnEA result list; matrices (regulators X samples) for NES and PES values.
#' @export
meta_narnea <- function(ges.mat, net.list, sample.weights = TRUE) {
  # throw warning for weighting procedure
  if (sample.weights) {
    cat("WARNING: Running `meta_narnea` with sample.weights = TRUE may be very slow for large number of samples.\n")
  }
  # generate narnea results for each network
  narnea.list <- lapply(net.list, function(x) {matrix_narnea(ges.mat, x)})
  narnea.3d <- narnea_combine(narnea.list)
  # perform integration
  if (sample.weights) {
    narnea.res <- weighted_integration(narnea.3d)
  } else {
    narnea.res <- unweighted_integration(narnea.3d)
  }
  # return
  return(narnea.res)
}

#' Generates two 3D matrices (PES/NES) from a list of NaRnEA results.
#' 
#' @param narnea.list List of result objects from `MatrixNaRnEA`
#' @return Two member list - PES and NES - with each element a 3D matrix.
#' @export
narnea_combine <- function(narnea.list) {
  require(abind)
  # collect shared and union regulators
  shared.regulators <- Reduce(intersect, sapply(narnea.list, function(x) {rownames(x$NES)}))
  union.regulators <- unique(unlist(sapply(narnea.list, function(x) {rownames(x$NES)})))
  res.num <- length(narnea.list)
  samp.num <- ncol(narnea.list[[1]]$NES)
  reg.num <- length(union.regulators)
  
  # create PES matrix
  pes.array <- Reduce(function(x, y) {abind(x, y, along = 3)}, 
                      lapply(narnea.list, function(x) {
                        pes.mat <- x$PES
                        missing.regs <- setdiff(union.regulators, rownames(pes.mat))
                        reg.names <- c(rownames(pes.mat), missing.regs)
                        pes.mat <- rbind(pes.mat, matrix(NA, 
                                                         nrow = length(missing.regs), 
                                                         ncol = ncol(pes.mat)))
                        rownames(pes.mat) <- reg.names
                          return(pes.mat[union.regulators,])
                      }))
  # create NES matrix
  nes.array <- Reduce(function(x, y) {abind(x, y, along = 3)}, 
                      lapply(narnea.list, function(x) {
                        pes.mat <- x$NES
                        missing.regs <- setdiff(union.regulators, rownames(pes.mat))
                        reg.names <- c(rownames(pes.mat), missing.regs)
                        pes.mat <- rbind(pes.mat, matrix(NA, 
                                                         nrow = length(missing.regs), 
                                                         ncol = ncol(pes.mat)))
                        rownames(pes.mat) <- reg.names
                        return(pes.mat[union.regulators,])
                      }))
  # add network names
  if (is.null(names(narnea.list))) {
    dimnames(pes.array)[[3]] <- paste('net', 1:length(narnea.list), sep = '.')
    dimnames(nes.array)[[3]] <- paste('net', 1:length(narnea.list), sep = '.')
  } else {
    dimnames(pes.array)[[3]] <- names(narnea.list)
    dimnames(nes.array)[[3]] <- names(narnea.list)
  }
  # return
  return(list('PES' = pes.array, 'NES' = nes.array))
}

#' Integrates 3D arrays of NaRnEA results generated by `narnea_combine` with equal weights across results.
#' 
#' @param combine.list List generated by `narnea_combine`; two 3D arrays, one each for PES and NES
#' @return Integrated NaRnEA results, with PES and NES matrices.
#' @export
unweighted_integration <- function(combine.list) {
  # integrate PES via mean
  pes.mat <- apply(combine.list$PES, c(1,2), function(x) {mean(x, na.rm = TRUE)})
  # integrate NES via stouffer
  nes.mat <- apply(combine.list$NES, c(1,2), function(x) {
    not.na <- length(which(!is.na(x)))
    return(sum(x, na.rm = TRUE) / sqrt(not.na))
  })
  # return
  return(list('PES' = pes.mat, 'NES' = nes.mat))
}

#' Integrates 3D arrays of NaRnEA results generated by `narnea_combine` with sample-specific weights.
#' Weights are generated via a network voting procedure.
#' 
#' @param combine.list List generated by `narnea_combine`; two 3D arrays, one each for PES and NES
#' @param return.weights Flag to return network-sample weight matrix. TRUE by default.
#' @return Integrated NaRnEA results, with PES and NES matrices.
#' @export
weighted_integration <- function(combine.list, return.weights = TRUE) {
  # determine regulator parameters
  shared.regulators <- names(which(!is.na(rowSums(combine.list$PES[,1,]))))
  union.regulators <- rownames(combine.list$PES)
  net.num <- dim(combine.list$PES)[3]
  reg.num <- length(union.regulators)
  
  # construct weight matrix
  cat("Constructing weight matrix...\n")
  abs.pes.array <- abs(combine.list$PES[shared.regulators,,])
  pes.max <- apply(abs.pes.array, c(1, 2), which.max)
  network.weights <- apply(pes.max, 2, function(x) {
    x.table <- table(x)
    net.vec <- rep(0, net.num)
    net.vec[as.numeric(names(x.table))] <- x.table
    return(net.vec)
  })
  network.weights <- network.weights / length(shared.regulators)
  rownames(network.weights) <- dimnames(combine.list$PES)[[3]]
  
  # perform integration
  cat("Performing integration...\n")
  # make weight array; equal for all genes in a given sample / network
  network.weight.array <- Reduce(function(x, y) {abind(x, y, along = 3)},
                                 replicate(reg.num, network.weights, simplify = FALSE))
  network.weight.array <- aperm(network.weight.array, c(3,2,1))
  rownames(network.weight.array) <- union.regulators
  non.na.weight.array <- as.numeric(!is.na(combine.list$PES)) * network.weight.array
  # take mean PES
  pes.denom <- apply(non.na.weight.array, c(1,2), sum)
  pes.denom <- Reduce(function(x, y) {abind(x, y, along = 3)},
                      replicate(net.num, pes.denom, simplify = FALSE))
  weighted.pes <- apply(combine.list$PES * network.weight.array / pes.denom, c(1, 2), sum, na.rm = TRUE)
  # stouffer integrate NES
  nes.denom <- apply(non.na.weight.array, c(1,2),
                     function(x) {sqrt(sum(x**2))} )
  nes.denom <- Reduce(function(x, y) {abind(x, y, along = 3)},
                      replicate(net.num, nes.denom, simplify = FALSE))
  stouffer.nes <- apply(combine.list$NES * network.weight.array / nes.denom, c(1, 2), sum, na.rm = TRUE)
  
  if (return.weights) {
    return(list('NES' = stouffer.nes, 'PES' = weighted.pes, 'weights' = network.weights))
  } else {
    return(list('NES' = stouffer.nes, 'PES' = weighted.pes))
  }
}

#' Runs the NaRnEA algorithm on multiple samples / regulons simultaneously.
#' 
#' @param ges.mat Matrix of gene expression signatures (genes X samples).
#' @param regulon.list List of regulon lists, with am and aw values.
#' @param seed.val Value of seed to ensure reproducibility. Default of 343.
#' @param min.targets Minimum number of targets needed between a regulon and a GES. Default of 30.
#' @return List of matrices; 'nes' and 'pes'.
#' @export
matrix_narnea <- function(ges.mat, regulon.list, seed.val = 343, min.targets = 30) {
  set.seed(seed.val)
  cat("Data prep...")
  
  ## remove edges not present in the NES; correct am values (nothing equal to 1, -1, or 0)
  regulon.list <- lapply(regulon.list, function(x) {
    # correct for targets
    ges.targets <- intersect(rownames(ges.mat), names(x$aw))
    aw.vec <- x$aw[ges.targets]
    am.vec <- x$am[ges.targets]
    # correct am values
    am.vec[which(am.vec == 1)] <- 0.999
    am.vec[which(am.vec == -1)] <- -0.999
    zero.samps <- which(am.vec == 0)
    am.vec[zero.samps] <- sample(c(0.001, -0.001), size = length(zero.samps), replace = TRUE)
    # reformat
    reg.list <- list('aw' = aw.vec, 'am' = am.vec)
    return(reg.list)
  })
  ## remove regulons with too few targets
  regulon.list <- regulon.list[which(sapply(regulon.list, function(x) {length(x$aw)}) >= min.targets)]
  if (length(regulon.list) == 0) {print('No regulons of adequate size'); return(NULL)}
  ## remove any duplicate regulons
  regulon.list <- regulon.list[!duplicated(names(regulon.list))]
  
  ## correct signature for zeros
  ges.mat <- apply(ges.mat, 2, function(x) {
    non.zero.num <- length(which(x == 0))
    if (non.zero.num == 0) { return(x) }
    pos.percent <- length(which(x > 0)) / non.zero.num
    neg.percent <- length(which(x < 0)) / non.zero.num
    non.zero.min <- min(abs(x[which(x != 0)]))
    x[which(x == 0)] <- runif(n = non.zero.num, min = 0, max = non.zero.min) *
      sample(c(-1, 1), size = non.zero.num, prob = c(neg.percent, pos.percent), replace = TRUE)
    return(x)
  })
  
  ## normalize GES
  R <- apply(ges.mat, 2, function(x) {rank(abs(x), ties.method = 'random')} )
  S <- apply(ges.mat, 2, function(x) {sign(x)} )
  ## get dimension parameters
  n <- ncol(ges.mat)
  g <- nrow(ges.mat)
  r <- length(regulon.list)
  ## create regulon matrices
  AM.mat <- matrix(0L, nrow = g, ncol = r)
  rownames(AM.mat) <- rownames(ges.mat); colnames(AM.mat) <- names(regulon.list)
  AW.mat <- matrix(0L, nrow = g, ncol = r)
  rownames(AW.mat) <- rownames(ges.mat); colnames(AW.mat) <- names(regulon.list)
  for (reg.name in names(regulon.list)) {
    AM.mat[names(regulon.list[[reg.name]]$am), reg.name] <- regulon.list[[reg.name]]$am
    AW.mat[names(regulon.list[[reg.name]]$aw), reg.name] <- regulon.list[[reg.name]]$aw
  }
  
  ## precalculated scalars
  E.r <- (g + 1) /2
  E.r2 <- (2*g^2 + 3*g + 1) / 6
  E.rs <- t(as.matrix((1 / g) * colSums(R * S)))
  ## precalculated matrices 
  AM.abs.mat <- 1 - abs(AM.mat)
  AM.abs.mat[which(AM.mat == 0)] <- 0 #(forces non-targets back to zero weight as opposed to one)
  AW.AM.prod <- AW.mat * AM.mat
  AW.AM.abs.prod <- AW.mat * AM.abs.mat
  
  ## calculate NES
  cat("Calculating DES...")
  D.list <- directed_nes(R, S, AW.AM.prod, E.rs, E.r2, n)
  cat("Calculating UES...")
  U.list <- undirected_nes(R, S, AW.AM.abs.prod, E.r, E.r2, n)
  COV.nes <- nes_covariance(R, S, AW.AM.prod, AW.AM.abs.prod, E.r, E.rs, D.list$var, U.list$var, g)
  cat("Calculating NES...")
  NES.mat <- combine_nes(D.list$nes, U.list$nes, COV.nes)
  
  cat("Calculating PES...")
  ## calculate max D and U for each gene
  max.du <- lapply(names(regulon.list), function(x) {
    gene.order <- names(sort(regulon.list[[x]]$aw, decreasing = TRUE))
    aw.vec <- regulon.list[[x]]$aw[gene.order]
    abs.am.vec <- abs(regulon.list[[x]]$am[gene.order])
    ges.mag <- g:(g + 1 - length(gene.order))
    d.val <- sum(aw.vec * ges.mag * abs.am.vec)
    u.val <- sum(aw.vec * ges.mag * (1 - abs.am.vec))
    return(c(d.val, u.val))
  })
  max.du <- do.call('rbind', max.du)
  ## positive PES
  PES.pos.D <- matrix(rep(max.du[,1], n), nrow = r)
  PES.pos.U <- matrix(rep(max.du[,2], n), nrow = r)
  PES.pos.D.nes <- (PES.pos.D - D.list$exp) / sqrt(D.list$var)
  PES.pos.U.nes <- (PES.pos.U - U.list$exp) / sqrt(U.list$var)
  PES.pos.NES <- combine_nes(PES.pos.D.nes, PES.pos.U.nes, COV.nes)
  ## negative PES
  PES.neg.D <- PES.pos.D * (-1)
  PES.neg.U <- PES.pos.U
  PES.neg.D.nes <- (PES.neg.D - D.list$exp) / sqrt(D.list$var)
  PES.neg.U.nes <- (PES.neg.U - U.list$exp) / sqrt(U.list$var)
  PES.neg.NES <- combine_nes(PES.neg.D.nes, PES.neg.U.nes, COV.nes)
  ## combine, then normalize the NES scores
  pos.NES <- NES.mat > 0
  PES.comb.nes <- PES.pos.NES * pos.NES + PES.neg.NES * (!pos.NES)
  PES.mat <- NES.mat / abs(PES.comb.nes)
  
  cat("Done\n")
  return(list('NES' = NES.mat, 'PES' = PES.mat))
}

#' Computes the Directed NES
#' 
#' @param R Rank normalized ges matrix (genes x samples).
#' @param S Sign of ges matrix (genes x samples).
#' @param AW.AM.prod Product of the association weight (AW) and mode (AM) matrices created from the regulon list.
#' @param E.rs Expected value of r*s in each sample (1 x samples).
#' @param E.r2 Expected value of r^2 in each sample (scalar value).
#' @param n Number of samples (scalar value).
#' @return List of matrices, each (regulons x samples):
#' Enrichment score matrix 'es'; expected value 'exp'; variance 'var'; NES matrix 'NES'
#' @export
directed_nes <- function(R, S, AW.AM.prod, E.rs, E.r2, n) {
  D <- t(AW.AM.prod) %*% (R * S)
  ## calculate expected value
  D.e <- as.matrix(colSums(AW.AM.prod)) %*% E.rs
  ## calculate variance
  reg.squared.sum.product <- as.matrix(colSums((AW.AM.prod)**2))
  E.d.2 <- reg.squared.sum.product %*% (E.rs ** 2)
  E.d2 <- reg.squared.sum.product %*% matrix(rep(E.r2, n), nrow = 1)
  D.v <- E.d2 - E.d.2
  ## calculate NES
  D.nes <- (D - D.e) / sqrt(D.v)
  
  return(list('es' = D, 'exp' = D.e, 'var' = D.v, 'nes' = D.nes))
}

#' Computes the Undirected NES
#' 
#' @param R Rank normalized ges matrix (genes x samples).
#' @param S Sign of ges matrix (genes x samples).
#' @param AW.AM.abs.prod Product of the association weight (AW) and absolute value of the association mode (AM.abs) matrices created from the regulon list.
#' @param E.r Expected value of r in each sample (scalar value).
#' @param E.r2 Expected value of r^2 in each sample (scalar value).
#' @param n Number of samples (scalar value).
#' @return List of matrices, each (regulons x samples):
#' Enrichment score matrix 'es'; expected value 'exp'; variance 'var'; NES matrix 'NES'
#' @export
undirected_nes <- function(R, S, AW.AM.abs.prod, E.r, E.r2, n) {
  U <- t(AW.AM.abs.prod) %*% R
  ## calculate expected value
  U.e <- as.matrix(colSums(AW.AM.abs.prod)) %*% matrix(rep(E.r, n), nrow = 1)
  ## calculate variance
  reg.squared.sum.product.abs <- as.matrix(colSums((AW.AM.abs.prod)**2))
  E.u.2 <- reg.squared.sum.product.abs %*% matrix(rep(E.r**2, n), nrow = 1)
  E.u2 <- reg.squared.sum.product.abs %*% matrix(rep(E.r2, n), nrow = 1)
  U.v <- E.u2 - E.u.2
  ## calculate nes
  U.nes <- (U - U.e) / sqrt(U.v)
  
  return(list('es' = U, 'exp' = U.e, 'var' = U.v, 'nes' = U.nes))
}

#' Compute the Covariance of the Directed and Undirected NES.
#' 
#' @param R Rank normalized ges matrix (genes x samples).
#' @param S Sign of ges matrix (genes x samples).
#' @param AW.AM.prod Product of the association weight (AW) and mode (AM) matrices created from the regulon list.
#' @param AW.AM.abs.prod Product of the association weight (AW) and absolute value of the association mode (AM.abs) matrices created from the regulon list.
#' @param E.r Expected value of r in each sample (scalar value).
#' @param E.rs Expected value of r*s in each sample (1 x samples).
#' @param D.v Variance of Directed Enrichment Score (regulons x samples).
#' @param U.v Variance of Undirected Enrichment Score (regulons x samples).
#' @param g Number of genes in the gene expression signature matrix (scalar value).
#' @return Matrix of covariance values (regulons x samples).
#' @export
nes_covariance <- function(R, S, AW.AM.prod, AW.AM.abs.prod, E.r, E.rs, D.v, U.v, g) {
  cov.prod.mat <- as.matrix(colSums(AW.AM.prod * AW.AM.abs.prod))
  ## first component
  E.r2s <- as.matrix((1 / g) * colSums(R * R * S))
  E.du <- cov.prod.mat %*% t(E.r2s)
  ## second component
  E.d.u <- cov.prod.mat %*% E.rs * E.r
  ## combine
  COV.du <- E.du - E.d.u
  COV.nes <- COV.du / sqrt(D.v * U.v)
  
  return(COV.nes)
}

#' Generates a final NES from the combination of the Directed and Undirected NES.
#' 
#' @param D.nes NES scores for the directed enrichment (regulons x samples).
#' @param U.nes NES scores for the undirected enrichment (regulons x samples).
#' @param COV.nes Covariance of the undirected and directed enrichment NES (regulons x samples).
#' @return Matrix of integrated NES scores (regulons x samples).
#' @export
combine_nes <- function(D.nes, U.nes, COV.nes) {
  NES.pos <- (D.nes + U.nes) / sqrt(2 + 2*COV.nes)
  NES.neg <- (D.nes - U.nes) / sqrt(2 - 2*COV.nes)
  ## calculate p values
  p.pos <- pnorm(NES.pos, lower.tail = FALSE, log.p = TRUE)
  p.neg <- pnorm(NES.neg, lower.tail = TRUE, log.p = TRUE)
  ## combine p values
  p.dif <- (p.pos < p.neg)
  min.p <- (p.pos * p.dif + p.neg * (!p.dif))
  final.p <- min.p + log(2) + log1p(exp(min.p) / (-2))
  ## calculate final nes
  pos.nes <- qnorm(final.p - log(2), lower.tail = FALSE, log.p = TRUE)
  neg.nes <- qnorm(final.p - log(2), lower.tail = TRUE, log.p = TRUE)
  NES.mat <- (pos.nes * p.dif + neg.nes * (!p.dif))
  
  return(NES.mat)
}
califano-lab/PISCES documentation built on Jan. 11, 2023, 5:34 a.m.