R/select_ntop_per_pwy.R

Defines functions select_ntop_per_pwy

Documented in select_ntop_per_pwy

#' Select top drivers for a pathway
#' 
#' Select \code{ntop} top drivers for a pathway by calculating impact, sorting nodes with \code{alternative}, 
#' and selecting top nodes.
#' 
#' @param zscores Matrix with rownames as feature names, and 1st column with z-scores to select high impact nodes. 
#' Non-\code{NA} values should be finite.
#' @param nonz.scores Matrix with rownames as feature names, and 1st column with scores (not z-scores) to select 
#' high impact nodes. Non-\code{NA} values should be finite.
#' @param pwy Pathway, must be a column name of \code{Gmat}.
#' @inheritParams pants
#' @inheritParams ezlimma::roast_contrasts
#' @details Independent of the alternative, pathway significance is most affected by nodes with largest magnitude impact,
#' however these nodes are sorted in the output according to alternative="greater". 
#' Nodes outside the pathway with no impact, which may arise when the kernel is diagonal, are removed.
#' @return Data frame with \code{ntop} rows ordered by impact & 3 columns: \code{node} with node names; 
#' \code{impact} with impact values; \code{in.pwy} with logicals if node is in \code{pwy}.

select_ntop_per_pwy <- function(zscores, nonz.scores, Gmat, pwy, ker, ntop=3){
  zscore.v <- stats::setNames(zscores[,1], nm=rownames(zscores))
  nonz.score.v <- stats::setNames(nonz.scores[,1], nm=rownames(nonz.scores))
  
  stopifnot(is.na(zscore.v) | is.finite(zscore.v), !is.null(names(zscore.v)), names(zscore.v)==names(nonz.score.v),
            is.numeric(nonz.score.v), length(pwy) == 1, pwy %in% colnames(Gmat), 
            !is.null(ker), ncol(ker) == nrow(Gmat), ncol(ker) == length(zscore.v), colnames(ker) == names(zscore.v))
  
  if (any(nonz.score.v == 0)){
    rm.ind <- which(nonz.score.v == 0)
    zscore.v <- zscore.v[-rm.ind]
    Gmat <- Gmat[-rm.ind,]
    ker <- ker[-rm.ind, -rm.ind]
  }
  if (ntop > length(zscore.v)) ntop <- length(zscore.v)
  
  # get kernel weight sums per node
  coeff.sc <- (ker %*% Gmat[,pwy])[,1]
  
  # estimate impact of nodes on pwy score
  # this includes estimation of impact.v to NA nodes, which are properly handled by order() below
  # want Ki*Gj*zi = coeff.sc * each element of z, so can multiply elementwise
  impact.v <- stats::setNames((coeff.sc * zscore.v), nm=names(zscore.v))
  # order based on alternative="greater"
  # empty nodes with score=0 won't change in perms, but will have large neg impact, so we exclude large neg impact
  impact.v <- impact.v[order(impact.v, decreasing = TRUE)][1:ntop]
  top.node.nms <- names(impact.v)
  
  in.pwy <- Gmat[top.node.nms, pwy] != 0
  top.nodes <- data.frame(node=top.node.nms, impact=impact.v[top.node.nms], in.pwy=in.pwy, stringsAsFactors=FALSE)
  top.nodes <- top.nodes[top.nodes$impact != 0 | top.nodes$in.pwy,]
  
  return(top.nodes)
}
jdreyf/PANTS documentation built on July 18, 2019, 10:12 a.m.