R/combine_pvalues.R

Defines functions combine_pvalues

Documented in combine_pvalues

#' Combine p-values of a feature over multiple p-value columns of an object
#' 
#' Combine p-values of a feature over multiple p-value columns of an object. If \code{alternative!="two.sided"},
#' uses the direction of \code{stat.cols} to transform \code{p.cols} into one-sided p-values; this assumes that
#' these p-values were originally calculated from two-sided tests, as is done in \pkg{limma}.
#' 
#' @param tab Matrix-like object with statistical columns, some containing p-values.
#' @param p.cols Indices or \code{\link{regexp}} with column names or column names suffix of numeric p-value columns.
#' @param stat.cols Indices or \code{\link{regexp}} with column names or column names suffix with numeric signed 
#' statistics, or with \code{"Up", "Down"} values.
#' @param only.p Logical; should only combined p-values be returned? If not, returns matrix with z-scores and FDRs also.
#' @param alternative Direction of change: \code{"two.sided"}; \code{"greater"} or \code{"less"}, or their synonyms  
#' \code{"Up"} or \code{"Down"}.
#' @details Z-transform method is used to combine p-values across rows, similarly to using unweighted 
#' \code{method="z.transform"} in \code{survcomp::combine.test}, except this function slightly adjusts
#' p-values of zero or one to avoid generating z-scores of infinite magnitude.
#'
#' \code{stat.cols} are ignored if \code{alternative} is \code{"two.sided"}. Otherwise, they should match \code{p.cols}.
#' @return Vector of p-values.
#' @examples
#'  tab <- data.frame(foo.p=(1:9)/9, bar.p=(9:1)/9)
#'  combine_pvalues(tab)
#' @export

combine_pvalues <- function(tab, p.cols="p|PValue", stat.cols="logFC|slope|cor|rho|Direction", only.p=TRUE,
                            alternative=c("two.sided", "greater", "less", "Up", "Down")){
  
  stopifnot(ncol(tab) >= 1, nrow(tab) >= 1, !is.null(colnames(tab)))
  alternative <- match.arg(alternative)
  p.colnms <- grep_cols(tab, p.cols=p.cols)
  tab.p <- as.matrix(tab[, p.colnms])
  if (!is.numeric(tab.p)) stop("Some p-value columns of tab are not numeric.")
  if (any(tab.p == 0, na.rm = TRUE)){
    small.p <- max(10**(-15), min(tab.p[tab.p > 0])/2)
    warning("p-values should not be zero; these have been converted to ", small.p, ".", call. = FALSE)
    wh <- which(tab.p == 0)
    tab.p[wh] <- small.p
  }
  
  if (alternative != "two.sided"){
    stat.colnms <- grep_cols(tab, stat.cols=stat.cols)
    if (length(stat.colnms) != length(p.colnms)) stop("Lengths of p columns and stat columns must match.")
    for (col.ind in seq_along(length(p.colnms))){
      tab.p[, col.ind] <- two2one_tailed(tab, stat.cols=stat.colnms[col.ind], p.cols=p.colnms[col.ind], 
                                         alternative=alternative)
    }
    if (any(rowMeans(is.na(tab.p)) == 1)){
      stop("All rows of p-values, after accounting for stats, must not be all NA.")
    }
  }

  # account for p-values = 1 after account for alternative
  # largest x st 1-10**-x prints as less than one is x=6
  large.p <- 1 - 10**-15
  if (any(tab.p > large.p, na.rm = TRUE)){
    # p-values can be one in discrete settings
    # message("p-values of one have been converted to ", large.p, ".", call. = FALSE)
    wh <- which(tab.p > large.p)
    tab.p[wh] <- large.p
  }
  
  tab.z <- apply(as.matrix(tab.p), MARGIN=2, stats::qnorm, lower.tail = FALSE)
  # account for NAs
  combz.v <- apply(as.matrix(tab.z), MARGIN=1, FUN=function(zv){
    zv.nona <- zv[!is.na(zv)]
    sum(zv)/sqrt(length(zv))
  })
  combp.v <- stats::pnorm(combz.v, lower.tail = FALSE)
  if (!only.p){
    return(cbind(z=combz.v, p=combp.v, FDR=stats::p.adjust(combp.v, method = "BH")))
  } else {
    return(combp.v)
  }
}
jdreyf/ezlimma documentation built on March 3, 2024, 4:23 a.m.