#' Post Selection with Aggregate Testing for Independent P-Values
#'
#' @description Performs selection adjustment for independent p-values
#' following selection with an aggregate level test statistic.
#'
#' @param pmat An \code{MXn} matrix of original p-values , where \code{M} is
#' the number of features and \code{n} is the number of independent p-values
#' for each feature. If \code{globaltest="Pearson"}, the original p-values must
#' be one-sided p-values. No default.
#'
#' @param globaltest "Fisher" or "Pearson". The default is "Fisher". Set it to
#' "Pearson" only for two-sided alternatives, if common directionality is expected
#' for each feature.
#'
#' @param pval_threshold The p-value selection threshold.
#' If \code{pval_threshold=NULL}, only the global null p-value is computed. Must be
#' entered for computation of the conditional p-values. Either a scalar, or a vector
#' of length M with positive entries. See Details.
#'
#' @return List containing the global null p-value in \code{pF}, and the
#' conditional p-values in \code{p2C}. If a global null p-value is above
#' the threshold, then the conditional p-value vector corresponding to it is
#' a vector of NA's; if \code{pval_threshold=NULL}, then the conditional p-value matrix
#' P2C is NA's.
#'
#' @details The Fisher test statisic is minus two times the sum of the log of
#' the input p-values, and the Fisher global null p-value is the probability
#' that a chi-squared distribution with \code{2n} degrees of freedom exceeds
#' the Fisher test statistc, where \code{n} is the number of input p-values.
#'
#' The Pearson test statistic is the maximum of the Fisher test statistic based
#' on left-sided p-values and the Fisher test statistic based on right-sided
#' p-values, and the Pearson global null p-value is twice the probability that
#' a chi-squared distribution with \code{2n} degrees of freedom exceeds the
#' Pearson test statistic. The global null p-value is output in pF. If the
#' hypotheses are one-sided, set globaltest= "Fisher"; if the hypotheses are
#' two-sided, set globaltest="Pearson". It is assumed that the p-values come from
#' continuous test statistics, so that left sided pvalue + right sided pvalue = 1.
#' See Heller et al. (2016) for details about these global tests, about the
#' post-selection inference using these tests, and about extensions to other
#' global tests.
#'
#' The p-value threshold, pval_threshold, is the threshold that the global null
#' p-value has to reach in order to be selected for post-selection inference.
#' For example, for a bonferroni correction at level \code{alpha} for a family
#' of \code{m} global null hypotheses, it should be set to \code{alpha/m}. The
#' conditional p-values are computed only if \code{pF<=pval_threshold}.
#'
#' @references Ruth Heller, Nilanjan Chatterjee, Abba Krieger, Jianxin Shi
#' (2016). Post-selection Inference Following Aggregate Level Hypothesis
#' Testing in Large Scale Genomic Data. bioRxiv:
#' \url{http://dx.doi.org/10.1101/058404}
#'
#' @example vignettes/aggregatePvalExamples.R
aggregatePvalues <- function(pmat, globaltest = c("Fisher", "Pearson"), pval_threshold = NULL){
if(sum(is.na(pmat)) > 0){
warning("NAs in the conditional pvalue compuations")
}
if(!length(globaltest)) {
stop("globaltest must be Fisher or Pearson")
}
globaltest <- globaltest[1]
M = dim(pmat)[1]
if(length(pval_threshold) == 1) {
pval_threshold <- rep(pval_threshold, M)
}
p2C <- matrix(NA, nrow = M, ncol = dim(pmat)[2])
pF <- rep(NA, M)
if(!(globaltest %in% c("Fisher", "Pearson"))) {
stop("globaltest must be Fisher or Pearson")
}
for(i in 1:M) {
p <- as.double(pmat[i, ])
N <- length(p)
whichNotNA <- which(!is.na(p))
p <- p[!is.na(p)]
n <- length(p)
if(n > 0) {
if(globaltest == "Fisher") {
tp <- -2 * log(p)
pF[i] <- 1 - pchisq(sum(tp), 2 * n)
if(!is.null(pval_threshold)) {
if(length(pval_threshold) < M) {
stop("threshold t for post-selection computation does not match the number of rows in pmat")
}
t <- qchisq(1-pval_threshold[i], 2*n)
if(sum(tp) >= t){
p2c <- rep(NA, n)
iscond <- rep(0, n)
p2cond <- prod(p) / exp(-t/2)
if(prod(sort(p)[2:n]) <= exp(-t/2)){ #no payment for row selection
p2C[i, ] <- p
} else {
for(j in 1:n) {
iscond[j] <- sum(sum(tp[-j]) < t)
p2c[j] = ifelse(iscond[j] == 1, p2cond, p[j])
}#for
p2C[i, whichNotNA] <- p2c
}
}#end if ( sum(tp)>=t)
}#end if (!is.null(pval_threshold))
}# end if (globaltest=="Fisher")
if(globaltest == "Pearson"){
tpL <- -2 * log(p)
tpR <- -2 * log(1-p)
QL <- sum(tpL)
QR <- sum(tpR)
QC <- max(QL, QR)
pF[i] <- 2 * (1 - pchisq(QC, 2 * n))
if(!is.null(pval_threshold)) {
if(length(pval_threshold) < M) {
stop("threshold t for post-selection computation does not match the number of rows in pmat")
}
t <- qchisq(1 - pval_threshold[i] / 2, 2 * n)
if(QC >= t){
p2c <- rep(NA, n)
numc <- rep(NA, n)
denomc <- rep(NA, n)
for(j in 1:n){
logAL <- -t/2 - sum(log(p[-j]))
AL <- exp(logAL)
logAR <- -t/2 - sum(log((1-p)[-j]))
AR <- exp(logAR)
p2sided <- 2 * min(p[j], 1 - p[j])
if(AL + AR >= 1){
p2c[j] <- p2sided
}
if(AL+AR < 1) {
denomc[j] <- AL + AR
numc[j] <- min(p2sided / 2, AL) + min(p2sided / 2, AR) +
max(AL - (1 - p2sided / 2), 0) + max(AR - (1 - p2sided/2), 0)
p2c[j] <- numc[j] / denomc[j]
}
}#for
p2C[i, whichNotNA] <- p2c
}#end if (QC>=t)
}#end if (!is.null(pval_threshold))
}# end if (globaltest=="Pearson")
}# end if (n>0)
}#end for (i in 1:M)
result <- list(pF = pF, p2C = p2C)
class(result) <- "aggregatePvalues"
return(result)
}#Conditional.pvalues
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.