#' @title Perform Empirical Optimal P-value Weighting
#'
#' @description A function to perform weighted p-value multiple hypothesis test.
#' This function compute the ranks probability of the test statistics by the filter
#' statistics given the effect sizes, and consequently the weights if neighter
#' the weights nor the probabilities are given. Then provides the number of rejected null
#' hypothesis and the list of the rejected pvalues as well as the corresponing
#' filter statistics.
#'
#' @param pvalue a vector of pvalues of the test statistics
#' @param filter a vector of filter statistics
#' @param weight optional weight vector not required
#' @param ranksProb ranks probabilities of the test-groups by the filters given
#' the mean effect. Note that, for each group of tests ranks probbaility would
#' be the same.
#' @param mean_testEffect mean test effect of the true alterantives
#' @param alpha significance level of the hypothesis test
#' @param tail right-tailed or two-tailed hypothesis test. default is right-tailed test.
#' @param delInterval interval between the \code{delta} values of a sequence.
#' Note that, \code{delta} is a LaGrange multiplier, necessary to normalize the weight
#' @param group Integer, number of groups. Default is NULL. If one wants to use
#' a fixed number of group then should use max.group = NULL
#' @param max.group maximum number of groups to be used to split the p-values,
#' default is five. Note that, it is better to keep approximately 1000 p-values
#' per group.
#' @param h_breaks number of breaks to be used for the histogram, default is 71
#' @param effectType type of effect sizes; c("continuous", "binary")
#' @param method type of methods is used to obtain the results; c("BH", "BON"),
#' Benjemini-Hochberg or Bonferroni
#' @param ... Arguments passed to internal functions
#'
#' @details If one wants to test \deqn{H_0: epsilon_i = 0 vs. H_a: epsilon_i > 0,}
#' then the \code{mean_testEffect} and \code{mean_filterEffect} should be mean
#' of the test and filter effect sizes, respectively. This is called hypothesis
#' testing for the continuous effect sizes.\cr
#'
#' If one wants to test \deqn{H_0: epsilon_i = 0 vs. H_a: epsilon_i = epsilon,}
#' then \code{mean_testEffect} and \code{mean_filterEffect} should be median or
#' any discrete value of the test and filter effect sizes. This is called hypothesis
#' testing for the Binary effect sizes, where \code{epsilon} refers to a fixed value.\cr
#'
#' The main goal of the function is to compute the probabilities of the ranks from
#' the pvalues ranked by the filter statistics, consequently the weights.
#' Although \code{weights} \code{ranksProb} are optional, \code{empOPW} has the
#' options so that one can compute the probabilities and the weights externally
#' if necessary (see the examples).\cr
#'
#' Internally, \code{empOPW} function compute the \code{ranksProb} and consequently
#' the weights, then uses the p-values to make conclusions about hypotheses.
#' Although \code{ranksProb} is not required to the function,
#' One can compute \code{ranksProb} empirically by using the function
#' \code{\link{prob_rank_givenEffect_emp}}.\cr
#'
#' The function internally compute \code{mean_testEffect} from the test statistics,
#' which is obtainde from the p-values.\cr
#'
#' It is better to see different combinations of groups and h_breaks to optimize
#' the rejections. The number of p-values per group could be approximately 1000.\cr
#'
#' One must need to provide either the number of groups or the maximum number of
#' groups. If both or only the max.group is given then the max.group will be used
#' to obatian the optimal group;otherwise, the number of groups will be determined
#' by the group.
#'
#' @author Mohamad S. Hasan
#'
#' @export
#'
#' @import tibble tibble
#' @import limma propTrueNull
#'
#' @seealso \code{\link{prob_rank_givenEffect_emp}} \code{\link{weight_binary}}
#' \code{\link{weight_continuous}}
#'
#'
#' @return \code{totalTests} total number of hypothesis tests evaluated
#' @return \code{nullProp} estimated propotion of the true null hypothesis
#' @return \code{opGroup} Integer, optimal number of groups
#' @return \code{ranksProb} probability of the ranks given the mean filter effect,
#' p(rank | ey = mean_filterEffect)
#' @return \code{group_wgt} Numeric vector of group weights (normalized)
#' @return \code{weight} Numeric vector of normalized weight for all tests
#' @return \code{rejections} total number of rejections
#' @return \code{rejections_list} list of rejected pvalues and the corresponding
#' filter statistics
#'
#'
#' @examples
#' library(OPWeight)
#' # generate pvalues and filter statistics
#' m = 10000
#' set.seed(123)
#' filters = runif(m, min = 0, max = 2.5) # filter statistics
#' H = rbinom(m, size = 1, prob = 0.1) # hypothesis true or false
#' tests = rnorm(m, mean = H * filters) # Z-score
#' pvals = 1 - pnorm(tests) # pvalue
#'
#' # general use
#' results <- empOPW(pvalue = pvals, filter = filters, effectType = "continuous",
#' method = "BH")
#'
#' # supply the mean test effect externally
#' library(qvalue)
#' nullProp = qvalue(p = pvals, pi0.method = "bootstrap")$pi0
#' m0 = ceiling(nullProp*m)
#' m1 = m - m0
#'
#' et = mean(sort(tests, decreasing = TRUE)[1:m1])
#' results2 <- empOPW(pvalue = pvals, filter = filters, mean_testEffect = et,
#' tail = 2, effectType = "continuous", method = "BH")
#'
#' # supply the ranks probability externally
#' grp = 5
#' probs = prob_rank_givenEffect_emp(pvalue = pvals, filter = filters, group = grp,
#' h_breaks = 101, effectType = "continuous")
#' results3 <- empOPW(pvalue = pvals, filter = filters, ranksProb = probs,
#' effectType = "continuous", tail = 2, method = "BH")
#'
#' # supply weight externally
#' wgt <- weight_continuous(alpha = .05, et = et, m = grp, ranksProb = probs)
#' results4 <- empOPW(pvalue = pvals, filter = filters, weight = wgt,
#' effectType = "continuous", alpha = .05, method = "BH")
#'
#===============================================================================
# # function to apply empOPW methods on data
#---------------------------------------------------
# internal parameters:-----
# m = number of hypothesis test
# nullProp = proportion of true null hypothesis
# m0 = number of the true null tests
# m1 = number of the true alternative tests
# test = compute test statistics from the pvalues if not given
# test_effect_vec = estiamted number of the true alternaitve test statistics
# mean_testEffect = mean test effect sizes of the true alternaive hypotheis
# grp = number of groups
# ranksProb = probailities of the ranks given the mean effect size
# wgt = weights
# Data = create a data set
# OD = odered by covariate
# odered.pvalues = odered pvalues for all tests
# padj = adjusted pvalues for FDR uses
#-------------------------------------------------------------------------------
empOPW <- function(pvalue, filter, weight = NULL, ranksProb = NULL,
mean_testEffect = NULL, alpha = .05, tail = 1L,
delInterval = .001, group = NULL, max.group = 5L,
h_breaks = 71L, effectType = c("continuous", "binary"),
method = c("BH", "BON"), ... )
{
# formulate a data set-------------
Data = tibble(pvalue, filter)
data_omit_na <- Data[which(!is.na(Data$pvalue)),]
OD <- data_omit_na[order(data_omit_na$filter, decreasing = TRUE), ]
OD_pvalue <- OD$pvalue
# compute the number of tests------------
message("estimate the number of true null hypothesis")
m = length(OD_pvalue)
nullProp = propTrueNull(p = OD_pvalue, method = "hist")
m0 = ceiling(nullProp*m)
m1 = m - m0
#check whether weight is provided------------
if(!is.null(weight)){
wgt <- weight
} else {
# estimate the mean test effect size-------------
if(!is.null(mean_testEffect)){
mean_testEffect <- mean_testEffect
} else {
# compute test statistics from the pvalues---------
test <- qnorm(pvalue/tail, lower.tail = FALSE)
test[which(!is.finite(test))] <- NA
# estimate the true alterantive test effect sizes----------------
if(m1 == 0){
test_effect_vec <- 0
} else {
test_effect_vec <- sort(test, decreasing = TRUE)[1:m1]
}
# compute mean test effect----------
if(effectType == "continuous"){
mean_testEffect <- mean(test_effect_vec, na.rm = TRUE)
} else {
mean_testEffect <- median(test_effect_vec, na.rm = TRUE)
}
}
# finding group-----------
message("finding optimal number of groups")
if(is.null(group) & is.null(max.group)){
stop("either group or max.group must be provided")
} else if(!is.null(group) & is.null(max.group)){
grp <- group
} else {
# find optimal number of groups ----------
if(max.group <= 10){
grp_seq <- seq(5, max.group, 1)
} else if(max.group <= 30) {
grp_seq <- seq(5, max.group, 2)
} else {
grp_seq <- round(seq(5, max.group, 5))
}
op_grp <- sapply(grp_seq, optimal_group, pvalue = OD$pvalue,
filter = OD$filter, h_breaks = h_breaks, m = m, m1 = m1,
alpha = alpha, mean_testEffect = mean_testEffect,
effectType = effectType, method = method)
grp <- op_grp[[1, which.max(op_grp[2,])]]
}
message("computing ranks probabilities")
# compute the ranks probability of the tests given the mean effect
ranksProb <- prob_rank_givenEffect_emp(pvalue = pvalue, filter = filter,
group = grp, h_breaks = h_breaks, effectType = effectType)
message("computing weights")
if(effectType == "continuous"){
wgt = weight_continuous(alpha = alpha, et = mean_testEffect,
m = grp, ranksProb = ranksProb)
} else {
wgt = weight_binary(alpha = alpha, et = mean_testEffect, m = grp,
m1 = m1/m*grp, ranksProb = ranksProb)
}
}
grpSize <- ceiling(m/grp)
wgt_all = rep(wgt, each = grpSize)[1:m]
message("comparing pvalues with thresholds")
if(method == "BH"){
padj <- p.adjust(OD_pvalue/wgt_all, method = "BH")
OD <- add_column(OD, padj)
rejections_list = OD[which((padj <= alpha) == TRUE), ]
} else {
rejections_list = OD[which((OD_pvalue <= alpha*wgt_all/m) == TRUE), ]
}
# outputs--------------
n_rejections = dim(rejections_list)[1]
return(list(totalTests = length(pvalue), nullProp = nullProp, opGroup = grp,
ranksProb = ranksProb, group_wgt = wgt, weight = wgt_all,
rejections = n_rejections, rejections_list = rejections_list))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.