In this set of simulations, we consider settings with both null and non-null
tests with varying distribution of effect sizes under the non-null (alternative)
setting. Both informative and uninformative covariates are included in the setting as described in
simulations-informative-cubic.Rmd
. The effect sizes for non-null tests are
sampled from unimodal distributions composed of a mixture of normal distributions,
as described in the original adaptive shrinkage (ASH) manuscript (Stephens, 2016).
library(dplyr) library(ggplot2) library(SummarizedBenchmark) library(parallel) ## load helper functions for (f in list.files("../R", "\\.(r|R)$", full.names = TRUE)) { source(f) } ## project data/results folders resdir <- "results" dir.create(resdir, showWarnings = FALSE, recursive = TRUE) ## intermediary files we create below spiky_file <- file.path(resdir, "uasettings-benchmark-spiky.rds") flattop_file <- file.path(resdir, "uasettings-benchmark-flattop.rds") skew_file <- file.path(resdir, "uasettings-benchmark-skew.rds") bimodal_file <- file.path(resdir, "uasettings-benchmark-bimodal.rds") ## number of cores for parallelization cores <- 20 B <- 100 ## define bechmarking design bd <- initializeBenchDesign()
As described in simulations-null.Rmd
, we include Scott's FDR Regression in the analysis
for simulations with Gaussian or t-distributed test statistics. Again, we include both
nulltype = "empirical"
and nulltype = "theoretical"
. Since all settings in this
series of simulations use Gaussian test statistics, we include Scott's FDR Regression
in all of the comparisons.
bdplus <- bd bdplus <- addBMethod(bdplus, "fdrreg-t", FDRreg::FDRreg, function(x) { x$FDR }, z = test_statistic, features = model.matrix( ~ splines::bs(ind_covariate, df = 3) - 1), nulltype = 'theoretical', control = list(lambda = 0.01)) bdplus <- addBMethod(bdplus, "fdrreg-e", FDRreg::FDRreg, function(x) { x$FDR }, z = test_statistic, features = model.matrix( ~ splines::bs(ind_covariate, df = 3) - 1), nulltype = 'empirical', control = list(lambda = 0.01))
All simulation settings will share the following parameters.
m <- 20000 # integer: number of hypothesis tests pi0 <- pi0_cubic(0.90) # numeric: proportion of null hypotheses ts_dist <- rnorm_perturber(1) # functional: sampling dist/noise for test stats null_dist <- rnorm_2pvaluer(1) # functional: dist to calc p-values icovariate <- runif # functional: independent covariate
Simulation results will be presented excluding a subset of methods, and for certain plots (upset plots), a single alpha cutoff will be used.
excludeSet <- c("unadjusted", "bl-df02", "bl-df04", "bl-df05") ualpha <- 0.05
First, we consider the setting where the effect sizes under the alternative are distributed according to a "spiky" unimodal distribution centered around zero, as defined in the ASH simulations.
es_dist <- function(n) { 2*sampler_spiky(n) } # functional: dist of alternative test stats seed <- 778
We next run the simulations.
if (file.exists(spiky_file)) { res <- readRDS(spiky_file) } else { res <- mclapply(X = 1:B, FUN = simIteration, bench = bdplus, m = m, pi0 = pi0, es_dist = es_dist, icovariate = icovariate, ts_dist = ts_dist, null_dist = null_dist, seed = seed, mc.cores = cores) saveRDS(res, file = spiky_file) } res_i <- lapply(res, `[[`, "informative") res_u <- lapply(res, `[[`, "uninformative")
Here, we show the relationship between the independent covariate and p-values for a single replication of the experiment.
onerun <- simIteration(bdplus, m = m, pi0 = pi0, es_dist = es_dist, ts_dist = ts_dist, icovariate = icovariate, null_dist = null_dist, execute = FALSE)
rank_scatter(onerun, pvalue = "pval", covariate = "ind_covariate")
strat_hist(onerun, pvalue = "pval", covariate = "ind_covariate", maxy = 10, numQ = 3)
We plot the averaged results across r B
replications.
resdf <- plotsim_standardize(res_i, alpha = seq(0.01, 0.10, 0.01)) plotsim_average(resdf, met="rejections", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE) plotsim_average(resdf, met="FDR", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE) plotsim_average(resdf, met="TPR", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE) plotsim_average(resdf, met="TNR", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE)
We also take a look at the distribution of rejects for each method as a function of the effect size and independent covariate.
covariateLinePlot(res_i, alpha = ualpha, covname = "effect_size") covariateLinePlot(res_i, alpha = ualpha, covname = "ind_covariate")
Finally, (if enough methods produce rejections at r ualpha
) we take a look at
the overlap of rejections between methods.
if (numberMethodsReject(resdf, alphacutoff = ualpha, filterSet = excludeSet) >= 3) { aggupset(res_i, alpha = ualpha, supplementary = FALSE, return_list = FALSE) } else { message("Not enough methods found rejections at alpha ", ualpha, "; skipping upset plot") }
We also compare the simulation results with and without an informative covariate.
resdfu <- plotsim_standardize(res_u, alpha = seq(0.01, 0.10, 0.01)) resdfiu <- dplyr::full_join(select(resdf, rep, blabel, param.alpha, key, performanceMetric, alpha, value), select(resdfu, rep, blabel, param.alpha, key, performanceMetric, alpha, value), by = c("rep", "blabel", "param.alpha", "key", "performanceMetric", "alpha"), suffix = c(".info", ".uninfo")) resdfiu <- dplyr::mutate(resdfiu, value = value.info - value.uninfo) plotsim_average(resdfiu, met="rejections", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE) plotsim_average(resdfiu, met="FDR", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE) plotsim_average(resdfiu, met="TPR", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE) plotsim_average(resdfiu, met="TNR", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)
Next, we consider the setting where the effect sizes under the alternative are distributed according to a "flat top" unimodal distribution centered around zero, as defined in the ASH simulations.
es_dist <- function(n) { 2*sampler_flat_top(n) } # functional: dist of alternative test stats seed <- 980
We next run the simulations.
if (file.exists(flattop_file)) { res <- readRDS(flattop_file) } else { res <- mclapply(X = 1:B, FUN = simIteration, bench = bdplus, m = m, pi0 = pi0, es_dist = es_dist, icovariate = icovariate, ts_dist = ts_dist, null_dist = null_dist, seed = seed, mc.cores = cores) saveRDS(res, file = flattop_file) } res_i <- lapply(res, `[[`, "informative") res_u <- lapply(res, `[[`, "uninformative")
Here, we show the relationship between the independent covariate and p-values for a single replication of the experiment.
onerun <- simIteration(bdplus, m = m, pi0 = pi0, es_dist = es_dist, ts_dist = ts_dist, icovariate = icovariate, null_dist = null_dist, execute = FALSE)
rank_scatter(onerun, pvalue = "pval", covariate = "ind_covariate")
strat_hist(onerun, pvalue = "pval", covariate = "ind_covariate", maxy = 10, numQ = 3)
We plot the averaged results across r B
replications.
resdf <- plotsim_standardize(res_i, alpha = seq(0.01, 0.10, 0.01)) plotsim_average(resdf, met="rejections", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE) plotsim_average(resdf, met="FDR", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE) plotsim_average(resdf, met="TPR", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE) plotsim_average(resdf, met="TNR", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE)
We also take a look at the distribution of rejects for each method as a function of the effect size and independent covariate.
covariateLinePlot(res_i, alpha = ualpha, covname = "effect_size") covariateLinePlot(res_i, alpha = ualpha, covname = "ind_covariate")
Finally, (if enough methods produce rejections at r ualpha
) we take a look at
the overlap of rejections between methods.
if (numberMethodsReject(resdf, alphacutoff = ualpha, filterSet = excludeSet) >= 3) { aggupset(res_i, alpha = ualpha, supplementary = FALSE, return_list = FALSE) } else { message("Not enough methods found rejections at alpha ", ualpha, "; skipping upset plot") }
We also compare the simulation results with and without an informative covariate.
resdfu <- plotsim_standardize(res_u, alpha = seq(0.01, 0.10, 0.01)) resdfiu <- dplyr::full_join(select(resdf, rep, blabel, param.alpha, key, performanceMetric, alpha, value), select(resdfu, rep, blabel, param.alpha, key, performanceMetric, alpha, value), by = c("rep", "blabel", "param.alpha", "key", "performanceMetric", "alpha"), suffix = c(".info", ".uninfo")) resdfiu <- dplyr::mutate(resdfiu, value = value.info - value.uninfo) plotsim_average(resdfiu, met="rejections", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE) plotsim_average(resdfiu, met="FDR", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE) plotsim_average(resdfiu, met="TPR", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE) plotsim_average(resdfiu, met="TNR", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)
Next, we consider the setting where the effect sizes under the alternative are distributed according to a skewed unimodal distribution not centered at zero, as defined in the ASH simulations.
es_dist <- function(n) { 2*sampler_skew(n) } # functional: dist of alternative test stats seed <- 206
We next run the simulations.
if (file.exists(skew_file)) { res <- readRDS(skew_file) } else { res <- mclapply(X = 1:B, FUN = simIteration, bench = bdplus, m = m, pi0 = pi0, es_dist = es_dist, icovariate = icovariate, ts_dist = ts_dist, null_dist = null_dist, seed = seed, mc.cores = cores) saveRDS(res, file = skew_file) } res_i <- lapply(res, `[[`, "informative") res_u <- lapply(res, `[[`, "uninformative")
Here, we show the relationship between the independent covariate and p-values for a single replication of the experiment.
onerun <- simIteration(bdplus, m = m, pi0 = pi0, es_dist = es_dist, ts_dist = ts_dist, icovariate = icovariate, null_dist = null_dist, execute = FALSE)
rank_scatter(onerun, pvalue = "pval", covariate = "ind_covariate")
strat_hist(onerun, pvalue = "pval", covariate = "ind_covariate", maxy = 10, numQ = 3)
We plot the averaged results across r B
replications.
resdf <- plotsim_standardize(res_i, alpha = seq(0.01, 0.10, 0.01)) plotsim_average(resdf, met="rejections", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE) plotsim_average(resdf, met="FDR", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE) plotsim_average(resdf, met="TPR", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE) plotsim_average(resdf, met="TNR", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE)
We also take a look at the distribution of rejects for each method as a function of the effect size and independent covariate.
covariateLinePlot(res_i, alpha = ualpha, covname = "effect_size") covariateLinePlot(res_i, alpha = ualpha, covname = "ind_covariate")
Finally, (if enough methods produce rejections at r ualpha
) we take a look at
the overlap of rejections between methods.
if (numberMethodsReject(resdf, alphacutoff = ualpha, filterSet = excludeSet) >= 3) { aggupset(res_i, alpha = ualpha, supplementary = FALSE, return_list = FALSE) } else { message("Not enough methods found rejections at alpha ", ualpha, "; skipping upset plot") }
We also compare the simulation results with and without an informative covariate.
resdfu <- plotsim_standardize(res_u, alpha = seq(0.01, 0.10, 0.01)) resdfiu <- dplyr::full_join(select(resdf, rep, blabel, param.alpha, key, performanceMetric, alpha, value), select(resdfu, rep, blabel, param.alpha, key, performanceMetric, alpha, value), by = c("rep", "blabel", "param.alpha", "key", "performanceMetric", "alpha"), suffix = c(".info", ".uninfo")) resdfiu <- dplyr::mutate(resdfiu, value = value.info - value.uninfo) plotsim_average(resdfiu, met="rejections", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE) plotsim_average(resdfiu, met="FDR", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE) plotsim_average(resdfiu, met="TPR", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE) plotsim_average(resdfiu, met="TNR", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)
Finally, we consider the setting where the effect sizes under the alternative are distributed according to a bimodal distribution (equal mixture of two normal distributions centered at -2, 2, with variance 1), again, as defined in the ASH simulations.
es_dist <- function(n) { 2*sampler_bimodal(n) } # functional: dist of alternative test stats seed <- 913
We next run the simulations.
if (file.exists(bimodal_file)) { res <- readRDS(bimodal_file) } else { res <- mclapply(X = 1:B, FUN = simIteration, bench = bdplus, m = m, pi0 = pi0, es_dist = es_dist, icovariate = icovariate, ts_dist = ts_dist, null_dist = null_dist, seed = seed, mc.cores = cores) saveRDS(res, file = bimodal_file) } res_i <- lapply(res, `[[`, "informative") res_u <- lapply(res, `[[`, "uninformative")
Here, we show the relationship between the independent covariate and p-values for a single replication of the experiment.
onerun <- simIteration(bdplus, m = m, pi0 = pi0, es_dist = es_dist, ts_dist = ts_dist, icovariate = icovariate, null_dist = null_dist, execute = FALSE)
rank_scatter(onerun, pvalue = "pval", covariate = "ind_covariate")
strat_hist(onerun, pvalue = "pval", covariate = "ind_covariate", maxy = 10, numQ = 3)
We plot the averaged results across r B
replications.
resdf <- plotsim_standardize(res_i, alpha = seq(0.01, 0.10, 0.01)) plotsim_average(resdf, met="rejections", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE) plotsim_average(resdf, met="FDR", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE) plotsim_average(resdf, met="TPR", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE) plotsim_average(resdf, met="TNR", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE)
We also take a look at the distribution of rejects for each method as a function of the effect size and independent covariate.
covariateLinePlot(res_i, alpha = ualpha, covname = "effect_size") covariateLinePlot(res_i, alpha = ualpha, covname = "ind_covariate")
Finally, (if enough methods produce rejections at r ualpha
) we take a look at
the overlap of rejections between methods.
if (numberMethodsReject(resdf, alphacutoff = ualpha, filterSet = excludeSet) >= 3) { aggupset(res_i, alpha = ualpha, supplementary = FALSE, return_list = FALSE) } else { message("Not enough methods found rejections at alpha ", ualpha, "; skipping upset plot") }
We also compare the simulation results with and without an informative covariate.
resdfu <- plotsim_standardize(res_u, alpha = seq(0.01, 0.10, 0.01)) resdfiu <- dplyr::full_join(select(resdf, rep, blabel, param.alpha, key, performanceMetric, alpha, value), select(resdfu, rep, blabel, param.alpha, key, performanceMetric, alpha, value), by = c("rep", "blabel", "param.alpha", "key", "performanceMetric", "alpha"), suffix = c(".info", ".uninfo")) resdfiu <- dplyr::mutate(resdfiu, value = value.info - value.uninfo) plotsim_average(resdfiu, met="rejections", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE) plotsim_average(resdfiu, met="FDR", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE) plotsim_average(resdfiu, met="TPR", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE) plotsim_average(resdfiu, met="TNR", filter_set = excludeSet, merge_ihw = TRUE, errorBars = TRUE, diffplot = TRUE)
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.