#' Simulate Collection of Test Statistics
#'
#' Function for simulating a large set of (independent) test results.
#' For Normal and t-distributed test statistics, effect sizes are directly simulated
#' from a specified true effect size distribution (`es_dist`) and perturbed according to a
#' specified noise distribution with correspond standard errors to obtain the observed set of
#' test statistics (`ts_dist`). For Chi-squared distributed test statistics, the notions of
#' effect size and standard error are not well defined, and we therefore just set the standard
#' error to 1 and effect size equal to the test statistic. This does not matter for our simulations,
#' where the effect size and standard error are only used by a single method (ASH) which is
#' not included in the comparisons under the Chi-squared distribution. This simulator resembles
#' the simulations carried out in the manuscripts of the Boca-Leek, ASH, and IHW methods.
#' (For IHW, in particular, the simulations illustrating the "size investing strategy" fall under
#' this class of simulations.)
#'
#' @param X simulation index
#' @param bench BenchDesign object of methods to run on simulated data set.
#' @param m number of test statistics.
#' @param pi0 proportion of null hypotheses. Can either be a single numeric value
#' between 0 and 1 or a function which takes a numeric vector (i.e. 'informative
#' covariate' values) and returns a vector of the same length containing null
#' probabilities between 0 and 1.
#' @param icovariate specification for the independent covariate, must be a function
#' which takes an integer value and returns a vector of the specified length to
#' be used as the independent covariate.
#' @param es_dist expected effect size under the alternative, can either be a single
#' numeric value or a function which takes an integer as input and returns a
#' numeric vector of effect sizes of the specified length.
#' @param ts_dist sampling distribution of effect size, must be a function taking two
#' inputs, a vector of true effect sizes and a logical value whether to return
#' the perturbed effect sizes or the corresponding standard error. The perturbed
#' effect sizes and standard errors are used to compute the test statistics.
#' @param null_dist distribution under the null used to calculate p-values from the
#' test statistic, must be a function which takes the full vector of null and
#' alternative test statistics and return the corresponding p-values.
#' @param execute logical whether benchmarking should be executed or if the simulated
#' data set should be returned. (default = TRUE)
#' @param seed integer seed for random number generator, ignored if NULL. (default = NULL)
#'
#' @return
#' SummarizedBenchmark generated by calling `buildBench` with the specified
#' BenchDesign object and a simulated data set. If `execute = FALSE`, the simulated
#' data set is returned.
#'
#' @details
#' If a function is specified for either of `effect_size` or `icovariate`, a function must also be
#' specified for the other parameter.
#' Within the function, the specified BenchDesign object is run against a simulated
#' data.frame with the following columns.
#' * `qvalue`: 0/1 indicator whether data simulated under null (0) or alternative (1)
#' * `effect_size`: simulated effect size - true effect size + sampling noise
#' * `test_statistic`: `effect_size` divided by `SE`
#' * `pval`: test p-value calculated from test-statistic using `null_dist`
#' * `ind_covariate`: the independent covariate
#' * `SE`: standard errors for the `test_statistic`
#'
#' @md
#' @author Patrick Kimes
simIteration <- function(X, bench, m, pi0, es_dist, ts_dist, null_dist,
icovariate, execute = TRUE, seed = NULL) {
if (!is.null(seed)) { set.seed(seed * X) }
stopifnot(is.function(es_dist))
stopifnot(is.function(ts_dist))
stopifnot(is.function(icovariate))
## simulate indep covariate from icovariate function
ind_cov <- icovariate(m)
stopifnot(length(ind_cov) == m)
## pi0 returns probability of null, sample alts from [1 - pi0s]
if (is.function(pi0)) {
pi0s <- pi0(ind_cov)
} else if (length(pi0) == 1) {
pi0s <- rep(pi0, m)
} else {
stop("pi0 must be function or single numeric value")
}
stopifnot(length(pi0s) == m)
stopifnot(min(pi0s) >= 0 && max(pi0s) <= 1)
alts <- which(rbinom(m, 1, 1 - pi0s) == 1)
## generate set of null (0) and alternative true effect sizes
es <- rep(0, m)
if (length(alts) > 0) {
es[alts] <- es_dist(length(alts))
}
## determine SE of test statistics
SE <- ts_dist(es, se = TRUE)
## perturb effect sizes to get observed effect sizes
es <- ts_dist(es)
stopifnot(length(es) == m)
## null/alt indicator
H <- rep(0, m)
H[alts] <- 1
## test stat for normal and t-dist is just (effect size) / SE
ts <- es / SE
## calculate p-values
pv <- null_dist(ts)
## organize in data.frame
dat <- data.frame(qvalue = H, effect_size = es, test_statistic = ts,
pval = pv, ind_covariate = ind_cov, SE = SE)
## return data if not executing
if (!execute) {
return(as_tibble(dat))
}
## run methods w/ specified independent covariate
res_i <- buildBench(bench, dat, truthCol = "qvalue", ptabular = TRUE,
ftCols = c("ind_covariate", "effect_size"))
## create data.frame w/ explicitly uninformative covariate
dat_u <- data.frame(qvalue = H, effect_size = es, test_statistic = ts,
pval = pv, ind_covariate = runif(m), SE = SE)
res_u <- buildBench(bench, dat_u, truthCol = "qvalue", ptabular = TRUE,
ftCols = c("ind_covariate", "effect_size"))
list(informative = res_i, uninformative = res_u)
}
## ##############################################################################
## informative covariates
## - if x ~ U(0,1), then expect 80%, 90%, 95% pi0 for all cases
## ##############################################################################
## step function (4 steps)
pi0_step <- function(pi0) {
stopifnot(pi0 <= 1L, pi0 >= 0L)
if (pi0 >= 0.5) {
function(x) {
pi0 - (1-pi0)/2 +
(1-pi0)/4 * (x > 0.25) +
(1-pi0)/2 * (x > 0.5) +
(1-pi0)/4 * (x > 0.75)
}
} else {
function(x) {
pi0 + pi0/2 -
pi0/4 * (x > 0.25) -
pi0/2 * (x > 0.5) -
pi0/4 * (x > 0.75)
}
}
}
## shifted/stretched cubic function
pi0_cubic <- function(pi0) {
stopifnot(pi0 <= 1L, pi0 >= 3/4)
function(x) { (1-x)^(1/3) * 4*(1-pi0) + 4 * pi0 - 3 }
}
## shifted/stretched cosine function (non-monotone)
pi0_cosine <- function(pi0) {
stopifnot(pi0 <= 1L, pi0 >= 0L)
if (pi0 >= 0.5) {
function(x) { (1-pi0) * cos(2*pi*x) + pi0 }
} else {
function(x) { pi0 - pi0 * cos(2*pi*x) }
}
}
## shifted/stretched sine function (non-monotone)
pi0_sine <- function(pi0) {
stopifnot(pi0 < 1L, pi0 >= 0L)
if (pi0 >= 0.5) {
function(x) { (1-pi0) * sin(2*pi*x) + pi0 }
} else {
function(x) { pi0 - pi0 * sin(2*pi*x) }
}
}
## simple step function to test different levels of "informativeness" with pi0 = 80%
pi0_varyinfo80 <- function(zz) {
stopifnot(zz <= 1L, zz >= 0L)
function(x) { ifelse(x > 4/5, 0.8 * (1 - zz), 0.2 * (4 + zz)) }
}
## logistic function to test different levels of "informativeness" with pi0 = 80%
pi0_varyinfo80l <- function(zz) {
stopifnot(zz <= 1L, zz >= 0L)
function(x) { zz / (1+exp(5-25*x)) + (1 - zz) * 0.8 }
}
## ##############################################################################
## CODE DERIVED FROM EXTERNAL SOURCE
## source: https://github.com/stephenslab/ash/blob/94a3347/code/dsc-shrink/datamakers/datamaker.R
## source: https://github.com/stephenslab/ash/blob/d06047e/code/dsc-shrink/add_named_scenarios.R
## license: not listed
## date: 2017/10/18
## modified: Patrick Kimes
## ##############################################################################
## test statistic distributions used in ASH simulations
params <- list(spiky = list(c(.4, .2, .2, .2), c(0, 0, 0, 0), c(.25, .5, 1, 2)),
skew = list(c(1/4, 1/4, 1/3, 1/6), c(-2, -1, 0, 1), c(2, 1.5, 1, 1)),
bignormal = list(c(1), c(0), c(4)),
bimodal = list(c(0.5, 0.5), c(-2, 2), c(1, 1)),
flat_top = list(rep(1/7, 7), c(-1.5, -1, -0.5, 0, 0.5, 1, 1.5), rep(0.5, 7)),
near_normal = list(c(2/3, 1/3), c(0, 0), c(1, 2)))
## function factory to generate mixture normal samplers
rnormmix_generator <- function(g) {
k <- unique(sapply(g, length))
stopifnot(length(k) == 1)
stopifnot(sum(g[[1]]) == 1)
function(n) {
comp <- sample(1:k, n, g[[1]], replace=TRUE)
rnorm(n, g[[2]][comp], g[[3]][comp])
}
}
## ASH effect-size distributions
sampler_spiky <- rnormmix_generator(params$spiky)
sampler_skew <- rnormmix_generator(params$skew)
sampler_bignormal <- rnormmix_generator(params$bignormal)
sampler_bimodal <- rnormmix_generator(params$bimodal)
sampler_flat_top <- rnormmix_generator(params$flat_top)
sampler_near_normal <- rnormmix_generator(params$near_normal)
## ##############################################################################
## END CODE DERIVED FROM EXTERNAL SOURCE
## ##############################################################################
## function factory: normal sampler
rnorm_generator <- function(m, s = 1) {
function(n) {
rnorm(n, m, s)
}
}
## ##############################################################################
## ##############################################################################
## function factory: observed effect size and SE generators
#' normal distribution samplers
rnorm_perturber <- function(s = 1) {
function(m, se = FALSE) {
if (se) { return(rep(1, length(m))) }
rnorm(length(m), m, s)
}
}
#' non-central t-distribution samplers
rt_perturber <- function(df) {
if (df <= 2) {
stop("var not defined for non-central t with df <= 2.")
}
function(ncp, se = FALSE) {
if (se) { return( sqrt(rchisq(length(ncp), df = df) / df) ) }
rnorm(length(ncp), ncp, 1)
}
}
#' non-central chi-square distirbution samplers
rchisq_perturber <- function(df) {
function(ncp, se = FALSE) {
stopifnot(ncp >= 0)
if (se) { return(rep(1, length(ncp))) }
rchisq(length(ncp), df, ncp)
}
}
## ##############################################################################
## ##############################################################################
## function factory: two-sided normal p-value calculator
## @param s standard deviation of null zero-centered normal distribution
rnorm_2pvaluer <- function(s) {
function(x) { 2 * (1 - pnorm(abs(x), 0, s)) }
}
## function factory: two-sided t p-value calculator
## @param df degrees of freedom of null zero-centered t-distribution
rt_2pvaluer <- function(df) {
function(x) { 2 * (1 - pt(abs(x), df)) }
}
## function factory: chi-sq p-value calculator
## @param df degrees of freedom of null zero-centered t-distribution
rchisq_pvaluer <- function(df) {
function(x) { 1 - pchisq(x, df) }
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.