Nothing
## ----setup, include=FALSE-----------------------------------------------------
library(knitr)
code <- file.path("false-discovery-rate",
c("model_functions.R",
"method_functions.R",
"eval_functions.R",
"main.R"))
code_lastmodified <- max(file.info(code)$mtime)
sapply(code, read_chunk)
## -----------------------------------------------------------------------------
library(simulator)
## ---- echo = FALSE, results = 'hide', warning = FALSE, message = FALSE--------
library(mvtnorm)
make_correlated_pvalues <- function(n, pi0, rho) {
# Gaussian copula model...
#
# n pvalues, the first n*pi0 of which are null, coming from a multivariate
# normal with all correlations rho.
sigma <- matrix(rho, n, n)
diag(sigma) <- 1
n0 <- round(n * pi0)
delta <- 2 # size of signal
mu <- rep(c(0, delta), c(n0, n - n0)) # n0 are null
new_model(name = "correlated-pvalues",
label = sprintf("pi0 = %s, rho = %s", pi0, rho),
params = list(n = n, rho = rho, sigma = sigma,
pi0 = pi0, mu = mu, delta = delta,
nonnull = which(mu != 0)),
simulate = function(n, mu, sigma, nsim) {
# this function must return a list of length nsim
x <- rmvnorm(nsim, mean = mu, sigma = sigma)
pvals <- 1 - pnorm(x)
return(split(pvals, row(pvals))) # make each row its own list element
})
}
make_bh <- function(q) {
# q is the desired level of control for the FDR
new_method(name = paste0("bh", q),
label = sprintf("BH (q = %s)", q),
settings = list(q = q),
method = function(model, draw, q) {
p <- sort(draw)
cutline <- seq(model$n) * q / model$n
threshold <- max(p[p <= cutline], 0)
list(rejected = which(draw <= threshold))
})
}
qvalues <- c(0.05, 0.1, 0.2)
bh_methods <- sapply(qvalues, make_bh)
fdp <- new_metric(name = "fdp",
label = "false discovery proportion",
metric = function(model, out) {
fp <- setdiff(out$rejected, model$nonnull)
nd <- max(length(out$rejected), 1)
return(length(fp) / nd)
})
nd <- new_metric(name = "nd",
label = "number of discoveries",
metric = function(model, out) length(out$rejected))
## ---- eval = FALSE------------------------------------------------------------
# name_of_simulation <- "fdr"
# sim <- new_simulation(name = name_of_simulation,
# label = "False Discovery Rate") %>%
# generate_model(make_correlated_pvalues, seed = 123,
# n = 20,
# pi0 = list(0.8, 1),
# rho = list(-0.01, 0, 0.1, 0.9),
# vary_along = c("pi0", "rho")) %>%
# simulate_from_model(nsim = 25, index = 1:4) %>%
# run_method(bh_methods, parallel = list(socket_names = 2)) %>%
# evaluate(list(fdp, nd))
## ---- echo = FALSE, results = 'hide', message = FALSE, warning = FALSE--------
name_of_simulation <- "fdr"
sim_lastmodified <- file.info(sprintf("files/sim-%s.Rdata",
name_of_simulation))$mtime
if (is.na(sim_lastmodified) || code_lastmodified > sim_lastmodified) {
sim <- new_simulation(name = name_of_simulation,
label = "False Discovery Rate") %>%
generate_model(make_correlated_pvalues, seed = 123,
n = 20,
pi0 = list(0.8, 1),
rho = list(-0.01, 0, 0.1, 0.9),
vary_along = c("pi0", "rho")) %>%
simulate_from_model(nsim = 25, index = 1:4) %>%
run_method(bh_methods, parallel = list(socket_names = 2)) %>%
evaluate(list(fdp, nd))
sim <- sim %>%
simulate_from_model(nsim = 25, index = 5:8) %>%
run_method(bh_methods, parallel = list(socket_names = 2)) %>%
evaluate(list(fdp, nd))
}
sim <- load_simulation(name_of_simulation) %>% subset_simulation(index = 1:4)
## ---- results = 'asis'--------------------------------------------------------
sim %>%
subset_simulation(rho == 0) %>%
tabulate_eval(metric_name = "fdp", output_type = "html",
format_args = list(digits = 1, nsmall = 2))
## ---- eval = FALSE------------------------------------------------------------
# sim <- load_simulation("fdr")
## ---- eval = FALSE------------------------------------------------------------
# sim <- sim %>%
# simulate_from_model(nsim = 25, index = 5:8) %>%
# run_method(bh_methods, parallel = list(socket_names = 2)) %>%
# evaluate(list(fdp, nd))
## ---- echo = FALSE, results = 'hide', message = FALSE, warning = FALSE--------
sim <- load_simulation("fdr") # load the one with index = 1:8
## ---- results = 'asis'--------------------------------------------------------
sim %>%
subset_simulation(rho == 0) %>%
tabulate_eval(metric_name = "fdp", output_type = "html",
format_args = list(digits = 1, nsmall = 2))
## ---- fig.width = 6, fig.height = 4, results = 'hide', warning = FALSE, message = FALSE----
sim %>%
subset_simulation(rho == 0) %>%
plot_eval(metric_name = "fdp")
## ---- fig.width = 6, fig.height = 4, results = 'hide', warning = FALSE, message = FALSE----
sim %>%
subset_simulation(pi0 == 0.8) %>%
plot_eval(metric_name = "fdp", varying = "rho")
## ---- fig.width = 6, fig.height = 4, results = 'hide', warning = FALSE, message = FALSE----
sim %>%
subset_simulation(pi0 == 0.8) %>%
plot_eval_by(metric_name = "fdp", varying = "rho")
## ---- fig.width = 6, fig.height = 4, results = 'hide', warning = FALSE, message = FALSE----
sim %>%
subset_simulation(pi0 == 1) %>%
plot_eval_by(metric_name = "fdp", varying = "rho")
## ---- eval = FALSE------------------------------------------------------------
# sim2 <- subset_simulation(sim, pi0 == 1 & rho == -0.01) %>%
# rename("fdr-negdep") %>%
# relabel("BH Procedure under negative dependence") %>%
# simulate_from_model(nsim = 25, index = 9:20) %>%
# run_method(bh_methods, parallel = list(socket_names = 2)) %>%
# evaluate(list(fdp, nd))
## ---- echo = FALSE, results = 'hide', message = FALSE, warning = FALSE--------
sim_lastmodified <- file.info(sprintf("files/sim-%s.Rdata",
"negdep"))$mtime
if (is.na(sim_lastmodified) || code_lastmodified > sim_lastmodified) {
sim2 <- subset_simulation(sim, pi0 == 1 & rho == -0.01) %>%
rename("fdr-negdep") %>%
relabel("BH Procedure under negative dependence") %>%
simulate_from_model(nsim = 25, index = 9:20) %>%
run_method(bh_methods, parallel = list(socket_names = 2)) %>%
evaluate(list(fdp, nd))
} else{
sim2 <- load_simulation("negdep")
}
## ---- results = 'asis'--------------------------------------------------------
tabulate_eval(sim2, metric_name = "fdp", output_type = "html",
format_args = list(digits = 1, nsmall = 2))
## -----------------------------------------------------------------------------
m <- model(sim, pi0 == 1 & rho == -0.01)
m2 <- model(sim2)
all.equal(m, m2)
d <- draws(sim, pi0 == 1 & rho == -0.01)
d2 <- draws(sim2, index = 1:8)
all.equal(d, d2)
## ---- eval = FALSE------------------------------------------------------------
# library(mvtnorm)
# make_correlated_pvalues <- function(n, pi0, rho) {
# # Gaussian copula model...
# #
# # n pvalues, the first n*pi0 of which are null, coming from a multivariate
# # normal with all correlations rho.
# sigma <- matrix(rho, n, n)
# diag(sigma) <- 1
# n0 <- round(n * pi0)
# delta <- 2 # size of signal
# mu <- rep(c(0, delta), c(n0, n - n0)) # n0 are null
# new_model(name = "correlated-pvalues",
# label = sprintf("pi0 = %s, rho = %s", pi0, rho),
# params = list(n = n, rho = rho, sigma = sigma,
# pi0 = pi0, mu = mu, delta = delta,
# nonnull = which(mu != 0)),
# simulate = function(n, mu, sigma, nsim) {
# # this function must return a list of length nsim
# x <- rmvnorm(nsim, mean = mu, sigma = sigma)
# pvals <- 1 - pnorm(x)
# return(split(pvals, row(pvals))) # make each row its own list element
# })
# }
## ---- eval = FALSE------------------------------------------------------------
# make_bh <- function(q) {
# # q is the desired level of control for the FDR
# new_method(name = paste0("bh", q),
# label = sprintf("BH (q = %s)", q),
# settings = list(q = q),
# method = function(model, draw, q) {
# p <- sort(draw)
# cutline <- seq(model$n) * q / model$n
# threshold <- max(p[p <= cutline], 0)
# list(rejected = which(draw <= threshold))
# })
# }
#
# qvalues <- c(0.05, 0.1, 0.2)
# bh_methods <- sapply(qvalues, make_bh)
## ---- eval = FALSE------------------------------------------------------------
# fdp <- new_metric(name = "fdp",
# label = "false discovery proportion",
# metric = function(model, out) {
# fp <- setdiff(out$rejected, model$nonnull)
# nd <- max(length(out$rejected), 1)
# return(length(fp) / nd)
# })
#
# nd <- new_metric(name = "nd",
# label = "number of discoveries",
# metric = function(model, out) length(out$rejected))
## ---- results='asis'----------------------------------------------------------
citation("simulator")
## ---- include=FALSE-----------------------------------------------------------
unlink("files", recursive = TRUE)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.