#' This function generates the simulated exposure and response data, based on a binary and an
#' optional continuous confounder.
#'
#' The exposure is generated as a function of the confounders and the response is then generated
#' as a function of the exposure and the confounders
#'
#' The binary confounder is labeled "U1" and the continuous "U2".
#' Confounders can be correlated, using the `rho` argument.
#' Linear moderation can also be introduced via the `modUB` argument between the continuous
#' confounder and the exposure/outcome.
#'
#' The method argument specifies how the exposure (E) and outcome (Y) variables are generated.
#' The "continuous_confounder" option means that E and Y are based on both the binary and
#' continuous confounder. The "no_continuous_confounder" option means that E and Y are a function
#' of just the binary confounder. If this is selected, the unmeasured confounder U2 is still generated
#' and may be correlated with U1, although it will be uncorrelated with both E and Y.
#' @export
#'
#' @param obs the number of observations in the simulated data (number of rows)
#' @param orUE the odds ratio OR(ue)
#' @param probE_0 probability that E=1 when (all confounders)= 0
#' @param probY_0 p(y=1/exposure=0 & confounders=0)
#' @param orEY_0 odds ratio OR(ey)
#' @param probU p(u=1) prevalence of the binary unmeasured confounder
#' @param modUB moderating effect of the UC on OR(ey)
#' @param orME odds ratio OR(me)
#' @param orMY_0 odds ratio OR(my/e=0)
#' @param rho correlation coefficient between U1 and U2
#' @param orUY_0
#' @param method
#' @return object of class 'UCsims' which is a list containing a dataframe of the simulated data, a vector of parameter values and the method.
#'
simulate_UC <- function(obs, orUE, probE_0, probY_0, orEY_0, probU,
modUB, orME, orMY_0, rho, orUY_0 = NULL,
method = c("continuous_confounder", "no_continuous_confounder")){
oddsE_0 <- probE_0 / (1 - probE_0)
oddsY_0 <- probY_0 / (1 - probY_0)
probY_1 <- orEY_0 * probY_0 / (1 - probY_0 + orEY_0 * probY_0)
if(is.null(orUY_0)) orUY_0 <- orUE
UP <- runif(obs)
Ulatent <- qnorm(1 - UP)
# binary unmeasured confounder:
U_binary <- as.integer(UP < probU)
# Continuous measured confounder correlated with Ulatent at value rho
M_continuous <- 0 + rho * Ulatent + sqrt(1 - rho * rho) * rnorm(obs)
method <- match.arg(method)
parameters = sapply(names(formals()), function(x) eval(parse(text=x)))
parm_names <- names(parameters)
parameters = as.numeric(parameters[names(parameters) != "method"])
names(parameters) <- parm_names[parm_names != "method"]
if(method == "continuous_confounder"){
# Generate data for the exposure as a function of the set of confounders
exposure <- as.integer(runif(obs) < plogis(log(oddsE_0) + log(orUE) * U_binary))
# Now generate data for the outcome as a function of the exposure and all confounders
y <- as.integer(runif(obs) < plogis(log(oddsY_0) + log(orEY_0) * exposure + log(orUY_0) * U_binary + log(modUB)*(U_binary)*(exposure)))
} else if(method == "no_continuous_confounder"){
# Generate data for the exposure as a function of the set of confounders
# Model: binary UC with measured confounder and linear moderation
exposure <- as.integer(runif(obs) < plogis(log(oddsE_0) + log(orUE) * U_binary + log(orME) * M_continuous))
# Now generate data for the outcome as a function of the exposure and all confounders
y <- as.integer(runif(obs) < plogis(log(oddsY_0) + log(orEY_0) * exposure + log(orUY_0) * U_binary +
log(modUB) * (U_binary) * exposure + log(orMY_0) * M_continuous))
} else stop("You need to specify a method for determining exposure and outcome!")
structure(list(data = data.frame(E = exposure, Y = y, U1 = U_binary, U2 = M_continuous),
parameters = c(parameters,
oddsE_0 = oddsE_0, oddsY_0 = oddsY_0, probY_1 = probY_1),
method = method), class = "UCsims")
}
#' wrapper function for simulate_UC to stop any perfectly collinear datasets being generated
catch_collinear <- function(orUE, BC, MC, subgroups, ...){
dat <- simulate_UC(orUE = orUE, ...)
if(mean(dat$data$Y[dat$data$E == 0 & dat$data[[BC]] == 1]) %in% c(0, 1) ||
mean(dat$data$Y[dat$data$E == 1 & dat$data[[BC]] == 1]) %in% c(0, 1)) {
message("Regenerating data due to collinearity")
catch_collinear(orUE, BC, MC, subgroups, ...)
} else dat
}
#' wrapper function for adjust functions
#' Runs both functions over a range of orUE levels
#' returns a dataframe of parameter values from the simulation and the models
#' calculates the adjustments for binary and continuous UCs
#' @export
run_UC_levels <- function(orUE_levels, method = "no_moderator", adjust_fn = lin_adjust,
BC = "U1", MC = "U2", subgroups = 5, ...){
as.data.frame(t(sapply(orUE_levels, function(lev){
dat <- catch_collinear(orUE = lev, BC = BC, method = method, ...) #sims_fn(orUE = lev, ...)
adjust_fn(dat, subgroups = subgroups, BC = BC, MC = MC)
})))
}
#' This is the replication function.
#' Runs run_UC_levels over reps replicates and binds to a single dataframe
#' Runs simulations in parallel if cores > 1 )
#' Set cores to 1 if running on a windows machine
#' @export
rep_sims <- function(orUE_levels, reps = 10, cores = 1, ...){
do.call(`rbind`, mclapply(1:reps, function(x){
d <- run_UC_levels(orUE_levels = orUE_levels, ...)
d$reps <- x
cat(".")
d
}, mc.cores = cores))
}
#' Helper function to automate running different conditions
#' @export
run_sims <- function(Ubs, aggregator = median, rawdata = TRUE, agg_subset = NULL, ...){
dat <- rep_sims(orUE_levels = Ubs, ...)
if(!is.null(agg_subset)){
dat_reduced <- dat[, agg_subset]
}else dat_reduced <- dat
if(rawdata){
list(data = dat, aggregates = aggregate(dat_reduced, by = list(orUE = dat_reduced$orUE), FUN = aggregator))
} else aggregate(dat_reduced, by = list(orUE = dat_reduced$orUE), FUN = aggregator)
}
#' Helper function for overall_prev
#' This determines how the new values are generated
#' @export
iter_fn_1 <- function(prob_in, target, par_mean){
prob_out <- prob_in * 2 * target / (target + par_mean)
if(prob_out > 1) prob_out <- 1
if(prob_out < 0) prob_out <- 0
prob_out
}
#' This function uses an iterative algorithm to obtain overall prevalences of Y and E.
#' @export
overall_prev <- function(method, iter_fn = iter_fn_1, pY_target, pE_target, probE_0 = 0.2,
probY_0 = 0.7, tol_Y = 0.0005, tol_E = 0.0005, verbose = TRUE, ...){
sim_dat <- simulate_UC(..., probE_0 = probE_0, probY_0 = probY_0, method = method)
diff_Y <- 1
diff_E <- 1
iters <- 0
while(diff_Y > tol_Y || diff_E > tol_E){
old_pE0 <- sim_dat$parameters[["probE_0"]]
old_pY0 <- sim_dat$parameters[["probY_0"]]
new_pE0 <- iter_fn(old_pE0, pE_target, mean(sim_dat$data$E))
new_pY0 <- iter_fn(old_pY0, pY_target, mean(sim_dat$data$Y))
diff_E <- abs(mean(sim_dat$data$E) - pE_target)
diff_Y <- abs(mean(sim_dat$data$Y) - pY_target)
sim_dat <- simulate_UC(..., probE_0 = new_pE0, probY_0 = new_pY0, method = method)
iters <- iters + 1
if(verbose){
message("iteration #", iters,
"\n\tprobE_0 = ", old_pE0, ", newPE = ", new_pE0, ", targetE = ", pE_target, ", meanE = ", mean(sim_dat$data$E),
"\n\tprobY_0 = ", old_pY0, ", newPY = ", new_pY0, ", targetY = ", pY_target, ", meanY = ", mean(sim_dat$data$Y))
} else {
cat(".")
}
}
message("tolerance met in ", iters, " iterations.")
sim_dat
}
#' This function generates stable parameters across a range of ORs and then produces data for replicates at these parameter levels.
#'
#' @export
#'
#' @param Ubs vector of odds ratios
#' @param pY_target numeric target for the total population prevalence of Y
#' @param pE_target numeric target for the total population prevalence of E
#' @param obs_stable number of observations for the stabilising run
#' @param obs_run number of observations for the main run
#' @param method the simulation method to be used. See `simulate_UC`
#' @param iter_fn function defining how new stabilising parameters are generated
#' @param adjust_fn the method to be used for adjustment for unmeasured confounders
#' @param cores number of processor cores to use (passed to mclapply)
#' @param \dots arguments to be passed to sims_fn
#' @return list containing data and aggregated data
stable_run <- function(Ubs, pY_target, pE_target, obs_stable, obs_run, method,
iter_fn = iter_fn_1, adjust_fn, cores = 1, reps = 10, agg_subset = NULL, subgroups = NULL, ...){
out <- do.call(`rbind`, lapply(Ubs, function(U){
message("orU = ", U)
message("Iterating to find stable parameters...")
stable_params <- lapply(overall_prev(orUE = U, iter_fn = iter_fn, method = method,
pY_target = pY_target, pE_target = pE_target, obs = obs_stable, verbose = FALSE, ...)$parameters,
function(x) x)
stable_params$obs <- obs_run
stable_params <- stable_params[names(stable_params) %in% names(formals(simulate_UC))]
stable_params[["orUE"]] <- NULL
message("Running simulations with stablised parameters (", reps, " reps)...")
do.call(`run_sims`, c(stable_params, cores = cores, Ubs = U, method = method,
adjust_fn = adjust_fn, reps = reps, subgroups = subgroups))$data
}))
if(!is.null(agg_subset)){
dat_reduced <- out[, agg_subset]
} else dat_reduced <- out
list(data = out, aggregates = aggregate(dat_reduced, by = list(orUE = dat_reduced$orUE), FUN = median))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.