#' Model fits for simulations for multi-arm designs with dichotomous outcome.
#'
#' Generally called from \code{cps.ma.binary()}, this function uses iterative
#' simulations to model significance of treatment effects for cluster-randomized
#' controlled trials. Users can modify a variety of parameters to suit the
#' simulations to their desired experimental situation.
#'
#' This function can be called directly in order to give the user access to the simulated
#' model fits in addition to the simulated data, the latter of which can also be accessed
#' here or using the function \code{cps.ma.binary()}. For the power estimates, use
#' \code{cps.ma.binary()}.
#'
#' Users (or the wrapper function) must specify the desired number of
#' simulations, number of subjects per
#' cluster, number of clusters per treatment arm, group proportions, and
#' between-cluster variance; significance level, analytic method, progress updates,
#' and simulated data set output may also be specified.
#'
#' @param nsim Number of datasets to simulate; accepts integer (required).
#' @param str.nsubjects Number of subjects per treatment group; accepts a list with one entry per arm.
#' Each entry is a vector containing the number of subjects per cluster (required).
#' @param probs Expected probability of outcome for each arm; accepts a vector of length \code{narms} (required).
#' @param sigma_b_sq Between-cluster variance; accepts a vector of length \code{narms} (required).
#' @param alpha Significance level; default = 0.05.
#' @param method Analytical method, either Generalized Linear Mixed Effects
#' Model (GLMM) or Generalized Estimating Equation (GEE). Accepts c('glmm',
#' 'gee') (required); default = 'glmm'.
#' @param quiet When set to FALSE, displays simulation progress and estimated
#' completion time; default is FALSE.
#' @param all.sim.data Option to output list of all simulated datasets;
#' default = FALSE.
#' @param seed Option to set.seed. Default is NULL.
#' @param poor.fit.override Option to override \code{stop()} if more than 25\%
#' of fits fail to converge.
#' @param low.power.override Option to override \code{stop()} if the power
#' is less than 0.5 after the first 50 simulations and every ten simulations
#' thereafter. On function execution stop, the actual power is printed in the
#' stop message. Default = FALSE. When TRUE, this check is ignored and the
#' calculated power is returned regardless of value.
#' @param timelimitOverride Logical. When FALSE, stops execution if the estimated
#' completion time is more than 2 minutes. Defaults to TRUE.
#' @param tdist Logical; use t-distribution instead of normal distribution for
#' simulation values, default = FALSE.
#' @param cores A string ("all") NA, or numeric value indicating the number of cores to be used for parallel computing.
#' When this option is set to NA, no parallel computing is used.
#' @param nofit Option to skip model fitting and analysis and return the simulated data.
#' Defaults to \code{FALSE}.
#' @param optmethod User-specified optimizer method. Accepts \code{bobyqa},
#' \code{Nelder_Mead}, and optimizers wrapped in the \code{optimx} package.
#' @param return.all.models Logical; Returns all of the fitted models and the simulated data.
#' Defaults to FALSE.
#'
#' @return A list with the following components:
#' \itemize{
#' \item List of length(nsim) containing gee- or glmm-fitted the model summaries.
#' \item Compares fitted model to a model for H0 using ML (anova).
#' \item List of data frames, each containing:
#' "y" (Simulated response value),
#' "trt" (Indicator for treatment group),
#' "clust" (Indicator for cluster)
#' \item A vector of logical values \code{nsim}.
#' When a model fails to converge, FALSE, otherwise TRUE.
#' }
#'
#' @examples
#' \dontrun{
#'
#' nsubjects.example <- list(c(200,200,200,250), c(150, 200, 200, 210), c(170, 200, 210))
#' probs.example <- c(0.25, 0.15, 0.3)
#' sigma_b_sq.example <- c(0.01, 0.01, 0.01)
#'
#' bin.ma.rct <- cps.ma.binary.internal (nsim = 10,
#' str.nsubjects = nsubjects.example,
#' probs = probs.example,
#' sigma_b_sq = sigma_b_sq.example,
#' alpha = 0.05, all.sim.data = FALSE,
#' poor.fit.override = TRUE,
#' seed = 123, cores="all",
#' optmethod = "Nelder-Mead")
#' }
#'
#' @author Alexandria C. Sakrejda (\email{acbro0@@umass.edu}), Alexander R. Bogdan, and Ken Kleinman (\email{ken.kleinman@@gmail.com})
#'
#' @noRd
cps.ma.binary.internal <-
function(nsim = 1000,
str.nsubjects = NULL,
probs = NULL,
sigma_b_sq = NULL,
alpha = 0.05,
quiet = FALSE,
method = 'glmm',
all.sim.data = FALSE,
seed = NA,
poor.fit.override = FALSE,
low.power.override = FALSE,
timelimitOverride = TRUE,
tdist = FALSE,
cores = cores,
nofit = FALSE,
optmethod = optmethod,
return.all.models = FALSE) {
# Create vectors to collect iteration-specific values
simulated.datasets <- list()
# Create NCLUSTERS, NARMS, from str.nsubjects
narms = length(str.nsubjects)
nclusters = sapply(str.nsubjects, length)
# This container keeps track of how many models converged
converged <- rep(FALSE, nsim)
# Create a container for the simulated.dataset and model output
sim.dat = vector(mode = "list", length = nsim)
model.values <- list()
model.compare <- list()
# option for reproducibility
if (!is.na(seed)) {
set.seed(seed = seed)
}
# Create indicators for treatment group & cluster for the sim.data output
trt1 = list()
clust1 = list()
index <- 0
for (arm in 1:length(str.nsubjects)) {
trt1[[arm]] = list()
clust1[[arm]] = list()
for (cluster in 1:length(str.nsubjects[[arm]])) {
index <- index + 1
trt1[[arm]][[cluster]] = rep(arm, sum(str.nsubjects[[arm]][[cluster]]))
clust1[[arm]][[cluster]] = rep(index, sum(str.nsubjects[[arm]][[cluster]]))
}
}
# Calculate log odds for each group
logit.p <- list()
for (i in 1:length(probs)) {
logit.p[[i]] = log(probs[i] / (1 - probs[i]))
}
logit.p <- unlist(logit.p)
#Alert the user if using t-distribution
if (tdist == TRUE) {
print("using t-distribution because tdist = TRUE")
}
#make the simulated data
trt <- as.factor(unlist(trt1))
clust <- as.factor(unlist(clust1))
if (length(trt) != length(clust)) {
stop("trt and clust are not the same length, see line 134")
}
sim.dat <- matrix(nrow = length(clust), ncol = nsim)
# function to produce the simulated data
make.sim.dat <- function(tdist = tdist,
logit.p = logit.p,
nclusters = nclusters,
sigma_b_sq = sigma_b_sq,
str.nsubjects = str.nsubjects) {
# Generate between-cluster effects for non-treatment and treatment
if (tdist == TRUE) {
randint = mapply(function(n, df)
stats::rt(n, df = df),
n = nclusters,
df = Inf)
} else {
randint = mapply(
function(nc, s, mu)
stats::rnorm(nc, mean = mu, sd = sqrt(s)),
nc = nclusters,
s = sigma_b_sq,
mu = 0
)
}
if (typeof(randint) == "list") {
randint.holder <- list()
for (j in 1:length(logit.p)) {
randint.holder[[j]] <- logit.p[j] + randint[[j]]
}
randintrandint <-
sapply(randint.holder, expit)
} else {
randint.holder <-
matrix(nrow = nclusters[1], ncol = length(logit.p))
for (j in 1:length(logit.p)) {
randint.holder[, j] <- logit.p[j] + randint[, j]
}
randintrandint <- expit(randint.holder)
}
# Create y-value
y.intercept <- vector(mode = "numeric",
length = length(unlist(str.nsubjects)))
y.intercept <- sapply(1:sum(nclusters),
function(x)
rep(unlist(randintrandint)[x],
length.out = unlist(str.nsubjects)[x]))
y <-
sapply(unlist(y.intercept), function(x)
stats::rbinom(1, 1, x))
y <- as.numeric(y)
return(y)
} #end make.sim.dat function definition
sim.dat <-
data.frame(
as.factor(trt),
as.factor(clust),
replicate(
nsim,
make.sim.dat(
tdist = tdist,
logit.p = logit.p,
nclusters = nclusters,
sigma_b_sq = sigma_b_sq,
str.nsubjects = str.nsubjects
)
),
stringsAsFactors = TRUE
)
sim.num <- 1:nsim
temp <- paste0("y", sim.num)
colnames(sim.dat) <- c("arm", "cluster", temp)
#option to return simulated data only
if (nofit == TRUE) {
return(sim.dat)
}
#setup for parallel computing
## Do computations with multiple processors:
## Number of cores:
if (!is.na(cores)) {
if (cores == "all") {
nc <- parallel::detectCores()
} else {
nc <- cores
}
## Create clusters and initialize the progress bar
cl <-
parallel::makeCluster(rep("localhost", nc), type = "SOCK")
doParallel::registerDoParallel(cl)
}
pb <- txtProgressBar(min = 1, max = nsim, style = 3)
progress <- function(n)
setTxtProgressBar(pb, n)
opts <- list(progress = progress)
# define function to perform parallel vs sequential computing
if (is.na(cores)) {
`%fun%` <- foreach::`%do%`
} else {
`%fun%` <- foreach::`%dopar%`
}
## BEGIN GLMM METHOD
if (method == "glmm") {
# Update simulation progress information
sim.start <- Sys.time()
lme4::glmer(sim.dat[, 3] ~ trt + (1 |
clust),
family = stats::binomial(link = 'logit'))
avg.iter.time = as.numeric(difftime(Sys.time(), sim.start, units = 'secs'))
time.est = avg.iter.time * (nsim) / 60
hr.est = time.est %/% 60
min.est = round(time.est %% 60, 3)
#time limit override (for Shiny)
if (min.est > 2 && timelimitOverride == FALSE) {
stop(paste0(
"Estimated completion time: ",
hr.est,
'Hr:',
min.est,
'Min'
))
}
if (quiet == FALSE) {
message(
paste0(
'Begin simulations :: Start Time: ',
Sys.time(),
' :: Estimated completion time: ',
hr.est,
'Hr:',
min.est,
'Min'
)
)
# initialize progress bar
if (is.na(cores)) {
prog.bar = progress::progress_bar$new(
format = "(:spin) [:bar] :percent eta :eta",
total = 5,
clear = FALSE,
show_after = 0
)
prog.bar$tick(0)
}
}
if (is.na(cores) & quiet == FALSE) {
# Iterate progress bar
prog.bar$update(1 / 5)
Sys.sleep(1 / 100)
}
# Create simulation loop
if (!is.na(cores) & quiet == FALSE) {
message("Fitting models")
}
my.mod <- foreach::foreach(
i = 1:nsim,
.options.parallel = opts,
.packages = c("lme4", "optimx"),
.inorder = FALSE
) %fun% {
lme4::glmer(
sim.dat[, i + 2] ~ trt + (1 | clust),
family = stats::binomial(link = 'logit')
)
}
if (is.na(cores) & quiet == FALSE) {
# Iterate progress bar
prog.bar$update(3 / 5)
Sys.sleep(1 / 100)
}
for (i in 1:nsim) {
converged[i] <-
ifelse(is.null(my.mod[[i]]@optinfo$conv$lme4$messages),
TRUE,
FALSE)
}
# option to stop the function early if fits are singular
if (poor.fit.override == FALSE & i > 50) {
if (sum(unlist(converged), na.rm = TRUE) < (nsim * .75)) {
stop("more than 25% of simulations are singular fit: check model specifications")
}
}
# refit once if model did not converge
idx <- which(converged == FALSE)
if (length(idx > 0)) {
for (j in idx)
my.mod[[j]] <- lme4::glmer(
sim.dat[, j + 2] ~ trt + (1 | clust),
family = stats::binomial(link = 'logit')
)
converged[j] <-
ifelse(is.null(my.mod[[j]]@optinfo$conv$lme4$messages),
TRUE,
FALSE)
}
if (!is.na(cores) & quiet == FALSE) {
message("\r Performing null model comparisons")
}
# get the overall p-values (>Chisq)
model.compare <- foreach::foreach(
i = 1:nsim,
.options.parallel = opts,
.packages = c("car", "optimx"),
.inorder = FALSE
) %fun% {
car::Anova(my.mod[[i]], type = "II")
}
for (i in 1:nsim) {
# stop the loop if power is <0.5
if (low.power.override == FALSE &&
i > 50 &&
(i %% 10 == 0) && length(model.compare) != 0) {
temp.power.checker <-
try(matrix(
unlist(model.compare[1:i]),
ncol = 3,
nrow = i,
byrow = TRUE
))
sig.val.temp <-
ifelse(temp.power.checker[, 3][1:i] < alpha, 1, 0)
pval.power.temp <- sum(sig.val.temp) / i
if (pval.power.temp < 0.5) {
stop(
paste(
"Calculated power is < ",
pval.power.temp,
", auto stop at simulation ",
i,
". Set low.power.override==TRUE to run the simulations anyway.",
sep = ""
)
)
}
}
}
if (is.na(cores) & quiet == FALSE) {
# Iterate progress bar
prog.bar$update(4 / 5)
Sys.sleep(1 / 100)
}
# get the model summaries
if (!is.na(cores) & quiet == FALSE) {
message("\r Retrieving model summaries")
}
model.values <-
foreach::foreach(
i = 1:nsim,
.options.parallel = opts,
.packages = "car",
.inorder = FALSE
) %fun% {
summary(my.mod[[i]])
}
if (is.na(cores) & quiet == FALSE) {
# Iterate progress bar
prog.bar$update(5 / 5)
Sys.sleep(1 / 100)
}
# turn off parallel computing
if (!is.na(cores)) {
#stop the progress bar
close(pb)
parallel::stopCluster(cl)
}
} #end of GLMM method
# Fit GEE (geeglm)
if (method == 'gee') {
sim.start <- Sys.time()
geepack::geeglm(
sim.dat[, 3] ~ trt,
family = stats::binomial(link = 'logit'),
id = clust,
corstr = "exchangeable"
)
avg.iter.time <-
as.numeric(difftime(Sys.time(), sim.start, units = 'secs'))
time.est = avg.iter.time * (nsim - 1) / 60
hr.est = time.est %/% 60
min.est = round(time.est %% 60, 3)
#time limit override (for Shiny)
if (min.est > 2 && timelimitOverride == FALSE) {
stop(paste0(
"Estimated completion time: ",
hr.est,
'Hr:',
min.est,
'Min'
))
}
if (quiet == FALSE && nsim > 100) {
message(
paste0(
'Begin simulations :: Start Time: ',
Sys.time(),
' :: Estimated completion time: ',
hr.est,
'Hr:',
min.est,
'Min'
)
)
# initialize progress bar
if (is.na(cores)) {
prog.bar = progress::progress_bar$new(format = "(:spin) [:bar] :percent eta :eta",
total = 5,
clear = FALSE)
prog.bar$tick(0)
}
}
if (!is.na(cores) & quiet == FALSE) {
message("Fitting models")
}
if (is.na(cores) & quiet == FALSE) {
# Iterate progress bar
prog.bar$update(2 / 5)
Sys.sleep(1 / 100)
}
my.mod <- foreach::foreach(
i = 1:nsim,
.options.parallel = opts,
.packages = "geepack",
.inorder = FALSE
) %fun% {
geepack::geeglm(
sim.dat[, i + 2] ~ trt,
family = stats::binomial(link = 'logit'),
id = clust,
corstr = "exchangeable"
)
}
# check for gee convergence
for (i in 1:length(my.mod)) {
converged[i] <- ifelse(summary(my.mod[[i]])$error == 0, TRUE, FALSE)
}
if (!is.na(cores) & quiet == FALSE) {
message("Performing null model comparisons")
}
if (is.na(cores) & quiet == FALSE) {
# Iterate progress bar
prog.bar$update(3 / 5)
Sys.sleep(1 / 100)
}
# get the overall p-values (>Chisq)
model.compare <-
foreach::foreach(i = 1:nsim, .inorder = FALSE) %fun% {
anova(my.mod[[i]])
}
if (is.na(cores) & quiet == FALSE) {
# Iterate progress bar
prog.bar$update(4 / 5)
Sys.sleep(1 / 100)
}
if (!is.na(cores) & quiet == FALSE) {
message("Retrieving model summaries")
}
# get the model summaries
model.values <-
foreach::foreach(i = 1:nsim,
.packages = "car",
.inorder = FALSE) %fun% {
summary(my.mod[[i]])
}
# turn off parallel computing
if (!is.na(cores)) {
#stop the progress bar
close(pb)
parallel::stopCluster(cl)
}
} # end of GEE method
## Output objects
if (all.sim.data == TRUE) {
complete.output.internal <- list(
"estimates" = model.values,
"model.comparisons" = model.compare,
"converged" = unlist(converged),
"sim.data" = data.frame(sim.dat)
)
} else {
complete.output.internal <- list(
"estimates" = model.values,
"model.comparisons" = model.compare,
"converged" = unlist(converged)
)
}
return(complete.output.internal)
} #end of function
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.