Nothing
#' Model fits for simulations for multi-arm designs with count outcome.
#'
#' Generally called from \code{cps.ma.count()}, this function uses iterative
#' simulations to model significance of treatment effects for cluster-randomized
#' controlled trials. Users can modify a variety of parameters to suit the
#' simulations to their desired experimental situation.
#'
#' This function can be called directly in order to give the user access to the
#' simulated model fits in addition to the simulated data, the latter of which
#' can also be accessed here or using the function \code{cps.ma.count()}. This
#' function does not produce power estimates, just simulated data and model fits.
#'
#' To call this function independenty, users must specify the desired number of
#' simulations, number of subjects per cluster, number of clusters per treatment
#' arm, group proportions, and between-cluster variance. Significance level,
#' analytic method, progress updates and simulated data set output may also be
#' specified.
#'
#' @param nsim Number of datasets to simulate; accepts integer (required).
#'
#' @param family A string consisting of either 'poisson' or 'neg.binom'.
#'
#' @param str.nsubjects Number of subjects per treatment group; accepts a list
#' with one entry per arm. Each entry is a vector containing the number of
#' subjects per cluster (required).
#'
#' @param counts Expected outcome for each arm; accepts a vector
#' of length \code{narms} (required).
#'
#' @param sigma_b_sq Between-cluster variance; accepts a vector of length
#' \code{narms} (required).
#'
#' @param alpha Significance level; default = 0.05.
#'
#' @param method Analytical method, either Generalized Linear Mixed Effects
#' Model (GLMM) or Generalized Estimating Equation (GEE). Accepts c('glmm',
#' 'gee') (required); default = 'glmm'.
#'
#' @param analysis Family used for data analysis; currently only applicable when \code{method = 'glmm'}.
#' Accepts c('poisson', 'neg.binom'); default = 'poisson'. Required.
#'
#' @param negBinomSize Only used when generating simulated data from the
#' negative binomial (family = 'neg.binom'), this is the target for number of
#' successful trials, or the dispersion parameter (the shape parameter of the gamma
#' mixing distribution). Must be positive and defaults to 1. Required when
#' family = 'neg.binom'.
#'
#' @param quiet When set to FALSE, displays simulation progress and estimated
#' completion time; default is FALSE.
#'
#' @param all.sim.data Option to output list of all simulated datasets;
#' default = FALSE.
#'
#' @param seed Option to set.seed. Default is NULL.
#'
#' @param poor.fit.override Option to override \code{stop()} if more than 25\%
#' of fits fail to converge.
#'
#' @param low.power.override Option to override \code{stop()} if the power
#' is less than 0.5 after the first 50 simulations and every ten simulations
#' thereafter. On function execution stop, the actual power is printed in the
#' stop message. Default = FALSE. When TRUE, this check is ignored and the
#' calculated power is returned regardless of value.
#'
#' @param timelimitOverride Logical. When FALSE, stops execution if the estimated
#' completion time is more than 2 minutes. Defaults to TRUE.
#'
#' @param tdist Logical; use t-distribution instead of normal distribution for
#' simulation values, default = FALSE.
#'
#' @param cores A string ("all") NA, or numeric value indicating the number of
#' cores to be used for parallel computing. When this option is set to NA, no
#' parallel computing is used.
#'
#' @param nofit Option to skip model fitting and analysis and return the
#' simulated data. Defaults to \code{FALSE}.
#'
#' @param opt Option to fit with a different optimizer algorithm. Setting this
#' to "auto" tests an example fit using the \code{nloptr} package and selects
#' the first algorithm that converges.
#'
#' @return A list with the following components:
#' \itemize{
#' \item List of length(nsim) containing gee- or glmm-fitted the model
#' summaries.
#' \item Compares fitted model to a model for H0 using ML (anova).
#' \item List of data frames, each containing:
#' "y" (Simulated response value),
#' "trt" (Indicator for treatment group),
#' "clust" (Indicator for cluster)
#' \item A vector of length \code{nsim} consisting of 1 and 0.
#' When a model fails to converge, failed.to.converge==1, otherwise 0.
#' }
#'
#' @examples
#' \dontrun{
#'
#' nsubjects.example <- list(c(20,20,20,25), c(15, 20, 20, 21), c(17, 20, 21))
#' counts.example <- c(75, 120, 100)
#' sigma_b_sq.example <- c(0.2, 0.1, 0.1)
#'
#' count.ma.rct <- cps.ma.count.internal (nsim = 10,
#' str.nsubjects = nsubjects.example,
#' counts = counts.example,
#' sigma_b_sq = sigma_b_sq.example,
#' alpha = 0.05, all.sim.data = FALSE,
#' seed = 123, cores="all", poor.fit.override=TRUE,
#' opt = "NLOPT_LN_BOBYQA")
#' }
#'
#' @author Alexandria C. Sakrejda (\email{acbro0@@umass.edu}), Alexander R. Bogdan, and Ken Kleinman (\email{ken.kleinman@@gmail.com})
#'
#' @noRd
cps.ma.count.internal <-
function(nsim = 1000,
str.nsubjects = NULL,
counts = NULL,
family = "poisson",
analysis = "poisson",
negBinomSize = 1,
sigma_b_sq = NULL,
alpha = 0.05,
quiet = FALSE,
method = 'glmm',
all.sim.data = FALSE,
seed = NA,
poor.fit.override = FALSE,
low.power.override = FALSE,
timelimitOverride = TRUE,
tdist = FALSE,
cores = cores,
nofit = FALSE,
opt = opt) {
# Create vectors to collect iteration-specific values
simulated.datasets <- list()
goodopt <- opt
# Create NCLUSTERS, NARMS, from str.nsubjects
narms = length(str.nsubjects)
nclusters = sapply(str.nsubjects, length)
# This container keeps track of how many models failed to converge
converged <- rep(FALSE, nsim)
# Create a container for the simulated.dataset and model output
sim.dat = vector(mode = "list", length = nsim)
model.values <- list()
model.compare <- list()
# option for reproducibility
if (!is.na(seed)) {
set.seed(seed = seed)
}
# Create indicators for treatment group & cluster for the sim.data output
trt1 = list()
clust1 = list()
index <- 0
for (arm in 1:length(str.nsubjects)) {
trt1[[arm]] = list()
clust1[[arm]] = list()
for (cluster in 1:length(str.nsubjects[[arm]])) {
index <- index + 1
trt1[[arm]][[cluster]] = rep(arm, sum(str.nsubjects[[arm]][[cluster]]))
clust1[[arm]][[cluster]] = rep(index, sum(str.nsubjects[[arm]][[cluster]]))
}
}
#Alert the user if using t-distribution
if (tdist == TRUE) {
print("using t-distribution because tdist = TRUE")
}
#make the simulated data
trt <- as.factor(unlist(trt1))
clust <- as.factor(unlist(clust1))
if (length(trt) != length(clust)) {
stop("trt and clust are not the same length, see line 134")
}
gc()
sim.dat <- matrix(nrow = length(clust), ncol = nsim)
# function to produce the simulated data
make.sim.dat <- function(tdist. = tdist,
counts. = counts,
nclusters. = nclusters,
sigma_b_sq. = sigma_b_sq,
str.nsubjects. = str.nsubjects,
family. = family) {
# Generate between-cluster effects for non-treatment and treatment
if (tdist. == TRUE) {
randint = mapply(function(n, df)
stats::rt(n, df = df),
n = nclusters.,
df = Inf)
} else {
randint = mapply(
function(nc, s, mu)
stats::rnorm(nc, mean = mu,
sd = sqrt(s)),
nc = nclusters.,
s = sigma_b_sq.,
mu = 0
)
}
if (typeof(randint) == "list") {
randint.holder <- list()
for (j in 1:length(counts.)) {
randint.holder[[j]] <- log(counts.[j]) + randint[[j]]
}
randintrandint <- sapply(randint.holder, exp)
} else {
randint.holder <-
matrix(nrow = nclusters.[1], ncol = length(counts.))
for (j in 1:length(counts.)) {
randint.holder[, j] <- log(counts.[j]) + randint[, j]
}
randintrandint <- exp(randint.holder)
}
# Create y-value
y.intercept <- vector(mode = "numeric",
length = sum(unlist(str.nsubjects.)))
y.intercept <- sapply(1:sum(nclusters.),
function(x)
rep(unlist(randintrandint)[x],
length.out = unlist(str.nsubjects.)[x]))
y.intercept <- as.vector(unlist(y.intercept))
if (family. == 'poisson') {
y <- stats::rpois(length(y.intercept), y.intercept)
}
if (family. == 'neg.binom') {
y <-
stats::rnbinom(length(y.intercept), size = negBinomSize, mu = y.intercept)
}
return(y)
}
sim.dat <- replicate(
nsim,
make.sim.dat(
tdist. = tdist,
nclusters. = nclusters,
sigma_b_sq. = sigma_b_sq,
str.nsubjects. = str.nsubjects,
counts. = counts,
family. = family
)
)
#option to return simulated data only
if (nofit == TRUE) {
sim.dat <- data.frame(trt, clust, sim.dat)
sim.num <- 1:nsim
temp <- paste0("y", sim.num)
colnames(sim.dat) <- c("arm", "cluster", temp)
return(sim.dat)
}
`%fun%` <- foreach::`%dopar%`
if (is.na(cores)) {
`%fun%` <- foreach::`%do%`
}
#setup for parallel computing
## Do computations with multiple processors:
## Number of cores:
if (!is.na(cores)) {
if (cores == "all") {
nc <- parallel::detectCores()
} else {
nc <- cores
}
## Create clusters and initialize the progress bar
cl <-
parallel::makeCluster(rep("localhost", nc), type = "SOCK")
doParallel::registerDoParallel(cl)
}
pb <- txtProgressBar(min = 1, max = nsim, style = 3)
progress <- function(n)
setTxtProgressBar(pb, n)
opts <- list(progress = progress)
# Update simulation progress information
sim.start <- Sys.time()
lme4::glmer(sim.dat[, 1] ~ as.factor(trt) + (1 |
clust),
family = stats::poisson(link = 'log'))
avg.iter.time = as.numeric(difftime(Sys.time(), sim.start, units = 'secs'))
time.est = avg.iter.time * (nsim - 1) / 60
hr.est = time.est %/% 60
min.est = round(time.est %% 60, 3)
#time limit override (for Shiny)
if (min.est > 2 && timelimitOverride == FALSE) {
stop(paste0("Estimated completion time: ",
hr.est,
'Hr:',
min.est,
'Min'))
}
if (quiet == FALSE) {
message(
paste0(
'Begin simulations :: Start Time: ',
Sys.time(),
' :: Estimated completion time: ',
hr.est,
'Hr:',
min.est,
'Min'
)
)
# initialize progress bar
if (is.na(cores)) {
prog.bar = progress::progress_bar$new(
format = "(:spin) [:bar] :percent eta :eta",
total = 5,
clear = FALSE,
show_after = 0
)
prog.bar$tick(0)
}
}
if (is.na(cores) & quiet == FALSE) {
# Iterate progress bar
prog.bar$update(1 / 5)
Sys.sleep(1 / 100)
}
if (method == "glmm") {
# Fit the models
if (!is.na(cores) & quiet == FALSE) {
message("Fitting models")
}
if (analysis == 'poisson') {
if (opt == "auto") {
message("testing optimizer algorithms:")
algoptions <- c(
"NLOPT_LN_BOBYQA",
"NLOPT_GN_CRS2_LM",
"NLOPT_LN_COBYLA",
"NLOPT_LN_NEWUOA",
"NLOPT_LN_NEWUOA_BOUND",
"NLOPT_LN_NELDERMEAD",
"NLOPT_LN_SBPLX"
)
for (i in 1:length(algoptions)) {
print(algoptions[i])
R.utils::withTimeout(Sys.sleep(10), timeout = 30)
model_flex1 <-
try(lme4::glmer(
sim.dat[, 1] ~ as.factor(trt) + (1 | clust),
family = stats::poisson(link = 'log'),
control = lme4::glmerControl(
optimizer = "nloptwrap",
optCtrl = list(
algorithm = algoptions[i],
maxeval = 1e7,
xtol_abs = 1e-9,
ftol_abs = 1e-9
)
)
))
if (class(model_flex1) != "try-error") {
if (is.null(model_flex1@optinfo$conv$lme4$messages)) {
goodopt <- algoptions[i]
break
}
}
}
}
my.mod <- foreach::foreach(
i = 1:nsim,
.options.parallel = opts,
.packages = c("lme4", "optimx", "nloptr"),
.inorder = FALSE
) %fun% {
lme4::glmer(
sim.dat[, i] ~ as.factor(trt) + (1 | clust),
family = stats::poisson(link = 'log'),
control = lme4::glmerControl(
optimizer = "nloptwrap",
calc.derivs = TRUE,
optCtrl = list(
algorithm = goodopt,
starttests = FALSE,
kkt = FALSE
)
)
)
}
}
if (analysis == 'neg.binom') {
if (opt == "auto") {
mod <- lme4::glmer.nb(
sim.dat[, 1] ~ as.factor(trt) + (1 | clust),
control = lme4::glmerControl(
optimizer = "nloptwrap",
calc.derivs = TRUE,
optCtrl = list(
method = goodopt,
starttests = FALSE,
kkt = FALSE
)
)
)
}
my.mod <- foreach::foreach(
i = 1:nsim,
.options.parallel = opts,
.packages = c("lme4", "optimx", "nloptr"),
.inorder = FALSE
) %fun% {
lme4::glmer.nb(
sim.dat[, i] ~ as.factor(trt) + (1 | clust),
control = lme4::glmerControl(
optimizer = "nloptwrap",
calc.derivs = TRUE,
optCtrl = list(
method = goodopt,
starttests = FALSE,
kkt = FALSE
)
)
)
}
}
if (is.na(cores) & quiet == FALSE) {
# Iterate progress bar
prog.bar$update(3 / 5)
Sys.sleep(1 / 100)
}
# re-fit models that didn't converge and
#option to stop the function early if fits are singular
for (i in 1:nsim) {
converged[i] <-
ifelse(is.null(my.mod[[i]]@optinfo$conv$lme4$messages),
TRUE,
FALSE)
}
singular <- which(unlist(converged) == FALSE)
for (j in singular) {
if (family == 'poisson') {
my.mod[[j]] <-
lme4::glmer(
sim.dat[, j] ~ as.factor(trt) + (1 | clust),
family = stats::poisson(link = 'log'),
control = lme4::glmerControl(
optimizer = "nloptwrap",
calc.derivs = TRUE,
optCtrl = list(
method = goodopt,
starttests = FALSE,
kkt = FALSE
)
)
)
}
if (family == 'neg.binom') {
my.mod[[j]] <-
lme4::glmer.nb(
sim.dat[, j] ~ as.factor(trt) + (1 | clust),
control = lme4::glmerControl(
optimizer = "nloptwrap",
calc.derivs = TRUE,
optCtrl = list(
method = goodopt,
starttests = FALSE,
kkt = FALSE
)
)
)
}
}
for (i in 1:nsim) {
converged[i] <-
ifelse(is.null(my.mod[[i]]@optinfo$conv$lme4$messages),
TRUE,
FALSE)
}
if (poor.fit.override == FALSE) {
if (sum(isFALSE(converged), na.rm = TRUE) > (nsim * .25)) {
stop("more than 25% of simulations are singular fit: check model specifications")
}
}
if (!is.na(cores) & quiet == FALSE) {
message("\r Performing null model comparisons")
}
# get the overall p-values (>Chisq)
model.compare <- foreach::foreach(
i = 1:nsim,
.options.parallel = opts,
.packages = "car",
.inorder = FALSE
) %fun% {
car::Anova(my.mod[[i]], type = "II")
}
if (is.na(cores) & quiet == FALSE) {
# Iterate progress bar
prog.bar$update(4 / 5)
Sys.sleep(1 / 100)
}
gc()
# get the model summaries
if (!is.na(cores) & quiet == FALSE) {
message("\r Retrieving model summaries")
}
model.values <-
foreach::foreach(
i = 1:nsim,
.options.parallel = opts,
.packages = "car",
.inorder = FALSE
) %fun% {
summary(my.mod[[i]])
}
if (is.na(cores) & quiet == FALSE) {
# Iterate progress bar
prog.bar$update(5 / 5)
Sys.sleep(1 / 100)
}
# turn off parallel computing
if (!is.na(cores)) {
#stop the progress bar
close(pb)
parallel::stopCluster(cl)
}
} #end of GLMM method
# Fit GEE (geeglm)
if (method == 'gee') {
sim.start <- Sys.time()
geepack::geeglm(
sim.dat[, 1] ~ as.factor(trt),
family = stats::poisson(link = 'log'),
id = clust,
corstr = "exchangeable"
)
time.est = avg.iter.time * (nsim - 1) / 60
hr.est = time.est %/% 60
min.est = round(time.est %% 60, 3)
#time limit override (for Shiny)
if (min.est > 2 && timelimitOverride == FALSE) {
stop(paste0(
"Estimated completion time: ",
hr.est,
'Hr:',
min.est,
'Min'
))
}
if (quiet == FALSE) {
message(
paste0(
'Begin simulations :: Start Time: ',
Sys.time(),
' :: Estimated completion time: ',
hr.est,
'Hr:',
min.est,
'Min'
)
)
# initialize progress bar
if (is.na(cores)) {
prog.bar = progress::progress_bar$new(format = "(:spin) [:bar] :percent eta :eta",
total = 5,
clear = FALSE)
prog.bar$tick(0)
}
}
if (!is.na(cores) & quiet == FALSE) {
message("Fitting models")
}
if (is.na(cores) & quiet == FALSE) {
# Iterate progress bar
prog.bar$update(2 / 5)
Sys.sleep(1 / 100)
}
if (family == "poisson") {
my.mod <- foreach::foreach(
i = 1:nsim,
.options.parallel = opts,
.packages = "geepack",
.inorder = FALSE
) %fun% {
geepack::geeglm(
sim.dat[, i] ~ as.factor(trt),
family = stats::poisson(link = 'log'),
id = clust,
corstr = "exchangeable"
)
}
}
if (family == "quasipoisson") {
my.mod <- foreach::foreach(
i = 1:nsim,
.options.parallel = opts,
.packages = "geepack",
.inorder = FALSE
) %fun% {
geepack::geeglm(
sim.dat[, i] ~ as.factor(trt),
family = stats::quasipoisson(link = 'log'),
id = clust,
corstr = "exchangeable"
)
}
}
# check for gee convergence
for (i in 1:length(my.mod)){
converged[i] <- ifelse(summary(my.mod[[i]])$error == 0, TRUE, FALSE)
}
if (!is.na(cores) & quiet == FALSE) {
message("Performing null model comparisons")
}
if (is.na(cores) & quiet == FALSE) {
# Iterate progress bar
prog.bar$update(3 / 5)
Sys.sleep(1 / 100)
}
null.mod <- list()
null.mod[[i]] <- geepack::geeglm(
sim.dat[, i] ~ 1,
family = stats::quasipoisson(link = 'log'),
id = clust,
corstr = "exchangeable"
)
# get the overall p-values (>Chisq)
model.compare <-
foreach::foreach(i = 1:nsim, .inorder = FALSE) %fun% {
try(anova(my.mod[[i]], null.mod[[i]]))
}
if (is.na(cores) & quiet == FALSE) {
# Iterate progress bar
prog.bar$update(4 / 5)
Sys.sleep(1 / 100)
}
if (!is.na(cores) & quiet == FALSE) {
message("Retrieving model summaries")
}
# get the model summaries
model.values <- foreach::foreach(i = 1:nsim,
.packages = "car",
.inorder = FALSE) %fun% {
summary(my.mod[[i]])
}
# turn off parallel computing
if (!is.na(cores)) {
#stop the progress bar
close(pb)
parallel::stopCluster(cl)
}
} # end of GEE method
## Output objects
if (all.sim.data == TRUE) {
complete.output.internal <- list(
"estimates" = model.values,
"model.comparisons" = model.compare,
"converged" = unlist(converged),
"sim.data" = data.frame(sim.dat),
"optimizer algorithm" = goodopt
)
} else {
complete.output.internal <- list(
"estimates" = model.values,
"model.comparisons" = model.compare,
"converged" = unlist(converged),
"optimizer algorithm" = goodopt
)
}
return(complete.output.internal)
} #end of function
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.