Nothing
#' Generate a data set using a \code{study_parameters}-object
#'
#' @param paras An object created by \code{\link{study_parameters}}
#' @param n Optional; specifies which row \code{n} should be used if \code{object}
#' is a \code{data.frame} containing multiple setups.
#'
#' @return A \code{data.frame} with the simulated data in long form. With the following columns:
#' \itemize{
#' \item \code{y} the outcome vector, with missing values as NA
#' \item \code{y_c} the outcome vector, without missing values removed.
#' \item \code{time} the time vector
#' \item \code{treatment} treatment indicator (0 = "control", 1 = "treatment")
#' \item \code{subject} subject-level id variable, from 1 to total number of subjects.
#' \item \code{cluster} for three-level models; the cluster-level id variable,
#' from 1 to the total number of clusters.
#' }
#' @export
#'
#' @examples
#' p <- study_parameters(n1 = 11,
#' n2 = 10,
#' n3 = 4,
#' T_end = 10,
#' fixed_intercept = 37,
#' fixed_slope = -0.65,
#' sigma_subject_intercept = 2.89,
#' sigma_cluster_intercept = 0.6,
#' icc_slope = 0.1,
#' var_ratio = 0.03,
#' sigma_error = 1.5,
#' cor_subject = -0.5,
#' cor_cluster = 0,
#' cohend = 0.5)
#'
#' d <- simulate_data(p)
simulate_data <- function(paras, n = 1) {
UseMethod("simulate_data")
}
create_dropout_indicator <- function(paras) {
ind <- dropout_process(unlist(paras$dropout), paras)
ind <- ind$missing
ind
}
# sim formula -------------------------------------------------------------
#' Create a simulation formula
#'
#' @param formula A \code{character} containing a \pkg{lme4} formula.
#' @param data_transform Optional; a \code{function} that applies a transformation
#' to the data during each simulation.
#' @param test A \code{character} vector indicating which parameters should be tested.
#' Only applies to tests using Satterthwaite \emph{dfs}, or when calculating confidence intervals.
#'
#' @details
#'
#' It is possible to fit model without any random effects. If no random effects is specified
#' the model is fit using \code{lm()}.
#'
#' @return Object with class \code{plcp_sim_formula}
#' @seealso \code{\link{sim_formula_compare}}, \code{\link{transform_to_posttest}}
#' @export
#'
#' @examples
#' # 2-lvl model
#' f <- sim_formula("y ~ treatment * time + (1 + time | subject)")
#'
#' # ANCOVA using 'data_transform'
#' f <- sim_formula("y ~ treatment + pretest",
#' data_transform = transform_to_posttest,
#' test = "treatment")
#'
sim_formula <- function(formula, data_transform = NULL, test = "time:treatment") {
x <- list("formula" = formula,
"data_transform" = data_transform,
"data_transform_lab" = substitute(data_transform),
"test" = test)
class(x) <- append(class(x), "plcp_sim_formula")
x
}
#' Compare multiple simulation formulas
#'
#' This functions allows comparing multiple models fit to the same data set
#' during simulation.
#'
#' @param ... Named formulas that should be compared, see \emph{Examples}.
#'
#' @return Object with class \code{plcp_compare_sim_formula}
#' @seealso \code{\link{sim_formula}}
#' @export
#'
#' @examples
#'
#' # Formulas can be a named character
#' # uses the defaults 'sim_formula()'
#' f <- sim_formula_compare("m0" = "y ~ time * treatment + (1 | subject)",
#' "m1" = "y ~ time * treatment + (1 + time | subject)")
#'
#' # Can also use sim_formula()
#' f0 <- sim_formula("y ~ time * treatment + (1 | subject)")
#' f1 <- sim_formula("y ~ time * treatment + (1 + time | subject)")
#' f <- sim_formula_compare("m0" = f0, "m1" = f1)
#'
sim_formula_compare <- function(...) {
x <- list(...)
if(any(is.null(names(x)))) stop("formula(s) must have a name.", call. = FALSE)
for(i in seq_along(x)) {
f <- x[[i]]
if(is.character(f)) x[[i]] <- sim_formula(f)
}
class(x) <- append(class(x), "plcp_compare_sim_formula")
x
}
#' Print method for simulation formulas
#'
#' @param x A formula object.
#' @param ... Not used
#'
#' @export
#'
#' @method print plcp_sim_formula
print.plcp_sim_formula <- function(x, ...) {
cat("# Simulation formula\n")
f <- x$formula
data_transform <- x$data_transform_lab
test <- paste(x$test, collapse = "', '")
if(!is.null(data_transform))
data_transform <- paste0("\n data_transform: '", data_transform, "'")
cat(paste(" formula: '", f,
"'\n test: '", test, "'",
data_transform,
sep = ""), sep = "\n")
cat("\n")
}
#' @rdname print.plcp_sim_formula
#' @method print plcp_compare_sim_formula
#' @export
print.plcp_compare_sim_formula <- function(x, ...) {
cat("# Simulation formulas\n")
lapply(seq_along(x), .print_plcp_sim_formula, x = x)
}
.print_plcp_sim_formula <- function(i, x) {
lab <- names(x)[i]
f <- x[[i]]$formula
data_transform <- x[[i]]$data_transform_lab
test <- paste(x[[i]]$test, collapse = "', '")
if(!is.null(data_transform))
data_transform <- paste0("\n data_transform: '", data_transform, "'")
cat(paste("#", i, " '", lab,"': '", f,
"'\n test: '", test, "'",
data_transform,
sep = ""), sep = "\n")
cat("\n")
}
# data transform helpers --------------------------------------------------
#' Helper to transform the simulated longitudinal \code{data.frame}
#'
#' This is en example of a data transformation applied during simulation.
#' It takes the longitudinal data and transforms it into a pretest-posttest
#' model in wide format. Useful if you want to compare the longitudinal LMM with
#' e.g. AN(C)OVA models.
#'
#' @param data a \code{data.frame} created using \code{\link{simulate_data}}
#'
#' @return a \code{data.frame} with \code{y} now only includes the posttest values.
#' Also includes three new columns:
#' \itemize{
#' \item \code{pre} subject-level pretest scores.
#' \item \code{pre_cluster} cluster-level pretest scores.
#' \item \code{pre_subject_c} subject-level pretest scores center
#' around the cluster-level pretest.
#' }
#' @export
#'
#' @seealso \code{\link{simulate.plcp}}, \code{\link{study_parameters}}
#'
#' @examples
#'
#' # Compare longitudinal 3-level model to 2-level model
#' # fit to just the posttest data
#' #
#' # Both models are fit to the same dataset during simulation.
#' p <- study_parameters(n1 = 11,
#' n2 = 20,
#' n3 = 3,
#' icc_pre_subject = 0.5,
#' icc_pre_cluster = 0.1,
#' icc_slope = 0.05,
#' var_ratio = 0.03)
#'
#' # simulation formulas
#' # analyze as a posttest only 2-level model
#' f_pt <- sim_formula("y ~ treatment + (1 | cluster)",
#' test = "treatment",
#' data_transform = transform_to_posttest)
#'
#' # analyze as 3-level longitudinal
#' f_lt <- sim_formula("y ~ time*treatment +
#' (1 + time | subject) +
#' (1 + time | cluster)")
#'
#' f <- sim_formula_compare("posttest" = f_pt,
#' "longitudinal" = f_lt)
#' \dontrun{
#' res <- simulate(p,
#' formula = f,
#' nsim = 2000,
#' cores = parallel::detectCores(),
#' satterthwaite = TRUE)
#' summary(res)
#'}
#'
transform_to_posttest <- function(data) {
tmp <- data[data$time == max(data$time), ]
tmp$pretest <- data[data$time == min(data$time), "y"]
tmp$pretest_cluster <- tapply(tmp$pretest, tmp$cluster, mean)[tmp$cluster]
tmp$pretest_subject_c <- tmp$pretest - tmp$pretest_cluster
tmp
}
add_NA_values_from_indicator <- function(d, missing) {
d$miss <- missing
d$y <- ifelse(d$miss == 1, NA, d$y)
d <- d[, c("y",
"y_c",
grep("^y$|^y_c$", colnames(d), value = TRUE, invert = TRUE))]
d
}
#' @rdname simulate_data
#' @export
simulate_data.plcp <- function(paras, n = NULL) {
if (is.data.frame(paras))
paras <- as.list(paras)
if(is.null(paras$prepared)) {
tmp <- prepare_paras(paras)
} else tmp <- paras
paras <- tmp$control
paras_tx <- tmp$treatment
slope_diff <- get_slope_diff(paras) / paras$T_end
paras$effect_size <- NULL
paras_tx$effect_size <- NULL
paras_tx$fixed_slope <- paras_tx$fixed_slope + slope_diff
paras_tx$partially_nested <- NULL
paras$partially_nested <- NULL
paras$allocation_ratio <- NULL
paras_tx$allocation_ratio <- NULL
# replace NA
paras[is.na(paras)] <- 0
paras_tx[is.na(paras_tx)] <- 0
d_tx <- simulate_3lvl_data(paras_tx)
d_c <- simulate_3lvl_data(paras)
# drop outs
if (is.list(paras$dropout) |
is.function(paras$dropout) |
length(paras$dropout) > 1) {
miss_c <- create_dropout_indicator(paras)
d_c <- add_NA_values_from_indicator(d_c, miss_c)
}
if (is.list(paras_tx$dropout) |
is.function(paras_tx$dropout) |
length(paras_tx$dropout) > 1) {
miss_tx <- create_dropout_indicator(paras_tx)
d_tx <- add_NA_values_from_indicator(d_tx, miss_tx)
}
# combine
d_tx$treatment <- 1
d_c$treatment <- 0
d_c$subject <- d_c$subject + max(d_tx$subject)
d_tx$cluster <- d_tx$cluster + max(d_c$cluster)
dt <- rbind(d_c, d_tx)
dt
}
#' @rdname simulate_data
#' @export
simulate_data.plcp_multi <- function(paras, n = 1) {
simulate_data.plcp(as.plcp(paras[n,]))
}
#' Perform a simulation study using a \code{study_parameters}-object
#'
#' @param object An object created by \code{\link{study_parameters}}.
#' @param nsim The number of simulations to run.
#' @param seed Currently ignored.
#' @param formula Model formula(s) used to analyze the data, see \emph{Details}.
#' Should be created using \code{\link{sim_formula}}. It is also possible to compare multiple
#' models, e.g. a correct and a misspecified model, by combining the formulas using \code{\link{sim_formula_compare}}.
#' See \emph{Examples}. If \code{NULL} the formula is made automatically,
#' using \code{\link{create_lmer_formula}}, which does not support objects with multiple
#' simulation setups.
#' @param satterthwaite Logical; if \code{TRUE} Satterthwaite's degrees of freedom
#' approximation will be used when computing \emph{p}-values. This is implemented using
#' the \code{lmerTest}-package. See \emph{Details}.
#' @param CI Logical; if \code{TRUE} coverage rates for 95 \% confidence intervals
#' will be calculated. See \emph{Details}.
#' @param cores Number of CPU cores to use. If called from a GUI environment (e.g. RStudio) or
#' a computer running Microsoft Windows, PSOCK clusters will be used. If called from a
#' non-interactive Unix environment forking is utilized.
#' @param progress \code{logical}; will display progress if \code{TRUE}. Currently
#' ignored on \emph{Windows}. Package \code{pbmclapply} is used to display progress,
#' which relies on forking. \strong{N.B} using a progress bar will noticeably
#' increase the simulation time, due to the added overhead.
#' @param batch_progress \code{logical}; if \code{TRUE} progress will be shown for
#' simulations with multiple setups.
#' @param ... Optional arguments, see \emph{Saving} in \emph{Details} section.
#'
#' @importFrom stats simulate as.formula confint pnorm pt qnorm qt rmultinom sd time vcov reshape
#' @details
#'
#' See also \code{vignette("simulations", package = "powerlmm")} for a tutorial.
#'
#' \strong{Model formula}
#'
#' If no data transformation is used, the available model terms are:
#' \itemize{
#' \item \code{y} the outcome vector, with potential missing data.
#' \item \code{y_c} the complete version of \code{y}, before dropout was simulated.
#' \item \code{time} the time vector.
#' \item \code{treatment} treatment indicator (0 = "control", 1 = "treatment").
#' \item \code{subject} subject-level id variable, from 1 to total number of subjects.
#' \item \code{cluster} for three-level models; the cluster-level id variable,
#' from 1 to the total number of clusters.
#' }
#'
#' See \emph{Examples} and the simulation-vignette for formula examples. For
#' \code{object}s that contain a single study setup, then the lmer formula
#' can be created automatically using \code{\link{create_lmer_formula}}.
#'
#' \strong{Satterthwaite's approximation, and CI coverage}
#'
#' To decrease the simulation time the default is to only calculate Satterthwaite's \emph{dfs}
#' and the CIs' coverage rates for the test of 'time:treatment'-interaction. This can be
#' changed using the argument \code{test} in \code{\link{sim_formula}}.
#'
#' Confidence intervals are both calculated using profile likelihood and by
#' the Wald approximation, using a 95 \% confidence level.
#'
#' \strong{Saving intermediate results for multi-sims}
#'
#' Objects with multi-sims can be save after each batch is finished. This is highly
#' recommended when many designs are simulated. The following additional arguments
#' control saving behavior:
#'
#' \itemize{
#' \item \code{'save'}, \code{logical}, if \code{TRUE} each batch is saved as a
#' \code{RDS}-file. Results are saved in your working directory, in the directory
#' specified by \code{save_folder}.
#' \item \code{'save_folder'} a \code{character} indicating the folder name. Default is \code{'save'}.
#' \item \code{'save_folder_create'}, \code{logical}, if \code{TRUE} then \code{save_folder}
#' will be created if it does not exist in your working directory.
#' }
#'
#' @seealso \code{\link{sim_formula}}, \code{\link{sim_formula_compare}}, \code{\link{summary.plcp_sim}}, \code{\link{simulate_data}}
#'
#' @examples
#' \dontrun{
#' # Two-level ---------------------------------------------------------------
#' p <- study_parameters(n1 = 11,
#' n2 = 25,
#' sigma_subject_intercept = 1.44,
#' sigma_subject_slope = 0.2,
#' sigma_error = 1.44,
#' effect_size = cohend(0.5))
#'
#' f <- sim_formula("y ~ treatment * time + (1 + time | subject)")
#'
#'
#' res <- simulate(object = p,
#' nsim = 1000,
#' formula = f,
#' satterthwaite = TRUE,
#' progress = FALSE,
#' cores = 1,
#' save = FALSE)
#'
#' summary(res)
#'
#'
#' # Three-level (nested) ------------------------------------------------------
#' p <- study_parameters(n1 = 10,
#' n2 = 20,
#' n3 = 4,
#' sigma_subject_intercept = 1.44,
#' icc_pre_cluster = 0,
#' sigma_subject_slope = 0.2,
#' icc_slope = 0.05,
#' sigma_error = 1.44,
#' effect_size = 0)
#'
#' ## compare correct and miss-specified model
#' f0 <- "y ~ treatment * time + (1 + time | subject)"
#' f1 <- "y ~ treatment * time + (1 + time | subject) + (0 + time | cluster)"
#' f <- sim_formula_compare("correct" = f1,
#' "wrong" = f0)
#'
#' res <- simulate(object = p,
#' nsim = 1000,
#' formula = f,
#' satterthwaite = TRUE,
#' progress = FALSE,
#' cores = 1,
#' save = FALSE)
#'
#' summary(res)
#'
#' ## Compare random effects using LRT,
#' ## summarise based on best model from each sim
#' summary(res,
#' model_selection = "FW",
#' LRT_alpha = 0.1,
#' para = "treatment:time")
#'
#' # Partially nested design ---------------------------------------------------
#' p <- study_parameters(n1 = 11,
#' n2 = 10,
#' n3 = 4,
#' sigma_subject_intercept = 1.44,
#' icc_pre_cluster = 0,
#' sigma_subject_slope = 0.2,
#' cor_subject = -0.5,
#' icc_slope = 0.05,
#' sigma_error = 1.44,
#' partially_nested = TRUE,
#' effect_size = cohend(-0.5))
#'
#' f <- sim_formula("y ~ treatment * time + (1 + time | subject) +
#' (0 + treatment:time | cluster)")
#'
#' res <- simulate(object = p,
#' nsim = 1000,
#' formula = f,
#' satterthwaite = TRUE,
#' progress = FALSE,
#' cores = 4,
#' save = FALSE)
#'
#' summary(res)
#'
#' # Run multiple designs -----------------------------------------------------
#' p <- study_parameters(n1 = 10,
#' n2 = 20,
#' n3 = c(2, 4, 6),
#' sigma_subject_intercept = 1.44,
#' icc_pre_cluster = 0,
#' sigma_subject_slope = 0.2,
#' icc_slope = 0.05,
#' sigma_error = 1.44,
#' effect_size = cohend(0.5))
#'
#' f0 <- "y ~ treatment * time + (1 + time | subject)"
#' f1 <- "y ~ treatment * time + (1 + time | subject) + (0 + time | cluster)"
#' f <- sim_formula_compare("correct" = f1,
#' "wrong" = f0)
#'
#' res <- simulate(object = p,
#' nsim = 1000,
#' formula = f,
#' satterthwaite = TRUE,
#' progress = FALSE,
#' cores = 1,
#' save = FALSE)
#'
#' # Summarize 'time:treatment' results for n3 = c(2, 4, 6) for 'correct' model
#' summary(res, para = "time:treatment", model = "correct")
#'
#' # Summarize cluster-level random slope for n3 = c(2, 4, 6) for 'correct' model
#' summary(res, para = "cluster_slope", model = "correct")
#' }
#' @export
simulate.plcp <- function(object,
nsim,
seed = NULL,
formula = NULL,
satterthwaite = FALSE,
CI = FALSE,
cores = 1,
progress = FALSE,
batch_progress = TRUE,
...) {
if(is.null(formula)) formula <- sim_formula(create_lmer_formula(object))
formula <- check_formula(formula)
if(satterthwaite) check_installed("lmerTest")
if (is.null(nrow(object))) {
simulate.plcp_list(
object = object,
nsim = nsim,
formula = formula,
satterthwaite = satterthwaite,
CI = CI,
cores = cores,
progress = progress,
...
)
} else if (nrow(object) > 1) {
simulate.plcp_data_frame(
object = object,
nsim = nsim,
formula = formula,
satterthwaite = satterthwaite,
CI = CI,
cores = cores,
progress = progress,
batch_progress = batch_progress,
...
)
}
}
#' @rdname simulate.plcp
#' @export
simulate.plcp_multi <- function(object,
nsim,
seed = NULL,
formula = NULL,
satterthwaite = FALSE,
CI = FALSE,
cores = 1,
progress = FALSE,
batch_progress = TRUE,
...) {
if(is.null(formula)) stop("Argument 'formula' is missing. Automatic formula creation is not yet supported for multi-sim objects")
simulate.plcp(
object = object,
nsim = nsim,
seed = NULL,
satterthwaite = satterthwaite,
CI = CI,
formula = formula,
cores = cores,
progress = progress,
batch_progress = batch_progress,
...
)
}
simulate.plcp_list <-
function(object,
nsim,
seed = NULL,
formula,
satterthwaite = FALSE,
CI = FALSE,
cores = 1,
progress = TRUE,
save = FALSE,
save_folder = NULL,
save_folder_create = NULL,
cl = NULL,
...) {
ptm <- proc.time()
is_windows <- .Platform$OS.type == "windows"
if (progress & !is_windows) {
check_installed("pbmcapply")
res <-
pbmcapply::pbmclapply(
1:nsim,
simulate_,
paras = object,
formula = formula,
satterthwaite = satterthwaite,
CI = CI,
mc.cores = cores,
...
)
} else if (cores > 1) {
if(.Platform$OS.type == "unix" && !interactive()) {
res <- parallel::mclapply(X = 1:nsim,
FUN = simulate_,
paras = object,
formula = formula,
satterthwaite = satterthwaite,
CI = CI,
mc.cores = cores,
...)
} else {
if(is.null(cl)) {
cl <- parallel::makeCluster(min(cores, nsim))
on.exit(parallel::stopCluster(cl))
}
parallel::clusterEvalQ(cl, expr =
suppressPackageStartupMessages(require(powerlmm, quietly = TRUE)))
clust_call <- (function(object, satterthwaite, formula, CI) {
function(i) {
simulate_(i, paras = object,
satterthwaite = satterthwaite,
formula = formula,
CI = CI)
}
})(object, satterthwaite, formula, CI)
res <- parLapply(cl,
X = 1:nsim,
clust_call
)
}
} else {
res <- lapply(1:nsim,
FUN = simulate_,
paras = object,
formula = formula,
satterthwaite = satterthwaite,
CI = CI,
...)
}
time <- proc.time() - ptm
out <-
list(
res = res,
paras = object,
nsim = nsim,
time = time["elapsed"],
formula = formula
)
out <- munge_results(out)
if(inherits(formula, "plcp_compare_sim_formula")) {
class(out) <- append(class(out), "plcp_sim_formula_compare")
}
else {
class(out) <- append(class(out), "plcp_sim")
}
out
}
simulate.plcp_data_frame <-
function(object,
nsim,
seed = NULL,
formula,
satterthwaite,
CI = FALSE,
cores,
progress,
batch_progress,
save = FALSE,
save_folder = "save",
save_folder_create = FALSE) {
if (save) {
if(!dir.exists(save_folder)) {
if(save_folder_create) {
message("Creating save_folder: ", file.path(save_folder))
dir.create(file.path(save_folder))
} else {
stop("Directory '", save_folder, "' does not exist.")
}
}
output_dir <- format(Sys.time(), "%Y%m%d_%H%M_%S")
output_dir <- file.path(save_folder, output_dir)
dir.create(output_dir)
if(!dir.exists(output_dir)) stop("Could not create save directory.")
cat("Each batch is being saved to: ", output_dir, "\n", sep = "")
}
res <- lapply(1:nrow(object), function(i) {
if (batch_progress) {
cat("\rBatch: ",
i,
"/",
nrow(object),
rep(" ", options()$width - 13))
cat("\n")
}
if(cores > 1 && interactive()) {
cl <- parallel::makeCluster(cores)
on.exit(parallel::stopCluster(cl))
} else cl <- NULL
x <- simulate.plcp_list(
as.plcp(object[i,]),
nsim = nsim,
formula = formula,
satterthwaite = satterthwaite,
CI = CI,
cores = cores,
progress = progress,
cl = cl
)
if (save) {
fname <- paste("sim", i, ".rds", sep ="")
f <- file.path(output_dir, fname)
saveRDS(x, f)
}
x
})
if(save) saveRDS(object, file.path(output_dir, "paras.rds"))
class(res) <- append(class(res), "plcp_multi_sim")
attr(res, "paras") <- object
res
}
simulate_ <- function(sim, paras, satterthwaite, CI, formula) {
prepped <- prepare_paras(paras)
d <- simulate_data(prepped)
#saveRDS(d, file = paste0("/tmp/R/sim",sim, ".rds"))
tot_n <- length(unique(d[d$time == 0,]$subject))
fit <- analyze_data(formula, d)
res <- extract_results(fit = fit,
CI = CI,
satterthwaite = satterthwaite,
df_bw = get_balanced_df(prepped),
tot_n = tot_n,
sim = sim)
res
}
# Checks ------------------------------------------------------------------
# checks
check_formula <- function(formula) {
if(!inherits(formula, "plcp_sim_formula") & !inherits(formula, "plcp_compare_sim_formula")) {
stop("`formula` should be created using `sim_formula` or `sim_formula_compare`", call. = FALSE)
}
#if (!all(names(formula) %in% c("correct", "wrong")))
# stop("Formula names must be either 'correct' or 'wrong'")
if(inherits(formula, "plcp_sim_formula")) formula <- list("default" = formula)
if (inherits(formula, "plcp_compare_sim_formula") & length(names(formula)) > 1) {
if (length(names(formula)) != length(unique(names(formula)))) {
stop(
"Formulas should have unique names."
)
}
}
#for (f in formula)
# check_formula_terms(f)
formula
}
check_formula_terms <- function(f) {
f <- as.formula(f$formula)
x <- all.vars(f)
ind <- x %in% c("y", "y_c", "treatment", "time", "subject", "cluster")
wrong <- x[!ind]
if (length(wrong) > 0) {
stop(
paste(
wrong,
"is not an allowed variable name. Allowed names are: 'y', 'treatment', 'time', 'subject', 'cluster'"
)
)
}
# test functions
x <- all.vars(f, functions = TRUE)
ind <- x %in% c("y", "y_c", "treatment", "time", "subject", "cluster")
x <- x[!ind]
ind <- x %in% c("~", "+", "*", "(", "|", "||", ":")
x <- x[!ind]
if (length(x) > 0) {
stop(
paste(
"Functions are not yet supported in model formula. Problematic function was:",
x
)
)
}
}
# analyze -----------------------------------------------------------------
analyze_data <- function(formula, d) {
fit <-
lapply(formula, function(f) {
if(inherits(f, "plcp_sim_formula")) {
if(is.function(f$data_transform))
d <- f$data_transform(d)
lmmf <- as.formula(f$formula)
}
# LMM or OLS
if(is.null(lme4::findbars(lmmf))) {
fit <- tryCatch(
#do.call(lme4::lmer, list(formula=f, data=d))
fit <- stats::lm(formula = lmmf, data = d)
)
} else {
fit <- tryCatch(
#do.call(lme4::lmer, list(formula=f, data=d))
fit <- lme4::lmer(formula = lmmf, data = d)
)
}
list("fit" = fit,
"test" = f$test)
})
fit
}
extract_results <- function(fit, CI = FALSE, satterthwaite = FALSE, df_bw, tot_n, sim) {
lapply(fit, extract_results_,
CI = CI,
satterthwaite = satterthwaite,
df_bw = df_bw,
tot_n = tot_n,
sim = sim)
}
extract_random_effects <- function(fit) {
UseMethod("extract_random_effects")
}
extract_random_effects.lm <- function(fit) {
data.frame(grp = "Residual",
var1 = NA,
var2 = NA,
vcov = stats::sigma(fit)^2,
parameter = "Residual_NA",
stringsAsFactors = FALSE)
}
extract_random_effects.lmerMod <- function(fit) {
x <- as.data.frame(lme4::VarCorr(fit))
vcovs <- x[is.na(x$var2),]
vcovs$parameter <- paste(vcovs$grp, vcovs$var1, sep = "_")
vcovs$sdcor <- NULL
correlations <- x[!is.na(x$var2), ]
correlations$parameter <- paste(correlations$grp,
correlations$var1,
correlations$var2,
sep = "_")
correlations$vcov <- NULL
names(correlations)[names(correlations) == "sdcor"] <- "vcov"
rbind(vcovs, correlations)
}
add_p_value <- function(fit, test, ...) {
UseMethod("add_p_value")
}
#' @importFrom utils packageVersion
add_p_value.lmerMod <- function(fit, test, satterthwaite, df_bw = NULL, ...) {
if(satterthwaite) {
tmp <- tryCatch(summary(fit))
tmp <- tmp$coefficients
ff <- rownames(tmp)
# satterth dfs only for time:treatment
satterth_term <- any(ff %in% test)
## satterthwaite
if(satterth_term) {
ind <- which(ff %in% test)
L <- rep(0, length(ff))
L <- rep(list(L), length(ind))
for(i in seq_along(L)) {
L[[i]][ind[i]] <- 1
}
# calcSatterth will be deprecated
if(packageVersion("lmerTest") >= "3.0.0") {
satt <- suppressMessages(tryCatch(lmerTest::contest(fit, L),
error = function(e) { NA }))
} else {
satt <- suppressMessages(tryCatch(lmerTest::calcSatterth(fit, L),
error = function(e) { NA }))
}
df <- rep(NA, length(ff))
p <- rep(NA, length(ff))
if(inherits(satt, "list")) {
df[ind] <- satt$denom
p[ind] <- satt$pvalue
} else if(inherits(satt, "data.frame")) {
df[ind] <- satt$DenDF
p[ind] <- satt$`Pr(>F)`
}
if(any(is.na(p[ind]))) {
ind <- which(is.na(p[ind]))
tval <- tmp[ind, "t value"]
p[ind] <- 2*(1 - pt(abs(tval), df = df_bw))
}
res <- list("df" = df,
"p" = p)
}
} else {
res <- list("df" = NA,
"p" = NA)
}
res
}
add_p_value.lm <- function(fit, test, ...) {
tmp <- summary(fit)
tmp <- as.data.frame(tmp$coefficients)
ff <- rownames(tmp)
ind <- which(ff %in% test)
# satterth dfs only for time:treatment
if(any(ff %in% test)) {
df <- rep(NA, length(ff))
p <- rep(NA, length(ff))
df[ind] <- fit$df.residual
p[ind] <- tmp$`Pr(>|t|)`[ind]
list("df" = df,
"p" = p)
} else {
list("df" = NA,
"p" = NA)
}
}
fix_sath_NA_pval <- function(x, df) {
ind <- is.na(x$pval)
tmp <- x[ind, ]
t <- with(tmp, estimate/se)
pval <- 2*(1 - pt(abs(t), df = df))
x[ind, "pval"] <- pval
x
}
get_fixef <- function(fit) {
UseMethod("get_fixef")
}
get_fixef.lmerMod <- function(fit) {
lme4::fixef(fit)
}
get_fixef.lm <- function(fit) {
stats::coef(fit)
}
get_converged_bool <- function(fit) {
UseMethod("get_converged_bool")
}
get_converged_bool.lmerMod <- function(fit) {
is.null(fit@optinfo$conv$lme4$code)
}
get_converged_bool.lm <- function(fit) {
TRUE
}
extract_results_ <- function(fit, CI, satterthwaite, df_bw, tot_n, sim) {
# need to know in which order time and treatment was entered
# then adjust 'fit$test' to match
ff <- get_fixef(fit$fit)
rnames <- names(ff)
TbT <- c("time:treatment", "treatment:time")
ind <- TbT %in% rnames
if(any(ind) & any(fit$test %in% TbT)) {
TbT_model <- TbT[TbT %in% rnames]
fit$test[fit$test %in% TbT] <- TbT_model
}
if(any(!fit$test %in% rnames)) stop("At least of the values in 'test' do no match the model's parameters: ", paste(rnames, collapse = ", "), call. = FALSE)
se <- sqrt(diag(vcov(fit$fit)))
tmp_p <- add_p_value(fit = fit$fit,
test = fit$test,
satterthwaite = satterthwaite,
df_bw = df_bw)
FE <- data.frame(
"estimate" = ff,
"se" = se,
"pval" = tmp_p$p,
"df" = tmp_p$df
)
rownames(FE) <- NULL
FE <- cbind(data.frame(parameter = rnames,
stringsAsFactors = FALSE),
FE)
FE[FE$parameter %in% fit$test, "df_bw"] <- df_bw
if (CI) {
CI <- tryCatch(confint(fit$fit, parm = fit$test),
error = function(e) NA)
CI_wald <-
tryCatch(confint(fit$fit, method = "Wald", parm = fit$test),
error = function(e) NA)
CIs <- fit$test
if(all(is.na(CI))) {
FE[FE$parameter %in% CIs, "CI_lwr"] <- NA
FE[FE$parameter %in% CIs, "CI_upr"] <- NA
} else {
FE[FE$parameter %in% CIs, "CI_lwr"] <- CI[CIs, 1]
FE[FE$parameter %in% CIs, "CI_upr"] <- CI[CIs, 2]
}
if(all(is.na(CI_wald))) {
FE[FE$parameter %in% CIs, "CI_wald_lwr"] <- NA
FE[FE$parameter %in% CIs, "CI_wald_upr"] <- NA
} else {
FE[FE$parameter %in% CIs, "CI_wald_lwr"] <- CI_wald[CIs, 1]
FE[FE$parameter %in% CIs, "CI_wald_upr"] <- CI_wald[CIs, 2]
}
}
RE <- extract_random_effects(fit$fit)
conv <- get_converged_bool(fit$fit)
# save for postprocess LRT test
ll <- stats::logLik(fit$fit, REML = TRUE)
df <- attr(ll, "df")
RE$sim <- sim
FE$sim <- sim
list("RE" = RE,
"FE" = FE,
"logLik" = as.numeric(ll),
"df" = df,
"tot_n" = tot_n,
"conv" = conv)
}
rename_rr_ <- function(.x, match, new) {
.x[.x$parameter %in% match, "parameter"] <- new
.x
}
rename_random_effects <- function(.x) {
.x <- rename_rr_(
.x,
match = c(
"cluster_time",
"cluster.1_time",
"cluster_time:treatment",
"cluster_treatment:time",
"cluster.1_treatment:time"
),
new = "cluster_slope"
)
.x <- rename_rr_(.x,
match = "Residual_NA",
new = "error")
.x <- rename_rr_(.x,
match = c("subject_(Intercept)"),
new = "subject_intercept")
.x <- rename_rr_(.x,
match = c("cluster_(Intercept)",
"cluster_treatment"),
new = "cluster_intercept")
.x <- rename_rr_(.x,
match = c("subject.1_time", "subject_time"),
new = "subject_slope")
.x <- rename_rr_(.x,
match = "subject_(Intercept)_time",
new = "cor_subject")
.x <- rename_rr_(.x,
match = c("cluster_(Intercept)_time",
"cluster_(Intercept)_treatment:time",
"cluster_(Intercept)_time:treatment",
"cluster_treatment_treatment:time"),
new = "cor_cluster")
}
order_model_lists <- function(models, RE, FE, ll, df, tot_n, convergence) {
x <- list()
for (i in seq_along(models)) {
x[[i]] <- list("RE" = RE[[i]],
"FE" = FE[[i]],
"logLik" = ll[[i]],
"df" = df[[i]],
"tot_n" = tot_n[[i]],
"convergence" = convergence[[i]])
}
names(x) <- models
x
}
munge_results <- function(res) {
x <- res$res
models <- names(x[[1]])
RE <- lapply(models, munge_results_, x, "RE")
FE <- lapply(models, munge_results_, x, "FE")
convergence <- lapply(models, munge_results_, x, "conv")
tot_n <- lapply(models, munge_results_, x, "tot_n")
RE <- lapply(RE, rename_random_effects)
ll <- lapply(models, munge_results_, x, "logLik")
df <- lapply(models, munge_results_, x, "df")
x <- order_model_lists(models,
RE = RE,
FE = FE,
ll = ll,
df = df,
tot_n = tot_n,
convergence = convergence)
res$res <- x
res
}
munge_results_ <- function(model, res, effect) {
res <- lapply(seq_along(res), function(i) {
x <- res[[i]][[model]][[effect]]
x
})
res <- do.call(rbind, res)
res
}
# Print -------------------------------------------------------------------
.print_effect <- function(effect, lab, digits) {
if(nrow(effect) >= 1) {
if(c("model" %in% colnames(effect))) {
fix_eff <- paste(as.character(unique(effect$parameter)), collapse = "', '")
tmp <- effect[ , colnames(effect) %in% c("model"), drop = FALSE]
cat("\n", lab, ": '", fix_eff, "'\n\n", sep = "")
} else {
cat("\n", lab, " \n\n", sep = "")
tmp <- effect[ , colnames(effect) %in% c("parameter", "model"), drop = FALSE]
}
effect <- effect[, !colnames(effect) %in% c("parameter", "model"), drop = FALSE]
effect <- signif(effect, digits)
effect <- cbind(tmp, effect)
effect[t(do.call(rbind, lapply(effect, is.nan)))] <- "."
effect[t(do.call(rbind, lapply(effect, is.na)))] <- "."
print.data.frame(effect,
row.names = FALSE,
digits = digits,
quote = FALSE)
}
}
print_model <- function(i, x, digits = 2) {
models <- names(x)
cat("Model: ", models[i], "\n")
.print_effect(x[[i]]$RE,
lab = "Random effects",
digits = digits)
.print_effect(x[[i]]$FE,
lab = "Fixed effects",
digits = digits)
cat("---\n")
}
#' @importFrom stats median
tot_n_ <- function(x) {
x <- as.numeric(x)
if(length(unique(x)) > 1) {
x_mean <- mean(x)
x_median <- median(x)
x_sd <- sd(x)
x <- paste(round(x_mean, 1),
"(mean)",
round(x_median, 1),
"(median)",
round(x_sd, 2),
"(SD)")
}
x
}
print_test_NA <- function(i, x) {
mod_name <- names(x[i])
Satt_NA <- x[[i]]$Satt_NA
CI_NA <- x[[i]]$CI_NA
non_convergence <- 1 - x[[i]]$convergence
mess <- NULL
if(!is.na(Satt_NA) && Satt_NA > 0) {
message("[Model: ", mod_name, "] ", round(Satt_NA, 4) * 100, "% of the Satterthwaite calculations failed")
}
if(!is.na(CI_NA) && CI_NA > 0) {
message("[Model: ", mod_name, "] ", round(CI_NA, 4) * 100, "% of the profile likelihood CIs failed")
}
if(!is.na(non_convergence) && non_convergence > 0) {
message("[Model: ", mod_name, "] ", round(non_convergence, 4) * 100, "% of the models threw convergence warnings")
}
}
#' Print method for \code{summary.plcp_sim}-objects
#' @param x An object of class \code{plcp_sim_summary}
#' @param verbose \code{logical}; indicates if additional information
#' should be printed (default is \code{TRUE}).
#' @param digits number of significant digits.
#' @param ... Optional arguments.
#' @method print plcp_sim_summary
#' @export
print.plcp_sim_summary <- function(x, verbose = TRUE, digits = 2, ...) {
res <- x
#print
x <- res$summary
tot_n <- tot_n_(res$tot_n)
if(length(unique(tot_n)) == 1) tot_n <- unique(tot_n)
n3 <- get_n3(res$paras)
n2 <- get_n2(res$paras)
if(is.per_treatment(res$paras$n2)) {
n2$treatment <- deparse_n2(n2$treatment)
n2$control <- deparse_n2(n2$control)
n2 <- unlist(n2)
} else n2 <- deparse_n2(n2$treatment)
if(is.unequal_clusters(res$paras$n2)) {
n2_lab <- "\nSubjects per cluster: "
n2 <- print_per_treatment(prepare_print_n2(res$paras),
hanging = 23,
n2 = TRUE)
} else {
n2_lab <- "\nSubjects per cluster (n2 x n3): "
n2 <- print_per_treatment(prepare_print_n2(res$paras),
hanging = 33,
n2 = TRUE)
}
lapply(seq_along(x), print_model, x, digits = digits)
if(verbose) {
cat("Number of simulations:", res$nsim, " | alpha: ", res$alpha)
cat("\nTime points (n1): ", res$paras$n1)
cat(n2_lab, n2)
cat("\nTotal number of subjects: ", tot_n, "\n")
lapply(seq_along(x), print_test_NA, x = x)
if("model_selected" %in% names(res)) {
cat("---\nResults based on LRT model comparisons, using direction: ",
res$model_direction, " (alpha = ", res$LRT_alpha, ")\n", sep = "")
print(res$model_selected)
}
data_bool <- vapply(res$formula, function(x) is.function(x$data_transform), logical(1))
if(any(data_bool)) cat("---\nAt least one of the models applied a data transformation during simulation,\n",
"summaries that depend on the true parameter values will no longer be correct,\n",
"see 'help(summary.plcp_sim)'\n", sep = "")
}
if(any(x$correct$RE$is_NA > 0)) warning("Some estimated random effects were removed due to being NA.", call. = FALSE)
}
#' Print method for \code{simulate.plcp_multi}-objects
#' @param x An object created with \code{\link{simulate.plcp_multi}}
#' @param ... Optional arguments.
#' @importFrom utils object.size
#' @method print plcp_multi_sim
#' @export
print.plcp_multi_sim <- function(x, ...) {
nsim <- x[[1]]$nsim
nmulti <- length(x)
cat("# ", nmulti, " x ", nsim, "grid of simulations")
cat("\n")
ff <- x[[1]]$formula
if(inherits(ff, "plcp_compare_sim_formula")) {
print(ff)
} else if(inherits(ff[[1]], "plcp_sim_formula")) {
print(ff[[1]])
}
cat("# Object size:", format(object.size(x), units = "auto"))
cat("\n")
}
cat_formulas <- function(f) {
nams <- names(f)
names
cat(paste("# '", nams,"': ", f, sep = ""), sep = "\n")
}
#' Print method for \code{simulate.plcp}-objects
#' @param x An object created with \code{\link{simulate.plcp}}
#' @param ... Optional arguments.
#' @method print plcp_sim
#' @export
print.plcp_sim <- function(x, ...) {
cat(
"# A 'plcp_sim'-object containing",
x$nsim,
"simulations.",
"Use summary() to view results."
)
cat("\n")
print(x$formula[[1]])
cat("# Object size:", format(object.size(x), units = "auto"))
cat("\n")
invisible(x)
}
#' @rdname print.plcp_sim
#' @method print plcp_sim_formula_compare
#' @export
print.plcp_sim_formula_compare <- function(x, ...) {
cat(
"# A 'plcp_sim'-object containing",
x$nsim,
"simulations.",
"Use summary() to view results."
)
cat("\n")
print(x$formula)
cat("# Object size:", format(object.size(x), units = "auto"))
cat("\n")
invisible(x)
}
# #' @export
# p_sim <- function(res) {
# res <- lapply(res$res, summary_.plcp_sim, paras = res$paras)
#
#
# }
#' Summarize the results from a simulation of a single study design-object
#' @param object A \code{simulate.plcp}-object
#' @param alpha Indicates the significance level. Default is 0.05 (two-tailed),
#' one-tailed tests are not yet implemented.
#' @param para Selects a parameter to return. Default is \code{NULL},
#' which returns all parameters. If multiple model formulas are compared a named list can be
#' used to specify different parameters per model.
#' @param ... Currently not used
#'
#' @details
#'
#' \strong{Model selection}
#'
#' It is possible to summarize the performance of a data driven model selection strategy
#' based on the formulas used in the simulation (see \code{\link{sim_formula_compare}}).
#' The two model selection strategies are:
#' \itemize{
#' \item \code{FW}: Forward selection of the models. Starts with the first model formula and
#' compares it with the next formula. Continues until the test of M_i vs M_{i + 1} is non-significant,
#' and then picks M_i. Thus if three models are compared, and the comparison of M_1 vs M_2 is non-significant, M_3
#' will not be tested and M_1 is the winning model.
#' \item \code{BW}: Backward selection of the models. Starts with the last model formula and
#' compares it with the previous formula. Continues until the test of M_i vs M_{i - 1} is significant or until
#' all adjacent formulas have been compared. Thus if three models are compared, and the comparison of M_3 vs M_2 is non-significant,
#' M2 vs M1 will be tested and M2 will be picked if significant, and M1 if not.
#' }
#'
#' The model comparison is performed using a likelihood ratio test based the REML criterion. Hence, it assumed you are comparing models
#' with the same fixed effects, and that one of the models is a reduced version of the other (nested models). The LRT test is done as a
#' post-processing step, so \code{model_selection} option will not re-run the simulation. This also means that different alpha levels
#' for the LRTs can be investigated without re-running the simulation.
#'
#' \strong{Data transformation}
#'
#' If the data has been transformed \code{sim_formula(data_transform = ...)}, then
#' true parameter values (\code{theta}s shown in the summary will most likely no longer
#' apply. Hence, relative bias and CI coverage will be in relation to the original model.
#' However, the empirical estimates will be summarized correctly, enabling investigation of
#' power and Type I errors using arbitrary transformations.
#'
#' @return Object with class \code{plcp_sim_summary}. It contains
#' the following output:
#' \itemize{
#' \item \code{parameter} is the name of the coefficient
#' \item \code{M_est} is the mean of the estimates taken over all the simulations.
#' \item \code{M_se} is the mean estimated standard error taken over all the simulations.
#' \item \code{SD_est} is the empirical standard error; i.e. the standard
#' deviation of the distribution of the generated estimates.
#' \item \code{power} is the empirical power of the Wald Z test, i.e. the proportion
#' of simulated p-values < alpha.
#' \item \code{power_satt} is the empirical power of the Wald \emph{t} test using
#' Satterthwaite's degree of freedom approximation.
#' \item \code{satt_NA} is the proportion of Satterthwaite's approximations that failed.
#' \item \code{prop_zero} is the proportion of the simulated estimates that
#' are zero; only shown for random effects.
#' }
#'
#'
#' @method summary plcp_sim
#' @export
summary.plcp_sim <- function(object, model = NULL, alpha = 0.05, para = NULL, ...) {
res <- object
x <- lapply(res$res, summary_.plcp_sim,
paras = res$paras,
alpha = alpha,
...)
x <- list(summary = x,
nsim = res$nsim,
paras = res$paras,
tot_n = x[[1]]$tot_n,
alpha = alpha,
formula = res$formula)
if(!is.null(para)) {
#check_para <- vapply(x$summary, function(x) para %in% x$FE$parameter, logical(1))
#if(!all(check_para)) stop("The parameter: '", para, "' does not exist in all models.", call. = FALSE)
nr <- length(x$summary)
FE <- vector("list", nr)
RE <- vector("list", nr)
for(i in 1:nr) {
mod <- names(x$summary)[i]
tmp <- x$summary[[i]]
if(is.list(para)) {
pp <- para[[mod]]
} else if(is.character(para)) {
if(length(para) > 1) stop("'para' must have length equal to 1. ",
"If you want different parameters per model ",
"formula use a named list.", call. = FALSE)
pp <- para
}
FE[[i]] <- tmp$FE[tmp$FE$parameter == pp, ]
RE[[i]] <- tmp$RE[tmp$RE$parameter == pp, ]
if(is.list(para) && nrow(FE[[i]]) == 0 && nrow(RE[[i]]) == 0) {
stop("No 'para': ", pp, " found in 'model': ", mod, call. = FALSE)
}
if(nrow(FE[[i]]) >= 1) FE[[i]]$model <- names(x$summary)[i]
if(nrow(RE[[i]]) >= 1) RE[[i]]$model <- names(x$summary)[i]
}
FE <- do.call(rbind, FE)
RE <- do.call(rbind, RE)
x$summary <- list("summary" =
list("RE" = RE,
"FE" = FE,
"tot_n" = NA,
"convergence" = NA,
"Satt_NA" = NA,
"CI_NA" = NA)
)
}
if("model_selected" %in% names(res)) {
x$model_selected <- res$model_selected
x$model_direction <- res$model_direction
x$LRT_alpha <- res$LRT_alpha
}
class(x) <- append("plcp_sim_summary", class(x))
x
}
#' @rdname summary.plcp_sim
#' @param model Indicates which model that should be returned.
#' Default is \code{NULL} which return results from all model formulas. Can also be a \code{character} matching the
#' names used in \code{\link{sim_formula_compare}}.
#' @param model_selection indicates if the summary should be based on a LRT model selection strategy. Default is \code{NULL},
#' which returns all models, if \code{FW} or \code{BW} a forward or backward model selection strategy is used, see \emph{Details}.
#' @param LRT_alpha Indicates the alpha level used if doing LRT model comparisons.
#' @method summary plcp_sim_formula_compare
#' @export
summary.plcp_sim_formula_compare <- function(object, model = NULL, alpha = 0.05, model_selection = NULL, LRT_alpha = 0.1, para = NULL, ...) {
if(is.null(model_selection)) {
if(is.null(model)) {
summary.plcp_sim(object,
alpha = alpha,
para = para,
...)
} else {
if(is.numeric(model)) model <- names(object$res)[model]
if(!model %in% names(object$res)) stop("No 'model' named: ", model, call. = FALSE)
object$res <- object$res[model]
summary.plcp_sim(object,
alpha = alpha,
para = para,
...)
}
} else if(model_selection %in% c("FW", "BW")) {
x <- do_model_selection(object,
direction = model_selection,
alpha = LRT_alpha)
summary.plcp_sim(x,
alpha = alpha,
para = para,
...)
}
}
summarize_RE <- function(res, theta) {
d <- res$RE
parms <- unique(d$parameter)
tmp <- vector("list", length(parms))
for(i in seq_along(parms)) {
para <- parms[[i]]
vcov <- d[d$parameter == para, "vcov"]
theta_i <- theta[theta$parameter == para, "theta"]
theta_i[is.na(theta_i)] <- 0 # for when para is NA
if(length(theta_i) == 0) theta_i <- 0 # for when para does not exist
est <- mean(vcov, na.rm=TRUE)
res <- data.frame(
parameter = para,
M_est = est,
theta = theta_i,
est_rel_bias = get_RB(est, theta_i),
prop_zero = mean(is_approx(vcov, 0), na.rm=TRUE),
is_NA = mean(is.na(vcov))
)
if (para %in% c("cor_subject", "cor_cluster")) {
res$prop_zero <- mean(is_approx(abs(vcov), 1), na.rm=TRUE)
}
tmp[[i]] <- res
}
RE <- do.call(rbind, tmp)
RE
}
summarize_FE <- function(res, theta, alpha, df_bw = NULL) {
d <- res$FE
parms <- unique(d$parameter)
tmp <- vector("list", length(parms))
for(i in seq_along(parms)) {
para <- parms[[i]]
ind <- d$parameter == para
se <- d[ind, "se"]
estimate <- d[ind, "estimate"]
pval <- d[ind, "pval"]
df <- d[ind, "df"]
if(is.null(df_bw)) {
tmp_df_bw <- unique(d[ind, "df_bw"])
} else if(is.list(df_bw)) {
tmp_df_bw <- df_bw[[para]]
}
if(any(!is.na(pval))) {
Satt_NA <- mean(is.na(df))
} else Satt_NA <- NA
# para <- i
#theta_i <- theta[[i]]
pvals_bw <- get_p_val_df(t = estimate/se,
df = tmp_df_bw,
parameter = para,
test = unique(d[!is.na(d$df_bw), "parameter"]))
if(para %in% names(theta)) {
theta_i <- theta[[para]]
} else theta_i <- NA
tmp[[i]] <- data.frame(
parameter = para,
M_est = mean(estimate),
theta = theta_i,
M_se = mean(se),
SD_est = sd(estimate),
Power = mean(get_cover(estimate, se, alpha = alpha)),
Power_bw = mean(pvals_bw < alpha),
Power_satt = mean(pval < alpha, na.rm=TRUE),
Satt_NA = Satt_NA
)
}
FE <- do.call(rbind, tmp)
FE
}
summarize_CI <- function(res, theta = NULL) {
## TODO: fix so theta matches fit$tests
d <- res$FE
parms <- unique(res$FE$parameter)
tmp <- vector("list", length(parms))
thetas <- theta
for(i in seq_along(parms)) {
para <- parms[[i]]
ind <- d$parameter == para
if(is.null(thetas)) {
theta <- mean(d[ind, "estimate"])
} else if(length(thetas) > 1) {
theta <- thetas[[para]]
} else theta <- thetas
CI_lwr <- d[ind, "CI_lwr"]
CI_upr <- d[ind, "CI_upr"]
CI_wald_lwr <- d[ind, "CI_wald_lwr"]
CI_wald_upr <- d[ind, "CI_wald_upr"]
tmp[[i]] <- data.frame(
parameter = para,
CI_cover = mean(CI_lwr < theta &
CI_upr > theta, na.rm = TRUE),
CI_Wald_cover = mean(CI_wald_lwr < theta &
CI_wald_upr > theta, na.rm=TRUE),
CI_NA = mean(is.na(CI_lwr)))
}
CI_cov <- do.call(rbind, tmp)
CI_cov
}
summary_.plcp_sim <- function(res, paras, alpha, df_bw = NULL) {
RE_params <-
data.frame(
parameter = c(
"subject_intercept",
"subject_slope",
"cluster_intercept",
"cluster_slope",
"error",
"cor_subject",
"cor_cluster"
),
theta = c(
paras$sigma_subject_intercept ^ 2,
paras$sigma_subject_slope ^ 2,
paras$sigma_cluster_intercept ^ 2,
paras$sigma_cluster_slope ^ 2,
paras$sigma_error ^ 2,
paras$cor_subject,
paras$cor_cluster
)
)
RE <- summarize_RE(res, theta = RE_params)
# falseconv
false_conv <-
res$RE[res$RE$parameter == "cluster_slope", "vcov"]
false_conv <- mean(is_approx(false_conv, 0))
theta = list(
"(Intercept)" = paras$fixed_intercept,
"treatment" = 0,
"time" = paras$fixed_slope,
"time:treatment" = get_slope_diff(paras) / paras$T_end
)
# support both variants of time * treatment
t_b_t <- res$FE$parameter
check_tbt <- t_b_t %in% c("time:treatment", "treatment:time")
if(any(check_tbt)) {
t_b_t <- unique(t_b_t[check_tbt])
names(theta)[4] <- t_b_t
}
FE <- summarize_FE(res = res,
theta = theta,
alpha = alpha,
df_bw = df_bw)
if(all(is.na(res$FE$pval))) {
Satt_NA <- 0
} else {
ind <- which(!is.na(FE$Power_satt))[1]
Satt_NA <- FE[ind, "Satt_NA"]
}
FE$Satt_NA <- NULL
## TODO: update to make compatible with fit$test
## update so function is agnostic to if time:treatment or treatment:time
CI_NA <- 0
if ("CI_lwr" %in% colnames(res$FE)) {
CI_cov <- summarize_CI(res, theta)
FE$CI_Cover <- CI_cov$CI_cover
i <- which.min(CI_cov$CI_Wald_cover >= 0)
CI_NA <- CI_cov[i, "CI_NA"]
FE$CI_Wald_cover <- CI_cov$CI_Wald_cover
}
# FE_eff <- c("(Intercept)",
# "treatment",
# "time",
# "time:treatment")
#
# FE_eff <- FE_eff[FE_eff %in% FE$parameter]
# ind <- vapply(FE_eff, function(i)
# which(FE$parameter == i), numeric(1))
# FE <- FE[ind,]
convergence <- mean(res$convergence)
list("RE" = RE,
"FE" = FE,
"tot_n" = res$tot_n,
"convergence" = convergence,
"Satt_NA" = Satt_NA,
"CI_NA" = CI_NA)
}
# multi-sim ---------------------------------------------------------------
# as.plcp_sim <- function(object) {
# UseMethod("as.plcp_sim")
# }
#
# as.plcp_sim.data.frame <- function(object) {
# object <- as.list(object)
# class(object) <- append("plcp", class(object))
#
# object
# }
.check_para_in_mod <- function(eff_names, para) {
x <- lapply(seq_along(eff_names), function(i) {
mod <- names(eff_names[i])
any(eff_names[[mod]] %in% para[[mod]])
} )
unlist(x)
}
#' Summarize simulations based on a combination of multiple parameter values
#'
#' @param object A multiple simulation object created with
#' \code{\link{simulate.plcp_multi}}
#' @param para The name of the fixed or random effect that should be summarized.
#' @param model Specifies which model that should be summarized. Accepts either
#' a \code{character} with the name used in \code{\link{sim_formula_compare}}, or
#' an \code{integer} value.
#' @param alpha Indicates the significance level. Default is 0.05 (two-tailed),
#' one-tailed tests are not yet implemented.
#' @param model_selection Indicates if model selection should be performed. If \code{NULL} (default),
#' all models are returned, if \code{FW} or \code{BW} model selection is performed using LRT, and the result
#' is based on the selected model from each simulation. See \code{\link{summary.plcp_sim}} for more information.
#' @param LRT_alpha Indicates the alpha level used when comparing models during model selection.
#' @param ... Optional arguments.
#'
#' @method summary plcp_multi_sim
#'
#' @return A \code{list} with class \code{plcp_multi_sim_summary}. It can be coursed to a \code{data.frame},
#' using \code{as.data.frame}. Each row summarizes one of the parameter combinations used in the simulation.
#' In addition to the setup parameter values, it contains the following columns:
#' \itemize{
#' \item \code{parameter} is the name of the coefficient
#' \item \code{M_est} is the mean of the estimates taken over all the simulations.
#' \item \code{theta} is the population parameter values specified with \code{study_parameters}
#' \item \code{M_se} is the mean estimated standard error taken over all the simulations.
#' \item \code{SD_est} is the empirical standard error; i.e. the standard
#' deviation of the distribution of the generated estimates
#' \item \code{power} is the empirical power of the Wald Z test, i.e. the proportion
#' of simulated p-values < alpha
#' \item \code{power_satt} is the empirical power of the Wald \emph{t} test using
#' Satterthwaite's degree of freedom approximation.
#' \item \code{satt_NA} is the proportion of Satterthwaite's approximations that failed.
#' \item \code{prop_zero} is the proportion of the simulated estimates that
#' are zero; only shown for random effects.
#' }
#'
#' @export
#'
summary.plcp_multi_sim <- function(object,
para = "time:treatment",
model = NULL,
alpha = 0.05,
model_selection = NULL,
LRT_alpha = 0.1,
...) {
res <- object
mod_names <- names(res[[1]][[1]])
mod_n <- length(mod_names)
if (is.null(model_selection)) {
if(!is.null(model) && is.character(model) && !model %in% mod_names) {
stop("Incorrect 'model', no model named: ", model, call. = FALSE)
}
if(is.numeric(model)) {
if(model > length(mod_names)) stop("Numeric argument 'model' is too large.", call. = FALSE)
model <- mod_names[model]
mod_n <- 1
}
if(!is.null(model) && is.character(model)) {
if(is.list(para)) {
if(!model %in% names(para)) stop("'model' not found in 'para'", call. = FALSE)
para <- para[[model]]
}
if(is.numeric(para)) stop("'para' can't be a numeric value", call. = FALSE)
mod_n <- 1
}
if(is.null(model) && is.list(para)) {
if(!all(mod_names %in% names(para))) stop("At least one of the model names in 'para' does not exist.", call. = FALSE)
}
}
# model selection
if(!is.null(model_selection)) {
for(i in seq_along(res)) {
res[[i]] <- do_model_selection(res[[i]],
direction = model_selection,
alpha = LRT_alpha)
model <- "model_selection"
}
}
# check 'paras'
RE_names <- lapply(res[[1]]$res, function(x) unique(x$RE$parameter))
FE_names <- lapply(res[[1]]$res, function(x) unique(x$FE$parameter))
if(!is.null(model)) {
RE_names <- RE_names[[model]]
FE_names <- FE_names[[model]]
}
if(length(para) == 1 && is.character(para)) {
if(para %in% unique(unlist(RE_names))) {
type <- "random"
} else if(para %in% unique(unlist(FE_names))) {
type <- "fixed"
} else stop("No 'para' named: ", para, call. = FALSE)
} else if(is.list(para)) {
if(length(para) != mod_n) stop("When 'para' is a list it ",
"must be the same length as the number of models: ",
mod_n)
if(any(.check_para_in_mod(RE_names, para))) {
type <- "random"
} else if(any(.check_para_in_mod(FE_names, para))) {
type <- "fixed"
} else {
stop("At least one of the 'para'(s) was not found", call. = FALSE)
}
}
# summarize
if (type == "fixed") {
out <- lapply(res, function(x) summary(x,
para = para,
model = model)$summary[[1]]$FE)
} else if (type == "random") {
out <- lapply(res, function(x) summary(x,
para = para,
model = model)$summary[[1]]$RE)
}
out <- as.data.frame(do.call(rbind, out))
paras <- attr(object, "paras")
paras <- prepare_multi_setup(paras)$out_dense
paras <- as.data.frame(paras)
paras <- paras[rep(1:nrow(paras), each = mod_n), ]
out <- cbind(paras, as.data.frame(out))
out <- out[order(out$model), ]
class(out) <- append("plcp_multi_sim_summary", class(out))
attr(out, "type") <- type
attr(out, "model") <- model
attr(out, "used_models") <- unique(out$model)
attr(out, "used_tests") <- unique(out$parameter)
attr(out, "nsim") <- res[[1]]$nsim
out
}
#' Convert a multi-sim summary object to a tidy data.frame
#'
#' @param x Object with class \code{plcp_multi_sim_summary}.
#' @param ... Not used
#'
#' @return a \code{data.frame} with one row for each simulation.
#' Columns include the simulation study parameters and the results.
#' @export
as.data.frame.plcp_multi_sim_summary <- function(x, ...) {
attr(x, "dense")
}
#' @export
as.data.frame.plcp_sim_summary <- function(x, ...) {
RE <- lapply(seq_along(x$summary), function(i) {
d <- x$summary[[i]]
tmp <- d$RE
tmp$model <- names(x$summary)[i]
tmp$type <- "random"
tmp
})
}
#' Print method for \code{summary.plcp_multi_sim}-objects
#' @param x An object of class \code{plcp_multi_sim_summary}
#' @param add_cols \code{character} vector; indicates the names of the
#' additional columns that should be added to the output. Intended use case is
#' when you want to add some of the setup parameters, this print method
#' is not smart enough to figure out which parameters you are investigating.
#' @param bias \code{logical}; indicates if parameter bias should be printed.
#' @param power \code{logical}; indicates if empirical power should be printed.
#' @param estimates \code{logical}; indicates if the parameter estimates should be printed.
#' @param digits number of significant digits.
#' @param ... Optional arguments.
#' @method print plcp_multi_sim_summary
#' @export
print.plcp_multi_sim_summary <- function(x,
add_cols = NULL,
bias = TRUE,
power = TRUE,
estimates = TRUE,
digits = 2, ...) {
if(!is.null(add_cols) & !all(add_cols %in% colnames(x))) {
not_found <- which(!add_cols %in% colnames(x))
stop("No column called: '",
paste(add_cols[not_found], sep = "','"),
"'",
call. = FALSE)
}
model <- attr(x, "model")
type <- attr(x, "type")
nsim <- attr(x, "nsim")
out <- as.data.frame.data.frame(x)
if(estimates) est_cols <- c("M_est", "theta", "M_se", "SD_est") else est_cols <- NULL
if(power) power_cols <- c("Power" ,"Power_bw" ,"Power_satt") else power_cols <- NULL
print_cols <- c("model", add_cols, est_cols, power_cols, c("CI_Cover", "CI_Wald_cover"))
out <- out[, colnames(x) %in% print_cols]
para <- paste(as.character(unique(attr(x, "used_tests"))), collapse = "', '")
out <- out[, !colnames(out) %in% "parameter", ]
mod_col <- colnames(out) %in% "model"
if(length(attr(x, "used_models")) > 1) {
out <- cbind(out[, mod_col, drop = FALSE],
out[, !mod_col])
} else out <- out[, !mod_col]
out[t(do.call(rbind, lapply(out, is.nan)))] <- "."
out[t(do.call(rbind, lapply(out, is.na)))] <- "."
if(is.null(model)) model <- "All"
cat("Model: '", model, "' | Type: '", type, "' | Parameter(s): '", para, "'\n", sep = "")
cat("---\n")
print(out, digits = digits, row.names = FALSE)
cat("---\n")
cat("nsim: ", nsim, "|", sum(!colnames(x) %in% print_cols), "columns not shown\n")
if(any(x$is_NA > 0)) warning("Some simulations had NA estimates that was removed.", call. = FALSE)
invisible(out)
}
## random effect
summary_fixed.plcp_multi_sim <- function(res, para, model, alpha) {
out <- res$res[[model]]$FE
# for convenience catch reversed interaction
mod_paras <- unique(out$parameter)
TbT <- c("time:treatment", "treatment:time")
ind <- TbT %in% mod_paras
if(any(ind) & para %in% TbT) {
TbT_model <- TbT[TbT %in% mod_paras]
para <- TbT_model
}
theta <- switch(
para,
"(Intercept)" = res$paras$fixed_intercept,
"treatment" = 0,
"time" = res$paras$fixed_slope,
"time:treatment" = get_slope_diff(res$paras) / res$paras$T_end,
"treatment:time" = get_slope_diff(res$paras) / res$paras$T_end,
NA
)
if(all(out$parameter != para)) stop("Argument 'para': ", "'", para, "' is not a valid parameter.", call. = FALSE)
out <- out[out$parameter == para,]
est <- mean(out$estimate)
se_est <- mean(out$se)
se_hat <- sd(out$estimate)
power <- mean(get_cover(out$estimate,
out$se,
alpha = alpha))
power_bw <- get_p_val_df(t = out$estimate/out$se,
df = out$df_bw,
parameter = para,
test = para)
Satt_NA <- mean(is.na(out$df))
ret <- with(
out,
data.frame(
parameter = para,
M_est = est,
theta = theta,
est_rel_bias = ifelse(theta == 0,
est - theta,
(est - theta) / theta),
M_se = se_est,
SD_est = se_hat,
se_rel_bias = (se_est - se_hat) / se_hat,
Power = power,
Power_bw = mean(power_bw < alpha),
Power_satt = mean(out$pval < alpha, na.rm = FALSE),
Satt_NA = Satt_NA
)
)
if ("CI_lwr" %in% colnames(out)) {
CI_cov <- summarize_CI(res$res[[model]], theta)
CI_cov <- CI_cov[CI_cov$parameter == para, ]
ret$CI_Cover <- CI_cov$CI_cover
ret$CI_Wald_cover <- CI_cov$CI_Wald_cover
ret$CI_NA <- CI_cov$CI_NA
}
ret
}
summary_random.plcp_multi_sim <- function(res, para, model) {
tmp <- res$res[[model]]$RE
if (!para %in% unique(tmp$parameter)) {
stop(
"No random effect named: '",
para,
"'.\n Available random effects are: \n ",
paste(unique(tmp$parameter), collapse = ",\n ")
)
}
theta <- switch(
para,
"subject_intercept" = res$paras$sigma_subject_intercept^2,
"subject_slope" = res$paras$sigma_subject_slope^2,
"subject_intercept" = res$paras$sigma_subject_intercept^2,
"cor_subject" = res$paras$cor_subject,
"cluster_intercept" = res$paras$sigma_cluster_intercept^2,
"cluster_slope" = res$paras$sigma_cluster_slope^2,
"cor_cluster" = res$paras$cor_cluster,
"error" = res$paras$sigma_error^2
)
out <- res$res[[model]]$RE
out <- out[out$parameter == para,]
vcov <- out$vcov
est <- mean(vcov, na.rm = TRUE)
out <- data.frame(
parameter = para,
M_est = est,
theta = theta,
est_rel_bias = get_RB(est, theta),
prop_zero = mean(is_approx(vcov, 0), na.rm=TRUE),
is_NA = mean(is.na(vcov))
)
if (para %in% c("cor_subject", "cor_cluster")) {
out$prop_zero <- mean(is_approx(abs(vcov), 1), na.rm=TRUE)
}
out
}
get_RB <- function(est, theta) {
if(theta == 0) {
RB <- (est - theta)
} else {
RB <- (est - theta) / theta
}
RB
}
# power -------------------------------------------------------------------
get_cover <- function(est, se, alpha) {
# CI cover
abs(est) - qnorm(1 - (alpha/2)) * se > 0
}
get_p_val_df <- function(t, df, parameter, test = "time:treatmen") {
if (unique(parameter) %in% test) {
p <- (1 - pt(abs(t), df)) * 2
} else
p <- NA
p
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.