#' Visualize T-test Power for Pooling Design as Function of Processing Error
#' Variance
#'
#' Useful for choosing a sample size such that power will be adequate even if
#' the processing errors are larger than anticipated.
#'
#'
#' @param g Numeric value specifying the pool size.
#' @param n Numeric value specifying the number of assays per group. If
#' unspecified, function figures out \code{n} required for
#' 100 (1 - \code{beta})\% power when \code{sigsq_p = 0}.
#' @param d Numeric value specifying true difference in group means.
#' @param mu1,mu2 Numeric value specifying group means. Required if
#' \code{multiplicative = TRUE}.
#' @param sigsq Numeric value specifying the variance of observations.
#' @param sigsq1,sigsq2 Numeric value specifying the variance of observations
#' for each group.
#' @param sigsq_p_predicted Numeric value specifying predicted processing error
#' variance. Used to calculate \code{n} if \code{n} is unspecified.
#' @param sigsq_p_range Numeric vector specifying range of processing error
#' variances to consider.
#' @param sigsq_m Numeric value specifying the variance of measurement errors.
#' @param multiplicative Logical value for whether to assume multiplicative
#' rather than additive errors.
#' @param alpha Numeric value specifying type-1 error rate.
#' @param beta Numeric value specifying type-2 error rate. Only used if
#' \code{n = NULL}.
#' @param labels Logical value.
#'
#'
#' @return Plot generated by \code{\link[ggplot2]{ggplot}}.
#'
#'
#' @examples
#' # Determine optimal pool size and number of assays to detect a difference in
#' # group means of 0.5, with a common variance of 1, processing errors with
#' # variance of 0.1, and measurement errors with variance of 0.2. Assume costs
#' # of $100 per assay and $10 per subject.
#' poolcost_t(
#' g = 1: 10,
#' d = 0.5,
#' sigsq = 1,
#' sigsq_p = 0.1,
#' sigsq_m = 0.2,
#' assay_cost = 100,
#' other_costs = 10
#' )
#'
#' # Visualize how power of the study will be affected if the true processing
#' # error variance is not exactly 0.1.
#' poolcushion_t(
#' g = 7,
#' n = 29,
#' d = 0.5,
#' sigsq = 1,
#' sigsq_p_predicted = 0.1,
#' sigsq_m = 0.2
#' )
#'
#'
#'@export
poolcushion_t <- function(g = NULL,
n = NULL,
d = NULL,
mu1 = NULL,
mu2 = NULL,
sigsq = NULL,
sigsq1 = sigsq,
sigsq2 = sigsq,
sigsq_p_predicted = 0,
sigsq_p_range = NULL,
sigsq_m = 0,
multiplicative = FALSE,
alpha = 0.05,
beta = 0.2,
labels = TRUE) {
# Error checking
if (multiplicative & (is.null(mu1) | is.null(mu2))) {
stop("For multiplicative errors, you must specify mu1 and mu2")
}
# If unspecified, create reasonable default range for sigsq_p
if (is.null(sigsq_p_range)) {
max_sigsq <- max(sigsq1, sigsq2)
sigsq_p_range <- c(0, max_sigsq)
}
# Create vector of sigsq_p values
sigsq_p <- seq(sigsq_p_range[1], sigsq_p_range[2], diff(sigsq_p_range) / 1000)
if (! multiplicative) {
# Calculate d if mu1 and mu2 are specified
if (is.null(d)) {
d <- abs(mu1 - mu2)
}
# If unspecified, calculate n for target power at sigsq_p_predicted
if (is.null(n)) {
sigsq_pm_predicted <- sigsq_p_predicted * ifelse(g > 1, 1, 0) + sigsq_m
n <- n_2t_unequal(
d = d,
sigsq1 = sigsq1 / g + sigsq_pm_predicted,
sigsq2 = sigsq2 / g + sigsq_pm_predicted,
alpha = alpha,
beta = beta
)
}
# Calculate variance of errors and pooled observations
sigsq_pm <- sigsq_p * ifelse(g > 1, 1, 0) + sigsq_m
sigsq_xtilde1 <- sigsq1 / g + sigsq_pm
sigsq_xtilde2 <- sigsq2 / g + sigsq_pm
# Calculate power as function of sigsq_p
power <- mapply(
FUN = function(SIGSQ_XTILDE1, SIGSQ_XTILDE2) {
power_2t_unequal(
n = n,
d = d,
sigsq1 = SIGSQ_XTILDE1,
sigsq2 = SIGSQ_XTILDE2,
alpha = alpha
)
},
SIGSQ_XTILDE1 = sigsq_xtilde1,
SIGSQ_XTILDE2 = sigsq_xtilde2
)
} else {
# Check that mu1 and mu2 are specified and that mu1 > mu2
if (is.null(mu1) | is.null(mu2)) {
stop("For multiplicative errors, you have to specify mu1 and mu2.")
} else if (mu1 <= mu2) {
stop("mu1 should be larger than mu2")
}
# Calculate d
if (is.null(d)) {
d <- mu1 - mu2
}
# If unspecified, calculate n for target power at sigsq_p_predicted
if (is.null(n)) {
sigsq_pm_predicted <- sigsq_m + sigsq_p_predicted * (sigsq_m + 1) * ifelse(g > 1, 1, 0)
n <- n_2t_unequal(
d = d,
sigsq1 = sigsq_pm_predicted * (mu1^2 + sigsq1 / g) + sigsq1 / g,
sigsq2 = sigsq_pm_predicted * (mu2^2 + sigsq2 / g) + sigsq2 / g,
alpha = alpha,
beta = beta
)
}
# Calculate variance of errors and pooled observations
sigsq_pm <- sigsq_m + sigsq_p * (sigsq_m + 1) * ifelse(g > 1, 1, 0)
sigsq_xtilde1 <- sigsq_pm * (mu1^2 + sigsq1 / g) + sigsq1 / g
sigsq_xtilde2 <- sigsq_pm * (mu2^2 + sigsq2 / g) + sigsq2 / g
# Calculate power as function of sigsq_p
power <- mapply(
FUN = function(SIGSQ_XTILDE1, SIGSQ_XTILDE2) {
power_2t_unequal(
n = n,
d = d,
sigsq1 = SIGSQ_XTILDE1,
sigsq2 = SIGSQ_XTILDE2,
alpha = alpha
)
},
SIGSQ_XTILDE1 = sigsq_xtilde1,
SIGSQ_XTILDE2 = sigsq_xtilde2
)
}
# Prep for ggplot
df <- data.frame(sigsq_p = sigsq_p, power = power)
df$power.lab <- c(ifelse(diff(sign(df$power - (1 - beta))) < 0, 1, 0), 0)
# Create labels
df$powerlabel <- paste("sigma[p]^2 == ", df$sigsq_p, sep = "")
# Create plot
p <- ggplot(df, aes(sigsq_p, power)) +
geom_point() +
geom_line() +
labs(title = paste("Power vs. Processing Error Variance (g = ", g, ", n = ", n, ")", sep = ""),
y = "Power",
x = expression(sigma[p]^2)) +
geom_hline(yintercept = unique(c(0, 0.5, 1 - beta, 1)), linetype = 2) +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
scale_y_continuous(breaks = unique(c(0, 0.5, 1 - beta, 1)))
# Label last data point with adequate power
if (labels) {
power.lab <- NULL
p <- p + geom_label_repel(
data = subset(df, power.lab == 1),
aes_string(x = "sigsq_p", y = "power", label = "powerlabel"),
min.segment.length = 0,
label.padding = 0.4,
show.legend = FALSE,
parse = TRUE
)
}
p
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.