R/poolvar_t.R In pooling: Fit Poolwise Regression Models

Documented in poolvar_t

```#' Visualize Ratio of Variance of Each Pooled Measurement to Variance of Each
#' Unpooled Measurement as Function of Pool Size
#'
#' Useful for determining whether pooling is a good idea, and finding the
#' optimal pool size if it is.
#'
#'
#' @param g Numeric vector of pool sizes to include.
#' @param mu1,mu2 Numeric value specifying group means. Required if
#' \code{multiplicative = TRUE}.
#' @param sigsq Numeric value specifying the variance of observations.
#' @param sigsq1,sigsq2 Numeric value specifying the variance of observations
#' for each group.
#' @param sigsq_p Numeric value specifying the variance of processing errors.
#' @param sigsq_m Numeric value specifying the variance of measurement errors.
#' @param multiplicative Logical value for whether to assume multiplicative
#' rather than additive errors.
#' @param assay_cost Numeric value specifying cost of each assay.
#' @param other_costs Numeric value specifying other per-subject costs.
#' @param labels Logical value for whether to label the maximum ratio for each
#' group.
#' @param ylim Numeric vector specifying y-axis limits.
#'
#'
#' @return Plot generated by \code{\link[ggplot2]{ggplot}}.
#'
#'
#' @examples
#' # Plot ratio of variances vs. pool size with default settings
#' poolvar_t(sigsq = 1)
#'
#' # Add processing error and other per-subject costs
#' poolvar_t(sigsq = 1, sigsq_p = 0.2, other_costs = 0.1)
#'
#'
#' @export
poolvar_t <- function(g = 1:10,
                      mu1 = NULL,
                      mu2 = NULL,
                      sigsq = NULL,
                      sigsq1 = sigsq,
                      sigsq2 = sigsq,
                      sigsq_p = 0,
                      sigsq_m = 0,
                      multiplicative = FALSE,
                      assay_cost = 100,
                      other_costs = 0,
                      labels = TRUE,
                      ylim = NULL) {

  # Plots, for two groups, the cost-adjusted ratio of variances
  # (traditional / pooled) against pool size, and returns the ggplot object.

  # Error checking ----
  # Scalar conditions, so use the short-circuiting && / || operators.
  if (multiplicative && (is.null(mu1) || is.null(mu2))) {
    stop("For multiplicative errors, you must specify mu1 and mu2")
  }
  if (is.null(sigsq1) || is.null(sigsq2)) {
    stop("You must specify sigsq, or both sigsq1 and sigsq2")
  }
  if (!is.numeric(g) || length(g) == 0) {
    stop("g must be a non-empty numeric vector of pool sizes")
  }

  # Calculate ratio of costs, traditional / pooling
  cost_ratio <- (assay_cost + other_costs) / (assay_cost + g * other_costs)

  # Processing error only applies to actual pools (g > 1)
  is_pooled <- as.numeric(g > 1)

  # Calculate ratio of variances, traditional / pooling
  if (!multiplicative) {

    sigsq_pm <- sigsq_p * is_pooled + sigsq_m
    var_ratio1 <- (sigsq1 + sigsq_m) / (sigsq1 / g + sigsq_pm)
    var_ratio2 <- (sigsq2 + sigsq_m) / (sigsq2 / g + sigsq_pm)

  } else {

    sigsq_pm <- sigsq_m + sigsq_p * (sigsq_m + 1) * is_pooled
    var_ratio1 <- (sigsq_m * (mu1^2 + sigsq1) + sigsq1) /
      (sigsq_pm * (mu1^2 + sigsq1 / g) + sigsq1 / g)
    var_ratio2 <- (sigsq_m * (mu2^2 + sigsq2) + sigsq2) /
      (sigsq_pm * (mu2^2 + sigsq2 / g) + sigsq2 / g)

  }

  # Calculate cost-adjusted ratio of variances and flag the max in each group
  var_ratio1_adj <- var_ratio1 * cost_ratio
  var_ratio2_adj <- var_ratio2 * cost_ratio
  max1 <- max2 <- rep(0, length(g))
  max1[which.max(var_ratio1_adj)] <- 1
  max2[which.max(var_ratio2_adj)] <- 1

  # Prep for ggplot. The x positions are nudged by +/- 0.075 so the two groups
  # do not overplot; labels are built from the true (un-nudged) pool sizes.
  var_ratio_adj <- c(var_ratio1_adj, var_ratio2_adj)
  df <- data.frame(
    g = c(g - 0.075, g + 0.075),
    Group = as.factor(rep(c(1, 2), each = length(g))),
    var_ratio = c(var_ratio1, var_ratio2),
    var_ratio_adj = var_ratio_adj,
    max = c(max1, max2),
    label = paste0(round(var_ratio_adj, 1), " (g = ", rep(g, times = 2), ")"),
    stringsAsFactors = FALSE
  )

  # Default ylim: start at 0 and always keep the reference line at 1 in view
  y_limits <- ylim
  if (is.null(y_limits)) {
    y_limits <- c(0, max(c(1, df$var_ratio_adj)) * 1.05)
  }

  # Create plot
  p <- ggplot(df, aes_string(x = "g", y = "var_ratio_adj", color = "Group")) +
    geom_point() +
    labs(title = "Ratio of Variances, Traditional vs. Pooled",
         x = "Pool size",
         y = "Cost-adjusted variance ratio") +
    ylim(y_limits) +
    geom_hline(yintercept = 1, linetype = 2) +
    scale_x_continuous(breaks = g) +
    theme_bw() +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank())

  # Label max
  if (labels) {
    p <- p + geom_label_repel(
      data = df[df$max == 1, , drop = FALSE],
      mapping = aes_string(label = "label"),
      min.segment.length = 0,
      show.legend = FALSE
    )
  }
  p

}
```

