pooling: Fit Poolwise Regression Models

Documented in poolpower_t

#' Visualize T-test Power for Pooling Design
#'
#' Useful for assessing efficiency gains that might be achieved with a pooling
#' design.
#'
#'
#' @param g Numeric vector of pool sizes to include.
#' @param d Numeric value specifying true difference in group means.
#' @param mu1,mu2 Numeric value specifying group means. Required if
#' \code{multiplicative = TRUE}.
#' @param sigsq Numeric value specifying the variance of observations.
#' @param sigsq1,sigsq2 Numeric value specifying the variance of observations
#' for each group.
#' @param sigsq_p Numeric value specifying the variance of processing errors.
#' @param sigsq_m Numeric value specifying the variance of measurement errors.
#' @param multiplicative Logical value for whether to assume multiplicative
#' rather than additive errors.
#' @param alpha Numeric value specifying type-1 error rate.
#' @param beta Numeric value specifying type-2 error rate.
#' @param assay_cost Numeric value specifying cost of each assay.
#' @param other_costs Numeric value specifying other per-subject costs.
#' @param labels Logical value.
#'
#'
#' @return Plot of power vs. total costs generated by
#' \code{\link[ggplot2]{ggplot}}.
#'
#'
#' @examples
#' # Plot power vs. total study costs for d = 0.25, sigsq = 1, and costs of $100
#' # per assay and $0 in other per-subject costs.
#' poolpower_t(d = 0.5, sigsq = 1, assay_cost = 100, other_costs = 0)
#'
#' # Repeat but with $10 in per-subject costs.
#' poolpower_t(d = 0.5, sigsq = 1, assay_cost = 100, other_costs = 10)
#'
#' # Back to no per-subject costs, but with processing and measurement error
#' poolpower_t(d = 0.5, sigsq = 1, sigsq_p = 0.2, sigsq_m = 0.1,
#'             assay_cost = 100, other_costs = 0)
#'
#'
#'@export
poolpower_t <- function(g = c(1, 3, 10),
                        d = NULL,
                        mu1 = NULL,
                        mu2 = NULL,
                        sigsq = NULL,
                        sigsq1 = sigsq,
                        sigsq2 = sigsq,
                        sigsq_p = 0,
                        sigsq_m = 0,
                        multiplicative = FALSE,
                        alpha = 0.05,
                        beta = 0.2,
                        assay_cost = 100,
                        other_costs = 0,
                        labels = TRUE) {

  # Error checking
  if (! is.null(d) & (! is.null(mu1) | ! is.null(mu2))) {
    stop("Please specify d or specify mu1 and mu2")
  }

  if (! multiplicative) {

    # Calculate d if mu1 and mu2 are specified
    if (is.null(d)) {
      d <- abs(mu1 - mu2)
    }

    # Calculate assays per group for 99% power with traditional design
    n.max <- (qnorm(0.99) + qnorm(1 - alpha / 2))^2 / d^2 *
      (sigsq1 + sigsq2 + 2 * sigsq_m)
    n <- 3: ceiling(n.max)

    # Calculate variance of errors and pooled observations
    sigsq_pm <- sigsq_p * ifelse(g > 1, 1, 0) + sigsq_m
    sigsq_xtilde1 <- sigsq1 / g + sigsq_pm
    sigsq_xtilde2 <- sigsq2 / g + sigsq_pm

    # Create data frame for ggplot
    df <- data.frame(
      g = rep(g, each = length(n)),
      sigsq_xtilde1 = rep(sigsq_xtilde1, each = length(n)),
      sigsq_xtilde2 = rep(sigsq_xtilde2, each = length(n)),
      n = rep(n, length(g))
    )

    # Calculate power across n for each g
    df$power <- mapply(
      FUN = function(SIGSQ_XTILDE1, SIGSQ_XTILDE2, N) {
        power_2t_unequal(
          d = d,
          sigsq1 = SIGSQ_XTILDE1,
          sigsq2 = SIGSQ_XTILDE2,
          n = N,
          alpha = alpha
        )
      },
      SIGSQ_XTILDE1 = df$sigsq_xtilde1,
      SIGSQ_XTILDE2 = df$sigsq_xtilde2,
      N = df$n
    )

  } else {

    # Check that mu1 and mu2 are specified and that mu1 > mu2
    if (is.null(mu1) | is.null(mu2)) {
      stop("For multiplicative errors, you have to specify mu1 and mu2.")
    } else if (mu1 <= mu2) {
      stop("mu1 should be larger than mu2")
    }

    # Calculate d
    if (is.null(d)) {
      d <- mu1 - mu2
    }

    # Calculate assays per group for 99% power with traditional design
    n.max <- (qnorm(0.99) + qnorm(1 - alpha / 2))^2 / d^2 *
      (sigsq_m * (mu1^2 + sigsq1) + sigsq1 + sigsq_m * (mu2^2 + sigsq2) + sigsq2)
    n <- 3: n.max

    # Calculate variance of errors and pooled observations
    sigsq_pm <- sigsq_m + sigsq_p * (sigsq_m + 1) * ifelse(g > 1, 1, 0)
    sigsq_xtilde1 <- sigsq_pm * (mu1^2 + sigsq1 / g) + sigsq1 / g
    sigsq_xtilde2 <- sigsq_pm * (mu2^2 + sigsq2 / g) + sigsq2 / g

    # Create data frame for ggplot
    df <- data.frame(
      g = rep(g, each = length(n)),
      sigsq_xtilde1 = rep(sigsq_xtilde1, each = length(n)),
      sigsq_xtilde2 = rep(sigsq_xtilde2, each = length(n)),
      n = rep(n, length(g))
    )

    # Calculate power across n for each g
    df$power <- mapply(
      FUN = function(SIGSQ_XTILDE1, SIGSQ_XTILDE2, N) {
        power_2t_unequal(
          d = d,
          sigsq1 = SIGSQ_XTILDE1,
          sigsq2 = SIGSQ_XTILDE2,
          n = N,
          alpha = alpha
        )
      },
      SIGSQ_XTILDE1 = df$sigsq_xtilde1,
      SIGSQ_XTILDE2 = df$sigsq_xtilde2,
      N = df$n
    )

  }

  # Prep for ggplot
  df$n.assays <- df$n * 2
  df$n.subjects <- df$g * df$n.assays
  df$costs <- df$n.assays * assay_cost + df$n.subjects * other_costs
  df$power.lab <- c(0, ifelse(diff(sign(df$power - (1 - beta))) >= 1, 1, 0))

  # Exclude values with power > 99.9%
  df <- df %>% dplyr::filter(power <= 0.999)

  # Create labels
  if (all(df$costs[df$power.lab == 1] > 1000)) {
    df$costs <- df$costs / 1000
    df$costlabel <- paste("$", sprintf("%.1f", df$costs), "k (", df$n.assays, " total assays)", sep = "")
    dollar.units <- "($1,000's)"
  } else {
    df$costlabel <- paste("$", df$costs, " (", df$n.assays, " total assays)", sep = "")
    dollar.units <- "($)"
  }

  # Create plot
  costs <- NULL
  p <- ggplot(df, aes(costs, power, group = g, color = as.factor(g))) +
    geom_point() +
    geom_line() +
    labs(title = "Power vs. Total Study Costs",
         y = "Power",
         x = paste("Total costs", dollar.units),
         color = "Pool size") +
    geom_hline(yintercept = unique(c(0, 0.5, 1 - beta, 1)), linetype = 2) +
    theme_bw() +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank()) +
    scale_y_continuous(breaks = unique(c(0, 0.5, 1 - beta, 1)))

  # Label points with sufficient power
  if (labels) {
    power.lab <- NULL
    p <- p + geom_label_repel(
      data = subset(df, power.lab == 1),
      aes_string(x = "costs", y = "power", label = "costlabel"),
      min.segment.length = 0,
      label.padding = 0.4,
      show.legend = FALSE
    )
  }
  p

}