pooling: Fit Poolwise Regression Models

Documented in poolcushion_t

#' Visualize T-test Power for Pooling Design as Function of Processing Error
#' Variance
#'
#' Useful for choosing a sample size such that power will be adequate even if
#' the processing errors are larger than anticipated.
#'
#'
#' @param g Numeric value specifying the pool size.
#' @param n Numeric value specifying the number of assays per group. If
#' unspecified, function figures out \code{n} required for
#' 100 (1 - \code{beta})\% power when \code{sigsq_p = 0}.
#' @param d Numeric value specifying true difference in group means.
#' @param mu1,mu2 Numeric value specifying group means. Required if
#' \code{multiplicative = TRUE}.
#' @param sigsq Numeric value specifying the variance of observations.
#' @param sigsq1,sigsq2 Numeric value specifying the variance of observations
#' for each group.
#' @param sigsq_p_predicted Numeric value specifying predicted processing error
#' variance. Used to calculate \code{n} if \code{n} is unspecified.
#' @param sigsq_p_range Numeric vector specifying range of processing error
#' variances to consider.
#' @param sigsq_m Numeric value specifying the variance of measurement errors.
#' @param multiplicative Logical value for whether to assume multiplicative
#' rather than additive errors.
#' @param alpha Numeric value specifying type-1 error rate.
#' @param beta Numeric value specifying type-2 error rate. Only used if
#' \code{n = NULL}.
#' @param labels Logical value.
#'
#'
#' @return Plot generated by \code{\link[ggplot2]{ggplot}}.
#'
#'
#' @examples
#' # Determine optimal pool size and number of assays to detect a difference in
#' # group means of 0.5, with a common variance of 1, processing errors with
#' # variance of 0.1, and measurement errors with variance of 0.2. Assume costs
#' # of $100 per assay and $10 per subject.
#' poolcost_t(
#'   g = 1: 10,
#'   d = 0.5,
#'   sigsq = 1,
#'   sigsq_p = 0.1,
#'   sigsq_m = 0.2,
#'   assay_cost = 100,
#'   other_costs = 10
#' )
#'
#' # Visualize how power of the study will be affected if the true processing
#' # error variance is not exactly 0.1.
#' poolcushion_t(
#'   g = 7,
#'   n = 29,
#'   d = 0.5,
#'   sigsq = 1,
#'   sigsq_p_predicted = 0.1,
#'   sigsq_m = 0.2
#' )
#'
#'
#'@export
poolcushion_t <- function(g = NULL,
                          n = NULL,
                          d = NULL,
                          mu1 = NULL,
                          mu2 = NULL,
                          sigsq = NULL,
                          sigsq1 = sigsq,
                          sigsq2 = sigsq,
                          sigsq_p_predicted = 0,
                          sigsq_p_range = NULL,
                          sigsq_m = 0,
                          multiplicative = FALSE,
                          alpha = 0.05,
                          beta = 0.2,
                          labels = TRUE) {

  # Error checking
  if (multiplicative & (is.null(mu1) | is.null(mu2))) {
    stop("For multiplicative errors, you must specify mu1 and mu2")
  }

  # If unspecified, create reasonable default range for sigsq_p
  if (is.null(sigsq_p_range)) {
    max_sigsq <- max(sigsq1, sigsq2)
    sigsq_p_range <- c(0, max_sigsq)
  }

  # Create vector of sigsq_p values
  sigsq_p <- seq(sigsq_p_range[1], sigsq_p_range[2], diff(sigsq_p_range) / 1000)

  if (! multiplicative) {

    # Calculate d if mu1 and mu2 are specified
    if (is.null(d)) {
      d <- abs(mu1 - mu2)
    }

    # If unspecified, calculate n for target power at sigsq_p_predicted
    if (is.null(n)) {
      sigsq_pm_predicted <- sigsq_p_predicted * ifelse(g > 1, 1, 0) + sigsq_m
      n <- n_2t_unequal(
        d = d,
        sigsq1 = sigsq1 / g + sigsq_pm_predicted,
        sigsq2 = sigsq2 / g + sigsq_pm_predicted,
        alpha = alpha,
        beta = beta
      )
    }

    # Calculate variance of errors and pooled observations
    sigsq_pm <- sigsq_p * ifelse(g > 1, 1, 0) + sigsq_m
    sigsq_xtilde1 <- sigsq1 / g + sigsq_pm
    sigsq_xtilde2 <- sigsq2 / g + sigsq_pm

    # Calculate power as function of sigsq_p
    power <- mapply(
      FUN = function(SIGSQ_XTILDE1, SIGSQ_XTILDE2) {
        power_2t_unequal(
          n = n,
          d = d,
          sigsq1 = SIGSQ_XTILDE1,
          sigsq2 = SIGSQ_XTILDE2,
          alpha = alpha
        )
      },
      SIGSQ_XTILDE1 = sigsq_xtilde1,
      SIGSQ_XTILDE2 = sigsq_xtilde2
    )

  } else {

    # Check that mu1 and mu2 are specified and that mu1 > mu2
    if (is.null(mu1) | is.null(mu2)) {
      stop("For multiplicative errors, you have to specify mu1 and mu2.")
    } else if (mu1 <= mu2) {
      stop("mu1 should be larger than mu2")
    }

    # Calculate d
    if (is.null(d)) {
      d <- mu1 - mu2
    }

    # If unspecified, calculate n for target power at sigsq_p_predicted
    if (is.null(n)) {
      sigsq_pm_predicted <- sigsq_m + sigsq_p_predicted * (sigsq_m + 1) * ifelse(g > 1, 1, 0)
      n <- n_2t_unequal(
        d = d,
        sigsq1 = sigsq_pm_predicted * (mu1^2 + sigsq1 / g) + sigsq1 / g,
        sigsq2 = sigsq_pm_predicted * (mu2^2 + sigsq2 / g) + sigsq2 / g,
        alpha = alpha,
        beta = beta
      )
    }

    # Calculate variance of errors and pooled observations
    sigsq_pm <- sigsq_m + sigsq_p * (sigsq_m + 1) * ifelse(g > 1, 1, 0)
    sigsq_xtilde1 <- sigsq_pm * (mu1^2 + sigsq1 / g) + sigsq1 / g
    sigsq_xtilde2 <- sigsq_pm * (mu2^2 + sigsq2 / g) + sigsq2 / g

    # Calculate power as function of sigsq_p
    power <- mapply(
      FUN = function(SIGSQ_XTILDE1, SIGSQ_XTILDE2) {
        power_2t_unequal(
          n = n,
          d = d,
          sigsq1 = SIGSQ_XTILDE1,
          sigsq2 = SIGSQ_XTILDE2,
          alpha = alpha
        )
      },
      SIGSQ_XTILDE1 = sigsq_xtilde1,
      SIGSQ_XTILDE2 = sigsq_xtilde2
    )

  }

  # Prep for ggplot
  df <- data.frame(sigsq_p = sigsq_p, power = power)
  df$power.lab <- c(ifelse(diff(sign(df$power - (1 - beta))) < 0, 1, 0), 0)

  # Create labels
  df$powerlabel <- paste("sigma[p]^2 == ", df$sigsq_p, sep = "")

  # Create plot
  p <- ggplot(df, aes(sigsq_p, power)) +
    geom_point() +
    geom_line() +
    labs(title = paste("Power vs. Processing Error Variance (g = ", g, ", n = ", n, ")", sep = ""),
         y = "Power",
         x = expression(sigma[p]^2)) +
    geom_hline(yintercept = unique(c(0, 0.5, 1 - beta, 1)), linetype = 2) +
    theme_bw() +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank()) +
    scale_y_continuous(breaks = unique(c(0, 0.5, 1 - beta, 1)))

  # Label last data point with adequate power
  if (labels) {
    power.lab <- NULL
    p <- p + geom_label_repel(
      data = subset(df, power.lab == 1),
      aes_string(x = "sigsq_p", y = "power", label = "powerlabel"),
      min.segment.length = 0,
      label.padding = 0.4,
      show.legend = FALSE,
      parse = TRUE
    )
  }
  p

}