assayr2: Tools for analyzing assay data

Documented in make_cis make_fcs

# Functions related to plotting model summaries and statistics

#' Generic function to do build confidence intervals.
#'
#' Build response variables and aggregate into a plot ready data.frame.
#' @param mod A model object. Only `lm` and `lmer` models are currently accepted.
#' @param level The confidence level used between 0-1. Default is .95.
#' @param all Boolean, should all predictor varaibles be returned or just the first predictor (default).
#' @param restore Boolean, shold response variables be transformed back to linear space.
#' @return A data.frame with the mean, upper and lower bounds for the response variable confidence interval.
#' @examples
#' \dontrun{
#' data(mtcars)
#' mtcars$cyl %<>% as.factor()
#' m <- lm(log10(mpg) ~ 0 + cyl, data = mtcars)
#' make_cis(m)
#'
#' library(lme4); data(sleepstudy)
#' sleepstudy$Days %<>% as.factor()
#' mm <- lmer(Reaction ~ 0 + Days + (1 | Subject), data = sleepstudy)
#' make_cis(mm)
#' }
#' @importFrom stats coef confint formula
#' @importFrom dplyr mutate filter select mutate_at
#' @importFrom lme4 fixef
#' @export
make_cis <- function(mod, level = .95, all = FALSE, restore = TRUE) {
  UseMethod("make_cis")
}
#' @export
make_cis.lm <- function(mod, level = .95, all = FALSE, restore = TRUE) {
  if (attributes(mod$terms)$intercept == 1) {
    stop("Use a model without intercept for easier plotting")
  }
  if (length(attributes(mod$terms)$term.labels) > 1 & all == TRUE) {
    warning("More than 1 predictor variable, watch for extra rows in return or set all = FALSE")
  }
  ci <- as.data.frame(confint(mod, level = level))
  plot_pred <- attributes(mod$terms)$term.labels[1]
  ci <- dplyr::mutate(ci,
    est = coef(mod),
    temp = gsub(plot_pred, "", rownames(ci))
  )

  if (!all) {
    not_wanted <- attributes(mod$terms)$term.labels[-1]
    not_wanted <- not_wanted[!grepl(plot_pred, not_wanted)]
    for (p in not_wanted) {
      ci %<>% dplyr::filter(!grepl(p, temp))
    }
  }
  names(ci) <- c("lwr", "upr", "est", plot_pred)

  ci %<>% dplyr::select(!!plot_pred, est, lwr, upr)

  if (restore) {
    response_var <- as.character(formula(mod))[2]

    response_base <- suppressWarnings(as.numeric(ifelse(grepl("ln\\(", response_var),
      exp(1),
      gsub(".*log(\\d+).*", "\\1", response_var)
    )))

    if (is.na(response_base)) {
      return(ci)
    }
    ci %<>% dplyr::mutate_at(dplyr::vars(est:upr), ~response_base^.)
  }
  ci
}
#' @export
make_cis.lmerMod <- function(mod, level = .95, all = FALSE, restore = TRUE) {
  num_sigma <- length(attributes(mod)$flist) + 1
  ci <- as.data.frame(confint(mod))[-(1:num_sigma), ]
  if (rownames(ci)[1] == "(Intercept)") {
    stop("Use model without intercept for easier plotting")
  }
  ci$est <- lme4::fixef(mod)
  plot_pred <- colnames(attributes(mod)$frame)[2]

  ci$temp <- gsub(plot_pred, "", rownames(ci))
  if (all == FALSE) {
    not_wanted <- colnames(attributes(mod)$frame)[c(-1, -2)]
    not_wanted <- not_wanted[!grepl(plot_pred, not_wanted)]
    for (p in not_wanted) {
      ci %<>% dplyr::filter(!grepl(p, temp))
    }
  }
  colnames(ci) <- c("lwr", "upr", "est", plot_pred)

  ci %<>% dplyr::select(!!plot_pred, est, lwr, upr)

  if (restore) {
    response_var <- as.character(formula(mod))[2]

    response_base <- suppressWarnings(as.numeric(ifelse(grepl("ln\\(", response_var),
      exp(1),
      gsub(".*log(\\d+).*", "\\1", response_var)
    )))

    if (is.na(response_base)) {
      return(ci)
    }

    ci %<>% dplyr::mutate_at(dplyr::vars(est:upr), ~response_base^.)
  }

  ci
}


#' Generic function to construct confidence intervals.
#'
#' Perform hypothesis test via `glht::multcomp()` and construct a `data.frame`
#' with summary values.
#' @param mod A model object of class `lm` or `lmer/lmerTest`.
#' @param level A numeric value between 0 and 1 for confidence level. Default is .95.
#' @param sig_figs A numeric value specifying the number of decimal places in percent change columns in result.
#' @param restore Boolean, shold fold-change confidence interval values be transformed into linear space.
#' @param pretty Boolean, should the labels for pvalues lower < .001, be adjusted to "p < .001"  to avoid scienctific notation.
#' @return A data.frame with:
#' * fold-change confidence interval (fc_est, fc_lwr, fc_upr)
#' * the contrast used in `glht::multcomp` (group - baseline)
#' * log2_fc (from baseline)
#' * pct_change (from baseline)
#' * p-value (adjusted for multiple comparrisons)
#' * label (pre-formatted with pct_change and p-value for easy plotting)
#' @examples
#' data(iris)
#' mod <- lm(Sepal.Length ~ 0 + Species, data = iris) # use model with intercept set to 0
#' fc <- make_fcs(mod)
#' @importFrom stats terms
#' @importFrom multcomp mcp glht
#' @importFrom tidyr separate
#' @importFrom dplyr mutate_at mutate select everything
#' @export
#' @md
make_fcs <- function(mod, level = .95, sig_figs = 2, restore = TRUE, pretty = TRUE) {
  UseMethod("make_fcs")
}
#' @export
make_fcs.lm <- function(mod, level = .95, sig_figs = 2, restore = TRUE, pretty = TRUE) {
  response_var <- attributes(stats::terms(mod)) %>%
    .$dataClasses %>%
    names() %>%
    .[1]

  response_base <- suppressWarnings(as.numeric(ifelse(grepl("ln\\(", response_var),
    exp(1),
    gsub(".*log(\\d+).*", "\\1", response_var)
  )))

  main_var <- attributes(terms(mod)) %>%
    .$dataClasses %>%
    names() %>%
    .[2]

  mcp_call <- parse(text = paste0("multcomp::mcp(", main_var, " = 'Dunnett')"))

  ht <- multcomp::glht(mod, linfct = eval(mcp_call))

  if (sum(grepl("(log)|(ln)", ht$model$call$formula[[2]])) < 1) {
    warning("No log transformation detected for response variable in model.
            Check model for heteroskedasticity.")
  }

  if (attributes(ht$model$terms)$intercept == 1) {
    stop("Redo model with intercept = 0 for easier plotting")
  }

  ht_df <- confint(ht, level)$confint %>%
    as.data.frame() %>%
    dplyr::rename(est = Estimate) %>%
    dplyr::rename_all(~paste0("fc_", .)) %>%
    dplyr::mutate(
      contrast = rownames(.),
      pval = summary(ht)$test$pvalues %>% signif(., sig_figs)
    ) %>%
    tidyr::separate(contrast, c(main_var, "baseline"), sep = " - ", remove = FALSE)

  # fold change caculations
  ht_df %<>% dplyr::rowwise() %>%
    dplyr::mutate(
      pct_change = signif(ifelse(is.na(response_base),
        (fc_est / coef(mod)[1]) * 100,
        (1 - (response_base^fc_est)) * -100
      ), sig_figs),
      log2_fc = log2(ifelse(is.na(response_base),
        (coef(mod)[1] + fc_est) / coef(mod)[1],
        response_base^fc_est
      ))
    ) %>%
    dplyr::ungroup()

  if (restore & !is.na(response_base)) {
    ht_df %<>% dplyr::mutate_at(dplyr::vars(dplyr::matches("^fc")), ~response_base^.)
  }

  if (pretty) {
    txt_pvals <- sprintf("%.3f", ht_df$pval) # helps avoid scientific notation parsing

    p_labels <- ifelse(txt_pvals == "0.000",
      "<0.001",
      paste0("=", txt_pvals)
    ) %>%
      gsub("\\.?0+$", "", .)
  }

  if (!pretty) {
    p_labels <- paste0("=", ht_df$pval)
  }

  ht_df %>%
    dplyr::mutate(label = paste0(pct_change, "%\np", p_labels)) %>%
    dplyr::select(!!main_var, contrast, baseline, dplyr::everything())
}

#' @export
make_fcs.lmerMod <- function(mod, level = .95, sig_figs = 2, restore = TRUE, pretty = TRUE) {
  response_var <- colnames(attributes(mod)$frame)[1]

  response_base <- suppressWarnings(as.numeric(ifelse(grepl("ln\\(", response_var),
    exp(1),
    gsub(".*log(\\d+).*", "\\1", response_var)
  )))

  main_var <- colnames(attributes(mod)$frame)[2]

  mcp_call <- parse(text = paste0("multcomp::mcp(", main_var, " = 'Dunnett')"))

  ht <- multcomp::glht(mod, linfct = eval(mcp_call))

  if (sum(grepl("(log)|(ln)", attributes(ht$model)$call$formula[[2]])) < 1) {
    warning("No log transformation detected for response variable in model.
            Check model for heteroskedasticity.")
  }

  if (names(ht$coef)[1] == "(Intercept)") {
    stop("Redo model with intercept = 0 for easier plotting")
  }

  ht_df <- confint(ht, level)$confint %>%
    as.data.frame() %>%
    dplyr::rename(est = Estimate) %>%
    dplyr::rename_all(~paste0("fc_", .)) %>%
    dplyr::mutate(
      contrast = rownames(.),
      pval = summary(ht)$test$pvalues %>% signif(., sig_figs)
    ) %>%
    tidyr::separate(contrast, c(main_var, "baseline"), sep = " - ", remove = FALSE)

  # fold change caculations
  ht_df %<>% dplyr::rowwise() %>%
    dplyr::mutate(
      pct_change = signif(ifelse(is.na(response_base),
        (fc_est / lme4::fixef(mod)[1]) * 100,
        (1 - (response_base^fc_est)) * -100
      ), sig_figs),
      log2_fc = log2(ifelse(is.na(response_base),
        (lme4::fixef(mod)[1] + fc_est) / lme4::fixef(mod)[1],
        response_base^fc_est
      ))
    ) %>%
    dplyr::ungroup()

  if (restore & !is.na(response_base)) {
    ht_df %<>% dplyr::mutate_at(dplyr::vars(dplyr::matches("^fc")), ~response_base^.)
  }

  if (pretty) {
    txt_pvals <- sprintf("%.3f", ht_df$pval) # helps avoid scientific notation parsing

    p_labels <- ifelse(txt_pvals == "0.000",
      "<0.001",
      paste0("=", txt_pvals)
    ) %>%
      gsub("\\.?0+$", "", .)
  }

  if (!pretty) {
    p_labels <- paste0("=", ht_df$pval)
  }

  ht_df %>%
    dplyr::mutate(label = paste0(pct_change, "%\np", p_labels)) %>%
    dplyr::select(!!main_var, contrast, baseline, dplyr::everything())
}

#' A super-high level wrapper for building plot ready model summaries.
#'
#' Uses `make_cis()` and `make_fcs` to assemble a useful data.frame with columns for building
#' `geom_crossbar` summaries (est, lwr, upr) and simple statistic text summary (label).
#' @param mod A model object of class `lm` or `lmer/lmerTest`.
#' @param level A numeric value between 0 and 1 for confidence level. Default is .95.
#' @param all Boolean, should all predictor varaibles be returned or just the first predictor (default).
#' @param sig_figs A numeric value specifying the number of decimal places in percent change columns in result.
#' @param restore Boolean, shold response variables be transformed back to linear space.
#' @param pretty Boolean, should p-values < 0.001 be represented by '<=0.001'.
#' @examples
#' data(mtcars)
#' mtcars$cyl <- as.factor(mtcars$cyl)
#' m <- lm(log10(mpg) ~ 0 + cyl, data = mtcars)
#' make_stats(m)
#'
#' library(lme4)
#' data(sleepstudy)
#' sleepstudy$Days <- as.factor(sleepstudy$Days)
#' mm <- lmer(Reaction ~ 0 + Days + (1 | Subject), data = sleepstudy)
#' make_stats(mm)
#' @export
make_stats <- function(mod, level = .95, sig_figs = 2, all = FALSE, restore = TRUE, pretty = TRUE) {
  cis <- make_cis(mod, level = level, all = all, restore = restore)
  fcs <- make_fcs(mod, level = level, sig_figs = sig_figs, restore = restore, pretty = pretty)

  full_join(cis, fcs)
}

#' Function to run makeStats in parallel.
#'
#' Works via `parallel::mclapply()`
#' @param mod_list  A list model object of class `lm` or `lmer/lmerTest`.
#' @param max The number of cores to use. Default (`NULL`) is half of total cores available.
#' Can be specifed as integer or `Inf`(), to use all cores available.
#' @param stat_func One of the following functions as bare names: `makeStats` (Default) `makeCI`, or `makeFC`, .
#' @param ... Arguments to pass through to `stat_func`.
#' @md
#' @examples
#' \dontrun{
#' par_stats(mod_list) # equal to line below
#' par_stats(mod_list, makeStats)
#'
#' par_stats(mod_list, makeStats, max = Inf) # use all cores
#' }
#' @importFrom parallel detectCores
#' @importFrom pbmcapply pbmclapply
#' @export
par_stats <- function(mod_list, stat_func = make_stats, ..., max = NULL) {
  if (is.null(max)) {
    cores <- parallel::detectCores() / 2
  }
  else if (is.infinite(max)) {
    cores <- parallel::detectCores()
  }
  else {
    cores <- as.integer(max)
  }

  pbmcapply::pbmclapply(mod_list, stat_func, ..., mc.cores = cores)
}

#' @title Generic function to do inverse prediction.
#' @description Predict x from y in a model object.
#' @param object A model object. Currently only 'lm' and 'drc' class model objects are supported.
#' @param y The y value.
#' @return The x value corresponding to the given y.
#' @examples
#' object <- lm(mpg~hp, data=mtcars)
#' inverse_predict(object, 15)
#' 
#' object <- drc::drm(mpg~hp, data=mtcars, fct= drc::LL.4())
#' inverse_predict(object, 15)
#' @export
inverse_predict <- function(object, y) {
  UseMethod("inverse_predict")
}

#' @title Inverse predict based on a linear model object.
#' @description Predict x from y.
#' @param object An object of class lm with an intercept and a single coefficient.
#' @param y The y value.
#' @return The x value corresponding to the given y.
#' @examples
#' object <- lm(mpg~hp, data=mtcars)
#' inverse_predict(object, 15)
#' @export
inverse_predict.lm <- function(object, y) {
  coefs <- object$coefficients
  if (!((length(coefs) == 2) & names(coefs)[1] == "(Intercept)")) {
    stop("No intercept and/or too many coefficients in object")
  }
  x <- unname((y - coefs[1]) / coefs[2])
  return(x)
}

#' @title Inverse predict based on 'drc' model object.
#' @description Predict x from y.
#' @param object An object of class drc object with \code{drm()} and the argument \code{fct = LL.4()}.
#' @param y The y value.
#' @return The x value corresponding to the given y.
#' @examples
#' object <- drc::drm(mpg~hp, data=mtcars, fct= drc::LL.4())
#' inverse_predict(object, 15)
#' @export
inverse_predict.drc <- function(object, y) {
  if (class(object$fct) != "llogistic" | object$fct$noParm != 4) {
    stop("Only 4 paramenter log logistic fits are supported")
  }
  coefs <- object$coefficients
  b <- coefs[1] %>% unname()
  c <- coefs[2] %>% unname()
  d <- coefs[3] %>% unname()
  e <- coefs[4] %>% unname()
  to_log <- ((d - c) / (y - c)) - 1
  logx <- (1 / b) * log(to_log) + log(e)
  res <- exp(logx)
  return(res)
}