R/toffee_tbl.R

#' Create a nicely formatted table from a model object.
#'
#' Given a model object (like from [stats::lm()], [stats::glm()], or
#' [lme4::glmer()]) this function makes a table intended for interactive use or
#' publication. The model coefficients and confidence intervals displayed in the
#' table have had their [inverse link function](https://en.wikipedia.org/wiki/Generalized_linear_model#Link_function)
#' applied, therefore they are in the units of the response variable.
#'
#' @param model A model object that is compatible with [broom::tidy()].
#' @param conf_level The confidence level that will be used for computing the
#' confidence interval (for example: `0.95` or `0.99`).
#' @param digits The number of decimal places that numbers in the table should
#' be rounded to. For no rounding use `Inf`. This argument is provided to
#' [base::round()]. The default value is `2`.
#' @param concat_signif If `TRUE` the resulting data frame will append
#' significance symbols as generated by [toffee_signif()]. If `FALSE` the
#' significance symbols will appear in a separate column called `Significant`.
#' @param odds_ratio If `TRUE` and the family of the model provided is
#' `logit` then the model coefficients and confidence intervals returned in
#' the resulting table will be odds ratios.
#' @param ci_fmt If `TRUE` then the confidence interval will be formatted between
#' brackets in one column called `CI`. If `FALSE` the lower and upper bounds of
#' the confidence interval will be in their own columns called `Lower_CI` and
#' `Upper_CI` respectfully.
#' @param ... Arguments that will be passed to [toffee_signif()].
#' @return A [tibble::tibble()] with the following columns: the
#' name of the coefficient in the model (`Variable`), the value of the
#' coefficient with inverse of the link function applied (`Estimate` or
#' `Odds_Ratio`), the confidence interval for the coefficient (`CI`), the
#' p-value for the estimate, (`p_value`), and optionally symbols representing
#' levels of significance (`Significant`).
#'
#' @export
#' @importFrom broom tidy
#' @importFrom purrr map_dbl map_chr is_null map2_chr
#' @importFrom dplyr mutate mutate_at select rename "%>%"
#' @examples
#'
#' library(dplyr)
#' library(toffee)
#'
#' linear_model <- toffee_forest %>%
#'   lm(Volume ~ Girth + Height + Branches, data = .)
#'
#' # Basic usage
#' linear_model %>%
#'   toffee_tbl()
#'
#'  # # A tibble: 4 x 4
#'  #   Variable    Estimate CI               p_value
#'  #   <chr>          <dbl> <chr>            <chr>
#'  # 1 (Intercept)   -67.2  [-86.81, -47.59] < 0.01***
#'  # 2 Girth           4.75 [4.23, 5.27]     < 0.01***
#'  # 3 Height          0.33 [0.08, 0.59]     0.01*
#'  # 4 Branches        0.46 [-0.03, 0.95]    0.07
#'
#' # Using the `chars` argument from toffee_signif
#' linear_model %>%
#'   toffee_tbl(thresholds = 0.01, chars = "*")
#'
#'  # # A tibble: 4 x 4
#'  #   Variable    Estimate CI               p_value
#'  #   <chr>          <dbl> <chr>            <chr>
#'  # 1 (Intercept)   -67.2  [-86.81, -47.59] < 0.01*
#'  # 2 Girth           4.75 [4.23, 5.27]     < 0.01*
#'  # 3 Height          0.33 [0.08, 0.59]     0.01
#'  # 4 Branches        0.46 [-0.03, 0.95]    0.07
#'
#' # Separating the significance symbols into their own column
#' linear_model %>%
#'   toffee_tbl(concat_signif = FALSE)
#'
#'  # # A tibble: 4 x 5
#'  #   Variable    Estimate CI               p_value Significant
#'  #   <chr>          <dbl> <chr>            <chr>   <chr>
#'  # 1 (Intercept)   -67.2  [-86.81, -47.59] < 0.01  ***
#'  # 2 Girth           4.75 [4.23, 5.27]     < 0.01  ***
#'  # 3 Height          0.33 [0.08, 0.59]     0.01    *
#'  # 4 Branches        0.46 [-0.03, 0.95]    0.07    ""
#'
#' logistic_model <- toffee_forest %>%
#'   glm(Healthy ~ Girth + Height + Branches + Volume, data = .,
#'       family = binomial())
#'
#' logistic_model %>%
#'   toffee_tbl()
#'
#'  # # A tibble: 5 x 4
#'  #   Variable    Odds_Ratio CI                  p_value
#'  #   <chr>            <dbl> <chr>               <chr>
#'  # 1 (Intercept)       0.05 [0, 10626998633.06] 0.81
#'  # 2 Girth             0.75 [0.14, 3.65]        0.72
#'  # 3 Height            0.92 [0.73, 1.14]        0.45
#'  # 4 Branches          2    [1.31, 3.82]        0.01**
#'  # 5 Volume            1.03 [0.75, 1.45]        0.86
#'
toffee_tbl <- function(model, conf_level = 0.95, digits = 2,
                       concat_signif = TRUE, odds_ratio = TRUE,
                       ci_fmt = TRUE, ...) {

  tryCatch(family(model),
    error = function(c) {
      stop("This model object is not compatible with toffee_tbl.", call. = FALSE)
    }
  )

  link <- family(model)$link

  if(link == "logit" && odds_ratio) {
    mean_function <- exp
  } else {
    mean_function <- make.link(link)$linkinv
  }

  result <- tidy(model, conf.int = TRUE, conf.level = conf_level) %>%
    mutate(Significant = toffee_signif(p.value, ...)) %>%
    mutate_at(c("estimate", "conf.low", "conf.high"), mean_function) %>%
    mutate_at(c("estimate", "conf.low", "conf.high", "p.value"),
              round, digits = digits) %>%
    mutate(p.value = map_chr(p.value, as.character)) %>%
    mutate(p.value = map_chr(p.value, ~ if_else(.x < 0.01, "< 0.01", .x)))

  if (ci_fmt) {
    result <- result %>%
      mutate(CI = map2_chr(conf.low, conf.high,
                           ~ paste0("[", .x, ", ", .y, "]"))) %>%
      dplyr::select(term, estimate, CI, p.value, Significant) %>%
      rename(Variable = term, Estimate = estimate, p_value = p.value)
  } else {
    result <- result %>%
      dplyr::select(term, estimate, conf.low, conf.high, p.value, Significant) %>%
      rename(Variable = term, Estimate = estimate, Lower_CI = conf.low,
             Upper_CI = conf.high, p_value = p.value)
  }

  if(link == "logit" && odds_ratio) {
    result <- result %>%
      rename(Odds_Ratio = Estimate)
  }

  if(concat_signif) {
    result <- result %>%
      mutate(p_value = paste0(p_value, Significant)) %>%
      select(-Significant)
  }

  result
}
seankross/toffee documentation built on May 29, 2019, 9:33 a.m.