R/adorn-tidytableone.R

Defines functions adorn_tidytableone

Documented in adorn_tidytableone

#' @title Format tidytableone for display and reporting
#'
#' @description
#' Take the raw summary statistics from `create_tidy_table_one` and format them
#' for inclusion in a presentable table.
#'
#' @param tidy_t1 Results in a tibble from `create_tidy_table_one`
#' @param default_continuous A glue statement that provides the formatting for
#'   continuous variables, Default is `"{mean} ({sd})"`
#' @param default_categorical A glue statement that provides the formatting for
#'   categorical variables, Default is `"{n} ({p})"`
#' @param fmt_vars A list of variable names and glue statements to override the
#'   defaults for specific variables.
#' @param con_accuracy A number to round to for continuous variables. Use (e.g.)
#'   0.01 to show 2 decimal places of precision. Default is 0.1.
#' @param cat_accuracy A number to round to for categorical variables. Use
#'   (e.g.) 0.01 to show 2 decimal places of precision. Default is 0.1.
#' @param p_accuracy  A number to round to for p-values and smd (when shown).
#'   Use (e.g.) 0.01 to show 2 decimal places of precision. Default is 0.001.
#' @param prefix Additional text to display before the number. The suffix is
#'   applied to absolute value before style_positive and style_negative are
#'   processed so that prefix = "$" will yield (e.g.) ⁠-$1⁠ and ⁠($1)⁠.
#' @param suffix Additional text to display after the number.
#' @param big_mark Character used between every 3 digits to separate thousands.
#' @param decimal_mark The character to be used to indicate the numeric decimal point.
#' @param style_positive A string that determines the style of positive numbers.
#'   See `?scales:number` for more information.
#' @param style_negative  A string that determines the style of negative
#'   numbers. See `?scales:number` for more information.
#' @param scale_cut Named numeric vector that allows you to rescale large (or
#'   small) numbers and add a prefix. See `?scales:number` for more information.
#' @param con_trim Logical, if FALSE, values are right-justified to a common width (see `base::format()`).
#' @param cat_trim Logical, if FALSE, values are right-justified to a common width (see `base::format()`).
#' @param show_pct Logical, if FALSE, "%" is omitted from percentages.
#' @param exact String vector of variable names to use exact tests for p-values.
#' @param nonnormal String vector of variable names to use non-parametric tests for p-values.
#' @param equal_variance String vector of variable names to assume equal variance.
#' @param no_cont_correction String vector of variable names to assume continuity correction.
#' @param monte_carlo_p String vector of variable names to simulate p-values.
#' @param show_test Logical, if FALSE, the names of the test are omitted from the table.
#' @param show_smd Logical, if FALSE, Standardized Mean Differences (SMD) are not included.
#' @param use_labels Logical, if TRUE, labels are used instead of variable names.
#' @param combine_level_col Combines the `var` and `level` columns into one instead
#'   of two. HTML won't recognize the extra spaces when printing, but you can
#'   used `flextable::padding` to indent the right rows in that column later.
#' @param missing Indicates whether to include counts of NA values in the table.
#'   Allowed values are "no" (shows a column of number not missing), "ifany" (only display if
#'   any NA values), and "always" (includes NA count row for all variables).
#'   Default is "no".
#' @param missing_text Character string to use in place of `NA` when missing is
#'   "ifany" or "always". Default is "(Missing)".
#' @param default_miss A glue statement that provides the formatting for
#'   missing, Default is `"{n}"`
#' @param ... Additional arguments. Not used.
#'
#' @importFrom dplyr across
#' @importFrom dplyr add_row
#' @importFrom dplyr bind_rows
#' @importFrom dplyr case_when
#' @importFrom dplyr distinct
#' @importFrom dplyr everything
#' @importFrom dplyr filter
#' @importFrom dplyr if_else
#' @importFrom dplyr left_join
#' @importFrom dplyr mutate
#' @importFrom dplyr pull
#' @importFrom dplyr rows_update
#' @importFrom dplyr rowwise
#' @importFrom dplyr select
#' @importFrom glue glue
#' @importFrom purrr map_chr
#' @importFrom purrr map_df
#' @importFrom scales number
#' @importFrom scales pvalue
#' @importFrom stringr str_replace
#' @importFrom stringr str_replace_all
#' @importFrom tibble tibble
#' @importFrom tidyr pivot_wider
#' @importFrom tidyr separate_longer_delim
#'
#' @return A tibble
#'
#' @export
#'
#' @examples
#' library(dplyr)
#'
#' dplyr::glimpse(pbc_mayo)
#'
#' tab1 <- create_tidy_table_one(data = pbc_mayo,
#'                               strata = "trt",
#'                               vars = c("time",
#'                                        "status",
#'                                        "age",
#'                                        "sex",
#'                                        "ascites",
#'                                        "hepato",
#'                                        "spiders",
#'                                        "edema",
#'                                        "bili",
#'                                        "chol",
#'                                        "albumin",
#'                                        "copper",
#'                                        "alk_phos",
#'                                        "ast",
#'                                        "trig",
#'                                        "platelet",
#'                                        "protime",
#'                                        "stage"))
#'
#' dplyr::glimpse(tab1)
#'
#' # Check to see if assumptions may be violated
#' check_tests <- calc_table_one_tests(tab1)
#'
#'
#' dplyr::distinct(tab1, var)
#'
#'
#' adorn_tidytableone(tidy_t1 = tab1)
#'
#' adorn_tidytableone(tidy_t1 = tab1,
#'                    show_test = TRUE)
#'
#'
#' adorn_tidytableone(tidy_t1 = tab1,
#'                    show_test = TRUE,
#'                    exact = "status",
#'                    nonnormal = check_tests$non_normal_shapiro)


adorn_tidytableone <- function(tidy_t1,
                               default_continuous = "{mean} ({sd})",
                               default_categorical = "{n} ({p})",
                               fmt_vars = NULL,
                               con_accuracy = 0.1,
                               cat_accuracy = 0.1,
                               p_accuracy = 0.001,
                               prefix = "",
                               suffix = "",
                               big_mark = "",
                               decimal_mark = ".",
                               style_positive = c("none", "plus"),
                               style_negative = c("hyphen", "minus", "parens"),
                               scale_cut = NULL,
                               con_trim = TRUE,
                               cat_trim = FALSE,
                               show_pct = TRUE,
                               exact = NULL,
                               nonnormal = NULL,
                               equal_variance = NULL,
                               no_cont_correction = NULL,
                               monte_carlo_p = NULL,
                               show_test = FALSE,
                               show_smd = FALSE,
                               use_labels = TRUE,
                               combine_level_col = TRUE,
                               missing = "no",
                               missing_text = "(Missing)",
                               default_miss = "{n}", ...) {

  # Silence no visible binding for global variable
  p_value <- test <- smd <- label <- glue_formula <- NULL
  strata <- num_not_miss <- NULL

  if (!"strata" %in% names(tidy_t1)) {
    # stop("Currently, the function only works when a strata is given.")
    res_stats <- adorn_tidytableone_no_strata(tidy_t1 = tidy_t1,
                                              default_continuous = default_continuous,
                                              default_categorical = default_categorical,
                                              fmt_vars = fmt_vars,
                                              con_accuracy = con_accuracy,
                                              cat_accuracy = cat_accuracy,
                                              p_accuracy = p_accuracy,
                                              prefix = prefix,
                                              suffix = suffix,
                                              big_mark = big_mark,
                                              decimal_mark = decimal_mark,
                                              style_positive = style_positive,
                                              style_negative = style_negative,
                                              scale_cut = scale_cut,
                                              con_trim = con_trim,
                                              cat_trim = cat_trim,
                                              show_pct = show_pct,
                                              exact = exact,
                                              nonnormal = nonnormal,
                                              equal_variance = equal_variance,
                                              no_cont_correction = no_cont_correction,
                                              monte_carlo_p = monte_carlo_p,
                                              show_test = show_test,
                                              show_smd = show_smd,
                                              use_labels = use_labels,
                                              combine_level_col = combine_level_col,
                                              missing = missing,
                                              missing_text = missing_text,
                                              default_miss = default_miss, ...)

    #### Return results --------------------------------

    return(res_stats)

  }


  #### get variable labels --------------------------------

  var_lbls <- tidy_t1 |>
    dplyr::select(var, var_type, label) |>
    dplyr::distinct() |>
    mutate(label = dplyr::if_else(is.na(label), var, label))

  #### Get the stats --------------------------------

  tab_stats <- make_t1_pretty(t1 = tidy_t1,
                              default_continuous = default_continuous,
                              default_categorical = default_categorical,
                              fmt_vars = fmt_vars,
                              con_accuracy = con_accuracy,
                              cat_accuracy = cat_accuracy,
                              prefix = prefix,
                              suffix = suffix,
                              big_mark = big_mark,
                              decimal_mark = decimal_mark,
                              style_positive = style_positive[[1]],
                              style_negative = style_negative[[1]],
                              scale_cut = scale_cut,
                              con_trim = con_trim,
                              cat_trim = cat_trim,
                              show_pct = show_pct,
                              missing = missing)

  #### Get the p-values --------------------------------

  if (any(tidy_t1$var_type == "continuous") & any(tidy_t1$var_type == "categorical")) {

    tab_pvals <- tidy_t1 |>
      dplyr::distinct(var,
                      var_type,
                      chisq_test,
                      chisq_test_no_correction,
                      chisq_test_simulated,
                      fisher_test,
                      fisher_test_simulated,
                      oneway_test_unequal_var,
                      oneway_test_equal_var,
                      kruskal_test) |>
      mutate(p_value = dplyr::case_when(
        var_type == "continuous" & var %in% nonnormal ~ kruskal_test,
        var_type == "continuous" & var %in% equal_variance ~ oneway_test_equal_var,
        var_type == "continuous"  ~ oneway_test_unequal_var,
        var_type == "categorical" & var %in% exact & var %in% monte_carlo_p ~ fisher_test_simulated,
        var_type == "categorical" & var %in% exact ~ fisher_test,
        var_type == "categorical" & var %in% no_cont_correction ~ chisq_test_no_correction,
        var_type == "categorical" & var %in% monte_carlo_p ~ chisq_test_simulated,
        var_type == "categorical" ~ chisq_test,
        TRUE ~ NA_real_),
        test = dplyr::case_when(
          var_type == "continuous" & var %in% nonnormal ~ "Kruskal-Wallis Rank Sum Test",
          var_type == "continuous" & var %in% equal_variance ~ "Oneway test, equal variance",
          var_type == "continuous"  ~ "Oneway test, unequal variance",
          var_type == "categorical" & var %in% exact & var %in% monte_carlo_p ~ "Fisher's Exact Test, simuation",
          var_type == "categorical" & var %in% exact ~ "Fisher's Exact Test",
          var_type == "categorical" & var %in% no_cont_correction ~ "Chi-squared Test, no continuity correction",
          var_type == "categorical" & var %in% monte_carlo_p ~ "Chi-squared Test, simulation",
          var_type == "categorical" ~ "Chi-squared Test, with continuty correction",
          TRUE ~ NA_character_),
        p_value = scales::pvalue(p_value,
                                 accuracy = p_accuracy,
                                 decimal.mark = ".",
                                 prefix = NULL,
                                 add_p = FALSE)) |>
      dplyr::select(var,
                    p_value,
                    test)

  } else if (any(tidy_t1$var_type == "continuous")) {

    tab_pvals <- tidy_t1 |>
      dplyr::distinct(var,
                      var_type,
                      oneway_test_unequal_var,
                      oneway_test_equal_var,
                      kruskal_test) |>
      mutate(p_value = dplyr::case_when(
        var_type == "continuous" & var %in% nonnormal ~ kruskal_test,
        var_type == "continuous" & var %in% equal_variance ~ oneway_test_equal_var,
        var_type == "continuous"  ~ oneway_test_unequal_var,
        TRUE ~ NA_real_),
        test = dplyr::case_when(
          var_type == "continuous" & var %in% nonnormal ~ "Kruskal-Wallis Rank Sum Test",
          var_type == "continuous" & var %in% equal_variance ~ "Oneway test, equal variance",
          var_type == "continuous"  ~ "Oneway test, unequal variance",
          TRUE ~ NA_character_),
        p_value = scales::pvalue(p_value,
                                 accuracy = p_accuracy,
                                 decimal.mark = ".",
                                 prefix = NULL,
                                 add_p = FALSE)) |>
      dplyr::select(var,
                    p_value,
                    test)

  } else if (any(tidy_t1$var_type == "categorical")) {

    tab_pvals <- tidy_t1 |>
      dplyr::distinct(var,
                      var_type,
                      chisq_test,
                      chisq_test_no_correction,
                      chisq_test_simulated,
                      fisher_test,
                      fisher_test_simulated) |>
      mutate(p_value = dplyr::case_when(
        var_type == "categorical" & var %in% exact & var %in% monte_carlo_p ~ fisher_test_simulated,
        var_type == "categorical" & var %in% exact ~ fisher_test,
        var_type == "categorical" & var %in% no_cont_correction ~ chisq_test_no_correction,
        var_type == "categorical" & var %in% monte_carlo_p ~ chisq_test_simulated,
        var_type == "categorical" ~ chisq_test,
        TRUE ~ NA_real_),
        test = dplyr::case_when(
          var_type == "categorical" & var %in% exact & var %in% monte_carlo_p ~ "Fisher's Exact Test, simuation",
          var_type == "categorical" & var %in% exact ~ "Fisher's Exact Test",
          var_type == "categorical" & var %in% no_cont_correction ~ "Chi-squared Test, no continuity correction",
          var_type == "categorical" & var %in% monte_carlo_p ~ "Chi-squared Test, simulation",
          var_type == "categorical" ~ "Chi-squared Test, with continuty correction",
          TRUE ~ NA_character_),
        p_value = scales::pvalue(p_value,
                                 accuracy = p_accuracy,
                                 decimal.mark = ".",
                                 prefix = NULL,
                                 add_p = FALSE)) |>
      dplyr::select(var,
                    p_value,
                    test)
  }

  #### Get the SMD --------------------------------

  tab_smd <- tidy_t1 |>
    dplyr::distinct(var, smd) |>
    mutate(smd = scales::number(smd,
                                accuracy = p_accuracy,
                                decimal.mark = "."))


  #### Get the missing --------------------------------

  tab_miss <- tidy_t1 |>
    get_miss(missing = missing,
             missing_text = missing_text,
             default_miss = default_miss,
             cat_accuracy = cat_accuracy,
             prefix = prefix,
             suffix = suffix,
             big_mark = big_mark,
             decimal_mark = decimal_mark,
             style_positive = style_positive[[1]],
             style_negative = style_negative[[1]],
             scale_cut = scale_cut,
             con_trim = con_trim,
             cat_trim = cat_trim,
             show_pct = show_pct)


  #### Make the table --------------------------------

  tab_vars <- dplyr::distinct(tidy_t1, var) |>
    dplyr::pull() |>
    as.character()


  adorned_tidy_t1 <- purrr::map_df(.x = tab_vars,
                                   .f = ~ build_tab1(tab_var = .x,
                                                     tab_pvals = tab_pvals,
                                                     tab_stats = tab_stats,
                                                     tab_miss = tab_miss,
                                                     missing = missing,
                                                     show_test = show_test,
                                                     show_smd = show_smd,
                                                     tab_smd = tab_smd)) |>
    dplyr::mutate(dplyr::across(.cols = dplyr::everything(),
                                .fns = ~ dplyr::if_else(is.na(.), "", .)))


  #### Apply labels --------------------------------

  if (use_labels) {

    adorned_tidy_t1 <- adorned_tidy_t1 |>
      dplyr::left_join(var_lbls,
                       by = "var") |>
      dplyr::mutate(label = dplyr::if_else(is.na(label), "", label),
                    var = label)

  } else {

    adorned_tidy_t1 <- adorned_tidy_t1 |>
      dplyr::left_join(var_lbls,
                       by = "var")

  }


  adorned_tidy_t1 <- adorned_tidy_t1 |>
    tidyr::fill(var_type,
                .direction = "down") |>
    mutate(level = dplyr::if_else(level == "" & var_type == "continuous",
                                  glue_formula,
                                  level),
           glue_formula = dplyr::if_else(glue_formula == "", NA_character_, glue_formula)) |>
    tidyr::fill(glue_formula,
                .direction = "up") |>
    dplyr::mutate(var = dplyr::if_else(var_type == "categorical" & var != "",
                                       glue::glue("{var}, {glue_formula}"),
                                       var)) |>
    dplyr::select(-glue_formula,
                  -var_type,
                  -label)


  #### Combine var and level columns --------------------------------

  if (combine_level_col) {

    adorned_tidy_t1 <- adorned_tidy_t1 |>
      mutate(var = glue::glue("{var}  {level}")) |>
      dplyr::select(-level)

  }


  #### Top row (n) --------------------------------

  if (any(tidy_t1$var_type == "continuous") & any(tidy_t1$var_type == "categorical")) {

    top_row <- tidy_t1 |>
      dplyr::distinct(strata,
                      n) |>
      dplyr::filter(!is.na(n)) |>
      mutate(n = scales::number(x = n,
                                accuracy = 1.0,
                                scale = 1,
                                prefix = "",
                                suffix = "",
                                big.mark = "",
                                decimal.mark = ".",
                                style_positive = "none",
                                style_negative = "hyphen",
                                scale_cut = NULL,
                                trim = FALSE)) |>
      tidyr::pivot_wider(names_from = strata,
                         values_from = n) |>
      mutate(var = "n",
             p_value = "") |>
      dplyr::select(var,
                    dplyr::everything())

  } else if (any(tidy_t1$var_type == "continuous")) {

    top_row <- tidy_t1 |>
      dplyr::distinct(strata,
                      n) |>
      dplyr::filter(!is.na(n)) |>
      mutate(n = scales::number(x = n,
                                accuracy = 1.0,
                                scale = 1,
                                prefix = "",
                                suffix = "",
                                big.mark = "",
                                decimal.mark = ".",
                                style_positive = "none",
                                style_negative = "hyphen",
                                scale_cut = NULL,
                                trim = FALSE)) |>
      tidyr::pivot_wider(names_from = strata,
                         values_from = n) |>
      mutate(var = "n",
             p_value = "") |>
      dplyr::select(var,
                    dplyr::everything())


  } else if (any(tidy_t1$var_type == "categorical")) {

    top_row <- tidy_t1 |>
      dplyr::distinct(strata,
                      n_strata) |>
      dplyr::rename(n = n_strata) |>
      dplyr::filter(!is.na(n)) |>
      mutate(n = scales::number(x = n,
                                accuracy = 1.0,
                                scale = 1,
                                prefix = "",
                                suffix = "",
                                big.mark = "",
                                decimal.mark = ".",
                                style_positive = "none",
                                style_negative = "hyphen",
                                scale_cut = NULL,
                                trim = FALSE)) |>
      tidyr::pivot_wider(names_from = strata,
                         values_from = n) |>
      mutate(var = "n",
             p_value = "") |>
      dplyr::select(var,
                    dplyr::everything())
  }


  empty_row <- tibble::as_tibble(lapply(top_row, function(x) ""))

  adorned_tidy_t1 <- top_row |>
    dplyr::bind_rows(empty_row) |>
    dplyr::bind_rows(adorned_tidy_t1)


  #### Not combine var and level columns --------------------------------

  if (!combine_level_col) {

    adorned_tidy_t1 <- adorned_tidy_t1 |>
      dplyr::relocate(level,
                      .after = var)

  }

  #### Final clean-up --------------------------------

  if (missing == "no") {

    adorned_tidy_t1 <- adorned_tidy_t1 |>
      dplyr::relocate(num_not_miss,
                      .before = "Overall")|>
      mutate(num_not_miss = as.character(num_not_miss),
             num_not_miss = tidyr::replace_na(num_not_miss, ""))

  }


  if (show_test == TRUE) {

    adorned_tidy_t1 <- adorned_tidy_t1 |>
      mutate(test = tidyr::replace_na(test, ""))

  }

  if (show_smd == FALSE) {

    adorned_tidy_t1 <- adorned_tidy_t1 |>
      mutate(smd = tidyr::replace_na(smd, ""))

  }


  adorned_tidy_t1 <- adorned_tidy_t1 |>
    mutate(dplyr::across(.cols = dplyr::everything(),
                         .fns = ~ tidyr::replace_na(., ""))) |>
    mutate(dplyr::across(.cols = dplyr::everything(),
                         .fns = ~ as.character(.)))


  #### Return table --------------------------------

  return(adorned_tidy_t1)


}


#### Build tab1 --------------------------------

build_tab1 <- function(tab_var,
                       tab_pvals,
                       tab_stats,
                       tab_miss,
                       missing = "no",
                       show_test = FALSE,
                       show_smd = FALSE,
                       tab_smd) {

  # Silence no visible binding for global variable
  p_value <- test <- smd <- num_not_miss <- NULL

  p_i <- tab_pvals |>
    dplyr::filter(var == tab_var) |>
    dplyr::pull(p_value)

  s_i <- tab_stats |>
    dplyr::filter(var == tab_var) |>
    dplyr::select(-var,
                  -var_type) |>
    mutate(var = NA_character_,
           p_value = NA_character_)

  t_i <- tab_pvals |>
    dplyr::filter(var == tab_var) |>
    dplyr::pull(test)

  smd_i <- tab_smd |>
    dplyr::filter(var == tab_var) |>
    dplyr::pull(smd)

  if (missing == "no") {

    m_i <- tab_miss |>
      dplyr::filter(var == tab_var) |>
      dplyr::pull(num_not_miss)

    res <- tibble::tibble(var = tab_var,
                          num_not_miss = m_i,
                          p_value = p_i,
                          test = t_i,
                          smd = smd_i) |>
      dplyr::bind_rows(s_i) |>
      dplyr::select(var,
                    dplyr::everything(),
                    -p_value,
                    -test,
                    -smd,
                    p_value,
                    test,
                    smd) |>
      dplyr::add_row()

  } else {

    m_i <- tab_miss |>
      dplyr::filter(var == tab_var) |>
      dplyr::select(-var)

    res <- tibble::tibble(var = tab_var,
                          p_value = p_i,
                          test = t_i,
                          smd = smd_i) |>
      dplyr::bind_rows(s_i) |>
      dplyr::bind_rows(m_i) |>
      dplyr::select(var,
                    dplyr::everything(),
                    -p_value,
                    -test,
                    -smd,
                    p_value,
                    test,
                    smd) |>
      dplyr::add_row()

  }



  if (!show_test) {

    res <- res |>
      dplyr::select(-test)

  }

  if (!show_smd) {

    res <- res |>
      dplyr::select(-smd)

  }



  return(res)

}


#### Make t1 pretty --------------------------------

make_t1_pretty <- function(t1,
                           default_continuous = "{mean} ({sd})",
                           default_categorical = "{n} ({p})",
                           fmt_vars = NULL,
                           con_accuracy = 0.1,
                           cat_accuracy = 0.1,
                           prefix = "",
                           suffix = "",
                           big_mark = "",
                           decimal_mark = ".",
                           style_positive = "none",
                           style_negative = "hyphen",
                           scale_cut = NULL,
                           con_trim = TRUE,
                           cat_trim = FALSE,
                           show_pct = TRUE,
                           missing = "ifany", ...) {

  # Silence no visible binding for global variable
  glue_formula <- pct <- cv <- strata <- glue_formula2 <- NULL
  n_level_valid <- n_strata_valid <- NULL

  # Percentage suffix
  if (show_pct) {
    pct_suffix = "%"
  } else {
    pct_suffix = ""
  }


  #### Glue formulae --------------------------------

  if (is.null(fmt_vars)) {

    formula_for_table <- t1 |>
      dplyr::distinct(var,
                      var_type) |>
      mutate(glue_formula = dplyr::case_when(
        var_type == "continuous" ~ default_continuous,
        var_type == "categorical" ~ default_categorical,
        .default = NA_character_))

  } else {

    override_formulae <- tibble::tibble(var = names(fmt_vars),
                                        glue_formula = purrr::map_chr(.x = var,
                                                                      .f = ~ fmt_vars[[.x]]))
    formula_for_table <- t1 |>
      dplyr::distinct(var,
                      var_type) |>
      mutate(glue_formula = dplyr::case_when(
        var_type == "continuous" ~ default_continuous,
        var_type == "categorical" ~ default_categorical,
        .default = NA_character_)) |>
      dplyr::rows_update(override_formulae,
                         by = "var")

  }

  ## Fix formula ----------------

  formula_for_table <- formula_for_table |>
    mutate(glue_formula = stringr::str_replace_all(string = glue_formula,
                                                   pattern = "median|Median|med|med",
                                                   replacement = "p50")) |>
    mutate(glue_formula = stringr::str_replace_all(string = glue_formula,
                                                   pattern = "min|Min|minimum|Minimum",
                                                   replacement = "p0")) |>
    mutate(glue_formula = stringr::str_replace_all(string = glue_formula,
                                                   pattern = "max|Max|maximum|Maximum",
                                                   replacement = "p100")) |>
    mutate(glue_formula = stringr::str_replace_all(string = glue_formula,
                                                   pattern = "\\{iqr\\}",
                                                   replacement = "{p25} to {p75}")) |>
    mutate(glue_formula = stringr::str_replace_all(string = glue_formula,
                                                   pattern = "\\{IQR\\}",
                                                   replacement = "{p25} to {p75}")) |>
    mutate(glue_formula = stringr::str_replace_all(string = glue_formula,
                                                   pattern = "\\{range\\}",
                                                   replacement = "{p0} to {p100}")) |>
    mutate(glue_formula = stringr::str_replace_all(string = glue_formula,
                                                   pattern = "\\{p\\}",
                                                   replacement = "{pct}")) |>
    mutate(glue_formula = stringr::str_replace_all(string = glue_formula,
                                                   pattern = "\\{n\\}",
                                                   replacement = "{n_level_valid}")) |>
    mutate(glue_formula = stringr::str_replace_all(string = glue_formula,
                                                   pattern = "\\{N\\}",
                                                   replacement = "{n_strata_valid}"))



  #### Make the pretty t1 --------------------------------

  if (any(t1$var_type == "continuous") & any(t1$var_type == "categorical")) {

    pretty_t1 <- t1 |>
      mutate(pct = n_level_valid / n_strata_valid) |>
      # Format counts for strata
      mutate(dplyr::across(.cols = c(n_level_valid,
                                     n_strata_valid),
                           .fns = ~ scales::number(x = .,
                                                   accuracy = 1.0,
                                                   scale = 1,
                                                   prefix = "",
                                                   suffix = "",
                                                   big.mark = "",
                                                   decimal.mark = ".",
                                                   style_positive = "none",
                                                   style_negative = "hyphen",
                                                   scale_cut = NULL,
                                                   trim = FALSE))) |>
      # Format categorical Percentages
      mutate(pct = scales::percent(x = pct,
                                   accuracy = cat_accuracy,
                                   scale = 100,
                                   prefix = prefix,
                                   suffix = pct_suffix,
                                   big.mark = big_mark,
                                   decimal.mark = decimal_mark,
                                   style_positive = style_positive,
                                   style_negative = style_negative,
                                   scale_cut = scale_cut,
                                   trim = cat_trim)) |>
      # Format for continuous data
      mutate(dplyr::across(.cols = c(mean:cv),
                           .fns = ~ scales::number(x = .,
                                                   accuracy = con_accuracy,
                                                   scale = 1,
                                                   prefix = prefix,
                                                   suffix = suffix,
                                                   big.mark = big_mark,
                                                   decimal.mark = decimal_mark,
                                                   style_positive = style_positive,
                                                   style_negative = style_negative,
                                                   scale_cut = scale_cut,
                                                   trim = con_trim))) |>
      dplyr::filter(!(is.na(level) & var_type == "categorical"))


  } else if (any(t1$var_type == "categorical")) {

    pretty_t1 <- t1 |>
      mutate(pct = n_level_valid / n_strata_valid) |>
      # Format counts for strata
      mutate(dplyr::across(.cols = c(n_level_valid,
                                     n_strata_valid),
                           .fns = ~ scales::number(x = .,
                                                   accuracy = 1.0,
                                                   scale = 1,
                                                   prefix = "",
                                                   suffix = "",
                                                   big.mark = "",
                                                   decimal.mark = ".",
                                                   style_positive = "none",
                                                   style_negative = "hyphen",
                                                   scale_cut = NULL,
                                                   trim = FALSE))) |>
      # Format categorical Percentages
      mutate(pct = scales::percent(x = pct,
                                   accuracy = cat_accuracy,
                                   scale = 100,
                                   prefix = prefix,
                                   suffix = pct_suffix,
                                   big.mark = big_mark,
                                   decimal.mark = decimal_mark,
                                   style_positive = style_positive,
                                   style_negative = style_negative,
                                   scale_cut = scale_cut,
                                   trim = cat_trim)) |>
      dplyr::filter(!is.na(level))

  } else if (any(t1$var_type == "continuous")) {

    # Format continuous stats: mean, sd, median, etc.
    pretty_t1 <- t1 |>
      mutate(dplyr::across(.cols = c(mean:cv),
                           .fns = ~ scales::number(x = .,
                                                   accuracy = con_accuracy,
                                                   scale = 1,
                                                   prefix = prefix,
                                                   suffix = suffix,
                                                   big.mark = big_mark,
                                                   decimal.mark = decimal_mark,
                                                   style_positive = style_positive,
                                                   style_negative = style_negative,
                                                   scale_cut = scale_cut,
                                                   trim = con_trim)))

  }

  if (!any(names(pretty_t1) == "level")) {

    pretty_t1 <- pretty_t1 |>
      mutate(level = "")
  }



  pretty_t1 <- pretty_t1 |>
    dplyr::left_join(formula_for_table,
                     by = c("var",
                            "var_type")) |>
    dplyr::rowwise() |>
    mutate(glue_formula2 = glue::glue(glue_formula)) |>
    dplyr::select(strata,
                  var,
                  level,
                  var_type,
                  glue_formula,
                  glue_formula2) |>
    tidyr::pivot_wider(names_from = strata,
                       values_from = glue_formula2) |>
    tidyr::separate_longer_delim(cols = c(-var, -var_type),
                                 delim = "\n") |>
    # Replace labels for stats
    mutate(glue_formula = stringr::str_replace(glue_formula,
                                               pattern = "\\{mean\\}",
                                               replacement = "Mean"),
           glue_formula = stringr::str_replace(glue_formula,
                                               pattern = "\\{sd\\}",
                                               replacement = "SD"),
           glue_formula = stringr::str_replace(glue_formula,
                                               pattern = "\\{p50\\}",
                                               replacement = "Median"),
           glue_formula = stringr::str_replace(glue_formula,
                                               pattern = "\\{p0\\}",
                                               replacement = "Min."),
           glue_formula = stringr::str_replace(glue_formula,
                                               pattern = "\\{p100\\}",
                                               replacement = "Max."),
           glue_formula = stringr::str_replace(glue_formula,
                                               pattern = "\\{n_level_valid\\}",
                                               replacement = "n"),
           glue_formula = stringr::str_replace(glue_formula,
                                               pattern = "\\{n_strata_valid\\}",
                                               replacement = "N"),
           glue_formula = stringr::str_replace(glue_formula,
                                               pattern = "\\{pct\\}",
                                               replacement = "%"),
           glue_formula = stringr::str_replace(glue_formula,
                                               pattern = "\\{p25\\} to \\{p75\\}",
                                               replacement = "IQR"))


  return(pretty_t1)


}


#### Handle NAs --------------------------------

get_miss <- function(t1,
                     missing = "no",
                     missing_text = "(Missing)",
                     default_miss = "{n}",
                     cat_accuracy = 0.1,
                     prefix = "",
                     suffix = "",
                     big_mark = "",
                     decimal_mark = ".",
                     style_positive = "none",
                     style_negative = "hyphen",
                     scale_cut = NULL,
                     con_trim = TRUE,
                     cat_trim = FALSE,
                     show_pct = TRUE, ...) {

  # Silence no visible binding for global variable
  glue_formula <- pct <- cv <- strata <- glue_formula2 <- NULL
  n_strata_valid <- n_available <- p_available <- missing_p <- num_not_miss <- n_miss <- NULL


  # Percentage suffix
  if (show_pct) {
    pct_suffix = "%"
  } else {
    pct_suffix = ""
  }


  if (any(t1$var_type == "continuous") & any(t1$var_type == "categorical")) {

    t1 <- t1

  } else if (any(t1$var_type == "continuous")) {

    t1 <- t1 |>
      mutate(n_strata = NA_integer_,
             n_strata_valid = NA_integer_,
             level = NA_character_)

  } else if (any(t1$var_type == "categorical")) {

    t1 <- t1 |>
      mutate(n = NA_integer_,
             complete = NA_integer_,
             missing = NA_integer_)

  }


  if (missing == "no") {

    miss_tab <- t1 |>
      dplyr::filter(strata == "Overall") |>
      dplyr::distinct(var,
                      n,
                      complete,
                      n_strata,
                      n_strata_valid,
                      var_type) |>
      mutate(n = dplyr::coalesce(n, n_strata),
             n_available = dplyr::coalesce(complete, n_strata_valid),
             p_available = n_available / n) |>
      dplyr::select(var, n, n_available, p_available)


    miss_tab <- miss_tab |>
      # Format counts for strata
      mutate(dplyr::across(.cols = c(n,
                                     n_available),
                           .fns = ~ scales::number(x = .,
                                                   accuracy = 1.0,
                                                   scale = 1,
                                                   prefix = "",
                                                   suffix = "",
                                                   big.mark = "",
                                                   decimal.mark = ".",
                                                   style_positive = "none",
                                                   style_negative = "hyphen",
                                                   scale_cut = NULL,
                                                   trim = FALSE))) |>
      # Format categorical Percentages
      mutate(p_available = scales::percent(x = p_available,
                                           accuracy = cat_accuracy,
                                           scale = 100,
                                           prefix = prefix,
                                           suffix = pct_suffix,
                                           big.mark = big_mark,
                                           decimal.mark = decimal_mark,
                                           style_positive = style_positive,
                                           style_negative = style_negative,
                                           scale_cut = scale_cut,
                                           trim = cat_trim))

  } else if (missing == "ifany") {

    any_miss <- t1 |>
      dplyr::filter(strata == "Overall") |>
      mutate(n = dplyr::coalesce(n, n_strata),
             missing = dplyr::if_else(var_type == "continuous",
                                      missing,
                                      n_strata - n_strata_valid)) |>
      dplyr::filter(missing > 0) |>
      dplyr::pull(var)

    miss_tab <- t1 |>
      dplyr::filter(var %in% any_miss) |>
      dplyr::select(strata,
                    var,
                    n,
                    missing,
                    level,
                    n_strata,
                    n_strata_valid,
                    var_type) |>
      mutate(n = dplyr::coalesce(n, n_strata),
             missing = dplyr::if_else(var_type == "continuous",
                                      missing,
                                      n_strata - n_strata_valid),
             missing_p = missing / n) |>
      dplyr::distinct(strata,
                      var,
                      n,
                      missing,
                      missing_p,
                      var_type)


    miss_tab <- miss_tab |>
      # Format counts for strata
      mutate(dplyr::across(.cols = c(n,
                                     missing),
                           .fns = ~ scales::number(x = .,
                                                   accuracy = 1.0,
                                                   scale = 1,
                                                   prefix = "",
                                                   suffix = "",
                                                   big.mark = "",
                                                   decimal.mark = ".",
                                                   style_positive = "none",
                                                   style_negative = "hyphen",
                                                   scale_cut = NULL,
                                                   trim = FALSE))) |>
      # Format categorical Percentages
      mutate(missing_p = scales::percent(x = missing_p,
                                         accuracy = cat_accuracy,
                                         scale = 100,
                                         prefix = prefix,
                                         suffix = pct_suffix,
                                         big.mark = big_mark,
                                         decimal.mark = decimal_mark,
                                         style_positive = style_positive,
                                         style_negative = style_negative,
                                         scale_cut = scale_cut,
                                         trim = cat_trim))



  } else if (missing == "always") {

    miss_tab <- t1 |>
      dplyr::select(strata,
                    var,
                    n,
                    missing,
                    level,
                    n_strata,
                    n_strata_valid,
                    var_type) |>
      mutate(n = dplyr::coalesce(n, n_strata),
             missing = dplyr::if_else(var_type == "continuous",
                                      missing,
                                      n_strata - n_strata_valid),
             missing_p = missing / n) |>
      dplyr::distinct(strata,
                      var,
                      n,
                      missing,
                      missing_p,
                      var_type)


    miss_tab <- miss_tab |>
      # Format counts for strata
      mutate(dplyr::across(.cols = c(n,
                                     missing),
                           .fns = ~ scales::number(x = .,
                                                   accuracy = 1.0,
                                                   scale = 1,
                                                   prefix = "",
                                                   suffix = "",
                                                   big.mark = "",
                                                   decimal.mark = ".",
                                                   style_positive = "none",
                                                   style_negative = "hyphen",
                                                   scale_cut = NULL,
                                                   trim = FALSE))) |>
      # Format categorical Percentages
      mutate(missing_p = scales::percent(x = missing_p,
                                         accuracy = cat_accuracy,
                                         scale = 100,
                                         prefix = prefix,
                                         suffix = pct_suffix,
                                         big.mark = big_mark,
                                         decimal.mark = decimal_mark,
                                         style_positive = style_positive,
                                         style_negative = style_negative,
                                         scale_cut = scale_cut,
                                         trim = cat_trim))



  }



  # Format results

  if (missing == "no") {

    miss_tab <- miss_tab |>
      mutate(glue_formula = default_miss,
             glue_formula = stringr::str_replace_all(string = glue_formula,
                                                     pattern = "\\{n\\}",
                                                     replacement = "{n_available}"),
             glue_formula = stringr::str_replace_all(string = glue_formula,
                                                     pattern = "\\{p\\}",
                                                     replacement = "{p_available}")) |>
      dplyr::rowwise() |>
      mutate(num_not_miss = glue::glue(glue_formula)) |>
      dplyr::select(var, num_not_miss)

  } else {

    miss_tab <- miss_tab |>
      mutate(glue_formula = default_miss,
             glue_formula = stringr::str_replace_all(string = glue_formula,
                                                     pattern = "\\{n\\}",
                                                     replacement = "{missing}"),
             glue_formula = stringr::str_replace_all(string = glue_formula,
                                                     pattern = "\\{p\\}",
                                                     replacement = "{missing_p}")) |>
      dplyr::rowwise() |>
      mutate(n_miss = glue::glue(glue_formula)) |>
      mutate(level = missing_text) |>
      dplyr::select(strata, var, level, n_miss) |>
      tidyr::pivot_wider(names_from = strata,
                         values_from = n_miss)

  }




  return(miss_tab)


}
emilelatour/tidytableone documentation built on Jan. 6, 2025, 9:20 a.m.