R/table1_design.R
In rifttable: Results Tables to Bridge the Rift Between Epidemiologists and Their Data

Documented in table1_design

#' Design A Descriptive Table
#'
#' @description This function generates a \code{design} table from which
#'   \code{\link[rifttable]{rifttable}} can generate a descriptive table.
#'
#' @param data Data set
#' @param ... Optional: Variables to include or exclude (using \code{-variable})
#' @param by Optional: Stratification variable. Typically the exposure.
#' @param total Optional: Whether to add the total count at the beginning.
#'   Defaults to \code{TRUE}.
#' @param empty_levels Optional: Whether to include empty levels of factor
#'   variables. Defaults to \code{FALSE}.
#' @param na_always Optional: Whether to add the count of missing values for
#'   each variable, even if there are none. Defaults to \code{FALSE}, i.e.,
#'   the count of missing values will only be shown if there are any.
#' @param na_label Label for count of missing values. Defaults to
#'   \code{"Unknown"}.
#' @param continuous_type Estimator (\code{type} in
#'   \code{\link[rifttable]{rifttable}} \code{design}) for continuous variables.
#'   Defaults to \code{"median (iqr)"}.
#' @param binary_type Estimator (\code{type} in
#'   \code{\link[rifttable]{rifttable}} \code{design}) for binary variables and
#'   strata of categorical variables. Defaults to \code{"outcomes (risk)"}
#'   (count and column proportion).
#'
#' @return \code{design} tibble that can be passed on to
#'   \code{\link[rifttable]{rifttable}}. Contains an attribute \code{rt_data}
#'   so that the dataset does not have to be provided to
#'   \code{\link[rifttable]{rifttable}} another time.
#' @export
#'
#' @examples
#' # Data preparation
#' cars <- tibble::as_tibble(mtcars) |>
#'   dplyr::mutate(
#'     gear = factor(
#'       gear,
#'       levels = 3:5,
#'       labels = c("Three", "Four", "Five")
#'     ),
#'     # Categorical version of "hp", shows each category
#'     hp_categorical = dplyr::if_else(
#'       hp >= 200,
#'       true = "200+ hp",
#'       false = "<200 hp"
#'     ),
#'     # Binary version of "hp", shows the TRUEs
#'     hp_binary = hp >= 200
#'   )
#' # Label some variables. Better alternative: labelled::set_variable_labels()
#' attr(cars$hp, "label") <- "Horsepower"
#' attr(cars$hp_categorical, "label") <- "Horsepower"
#' attr(cars$hp_binary, "label") <- "200+ hp"
#' attr(cars$am, "label") <- "Automatic transmission"
#' attr(cars$gear, "label") <- "Gears"
#'
#' # Generate table "design"
#' design <- cars |>
#'   table1_design(
#'     hp, hp_categorical, hp_binary, mpg, am,
#'     by = gear
#'   )
#'
#' # Use "design" to create a descriptive table.
#' design |>
#'   rifttable(diff_digits = 0)
#'
#' # Obtain a formatted table
#' design |>
#'   rifttable(diff_digits = 0) |>
#'   rt_gt()
table1_design <- function(
    data,
    ...,
    by = NULL,
    total = TRUE,
    empty_levels = FALSE,
    na_always = FALSE,
    na_label = "Unknown",
    continuous_type = "median (iqr)",
    binary_type = "outcomes (risk)"
) {
  olddata <- data
  data <- data |>
    dplyr::select(!!!rlang::enquos(...))
  if (ncol(data) == 0) {
    data <- olddata
  }
  if (!missing(by)) {
    if (deparse(substitute(by)) %in% names(data)) {
      data <- data |>
        dplyr::select(!{{ by }})
    }
  }

  label_list <- purrr::map(
    .x = data,
    .f = attr,
    which = "label"
  )
  design <- tibble::tibble(
    variable = names(label_list),
    var_label = as.character(label_list),
    type = purrr::map_chr(
      .x = data,
      .f = class
    )
  ) |>
    dplyr::mutate(
      levels = purrr::map(
        .x = .data$variable,
        .f = \(x) sort(unique(stats::na.exclude(data[[x]])))
      )
    )
  if (empty_levels == TRUE) {
    design <- design |>
      dplyr::mutate(
        levels_f = purrr::map(
          .x = .data$variable,
          .f = \(x) levels(data[[x]])
        ),
        levels = dplyr::if_else(
          .data$type == "factor",
          true = .data$levels_f,
          false = .data$levels
        )
      )
  }
  design <- design |>
    dplyr::mutate(
      has_na = purrr::map_lgl(
        .x = .data$variable,
        .f = \(x) anyNA(data[, x])
      ),
      variable_type = dplyr::case_when(
        .data$type %in% c(
          "character", "factor", "ordered"
        ) ~
          "categorical",
        purrr::map_lgl(
          .x = .data$levels, # this is TRUE also for "logical" variable
          .f = \(x) all(stats::na.omit(x) %in% c(0, 1))
        ) ~
          "binary",
        .data$type %in% c("numeric", "integer") ~
          "numeric",
        TRUE ~
          "ERROR-undefined"
      ),
      outcome = purrr::pmap(
        .l = list(
          .data$variable_type,
          .data$variable,
          .data$levels
        ),
        .f = \(variable_type, variable, levels) {
          if (variable_type == "categorical") {
            c(
              "",
              paste(variable, levels, sep = "@"),
              paste0(variable, "@_NA_")
            )
          } else {
            c(variable, paste0(variable, "@_NA_"))
          }
        }
      )
    ) |>
    tidyr::unnest_longer(col = "outcome") |>
    dplyr::filter(
      !(.data$has_na == FALSE &
          na_always == FALSE &
          stringr::str_detect(
            string = .data$outcome,
            pattern = "@_NA_$"
          )
      )
    ) |>
    dplyr::group_by(.data$variable) |>
    dplyr::mutate(
      type = dplyr::case_when(
        stringr::str_detect(
          string = .data$outcome,
          pattern = "@_NA_$"
        ) ~
          "outcomes",
        .data$variable_type == "categorical" &
          dplyr::row_number() == 1 ~
          "",
        .data$variable_type == "categorical" &
          dplyr::row_number() > 1 ~
          binary_type,
        .data$variable_type == "binary" ~
          binary_type,
        .data$variable_type == "numeric" ~
          continuous_type,
        TRUE ~
          "ERROR"
      ),
      label = dplyr::case_when(
        stringr::str_detect(
          string = .data$outcome,
          pattern = "@_NA_$"
        ) ~
          paste0("  ", na_label),
        .data$variable_type == "categorical" &
          dplyr::row_number() == 1 &
          !(.data$var_label %in% c("", "NULL")) ~
          .data$var_label,
        .data$variable_type == "categorical" &
          dplyr::row_number() == 1 &
          !(.data$var_label %in% c("", "NULL")) ~
          .data$variable,
        .data$variable_type == "categorical" &
          dplyr::row_number() > 1 ~
          paste0(
            "  ",
            stringr::str_remove(
              string = .data$outcome,
              pattern = paste0("^", .data$variable, "@")
            )
          ),
        !(.data$var_label %in% c("", "NULL")) ~
          .data$var_label,
        .data$var_label %in% c("", "NULL") ~
          .data$variable
      ),
      na_rm = .data$has_na &
        !stringr::str_detect(
          string = .data$outcome,
          pattern = "@_NA_$"
        ) &
        !(.data$variable_type == "categorical" &
          dplyr::row_number() == 1)
    ) |>
    dplyr::ungroup() |>
    dplyr::select("label", "outcome", "type", "na_rm")
  # Have "na_rm" column only in "design" if >= 1 variable has missing data
  if (!any(design$na_rm)) {
    design <- design |>
      dplyr::select(-"na_rm")
  }
  if (total == TRUE) {
    design <- dplyr::bind_rows(
      tibble::tibble(
        label = "N",
        outcome = "",
        type = "total"
      ),
      design
    )
  }
  if (!missing(by)) {
    design$exposure <- stringr::str_remove_all(
      string = deparse(substitute(by)),
      pattern = "[\"\']"
    )
  }
  if (length(rlang::enquos(...)) > 0) {
    data_for_attr <- olddata |>
      dplyr::select(!!!rlang::enquos(...), {{ by }})
  } else {
    data_for_attr <- olddata
  }
  attr(x = design, which = "rt_data") <- data_for_attr
  return(design)
}

Any scripts or data that you put into this service are public.

rifttable documentation built on June 8, 2025, 1:52 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

rifttable
Results Tables to Bridge the Rift Between Epidemiologists and Their Data

R/table1_design.R
In rifttable: Results Tables to Bridge the Rift Between Epidemiologists and Their Data

Defines functions table1_design

Documented in table1_design

Try the rifttable package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

rifttable Results Tables to Bridge the Rift Between Epidemiologists and Their Data

R/table1_design.R In rifttable: Results Tables to Bridge the Rift Between Epidemiologists and Their Data

Defines functions table1_design

Documented in table1_design

Try the rifttable package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

rifttable
Results Tables to Bridge the Rift Between Epidemiologists and Their Data

R/table1_design.R
In rifttable: Results Tables to Bridge the Rift Between Epidemiologists and Their Data