R/desc_cont.R

Defines functions error_required_format_values check_columns_numeric_integer error_columns_numeric_integer check_columns_in_data error_columns_in_data desc_cont

Documented in desc_cont

#' Summarize continuous variables
#'
#' @description `r lifecycle::badge('stable')` Summarize continuous data and
#' handle output format.
#'
#' @details Many other packages provide tools to summarize data. This one is just
#' the package author's favorite.
#' This makes it much easier to map to nice labeling thereafter.
#' The `format` argument shows the output of the function. You can change square
#' and round brackets, spaces, separators... Important `format`
#' inputs are
#' \itemize{
#' \item `median` the median value
#' \item `q1` the first quartile
#' \item `q3` the third quartile
#' \item `min` the minimum value
#' \item `max` the maximum value
#' }
#' The analogous for categorical variables is [desc_facvar()].
#'
#' @param .data A data.frame, where `vc` are column names of continuous variables
#' @param vc A character vector, list of column names. Should only contain continuous variables
#' @param format A character string. How would you like the output? See details.
#' @param digits A numeric. How many digits? This argument calls internal formatting function
#' @param export_raw_values A logical. Should the raw values be exported?
#'
#' @returns A data.frame with columns
#' \itemize{
#' \item `var` the variable name
#' \item `level` NA, it is provided to have a consistent output
#' with [desc_facvar()]
#' \item `value` the formatted value with possibly the median,
#' interquartile range, and range (see details)
#' \item `n_avail` the number of cases with available data for this
#' variable.
#' }
#'
#' @importFrom stats median qnorm quantile var
#' @importFrom rlang .data
#' @importFrom rlang .env
#' @export
#' @seealso [desc_facvar()]
#'
#' @examples
#' df <-
#'   data.frame(
#'     smoke_status = c("smoker", "non-smoker",
#'            "smoker", "smoker",
#'            "smoker", "smoker",
#'            "non-smoker"
#'            ),
#'     age = c(60, 50, 56, 49, 75, 69, 85),
#'     bmi = c(18, 30, 25, 22, 23, 21, 22)
#'   )
#'
#' # Use default formatting
#'
#' desc_cont(.data = df, vc = c("age", "bmi"))
#'
#' # Use custom formatting
#'
#' desc_cont(.data = df,
#'           vc = c("age", "bmi"),
#'           format = "median (q1;q3)"
#'           )
#'
#' # You might want to export raw values, to run plotting or
#' # other formatting functions
#'
#' desc_cont(.data = df, vc = c("age", "bmi"),
#'           export_raw_values = TRUE)

desc_cont <-
  function(.data,
           vc,
           format = "median (q1-q3) [min-max]",
           digits = 1,
           export_raw_values = FALSE
           ){

    # checkers ----

    check_columns_in_data(.data, vc)

    # only numeric or integer vars ----

    check_columns_numeric_integer(.data, vc)

    # ---- formatting arguments ----

    display_median <-
      stringr::str_detect(format, "median")

    display_q1 <-
      stringr::str_detect(format, "q1")

    display_q3 <-
      stringr::str_detect(format, "q3")

    display_min <-
      stringr::str_detect(format, "min")

    display_max <-
      stringr::str_detect(format, "max")

    if(!any(display_median,
            display_q1,
            display_q3,
            display_min,
            display_max)
       ){
      error_required_format_values(
        format = format,
        required_values = c("median", "q1", "q3", "min", "max")
      )

    }

    var_to_export <-
      if(export_raw_values){
        c("var", "level", "value", "n_avail",
          "median", "q1", "q3", "min", "max")
      } else {
        c("var", "level", "value", "n_avail")
      }

    # ---- core ----

    cc_core <- function(one_var){
      vc_s <- rlang::ensym(one_var)

      check_all_na <-
        all(is.na(.data[[one_var]]))

      if (check_all_na)
        message("var ", one_var, " is empty")

      r1 <-
        if (!check_all_na) {
          .data |>
            dplyr::summarise(
              var = one_var,
              level = NA_character_,
              median    =
                median({{ vc_s }}, na.rm = TRUE),
              q1 =
                quantile({{ vc_s }}, .25, na.rm = TRUE),
              q3  =
                quantile({{ vc_s }}, .75, na.rm = TRUE),
              min =
                min({{ vc_s }}, na.rm = TRUE),
              max =
                max({{ vc_s }}, na.rm = TRUE),

              dplyr::across(dplyr::all_of(c("median", "q1", "q3", "min", "max")),
                     ~ cff(.x, dig = .env$digits),
                     .names = "{.col}_fmt"),

              value =
                .env$format |>
                stringr::str_replace_all(
                  "median",
                  paste0(.data$median_fmt)
                ) |>
                stringr::str_replace_all(
                  "q1",
                  paste0(.data$q1_fmt)
                ) |>
                stringr::str_replace_all(
                  "q3",
                  .data$q3_fmt
                ) |>
                stringr::str_replace_all(
                  "min",
                  .data$min_fmt
                ) |>
                stringr::str_replace_all(
                  "max",
                  .data$max_fmt
                )
              ,
              n_missing =
                sum(is.na({{ vc_s }})),
              n_avail =
                sum(!is.na({{ vc_s }}))
            ) |>
            dplyr::select(dplyr::all_of(var_to_export))
        } else {
          .data |>
            dplyr::summarise(
              var = one_var,
              level = NA_character_,
              value = "-",
              n_avail = sum(!is.na({
                {
                  vc_s
                }
              }))
            )
        }

      r1
    }

    # ---- apply cc_core ----

    purrr::map(
      vc,
      cc_core
    ) |>
      purrr::list_rbind()
  }

# Helpers ------------

error_columns_in_data <-
  function(
    col_arg,
    must_be_in,
    missing_cols,
    call = rlang::caller_env()
    ){
    cli::cli_abort(
      message =
        c("{.arg {col_arg}} columns must be in {.arg {must_be_in}}.",
          "x" = "The followings were not found in {.arg {must_be_in}}: {.val {missing_cols}}."),
      class  = "columns_not_in_data",
      col_arg = col_arg,
      must_be_in = must_be_in,
      missing_cols = missing_cols,
      call = call
    )
  }

check_columns_in_data <-
  function(
    .data,
    cols,
    call = rlang::caller_env()
    )  {
      if(!all(cols %in% names(.data))){
        missing_cols <-
          cols[!cols %in% names(.data)]

        error_columns_in_data(
          col_arg = rlang::caller_arg(cols),
          must_be_in = ".data",
          missing_cols = missing_cols,
          call = call
        )
      }
    }

error_columns_numeric_integer <-
  function(
    col_arg,
    not_numeric_integer,
    call = rlang::caller_env()
    ){
    cli::cli_abort(
      message =
        c("{.arg {col_arg}} columns must be numeric or integer.",
          "x" = "The following {?is/are} not numeric/integer: {.val {not_numeric_integer}}."),
      class  = "columns_not_numeric_integer",
      col_arg = col_arg,
      not_numeric_integer = not_numeric_integer,
      call = call
    )
  }

check_columns_numeric_integer <-
  function(.data, cols, call = rlang::caller_env())  {
    col_classes <-
      purrr::map(.data, class) |>
      purrr::keep_at(cols) |>
      purrr::list_simplify()

    if (!all(col_classes %in% c("numeric", "integer"))) {
      not_numeric_integer <-
        cols[!(col_classes %in% c("numeric", "integer"))]


      not_numeric_integer <-
        cols[!(purrr::map_lgl(.data[cols], is.numeric) |
                 purrr::map_lgl(.data[cols], is.integer))]

      error_columns_numeric_integer(
        col_arg = rlang::caller_arg(cols),
        not_numeric_integer = not_numeric_integer,
        call = call
      )
    }
  }

error_required_format_values <-
  function(
    format,
    required_values,
    call = rlang::caller_env()
  ){
    cli::cli_abort(
      message =
        '{.arg format} must contain at least one of {glue::glue_collapse(required_values, sep = ", ", last = " or ")}',
      class  = "required_format_values",
      format = format,
      required_values = required_values,
      call = call
    )
  }

Try the vigicaen package in your browser

Any scripts or data that you put into this service are public.

vigicaen documentation built on April 3, 2025, 8:55 p.m.