R/cat_group_tbl.R
In summarytabl: Generate Summary Tables for Categorical, Ordinal, and Continuous Data

Documented in cat_group_tbl

#' @title Summarize two categorical variables
#'
#' @description `cat_group_tbl()` summarizes nominal or categorical 
#' variables by a grouping variable, returning frequency counts and 
#' percentages.
#'
#' @param data A data frame.
#' @param row_var A character string of the name of a variable in `data` 
#' containing categorical data. This is the primary categorical variable.
#' @param col_var A character string of the name of a variable in `data` 
#' containing categorical data. This is the secondary categorical variable.
#' @param margins A character string that determines how percentage values 
#' are calculated; whether they sum to one across rows, columns, or the 
#' entire table (i.e., all). Defaults to `all`, but can also be set to 
#' `rows` or `columns`.
#' @param na.rm.row_var A logical value indicating whether missing values for 
#' `row_var` should be removed before calculations. Default is `FALSE`.
#' @param na.rm.col_var A logical value indicating whether missing values for 
#' `col_var` should be removed before calculations. Default is `FALSE`.
#' @param pivot A character string that determines the format of the table. By 
#' default, `longer` returns the data in the long format. To return the data in 
#' the `wide` format, specify `wider`.
#' @param only A character string or vector of character strings of the types 
#' of summary data to return. Default is `NULL`, which returns both counts and 
#' percentages. To return only counts or percentages, use `count` or `percent`, 
#' respectively.
#' @param ignore An optional named vector or list that defines values to exclude 
#' from `row_var` and `col_var`. If set to `NULL` (default), all values are retained. 
#' To exclude multiple values from `row_var` or `col_var`, provide them as a named 
#' list.
#' 
#' @returns A tibble showing the count and percentage of each category in `row_var` 
#' by each category in `col_var`.
#'
#' @author Ama Nyame-Mensah
#'
#' @examples
#' cat_group_tbl(data = nlsy,
#'               row_var = "gender",
#'               col_var = "bthwht",
#'               pivot = "wider",
#'               only = "count")
#'
#' cat_group_tbl(data = nlsy,
#'               row_var = "birthord",
#'               col_var = "breastfed",
#'               pivot = "longer")
#'
#' @export
cat_group_tbl <- function(data,
                          row_var,
                          col_var,
                          margins = "all",
                          na.rm.row_var = FALSE,
                          na.rm.col_var = FALSE,
                          pivot = "longer",
                          only = NULL,
                          ignore = NULL) {
  set_call()
  on.exit({ .summarytabl$env <- NULL }, add = TRUE)
  
  args <- list(
    data = data,
    table_type = "cat",
    group_func = TRUE,
    row_var = row_var,
    col_var = col_var,
    margins = margins,
    var_label_row = "row_var",
    var_label_col = "col_var",
    variable_type = "valid_var_types",
    na_rm_row_var = na.rm.row_var,
    na_rm_col_var = na.rm.col_var,
    label_na_rm_row = "na.rm.row_var",
    label_na_rm_col = "na.rm.col_var",
    pivot = pivot,
    only = only,
    ignore = ignore
  )
  
  checks <- check_cat_group_args(args)
  check_row_name <- checks$row_var$var
  check_col_name <- checks$col_var$var
  check_ignore <- checks$ignore
  check_row_na.rm <- checks$na_row$na.rm
  check_col_na.rm <- checks$na_col$na.rm
  check_only <- checks$only$only
  check_table_type <- checks$table_type$table_type
  check_pivot <- checks$pivot$pivot
  check_margins <- checks$margins$margins
  vars_to_filter <- 
    c(if (check_row_na.rm) check_row_name, 
      if (check_col_na.rm) check_col_name)
  
  data_sub <- checks$data$df[c(check_row_name, check_col_name)]
  
  ignore_result <-
    extract_ignore_map(
      vars = c(check_row_name, check_col_name),
      ignore = check_ignore,
      var_stem_map = NULL
    )
  ignore_map <- ignore_result$ignore_map
  
  if (!is.null(ignore_map)) {
    cols_to_modify <- names(ignore_map)
    data_sub[cols_to_modify] <- lapply(cols_to_modify, function(col) {
      replace_with_na(data_sub[[col]], ignore_map[[col]])
    })
  }
  
  if (!is.null(vars_to_filter)) {
    for (var in vars_to_filter) {
      data_sub <- data_sub[!is.na(data_sub[[var]]), ]
    }
  }
  
  cat_group_tabl <-
    summarize_cat_group(
      data = data_sub,
      row_var = check_row_name,
      col_var = check_col_name,
      margins = check_margins
    )
  
  if (check_pivot == "wider") {
    cat_group_tabl <-
      pivot_tbl_wider(
        data = cat_group_tabl,
        id_cols = check_row_name,
        names_from = check_col_name,
        names_glue = paste0("{.value}_", check_col_name, "_{", check_col_name, "}"),
        values_from = c("count", "percent")
      )
  }
  
  cat_group_tabl <-
    drop_only_cols(
      data = cat_group_tabl,
      only = check_only,
      only_type = only_type(check_table_type)
    )
  
  return(tibble::as_tibble(cat_group_tabl))
}

#' @keywords internal
summarize_cat_group <- function(data,
                                row_var,
                                col_var,
                                margins) {
  margin_col <- if (margins == "rows") row_var else col_var
  
  grouped_data <- 
    data |> dplyr::count(.data[[row_var]], .data[[col_var]], name = "count")
  
  if (margins %in% c("rows", "columns")) {
    grouped_data <- 
      grouped_data |>
      dplyr::group_by(.data[[margin_col]]) |>
      dplyr::mutate(percent = count / sum(count)) |>
      dplyr::ungroup() |>
      dplyr::arrange(.data[[margin_col]])
  } else if (margins == "all") {
    total <- sum(grouped_data$count)
    grouped_data <- 
      grouped_data |>
      dplyr::mutate(percent = count / total) |>
      dplyr::arrange(.data[[row_var]], .data[[col_var]])
  }
  
  return(grouped_data)
}

Any scripts or data that you put into this service are public.

summarytabl documentation built on Nov. 6, 2025, 5:07 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

summarytabl
Generate Summary Tables for Categorical, Ordinal, and Continuous Data

R/cat_group_tbl.R
In summarytabl: Generate Summary Tables for Categorical, Ordinal, and Continuous Data

Defines functions summarize_cat_group cat_group_tbl

Documented in cat_group_tbl

Try the summarytabl package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

summarytabl Generate Summary Tables for Categorical, Ordinal, and Continuous Data

R/cat_group_tbl.R In summarytabl: Generate Summary Tables for Categorical, Ordinal, and Continuous Data

Defines functions summarize_cat_group cat_group_tbl

Documented in cat_group_tbl

Try the summarytabl package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

summarytabl
Generate Summary Tables for Categorical, Ordinal, and Continuous Data

R/cat_group_tbl.R
In summarytabl: Generate Summary Tables for Categorical, Ordinal, and Continuous Data