Nothing
#' @title Summarize two categorical variables
#'
#' @description `cat_group_tbl()` summarizes nominal or categorical
#' variables by a grouping variable, returning frequency counts and
#' percentages.
#'
#' @param data A data frame.
#' @param row_var A character string of the name of a variable in `data`
#' containing categorical data. This is the primary categorical variable.
#' @param col_var A character string of the name of a variable in `data`
#' containing categorical data. This is the secondary categorical variable.
#' @param margins A character string that determines how percentage values
#' are calculated; whether they sum to one across rows, columns, or the
#' entire table (i.e., all). Defaults to `all`, but can also be set to
#' `rows` or `columns`.
#' @param na.rm.row_var A logical value indicating whether missing values for
#' `row_var` should be removed before calculations. Default is `FALSE`.
#' @param na.rm.col_var A logical value indicating whether missing values for
#' `col_var` should be removed before calculations. Default is `FALSE`.
#' @param pivot A character string that determines the format of the table. By
#' default, `longer` returns the data in the long format. To return the data in
#' the `wide` format, specify `wider`.
#' @param only A character string or vector of character strings of the types
#' of summary data to return. Default is `NULL`, which returns both counts and
#' percentages. To return only counts or percentages, use `count` or `percent`,
#' respectively.
#' @param ignore An optional named vector or list that defines values to exclude
#' from `row_var` and `col_var`. If set to `NULL` (default), all values are retained.
#' To exclude multiple values from `row_var` or `col_var`, provide them as a named
#' list.
#'
#' @returns A tibble showing the count and percentage of each category in `row_var`
#' by each category in `col_var`.
#'
#' @author Ama Nyame-Mensah
#'
#' @examples
#' cat_group_tbl(data = nlsy,
#' row_var = "gender",
#' col_var = "bthwht",
#' pivot = "wider",
#' only = "count")
#'
#' cat_group_tbl(data = nlsy,
#' row_var = "birthord",
#' col_var = "breastfed",
#' pivot = "longer")
#'
#' @export
cat_group_tbl <- function(data,
row_var,
col_var,
margins = "all",
na.rm.row_var = FALSE,
na.rm.col_var = FALSE,
pivot = "longer",
only = NULL,
ignore = NULL) {
set_call()
on.exit({ .summarytabl$env <- NULL }, add = TRUE)
args <- list(
data = data,
table_type = "cat",
group_func = TRUE,
row_var = row_var,
col_var = col_var,
margins = margins,
var_label_row = "row_var",
var_label_col = "col_var",
variable_type = "valid_var_types",
na_rm_row_var = na.rm.row_var,
na_rm_col_var = na.rm.col_var,
label_na_rm_row = "na.rm.row_var",
label_na_rm_col = "na.rm.col_var",
pivot = pivot,
only = only,
ignore = ignore
)
checks <- check_cat_group_args(args)
check_row_name <- checks$row_var$var
check_col_name <- checks$col_var$var
check_ignore <- checks$ignore
check_row_na.rm <- checks$na_row$na.rm
check_col_na.rm <- checks$na_col$na.rm
check_only <- checks$only$only
check_table_type <- checks$table_type$table_type
check_pivot <- checks$pivot$pivot
check_margins <- checks$margins$margins
vars_to_filter <-
c(if (check_row_na.rm) check_row_name,
if (check_col_na.rm) check_col_name)
data_sub <- checks$data$df[c(check_row_name, check_col_name)]
ignore_result <-
extract_ignore_map(
vars = c(check_row_name, check_col_name),
ignore = check_ignore,
var_stem_map = NULL
)
ignore_map <- ignore_result$ignore_map
if (!is.null(ignore_map)) {
cols_to_modify <- names(ignore_map)
data_sub[cols_to_modify] <- lapply(cols_to_modify, function(col) {
replace_with_na(data_sub[[col]], ignore_map[[col]])
})
}
if (!is.null(vars_to_filter)) {
for (var in vars_to_filter) {
data_sub <- data_sub[!is.na(data_sub[[var]]), ]
}
}
cat_group_tabl <-
summarize_cat_group(
data = data_sub,
row_var = check_row_name,
col_var = check_col_name,
margins = check_margins
)
if (check_pivot == "wider") {
cat_group_tabl <-
pivot_tbl_wider(
data = cat_group_tabl,
id_cols = check_row_name,
names_from = check_col_name,
names_glue = paste0("{.value}_", check_col_name, "_{", check_col_name, "}"),
values_from = c("count", "percent")
)
}
cat_group_tabl <-
drop_only_cols(
data = cat_group_tabl,
only = check_only,
only_type = only_type(check_table_type)
)
return(tibble::as_tibble(cat_group_tabl))
}
#' @keywords internal
summarize_cat_group <- function(data,
row_var,
col_var,
margins) {
margin_col <- if (margins == "rows") row_var else col_var
grouped_data <-
data |> dplyr::count(.data[[row_var]], .data[[col_var]], name = "count")
if (margins %in% c("rows", "columns")) {
grouped_data <-
grouped_data |>
dplyr::group_by(.data[[margin_col]]) |>
dplyr::mutate(percent = count / sum(count)) |>
dplyr::ungroup() |>
dplyr::arrange(.data[[margin_col]])
} else if (margins == "all") {
total <- sum(grouped_data$count)
grouped_data <-
grouped_data |>
dplyr::mutate(percent = count / total) |>
dplyr::arrange(.data[[row_var]], .data[[col_var]])
}
return(grouped_data)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.