Nothing
#' Easily summarize categorical data
#'
#' @description \code{ezsummary_categorical()} summarizes categorical data.
#'
#' @param tbl A vector, a data.frame or a \code{dplyr} \code{tbl}.
#' @param n A T/F value; total counts of records. Default is
#' \code{FALSE}.
#' @param count A T/F value; count of records in each category.
#' Default is \code{TRUE}.
#' @param p A T/F value; proportion or percentage of records in each category.
#' Default is \code{TRUE}.
#' @param p_type A character string determining the output format of \code{p};
#' possible values are \code{decimal} and \code{percent}. Default value is
#' \code{decimal}.
#' @param digits A numeric value determining the rounding digits; Replacement
#' for \code{round.N}. Default setting is to read from \code{getOption()}.
#' @param rounding_type A character string determining the rounding method;
#' possible values are \code{round}, \code{signif}, \code{ceiling} and
#' \code{floor}. When \code{ceiling} or \code{floor} is selected, \code{digits}
#' won't have any effect.
#' @param flavor A character string with two possible inputs: "long" and "wide".
#' "Long" is the default setting which will put grouping information on the left
#' side of the table. It is more machine readable and is good to be passed into
#' the next analytical stage if needed. "Wide" is more print ready (except for
#' column names, which you can fix in the next step, or fix in LaTex or
#' packages like \code{htmlTable}). In the "wide" mode, the analyzed variable
#' will be the only "ID" variable and all the stats values will be presented
#' ogranized by the grouping variables (if any). If there is no grouping, the
#' outputs of "wide" and "long" will be the same.
#' @param fill If set, missing values created by the "wide" flavor will be
#' replaced with this value. Please check \code{\link[tidyr]{spread}} for
#' details. Default value is \code{0}
#' @param unit_markup When unit_markup is not NULL, it will call the ezmarkup
#' function and perform column combination here. To make everyone's life
#' easier, I'm using the term "unit" here. Each unit mean each group of
#' statistical summary results. If you want to know mean and stand deviation,
#' these two values are your units so you can put something like "[. (.)]" there
#' #' @param P Deprecated; Will change the value of \code{p_type} if used in this
#' version.
#' @param P Deprecated; Will change the value of \code{p_type} if used in this
#' version.
#' @param round.N Deprecated; Will change the value of \code{rounding_type} if
#' used in this version.
#'
#' @examples
#' library(dplyr)
#' mtcars %>%
#' group_by(am) %>%
#' select(cyl, gear, carb) %>%
#' ezsummary_categorical()
#'
#' mtcars %>%
#' select(cyl, gear, carb) %>%
#' ezsummary_categorical(n=TRUE, round.N = 2)
#'
#' @export
ezsummary_categorical <- function(
tbl, n = FALSE, count = TRUE, p = TRUE, p_type = c("decimal", "percent"),
digits = 3,
rounding_type = c("round", "signif", "ceiling", "floor"),
P = FALSE, round.N = 3,
flavor = c("long", "wide"), fill = 0, unit_markup = NULL
){
# Define the following variable to avoid NOTE on RMD check
variable = value = analysis = var_origin = var_options = NULL
# Option P and round.N have been deprecated. I'm still keeping the variables
# here so people can still use them.
if(P == TRUE){
warning("Option P has been deprecated. Please use p_type instead.")
p <- TRUE
p_type <- "percent"
}
if(round.N != 3){
warning("Option round.N has been deprecated. Please use 'digits' instead.")
digits <- round.N
}
if(is.vector(tbl)){
tbl <- as.tbl(as.data.frame(tbl))
attributes(tbl)$names <- "unknown"
warning('ezsummary cannot detect the naming information from an atomic ',
'vector. If you want to have full naming information, please ',
'pass the value in as a data frame using `select` from dplyr.')
}
p_type <- match.arg(p_type)
rounding_type <- match.arg(rounding_type)
flavor <- match.arg(flavor)
# Try to obtain grouping and variable information from the input tbl
group_name <- attributes(tbl)$vars
var_name <- attributes(tbl)$names
if (!is.null(group_name)){
group_name <- as.character(group_name)
var_name <- var_name[!var_name %in% group_name]
}
n_group <- length(group_name)
n_var <- length(var_name)
if(n_group == 0 & flavor == "wide"){flavor <- "long"}
available_tasks <- c(
# count is calculated by default
n = "sum(count)",
count = "count",
p = `if`(
p_type == "decimal",
paste0(rounding_type, "(count/sum(count)",
`if`(rounding_type %in% c("round", "signif"),
paste0(", ", digits, ")"), ")")),
p = paste0("paste0(", rounding_type, "(count/sum(count) * 100",
`if`(rounding_type %in% c("round", "signif"),
paste0(", ", digits, ")"), ")"), ", '%')")
)
)
tasks_list <- available_tasks[c(n, count, p)]
tasks_names <- names(tasks_list)
tbl_summary <- lapply(
var_name,
function(x){
summary_tmp <- tbl %>%
group_by_(x, add = TRUE) %>%
tally() %>%
rename(count = n) %>%
group_by_(.dots = group_name) %>%
mutate_(.dots = tasks_list)
summary_tmp[n_group + 1][[1]] <- paste(
x, summary_tmp[n_group + 1][[1]], sep = "_")
names(summary_tmp)[n_group + 1] <- "variable"
summary_tmp
}
)
category_sizes <- unlist(lapply(tbl_summary, nrow))
tbl_summary <- tbl_summary %>% bind_rows()
if(flavor == "wide"){
tbl_summary <- tbl_summary %>%
gather_("analysis", "value", tasks_names)
tbl_summary[group_name] <- sapply(
group_name, function(x){paste(x, tbl_summary[x][[1]], sep = ".")}
)
tbl_summary <- tbl_summary %>%
unite_("analysis", c(group_name, "analysis"))
group_reorder <- sapply(group_name, function(x){
paste(x, attr(tbl, "labels")[x][[1]], sep = ".")
}) %>%
apply(1, paste, collapse = "_")
tasks_names <- c(sapply(group_reorder, function(x){
paste(x, tasks_names, sep = "_")
}))
tbl_summary <- tbl_summary %>%
spread(analysis, value, fill = fill) %>%
ungroup() %>%
separate(variable, into = c("var_origin", "var_options"),
remove = FALSE, sep = "_(?=[^_]+$)") %>%
mutate(var_origin = factor(var_origin, levels = var_name)) %>%
arrange_(c("var_origin", group_name)) %>%
select(-var_origin, -var_options)
category_sizes <- sapply(
var_name,
function(x){
nrow(unique(ungroup(tbl)[x]))
}
)
}
tbl_summary <- tbl_summary[c(
`if`(flavor == "long" & n_group != 0, group_name, NULL),
"variable", tasks_names
)]
# Ezmarkup
if(!is.null(unit_markup)){
if(flavor == "wide" & n_group != 0){
ezmarkup_formula <- paste0(
".", paste0(rep(unit_markup, nrow(attr(tbl, "labels"))), collapse = ""))
}else{
ezmarkup_formula <- paste0(paste0(rep(".", n_group), collapse = ""),
".", unit_markup)
}
tbl_summary <- ezmarkup(tbl_summary, ezmarkup_formula)
}
attr(tbl_summary, "categories") <- rep(var_name, category_sizes)
attr(tbl_summary, "flavor") <- flavor
return(tbl_summary)
}
#' Shorthand for ezsummary_categorical
#' @rdname ezsummary_categorical
#' @export
ezsummary_c <- ezsummary_categorical
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.