#' Calculates and formats descriptive statistics for Table 1.
#'
#' The `fmt_table1` function calculates descriptive statistics by groups for
#' continuous, categorical, and dichotomous variables. Review the `fmt_table1`
#' vignette for detailed examples.
#'
#' @param data data frame.
#' @param by a character name of a categorical variable in data, `by = "group"`.
#' Summary statistics will be calculated separately for each level of the by variable.
#' If `NULL`, summary statistics
#' are calculated using all observations.
#' @param label A list of variable labels,
#' e.g. `list(age = "Age, yrs", ptstage = "Path T Stage")`. If `NULL`, the function
#' will take the label attribute (`attr(data$age, "label")`). If
#' the label doesn't exist, then the label is assigned as the variable name.
#' @param type A list that includes specified summary types. Accepted values
#' are `c("continuous", "categorical", "dichotomous")`,
#' e.g. `type = list(age = "continuous", female = "dichotomous")`.
#' If type not specified for a variable, the function
#' will default to an appropriate summary type.
#' @param statistic A list of the type of statistics to return. The list can contain
#' two names lists (`continuous` and `categorical`). The value within the list is the types of
#' summary statistics to be returned. For continuous variables the choices are:
#' `median`, `q1` (first quartile), `q3` (third quartile), `mean`, `sd` (standard deviation),
#' `min` (minimum), `max` (maximum). For categorical variables the choices are `n` (frequency),
#' `N` (denominator, or cohort size), `p` (percent). The defaults are
#' `continuous = "{median} ({q1}, {q3})"` and `categorical = "{n} ({p}\%)"`.
#' The syntax follows from the \code{\link[glue]{glue}} function. Dichotomous variables
#' follow the same format as categorical.
#' @param digits integer indicating the number of decimal places to round continuous
#' summary statistics. `sprintf(glue::glue("%.{digits}f"), x)`
#' @param id Character vector of an ID or grouping variable. Summary statistics
#' will not be printed for this column. The column may be used in \code{\link{add_comparison}} to
#' calculate p-values with correlated data. Default is `NULL`
#' @param missing whether to include `NA` values in the table. `missing` controls
#' if the table includes counts of `NA` values: the allowed values correspond to
#' never (`"no"`), only if the count is positive (`"ifany"`) and even for
#' zero counts (`"always"`). Default is `"ifany"`.
#' @return Data frame including formatted descriptive statistics.
#' @examples
#' fmt_table1(trial, by = "trt")
#'
#' # convert numeric 'am' to factor to display nicely in header
#' mtcars %>%
#' dplyr::mutate(am = factor(am, c(0, 1), c("Automatic", "Manual"))) %>%
#' fmt_table1(by = "am") %>%
#' add_comparison()
#' @export
fmt_table1 <- function(data, by = NULL, label = NULL, type = NULL,
statistic = NULL, digits = NULL, id = NULL,
missing = c("ifany", "always", "no")) {
missing <- match.arg(missing)
# ungrouping data
data <- data %>% dplyr::ungroup()
# will return call, and all object passed to in fmt_table1 call
# the object func_inputs is a list of every object passed to the function
fmt_table1_inputs <- as.list(environment())
# checking function inputs
fmt_table1_input_checks(
data, by, label, type,
statistic, digits, missing, id
)
# creating a table with meta data about each variable
meta_data <- tibble::tibble(.variable = names(data))
# excluding by variable
if (!is.null(by)) meta_data <- meta_data %>% dplyr::filter(.data$.variable != by)
# excluding id variable
if (!is.null(id)) meta_data <- meta_data %>% dplyr::filter(!(.data$.variable %in% id))
# assigning variable characteristics
meta_data <- meta_data %>%
dplyr::mutate(
# assigning class, if entire var is NA, then assigning class NA
.class = assign_class(data, .data$.variable),
.summary_type = assign_summary_type(
data, .data$.variable, .data$.class, type
),
.dichotomous_value = assign_dichotomous_value(data, .data$.variable,
.data$.summary_type, .data$.class),
.var_label = assign_var_label(data, .data$.variable, label),
.stat_display = assign_stat_display(.data$.summary_type, statistic),
.digits = continuous_digits_guess(
data, .data$.variable, .data$.summary_type, .data$.class, digits
)
)
# calculating summary statistics
table1 <-
meta_data %>%
dplyr::mutate(
# creating summary stat table formatted properly
.stat_table = purrr::pmap(
list(
.data$.variable, .data$.summary_type, .data$.dichotomous_value,
.data$.var_label, .data$.stat_display, .data$.digits, .data$.class
),
~ calculate_summary_stat(data,
variable = ..1, by = get("by"), summary_type = ..2,
dichotomous_value = ..3, var_label = ..4, stat_display = ..5,
digits = ..6, class = ..7, missing = missing
)
)
) %>%
dplyr::select(dplyr::one_of(".variable", ".summary_type", ".stat_table")) %>%
tidyr::unnest_(".stat_table")
# adding header rows to table1
if (is.null(by)) {
header_list <-
create_header(
data = data,
label = c("Variable"),
stat_overall = c("N = {N}")
)
}
if (!is.null(by)) {
header_list <-
create_header(
data = data,
by = by,
label = c("Variable", ""),
stat_by = c("{level}", "N = {n}")
)
}
# stacking header on top of table1
table1 <-
header_list %>%
purrr::map_dfc(~.x) %>%
dplyr::bind_rows(table1) %>%
dplyr::select(dplyr::one_of(".variable", "row_type"), dplyr::everything())
# assigning a class of fmt_table1 (for special printing in Rmarkdown)
results <- list()
class(results) <- "fmt_table1"
# returning all results in a list
results[["table1"]] <- table1 %>% dplyr::select(-dplyr::one_of(".summary_type"))
results[["by"]] <- by
results[["meta_data"]] <- meta_data
results[["call"]] <- sys.call()
results[["inputs"]] <- fmt_table1_inputs
results[["call_list"]] <- list(fmt_table1 = match.call())
return(results)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.