Nothing
#' @title Estimate Mean and 95 Percent Confidence Intervals in dplyr Pipelines
#'
#' @description The mean_table function produces overall and grouped
#' tables of means with related statistics. In addition to means, the
#' mean_table missing/non-missing frequencies, the standard error of the
#' mean (sem), the 95% confidence intervals for the mean(s), the minimum
#' value, and the maximum value. For grouped tibbles, mean_table displays
#' these statistics for each category of the group_by variable.
#'
#' @param .data A tibble or grouped tibble.
#'
#' @param .x The continuous response variable for which the statistics are
#' desired.
#'
#' @param t_prob (1 - alpha / 2). Default value is 0.975, which corresponds to
#' an alpha of 0.05. Used to calculate a critical value from Student's t
#' distribution with n - 1 degrees of freedom.
#'
#' @param output Options for this parameter are "default" and "all".
#'
#' Default output includes the n, mean, sem, and 95% confidence interval for
#' the mean. Using output = "all" also returns the the number of missing
#' values for .x and the critical t-value.
#'
#' @param digits Round mean, lcl, and ucl to digits. Default is 2.
#'
#' @param ... Other parameters to be passed on.
#'
#' @return A tibble of class "mean_table" or "mean_table_grouped"
#' @export
#' @importFrom dplyr %>%
#'
#' @references
#' SAS documentation: http://support.sas.com/documentation/cdl/en/proc/65145/HTML/default/viewer.htm#p0klmrp4k89pz0n1p72t0clpavyx.htm
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(meantables)
#'
#' data(mtcars)
#'
#' # Overall mean table with defaults
#'
#' mtcars %>%
#' mean_table(mpg)
#'
#' # A tibble: 1 x 9
#' response_var n mean sd sem lcl ucl min max
#' <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#' 1 mpg 32 20.1 6.03 1.07 17.9 22.3 10.4 33.9
#'
#' # Grouped means table with defaults
#'
#' mtcars %>%
#' group_by(cyl) %>%
#' mean_table(mpg)
#'
#' # A tibble: 3 x 11
#' response_var group_var group_cat n mean sd sem lcl ucl min max
#' <chr> <chr> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#' 1 mpg cyl 4 11 26.7 4.51 1.36 23.6 29.7 21.4 33.9
#' 2 mpg cyl 6 7 19.7 1.45 0.549 18.4 21.1 17.8 21.4
#' 3 mpg cyl 8 14 15.1 2.56 0.684 13.6 16.6 10.4 19.2
#' }
mean_table <- function(.data, .x, t_prob = 0.975, output = default, digits = 2, ...) {
# ------------------------------------------------------------------
# Prevents R CMD check: "no visible binding for global variable ‘.’"
# ------------------------------------------------------------------
n = sd = t_crit = sem = lcl = ucl = var = n_groups = group_1 = NULL
group_2 = output_arg = default = response_var = `.` = NULL
group_var = group_cat = group_1_cat = group_2_cat = n_miss = NULL
# ===========================================================================
# Quick data checks
# Input data frame is class data.frame
# The ".x" argument is a numeric vector
# There are zero, one, or two group_by variables
# ===========================================================================
if (!("data.frame" %in% class(.data))) {
message("Expecting the class of .data to include data.frame. Instead, the ",
"class was ", class(.data))
}
if (missing(.x)) {
stop("No argument was passed to the '.x' parameter. Expecting '.x' to be a ",
"numeric column.")
}
# ===========================================================================
# Enquo arguments
# enquo the .x argument so that it can be used in the dplyr pipeline below.
# The .x argument is the variable you want the mean of.
# enquo/quo_name/UQ the output argument so that I don't have to use
# quotation marks around the argument being passed.
# ===========================================================================
.x <- rlang::enquo(.x)
output_arg <- rlang::enquo(output) %>% rlang::quo_name()
# ===========================================================================
# Grouping variables
# Count the number of them - accept zero, one, or two
# Grab their names - later returned in summary table
# ===========================================================================
if ("grouped_df" %in% class(.data)) {
n_groups <- attributes(.data)$groups %>% length() - 1
} else {
n_groups <- 0L
}
if (n_groups == 1) {
group_1 <- attributes(.data)$groups[1] %>% names()
} else if (n_groups == 2) {
group_1 <- attributes(.data)$groups[1] %>% names()
group_2 <- attributes(.data)$groups[2] %>% names()
} else if (n_groups > 2) {
stop(".data can be grouped by up to two variables. It is currently grouped ",
n_groups, " variables")
}
# ===========================================================================
# Create a general summary table of means and related stats. Then, add
# group variable names to summary table where applicable
# 1. No group_by variables
# 2. One group_by variable
# 3. Two group_by variables
# ===========================================================================
out <- .data %>%
dplyr::summarise(
# Grab variable (.x) name
response_var = rlang::quo_name(.x),
n_miss = sum(is.na(.data[[rlang::quo_name(.x)]])),
n = sum(!is.na(.data[[rlang::quo_name(.x)]])),
mean = mean(!! .x, na.rm = TRUE),
sd = stats::sd(!! .x, na.rm = TRUE),
t_crit = stats::qt(t_prob, n - 1),
sem = sd / sqrt(n),
lcl = mean - t_crit * sem,
ucl = mean + t_crit * sem,
mean = round(mean, digits), # Round mean
sd = round(sd, digits), # Round sd
lcl = round(lcl, digits), # Round lcl
ucl = round(ucl, digits), # Round ucl
min = min(!! .x, na.rm = TRUE),
max = max(!! .x, na.rm = TRUE)
) %>%
tibble::as_tibble()
# ===========================================================================
# Add group variable names to summary table - if applicable
# Then move to the front of the summary table.
# Also add classes to "out"
# If the input data frame (.data) was a grouped data frame, then the output
# will be a bivariate analysis of means ("mean_table_grouped"). Pass that
# information on to "out." It can be used later in mean_format.
# Otherwise the output will be a univariate analysis of means ("mean_table")
# That class will also be used later in mean_format.
# ===========================================================================
if (n_groups == 0) {
out <- out %>%
dplyr::select(response_var, dplyr::everything())
class(out) <- c("mean_table", class(out))
}
else if (n_groups == 1) {
out <- out %>%
dplyr::mutate(group_var = group_1) %>%
dplyr::rename(group_cat = !! names(.)[1]) %>%
dplyr::select(response_var, group_var, group_cat, dplyr::everything())
class(out) <- c("mean_table_grouped", class(out))
} else if (n_groups == 2) {
out <- out %>%
dplyr::mutate(
group_1 = group_1,
group_2 = group_2
) %>%
dplyr::rename(
group_1_cat = !! names(.)[1],
group_2_cat = !! names(.)[2]
) %>%
dplyr::select(response_var, group_1, group_1_cat, group_2, group_2_cat,
dplyr::everything()) %>%
dplyr::ungroup()
class(out) <- c("mean_table_grouped", class(out))
}
# ===========================================================================
# Control output:
# Typically, I only want the frequency, mean, 95% CI, sem, min, and max.
# Make that the default.
# ===========================================================================
if (output_arg == "default") {
out <- out %>%
dplyr::select(-c(n_miss, t_crit))
}
# Return summary table
out
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.