Nothing
#' @name build_row
#' @title Build summary rows
#' @description Summarize data into a data.frame with row(s). Includes optional
#' stratification and null hypothesis testing using a factor or logical
#' variable.
#' @param x A data.frame, numeric, factor, or logical. Data to summarize.
#' @param y A factor or logical. Data to optionally stratify \code{x} by.
#' @param label A character. A label for the summarized data.
#' @param label.stat A logical. Append the summary statistic used to the label.
#' @param inverse A logical. For logical data, report frequencies of the
#' \code{FALSE} values instead.
#' @param stat A character. Name of the summary statistic to use. Supported options
#' include the mean (\code{'mean'}) and median (\code{'median'}) for continuous data.
#' @param stat.pct.sign A logical. Paste a percentage symbol with each frequency.
#' frequency.
#' @param col.overall A logical. Append a column with the statistic for all data.
#' If \code{y} is not specified, this parameter is ignored.
#' @param col.missing A logical. Append a column with counts of missing data.
#' @param test A character. Name of statistical test to compare groups.
#' Supported options: [continuous data] ANOVA linear model (\code{'anova'}),
#' Kruskal-Wallis (\code{'kruskal'}), and Wilcoxon rank sum (\code{'wilcoxon'}) tests;
#' [nominal data] Pearson's Chi-squared Test (\code{'chisq'}) and Fisher's Exact Test
#' (\code{'fisher'}).
#' @param test.simulate.p A logical. Whether to use Monte Carlo simulation of
#' the p-value when testing nominal data.
#' @param col.test A logical. Append a column with the name of the statistical
#' test used.
#' @param digits An integer. Number of digits to round to.
#' @param p.digits An integer. Number of p-value digits to report.
#' @param ... Arguments passed to the appropriate S3 method.
#' @return An object of class \code{tbl_df} (tibble) summarizing the provided
#' data.
#' @examples
#' strata <- as.factor(datasets::mtcars$cyl)
#'
#' # Create a "count" row from a data.frame for a factor
#' build_row(x = datasets::mtcars, y = strata)
#'
#' # Create a row summarizing a numeric by a factor
#' build_row(label = 'MPG', x = as.numeric(datasets::mtcars$mpg), y = strata)
#'
#' # Create a row summarizing a logical by a factor
#' build_row(label = 'VS', x = as.logical(datasets::mtcars$vs), y = strata)
#'
#' # Create a row summarizing a factor by a factor
#' build_row(label = 'Carb', x = as.factor(datasets::mtcars$carb), y = strata)
#' @export
build_row <- function (
x,
...
) UseMethod('build_row')
#' @export
build_row.default <- function (x, label = NULL, ...) {
warning('Warning: \'', label, '\' <', class(x), '> not supported for summary.')
NULL
}
#' @rdname build_row
#' @export
build_row.data.frame <- function (
x,
y = NA_real_,
label = NULL,
label.stat = TRUE,
stat.pct.sign = FALSE,
col.overall = TRUE,
col.missing = FALSE,
col.test = FALSE,
digits = 1,
...
) {
# Statistic Function
paste_stat_ <- function (...) {
utile.tools::paste_freq(
...,
na.rm = FALSE,
percent.sign = stat.pct.sign,
digits = digits
)
}
# Retrieve by variable levels
y_levels <- .get_levels(y)
# Count rows
x_cnt <- nrow(x)
# Build row
cols <- list()
# Variable label
cols$Variable <- paste(c(label, if (label.stat) 'n(%)'), collapse = ', ')
# Overall count
if (col.overall || length(y_levels) == 0) {
cols$Overall <- as.character(x_cnt)
}
# Frequencies by level
if (length(y_levels) > 0) {
cols <- c(
cols,
purrr::map(
y_levels,
function (.y) {
paste_stat_(
x = nrow(x[y == .y & !is.na(y),]),
y = x_cnt
)
}
)
)
}
# Missing
if (col.missing) cols$Missing <- paste_stat_(x = y[is.na(y)], y = y)
# Hypothesis testing columns
if (length(y_levels) > 1) cols[c('p', if (col.test) 'Test')] <- ''
# Return converted tibble
dplyr::as_tibble(cols)
}
#' @rdname build_row
#' @export
build_row.numeric <- function (
x,
y = NA_real_,
label = NULL,
label.stat = TRUE,
stat = c('mean', 'median'),
stat.pct.sign = FALSE,
col.overall = TRUE,
col.missing = FALSE,
test = c('anova', 'kruskal', 'wilcoxon'),
col.test = FALSE,
digits = 1,
p.digits = 4,
...
) {
# Check arguments
stat <- match.arg(stat)
test <- match.arg(test)
# Statistic functions
paste_stat_ <- function (...) {
if (stat == 'median') utile.tools::paste_median(..., digits = digits)
else utile.tools::paste_mean(..., digits = digits)
}
paste_freq_ <- function (...) {
utile.tools::paste_freq(
...,
na.rm = FALSE,
percent.sign = stat.pct.sign,
digits = digits
)
}
# Retrieve by variable levels
y_levels <- .get_levels(y)
# Create column object
cols <- list()
# Variable label +/- statistic name
cols$Variable <- paste(
c(
label,
if (label.stat) {
if (stat == 'median') 'median[IQR]'
else 'mean\u00B1SD'
}
),
collapse = ', '
)
# Summary statistic: Overall
if (col.overall | length(y_levels) == 0) cols$Overall <- paste_stat_(x = x)
# Summary statistic: By strata
if (length(y_levels) > 0) {
cols <- c(
cols,
purrr::map_chr(
y_levels,
function (.y) paste_stat_(x = x[!is.na(x) & y %in% .y])
)
)
}
# Missing
if (col.missing) {
cols$Missing <- paste_freq_(x = sum(is.na(x)), y = length(x))
}
# Hypothesis testing
if (length(y_levels) > 1) {
# Statistical test
test <- utile.tools::test_hypothesis(
x = x,
y = y,
test = test,
digits = digits,
p.digits = p.digits
)
# Addend test data
cols$p <- test$p
if (col.test) cols$Test <- test$test
}
# Return converted tibble
dplyr::as_tibble(cols)
}
#' @rdname build_row
#' @export
build_row.logical <- function (
x,
y = NA_real_,
label = NULL,
label.stat = TRUE,
inverse = FALSE,
stat.pct.sign = FALSE,
col.overall = TRUE,
col.missing = FALSE,
test = c('chisq', 'fisher'),
test.simulate.p = FALSE,
col.test = FALSE,
digits = 1,
p.digits = 4,
...
) {
# Check arguments
test <- match.arg(test)
# Statistic factory
paste_stat_ <- function (...) {
utile.tools::paste_freq(
...,
na.rm = FALSE,
percent.sign = stat.pct.sign,
digits = digits
)
}
# Retrieve by variable levels
y_levels <- .get_levels(y)
# Set inverse, if applicable
if (inverse) x <- !x
# Create column object
cols <- list()
# Variable label +/- statistic name
cols$Variable <- paste(
c(
label,
if (inverse) 'no',
if (label.stat) 'n(%)'
),
collapse = ', '
)
# Overall statistic
if (col.overall || length(y_levels) == 0) {
cols$Overall <- paste_stat_(x = x[x & !is.na(x)], y = x)
}
# Strata statistics
if (length(y_levels) > 0) {
cols <- c(
cols,
purrr::map(
y_levels,
function (.y) {
paste_stat_(
x = x[(x & !is.na(x)) & (y %in% .y)],
y = x[y %in% .y]
)
}
)
)
}
# Missing
if (col.missing) cols$Missing <- paste_stat_(x = x[is.na(x)], y = x)
# Hypothesis testing
if (length(y_levels) > 1) {
# Statistical test
test <- utile.tools::test_hypothesis(
x = x,
y = y,
test = test,
simulate.p = test.simulate.p,
digits = digits,
p.digits = p.digits
)
# Addend test data
cols$p <- test$p
if (col.test) cols$Test <- test$test
}
# Return converted tibble
dplyr::as_tibble(cols)
}
#' @rdname build_row
#' @export
build_row.factor <- function (
x,
y = NA_real_,
label = NULL,
label.stat = TRUE,
stat.pct.sign = FALSE,
col.overall = TRUE,
col.missing = FALSE,
test = c('chisq', 'fisher'),
test.simulate.p = FALSE,
col.test = FALSE,
digits = 1,
p.digits = 4,
...
) {
# Check arguments
test <- match.arg(test)
# Statistic function
paste_stat_ <- function (...) {
utile.tools::paste_freq(
...,
na.rm = FALSE,
percent.sign = stat.pct.sign,
digits = digits
)
}
# Retrieve y variable levels
y_levels <- .get_levels(y)
# Identify x levels, make any NA explicit
x_levels <- .get_levels(x)
if (any(is.na(x))) x_levels <- c(x_levels, "Missing" = NA_character_)
level_fill <- rep('', length(x_levels))
# Create column object
cols <- list()
# Variable labels
cols$Variable <- c(
paste(c(label, if (label.stat) 'n(%)'), collapse = ', '),
paste0(' ', names(x_levels))
)
# Overall summary statistic
if (col.overall || length(y_levels) == 0) {
cols$Overall <- c(
'',
purrr::map_chr(x_levels, function (.x) {
paste_stat_(x = x[x %in% .x], y = x)
})
)
}
# Strata statistics
if (length(y_levels) > 0) {
cols <- c(
cols,
purrr::map(
y_levels,
function (.y) {
c(
'',
purrr::map_chr(
x_levels,
function (.x) {
paste_stat_(
x = x[x %in% .x & y %in% .y],
y = x[y %in% .y]
)
}
)
)
}
)
)
}
# Show missing count
if (col.missing) {
cols$Missing <- c(paste_stat_(x = x[is.na(x)], y = x), level_fill)
}
# Testing with by variable
if (length(y_levels) > 1) {
# Statistical test
test <- utile.tools::test_hypothesis(
x = x,
y = y,
test = test,
simulate.p = test.simulate.p,
digits = digits,
p.digits = p.digits
)
# Addend test data
cols$p <- c(test$p, level_fill)
if (col.test) cols$Test <- c(test$test, level_fill)
}
# Return converted tibble
dplyr::as_tibble(cols)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.