# Row functions
# wilcox_row --------------------------------------------------------------
#' Wilcox test row
#'
#' @param data_item item to be taken from data for row
#' @param row_digits digits for data item (overrides table as a whole)
#' @param na.rm whether to remove NA before reporting median and quartiles
#' @param data separate dataset to use
#' @param data_filter filter to apply to dataset
#'
#' @export
#'
#' @examples
#' first_table(
#' mtcars,
#' .column_variable = am,
#' wilcox_row(disp, row_digits = 2)
#' )
wilcox_row <- function(data_item,
data = NULL,
data_filter = NULL,
row_digits = NULL,
na.rm = TRUE) {
list(
data_item = enquo(data_item),
data = data,
data_filter = enquo(data_filter),
data_function = function(row_item, col_item, ft_options) {
digits <- row_digits %||% ft_options$digits
if ((ft_options$include_p || ft_options$include_estimate_diff) &&
length(unique(col_item[!is.na(row_item)])) == 2L) {
test <- stats::wilcox.test(row_item ~ col_item, conf.int = ft_options$include_estimate_diff)
}
list(row_output = med_iqr(row_item, col_item, digits, na.rm, ft_options),
estimate_diff = if (ft_options$include_estimate_diff) {
if (length(unique(col_item[!is.na(row_item)])) == 2L) {
sprintf(
"%2$.*1$f (%3$.*1$f - %4$.*1$f)",
digits,
-test$estimate,
-test$conf.int[2],
-test$conf.int[1]
)
} else {
NA_character_
}
} else {
NULL
},
p = if (ft_options$include_p) {
if (length(unique(col_item[!is.na(row_item)])) == 2L) {
test$p.value
} else {
NA_real_
}
} else {
NULL
}
)
}
)
}
# null_trans --------------------------------------------------------------
null_trans <- function(x) x
# med_iqr -----------------------------------------------------------------
med_iqr <- function(row_item, col_item, digits, na.rm, ft_options) {
num_data <- split(row_item, col_item)
if (ft_options$include_overall_column) {
num_data <- c(num_data, list(row_item))
}
quartiles <- lapply(
num_data,
stats::quantile,
probs = seq(0.25, 0.75, 0.25),
na.rm = na.rm
)
quartiles <- simplify2array(quartiles)
out <- sprintf(
"%2$.*1$f (%3$.*1$f - %4$.*1$f)",
digits,
quartiles[2, ],
quartiles[1, ],
quartiles[3, ]
)
out[out == "NA (NA - NA)"] <- ft_options$na_text
out
}
# parametric_row --------------------------------------------------------------
#' Row for parametric data
#'
#' @param data_item item to be taken from data for row
#' @param row_digits digits for data item (overrides table as a whole)
#' @param row_digits_sd digits for standard deviation (defaults to the same as
#' \code{row_digits} if set, otherwise defaults to what is set in table options)
#' @param na.rm whether to remove NA before reporting means and standard deviations
#' @param data separate dataset to use
#' @param data_filter filter to apply to dataset
#' @param trans function to apply to data prior to generating means, standard deviations
#' and p values
#' @param atrans inverse function to apply to data after generating means, standard deviations
#' and p values
#'
#' @export
#' @examples
#' first_table(
#' mtcars,
#' .column_variable = am,
#' parametric_row(disp, row_digits = 2)
#' )
parametric_row <- function(data_item,
data = NULL,
data_filter = NULL,
row_digits = NULL,
row_digits_sd = NULL,
na.rm = TRUE,
trans = NULL,
atrans = NULL) {
list(
data_item = enquo(data_item),
data = data,
data_filter = enquo(data_filter),
data_function = function(row_item, col_item, ft_options) {
digits <- row_digits %||% ft_options$digits
trans <- trans %||% ft_options$default_param_trans %||% null_trans
atrans <- atrans %||% ft_options$default_param_atrans %||% null_trans
digits_sd <- row_digits_sd %||% row_digits %||% ft_options$digits_sd
if ((ft_options$include_p || ft_options$include_estimate_diff) &&
length(unique(col_item[!is.na(row_item)])) == 2L) {
test <- stats::t.test(row_item ~ col_item)
}
list(row_output = mean_sd(row_item = row_item, col_item = col_item,
digits = digits, na.rm = na.rm, trans = trans,
atrans = atrans, digits_sd = digits_sd, ft_options = ft_options),
estimate_diff = if (ft_options$include_estimate_diff) {
if (length(unique(col_item[!is.na(row_item)])) == 2L) {
sprintf(
"%2$.*1$f (%3$.*1$f - %4$.*1$f)",
digits,
diff(test$estimate),
-test$conf.int[2],
-test$conf.int[1]
)
} else {
NA_character_
}
} else {
NULL
},
p = if (ft_options$include_p) {
if (length(unique(col_item[!is.na(row_item)])) == 2L) {
stats::t.test(trans(row_item) ~ col_item)$p.value
} else {
summary(stats::aov(trans(row_item) ~ col_item))[[1]]$`Pr(>F)`[[1]]
}
} else {
NULL
})
}
)
}
# mean_sd -----------------------------------------------------------------
mean_sd <- function(row_item, col_item, digits, na.rm, trans, atrans, digits_sd = digits, ft_options) {
values <- lapply(
split(row_item, col_item),
function(x) {c(atrans(mean(trans(x), na.rm = na.rm)), atrans(stats::sd(trans(x), na.rm = na.rm)))}
)
values <- simplify2array(values)
out <- sprintf(
"%2$.*1$f (%3$.*4$f)",
digits,
values[1, ],
values[2, ],
digits_sd
)
out[out == "NA (NA)"] <- ft_options$na_text
out
}
# kruskal_row --------------------------------------------------------------
#' Kruskal Wallis test row
#'
#' @param data_item item to be taken from data for row
#' @param row_digits digits for data item (overrides table as a whole)
#' @param na.rm whether to remove NA before reporting median and quartiles
#' @param data separate dataset to use
#' @param data_filter filter to apply to dataset
#'
#' @export
#' @examples
#' first_table(
#' mtcars,
#' .column_variable = cyl,
#' kruskal_row(disp, row_digits = 2)
#' )
kruskal_row <- function(data_item,
data = NULL,
data_filter = NULL,
row_digits = NULL,
na.rm = TRUE) {
list(
data_item = enquo(data_item),
data = data,
data_filter = enquo(data_filter),
data_function = function(row_item, col_item, ft_options) {
digits <- row_digits %||% ft_options$digits
list(
row_output = med_iqr(row_item, col_item, digits, na.rm, ft_options),
p = if (ft_options$include_p) {
if (length(unique(col_item[!is.na(row_item)])) >= 2L) {
stats::kruskal.test(row_item ~ factor(col_item))$p.value
} else {
NA_real_
}
} else {
NULL
}
)
}
)
}
# fisher_row --------------------------------------------------------------
#' Row using Fisher's exact test
#'
#' @inheritParams wilcox_row
#' @param na.rm whether to include NA in the denominator for percentages
#' @param reference_level a level of the variable to drop from display
#' @param include_reference whether to include the first level of the factor
#' in the report
#' @param workspace passed onto \code{\link[stats]{fisher.test}}
#' @param include_denom whether to include the denominator for categorical
#' variables
#' @param percent_first whether to put the percent before the n for categorical
#' variables
#' @param cat_out_of_row whether percentages in categories should be calculated
#' out of row rather than column
#'
#' @export
#'
#' @examples
#' first_table(
#' mtcars,
#' .column_variable = cyl,
#' fisher_row(gear, row_digits = 2, include_reference = TRUE)
#' )
fisher_row <- function(data_item,
data = NULL,
data_filter = NULL,
row_digits = NULL,
na.rm = TRUE,
reference_level = NULL,
include_reference = TRUE,
workspace = NULL,
include_denom = NULL,
percent_first = NULL,
cat_out_of_row = NULL
) {
list(
data_item = enquo(data_item),
data = data,
data_filter = enquo(data_filter),
data_function = function(row_item, col_item, ft_options) {
digits <- row_digits %||% ft_options$digits_percent
include_denom <- include_denom %||% ft_options$include_denom
percent_first <- percent_first %||% ft_options$percent_first
cat_out_of_row <- cat_out_of_row %||% ft_options$cat_out_of_row
if (is.logical(row_item)) {
row_item <- factor(row_item, levels = c(FALSE, TRUE))
}
tab <- table(row_item, col_item)
if (nrow(tab) == 0) {
tab <- table(row_item, col_item, useNA = "ifany")
}
output <-
n_percent(
tab,
na.rm = na.rm,
digits = digits,
include_denom = include_denom,
percent_first = percent_first,
include_reference = include_reference,
reference_level = reference_level,
cat_out_of_row = cat_out_of_row,
include_overall_column = ft_options$include_overall_column,
hide_level_logical = ft_options$hide_level_logical
)
list(
row_output = output,
p = if (ft_options$include_p) {
if (all(dim(tab) > 1L) &&
sum(rowSums(tab) > 0, na.rm = TRUE) > 1 &&
sum(colSums(tab) > 0, na.rm = TRUE) > 1) {
workspace <- workspace %||% ft_options$workspace
hybrid <- any(dim(tab) > 2L) && ft_options$hybrid_fisher
simulate.p.value <- any(dim(tab) > 2L) && ft_options$simulate_p_value_fisher
stats::fisher.test(
tab,
workspace = workspace,
hybrid = hybrid,
simulate.p.value = simulate.p.value
)$p.value
} else {
NA_real_
}
} else {
NULL
}
)
}
)
}
# chisq_row --------------------------------------------------------------
#' Row using chi squared test
#'
#' @inheritParams wilcox_row
#' @param na.rm whether to include NA in the denominator for percentages
#' @param reference_level a level of the variable to drop from display
#' @param include_reference whether to include the first level of the factor
#' in the report
#' @param include_denom whether to include the denominator for categorical
#' variables
#' @param percent_first whether to put the percent before the n for categorical
#' variables
#' @param cat_out_of_row whether percentages in categories should be calculated
#' out of row rather than column
#'
#' @export
#'
#' @examples
#' first_table(
#' mtcars,
#' .column_variable = cyl,
#' chisq_row(gear, row_digits = 2, include_reference = TRUE)
#' )
chisq_row <- function(data_item,
data = NULL,
data_filter = NULL,
row_digits = NULL,
na.rm = TRUE,
reference_level = NULL,
include_reference = TRUE,
include_denom = NULL,
percent_first = NULL,
cat_out_of_row = NULL) {
list(
data_item = enquo(data_item),
data = data,
data_filter = enquo(data_filter),
data_function = function(row_item, col_item, ft_options) {
digits <- row_digits %||% ft_options$digits_percent
include_denom <- include_denom %||% ft_options$include_denom
percent_first <- percent_first %||% ft_options$percent_first
cat_out_of_row <- cat_out_of_row %||% ft_options$cat_out_of_row
if (is.logical(row_item)) {
row_item <- factor(row_item, levels = c(FALSE, TRUE))
}
tab <- table(row_item, col_item)
if (nrow(tab) == 0) {
tab <- table(row_item, col_item, useNA = "ifany")
}
output <-
n_percent(
tab,
na.rm = na.rm,
digits = digits,
include_denom = include_denom,
percent_first = percent_first,
include_reference = include_reference,
reference_level = reference_level,
cat_out_of_row = cat_out_of_row,
include_overall_column = ft_options$include_overall_column,
hide_level_logical = ft_options$hide_level_logical
)
list(
row_output = output,
p = if (ft_options$include_p) {
if (all(dim(tab) > 1L)) {
stats::chisq.test(tab)$p.value
} else {
NA_real_
}
} else {
NULL
}
)
}
)
}
n_percent <- function(tab,
na.rm,
digits,
include_denom,
percent_first,
include_reference,
reference_level,
cat_out_of_row,
include_overall_column,
hide_level_logical) {
if (include_overall_column) {
tab_display <- cbind(tab, rowSums(tab))
} else {
tab_display <- tab
}
if (cat_out_of_row) {
totals <- rep(rowSums(tab, na.rm = na.rm), ncol(tab_display))
} else {
totals <- rep(colSums(tab_display, na.rm = na.rm), each = nrow(tab))
}
pattern <- "%2$d"
variables <- quos(
digits,
tab_display,
tab_display / totals * 100
)
if (include_denom) {
pattern <- paste0(pattern, "/%4$d")
variables <- c(variables, quos(totals))
}
if (percent_first) {
pattern <- paste0("%3$.*1$f%% (", pattern, ")")
} else {
pattern <- paste0(pattern, " (%3$.*1$f%%)")
}
output <- rlang::eval_tidy(expr(sprintf(pattern, !!!variables)))
dim(output) <- dim(tab_display)
output <- cbind(rownames(tab_display), output)
if (!include_reference && is.null(reference_level)) {
reference_level <- rownames(tab)[1]
}
if (!include_reference && !is.null(reference_level) && nrow(tab) > 1) {
output <- output[rownames(tab) != reference_level, , drop = FALSE]
if (hide_level_logical && identical(output[, 1], "TRUE")) {
output[, 1] <- ""
}
}
output
}
#' Cox Proportional Hazards Row
#'
#' @inheritParams wilcox_row
#' @param row_digits Number of digits to include in the HR
#' @param include_reference whether to include a row for the reference level of
#' a factor
#'
#' @return row for inclusion in `first_table`
#' @export
#'
#' @examples
#' library(survival)
#' first_table(lung,
#' .column_variable = Surv(time, status),
#' ECOG = coxph_row(factor(ph.ecog), row_digits = 2)
#' )
#'
coxph_row <- function(data_item,
data = NULL,
data_filter = NULL,
row_digits = NULL,
include_reference = TRUE) {
stopifnot(requireNamespace("survival"))
list(
data_item = enquo(data_item),
data = data,
data_filter = enquo(data_filter),
data_function = function(row_item, col_item, ft_options) {
digits <- row_digits %||% ft_options$digits
if (!all(is.na(row_item))) {
model <- survival::coxph(col_item ~ row_item)
hrs <- exp(stats::coef(model))
cis <- exp(stats::confint(model))
ps <- stats::pchisq(
(summary(model)$coefficients[, "z", drop = TRUE]) ^ 2,
df = 1,
lower.tail = FALSE
)
if (names(hrs)[1L] == "row_item") {
levs <- ""
cis <- matrix(cis, ncol = 2)
} else {
levs <- sub("row_item", "", names(hrs))
}
output <- sprintf(
"%2$.*1$f (%3$.*1$f - %4$.*1$f)",
digits,
hrs,
cis[, 1, drop = TRUE],
cis[, 2, drop = TRUE]
)
if (include_reference & !identical(levs, "") & !is.logical(row_item)) {
output <- c("Reference", output)
levs <- c(levels(as.factor(row_item))[1L], levs)
ps <- c(NA, ps)
}
list(row_output = cbind(levs, output),
p = if (ft_options$include_p) ps else NULL
)
} else {
list(row_output = matrix(c("", ft_options$na_text), nrow = 1), p = NA_real_)
}
}
)
}
#' Row with type selected by firsttable
#' @inheritParams wilcox_row
#' @param include_reference whether to include a row for the reference level of
#' a factor (only relevant for logical/factor/character variables)
#' @param reference_level a level of the variable to drop from display (only
#' relevant for logical/factor/character variables)
#' @param workspace passed onto \code{\link[stats]{fisher.test}}
#' @param non_parametric whether to use non-parametric tests
#' @param row_digits_default digits where \code{.column_type = "default"}
#' @param row_digits_surv digits where \code{.column_type = "default"} and
#' \code{.column_variable} inherits \code{Surv}
#' @param row_digits_numeric digits where \code{.column_type = "numeric"}
#' @param cat_out_of_row whether percentages in categories should be calculated
#' out of row rather than column
#'
#' @return row for inclusion in `first_table`
#'
#' @details This provides a generic row for \code{\link{first_table}} with
#' the type of row determined from the \code{class} of the data. This allows a
#' \code{list} of \code{\link[rlang]{quos}} to be created and then used for
#' both a standard \code{\link{first_table}} and one that uses a
#' \code{\link[survival]{Surv}} column.
#'
#' @import rlang
#' @export
#'
#' @examples
#' library(survival)
#' first_table(lung,
#' .column_variable = Surv(time, status),
#' .options = list(include_n = TRUE, include_n_per_col = TRUE),
#' `Meal calories` = first_table_row(meal.cal, row_digits = 2)
#' )
first_table_row <- function(data_item,
data = NULL,
data_filter = NULL,
row_digits = NULL,
na.rm = TRUE,
reference_level = NULL,
include_reference = NULL,
workspace = NULL,
non_parametric = NULL,
row_digits_default = NULL,
row_digits_surv = NULL,
row_digits_numeric = NULL,
cat_out_of_row = NULL) {
data_item <- enquo(data_item)
data_filter <- enquo(data_filter)
list(
data_item = data_item,
data = data,
data_filter = data_filter,
data_function = function(row_item, col_item, ft_options) {
digits <- row_digits %||% ft_options$digits
workspace <- workspace %||% ft_options$workspace
non_parametric <- non_parametric %||% ft_options$default_non_parametric
if (inherits(col_item, "Surv")) {
row_function <- coxph_row(!!data_item, data = data, data_filter = !!data_filter,
row_digits = row_digits_surv %||% row_digits,
include_reference = if (is.null(include_reference)) TRUE else include_reference)
} else if (is.numeric(row_item) && !is.numeric(col_item)) {
if (non_parametric) {
if (length(unique(stats::na.omit(col_item))) <= 2) {
row_function <- wilcox_row(!!data_item, data = data, data_filter = !!data_filter,
row_digits = row_digits_default %||% row_digits, na.rm = na.rm)
} else {
row_function <- kruskal_row(!!data_item, data = data, data_filter = !!data_filter,
row_digits = row_digits_default %||% row_digits, na.rm = na.rm)
}
} else {
row_function <- parametric_row(!!data_item, data = data, data_filter = !!data_filter,
row_digits = row_digits_default %||% row_digits, na.rm = na.rm)
}
} else if (is.numeric(row_item) && is.numeric(col_item)) {
row_function <- cor_row(!!data_item, data = data, data_filter = !!data_filter,
row_digits = row_digits_numeric %||% row_digits)
} else if (is.logical(row_item)) {
row_function <- fisher_row(!!data_item, data = data, data_filter = !!data_filter,
row_digits = row_digits_default %||% row_digits,
na.rm = na.rm, reference_level = reference_level %||% "FALSE",
include_reference = if (is.null(include_reference)) cat_out_of_row %||% FALSE else include_reference,
workspace = workspace, cat_out_of_row = cat_out_of_row)
} else {
row_function <- fisher_row(!!data_item, data = data, data_filter = !!data_filter,
row_digits = row_digits_default %||% row_digits,
na.rm = na.rm, reference_level = reference_level,
include_reference = if (is.null(include_reference)) TRUE else include_reference,
workspace = workspace, cat_out_of_row = cat_out_of_row)
}
row_function$data_function(row_item, col_item, ft_options)
}
)
}
# cor_row --------------------------------------------------------------
#' Correlation row
#'
#' @inheritParams wilcox_row
#' @param method method parameter passed onto \code{\link[stats]{cor}}:
#' \code{"pearson"}, \code{"kendall"} or \code{"spearman"}.
#'
#' @export
#'
#' @examples
#' first_table(
#' mtcars,
#' .column_variable = gear,
#' .column_type = "numeric",
#' cor_row(disp, method = "spearman")
#' )
cor_row <- function(data_item,
data = NULL,
data_filter = NULL,
row_digits = NULL,
method = c("pearson", "kendall", "spearman")) {
if (missing(method)) {
method <- NULL
} else {
method <- match.arg(method)
}
list(
data_item = enquo(data_item),
data = data,
data_filter = enquo(data_filter),
data_function = function(row_item, col_item, ft_options) {
digits <- row_digits %||% ft_options$digits_cor %||% ft_options$digits
method <- method %||% ft_options$cor_method
if (sum(!is.na(col_item)) <= 3) {
list(row_output = "", p = if (ft_options$include_p) NA_real_ else NULL)
} else {
test_output <- stats::cor.test(row_item, col_item, method = method)
list(
row_output = sprintf(
"%4$s = %2$.*1$f%3$s",
digits,
test_output$estimate,
if (!is.null(test_output$conf.int)) {
sprintf(" (%2$.*1$f - %3$.*1$f)", digits, test_output$conf.int[1], test_output$conf.int[2])
} else {
""
},
c("r", "tau", "rho")[match(method, c("pearson", "kendall", "spearman"))]
),
p = if (ft_options$include_p) {
test_output$p.value
} else {
NULL
})
}
}
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.