Nothing
#' Cross-Tabulation
#'
#' Cross-tabulation for a pair of categorical variables with either
#' row, column, or total proportions, as well as marginal sums. Works with
#' numeric, character, as well as factor variables.
#'
#' @param x First categorical variable - values will appear as row names.
#' @param y Second categorical variable - values will appear as column names.
#' @param prop Character. Indicates which proportions to show: \dQuote{r}
#' (rows, default), \dQuote{c} (columns), \dQuote{t} (total), or \dQuote{n}
#' (none). Default value can be changed using \code{\link{st_options}},
#' option \code{ctable.prop}.
#' @param useNA Character. One of \dQuote{ifany} (default), \dQuote{no}, or
#' \dQuote{always}. This argument is passed on \sQuote{as is} to
#' \code{\link[base]{table}}, or adapted for \code{\link[stats]{xtabs}} when
#' weights are used.
#' @param totals Logical. Show row and column totals. Defaults to
#' \code{TRUE} but can be set globally with \code{\link{st_options}}, option
#' \code{ctable.totals}.
#' @param style Character. Style to be used by \code{\link[pander]{pander}}. One
#' of \dQuote{simple} (default), \dQuote{grid}, \dQuote{rmarkdown}, or
#' \dQuote{jira}. Can be set globally with \code{\link{st_options}}.
#' @param round.digits Numeric. Number of significant digits to keep. Defaults
#' to \code{1}. To change this default value, use \code{\link{st_options}},
#' option \code{ctable.round.digits}.
#' @param justify Character. Horizontal alignment; one of \dQuote{l} (left),
#' \dQuote{c} (center), or \dQuote{r} (right, default).
#' @param plain.ascii Logical. Used by \code{\link[pander]{pander}}; when
#' \code{TRUE}, no markup characters are generated (useful when printing
#' to console). Defaults to \code{TRUE} unless \code{style = 'rmarkdown'},
#' in which case it is set to \code{FALSE} automatically. To change the
#' default value globally, use \code{\link{st_options}}.
#' @param headings Logical. Show heading section. \code{TRUE} by default; can be
#' set globally with \code{\link{st_options}}.
#' @param display.labels Logical. Display data frame label in the heading
#' section. \code{TRUE} by default, can be changed globally with
#' \code{\link{st_options}}.
#' @param split.tables Numeric. \code{\link[pander]{pander}} argument that
#' specifies how many characters wide a table can be. \code{Inf} by default.
#' @param dnn Character vector. Variable names to be used in output table. In
#' most cases, setting this parameter is not required as the names are
#' automatically generated.
#' @param chisq Logical. Display chi-square statistic along with p-value.
#' @param OR Logical or numeric. Set to \code{TRUE} to show odds ratio with 95%
#' confidence interval, or specify confidence level explicitly (\emph{e.g.},
#' \code{.90}). CI's are calculated using Wald's method of normal approximation.
#' @param RR Logical or numeric. Set to \code{TRUE} to show risk ratio (also
#' called \emph{relative risk} with 95% confidence interval, or specify
#' confidence level explicitly (\emph{e.g.} \code{.90}). CI's are
#' calculated using Wald's method of normal approximation.
#' @param weights Numeric. Vector of weights; must have the same length as
#' \code{x}.
#' @param rescale.weights Logical. When \code{TRUE}, a global constant is
#' applied so that the sum of counts equals \code{nrow(x)}. \code{FALSE} by
#' default.
#' @param \dots Additional arguments passed to \code{\link[pander]{pander}} or
#' \code{\link[base]{format}}.
#'
#' @return A list containing two matrices, \emph{cross_table} and
#' \emph{proportions}. The \emph{print} method takes care of assembling
#' figures from those matrices into a single table. The returned object is
#' of classes \dQuote{\emph{summarytools}} and \dQuote{\emph{list}}, unless
#' \code{\link[summarytools]{stby}} is used, in which case we have an
#' object of class \dQuote{\emph{stby}}.
#'
#' @note Markdown does not fully support multi-header tables;
#' until such support is available, the recommended way to display
#' cross-tables in .Rmd documents is to use `method=render`. See package
#' vignettes for examples.
#'
#' @examples
#' data("tobacco")
#' ctable(tobacco$gender, tobacco$smoker)
#'
#' # Use with() to simplify syntax
#' with(tobacco, ctable(smoker, diseased))
#'
#' # Show column proportions, without totals
#' with(tobacco, ctable(smoker, diseased, prop = "c", totals = FALSE))
#'
#' # Simple 2 x 2 table with odds ratio and risk ratio
#' with(tobacco, ctable(gender, smoker, totals = FALSE, headings = FALSE, prop = "n",
#' OR = TRUE, RR = TRUE))
#'
#' # Grouped cross-tabulations
#' with(tobacco, stby(data = list(x = smoker, y = diseased),
#' INDICES = gender, FUN = ctable))
#'
#'
#' \dontrun{
#' ct <- ctable(tobacco$gender, tobacco$smoker)
#'
#' # Show html results in browser
#' print(ct, method = "browser")
#'
#' # Save results to html file
#' print(ct, file = "ct_gender_smoker.html")
#'
#' # Save results to text file
#' print(ct, file = "ct_gender_smoker.txt")
#' }
#' @seealso \code{\link[base]{table}}, \code{\link[stats]{xtabs}}
#'
#' @keywords classes category
#' @author Dominic Comtois, \email{dominic.comtois@@gmail.com}
#' @export
#' @importFrom stats addmargins na.omit chisq.test qnorm
ctable <- function(x,
y,
prop = st_options("ctable.prop"),
useNA = "ifany",
totals = st_options("ctable.totals"),
style = st_options("style"),
round.digits = st_options("ctable.round.digits"),
justify = "right",
plain.ascii = st_options("plain.ascii"),
headings = st_options("headings"),
display.labels = st_options("display.labels"),
split.tables = Inf,
dnn = c(substitute(x), substitute(y)),
chisq = FALSE,
OR = FALSE,
RR = FALSE,
weights = NA,
rescale.weights = FALSE,
...) {
# Check for group_by()
if (any(grepl("group_by(", deparse(sys.calls()[[1]]), fixed = TRUE))) {
stop("ctable() doesn't support group_by(); use stby() instead")
}
# Support for by()
if (length(dim(x)) == 2) {
x_tmp <- x[[1]]
y <- x[[2]]
x <- x_tmp
flag_by <- TRUE
} else {
flag_by <- FALSE
}
# Convert 1-column data frames into vectors
if (inherits(x, "data.frame") && ncol(x) == 1) {
x <- x[[1]]
}
if (inherits(y, "data.frame") && ncol(y) == 1) {
y <- y[[1]]
}
# Validate arguments ---------------------------------------------------------
errmsg <- character() # problems with arguments will be stored here
if (!is.factor(x) && !is.atomic(x)) {
x <- try(as.vector(x), silent = TRUE)
if (inherits(x, "try-error")) {
errmsg %+=% "'x' must be a factor or an object coercible to a vector"
}
}
if (!is.factor(y) && !is.atomic(x)) {
y <- try(as.vector(y), silent = TRUE)
if (inherits(y, "try-error")) {
errmsg %+=% "'y' must be a factor or an object coercible to a vector"
}
}
errmsg <- c(errmsg, check_args(match.call(), list(...)))
if (length(errmsg) > 0) {
stop(paste(errmsg, collapse = "\n "))
}
# When style is rmarkdown, make plain.ascii FALSE unless specified explicitly
if (style == "rmarkdown" && isTRUE(plain.ascii) &&
(!"plain.ascii" %in% (names(match.call())))) {
plain.ascii <- FALSE
}
# Replace NaN's by NA's (This simplifies matters a lot)
if (NaN %in% x) {
message(paste(sum(is.nan(x)), "NaN value(s) converted to NA in x\n"))
x[is.nan(x)] <- NA
}
if (NaN %in% y) {
message(paste(sum(is.nan(y)), "NaN value(s) converted to NA in y\n"))
y[is.nan(y)] <- NA
}
# Get x & y metadata from parsing function
if (isTRUE(flag_by)) {
parse_info_x <- try(
parse_args(sys.calls(), sys.frames(), match.call(),
var = c("x", "y"), silent = "dnn" %in% names(match.call()),
var_label = FALSE, caller = "ctable"),
silent = TRUE)
if (inherits(parse_info_x, "try-error")) {
parse_info_x <- list()
} else {
if (!is.null(parse_info_x$df_name)) {
df_name <- parse_info_x$df_name
}
if (!is.null(parse_info_x$df_label)) {
df_label <- parse_info_x$df_label
}
}
} else {
parse_info_x <- try(
parse_args(sys.calls(), sys.frames(), match.call(),
var = "x", silent = "dnn" %in% names(match.call()),
var_label = FALSE, caller = "ctable"),
silent = TRUE)
if (inherits(parse_info_x, "try-error")) {
parse_info_x <- list()
}
parse_info_y <- try(
parse_args(sys.calls(), sys.frames(), match.call(),
var = "y", silent = "dnn" %in% names(match.call()),
var_label = FALSE, caller = "ctable"),
silent = TRUE)
if (inherits(parse_info_y, "try-error")) {
parse_info_y <- list()
}
if (length(parse_info_x$df_name) == 1 &&
length(parse_info_y$df_name) == 1 &&
isTRUE(parse_info_x$df_name == parse_info_y$df_name)) {
df_name <- parse_info_x$df_name
}
if (length(parse_info_x$df_label) == 1) {
df_label <- parse_info_x$df_label
}
}
if ("dnn" %in% names(match.call())) {
x_name <- dnn[1]
y_name <- dnn[2]
} else if (!isTRUE(flag_by)) {
x_name <- na.omit(c(parse_info_x$var_name, deparse(dnn[[1]])))[1]
y_name <- na.omit(c(parse_info_y$var_name, deparse(dnn[[2]])))[1]
} else {
x_name <- na.omit(c(parse_info_x$var_name[1], deparse(dnn[[1]])))[1]
y_name <- na.omit(c(parse_info_x$var_name[2], deparse(dnn[[2]])))[1]
}
# Create xfreq table ---------------------------------------------------------
if (identical(NA, weights)) {
freq_table <- table(x, y, useNA = useNA)
# Generate minimal table for calculation of chi-square, OR and RR
freq_table_min <- table(x, y, useNA = "no")
} else {
# Weights are used
weights_string <- deparse(substitute(weights))
# Subset weights when called from by()/stby() to match current data subset
if (isTRUE(flag_by)) {
pf <- parent.frame(2)
weights <- weights[pf$X[[pf$i]]]
}
if (sum(is.na(weights)) > 0) {
warning("missing values on weight variable have been detected and were ",
"treated as zeroes")
weights[is.na(weights)] <- 0
}
if (isTRUE(rescale.weights)) {
weights <- weights / sum(weights) * length(x)
}
if (useNA == "no") {
freq_table <- xtabs(weights ~ x + y, addNA = FALSE)
freq_table_min <- freq_table
} else {
freq_table <- xtabs(weights ~ x + y, addNA = TRUE)
freq_table_min <- xtabs(weights ~ x + y, addNA = FALSE)
}
}
names(dimnames(freq_table)) <- c(x_name, y_name)
prop_table <- switch(prop,
t = prop.table(freq_table),
r = prop.table(freq_table, 1),
c = prop.table(freq_table, 2),
n = NULL)
# Add totals
freq_table <- addmargins(freq_table)
rownames(freq_table)[nrow(freq_table)] <- trs("total")
colnames(freq_table)[ncol(freq_table)] <- trs("total")
if (!is.null(prop_table)) {
prop_table[is.nan(prop_table)] <- 0
if (prop == "t") {
prop_table <- addmargins(prop_table)
} else if (prop == "r") {
prop_table <- addmargins(prop_table, 2)
sum_props <- c(prop.table(freq_table[nrow(freq_table),
-ncol(freq_table)]),
Total = 1)
prop_table <- rbind(prop_table, sum_props)
} else if (prop == "c") {
prop_table <- addmargins(prop_table, 1)
sum_props <- c(prop.table(freq_table[-nrow(freq_table),
ncol(freq_table)]),
Total = 1)
prop_table <- cbind(prop_table, sum_props)
}
rownames(prop_table)[nrow(prop_table)] <- trs("total")
colnames(prop_table)[ncol(prop_table)] <- trs("total")
}
# Change name of NA items to avoid potential problems when echoing to console
if (NA %in% rownames(freq_table)) {
row.names(freq_table)[is.na(row.names(freq_table))] <- "<NA>"
if (prop != "n") {
row.names(prop_table)[is.na(row.names(prop_table))] <- "<NA>"
}
}
if (NA %in% colnames(freq_table)) {
colnames(freq_table)[is.na(colnames(freq_table))] <- "<NA>"
if (prop != "n") {
colnames(prop_table)[is.na(colnames(prop_table))] <- "<NA>"
}
}
# Create output object -------------------------------------------------------
output <- list(cross_table = freq_table,
proportions = prop_table)
# Set output object's attributes
class(output) <- c("summarytools", class(output))
attr(output, "st_type") <- "ctable"
attr(output, "fn_call") <- match.call()
attr(output, "date") <- Sys.Date()
if (isTRUE(chisq)) {
tmp.chisq <- chisq.test(freq_table_min)
tmp.chisq <- c(Chi.squared = round(tmp.chisq$statistic[[1]], 4),
tmp.chisq$parameter,
p.value = round(tmp.chisq$p.value, 4))
attr(output, "chisq") <- tmp.chisq
}
if (!isFALSE(OR) || !isFALSE(RR)) {
if (identical(as.numeric(dim(freq_table_min)), c(2,2))) {
if (!isFALSE(OR)) {
or <- prod(freq_table_min[c(1,4)]) / prod(freq_table_min[c(2,3)])
se <- sqrt(sum(1/freq_table_min))
attr(output, "OR") <- c(or,
exp(log(or) - qnorm(p = 1 - ((1 - OR)/2)) * se),
exp(log(or) + qnorm(p = 1 - ((1 - OR)/2)) * se))
names(attr(output, "OR")) <- c("Odds Ratio", paste0("Lo - ", OR * 100, "%"),
paste0("Hi - ", OR * 100, "%"))
attr(output, "OR-level") <- OR
}
if (!isFALSE(RR)) {
rr <- (freq_table_min[1] / sum(freq_table_min[c(1,3)])) /
(freq_table_min[2] / sum(freq_table_min[c(2,4)]))
se <- sqrt(sum(1/freq_table_min[1],
1/freq_table_min[2],
-1/sum(freq_table_min[c(1,3)]),
-1/sum(freq_table_min[c(2,4)])))
attr(output, "RR") <- c(rr,
exp(log(rr) - qnorm(p = 1 - ((1 - RR)/2)) * se),
exp(log(rr) + qnorm(p = 1 - ((1 - RR)/2)) * se))
names(attr(output, "RR")) <- c("Risk Ratio", paste0("Lo - ", RR * 100, "%"),
paste0("Hi - ", RR * 100, "%"))
attr(output, "RR-level") <- RR
}
} else {
message("OR and RR can only be used with 2 x 2 tables; parameter(s) ignored")
}
}
# Determine data "type" for x, in a non-strict way
if (all(c("ordered", "factor") %in% class(x))) {
Data.type.x <- trs("factor.ordered")
} else if ("factor" %in% class(x)) {
Data.type.x <- trs("factor")
} else if (all(c("POSIXct", "POSIXt") %in% class(x))) {
Data.type.x <- trs("datetime")
} else if ("Date" %in% class(x)) {
Data.type.x <- trs("date")
} else if ("logical" %in% class(x)) {
Data.type.x <- trs("logical")
} else if ("character" %in% class(x)) {
Data.type.x <- trs("character")
} else if ("integer" %in% class(x)) {
Data.type.x <- trs("integer")
} else if ("numeric" %in% class(x)) {
Data.type.x <- trs("numeric")
} else {
Data.type.x <- ifelse(mode(x) %in% rownames(.keywords_context),
trs(mode(x)), mode(x))
}
# Determine data "type" for y, in a non-strict way
if (all(c("ordered", "factor") %in% class(y))) {
Data.type.y <- trs("factor.ordered")
} else if ("factor" %in% class(y)) {
Data.type.y <- trs("factor")
} else if (all(c("POSIXct", "POSIXt") %in% class(y))) {
Data.type.y <- trs("datetime")
} else if ("Date" %in% class(y)) {
Data.type.y <- trs("date")
} else if ("logical" %in% class(y)) {
Data.type.y <- trs("logical")
} else if ("character" %in% class(y)) {
Data.type.y <- trs("character")
} else if ("integer" %in% class(y)) {
Data.type.y <- trs("integer")
} else if ("numeric" %in% class(y)) {
Data.type.y <- trs("numeric")
} else {
Data.type.y <- ifelse(mode(y) %in% rownames(.keywords_context),
trs(mode(y)), mode(y))
}
# Store dataframe name in a variable since this will be used in
# several places in next step
dfn <- ifelse(exists("df_name", inherits = FALSE), df_name, NA)
# Prepare metadata to be stored as the data_info attribute
data_info <-
list(Data.frame = ifelse(exists("df_name", inherits = FALSE),
df_name, NA),
Data.frame.label = ifelse(exists("df_label", inherits = FALSE),
df_label, NA),
Row.variable = x_name,
Row.variable.label = ifelse(!is.na(label(x)), label(x), NA),
Col.variable = y_name,
Col.variable.label = ifelse(!is.na(label(y)), label(y), NA),
Row.x.Col = paste(x_name, y_name, sep = " * "),
Proportions = switch(prop,
r = "Row",
c = "Column",
t = "Total",
n = "None"),
Data.type.x = Data.type.x,
Data.type.y = Data.type.y,
Weights = ifelse(identical(weights, NA), NA,
ifelse(is.na(dfn),
weights_string,
sub(pattern = paste0(dfn, "$"),
replacement = "",
x = weights_string,
fixed = TRUE))),
Group = ifelse("by_group" %in% names(parse_info_x),
parse_info_x$by_group, NA),
by_first = ifelse("by_group" %in% names(parse_info_x),
parse_info_x$by_first, NA),
by_last = ifelse("by_group" %in% names(parse_info_x),
parse_info_x$by_last , NA))
attr(output, "data_info") <- data_info[!is.na(data_info)]
attr(output, "format_info") <- list(style = style,
round.digits = round.digits,
plain.ascii = plain.ascii,
justify = justify,
totals = totals,
split.tables = split.tables,
headings = headings,
display.labels = display.labels)
attr(output, "user_fmt") <- list(... = ...)
attr(output, "lang") <- st_options("lang")
return(output)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.