Nothing
#' Create correlation table (with stars for significance)
#' for scientific publication
#'
#' The `correltable` function can be used to create correlation
#' table (with stars for significance) for scientific publication
#' This is intended to summarize correlations between (`vars`)
#' from an input dataset (`data`).
#' Correlations are based on `stats::cor`, `use` and `method`
#' follow from that function.
#' Stars indicate significance: `*p<.05, **p<.01, ***p<.001`
#' For formatting, variables can be renamed, numbers can be rounded,
#' upper or lower triangle only can be selected (or whole matrix),
#' and empty columns/rows can be dropped if using triangles.
#' For more compact columns, variable names can be numbered in the
#' rows and column names will be corresponding numbers.
#' If only cross-correlation between two sets of variables is desired
#' (no correlations within a set of variables),
#' `vars2` and `var_names` can be used.
#' This function will drop any non-numeric variables by default.
#' Requires `tidyverse` and `stats` libraries.
#' @param data The input dataset.
#' @param vars A list of the names of variables to correlate,
#' e.g. c("Age","height","WASI"),
#' if NULL, all variables in `data` will be used.
#' @param var_names An optional list to rename the `vars` colnames
#' in the output table, e.g. c("Age (years)","Height (inches)","IQ").
#' Must match `vars` in length. If not supplied, `vars` will be printed as is.
#' @param vars2 If cross-correlation between two sets of variables
#' is desired, add a second list of variables to correlate with
#' `vars`; Overrides `tri`, `cutempty`, and `colnum`.
#' @param var_names2 An optional list to rename the `vars2` colnames
#' in the output table If not supplied, `vars2` will be printed as is.
#' @param method Type of correlation to calculate c("pearson", "spearman"),
#' based on `stats::cor`, default = "pearson".
#' @param use Use pairwise.complete.obs or restrict to complete cases
#' c("pairwise", "complete"), based on `stats::cor`, default = "pairwise".
#' @param round_n The number of decimal places to
#' round all output to (default=2).
#' @param tri Select output formatting c("upper", "lower","all");
#' KEEP the upper triangle, lower triangle, or all values, default ="upper"
#' @param cutempty If keeping only upper/lower triangle with `tri`,
#' cut empty row/column, default=FALSE.
#' @param colnum For more concise column names, number row names and
#' just use corresponding numbers as column names,
#' default=FALSE, if TRUE overrides cutempty.
#' @param html Format as html in viewer or not (default=F, print in console),
#' needs library(htmlTable) installed.
#' @param strata Split table by a 2-level factor variable
#' with level1 in the upper and level2 in the lower triangle
#' must have 2+ cases per level, cannot be combined with vars2
#' @return Output Table 1
#' @import dplyr
#' @importFrom forcats fct_relevel
#' @importFrom purrr negate set_names
#' @importFrom stats aov chisq.test complete.cases na.omit pf pt t.test
#' @import stringr
#' @importFrom tidyselect all_of
#' @export
#' @examples
#' \dontrun{
#' correltable(data = psydat)
#' correltable(
#' data = psydat, vars = c("Age", "Height", "iq"),
#' tri = "lower", html = TRUE
#' )
#' correltable(
#' data = psydat, vars = c("Age", "Height", "iq"),
#' tri = "lower", html = TRUE, strata = "Sex"
#' )
#' correltable(
#' data = psydat, vars = c("Age", "Height", "iq"),
#' var_names = c("Age (months)", "Height (inches)", "IQ"),
#' tri = "upper", colnum = TRUE, html = TRUE
#' )
#' correltable(
#' data = psydat, vars = c("Age", "Height", "iq"),
#' var_names = c("Age (months)", "Height (inches)", "IQ"),
#' vars2 = c("depressT", "anxT"),
#' var_names2 = c("Depression T", "Anxiety T"), html = TRUE
#' )
#' }
correltable <- function(data,
vars = NULL,
var_names = vars,
vars2 = NULL,
var_names2 = vars2,
method = "pearson",
use = "pairwise",
round_n = 2,
tri = "upper",
cutempty = FALSE,
colnum = FALSE,
html = FALSE,
strata = NULL) {
# Validate all inputs
validate_inputs(data, vars, vars2, strata)
# Handle defaults and duplicates
if (is.null(vars)) {
vars <- names(data)
var_names <- vars
}
# Remove duplicates
if (!is.null(vars)) {
keep <- !duplicated(vars)
vars <- vars[keep]
var_names <- var_names[keep]
}
if (!is.null(vars2)) {
keep <- !duplicated(vars2)
vars2 <- vars2[keep]
var_names2 <- var_names2[keep]
}
# Check name length matches
if (length(var_names) != length(vars)) {
stop("length of var_names must match length of vars", call. = FALSE)
}
if (!is.null(vars2) && length(var_names2) != length(vars2)) {
stop("length of var_names2 must match length of vars2", call. = FALSE)
}
# Combine vars if cross-tabulation requested
all_vars <- if (!is.null(vars2)) c(vars, vars2) else vars
all_var_names <- if (!is.null(vars2)) c(var_names, var_names2) else var_names
# Handle complete cases if requested
data_subset <- data[, all_vars, drop = FALSE]
n_missing <- 0
if (use == "complete") {
n_missing <- sum(!complete.cases(data_subset))
data_subset <- data_subset[complete.cases(data_subset), , drop = FALSE]
}
if (!is.null(strata)) {
tri="all"
}
# Build matrix (stratified or unstratified)
if (!is.null(strata)) {
strata_levels <- sort(unique(na.omit(data[[strata]])))
mat_upper <- assemble_matrix(
data[data[[strata]] == strata_levels[1], ],
all_vars, method, use, round_n
)
mat_lower <- assemble_matrix(
data[data[[strata]] == strata_levels[2], ],
all_vars, method, use, round_n
)
# Combine: upper from level 1, lower from level 2
mat_upper[lower.tri(mat_upper)] <- mat_lower[lower.tri(mat_lower)]
stat_matrix <- mat_upper
} else {
stat_matrix <- assemble_matrix(data_subset, all_vars, method, use, round_n)
}
# Apply names
rownames(stat_matrix) <- all_var_names
colnames(stat_matrix) <- all_var_names
# Handle vars2 cross-tabulation
if (!is.null(vars2)) {
stat_matrix <- stat_matrix[var_names, var_names2, drop = FALSE]
tri <- "all"
cutempty <- FALSE
}
# Apply triangle selection
stat_matrix <- apply_triangle(stat_matrix, tri, cutempty)
# Add column numbers if requested
if (colnum) {
rownames(stat_matrix) <- paste0(seq_along(rownames(stat_matrix)), ". ",
rownames(stat_matrix))
colnames(stat_matrix) <- seq_along(colnames(stat_matrix))
}
# Build caption
caption <- build_caption(method, use, nrow(data_subset), n_missing,
strata, data, all_vars)
# Return HTML or list
if (html) {
if (!requireNamespace("htmlTable", quietly = TRUE)) {
stop("Package 'htmlTable' required for HTML output. Install with: install.packages('htmlTable')",
call. = FALSE)
}
return(htmlTable::htmlTable(stat_matrix,
useViewer = TRUE,
caption = caption,
pos.caption = "bottom"))
} else {
return(list(table = noquote(stat_matrix), caption = caption))
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.