R/correltable.R

Defines functions correltable

Documented in correltable

#' Create correlation table (with stars for significance)
#'  for scientific publication
#'
#' The `correltable` function can be used to create correlation
#' table (with stars for significance) for scientific publication
#' This is intended to summarize correlations between (`vars`)
#'  from an input dataset (`data`).
#' Correlations are based on `stats::cor`, `use` and `method`
#'  follow from that function.
#' Stars indicate significance: `*p<.05, **p<.01, ***p<.001`
#' For formatting, variables can be renamed, numbers can be rounded,
#'  upper or lower triangle only can be selected (or whole matrix),
#'   and empty columns/rows can be dropped if using triangles.
#' For more compact columns, variable names can be numbered in the
#'  rows and column names will be corresponding numbers.
#' If only cross-correlation between two sets of variables is desired
#'  (no correlations within a set of variables),
#'   `vars2` and `var_names` can be used.
#' This function will drop any non-numeric variables by default.
#' Requires `tidyverse` and `stats` libraries.
#' @param data The input dataset.
#' @param vars A list of the names of variables to correlate,
#'  e.g. c("Age","height","WASI"),
#'   if NULL, all variables in `data` will be used.
#' @param var_names An optional list to rename the `vars` colnames
#'  in the output table, e.g. c("Age (years)","Height (inches)","IQ").
#'   Must match `vars` in length. If not supplied, `vars` will be printed as is.
#' @param vars2 If cross-correlation between two sets of variables
#'  is desired, add a second list of  variables to correlate with
#'   `vars`; Overrides `tri`, `cutempty`, and `colnum`.
#' @param var_names2 An optional list to rename the `vars2` colnames
#'  in the output table If not supplied, `vars2` will be printed as is.
#' @param method Type of correlation to calculate c("pearson", "spearman"),
#'  based on `stats::cor`, default = "pearson".
#' @param use  Use pairwise.complete.obs or restrict to complete cases
#'  c("pairwise", "complete"), based on `stats::cor`, default = "pairwise".
#' @param round_n The number of decimal places to
#'  round all output to (default=2).
#' @param tri Select output formatting c("upper", "lower","all");
#'  KEEP the upper triangle, lower triangle, or all values, default ="upper"
#' @param cutempty If keeping only upper/lower triangle with `tri`,
#'  cut empty row/column, default=FALSE.
#' @param colnum For more concise column names, number row names and
#'  just use corresponding numbers as column names,
#'   default=FALSE, if TRUE overrides cutempty.
#' @param html Format as html in viewer or not (default=F, print in console),
#'  needs library(htmlTable) installed.
#' @param strata Split table by a 2-level factor variable
#'  with level1 in the upper and level2 in the lower triangle
#'  must have 2+ cases per level, cannot be combined with vars2
#' @return Output Table 1
#' @import 	dplyr
#' @importFrom 	forcats fct_relevel
#' @importFrom 	purrr negate set_names
#' @importFrom 	stats aov chisq.test complete.cases na.omit pf pt t.test
#' @import 	stringr
#' @importFrom 	tidyselect all_of
#' @export
#' @examples
#' \dontrun{
#' correltable(data = psydat)
#' correltable(
#'   data = psydat, vars = c("Age", "Height", "iq"),
#'   tri = "lower", html = TRUE
#' )
#' correltable(
#'   data = psydat, vars = c("Age", "Height", "iq"),
#'   tri = "lower", html = TRUE, strata = "Sex"
#' )
#' correltable(
#'   data = psydat, vars = c("Age", "Height", "iq"),
#'   var_names = c("Age (months)", "Height (inches)", "IQ"),
#'   tri = "upper", colnum = TRUE, html = TRUE
#' )
#' correltable(
#'   data = psydat, vars = c("Age", "Height", "iq"),
#'   var_names = c("Age (months)", "Height (inches)", "IQ"),
#'   vars2 = c("depressT", "anxT"),
#'   var_names2 = c("Depression T", "Anxiety T"), html = TRUE
#' )
#' }

correltable <- function(data,
                        vars = NULL,
                        var_names = vars,
                        vars2 = NULL,
                        var_names2 = vars2,
                        method = "pearson",
                        use = "pairwise",
                        round_n = 2,
                        tri = "upper",
                        cutempty = FALSE,
                        colnum = FALSE,
                        html = FALSE,
                        strata = NULL) {

  # Validate all inputs
  validate_inputs(data, vars, vars2, strata)

  # Handle defaults and duplicates
  if (is.null(vars)) {
    vars <- names(data)
    var_names <- vars
  }

  # Remove duplicates
  if (!is.null(vars)) {
    keep <- !duplicated(vars)
    vars <- vars[keep]
    var_names <- var_names[keep]
  }

  if (!is.null(vars2)) {
    keep <- !duplicated(vars2)
    vars2 <- vars2[keep]
    var_names2 <- var_names2[keep]
  }

  # Check name length matches
  if (length(var_names) != length(vars)) {
    stop("length of var_names must match length of vars", call. = FALSE)
  }

  if (!is.null(vars2) && length(var_names2) != length(vars2)) {
    stop("length of var_names2 must match length of vars2", call. = FALSE)
  }

  # Combine vars if cross-tabulation requested
  all_vars <- if (!is.null(vars2)) c(vars, vars2) else vars
  all_var_names <- if (!is.null(vars2)) c(var_names, var_names2) else var_names

  # Handle complete cases if requested
  data_subset <- data[, all_vars, drop = FALSE]
  n_missing <- 0

  if (use == "complete") {
    n_missing <- sum(!complete.cases(data_subset))
    data_subset <- data_subset[complete.cases(data_subset), , drop = FALSE]
  }

  if (!is.null(strata)) {
    tri="all"
  }

  # Build matrix (stratified or unstratified)
  if (!is.null(strata)) {
    strata_levels <- sort(unique(na.omit(data[[strata]])))

    mat_upper <- assemble_matrix(
      data[data[[strata]] == strata_levels[1], ],
      all_vars, method, use, round_n
    )
    mat_lower <- assemble_matrix(
      data[data[[strata]] == strata_levels[2], ],
      all_vars, method, use, round_n
    )

    # Combine: upper from level 1, lower from level 2
    mat_upper[lower.tri(mat_upper)] <- mat_lower[lower.tri(mat_lower)]
    stat_matrix <- mat_upper

  } else {
    stat_matrix <- assemble_matrix(data_subset, all_vars, method, use, round_n)
  }

  # Apply names
  rownames(stat_matrix) <- all_var_names
  colnames(stat_matrix) <- all_var_names

  # Handle vars2 cross-tabulation
  if (!is.null(vars2)) {
    stat_matrix <- stat_matrix[var_names, var_names2, drop = FALSE]
    tri <- "all"
    cutempty <- FALSE
  }

  # Apply triangle selection
  stat_matrix <- apply_triangle(stat_matrix, tri, cutempty)

  # Add column numbers if requested
  if (colnum) {
    rownames(stat_matrix) <- paste0(seq_along(rownames(stat_matrix)), ". ",
                                    rownames(stat_matrix))
    colnames(stat_matrix) <- seq_along(colnames(stat_matrix))
  }

  # Build caption
  caption <- build_caption(method, use, nrow(data_subset), n_missing,
                           strata, data, all_vars)

  # Return HTML or list
  if (html) {
    if (!requireNamespace("htmlTable", quietly = TRUE)) {
      stop("Package 'htmlTable' required for HTML output. Install with: install.packages('htmlTable')",
           call. = FALSE)
    }

    return(htmlTable::htmlTable(stat_matrix,
                                useViewer = TRUE,
                                caption = caption,
                                pos.caption = "bottom"))
  } else {
    return(list(table = noquote(stat_matrix), caption = caption))
  }
}

Try the scipub package in your browser

Any scripts or data that you put into this service are public.

scipub documentation built on Jan. 10, 2026, 5:07 p.m.