R/cor_matrix.R

Defines functions cor_matrix

Documented in cor_matrix

#' Signed pairwise correlation matrix
#'
#' @description
#' Computes a square matrix of pairwise correlations for a set of numeric and/or categorical predictors.
#'
#' If \code{df} is already a correlation dataframe generated by [cor_df()]), the function transforms it into a correlation matrix. Otherwise, [cor_df()] is used internally to compute pairwise correlations before generating the matrix.
#'
#' Supports parallel computation via \code{future::plan()} and optional progress reporting via \code{progressr::handlers()}.
#'
#'
#' @inheritParams collinear
#' @param df (required; dataframe, tibble, or sf) A dataframe with predictors or the output of [cor_df()]. Default: NULL.
#' @return correlation matrix
#'
#' @examples
#' data(vi_smol)
#'
#' ## OPTIONAL: parallelization setup
#' ## irrelevant when all predictors are numeric
#' ## only worth it for large data with many categoricals
#' # future::plan(
#' #   future::multisession,
#' #   workers = future::availableCores() - 1
#' # )
#'
#' ## OPTIONAL: progress bar
#' # progressr::handlers(global = TRUE)
#'
#' predictors <- c(
#'   "koppen_zone", #character
#'   "soil_type", #factor
#'   "topo_elevation", #numeric
#'   "soil_temperature_mean" #numeric
#' )
#'
#' #from dataframe with predictors
#' x <- cor_matrix(
#'   df = vi_smol,
#'   predictors = predictors
#' )
#'
#' x
#'
#' #from correlation dataframe
#' x <- cor_df(
#'   df = vi,
#'   predictors = predictors
#' ) |>
#'   cor_matrix()
#'
#' x
#'
#' ## OPTIONAL: disable parallelization
#' #future::plan(future::sequential)
#' @autoglobal
#' @family multicollinearity_assessment
#' @author Blas M. Benito, PhD
#' @export
cor_matrix <- function(
  df = NULL,
  predictors = NULL,
  quiet = FALSE,
  ...
) {
  dots <- list(...)

  function_name <- validate_arg_function_name(
    default_name = "collinear::cor_matrix()",
    function_name = dots$function_name
  )

  df <- validate_arg_df_not_null(
    df = df,
    function_name = function_name
  )

  quiet <- validate_arg_quiet(
    quiet = quiet,
    function_name = function_name
  )

  if (
    "collinear_cor_matrix" %in%
      class(dots$m) &&
      all(predictors %in% colnames(dots$m))
  ) {
    if (length(predictors) > length(colnames(dots$m))) {
      m <- dots$m
      m <- m[predictors, predictors]
      class(m) <- c("collinear_cor_matrix", class(m))
      return(m)
    }

    return(dots$m)
  }

  #if df with predictors, compute correlation dataframe
  if (!"collinear_cor_df" %in% class(df)) {
    df <- cor_df(
      df = df,
      predictors = predictors,
      quiet = quiet,
      function_name = function_name
    )
  }

  #create all possible pairs
  df <- rbind(
    df[, c("x", "y", "correlation")],
    data.frame(
      x = df$y,
      y = df$x,
      correlation = df$correlation
    )
  )

  #rows and col names
  variables <- sort(
    unique(
      c(df$x, df$y)
    )
  )

  #empty square matrix
  m <- matrix(
    data = NA,
    nrow = length(variables),
    ncol = length(variables)
  )

  #named vector to map row/column names to indices
  index_map <- stats::setNames(
    object = seq_along(variables),
    nm = variables
  )

  #vectorized indexing to fill in the matrix
  m[
    cbind(
      index_map[df$x],
      index_map[df$y]
    )
  ] <- df$correlation

  #dim names
  dimnames(m) <- list(
    variables,
    variables
  )

  #replace NA in diag with 1
  diag(m) <- 1

  class(m) <- c("collinear_cor_matrix", class(m))

  m
}

Try the collinear package in your browser

Any scripts or data that you put into this service are public.

collinear documentation built on Dec. 8, 2025, 5:06 p.m.