Nothing
#' Signed pairwise correlation matrix
#'
#' @description
#' Computes a square matrix of pairwise correlations for a set of numeric and/or categorical predictors.
#'
#' If \code{df} is already a correlation dataframe generated by [cor_df()]), the function transforms it into a correlation matrix. Otherwise, [cor_df()] is used internally to compute pairwise correlations before generating the matrix.
#'
#' Supports parallel computation via \code{future::plan()} and optional progress reporting via \code{progressr::handlers()}.
#'
#'
#' @inheritParams collinear
#' @param df (required; dataframe, tibble, or sf) A dataframe with predictors or the output of [cor_df()]. Default: NULL.
#' @return correlation matrix
#'
#' @examples
#' data(vi_smol)
#'
#' ## OPTIONAL: parallelization setup
#' ## irrelevant when all predictors are numeric
#' ## only worth it for large data with many categoricals
#' # future::plan(
#' # future::multisession,
#' # workers = future::availableCores() - 1
#' # )
#'
#' ## OPTIONAL: progress bar
#' # progressr::handlers(global = TRUE)
#'
#' predictors <- c(
#' "koppen_zone", #character
#' "soil_type", #factor
#' "topo_elevation", #numeric
#' "soil_temperature_mean" #numeric
#' )
#'
#' #from dataframe with predictors
#' x <- cor_matrix(
#' df = vi_smol,
#' predictors = predictors
#' )
#'
#' x
#'
#' #from correlation dataframe
#' x <- cor_df(
#' df = vi,
#' predictors = predictors
#' ) |>
#' cor_matrix()
#'
#' x
#'
#' ## OPTIONAL: disable parallelization
#' #future::plan(future::sequential)
#' @autoglobal
#' @family multicollinearity_assessment
#' @author Blas M. Benito, PhD
#' @export
cor_matrix <- function(
df = NULL,
predictors = NULL,
quiet = FALSE,
...
) {
dots <- list(...)
function_name <- validate_arg_function_name(
default_name = "collinear::cor_matrix()",
function_name = dots$function_name
)
df <- validate_arg_df_not_null(
df = df,
function_name = function_name
)
quiet <- validate_arg_quiet(
quiet = quiet,
function_name = function_name
)
if (
"collinear_cor_matrix" %in%
class(dots$m) &&
all(predictors %in% colnames(dots$m))
) {
if (length(predictors) > length(colnames(dots$m))) {
m <- dots$m
m <- m[predictors, predictors]
class(m) <- c("collinear_cor_matrix", class(m))
return(m)
}
return(dots$m)
}
#if df with predictors, compute correlation dataframe
if (!"collinear_cor_df" %in% class(df)) {
df <- cor_df(
df = df,
predictors = predictors,
quiet = quiet,
function_name = function_name
)
}
#create all possible pairs
df <- rbind(
df[, c("x", "y", "correlation")],
data.frame(
x = df$y,
y = df$x,
correlation = df$correlation
)
)
#rows and col names
variables <- sort(
unique(
c(df$x, df$y)
)
)
#empty square matrix
m <- matrix(
data = NA,
nrow = length(variables),
ncol = length(variables)
)
#named vector to map row/column names to indices
index_map <- stats::setNames(
object = seq_along(variables),
nm = variables
)
#vectorized indexing to fill in the matrix
m[
cbind(
index_map[df$x],
index_map[df$y]
)
] <- df$correlation
#dim names
dimnames(m) <- list(
variables,
variables
)
#replace NA in diag with 1
diag(m) <- 1
class(m) <- c("collinear_cor_matrix", class(m))
m
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.