# Nothing
#' @title Remove redundant spatial predictors
#' @description Removes spatial predictors that are highly correlated with other spatial predictors or with non-spatial predictors. Particularly useful when using multiple distance thresholds that produce correlated spatial predictors.
#' @param data Data frame containing the predictor variables. Default: `NULL`.
#' @param predictor.variable.names Character vector of non-spatial predictor names. Must match column names in `data`. Can also be a `variable_selection` object. If `NULL` or empty, only the filtering among spatial predictors is applied. Default: `NULL`.
#' @param spatial.predictors.df Data frame of spatial predictors (e.g., from [mem_multithreshold()]). Default: `NULL`.
#' @param cor.threshold Numeric between 0 and 1 (recommended: 0.5 to 0.75). Maximum allowed absolute Pearson correlation. Default: `0.50`.
#' @return Data frame containing only spatial predictors with absolute correlations below `cor.threshold` (both among themselves and with non-spatial predictors).
#' @details
#' Filtering is performed in two steps:
#' \enumerate{
#'   \item Remove spatial predictors correlated with each other (using [auto_cor()])
#'   \item Remove spatial predictors correlated with non-spatial predictors
#' }
#' This two-step process ensures the retained spatial predictors are independent of both each other and the environmental predictors, improving model interpretability and reducing multicollinearity.
#'
#' In the second step a spatial predictor is dropped when the absolute value of its Pearson correlation with any non-spatial predictor is equal to or larger than `cor.threshold`, so strong negative correlations are filtered as well as strong positive ones. Correlations that cannot be computed (`NA`, e.g. when a column has zero variance) are treated as zero and therefore never cause a spatial predictor to be removed.
#' @examples
#' data(
#'   plants_df,
#'   plants_predictors,
#'   plants_distance
#' )
#'
#' # Generate spatial predictors using multiple distance thresholds
#' mem.df <- mem_multithreshold(
#'   distance.matrix = plants_distance,
#'   distance.thresholds = c(0, 1000)
#' )
#'
#' # Filter spatial predictors to remove redundancy
#' # Removes spatial predictors correlated > 0.50 with each other
#' # or with environmental predictors
#' spatial.predictors.filtered <- filter_spatial_predictors(
#'   data = plants_df,
#'   predictor.variable.names = plants_predictors,
#'   spatial.predictors.df = mem.df,
#'   cor.threshold = 0.50
#' )
#'
#' # Check dimensions
#' ncol(mem.df) # original number
#' ncol(spatial.predictors.filtered) # after filtering
#'
#' @rdname filter_spatial_predictors
#' @family spatial_analysis
#' @export
filter_spatial_predictors <- function(
  data = NULL,
  predictor.variable.names = NULL,
  spatial.predictors.df = NULL,
  cor.threshold = 0.50
) {
  # predictor.variable.names may come from auto_vif() or auto_cor() as a
  # variable_selection object; unwrap it into a plain character vector.
  # inherits() is FALSE for NULL, so no separate NULL check is needed.
  if (inherits(predictor.variable.names, "variable_selection")) {
    predictor.variable.names <- predictor.variable.names$selected.variables
  }

  # step 1: filtering spatial predictors by pair-wise correlation
  spatial.predictors.df <- auto_cor(
    x = spatial.predictors.df,
    preference.order = colnames(spatial.predictors.df),
    cor.threshold = cor.threshold,
    verbose = FALSE
  )$selected.variables.df

  # edge cases: no spatial predictors remain after step 1, or there are no
  # non-spatial predictors to compare against, so step 2 has nothing to do
  if (
    ncol(spatial.predictors.df) == 0 ||
      length(predictor.variable.names) == 0
  ) {
    return(spatial.predictors.df)
  }

  # step 2: filtering spatial predictors by correlation with non-spatial ones
  # generating df of non-spatial predictors
  non.spatial.predictors.df <- data[, predictor.variable.names, drop = FALSE]

  # absolute correlation between non-spatial (rows) and spatial (columns)
  # predictors; abs() is required because a strong negative correlation is
  # as redundant as a strong positive one
  cor.predictors <- abs(
    stats::cor(
      non.spatial.predictors.df,
      spatial.predictors.df
    )
  )

  # undefined correlations (NA, e.g. zero-variance columns) carry no evidence
  # of redundancy; set them to 0 so they cannot poison the column maxima
  cor.predictors[is.na(cor.predictors)] <- 0

  # max absolute correlation of each spatial predictor
  max.cor.spatial.predictors <- apply(cor.predictors, 2, FUN = max)

  # selected spatial predictors: those strictly below the threshold
  selected.spatial.predictors <- names(max.cor.spatial.predictors)[
    max.cor.spatial.predictors < cor.threshold
  ]

  # subsetting spatial.predictors.df and returning the result
  spatial.predictors.df[,
    selected.spatial.predictors,
    drop = FALSE
  ]
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.