#' @title Ranks spatial predictors
#' @description Ranks spatial predictors generated by [mem_multithreshold()] or [pca_multithreshold()] by their effect in reducing the Moran's I of the model residuals (`ranking.method = "effect"`), or by their own Moran's I (`ranking.method = "moran"`).
#'
#' In the former case, one model of the type `y ~ predictors + spatial_predictor_X` is fitted per spatial predictor, and the Moran's I of this model's residuals is compared with the one of the model without spatial predictors (`y ~ predictors`), to finally rank the spatial predictor from maximum to minimum difference in Moran's I.
#'
#' In the latter case, the spatial predictors are ordered by their Moran's I alone (this is the faster option).
#'
#' In both cases, spatial predictors that are redundant with others at a Pearson correlation > 0.5 and spatial predictors with no effect (no reduction of Moran's I or Moran's I of the spatial predictor equal or lower than 0) are removed.
#'
#' This function has been designed to be used internally by [rf_spatial()] rather than directly by a user.
#' @param data Data frame with a response variable and a set of predictors. Default: `NULL`
#' @param dependent.variable.name Character string with the name of the response variable. Must be in the column names of `data`. Default: `NULL`
#' @param predictor.variable.names Character vector with the names of the predictive variables. Every element of this vector must be in the column names of `data`. Default: `NULL`
#' @param reference.moran.i Moran's I of the residuals of the model without spatial predictors. Default: `1`
#' @param distance.matrix Squared matrix with the distances among the records in `data`. The number of rows of `distance.matrix` and `data` must be the same. If not provided, the computation of the Moran's I of the residuals is omitted. Default: `NULL`
#' @param distance.thresholds Numeric vector with neighborhood distances. All distances in the distance matrix below each value in `dustance.thresholds` are set to 0 for the computation of Moran's I. If `NULL`, it defaults to seq(0, max(distance.matrix), length.out = 4). Default: `NULL`
#' @param ranger.arguments List with \link[ranger]{ranger} arguments. See [rf] or [rf_repeat] for further details.
#' @param spatial.predictors.df Data frame of spatial predictors.
#' @param ranking.method Character, method used by to rank spatial predictors. The method "effect" ranks spatial predictors according how much each predictor reduces Moran's I of the model residuals, while the method "moran" ranks them by their own Moran's I. Default: `"moran"`.
#' @param verbose Logical, ff `TRUE`, messages and plots generated during the execution of the function are displayed, Default: `TRUE`
#' @param n.cores Integer, number of cores to use for parallel execution. Creates a socket cluster with `parallel::makeCluster()`, runs operations in parallel with `foreach` and `%dopar%`, and stops the cluster with `parallel::clusterStop()` when the job is done. Default: `parallel::detectCores() - 1`
#' @param cluster A cluster definition generated with `parallel::makeCluster()`. If provided, overrides `n.cores`. When `cluster = NULL` (default value), and `model` is provided, the cluster in `model`, if any, is used instead. If this cluster is `NULL`, then the function uses `n.cores` instead. The function does not stop a provided cluster, so it should be stopped with `parallel::stopCluster()` afterwards. The cluster definition is stored in the output list under the name "cluster" so it can be passed to other functions via the `model` argument, or using the `%>%` pipe. Default: `NULL`
#' @return A list with four slots:
#' \itemize{
#' \item `method`: Character, name of the method used to rank the spatial predictors.
#' \item `criteria`: Data frame with two different configurations depending on the ranking method. If `ranking.method = "effect"`, the columns contain the names of the spatial predictors, the r-squared of the model, the Moran's I of the model residuals, the difference between the Moran's I of the model including the given spatial predictor, and the Moran's I of the model fitted without spatial predictors, and the interpretation of the Moran's I value. If `ranking.method = "moran"`, only the name of the spatial predictor and it's Moran's I are in the output data frame.
#' \item `ranking`: Ordered character vector with the names of the spatial predictors selected.
#' \item `spatial.predictors.df`: data frame with the selected spatial predictors in the order of the ranking.
#' }
#' @examples
#' if(interactive()){
#'
#' #loading distance matrix
#' data(distance_matrix)
#'
#' #computing Moran's Eigenvector Maps
#' mem.df <- mem(
#' distance.matrix = distance_matrix[1:50, 1:50],
#' distance.threshold = 0
#' )
#'
#' #ranking by the Moran's I of the spatial predictor
#' rank <- rank_spatial_predictors(
#' distance.matrix = distance_matrix[1:50, 1:50],
#' distance.thresholds = 0,
#' spatial.predictors.df = mem.df,
#' ranking.method = "moran",
#' n.cores = 1
#' )
#'
#' #checking Moran's I of MEMs
#' rank$criteria
#'
#' #checking rank of MEMs
#' rank$ranking
#' }
#' @rdname rank_spatial_predictors
#' @export
rank_spatial_predictors <- function(
data = NULL,
dependent.variable.name = NULL,
predictor.variable.names = NULL,
distance.matrix = NULL,
distance.thresholds = NULL,
ranger.arguments = NULL,
spatial.predictors.df = NULL,
ranking.method = c("moran", "effect"),
reference.moran.i = 1,
verbose = FALSE,
n.cores = parallel::detectCores() - 1,
cluster = NULL
){
#predictor.variable.names comes from auto_vif or auto_cor
if(!is.null(predictor.variable.names)){
if(inherits(predictor.variable.names, "variable_selection")){
predictor.variable.names <- predictor.variable.names$selected.variables
}
}
#testing method argument
ranking.method <- match.arg(
arg = ranking.method,
choices = c("moran", "effect"),
several.ok = FALSE
)
#add write.forest = FALSE to ranger.arguments
if(is.null(ranger.arguments)){
ranger.arguments <- list()
}
ranger.arguments$write.forest <- TRUE
ranger.arguments$importance <- "none"
ranger.arguments$local.importance <- FALSE
ranger.arguments$keep.inbag <- FALSE
ranger.arguments$num.trees <- 500
#reference.moran.i
if(is.null(reference.moran.i)){reference.moran.i <- 1}
#CLUSTER SETUP
#cluster is provided
if(!is.null(cluster)){
#n.cores <- NULL
n.cores <- NULL
#flat to not stop cluster after execution
stop.cluster <- FALSE
} else {
#creates and registers cluster
cluster <- parallel::makeCluster(
n.cores,
type = "PSOCK"
)
#registering cluster
doParallel::registerDoParallel(cl = cluster)
#flag to stop cluster
stop.cluster <- TRUE
}
#parallelized loop
spatial.predictors.i <- NULL
spatial.predictors.order <- foreach::foreach(
spatial.predictors.i = seq(1, ncol(spatial.predictors.df)),
.combine = "rbind",
.verbose = verbose
) %dopar% {
#3.2.3.1 preparing data
#spatial predictor name
spatial.predictors.name.i <- colnames(spatial.predictors.df)[spatial.predictors.i]
#computing reduction in Moran's I
if(ranking.method == "effect"){
#training data
data.i <- data.frame(
data,
spatial.predictors.df[, spatial.predictors.i]
)
colnames(data.i)[ncol(data.i)] <- spatial.predictors.name.i
#new predictor.variable.names
predictor.variable.names.i <- c(predictor.variable.names, spatial.predictors.name.i)
#fitting model I
m.i <- spatialRF::rf(
data = data.i,
dependent.variable.name = dependent.variable.name,
predictor.variable.names = predictor.variable.names.i,
seed = spatial.predictors.i,
distance.matrix = distance.matrix,
distance.thresholds = distance.thresholds,
scaled.importance = FALSE,
ranger.arguments = ranger.arguments,
verbose = FALSE
)
#out.df
out.i <- data.frame(
spatial.predictors.name = spatial.predictors.name.i,
model.r.squared = m.i$performance$r.squared.oob,
moran.i = m.i$residuals$autocorrelation$max.moran,
p.value = m.i$residuals$autocorrelation$per.distance$p.value[1],
ranking.criteria = reference.moran.i - m.i$residuals$autocorrelation$max.moran
)
}
#computing Moran's I of the spatial predictors
if(ranking.method == "moran"){
#moran's I of spatial predictor
m.i <- spatialRF::moran(
x = spatial.predictors.df[, spatial.predictors.i],
distance.matrix = distance.matrix,
distance.threshold = distance.thresholds[1],
verbose = FALSE
)
#out.df
out.i <- data.frame(
spatial.predictors.name = spatial.predictors.name.i,
ranking.criteria = m.i$test$moran.i,
interpretation = m.i$test$interpretation
)
}
#returning output
return(out.i)
} #end of parallelized loop
#variables to avoid check complaints
ranking.criteria <- NULL
interpretation <- NULL
#getting only spatial predictors with positive spatial correlation
if(ranking.method == "moran"){
spatial.predictors.order <- dplyr::filter(
spatial.predictors.order,
interpretation == "Positive spatial correlation"
)
}
#arranging by Moran's I
spatial.predictors.order <- dplyr::arrange(
spatial.predictors.order,
dplyr::desc(ranking.criteria)
)
#return output
out.list <- list()
out.list$method <- ranking.method
out.list$criteria <- spatial.predictors.order
out.list$ranking <- spatial.predictors.order$spatial.predictors.name
out.list$spatial.predictors.df <- spatial.predictors.df[, spatial.predictors.order$spatial.predictors.name]
#stopping cluster
if(stop.cluster == TRUE){
parallel::stopCluster(cl = cluster)
}
#returning output list
out.list
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.