#' Select filtered occurrences
#'
#' @description
#' Select filtered occurrences based on number of records and spatial autocorrelation (see details)
#'
#' @param occ_list list. A list of filtered species occurrences obtained by testing several filter values (see \code{\link{occfilt_env}} and \code{\link{occfilt_geo}})
#' @param x character. Column name with longitude data
#' @param y character. Column name with latitude data
#' @param env_layer SpatRaster. Raster variables that will be used to fit the model. Factor variables will be removed.
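#' @param filter_prop logical. If TRUE, the function will return a list with the selected filtered
#' occurrences and a tibble with the spatial autocorrelation and number of occurrences of each
#' filtered dataset. If FALSE, only the selected filtered occurrences are returned. Default FALSE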
#'
#' @details The function implements the approach used in Velazco et al. (2020),
#' which consists of calculating for each filtered dataset:
#' \itemize{
#' \item 1- the number of occurrences.
#' \item 2- the spatial autocorrelation based on Moran's I for each variable.
#' \item 3- the mean spatial autocorrelation among variables.
#' }
#'
#' The function then selects those datasets with a mean spatial autocorrelation
#' lower than or equal to the mean across all datasets and, from this subset,
#' selects the one with the highest number of occurrences.
#'
#' If you use occfilt_select, please cite Velazco et al. (2020) as reference.
#'
#' @return If filter_prop = FALSE, a tibble with the selected filtered occurrences.
#' If filter_prop = TRUE, a list with the following objects:
#' \itemize{
#' \item occ: a tibble with the selected filtered occurrences
#' \item filter_prop: a tibble with the filter properties, with columns:
#' \itemize{
#' \item filt_value: values used for filtering; the value marked with an asterisk is the one selected
#' \item mean_autocorr: mean spatial autocorrelation.
#' \item n_records: number of occurrences
#' \item the remaining columns have the spatial autocorrelation values for each variable.
#' }
#' }
#'
#' @references
#' \itemize{
#' \item Velazco, S. J. E., Svenning, J-C., Ribeiro, B. R., & Laureto, L. M. O. (2020). On
#' opportunities and threats to conserve the phylogenetic diversity of Neotropical palms.
#' Diversity and Distributions, 27, 512–523. https://doi.org/10.1111/ddi.13215
#' }
#'
#' @export
#'
#' @seealso \code{\link{occfilt_env}}, \code{\link{occfilt_geo}}
#'
#' @examples
#' \dontrun{
#' require(terra)
#' require(dplyr)
#'
#' # Environmental variables
#' somevar <- system.file("external/somevar.tif", package = "flexsdm")
#' somevar <- terra::rast(somevar)
#'
#' plot(somevar)
#'
#' # Species occurrences
#' data("spp")
#' spp
#' spp1 <- spp %>% dplyr::filter(species == "sp1", pr_ab == 1)
#'
#' ## %######################################################%##
#' #### Cellsize method ####
#' ## %######################################################%##
#' # Using cellsize method
#' filtered_occ <- occfilt_geo(
#' data = spp1,
#' x = "x",
#' y = "y",
#' env_layer = somevar,
#' method = c("cellsize", factor = c(1, 4, 8, 12, 16, 20)),
#' prj = crs(somevar)
#' )
#'
#' filtered_occ
#'
#' # Select filtered occurrences based on
#' # number of records and spatial autocorrelation
#' occ_selected <- occfilt_select(
#' occ_list = filtered_occ,
#' x = "x",
#' y = "y",
#' env_layer = somevar,
#' filter_prop = FALSE
#' )
#' occ_selected
#'
#' occ_selected <- occfilt_select(
#' occ_list = filtered_occ,
#' x = "x",
#' y = "y",
#' env_layer = somevar,
#' filter_prop = TRUE
#' )
#' occ_selected$occ
#'
#' occ_selected$filter_prop
#' }
occfilt_select <- function(occ_list, x, y, env_layer, filter_prop = FALSE) {
  # Defined locally so the bare column names used in the dplyr calls below
  # resolve to a known symbol.
  mean_autocorr <- n_records <- filt_value <- NULL

  if (!inherits(occ_list, "list")) {
    stop("occ_list must be a list")
  }
  if (length(occ_list) == 0) {
    stop("occ_list must contain at least one filtered dataset")
  }
  # The selected dataset is looked up by name further down; an unnamed list
  # would make that lookup return NULL, so fall back to positional names.
  if (is.null(names(occ_list))) {
    names(occ_list) <- as.character(seq_along(occ_list))
  }

  # Remove factor variables
  is_categorical <- terra::is.factor(env_layer)
  names(is_categorical) <- names(env_layer)
  if (any(is_categorical)) {
    env_layer <- env_layer[[!is_categorical]]
    message(
      "Next categorical variables were removed: ",
      paste(names(is_categorical)[is_categorical], collapse = ", ")
    )
  }

  # Calculates spatial autocorrelation for every filtered dataset
  filtpropr <- vector("list", length(occ_list))
  for (ii in seq_along(occ_list)) {
    # x and y hold column *names*; all_of() selects the columns they name
    # rather than columns that happen to be literally called "x" and "y".
    coord <- dplyr::select(occ_list[[ii]], dplyr::all_of(c(x, y)))
    env_values <- data.frame(terra::extract(env_layer, coord, ID = FALSE))

    # Inverse-distance weight matrix with a zero diagonal.
    # NOTE(review): duplicated coordinates give a distance of 0 and therefore
    # an infinite weight; confirm occ_list never contains duplicated points.
    weights <- 1 / as.matrix(stats::dist(coord))
    diag(weights) <- 0

    # Absolute Moran's I per variable. A failure for one dataset is reported
    # and that dataset is kept with only its number of records, so the
    # remaining datasets can still be compared.
    autocorr <- tryCatch(
      as.data.frame(
        lapply(env_values, function(v) {
          abs(morani(x = v, weight = weights, scaled = TRUE))
        })
      ),
      error = function(e) {
        warning(
          "Spatial autocorrelation could not be calculated for dataset ",
          names(occ_list)[ii], ": ", conditionMessage(e),
          call. = FALSE
        )
        NULL
      }
    )

    if (is.null(autocorr)) {
      filtpropr[[ii]] <- data.frame(n_records = nrow(coord))
    } else {
      autocorr$mean_autocorr <- rowMeans(autocorr)
      autocorr$n_records <- nrow(coord)
      filtpropr[[ii]] <- autocorr
    }
  }
  names(filtpropr) <- names(occ_list)
  filtpropr <- dplyr::bind_rows(filtpropr, .id = "filt_value")

  if (!"mean_autocorr" %in% names(filtpropr)) {
    stop("Spatial autocorrelation could not be calculated for any dataset")
  }

  # Select: keep datasets whose mean autocorrelation is not above the overall
  # mean (datasets with a missing value are ignored in that mean and dropped
  # by filter), then take the one with most records; ties go to the first.
  candidates <- filtpropr %>%
    dplyr::filter(mean_autocorr <= mean(mean_autocorr, na.rm = TRUE)) %>%
    dplyr::filter(n_records == max(n_records)) %>%
    dplyr::pull("filt_value")
  selected_value <- candidates[1]

  if (is.na(selected_value)) {
    warning(
      "No dataset could be selected because the mean spatial ",
      "autocorrelation is missing for all of them",
      call. = FALSE
    )
    selected_occ <- NULL
  } else {
    selected_occ <- occ_list[[selected_value]]
    # Flag the chosen filter value with an asterisk in the properties table
    filtpropr$filt_value[filtpropr$filt_value == selected_value] <-
      paste("*", selected_value)
    message("Dataset with filtered value ", selected_value, " was selected")
  }

  if (filter_prop) {
    return(list(
      occ = selected_occ,
      filter_prop = dplyr::relocate(
        filtpropr, filt_value, mean_autocorr, n_records
      )
    ))
  }
  selected_occ
}
# NOTE: the two lines below are web-page boilerplate captured with this file, not R code.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.