R/occfilt_select.R

Defines functions occfilt_select

Documented in occfilt_select

#' Select filtered occurrences
#'
#' @description
#' Select filtered occurrences based on number of records and spatial autocorrelation (see details)
#'
#' @param occ_list list. A list with filtered species occurrences obtained by testing several filtering values (see \code{\link{occfilt_env}} and \code{\link{occfilt_geo}})
#' @param x character. Column name with longitude data
#' @param y character. Column name with latitude data
#' @param env_layer SpatRaster. Raster variables that will be used to fit the model. Factor variables will be removed.
#' @param filter_prop logical. If TRUE, the function will return a list with the filtered occurrences and a tibble
#' with the spatial autocorrelation and number of occurrence values
#'
#' @details The function implements the approach used in Velazco et al. (2020),
#' which consists of calculating, for each filtered dataset:
#' \itemize{
#'   \item 1- the number of occurrences.
#'   \item 2- the spatial autocorrelation based on Moran's I for each variable.
#'   \item 3- the mean spatial autocorrelation among variables.
#'   }
#'
#' The function then selects those datasets whose mean spatial autocorrelation is lower
#' than or equal to the mean across all datasets, and from this subset selects the one
#' with the highest number of occurrences.
#'
#' If you use occfilt_select, please cite Velazco et al. (2020) as reference.
#'
#' @return If filter_prop = FALSE, a tibble with selected filtered occurrences.
#' If filter_prop = TRUE, a list with following objects:
#' \itemize{
#'   \item A tibble with selected filtered occurrences
#'   \item A tibble with filter properties with columns:
#'   \itemize{
#'   \item filt_value: values used for filtering, the value with an asterisk will denote the one selected
#'   \item n_records: number of occurrences
#'   \item mean_autocorr: mean spatial autocorrelation.
#'   \item the remaining columns have the spatial autocorrelation values for each variable.
#'   }
#'   }
#'
#' @references
#' \itemize{
#' \item Velazco, S. J. E., Svenning, J-C., Ribeiro, B. R., & Laureto, L. M. O. (2020). On
#' opportunities and threats to conserve the phylogenetic diversity of Neotropical palms.
#' Diversity and Distributions, 27, 512–523. https://doi.org/10.1111/ddi.13215
#' }
#'
#' @export
#'
#' @seealso \code{\link{occfilt_env}}, \code{\link{occfilt_geo}}
#'
#' @examples
#' \dontrun{
#' require(terra)
#' require(dplyr)
#'
#' # Environmental variables
#' somevar <- system.file("external/somevar.tif", package = "flexsdm")
#' somevar <- terra::rast(somevar)
#'
#' plot(somevar)
#'
#' # Species occurrences
#' data("spp")
#' spp
#' spp1 <- spp %>% dplyr::filter(species == "sp1", pr_ab == 1)
#'
#' ## %######################################################%##
#' ####                  Cellsize method                   ####
#' ## %######################################################%##
#' # Using cellsize method
#' filtered_occ <- occfilt_geo(
#'   data = spp1,
#'   x = "x",
#'   y = "y",
#'   env_layer = somevar,
#'   method = c("cellsize", factor = c(1, 4, 8, 12, 16, 20)),
#'   prj = crs(somevar)
#' )
#'
#' filtered_occ
#'
#' # Select filtered occurrences based on
#' # number of records and spatial autocorrelation
#' occ_selected <- occfilt_select(
#'   occ_list = filtered_occ,
#'   x = "x",
#'   y = "y",
#'   env_layer = somevar,
#'   filter_prop = FALSE
#' )
#' occ_selected
#'
#' occ_selected <- occfilt_select(
#'   occ_list = filtered_occ,
#'   x = "x",
#'   y = "y",
#'   env_layer = somevar,
#'   filter_prop = TRUE
#' )
#' occ_selected$occ
#'
#' occ_selected$filter_prop
#' }
occfilt_select <- function(occ_list, x, y, env_layer, filter_prop = FALSE) {
  # Bound to NULL so the bare column names used inside dplyr verbs below are
  # defined as local variables.
  mean_autocorr <- n_records <- filt_value <- NULL

  if (!inherits(occ_list, "list")) {
    stop("occ_list must be a list")
  }
  if (length(occ_list) == 0) {
    stop("occ_list must contain at least one filtered dataset", call. = FALSE)
  }

  # The selected dataset is retrieved by name further down. An unnamed list
  # would get ids "1", "2", ... from bind_rows() that cannot be used to index
  # it, so give it matching positional names up front.
  if (is.null(names(occ_list))) {
    names(occ_list) <- as.character(seq_along(occ_list))
  }

  # Remove factor variables
  is_categorical <- terra::is.factor(env_layer)
  if (any(is_categorical)) {
    removed_vars <- names(env_layer)[is_categorical]
    env_layer <- env_layer[[!is_categorical]]
    message(
      "Next categorical variables were removed: ",
      paste(removed_vars, collapse = ", ")
    )
  }

  # Calculates, for every filtered dataset, the absolute Moran's I of each
  # variable, their mean, and the number of records
  filtpropr <- vector("list", length(occ_list))
  for (ii in seq_along(occ_list)) {
    # all_of() forces x and y to be read as column-name strings; bare symbols
    # would match columns literally called "x"/"y" first
    coord <- occ_list[[ii]] %>% dplyr::select(dplyr::all_of(c(x, y)))
    env_values <- data.frame(terra::extract(env_layer, coord, ID = FALSE))

    # Inverse-distance weights between records, with zero self-weight
    inv_dist <- 1 / as.matrix(stats::dist(coord))
    diag(inv_dist) <- 0

    # A failure for one dataset must not abort the whole comparison: report
    # it and keep the dataset in the table with a missing autocorrelation
    autocorr <- tryCatch(
      as.data.frame(lapply(env_values, function(v) {
        abs(morani(x = v, weight = inv_dist, scaled = TRUE))
      })),
      error = function(e) {
        warning(
          "Spatial autocorrelation could not be calculated for dataset ",
          names(occ_list)[ii], ": ", conditionMessage(e),
          call. = FALSE
        )
        NULL
      }
    )

    if (is.null(autocorr)) {
      autocorr <- data.frame(mean_autocorr = NA_real_)
    } else {
      autocorr$mean_autocorr <- mean(unlist(autocorr, use.names = FALSE))
    }
    autocorr$n_records <- nrow(coord)
    filtpropr[[ii]] <- autocorr
  }
  names(filtpropr) <- names(occ_list)
  filtpropr <- dplyr::bind_rows(filtpropr, .id = "filt_value")

  # Select: keep datasets whose mean autocorrelation does not exceed the
  # overall mean (datasets with missing autocorrelation are ignored), then
  # take the one with most records; which.max() keeps the first on ties
  candidates <- filtpropr %>%
    dplyr::filter(mean_autocorr <= mean(mean_autocorr, na.rm = TRUE))
  if (nrow(candidates) == 0) {
    stop(
      "No dataset could be selected because spatial autocorrelation ",
      "could not be calculated for any of them",
      call. = FALSE
    )
  }
  selected_value <- candidates$filt_value[which.max(candidates$n_records)]

  # Flag the selected filtering value with an asterisk in the summary table
  filtpropr$filt_value[filtpropr$filt_value == selected_value] <-
    paste("*", selected_value)

  message("Dataset with filtered value ", selected_value, " was selected")

  selected_occ <- occ_list[[selected_value]]
  if (filter_prop) {
    list(
      occ = selected_occ,
      filter_prop = dplyr::relocate(
        filtpropr, filt_value, mean_autocorr, n_records
      )
    )
  } else {
    selected_occ
  }
}
sjevelazco/flexsdm documentation built on Feb. 28, 2025, 9:07 a.m.