R/fauna_pam.R

Defines functions fauna_pam

Documented in fauna_pam

#' Get a presence-absence matrix
#' @description
#'  Get a presence-absence matrix of species based on its distribution
#' (brazilian states and/or countries) according to Fauna do Brasil.
#'
#' @param data (data.frame) a data.frame imported with the
#' \code{\link{load_faunabr}} function or generated by either
#' \code{\link{select_fauna}} or \code{\link{subset_fauna}} functions
#' @param by_state (logical) get occurrences by State. Default = TRUE
#' @param by_country (logical) get occurrences by countries. Default = FALSE
#' @param remove_empty_sites (logical) remove empty sites (sites without any
#' species) from final presence-absence matrix. Default = TRUE
#' @param return_richness_summary (logical) return a data.frame with the number
#' of species in each site. Default = TRUE
#' @param return_spatial_richness (logical) return a SpatVector with the number
#' of species in each site. Default = TRUE
#' @param return_plot (logical) plot map with the number of species in each
#' site.
#' Only works if return_spatial_richness = TRUE. Default = TRUE
#'
#' @return If return_richness_summary and/or return_spatial_richness is set to
#' TRUE, return a list with:
#' - PAM: the presence-absence matrix (PAM)
#' - Richness_summary: a data.frame with the number of species in each site
#' - Spatial_richness: a SpatVector with the number of species in each site
#' (by State and/or country)
#'
#' If return_richness_summary and return_spatial_richness is set to FALSE,
#' return a presence-absence matrix
#' @usage fauna_pam(data, by_state = TRUE, by_country= FALSE,
#'                remove_empty_sites = TRUE,
#'                return_richness_summary = TRUE,
#'                return_spatial_richness = TRUE,
#'                return_plot = TRUE)
#' @importFrom terra merge plot intersect unwrap
#' @importFrom stats quantile
#' @importFrom grDevices terrain.colors
#' @export
#' @examples
#' #Test function
#' data("fauna_data") #Load fauna e Funga do Brasil data
#' #Select native species of mammals with occurrence only in Brazil
#' br_mammals <- select_fauna(data = fauna_data,
#'                            include_subspecies = FALSE, phylum = "all",
#'                            class = "Mammalia",
#'                            order = "all", family = "all",
#'                            genus = "all",
#'                            lifeForm = "all", filter_lifeForm = "in",
#'                            habitat = "all", filter_habitat = "in",
#'                            states = "all", filter_states = "in",
#'                            country = "BR", filter_country = "only",
#'                            origin = "all", taxonomicStatus = "valid")
#' #Get presence-absence matrix in states
#' pam_mammals <- fauna_pam(data = br_mammals, by_state = TRUE,
#'                          by_country = FALSE,
#'                          remove_empty_sites = TRUE,
#'                          return_richness_summary = TRUE,
#'                          return_spatial_richness = TRUE,
#'                          return_plot = TRUE)
fauna_pam <- function(data, by_state = TRUE, by_country = FALSE,
                      remove_empty_sites = TRUE,
                      return_richness_summary = TRUE,
                      return_spatial_richness = TRUE,
                      return_plot = TRUE) {
  if (missing(data)) {
    stop("Argument data is not defined")
  }

  if (!inherits(data, "data.frame")) {
    stop(paste0("Argument data must be a data.frame, not ", class(data)))
  }

  if (!is.logical(by_state)) {
    stop(paste0("Argument by_state must be logical, not ", class(by_state)))
  }

  if (!is.logical(by_country)) {
    stop(paste0("Argument by_country must be logical, not ",
                class(by_country)))
  }

  if (!is.logical(remove_empty_sites)) {
    stop(paste0("Argument remove_empty_sites must be logical, not ",
                class(remove_empty_sites)))
  }

  if (!is.logical(return_spatial_richness)) {
    stop(paste0("Argument return_spatial_richness must be logical, not ",
                class(return_spatial_richness)))
  }

  #Check colnames in data
  if(!all(c("species", "states", "countryCode") %in%
          colnames(data))) {
    stop("Important columns are missing in data. Check if data is an object
         created by 'load_faunabr()', 'subset_fauna()' or 'select_fauna()'")
  }

  #Check if there is at least one TRUE in states or countries
  if(!by_state & !by_country){
    stop("At least one of the parameters by_state or by_country must be TRUE")
  }

  # Return_spatial_richnessnly works if by_state or by_country is set to TRUE
  if(return_spatial_richness & !by_state & !by_country){
    stop("return_spatial_richness=TRUE only works if by_state or/and by_country is set to TRUE")
  }

  #Get columns
  columns <- c("species")
  if (by_country) {
    columns <- c(columns, "countryCode")
  }
  if (by_state) {
    columns <- c(columns, "states")
  }
  d <- data[, columns, drop = FALSE]

  #Create list of unique values
  v <- colnames(d)[colnames(d)!= "species"]
  l <- lapply(v, function(i){
    unique(unlist(strsplit(d[,i], ";")))
  })
  names(l) <- v

  # All combinations of state/countries
  sites <- expand.grid(l)
  #Remove NA from states
  if(by_state){
    sites <- subset(sites, sites$states != "NA") }


  if(by_state & by_country){
    sites$states[sites$countryCode != "BR"] <- NA
    sites <- unique(sites)
  }

  # Create an empty presence-absence matrix
  presence_matrix <- matrix(0, nrow = nrow(sites), ncol = nrow(d))


  # Fill matrix with values of presence (1) and absence (0)
  for (i in 1:nrow(d)) {
    species_i <- d[i, "species"]

    #Get index
    site_index <- lapply(v, function(z){
      which(sites[,z] %in% unlist(strsplit(
        d[,z][which(d$species == species_i)], ";")))
    })
    site_index <- unique(unlist(site_index))
    presence_matrix[site_index, i] <- 1
  }

  # Name species in the column names
  colnames(presence_matrix) <- d$species

  #Append sites
  pam <- cbind(sites, presence_matrix)

  #Remove empty sites
  if(remove_empty_sites){
    remove_sites <- which(rowSums(pam[,-match(colnames(sites),
                                              colnames(pam))]) == 0)
    if(length(remove_sites) > 0){
      pam <- pam[-remove_sites,]
    }
  }


  ####Richness summary####
  if(return_richness_summary){
    r_sum <- cbind(pam[, intersect(names(pam), v), drop = FALSE],
                   richness = rowSums(pam[, setdiff(names(pam), v), drop = FALSE]))}

  ####Spatialize richness####
  if(return_spatial_richness) {
    #Load data
    if(by_state & !by_country) {
      m <- terra::unwrap(faunabr::states)
      names(m)[2] <- "states"
    }
    if(by_country & !by_state) {
      m <- terra::unwrap(faunabr::world_fauna)
      names(m)[3] <- "countryCode"
    }

    #If by_country and by_country, merge polygons
    if(by_country & by_state){
      m_world <- terra::simplifyGeom(terra::unwrap(faunabr::world_fauna))
      names(m_world)[3] <- "countryCode"
      m_states <- terra::simplifyGeom(terra::unwrap(faunabr::states))
      names(m_states)[2] <- "states"
      m <- terra::union(m_world, m_states)
      m$states[m$country_code != "BR"] <- NA
    }

    #Get columns
    id_pam <- unique(pam[, v, drop = FALSE])

    #Calculate richness
    r <- rowSums(pam[,d$species])

    #Create and transfer ID
    pam_m <- pam
    pam_m$site_id <- 1:nrow(pam_m)

    #Get new dataframe with richness by site
    pam_m <- cbind(pam_m[, v, drop = FALSE], richness = r)

    #####Create complete combination of states and countries####
    pam_m <- merge(m[[v]], pam_m, by = v, all.x = TRUE, all.y = FALSE)
    pam_m$richness[is.na(pam_m$richness)] <- 0 #Remove NA

    #Merge with spatial data
    m <- terra::merge(m, pam_m, by = v, na.rm = FALSE)

    if(return_plot){
      #Get unique values
      unique_values <- unique(pam_m$richness)

      n_breaks <- ifelse(length(unique_values) <= 10, length(unique_values), 10)
      set_breaks <- round(stats::quantile(1:max(pam_m$richness, na.rm = TRUE),
                                    probs = seq(0, 1, length.out = n_breaks + 1)),0)
      plot_title <- ifelse(by_state & !by_country, "Richness by State", ifelse(
        !by_state & by_country, "Richness by country", ifelse(
          by_state & by_country, "Richness by State and country", NA)
      ))

      if(n_breaks >= 10){
        terra::plot(m, "richness", breaks = set_breaks,
                    col = rev(grDevices::terrain.colors(length(set_breaks))),
                    main = plot_title) }    else{
        terra::plot(m, "richness", col = rev(grDevices::terrain.colors(n_breaks)),
                    main = plot_title)
      }
    }
  } #End of return_spatial_richness

  #Return final data
  if(!return_richness_summary) {
    r_sum <- NA
  }
  if(!return_spatial_richness){
    m <- NA
  }


  #Crate final list
  res <- list(PAM = pam,
              Richness_summary = r_sum,
              Spatial_richness = m)

  res <- res[!is.na(res)]

  if(length(res) > 1){
    return(res)
  } else {
    return(pam)
  }
}#End of function

# #Test function
# data("fauna_data") #Load fauna e Funga do Brasil data
# #Select native species of mammals with occurrence only in Brazil
# br_mammals <- select_fauna(data = fauna_data,
#                          include_subspecies = FALSE, phylum = "all",
#                          class = "Mammalia",
#                          order = "all", family = "all",
#                          genus = "all",
#                          lifeForm = "all", filter_lifeForm = "in",
#                          habitat = "all", filter_habitat = "in",
#                          states = "all", filter_states = "in",
#                          country = "brazil", filter_country = "only",
#                          origin = "all", taxonomicStatus = "valid")
# #Get presence-absence matrix in states
# pam_mammals <- fauna_pam(data = br_mammals, by_state = TRUE, by_country = FALSE,
#                          remove_empty_sites = TRUE,
#                          return_richness_summary = TRUE,
#                          return_spatial_richness = TRUE,
#                          return_plot = TRUE)

Try the faunabr package in your browser

Any scripts or data that you put into this service are public.

faunabr documentation built on Nov. 5, 2025, 7:26 p.m.