R/ignore/Ostats_regional.R

Defines functions Ostats_regional

#' Calculate O-statistics evaluated against regional species pool
#'
#' This function evaluates the O-statistics (community-level pairwise niche
#' overlap statistics)against a regional null model. In contrast to
#' \code{Ostats}, it requires trait and species information about the regional
#' species pool.
#'
#' @param traits a vector of trait measurement with nrow = n individuals.
#' @param plots a factor with length equal to nrow(traits) that indicates the
#'   community each individual belongs to.
#' @param sp a factor with length equal to nrow(traits) that indicates the taxon
#'   of each individual.
#' @param reg_pool_traits trait measurements in the species regional pool to
#'   compare the dataset with.
#' @param reg_pool_sp species identification for each measurement in the
#'   regional species pool.
#' @param run_null_model whether to run a null model (if \code{TRUE}) and evaluate the
#'  O-statistics against it, or simply return the raw O-statistics (if \code{FALSE}).
#'  Defaults to \code{TRUE}.
#' @param nperm the number of null model permutations to generate. Defaults to 99.
#' @param nullqs numeric vector of probabilities with values in [0,1] to set
#'   effect size quantiles. Defaults to \code{c(0.025, 0.975)}.
#' @param random_seed User may supply a random seed to enable reproducibility
#'   of null model output. A warning is issued, and a random seed is generated
#'   based on the local time, if the user does not supply a seed.
#' @param density_args additional arguments to pass to \code{\link[stats]{density}}, such
#'   as \code{bw}, \code{n}, or \code{adjust}. If none are provided, default
#'   values are used.
#'
#' @details This function evaluates the overlap statistics against a regional
#'   species pool. It takes all individual trait measurements in the community to
#'   compare with the regional pool (ignoring species identity) to calculate
#'   pairwise overlap for each community, which indicates the regional pool
#'   space a community takes up.
#'
#'   Total pool null model is generated by sampling n trait measurements (n =
#'   number of trait measurements for the community) from the regional pool and
#'   calculate the pairwise_overlap between the random sample and the regional
#'   pool. By-species null model is calculated by sampling trait measurements of
#'   the same species from the regional pool for each species present in the
#'   community to calculate pairwise overlap between the sample and the whole
#'   regional pool.
#'
#'   In this function, the area under all density functions are normalized to 1.
#'
#'   This function does not support circular data types, and multiple traits are
#'   evaluated separately rather than being combined into a hypervolume.
#'
#' @return The function returns a list containing 4 objects:
#'   \item{overlaps_reg}{pairwise overlap output between the trait data and the
#'   regional species pool.}
#'   \item{overlaps_reg_allpool_ses}{effect size
#'   statistics between observed O-stats and the total-pool null model}
#'   \item{overlaps_reg_bysp_ses}{effect size statistics between observed
#'   O-stats and the by-species null model}
#'
#' @author Quentin Read, John Grady, Arya Y. Yue, Isadora Fluck E., Ben Baiser,
#'   Angela Strecker, Phoebe Zarnetske, and Sydne Record
#'
#' @seealso \code{\link{Ostats}} to evaluate against a local null model.
#' @seealso \code{\link{pairwise_overlap}} to calculate overlap between two
#'   empirical density estimates.
#'
#' @examples
#' # use the 2015 data from the two communities HARV and BART
#' sites2015 <- reg_pool[reg_pool$siteID %in% c('HARV', 'BART'), ]
#' sites2015 <- sites2015[substr(as.character(sites2015$endDate),1,4)=="2015", ]
#'
#' # making the overall data (HARV and BART, sites from the same domain)
#' # the regional pool across space and time
#' reg_pool_traits <- list (as.matrix(reg_pool$log_weight), as.matrix(reg_pool$log_weight))
#' reg_pool_sp <- list(as.matrix(reg_pool$taxonID), as.matrix(reg_pool$taxonID))
#' names(reg_pool_traits) <- c("HARV", "BART")
#' names(reg_pool_sp) <- c("HARV", "BART")
#' Ostats_reg_example <- Ostats_regional(traits = as.matrix(sites2015$log_weight),
#'                                      plots = factor(sites2015$siteID),
#'                                      sp = factor(sites2015$taxonID),
#'                                      reg_pool_traits = reg_pool_traits,
#'                                      reg_pool_sp = reg_pool_sp,
#'                                      nperm = 2)
#'
#' @export
Ostats_regional <-function(traits, plots, sp, reg_pool_traits, reg_pool_sp, run_null_model = TRUE, nperm = 99, nullqs = c(0.025, 0.975), random_seed = NULL, density_args = list()) {

  # If user did not supply a random seed, generate one and print a message.
  if (run_null_model && missing(random_seed)) {
    random_seed <- round(as.numeric(Sys.time()) %% 12345)
    message(paste("Note: argument random_seed was not supplied; setting seed to", random_seed))
  }

  # Set random seed.
  set.seed(random_seed)

  # Declaration of data structures to hold the results

  # Data structures for observed O-Stats
  overlaps_reg <- matrix(nrow = nlevels(plots), ncol = ncol(traits))

  # Data structures for null O-Stats
  allpool_overlaps_reg_null <- array(dim = c(nlevels(plots), ncol(traits), nperm))
  bysp_overlaps_reg_null <- array(dim = c(nlevels(plots), ncol(traits), nperm))

  # Name rows and columns of the outputs.
  dimnames(overlaps_reg) <- list(levels(plots), dimnames(traits)[[2]])

  # Calculation of observed O-Stats

  print('Calculating observed regional O-stats for each community . . .')
  pb <- utils::txtProgressBar(min = 0, max = nlevels(plots), style = 3)

  # Define common grid limits so that all density functions are estimated across the same domain.
  grid_limits <- apply(traits, 2, range)

  # Add a multiplicative factor to the upper and lower end of the range so that the tails aren't cut off.
  extend_grid <- c(-0.5, 0.5) %*% t(apply(grid_limits,2,diff))
  grid_limits <- grid_limits + extend_grid

  # Get trait density of regional pool for each trait (need not be rerun every time through the loop)
  density_reg_pool <- lapply(1:ncol(traits), function(t) trait_density(x = `[[`(reg_pool_traits, levels(plots)[s])[, t], grid_limits = grid_limits[, t, drop = FALSE], normal = TRUE, data_type = 'linear', density_args = density_args, circular_args = list()))

  for (s in 1:nlevels(plots)) {
    for (t in 1:ncol(traits)) {

      overlap_reg_st <- try({
        # Trait density at focal plot
        density_focal_plot <- trait_density(x = traits[plots == levels(plots)[s], t], grid_limits = grid_limits[, t, drop = FALSE], normal = TRUE, data_type = 'linear', density_args = density_args, circular_args = list())
        pairwise_overlap(a = density_focal_plot, b = density_reg_pool[[t]], density_args = density_args)
      }, TRUE)

      overlaps_reg[s, t] <- if(inherits(overlap_reg_st, 'try-error')) NA else overlap_reg_st
    }
    utils::setTxtProgressBar(pb, s)
  }

  close(pb)

  if (run_null_model) {

    print('Calculating total-pool and species-pool null distributions of regional O-stats . . . ')
    pb <- utils::txtProgressBar(min = 0, max = nperm, style = 3)

    # Null model generation and calculation of null O-Stats

    # Total-pool null model: generation and calculation done in the same loop
    for (i in 1:nperm) {
      for (s in 1:nlevels(plots)) {
        for (t in 1:ncol(traits)) {
          trnull_sti <- sample(x=`[[`(reg_pool_traits, levels(plots)[s])[, t], size=sum(plots == levels(plots)[s]), replace=FALSE)

          allpool_overlap_reg_sti <- try({
            # Trait density at focal plot
            density_focal_plot <- trait_density(x = trnull_sti, grid_limits = grid_limits[, t, drop = FALSE], normal = TRUE, data_type = 'linear', density_args = density_args, circular_args = list())
            pairwise_overlap(a = density_focal_plot, b = density_reg_pool[[t]], density_args = density_args)
          }, TRUE)

          allpool_overlaps_reg_null[s, t, i] <- if (inherits(allpool_overlap_reg_sti, 'try-error')) NA else allpool_overlap_reg_sti
        }
      }

      # By-species null model: generation of null communities and calculation of O-stats (in same loop)


      for (s in 1:nlevels(plots)) {
        for (t in 1:ncol(traits)) {
          sp_sti <- sp[plots == levels(plots)[s]]
          trnull_sti <- c()
          spnames_sti <- unique(sp_sti)
          sptable_sti <- table(sp_sti)

          for (j in 1:length(spnames_sti)) {
            z <- `[[`(reg_pool_traits, levels(plots)[s])[`[[`(reg_pool_sp, levels(plots)[s]) == as.character(spnames_sti[j]), t]
            if (any(!is.na(z))) trnull_sti <- c(trnull_sti, sample(x=z, size=sum(sp_sti == spnames_sti[j]), replace=FALSE))
          }

          bysp_overlap_reg_sti <- try ({
            # Trait density at focal plot
            density_focal_plot <- trait_density(x = trnull_sti, grid_limits = grid_limits[, t, drop = FALSE], normal = TRUE, data_type = 'linear', density_args = density_args, circular_args = list())
            pairwise_overlap(a = density_focal_plot, b = density_reg_pool[[t]], density_args = density_args)
          }, TRUE)

          bysp_overlaps_reg_null[s, t, i] <- if (inherits(bysp_overlap_reg_sti, 'try-error')) NA else bysp_overlap_reg_sti

        }
      }
      utils::setTxtProgressBar(pb, i)
    }

    close(pb)
    print('Extracting null quantiles to get standardized effect sizes (almost done!) . . .')

    overlaps_reg_allpool_ses <- get_ses(overlaps_reg, allpool_overlaps_reg_null, nullqs)
    overlaps_reg_bysp_ses <- get_ses(overlaps_reg, bysp_overlaps_reg_null, nullqs)

    list(overlaps_reg=overlaps_reg, overlaps_reg_allpool_ses=overlaps_reg_allpool_ses, overlaps_reg_bysp_ses=overlaps_reg_bysp_ses)
  } else {
    list(overlaps_reg=overlaps_reg)
  }

}
NEON-biodiversity/Ostats documentation built on Nov. 21, 2024, 4:01 a.m.