R/Ostats.R
In Ostats: O-Stats, or Pairwise Community-Level Niche Overlap Statistics

Documented in Ostats

#' @title Calculate O-statistics (community-level pairwise niche overlap statistics)
#'
#' @description This is the primary function in the Ostats package. It calculates O-statistics
#' by finding the trait density overlap among all pairs of species in each
#' community and taking the mean or median. Next it optionally evaluates the
#' O-statistics against a local null model. This is done separately for each trait.
#'
#' @param traits a numeric vector or matrix of trait measurements. The number of elements
#'   in the vector or number of rows in the matrix is the number of individuals,
#'   and the number of columns of the matrix is the number of traits.
#' @param plots a factor with length equal to \code{nrow(traits)} that indicates the
#'   community each individual belongs to.
#' @param sp a factor with length equal to \code{nrow(traits)} that indicates the taxon
#'   of each individual.
#' @param discrete whether trait data may take continuous or discrete values. Defaults to
#'   \code{FALSE} (all traits continuous). A single logical value or a logical
#'   vector with length equal to the number of columns in traits. See details below.
#' @param circular whether trait data are circular (e.g., hours or angles). Defaults to
#'   \code{FALSE} (all traits non-circular). A single logical value or a logical
#'   vector with length equal to the number of columns in traits. See details below.
#' @param output specifies whether median or mean is calculated. Default \code{"median"}.
#' @param weight_type specifies weights to be used to calculate the median or mean.
#'   Default \code{"hmean"} (harmonic mean), meaning each pair of species is weighted
#'   by the harmonic mean of abundances.
#' @param run_null_model whether to run a null model (if \code{TRUE}) and evaluate the
#'  O-statistics against it, or simply return the raw O-statistics (if \code{FALSE}).
#'  Defaults to \code{TRUE}.
#' @param nperm the number of null model permutations to generate. Defaults to 99.
#' @param nullqs numeric vector of probabilities with values in [0,1] to set
#'   effect size quantiles. Defaults to \code{c(0.025, 0.975)}.
#' @param shuffle_weights If \code{TRUE}, shuffle weights given to pairwise overlaps
#'   within a community when generating null models.
#' @param swap_means If \code{TRUE}, swap means of body sizes within a community when
#'   generating null models.
#' @param random_seed User may supply a random seed to enable reproducibility
#'   of null model output. A warning is issued, and a random seed is generated
#'   based on the local time, if the user does not supply a seed.
#' @param unique_values Vector of all possible discrete values that \code{traits}
#'   can take. Only used if \code{discrete = TRUE} and \code{circular = TRUE}.
#' @param circular_args optional list of additional arguments to pass to
#'  \code{\link[circular]{circular}}. Only used if \code{circular = TRUE} and
#'  \code{discrete = FALSE}.
#'  Note that continuous circular data must be provided in radian units.
#' @param density_args additional arguments to pass to \code{\link[stats]{density}}, such as
#' \code{bw}, \code{n}, or \code{adjust}. If none are provided, default values
#' are used.
#' @param verbose If \code{TRUE}, progress messages are displayed. Defaults to \code{FALSE}.
#'
#' @details This function calculates overlap statistics and optionally evaluates them
#'   against a local null model. By default, it calculates the median of pairwise
#'   overlaps, weighted by harmonic mean of species abundaces of the species
#'   pairs in each community. Two results are produced, one normalizing the area
#'   under all density functions to 1, the other making the area under all density
#'   functions proportional to the number of observations in that group.
#'
#'   If \code{discrete = FALSE}, continuous kernel density functions are estimated for
#'   each species at each community, if \code{TRUE}, discrete functions (histograms) are
#'   estimated.
#'
#'   If \code{circular = TRUE} and \code{discrete = FALSE}, the function \code{\link[circular]{circular}}
#'   is used to convert each column of \code{traits} to an object of class circular.
#'   Unless additional arguments about input data type are specified, it is
#'   assumed that the circular input data are in radian units (0 to 2*pi).
#'
#'   If \code{circular = TRUE} and \code{discrete = TRUE}, data will be interpreted as
#'   discrete values on a circular scale. For example, data might be integer values
#'   representing hours and ranging from 0 to 23.
#'
#'   If \code{run_null_model} is \code{TRUE}, the O-statistics are evaluated relative
#'   to a null model. When both \code{shuffle_weights} and \code{swap_means} are \code{FALSE},
#'   null communities are generated by randomly assigning a taxon that is present in the community to
#'   each individual. If \code{shuffle_weights} is \code{TRUE}, species abundances
#'   are also randomly assigned to each species to weight the O-statistic for each
#'   null community. If \code{swap_means} is \code{TRUE}, instead of sampling individuals
#'   randomly, species means are sampled randomly among species, keeping the deviation
#'   of each individual from its species mean the same. After the null communities
#'   are generated, O-stats are calculated for each null community to compare with
#'   the observed O-stat.
#'
#'   Effect size statistics are calculated by z-transforming the O-statistics
#'   using the mean and standard deviation of the null distribution.
#'
#' @return The function returns a list containing four objects:
#' \item{overlaps_norm}{a matrix showing the O-statistic for each trait and each community,
#'   with the area under all density functions normalized to 1.}
#' \item{overlaps_unnorm}{a matrix showing O-stats calculated with the area under all density
#'   functions proportional to the number of observations in that group.}
#' \item{overlaps_norm_ses}{List of matrices of effect size statistics against a null model
#'   with the area under all density functions normalized to 1. \code{ses} contains the
#'   effect sizes (z-scores), \code{ses_lower} contains the effect size lower critical values
#'   for significance at the level determined by \code{nullqs}, and \code{ses_upper}
#'   contains the upper critical values. \code{raw_lower} and \code{raw_upper} are
#'   the critical values in raw units ranging from 0 to 1.}
#' \item{overlaps_unnorm_ses}{List of matrices of effect size statistics against a null model
#'   with the area under all density functions proportional to the number
#'   of observations in that group. Elements are as in \code{overlaps_norm_ses}.}
#'
#' @references Read, Q. D. et al. Among-species overlap in rodent body size
#'   distributions predicts species richness along a temperature gradient.
#'   Ecography 41, 1718-1727 (2018).
#'
#' @author Quentin D. Read, John M. Grady, Arya Y. Yue, Isadora Fluck E., Ben Baiser,
#' Angela Strecker, Phoebe L. Zarnetske, and Sydne Record
#'
#' @seealso \code{\link{Ostats_multivariate}} for multidimensional overlap.
#' @seealso \code{\link{Ostats_plot}} for plotting community overlap for
#' each community.
#'
#' @examples
#' # overlap statistics for body weights of small mammals in NEON sites
#'
#' # Keep only the relevant part of data
#' dat <- small_mammal_data[small_mammal_data$siteID %in% c('HARV','JORN'), ]
#' dat <- dat[!is.na(dat$weight), ]
#' dat$log_weight <- log10(dat$weight)
#'
#'
#' #Run O-stats on the data with only a few null model iterations
#' Ostats_example <- Ostats(traits = as.matrix(dat[,'log_weight']),
#'                    sp = factor(dat$taxonID),
#'                    plots = factor(dat$siteID),
#'                    nperm = 10)
#' @export
#'
#'
Ostats <- function(traits, plots, sp, discrete = FALSE, circular = FALSE, output = "median", weight_type= "hmean", run_null_model = TRUE, nperm = 99, nullqs = c(0.025, 0.975), shuffle_weights = FALSE, swap_means = FALSE, random_seed = NULL, unique_values = NULL, circular_args = list(), density_args = list(), verbose = FALSE) {
  # Required input: a matrix called traits (nrows=n individuals, ncols=n traits),
  # a vector called plots which is a factor with length equal to nrow(traits),
  # a vector called sp which is a factor with length equal to nrow(traits),

  # warning and error messages to check the inputs
  if(!is.numeric(traits)) stop("traits must be a numeric matrix.")
  if(length(unique(sp)) == 1) stop("only one taxon is present; overlap cannot be calculated.")

  # If traits is a vector, coerce to a single column matrix.
  if(is.vector(traits)) traits <- matrix(data = traits, ncol = 1, dimnames = list(names(traits)))

  # If user did not supply a random seed, generate one and print a message.
  if (run_null_model && missing(random_seed)) {
    random_seed <- round(as.numeric(Sys.time()) %% 12345)
    message(paste("Note: argument random_seed was not supplied; setting seed to", random_seed))
  }

  # If the abundances of species within sites are different, print a message
  abund_table <- table(plots, sp)
  unique_abunds <- apply(abund_table, 1, function(x) length(unique(x[x > 0])))
  if (any(unique_abunds > 1)) message("Note: species abundances differ. Consider sampling equivalent numbers of individuals per species.")

  # If discrete or circular are not logical vectors of length either 1 or length of number of columns in traits, stop.
  # Recycle the value if there is only one in each case.
  if (!length(discrete) %in% c(1, ncol(traits))) stop("discrete must be length=1 or length=ncol(traits)")
  if (!length(circular) %in% c(1, ncol(traits))) stop("circular must be length=1 or length=ncol(traits)")

  if (length(discrete) == 1) discrete <- rep(discrete, ncol(traits))
  if (length(circular) == 1) circular <- rep(circular, ncol(traits))

  # Set random seed.
  set.seed(random_seed)

  # Declaration of data structures to hold the results
  # Data structures for observed O-Stats
  overlaps_norm <- matrix(nrow = nlevels(plots), ncol = ncol(traits))
  overlaps_unnorm <- matrix(nrow = nlevels(plots), ncol = ncol(traits))

  # Data structures for null O-Stats
  overlaps_norm_null <- array(dim = c(nlevels(plots), ncol(traits), nperm))
  overlaps_unnorm_null <- array(dim = c(nlevels(plots), ncol(traits), nperm))

  # Name rows and columns of the outputs.
  dimnames(overlaps_norm) <- list(levels(plots), dimnames(traits)[[2]])
  dimnames(overlaps_unnorm) <- list(levels(plots), dimnames(traits)[[2]])

  # Calculation of observed O-Stats
  if (verbose) {
    message('Calculating observed local O-stats for each community . . .')
    pb <- utils::txtProgressBar(min = 0, max = nlevels(plots), style = 3)
  }

  for (s in 1:nlevels(plots)) {
    for (t in 1:ncol(traits)) {
      overlap_norm_st <- try(community_overlap(traits = traits[plots == levels(plots)[s], t], sp = sp[plots == levels(plots)[s]], discrete = discrete[t], circular = circular[t], output = output, weight_type = weight_type, normal = TRUE, unique_values = unique_values, circular_args = circular_args, density_args = density_args), TRUE)
      overlaps_norm[s, t] <- if (inherits(overlap_norm_st, 'try-error')) NA else overlap_norm_st
      overlap_unnorm_st <- try(community_overlap(traits = traits[plots == levels(plots)[s], t], sp = sp[plots == levels(plots)[s]], discrete = discrete[t], circular = circular[t], output = output, weight_type = weight_type, normal = FALSE, unique_values = unique_values, circular_args = circular_args, density_args = density_args), TRUE)
      overlaps_unnorm[s, t] <- if (inherits(overlap_unnorm_st, 'try-error')) NA else overlap_unnorm_st
    }

    if (verbose) utils::setTxtProgressBar(pb, s)

  }

  if (verbose) close(pb)

  if (run_null_model) {

    if (verbose) {
      message('Calculating null distributions of O-stats . . . ')
      pb <- utils::txtProgressBar(min = 0, max = nperm, style = 3)
    }

    # Null model generation and calculation of null O-Stats

    # Local null model: generation and calculation done in the same loop
    for (i in 1:nperm) {
      for (s in 1:nlevels(plots)) {
        for (t in 1:ncol(traits)) {
          if (!swap_means) {
            overlap_norm_sti <- try(community_overlap(traits = traits[plots == levels(plots)[s], t], sp = sample(sp[plots == levels(plots)[s]]), discrete = discrete[t], circular = circular[t], output = output, weight_type = weight_type, normal = TRUE, unique_values = unique_values, randomize_weights = shuffle_weights, circular_args = circular_args, density_args = density_args), TRUE)
            overlap_unnorm_sti <- try(community_overlap(traits = traits[plots == levels(plots)[s], t], sp = sample(sp[plots == levels(plots)[s]]), discrete = discrete[t], circular = circular[t],output = output, weight_type = weight_type, normal = FALSE, randomize_weights = shuffle_weights, unique_values = unique_values, circular_args = circular_args, density_args = density_args), TRUE)
          } else {
            traits_st <- traits[plots==levels(plots)[s], t]
            sp_st <- sp[plots==levels(plots)[s]]

            traitmeans <- tapply(traits_st,sp_st,mean)
            traitdeviations <- traits_st-traitmeans[sp_st]

            # Sort the trait means out randomly.
            traitmeans_null <- sample(traitmeans)
            sp_null <- rep(names(traitmeans_null), table(sp_st))
            traits_null <- traitdeviations + traitmeans_null[sp_null]
            overlap_norm_sti <- try(community_overlap(traits = traits_null, sp = sp_null, discrete = discrete[t], circular = circular[t], output = output, weight_type = weight_type, normal = TRUE, randomize_weights = shuffle_weights, unique_values = unique_values, circular_args = circular_args, density_args = density_args), TRUE)
            overlap_unnorm_sti <- try(community_overlap(traits = traits[plots == levels(plots)[s], t], sp = sample(sp[plots == levels(plots)[s]]), discrete = discrete[t], circular = circular[t], output = output, weight_type = weight_type, normal = FALSE, randomize_weights = shuffle_weights, unique_values = unique_values, circular_args = circular_args, density_args = density_args), TRUE)
          }

          overlaps_norm_null[s, t, i] <- if (inherits(overlap_norm_sti, 'try-error')) NA else overlap_norm_sti

          overlaps_unnorm_null[s, t, i] <- if (inherits(overlap_unnorm_sti, 'try-error')) NA else overlap_unnorm_sti
        }
      }
      if (verbose) utils::setTxtProgressBar(pb, i)
    }

    if (verbose) close(pb)

    # Extract quantiles to get standardized effect sizes for the overlap stats
    overlaps_norm_ses <- get_ses(overlaps_norm, overlaps_norm_null, nullqs)
    overlaps_unnorm_ses <- get_ses(overlaps_unnorm, overlaps_unnorm_null, nullqs)
    list(overlaps_norm=overlaps_norm, overlaps_unnorm=overlaps_unnorm,
         overlaps_norm_ses=overlaps_norm_ses, overlaps_unnorm_ses=overlaps_unnorm_ses)

  } else {
    list(overlaps_norm=overlaps_norm, overlaps_unnorm=overlaps_unnorm)
  }

}