R/species_data_functions.R

Defines functions generate_sd_table get_sp_mean_size add_estimated_sds clean_sp_size_data species_estimate_sd get_sd_parameters

Documented in add_estimated_sds clean_sp_size_data generate_sd_table get_sd_parameters get_sp_mean_size species_estimate_sd

#' Estimate parameters for scaling of standard deviation with mean body size
#'
#' Calculates parameters for a (log-log linear) scaling relationship between the
#' mean and standard deviation of a species' mean body size. Given a table of
#' species with known mean and standard deviation body sizes, fits a linear
#' model of the form `log(var(body_size)) ~ log(mean(body_size))` and extracts
#' parameter estimates, which can then be used to estimate the standard
#' deviation of body mass for a species based only on its mean body mass. See
#' also Thibault et al. (2011) for this method applied to the Breeding Bird
#' Survey.
#'
#' @references \itemize{ \item{Thibault, K. M., White, E. P., Hurlbert, A. H., &
#'   Ernest, S. K. M. (2011). Multimodality in the individual size distributions
#'   of bird communities. Global Ecology and Biogeography, 20(1), 145–153.
#'   https://doi.org/10.1111/j.1466-8238.2010.00576.x} }
#'
#' @param raw_size_data dataframe of species' mean and standard deviation body
#'   sizes; use the included `raw_masses` data table.
#'
#' @return list of `$slope` and `$intercept` from the linear model fit
#'
#' @keywords internal
#'
#' @importFrom stats lm var family
get_sd_parameters <- function(raw_size_data) {
  sp_for_sd <- raw_size_data[!is.na(raw_size_data$sd),]
  sp_for_sd$mass <- as.numeric(sp_for_sd$mass)
  sp_for_sd$sd <- as.numeric(sp_for_sd$sd)
  sp_for_sd$var <- sp_for_sd$sd ^ 2
  sp_for_sd$log_m <- log(sp_for_sd$mass)
  sp_for_sd$log_var <- log(sp_for_sd$var)


  sd_fit <- stats::lm(data = sp_for_sd, formula = log_var ~ log_m)

  intercept <- exp(sd_fit$coefficients[[1]])
  slope <- sd_fit$coefficient[[2]]

  return(list(intercept = intercept,
              slope = slope))
}


#' Estimate a species' standard deviation of body mass based on its mean body
#' mass
#'
#' Using the parameters estimated in `get_sd_parameters`, estimate the standard
#' deviation of body mass for a species based only on its mean body mass.
#'
#' @param sp_mean mean body mass, in grams
#' @param pars list containing `$slope` and `$intercept`, generated by
#'   [get_sd_parameters()]. If not provided, estimated by running
#'   `get_sd_parameters(raw_masses)`.
#' @keywords internal
#' @return the estimated standard deviation of body mass
#'
#'
species_estimate_sd <- function(sp_mean, pars = NULL) {
  if (is.null(pars)) {
    pars <- get_sd_parameters(raw_masses)
  }

  fitted_sd <- sqrt(pars$intercept * (sp_mean ^ pars$slope))

  return(fitted_sd)
}

#' Reconcile taxonomic updates between 2008 and 2019
#'
#' Six species have undergone name changes or other minor taxonomic
#' rearrangements between 2008 and 2019, resulting in name mismatches between
#' data from the Breeding Bird Survey (Paradieck et al. 2019) and the CRC
#' Handbook (Dunning 2008). This function resolves those mismatches such that
#' all species in the Breeding Bird Survey are associated with the appropriate
#' size records from the CRC Handbook.
#'
#' @param raw_size_data dataframe of species' mean and standard deviation body
#'   sizes; use the included `raw_masses` data table.
#'
#' @return dataframe of species' mean and standard deviation body sizes, with
#'   name mismatches resolved.
#'
#' @references \itemize{ \item{Dunning, J. B. (2008). CRC handbook of avian body
#'   masses (2nd ed.). CRC Press.} \item{Pardieck, K. L., Ziolkowski, D. J.,
#'   Lutmerding, M., Aponte, V., & Hudson, M.-A. (2019). North American Breeding
#'   Bird Survey Dataset 1966—2018, version 2018.0. U.S. Geological Survey.
#'   https://doi.org/10.5066/P9HE8XYJ} }
#'
#' @keywords internal
clean_sp_size_data <- function(raw_size_data) {
  cols_to_remove <-
    which(colnames(raw_size_data) %in% c("english_common_name", "sporder", "family"))
  sp_clean <- raw_size_data[,-cols_to_remove]
  sp_clean$mass <- as.numeric(sp_clean$mass)

  name_change <- sp_clean[which(sp_clean$not_in_dunning == 1),]


  sp_clean <- sp_clean[which(is.na(sp_clean$not_in_dunning)),]
  sp_clean$added_flag <- NA_integer_

  for (i in seq_len(nrow(name_change))) {
    if (!is.na(name_change$close_subspecies[i])) {
      matched_rows <- sp_clean[sp_clean$genus == name_change$close_genus[i] &
                                 sp_clean$species == name_change$close_species[i] &
                                 sp_clean$subspecies == name_change$close_subspecies[i],]
    } else {
      matched_rows <- sp_clean[sp_clean$genus == name_change$close_genus[i] &
                                 sp_clean$species == name_change$close_species[i],]
    }

    sp_to_add <- matched_rows
    sp_to_add$AOU <- name_change$AOU[i]
    sp_to_add$added_flag <- 1

    sp_clean <- rbind(sp_clean, sp_to_add)
  }

  return(sp_clean)
}


#' Estimate missing records for standard deviation based on mean of body size
#'
#' Fill in missing records for the standard deviation of body mass for a
#' species, based on its mean body size and the parameters estimated by the
#' linear model fit by `get_sd_parameters`.
#'
#' @param clean_size_data dataframe of species' masses and standard deviations;
#'   as generated by `clean_sp_size_data`
#' @param sd_pars parameters as list of `$slope`, `$intercept`; as generated by
#'   `get_sd_parameters`
#' @keywords internal
#' @return a dataframe of species' `species_id` (which matches the AOU in the
#'   Breeding Bird Survey), `mass` mean body mass, `sd` standard deviation body
#'   mass, and a new column for `estimated_sd`, a `TRUE`/`FALSE` flag for
#'   whether the standard deviation has been estimated using the parameters
#'   provided in `sd_pars`.
#'
#'
add_estimated_sds <- function(clean_size_data, sd_pars) {
  clean_size_data$estimated_sd <- FALSE

  for (i in seq_len(nrow(clean_size_data))) {
    if (is.na(clean_size_data$sd[i])) {
      clean_size_data$estimated_sd[i] <- TRUE
      clean_size_data$sd[i] <-
        species_estimate_sd(clean_size_data$mass[i], pars = sd_pars)
    } else {
      clean_size_data$estimated_sd[i] <- FALSE
    }
  }

  return(clean_size_data)
}

#' Summarize records of mean and standard deviation of body mass to
#' species-level means
#'
#' The CRC Handbook (Dunning 2008) often contains multiple records for mean body
#' mass (and standard deviation of body mass) for a species, drawn from
#' different locations or for different sexes. This function summarizes across
#' all records for each species to produce species-level means for the mean and
#' standard deviation of body mass.
#'
#' @param sd_dat dataframe of mean and standard deviation of body mass for all
#'   records for all species; generated by `add_estimated_sds`
#'
#' @return `sd_dat` summarized to species-level means for the mean and standard
#'   deviation of body mass
#' @keywords internal
#' @references \itemize{ \item{Dunning, J. B. (2008). CRC handbook of avian body
#'   masses (2nd ed.). CRC Press.} }
#'
#'
get_sp_mean_size <- function(sd_dat) {
  unique_combinations <- sd_dat[, c("AOU", "genus", "species")]

  unique_combinations <- unique(unique_combinations)

  unique_combinations$mean_mass <- NA_real_
  unique_combinations$mean_sd <- NA_real_
  unique_combinations$contains_estimates <- NA


  for (i in seq_len(nrow(unique_combinations))) {
    this_combination <-
      sd_dat[which(
        sd_dat$AOU == unique_combinations$AOU[i] &
          sd_dat$genus == unique_combinations$genus[i] &
          sd_dat$species == unique_combinations$species[i]
      ),]

    unique_combinations$mean_mass[i] <- mean(this_combination$mass)
    unique_combinations$mean_sd[i] <-
      mean(this_combination$sd, na.rm = FALSE)
    unique_combinations$contains_estimates[i] <-
      any(this_combination$estimated_sd)
  }

  sp_means <- unique_combinations[!is.na(unique_combinations$AOU),]
  sp_means <- sp_means[order(sp_means$AOU),]

  sp_means
}


#' Generate table of species-level means for the mean and standard deviation of
#' body mass for species in the Breeding Bird Survey
#'
#' Goes from the `raw_masses` dataframe (included in `bbssize`) of records of
#' species' mean and (where provided) standard deviation of body mass from the
#' CRC Handbook (Dunning 2008) to a table of species-level means for the mean
#' and standard deviation of body mass, incorporating estimates for missing
#' standard deviation records and resolving taxonomic updates between the
#' publication of the CRC Handbook and present releases of the Breeding Bird
#' Survey dataset (Paradieck et al. 2019).
#'
#' @param raw_size_data the `raw_masses` dataframe
#'
#' @references \itemize{ \item{Dunning, J. B. (2008). CRC handbook of avian body
#'   masses (2nd ed.). CRC Press.} \item{Pardieck, K. L., Ziolkowski, D. J.,
#'   Lutmerding, M., Aponte, V., & Hudson, M.-A. (2019). North American Breeding
#'   Bird Survey Dataset 1966—2018, version 2018.0. U.S. Geological Survey.
#'   https://doi.org/10.5066/P9HE8XYJ} }
#'
#' @return a dataframe of species-level means for mean body size and standard
#'   deviation of body size
#' @keywords internal
#'
generate_sd_table <- function(raw_size_data) {
  # Calculate scaling parameters
  fitted_pars <- get_sd_parameters(raw_size_data)

  # Resolve name mismatches
  clean_size_dat <- clean_sp_size_data(raw_size_data)

  # Add estimates for missing standard deviation records  ----
  sd_size_dat <- add_estimated_sds(clean_size_data = clean_size_dat,
                                   sd_pars = fitted_pars)

  # Summarize to species-level means for the mean and standard deviation of body mass ----
  sp_mean_size_dat <- get_sp_mean_size(sd_size_dat)

  sp_mean_size_dat$scientific_name <-
    paste(sp_mean_size_dat$genus, sp_mean_size_dat$species, sep = " ")
  sp_mean_size_dat
}
diazrenata/birdsize documentation built on Jan. 29, 2024, 12:25 p.m.