R/utils.R

Defines functions is.obr as.obr as_obr obr make_outbreak_status is.defol series_names events_table stack_defoliation as.defol as_defol defol make_defol_status id_defoliation gsi

Documented in as_defol as.defol as_obr as.obr defol gsi id_defoliation is.defol is.obr obr series_names stack_defoliation

#' Calculate the growth suppression index
#'
#' This function removes the nonhost growth signal from a host tree-ring series.
#'
#' @details The growth suppression index (GSI) is referred to as the "corrected"
#'   series in OUTBREAK. It is calculated as:
#'
#'   \deqn{GSI(i) = H(i) - (NH(i) - mean(NH)) sd(H)/sd(NH)}
#'
#'   where H and NH are the host and nonhost tree-ring series as standardized
#'   index values; i is the year, and the functions [mean()] and [sd()] are
#'   applied to the common period.
#'
#'   [gsi()] will rarely be run directly by a user. It is called from
#'   [defoliate_trees()].
#'
#' @param input_series A `dplr::rwl` object with the host tree series as the
#'   first column and the non-host chronology as the second. Years should be the
#'   row names. This is specifically created by [defoliate_trees()] and
#'   passed to [gsi()].
#'
#' @return A data frame with the two input columns (host and nonhost series) and
#'   3 added columns:
#'
#'   \enumerate{ \item The mean/sd adjusted non-host chronology, \item The
#'   growth suppression index ("gsi") of the host series after subtraction of
#'   the adjusted nonhost chronology, \item The normalized growth suppression
#'   index ("ngsi") generated by applying [scale()] to the gsi. }
#'
#' @export
gsi <- function(input_series) {
  nms <- colnames(input_series)
  nam1 <- paste(nms[2], "_rescale", sep = "")
  nam2 <- paste(nms[1], "_gsi", sep = "")
  nam3 <- paste(nms[1], "_ngsi", sep = "")
  h_sd <- stats::sd(input_series[, 1])
  nh_sd <- stats::sd(input_series[, 2])
  nh_mean <- mean(input_series[, 2])
  # To Do: Add provision to prevent large non-host values from swaying results
  # See outbreak.txt for description of fractional power.
  input_series[, 3] <- (input_series[, 2] - nh_mean) * (h_sd / nh_sd)
  input_series[, 4] <- input_series[, 1] - input_series[, 3]
  input_series[, 5] <- as.numeric(scale(input_series[, 4]))
  input_series <- round(input_series, 4)
  names(input_series) <- c(nms, nam1, nam2, nam3)
  input_series
}

#' Identify defoliation events in a host series
#'
#' After calculating the growth suppression index in [gsi()], [id_defoliation()]
#' performs a runs analysis on the normalized growth suppression index, or
#' `ngsi`, in which sequences of negative departures are assessed as possible
#' defoliation events. [id_defoliation()] is the workhorse for
#' [defoliate_trees()], performing much of the necessary criteria evaluation
#' used in OUTBREAK. The defaults for most parameters originate from OUTBREAK.
#' Two new features distinguish the `dfoliatR` version: bridging events that
#' occur in close sequence and allowing for the recent end of a series to be
#' evaluated for defoliation regardless of the event duration. See parameter
#' specifics for details.
#'
#' @note [id_defoliation()] is called by [defoliate_trees()]. It might only be
#'   used by the user for troubleshooting.
#'
#' @param input_series A data frame with 5 columns, as generated by [gsi()].
#'
#' @param duration_years The minimum length of time in which the tree is
#'   considered to be in defoliation.
#'
#' @param max_reduction The minimum value of `ngsi` required to be considered a
#'   defoliation event. If a sequence of negative ngsi values does not reach
#'   this threshold, the potential event is rejected. Defaults to -1.28.
#'
#' @param bridge_events Binary, defaults to `FALSE`. This option allows for
#'   successive events that are separated by a single year to be bridged and
#'   become one event. It should be used cautiously and closely evaluated to
#'   ensure the likelihood that the two (or more) events are actually one long
#'   event.
#'
#' @param series_end_event Binary, defaults to `FALSE`. This option allows the
#'   user to identify an event occurring at the time of sampling as a
#'   defoliation event, regardless of duration. Including it will help to
#'   quantify periodicity and extent of an outbreak. This should only be used if
#'   the user has direct knowledge of an ongoing defoliation event when the
#'   trees were sampled.
#'
#' @return After performing runs analyses, the function adds a column to the
#'   input data frame that distinguishes years of defoliation and the maximum
#'   defoliation year (ie. the year the greatest negative growth departure
#'   within the event).
#'
#' @export
id_defoliation <- function(input_series,
                           duration_years = 8,
                           max_reduction = -1.28,
                           bridge_events = FALSE,
                           series_end_event = FALSE) {
  rns <- rle(as.vector(input_series[, 5] < 0))
  rns_index <- cumsum(rns$lengths)
  neg_runs_pos <- which(rns$values == TRUE)
  ends <- rns_index[neg_runs_pos]
  newindex <- ifelse(neg_runs_pos > 1, neg_runs_pos - 1, 0)
  starts <- rns_index[newindex] + 1
  if (0 %in% newindex) starts <-  c(1, starts)
  deps <- data.frame(cbind(starts, ends))
  input_series$defol_status <-  NA
  events <- c("defol", "max_defol", "bridge_defol", "series_end_defol")
  for (y in seq_len(nrow(deps))) {
   dep_seq <- deps$starts[y] : deps$ends[y]
    if (any(input_series[dep_seq, ]$defol_status %in% events)) next
    max.red <- dep_seq[1] + which.min(input_series[dep_seq, 5]) - 1
    # Includes setting for max growth reduction
    if (input_series[max.red, 5] > max_reduction) next
    prev_flag <- FALSE
    if (y > 1) {
      if (min(dep_seq) - deps$ends[y - 1] == 2) {
        if (! any(
          input_series[deps$starts[y - 1] :
                       deps$ends[y - 1], ]$defol_status  %in% events)) {
          dep_seq <- c(deps$starts[y - 1] : max(dep_seq))
         prev_flag <- TRUE
        }
      }
    }
    if (y < nrow(deps)) {
      if (deps$starts[y + 1] - max(dep_seq) == 2) {
        if (min(input_series[dep_seq, 5]) >
            min(input_series[deps$starts[y + 1] : deps$ends[y + 1], 5])
            )  next
        if ((!prev_flag) & (y < nrow(deps) - 1)) {
          if (deps$starts[y + 2] - deps$ends[y + 1] == 2) {
            if (min(input_series[dep_seq, 5]) >
                min(input_series[deps$starts[y + 2] : deps$ends[y + 2], 5])
                )  next
          }
        }
       dep_seq <- c(min(dep_seq) : deps$ends[y + 1])
      }
    }
    se_flag <- FALSE
    if (y == nrow(deps)) {
      if (series_end_event) {
        if (nrow(input_series) - deps[y, "ends"] < 2) {
          dep_seq <- c(min(dep_seq) : nrow(input_series))
          se_flag <- TRUE
        }
        if (! se_flag) {
          if (length(dep_seq) < duration_years) next
        }
      }
      else if (length(dep_seq) < duration_years) next
    }
    else if (length(dep_seq) < duration_years) next

    input_series[dep_seq, "defol_status"] <- "defol"
    input_series[max.red, "defol_status"] <- "max_defol"

    if (se_flag) {
      input_series[dep_seq, ]$defol_status <-
        replace(input_series[dep_seq, ]$defol_status,
                input_series[dep_seq, ]$defol_status == "defol",
                "series_end_defol")
    }
    if (y > 1 & bridge_events) {
      if (any(input_series[min(dep_seq) - 2, ]$defol_status %in% events)) {
        input_series[min(dep_seq) - 1, "defol_status"] <- "bridge_defol"
      }
    }
  }
  input_series$defol_status <- replace(input_series$defol_status,
                          is.na(input_series$defol_status),
                          "nd")
  input_series
}

#' Turn character vector into factor with proper levels of `defol_status`
#'
#' @param x a vector of defol types
#'
#' @return A factor with appropriate defol levels
#'
#' @noRd
make_defol_status <- function(x) {
  defol_types <- c("nd", "defol",
                   "max_defol",
                   "series_end_defol",
                   "bridge_defol")
  stopifnot(x %in% defol_types)
  factor(x, levels = defol_types)
}

#' Constructor for S3 defol class
#'
#' @param year An n-length integer vector of observation years
#' @param series An n-length factor or character vector of series names
#' @param gsi An n-length numeric vector of growth suppression index, such as
#'  calculated by [gsi()]
#' @param ngsi An n-length numeric vector of normalized gsi, such as calculated
#'  by [gsi()].
#' @param defol_status An n-length factor or character vector denoting the
#'  defoliation event status of each year. This uses a controlled vocabulary,
#'  see `dfoliatR:::make_defol_status` for possible values.
#'
#' @return a tree-level `defol` object
#'
#' @export
defol <- function(year, series, gsi, ngsi, defol_status) {
  df <- data.frame(year = as.numeric(year),
                   series = as.factor(series),
                   gsi = as.numeric(gsi),
                   ngsi = as.numeric(ngsi),
                   defol_status = make_defol_status(defol_status)
  )
  class(df) <- c("defol", "data.frame")
  df
}

#' Cast data frame to list-like `defol` object
#'
#' @param x A data frame or list-like object to cast. Must have named elements
#'   for "year", "series", "gsi", "ngsi", and "defol_status".
#'
#' @return `x` cast to a `defol` object
#'
#' @examples
#' data(dmj_defol)
#' example_data <- as.data.frame(dmj_defol)
#' is.defol(example_data)
#' back_to_defol <- as_defol(example_data)
#' is.defol(back_to_defol)
#'
#' @export
as_defol <- function(x) {
  good_names <- c("year",
                  "series",
                  "gsi",
                  "ngsi",
                  "defol_status")
  if (! all(good_names %in% names(x))) {
    stop("`x` must have members 'year', 'series',
         'gsi', 'ngsi', 'defol_status'")
  }
  defol(x$year, x$series, x$gsi, x$ngsi, x$defol_status)
}

#' Alias to [as_defol()]
#'
#' @inherit as_defol
#'
#' @export
as.defol <- function(x) {
  as_defol(x)
}

#' Stack a defoliation list
#'
#' @param x a list object created by [defoliate_trees()]
#'
#' @return a defol object (long-format data frame)
#'
#' @export
stack_defoliation <- function(x) {
  out_list <- lapply(x, function(i) {
    inout <- range(as.numeric(rownames(i)))
    df <- data.frame(year = inout[1]:inout[2],
                     series = colnames(i)[1],
                     gsi = i[, 4],
                     ngsi = i[, 5],
                     defol_status = i[, 6])
    return(df)
  })
  out <- do.call(rbind, out_list)
  as_defol(out)
}

#' Create a runs table of events
#'
#' @param status vector of defoliation or outbreak status
#' @param events vector of events types to include in the table
#'
#' @noRd
events_table <- function(status, events) {
  rlx <- rle(status %in% events)
  index <- cumsum(rlx$lengths)
  pos <- which(rlx$values == TRUE)
  ends <- index[pos]
  newindex <- ifelse(pos > 1, pos - 1, 0)
  starts <- index[newindex] + 1
  if (0 %in% newindex) starts <- c(1, starts)
  data.frame(cbind(starts, ends))
}

#' Extract series names from a defol object
#'
#' @param x a defol object
#'
#' @export
series_names <- function(x) {
  stopifnot(is.defol(x))
  as.character(unique(x$series))
}


#' Check if object is tree-level defoliation object: `defol`
#'
#' @param x Any R object.
#'
#' @return Boolean indicating whether `x` is a defol object.
#'
#' @export
is.defol <- function(x) {
  inherits(x, "defol")
}

#' Turn character vector into factor with proper levels of `outbreak_status`
#'
#' @param x a vector of outbreak designation types
#'
#' @return A factor with appropriate outbreak levels
#'
#' @noRd
make_outbreak_status <- function(x) {
  obr_types <- c("outbreak", "se_outbreak", "not_obr")
  stopifnot(x %in% obr_types)
  factor(x, levels = obr_types)
}

#' Constructor for an `obr` object.
#'
#' @param year An n-length numeric vector of observed years.
#'
#' @param samp_depth An n-length numeric vector of the number of trees.
#'
#' @param num_defol An n-length numeric vector of the number of trees
#'   experiencing defoliation.
#'
#' @param perc_defol An n-length numeric vector of the percent of trees
#'   experiencing defoliation.
#'
#' @param num_max_defol An n-length numeric vector of the number of trees
#'   experiencing their maximum level of defoliation (i.e., their most extreme
#'   negative growth departure).
#'
#' @param perc_max_defol An n-length numeric vector of the percent of trees
#'   experiencing their maximum level of defoliation (i.e., their most extreme
#'   negative growth departure).
#'
#' @param mean_gsi An n-length numeric vector of the average growth suppression
#'   index across all observed trees.
#'
#' @param mean_ngsi An n-length numeric vector of the average normalized
#'   (scaled) growth suppression index.
#'
#' @param outbreak_status An n-length factor or character vector that identified
#'   whether that year surpasses the designated thresholds for an "outbreak
#'   event". Threshold criteria are provided in [outbreak()].
#'
#' @return An `obr` object with columns matching the input variables.
#'
#' @export
obr <- function(year,
                samp_depth,
                num_defol,
                perc_defol,
                num_max_defol,
                perc_max_defol,
                mean_gsi,
                mean_ngsi,
                outbreak_status) {
  obr_dat <- data.frame(
    year = as.numeric(year),
    samp_depth = as.numeric(samp_depth),
    num_defol = as.numeric(num_defol),
    perc_defol = as.numeric(perc_defol),
    num_max_defol = as.numeric(num_max_defol),
    perc_max_defol = as.numeric(perc_max_defol),
    mean_gsi = as.numeric(mean_gsi),
    mean_ngsi = as.numeric(mean_ngsi),
    outbreak_status = make_outbreak_status(outbreak_status)
  )
  class(obr_dat) <- c("obr", "data.frame")
  obr_dat
}

#' Cast data frame to list-like `obr` object
#'
#' @param x A data frame or list-like object to cast. Must have named elements
#'   for "year", "samp_depth", "num_defol", "perc_defol", "num_max_defol",
#'   "perc_max_defol", "mean_gsi", "mean_ngsi", "outbreak_status".
#'
#' @return `x` cast to an `obr` object
#'
#' @examples
#' data(dmj_obr)
#' example_data <- as.data.frame(dmj_obr)
#' is.obr(example_data)
#' back_to_obr <- as_obr(example_data)
#' is.obr(back_to_obr)
#'
#' @export
as_obr <- function(x) {
  good_names <- c("year",
                  "samp_depth",
                  "num_defol",
                  "perc_defol",
                  "num_max_defol",
                  "perc_max_defol",
                  "mean_gsi",
                  "mean_ngsi",
                  "outbreak_status")
  if (! all(good_names %in% names(x))) {
    stop(
      "`x` must have members 'year', 'samp_depth',
      'num_defol', 'perc_defol', 'num_max_defol',
      'perc_max_defol', 'mean_gsi', 'mean_ngsi',
      'outbreak_status'"
    )
  }
  obr(x$year,
      x$samp_depth,
      x$num_defol,
      x$perc_defol,
      x$num_max_defol,
      x$perc_max_defol,
      x$mean_gsi,
      x$mean_ngsi,
      x$outbreak_status)
}

#' Alias to [as_obr()]
#'
#' @inherit as_obr
#'
#' @export
as.obr <- function(x) {
  as_obr(x)
}

#' Check if object is outbreak, meaning site-level outbreak object
#'
#' @param x Any R object.
#'
#' @return Boolean indicating whether `x` is an outbreak object.
#'
#' @export
is.obr <- function(x) {
  inherits(x, "obr")
}

Try the dfoliatR package in your browser

Any scripts or data that you put into this service are public.

dfoliatR documentation built on Aug. 10, 2023, 1:08 a.m.