R/get_drought.R
In SEI: Calculating Standardised Indices

Documented in get_drought

#' @title Get drought characteristics
#'
#' @description
#' Extract characteristics of droughts from a time series of values. Drought characteristics
#' include the occurrence, intensity, magnitude, and duration of the drought.
#'
#' @param x vector or xts object from which droughts are defined.
#' @param thresholds numeric vector containing thresholds to use when defining droughts.
#' @param exceed logical; \code{TRUE} if a drought is defined when \code{x} is above the thresholds, \code{FALSE} otherwise.
#' @param cluster integer specifying the number of time steps over which droughts should be clustered.
#' @param lag numeric specifying the value at which the drought should end.
#'
#' @details
#' A drought is assumed to be defined as an instance when the vector \code{x} exceeds
#' (if \code{exceed = TRUE}) or falls below (if \code{exceed = FALSE}) the specified
#' thresholds in \code{thresholds}.
#'
#' \code{thresholds} can be a single value, or a vector of values. In the latter case,
#' each threshold is assumed to be a different level or intensity of the drought.
#' If \code{exceed = TRUE} then a higher threshold corresponds to a higher intensity,
#' and if \code{exceed = FALSE} then a lower threshold corresponds to a higher intensity.
#' For example, if \code{thresholds = c(1, 1.5, 2)}, then a level 1 drought occurs
#' whenever \code{x} exceeds 1 but is lower than 1.5, a level 2 drought occurs
#' whenever \code{x} exceeds 1.5 but is lower than 2, and a level 3 drought occurs
#' whenever \code{x} exceeds 2.
#'
#' By default, \code{thresholds = c(1.28, 1.64, 1.96)}, which corresponds to the
#' 90th, 95th, and 97.5th percentiles of the standard normal distribution. These
#' thresholds are often used alongside standardised indices to define hydrometeorological
#' droughts; see references.
#'
#' \code{cluster} represents the number
#' of time steps between different drought events that should be attributed to the same drought.
#' For example, suppose \eqn{x_{i} \geq t, x_{i + 1} < t, x_{i + 2} \geq t},
#' where \eqn{x_{i}} represents the \eqn{i}-th value in \code{x}, and \eqn{t} is the
#' lowest threshold in \code{thresholds}. In this case, one drought event will finish
#' at time point \eqn{i} and a new drought event will begin at time point \eqn{i + 2};
#' no drought will occur at time point \eqn{i + 1} because the value \eqn{x_{i + 1}} is
#' below the threshold defining a drought. Since both \eqn{x_{i}} and \eqn{x_{i + 2}}
#' are classed as drought events, it may be desirable to ignore the fluctuation, and
#' assume that the drought persists through \eqn{x_{i + 1}} despite its value. This can
#' be achieved by setting \code{cluster = 1}. If there were two time points separating
#' different drought events, these can be clustered together by setting \code{cluster = 2},
#' and so on. The default is that no clustering should be implemented, i.e. \code{cluster = 0}.
#'
#' Alternatively, we may wish to assume that the drought persists until \code{x} falls below
#' a value that is not necessarily equal to the threshold defining a drought. For example,
#' hydrometeorological droughts based on standardised indices, such as the Standardised
#' Precipitation Index (SPI), are often defined to persist until the standardised index changes
#' sign, i.e. falls below zero. This can be achieved by setting \code{lag = 0}. More generally,
#' \code{lag} can be any numerical value. If \code{exceed = TRUE}, a warning is issued if
#' \code{lag} is above the lowest threshold, and if \code{exceed = FALSE}, a warning is
#' issued if \code{lag} is below the highest threshold. If \code{lag} is \code{NULL}
#' (the default), then no lagging is performed.
#'
#' \code{get_drought()} currently does not use the time series information in
#' the xts input, thereby assuming that the time series is complete, without missing
#' time periods. If \code{x} is a vector, rather than an xts object, then this
#' is also implicitly assumed.
#'
#' The output is a dataframe containing the vector \code{x}, a logical vector
#' specifying whether each value of \code{x} corresponds to a drought event,
#' and the magnitude of the drought, defined as the sum of the values of \code{x} during
#' the drought; see references. The magnitude of the drought is only shown
#' on the last day of the drought. This makes it easier to compute statistics about
#' the drought magnitude, such as the average drought magnitude.
#' If \code{thresholds} is a vector, the intensity or level of the drought is also returned.
#'
#'
#' @return
#' A data frame containing the original values \code{x} and the corresponding drought characteristics.
#'
#' @references
#' McKee, T. B., Doesken, N. J., & Kleist, J. (1993):
#' `The relationship of drought frequency and duration to time scales',
#' \emph{In Proceedings of the 8th Conference on Applied Climatology} 17, 179-183.
#'
#' Vicente-Serrano, S. M., Beguería, S., & López-Moreno, J. I. (2010):
#' `A multiscalar drought index sensitive to global warming: the standardized precipitation evapotranspiration index',
#' \emph{Journal of Climate} 23, 1696-1718.
#' \doi{https://doi.org/10.1175/2009JCLI2909.1}
#'
#' Allen, S. & N. Otero (2023):
#' `Standardised indices to monitor energy droughts',
#' \emph{Renewable Energy} 217, 119206.
#' \doi{https://doi.org/10.1016/j.renene.2023.119206}
#'
#'
#' @author Sam Allen, Noelia Otero
#'
#' @examples
#'
#' data(data_supply)
#'
#' # consider daily German energy supply data in 2019
#' supply_de <- subset(data_supply, country == "Germany", select = c("date", "PWS"))
#' supply_de <- xts::xts(supply_de$PWS, order.by = supply_de$date)
#' supply_de_std <- std_index(supply_de, rescale = "days", timescale = "hours")
#'
#' # a drought may correspond to when energy supply is low
#' drought_df <- get_drought(supply_de_std, thresholds = c(-1.28, -1.64, -1.96), exceed = FALSE)
#' head(drought_df)
#' mean(drought_df$occ) # droughts occur on roughly 10% of time steps
#'
#' # cluster droughts two time steps apart
#' drought_df <- get_drought(supply_de_std, thresholds = c(-1.28, -1.64, -1.96),
#'                           cluster = 2, exceed = FALSE)
#' mean(drought_df$occ) # droughts occur on roughly 11% of time steps
#'
#' # let droughts persist until the standardised index changes sign
#' drought_df <- get_drought(supply_de_std, thresholds = c(-1.28, -1.64, -1.96),
#'                           lag = 0, exceed = FALSE)
#' mean(drought_df$occ) # droughts occur on roughly 17% of time steps
#'
#'
#' @name get_drought
NULL

#' @rdname get_drought
#' @export
get_drought <- function(x, thresholds = c(1.28, 1.64, 1.96),
                        exceed = TRUE, cluster = 0, lag = NULL){

  # check inputs
  inputs <- as.list(environment())
  check_drought(inputs)

  if (xts::is.xts(x)) {
    x <- unname(x)
    df <- zoo::fortify.zoo(x)
  } else {
    df <- data.frame(x = x)
  }

  # intensity & occurrence
  if (exceed) {
    if (length(thresholds) == 1) {
      df$occ <- as.numeric(df$x >= thresholds)
    } else {
      df$ins <- sapply(df$x, function(z) sum(z >= thresholds))
      df$occ <- as.numeric(df$ins >= 1)
    }
  }else{
    if (length(thresholds) == 1) {
      df$occ <- as.numeric(df$x <= thresholds)
    }else{
      df$ins <- sapply(df$x, function(z) sum(z <= thresholds))
      df$occ <- as.numeric(df$ins >= 1)
    }
  }

  # lag
  if (!is.null(lag)) {
    for (i in 2:nrow(df)) {
      if (exceed) {
        if (df$occ[i] == 0 & df$occ[i - 1] == 1 & df$x[i] >= lag) df$occ[i] <- 1
      } else {
        if (df$occ[i] == 0 & df$occ[i - 1] == 1 & df$x[i] <= lag) df$occ[i] <- 1
      }
    }
  }

  # cluster droughts
  if (cluster > 0) {
    ind <- which(df$occ == 1)
    dif <- c(diff(ind), 0)
    cl <- (dif > 1) & (dif <= cluster + 1)
    ind <- ind[cl]
    dif <- dif[cl]
    for (i in seq_along(ind)) df$occ[ind[i]:(ind[i] + dif[i])] <- 1
  }

  # duration and magnitude
  mag <- abs(df$x)*(df$occ == 1)
  df['dur'] <- c(df$occ[1], numeric(nrow(df) - 1))
  df['mag'] <- c(mag[1], numeric(nrow(df) - 1))
  for (i in 2:nrow(df)) {
    if (df$occ[i]) {
      df$dur[i] <- df$dur[i - 1] + 1
      df$dur[i - 1] <- 0

      df$mag[i] <- df$mag[i - 1] + mag[i]
      df$mag[i - 1] <- 0
    }
  }

  return(df)
}


# check the inputs of get_drought
check_drought <- function(inputs) {

  # x
  if (is.vector(inputs$x)) {
    if (!is.numeric(inputs$x)) {
      stop("'x' must be a numeric vector or xts object")
    }
  } else if (!xts::is.xts(inputs$x)) {
    stop("'x' must be a numeric vector or xts object")
  }

  # thresholds
  if (!is.vector(inputs$thresholds)) {
    if (!is.numeric(inputs$thresholds)) {
      stop("'thresholds' must be a numeric vector")
    }
  }

  # exceed
  if (!is.logical(inputs$exceed) | length(inputs$exceed) > 1) {
    stop("'exceed' must either be TRUE or FALSE")
  }

  # cluster
  if (!is.numeric(inputs$cluster)) {
    stop("'cluster' must be a single integer")
  }
  if (length(inputs$cluster) > 1) {
    stop("'cluster' must be a single integer")
  }
  if (inputs$cluster %% 1 != 0) {
    stop("'cluster' must be a single integer")
  }

  # lag
  if (!is.null(inputs$lag)) {
    if (!is.numeric(inputs$lag)) {
      stop("'lag' must be a single numeric value")
    }
    if (length(inputs$lag) > 1) {
      stop("'lag' must be a single numeric value")
    }
    if (inputs$exceed & inputs$lag > min(inputs$thresholds)) {
        warning("'lag' is larger than the lowest value in 'thresholds'")
    } else if (!(inputs$exceed) & inputs$lag < max(inputs$thresholds)) {
      warning("'lag' is smaller than the largest value in 'thresholds'")
    }
  }

}