R/dm_sea.R

Defines functions `%||%` plot.dm_epoch print.summary.dm_epoch summary.dm_epoch dm_epoch_test dm_epoch_extract dmea_stat_fun dmea_sample_anchor_one dmea_extract_from_anchors dmea_prepare_climate dmea_filter_time_table dmea_time_filter_index dmea_apply_fun dmea_step_seconds dmea_unit_string dmea_parse_time dm_event_times

Documented in dmea_apply_fun dmea_extract_from_anchors dmea_filter_time_table dmea_parse_time dmea_prepare_climate dmea_sample_anchor_one dmea_stat_fun dmea_step_seconds dmea_time_filter_index dmea_unit_string dm_epoch_extract dm_epoch_test dm_event_times plot.dm_epoch print.summary.dm_epoch summary.dm_epoch

# =========================
# Superposed epoch analysis
# =========================

#' Extract event times from phase.zg() or phase.sc() output
#'
#' @description
#' Extracts event times from objects returned by \code{phase.zg()} or
#' \code{phase.sc()}. The resulting table can be passed directly to
#' [dm_epoch_extract()] for superposed epoch analysis.
#'
#' @param x Object of class \code{"ZG_output"} or \code{"SC_output"}.
#' @param event Event type to extract. Use \code{"all"} to return all supported
#'   events. For ZG output, supported events are \code{"TWD_start"},
#'   \code{"TWD_end"}, \code{"GRO_start"}, \code{"GRO_end"},
#'   \code{"MaxTWD"}, \code{"phase_start"}, and \code{"phase_end"}.
#'   For SC output, supported events are \code{"Shrinkage_start"},
#'   \code{"Shrinkage_end"}, \code{"Expansion_start"}, \code{"Expansion_end"},
#'   \code{"Increment_start"}, \code{"Increment_end"}, \code{"phase_start"},
#'   and \code{"phase_end"}.
#' @param phase Optional phase filter for generic events \code{"phase_start"}
#'   or \code{"phase_end"}. For ZG output, use \code{"TWD"} or \code{"GRO"}.
#'   For SC output, use \code{"Shrinkage"}, \code{"Expansion"}, or
#'   \code{"Increment"}.
#' @param min_duration Optional minimum \code{Duration_h} filter.
#' @param min_magnitude Optional minimum \code{Magnitude} filter.
#' @param min_max_twd Optional minimum \code{max.twd} filter for ZG output.
#' @param remove_na_times Logical. If \code{TRUE}, rows with missing
#'   \code{event_time} are removed.
#'
#' @return
#' A tibble of events with class \code{c("dm_events", ...)}.
#'
#' @importFrom tibble as_tibble tibble
#' @importFrom dplyr bind_rows
#' @export
dm_event_times <- function(x,
                           event = "all",
                           phase = NULL,
                           min_duration = NULL,
                           min_magnitude = NULL,
                           min_max_twd = NULL,
                           remove_na_times = TRUE) {

  if (inherits(x, "ZG_output")) {
    cyc <- tibble::as_tibble(x$ZG_cycle)

    if (!all(c("Phases", "Start", "End") %in% names(cyc))) {
      stop("ZG_output does not contain the expected ZG_cycle columns.")
    }

    cyc$phase_label <- ifelse(cyc$Phases == 1L, "TWD", "GRO")

    if (!is.null(min_duration) && "Duration_h" %in% names(cyc)) {
      cyc <- cyc[is.na(cyc$Duration_h) | cyc$Duration_h >= min_duration, , drop = FALSE]
    }

    if (!is.null(min_magnitude) && "Magnitude" %in% names(cyc)) {
      cyc <- cyc[is.na(cyc$Magnitude) | cyc$Magnitude >= min_magnitude, , drop = FALSE]
    }

    if (!is.null(min_max_twd) && "max.twd" %in% names(cyc)) {
      cyc <- cyc[is.na(cyc$max.twd) | cyc$max.twd >= min_max_twd, , drop = FALSE]
    }

    allowed <- c(
      "all",
      "TWD_start", "TWD_end", "GRO_start", "GRO_end",
      "MaxTWD",
      "phase_start", "phase_end"
    )

    if (!event %in% allowed) {
      stop("Unsupported event for ZG_output.")
    }

    out_list <- list()

    if (event %in% c("all", "TWD_start")) {
      z <- cyc[cyc$Phases == 1L, , drop = FALSE]
      if (nrow(z) > 0) {
        z$event_type <- "TWD_start"
        z$event_time <- z$Start
        out_list[[length(out_list) + 1L]] <- z
      }
    }

    if (event %in% c("all", "TWD_end")) {
      z <- cyc[cyc$Phases == 1L, , drop = FALSE]
      if (nrow(z) > 0) {
        z$event_type <- "TWD_end"
        z$event_time <- z$End
        out_list[[length(out_list) + 1L]] <- z
      }
    }

    if (event %in% c("all", "GRO_start")) {
      z <- cyc[cyc$Phases == 2L, , drop = FALSE]
      if (nrow(z) > 0) {
        z$event_type <- "GRO_start"
        z$event_time <- z$Start
        out_list[[length(out_list) + 1L]] <- z
      }
    }

    if (event %in% c("all", "GRO_end")) {
      z <- cyc[cyc$Phases == 2L, , drop = FALSE]
      if (nrow(z) > 0) {
        z$event_type <- "GRO_end"
        z$event_time <- z$End
        out_list[[length(out_list) + 1L]] <- z
      }
    }

    if (event %in% c("all", "MaxTWD")) {
      if (!"Max.twd.time" %in% names(cyc)) {
        stop("ZG_cycle does not contain column 'Max.twd.time'.")
      }

      z <- cyc[cyc$Phases == 1L, , drop = FALSE]
      if (nrow(z) > 0) {
        z$event_type <- "MaxTWD"
        z$event_time <- z$Max.twd.time
        out_list[[length(out_list) + 1L]] <- z
      }
    }

    if (event %in% c("phase_start", "phase_end")) {
      if (is.null(phase)) {
        stop("For event = 'phase_start' or 'phase_end', please supply phase = 'TWD' or 'GRO'.")
      }

      phase <- match.arg(phase, c("TWD", "GRO"))
      ph_num <- ifelse(phase == "TWD", 1L, 2L)

      z <- cyc[cyc$Phases == ph_num, , drop = FALSE]
      if (nrow(z) > 0) {
        z$event_type <- paste0(
          phase,
          "_",
          ifelse(event == "phase_start", "start", "end")
        )
        z$event_time <- if (event == "phase_start") z$Start else z$End
        out_list[[length(out_list) + 1L]] <- z
      }
    }

    if (length(out_list) == 0) {
      out <- tibble::tibble()
    } else {
      out <- dplyr::bind_rows(out_list)
    }

  } else if (inherits(x, "SC_output")) {
    cyc <- tibble::as_tibble(x$SC_cycle)

    if (!all(c("Phases", "Start", "End") %in% names(cyc))) {
      stop("SC_output does not contain the expected SC_cycle columns.")
    }

    phase_map <- c(
      "1" = "Shrinkage",
      "2" = "Expansion",
      "3" = "Increment"
    )

    cyc$phase_label <- unname(phase_map[as.character(cyc$Phases)])

    if (!is.null(min_duration) && "Duration_h" %in% names(cyc)) {
      cyc <- cyc[is.na(cyc$Duration_h) | cyc$Duration_h >= min_duration, , drop = FALSE]
    }

    if (!is.null(min_magnitude) && "Magnitude" %in% names(cyc)) {
      cyc <- cyc[is.na(cyc$Magnitude) | abs(cyc$Magnitude) >= min_magnitude, , drop = FALSE]
    }

    allowed <- c(
      "all",
      "Shrinkage_start", "Shrinkage_end",
      "Expansion_start", "Expansion_end",
      "Increment_start", "Increment_end",
      "phase_start", "phase_end"
    )

    if (!event %in% allowed) {
      stop("Unsupported event for SC_output.")
    }

    out_list <- list()

    for (ph in c("Shrinkage", "Expansion", "Increment")) {
      ph_num <- which(c("Shrinkage", "Expansion", "Increment") == ph)

      if (event %in% c("all", paste0(ph, "_start"))) {
        z <- cyc[cyc$Phases == ph_num, , drop = FALSE]
        if (nrow(z) > 0) {
          z$event_type <- paste0(ph, "_start")
          z$event_time <- z$Start
          out_list[[length(out_list) + 1L]] <- z
        }
      }

      if (event %in% c("all", paste0(ph, "_end"))) {
        z <- cyc[cyc$Phases == ph_num, , drop = FALSE]
        if (nrow(z) > 0) {
          z$event_type <- paste0(ph, "_end")
          z$event_time <- z$End
          out_list[[length(out_list) + 1L]] <- z
        }
      }
    }

    if (event %in% c("phase_start", "phase_end")) {
      if (is.null(phase)) {
        stop("For event = 'phase_start' or 'phase_end', please supply phase = 'Shrinkage', 'Expansion', or 'Increment'.")
      }

      phase <- match.arg(phase, c("Shrinkage", "Expansion", "Increment"))
      ph_num <- which(c("Shrinkage", "Expansion", "Increment") == phase)

      z <- cyc[cyc$Phases == ph_num, , drop = FALSE]
      if (nrow(z) > 0) {
        z$event_type <- paste0(
          phase,
          "_",
          ifelse(event == "phase_start", "start", "end")
        )
        z$event_time <- if (event == "phase_start") z$Start else z$End
        out_list[[length(out_list) + 1L]] <- z
      }
    }

    if (length(out_list) == 0) {
      out <- tibble::tibble()
    } else {
      out <- dplyr::bind_rows(out_list)
    }

  } else {
    stop("'x' must be output of phase.zg() or phase.sc().")
  }

  if (nrow(out) == 0) {
    out <- tibble::tibble(
      event_id = integer(0),
      event_time = as.POSIXct(character(0)),
      event_type = character(0),
      phase = character(0)
    )
  } else {
    out$phase <- out$phase_label

    if (isTRUE(remove_na_times)) {
      out <- out[!is.na(out$event_time), , drop = FALSE]
    }

    out <- out[order(out$event_time), , drop = FALSE]
    out$event_id <- seq_len(nrow(out))

    out <- out[
      ,
      c(
        "event_id", "event_time", "event_type", "phase",
        setdiff(names(out), c("event_id", "event_time", "event_type", "phase"))
      ),
      drop = FALSE
    ]
  }

  class(out) <- c("dm_events", class(out))

  out
}


# -------------------------
# internal helpers
# -------------------------

#' Parse date-time values for SEA
#'
#' @param x A vector of date-time, date, or character values.
#'
#' @return A \code{POSIXct} vector.
#'
#' @importFrom lubridate ymd_hms parse_date_time
#' @keywords internal
dmea_parse_time <- function(x) {
  if (inherits(x, "POSIXct")) {
    return(as.POSIXct(x))
  }

  if (inherits(x, "POSIXt")) {
    return(as.POSIXct(x))
  }

  if (inherits(x, "Date")) {
    return(as.POSIXct(x))
  }

  out <- suppressWarnings(lubridate::ymd_hms(x, quiet = TRUE))

  if (all(is.na(out))) {
    out <- suppressWarnings(
      lubridate::parse_date_time(
        x,
        orders = c(
          "ymd HMS", "ymd HM", "ymd",
          "dmy HMS", "dmy HM", "dmy",
          "mdy HMS", "mdy HM", "mdy"
        ),
        quiet = TRUE
      )
    )
  }

  out
}


#' Create a lubridate period string
#'
#' @param step Numeric step size.
#' @param unit Unit. One of \code{"mins"}, \code{"hours"}, or \code{"days"}.
#'
#' @return A character period string for [lubridate::floor_date()].
#'
#' @keywords internal
dmea_unit_string <- function(step, unit) {
  if (unit == "mins") {
    return(paste(step, "minutes"))
  }

  if (unit == "hours") {
    return(paste(step, "hours"))
  }

  if (unit == "days") {
    return(paste(step, "days"))
  }

  stop("Unsupported unit.")
}


#' Convert SEA step size to seconds
#'
#' @param step Numeric step size.
#' @param unit Unit. One of \code{"mins"}, \code{"hours"}, or \code{"days"}.
#'
#' @return Numeric number of seconds.
#'
#' @keywords internal
dmea_step_seconds <- function(step, unit) {
  if (unit == "mins") {
    return(step * 60)
  }

  if (unit == "hours") {
    return(step * 3600)
  }

  if (unit == "days") {
    return(step * 86400)
  }

  stop("Unsupported unit.")
}


#' Apply an aggregation function
#'
#' @param x Numeric vector.
#' @param fun Function or character string. Supported character values are
#'   \code{"mean"}, \code{"sum"}, \code{"max"}, \code{"min"}, and
#'   \code{"median"}.
#'
#' @return Numeric scalar.
#'
#' @keywords internal
dmea_apply_fun <- function(x, fun) {
  if (all(is.na(x))) {
    return(NA_real_)
  }

  if (is.function(fun)) {
    return(fun(x))
  }

  fun <- match.arg(fun, c("mean", "sum", "max", "min", "median"))

  switch(
    fun,
    mean = mean(x, na.rm = TRUE),
    sum = sum(x, na.rm = TRUE),
    max = max(x, na.rm = TRUE),
    min = min(x, na.rm = TRUE),
    median = median(x, na.rm = TRUE)
  )
}


#' Filter timestamps by calendar year and day-of-year
#'
#' @description
#' Internal helper used by the SEA functions to select timestamps by calendar
#' year and day-of-year.
#'
#' @param times A vector of date-time values.
#' @param Year Optional numeric vector of calendar years to retain.
#' @param DOY Optional numeric vector of length one or two. If length one, only
#'   that day-of-year is retained. If length two, the inclusive DOY range is
#'   retained. If the first value is larger than the second, the range is treated
#'   as a cross-year DOY window, for example \code{DOY = c(300, 60)} keeps
#'   \code{DOY >= 300} or \code{DOY <= 60}.
#'
#' @return A logical vector with the same length as \code{times}.
#'
#' @importFrom lubridate year yday
#' @keywords internal
dmea_time_filter_index <- function(times, Year = NULL, DOY = NULL) {
  tt <- dmea_parse_time(times)

  keep <- !is.na(tt)

  if (!is.null(Year)) {
    if (!is.numeric(Year) || length(Year) < 1L || any(is.na(Year))) {
      stop("'Year' must be a non-empty numeric vector without NA values.")
    }

    if (any(Year != floor(Year))) {
      stop("'Year' must contain whole calendar years.")
    }

    Year <- as.integer(Year)

    keep <- keep & lubridate::year(tt) %in% Year
  }

  if (!is.null(DOY)) {
    if (!is.numeric(DOY) || !length(DOY) %in% c(1L, 2L) || any(is.na(DOY))) {
      stop("'DOY' must be a numeric vector of length one or two without NA values.")
    }

    if (any(DOY != floor(DOY))) {
      stop("'DOY' must contain whole day-of-year values.")
    }

    if (any(DOY < 1 | DOY > 366)) {
      stop("'DOY' values must be between 1 and 366.")
    }

    DOY <- as.integer(DOY)

    yy <- lubridate::yday(tt)

    if (length(DOY) == 1L) {
      keep <- keep & yy == DOY[1L]
    } else {
      doy_start <- DOY[1L]
      doy_end <- DOY[2L]

      if (doy_start <= doy_end) {
        keep <- keep & yy >= doy_start & yy <= doy_end
      } else {
        keep <- keep & (yy >= doy_start | yy <= doy_end)
      }
    }
  }

  keep
}


#' Filter a table by calendar year and day-of-year
#'
#' @param dat A data frame or tibble.
#' @param time_col Name of the timestamp column.
#' @param Year Optional numeric vector of calendar years.
#' @param DOY Optional numeric vector of day-of-year values.
#' @param label Text used in error messages.
#'
#' @return A filtered tibble.
#'
#' @importFrom tibble as_tibble
#' @keywords internal
dmea_filter_time_table <- function(dat,
                                   time_col,
                                   Year = NULL,
                                   DOY = NULL,
                                   label = "rows") {
  dat <- tibble::as_tibble(dat)

  if (!time_col %in% names(dat)) {
    stop("Column '", time_col, "' not found in ", label, ".")
  }

  keep <- dmea_time_filter_index(
    times = dat[[time_col]],
    Year = Year,
    DOY = DOY
  )

  dat[keep, , drop = FALSE]
}


#' Prepare climate data for superposed epoch analysis
#'
#' @param clim Climate data frame. First column must contain timestamps.
#' @param vars Climate variables to use, or \code{"all"}.
#' @param step Step size in units.
#' @param unit One of \code{"hours"}, \code{"days"}, or \code{"mins"}.
#' @param agg_fun Aggregation function applied to all variables when
#'   \code{var_funs} is not supplied.
#' @param var_funs Optional named vector or named list of aggregation functions
#'   for individual variables.
#'
#' @return A tibble with aggregated climate data.
#'
#' @importFrom tibble as_tibble
#' @importFrom lubridate floor_date
#' @importFrom dplyr %>% group_by summarise across all_of cur_column rename
#' @keywords internal
dmea_prepare_climate <- function(clim,
                                 vars = "all",
                                 step = 1,
                                 unit = c("hours", "days", "mins"),
                                 agg_fun = "mean",
                                 var_funs = NULL) {
  TIME_BIN <- NULL
  unit <- match.arg(unit)

  clim <- tibble::as_tibble(clim)
  names(clim)[1] <- "TIME"
  clim$TIME <- dmea_parse_time(clim$TIME)

  if (any(is.na(clim$TIME))) {
    stop("Some climate timestamps could not be parsed.")
  }

  clim <- clim[order(clim$TIME), , drop = FALSE]

  if (identical(vars, "all")) {
    vars <- names(clim)[vapply(clim, is.numeric, logical(1))]
    vars <- setdiff(vars, "TIME")
  }

  if (length(vars) == 0) {
    stop("No climate variables selected.")
  }

  miss <- setdiff(vars, names(clim))

  if (length(miss) > 0) {
    stop("These climate variables were not found: ", paste(miss, collapse = ", "))
  }

  period_unit <- dmea_unit_string(step, unit)

  clim$TIME_BIN <- lubridate::floor_date(
    clim$TIME,
    unit = period_unit
  )

  if (is.null(var_funs)) {
    var_funs <- stats::setNames(rep(agg_fun, length(vars)), vars)
  } else {
    if (is.null(names(var_funs))) {
      if (length(var_funs) == 1) {
        var_funs <- stats::setNames(rep(var_funs, length(vars)), vars)
      } else {
        stop("'var_funs' must be named by climate variable, or length 1.")
      }
    } else {
      tmp <- stats::setNames(rep(agg_fun, length(vars)), vars)
      shared <- intersect(names(var_funs), vars)
      tmp[shared] <- unlist(var_funs[shared], use.names = FALSE)
      var_funs <- tmp
    }
  }

  out <- clim %>%
    dplyr::group_by(TIME_BIN) %>%
    dplyr::summarise(
      dplyr::across(
        dplyr::all_of(vars),
        ~ dmea_apply_fun(.x, var_funs[dplyr::cur_column()][[1]])
      ),
      .groups = "drop"
    ) %>%
    dplyr::rename(TIME = TIME_BIN)

  out
}


#' Extract climate values around anchor times
#'
#' @param anchor_times Anchor times.
#' @param clim_tbl Aggregated climate table from [dmea_prepare_climate()].
#' @param lag_values Lag values in seconds.
#' @param vars Climate variables to extract.
#'
#' @return A long-format tibble.
#'
#' @importFrom tibble tibble
#' @importFrom dplyr left_join all_of
#' @importFrom tidyr pivot_longer
#' @keywords internal
dmea_extract_from_anchors <- function(anchor_times,
                                      clim_tbl,
                                      lag_values,
                                      vars) {
  if (length(anchor_times) == 0) {
    return(tibble::tibble())
  }

  tz_use <- attr(clim_tbl$TIME, "tzone")

  if (is.null(tz_use) || length(tz_use) == 0) {
    tz_use <- "UTC"
  }

  anchor_times <- as.POSIXct(
    anchor_times,
    origin = "1970-01-01",
    tz = tz_use
  )

  exp <- tibble::tibble(
    event_id = rep(seq_along(anchor_times), each = length(lag_values)),
    anchor_time = rep(anchor_times, each = length(lag_values)),
    lag = rep(lag_values, times = length(anchor_times))
  )

  exp$TIME <- as.POSIXct(
    exp$anchor_time + exp$lag,
    origin = "1970-01-01",
    tz = tz_use
  )

  join <- dplyr::left_join(
    exp,
    clim_tbl[, c("TIME", vars), drop = FALSE],
    by = "TIME"
  )

  out <- tidyr::pivot_longer(
    join,
    cols = dplyr::all_of(vars),
    names_to = "variable",
    values_to = "value"
  )

  out
}


#' Sample one null anchor time
#'
#' @param event_time Original event time.
#' @param clim_times Candidate climate timestamps.
#' @param null Null model. One of \code{"same_doy_window"},
#'   \code{"same_month"}, or \code{"random_time"}.
#' @param doy_window DOY window used for \code{null = "same_doy_window"}.
#' @param match_hour Logical. For hourly or minute data, keep the same hour
#'   when possible.
#' @param match_minute Logical. For minute data, keep the same minute when
#'   possible.
#' @param unit One of \code{"hours"}, \code{"days"}, or \code{"mins"}.
#'
#' @return One sampled \code{POSIXct} timestamp.
#'
#' @importFrom lubridate month yday hour minute
#' @keywords internal
dmea_sample_anchor_one <- function(event_time,
                                   clim_times,
                                   null = c("same_doy_window", "same_month", "random_time"),
                                   doy_window = 15,
                                   match_hour = TRUE,
                                   match_minute = TRUE,
                                   unit = c("hours", "days", "mins")) {
  null <- match.arg(null)
  unit <- match.arg(unit)

  candidates <- clim_times

  tz_use <- attr(clim_times, "tzone")

  if (is.null(tz_use) || length(tz_use) == 0) {
    tz_use <- "UTC"
  }

  if (null == "same_month") {
    candidates <- candidates[
      lubridate::month(candidates) == lubridate::month(event_time)
    ]
  }

  if (null == "same_doy_window") {
    ev_doy <- lubridate::yday(event_time)
    cdoy <- lubridate::yday(candidates)

    dd <- abs(cdoy - ev_doy)
    dd <- pmin(dd, 366 - dd)

    candidates <- candidates[dd <= doy_window]
  }

  if (unit %in% c("hours", "mins") && isTRUE(match_hour)) {
    hh <- lubridate::hour(event_time)
    candidates2 <- candidates[lubridate::hour(candidates) == hh]

    if (length(candidates2) > 0) {
      candidates <- candidates2
    }
  }

  if (unit == "mins" && isTRUE(match_minute)) {
    mm <- lubridate::minute(event_time)
    candidates2 <- candidates[lubridate::minute(candidates) == mm]

    if (length(candidates2) > 0) {
      candidates <- candidates2
    }
  }

  if (length(candidates) == 0) {
    candidates <- clim_times
  }

  out <- sample(candidates, size = 1)

  as.POSIXct(
    out,
    origin = "1970-01-01",
    tz = tz_use
  )
}


#' Composite statistic helper
#'
#' @param x Numeric vector.
#' @param statistic Statistic. One of \code{"mean"} or \code{"median"}.
#'
#' @return Numeric scalar.
#'
#' @keywords internal
dmea_stat_fun <- function(x, statistic = c("mean", "median")) {
  statistic <- match.arg(statistic)

  if (all(is.na(x))) {
    return(NA_real_)
  }

  if (statistic == "mean") {
    return(mean(x, na.rm = TRUE))
  }

  median(x, na.rm = TRUE)
}


#' Build an epoch table around event times
#'
#' @description
#' Builds a long-format epoch table by extracting climate values before and
#' after event times.
#'
#' The optional \code{Year} and \code{DOY} arguments allow the user to restrict
#' which events are included in the superposed epoch analysis. The full climate
#' table is retained for lag extraction so that values before and after selected
#' events are still available.
#'
#' @param events Output of [dm_event_times()] or a data frame containing an
#'   \code{event_time} column.
#' @param clim Climate data frame. The first column must contain timestamps.
#' @param vars Climate variables to use, or \code{"all"}.
#' @param lag_before Number of lag steps before the event.
#' @param lag_after Number of lag steps after the event.
#' @param step Step size in units.
#' @param unit One of \code{"hours"}, \code{"days"}, or \code{"mins"}.
#' @param agg_fun Aggregation function applied to all climate variables when
#'   \code{var_funs} is not supplied.
#' @param var_funs Optional named vector or named list of aggregation functions
#'   for each variable.
#' @param Year Optional numeric vector of calendar years to include in the SEA.
#'   Events whose \code{event_time} falls outside these years are removed before
#'   epoch extraction.
#' @param DOY Optional numeric vector of length one or two giving the
#'   day-of-year window to include in the SEA. If length one, only that DOY is
#'   retained. If length two, the inclusive range is retained. Cross-year DOY
#'   windows are supported, for example \code{DOY = c(300, 60)}.
#' @param restrict_null_to_window Logical. If \code{TRUE}, null anchor times in
#'   [dm_epoch_test()] are sampled only from the same \code{Year}/\code{DOY}
#'   window. The full climate table is still retained for extracting
#'   \code{lag_before} and \code{lag_after} windows. Default is \code{TRUE}.
#'
#' @return
#' An object of class \code{c("dm_epoch", "dm_epoch_extract")}.
#'
#' @examples
#' \donttest{
#' # events <- dm_event_times(zg, event = "GRO_start")
#' # ep <- dm_epoch_extract(
#' #   events = events,
#' #   clim = climate,
#' #   vars = c("Rain", "temp", "vpd"),
#' #   Year = c(2022, 2023, 2024),
#' #   DOY = c(100, 300),
#' #   lag_before = 24,
#' #   lag_after = 24,
#' #   unit = "hours"
#' # )
#' }
#'
#' @importFrom tibble as_tibble
#' @importFrom lubridate floor_date
#' @importFrom dplyr left_join all_of
#' @importFrom tidyr pivot_longer
#' @export
dm_epoch_extract <- function(events,
                             clim,
                             vars = "all",
                             lag_before = 24,
                             lag_after = 24,
                             step = 1,
                             unit = c("hours", "days", "mins"),
                             agg_fun = "mean",
                             var_funs = NULL,
                             Year = NULL,
                             DOY = NULL,
                             restrict_null_to_window = TRUE) {

  unit <- match.arg(unit)

  if (!("event_time" %in% names(events))) {
    stop("'events' must contain an 'event_time' column.")
  }

  if (!is.numeric(lag_before) ||
      !is.numeric(lag_after) ||
      length(lag_before) != 1 ||
      length(lag_after) != 1 ||
      is.na(lag_before) ||
      is.na(lag_after) ||
      lag_before < 0 ||
      lag_after < 0) {
    stop("'lag_before' and 'lag_after' must be non-negative scalars.")
  }

  if (!is.numeric(step) ||
      length(step) != 1 ||
      is.na(step) ||
      step <= 0) {
    stop("'step' must be a positive number.")
  }

  ev <- tibble::as_tibble(events)
  ev$event_time <- dmea_parse_time(ev$event_time)

  if (any(is.na(ev$event_time))) {
    stop("Some event times could not be parsed.")
  }

  n_events_before_filter <- nrow(ev)

  ev <- dmea_filter_time_table(
    dat = ev,
    time_col = "event_time",
    Year = Year,
    DOY = DOY,
    label = "events"
  )

  if (nrow(ev) == 0) {
    stop("No events remain after filtering by Year and/or DOY.")
  }

  if ("event_id" %in% names(ev)) {
    ev$original_event_id <- ev$event_id
  }

  ev$event_id <- seq_len(nrow(ev))

  clim_tbl <- dmea_prepare_climate(
    clim = clim,
    vars = vars,
    step = step,
    unit = unit,
    agg_fun = agg_fun,
    var_funs = var_funs
  )

  vars_use <- setdiff(names(clim_tbl), "TIME")

  if (length(vars_use) == 0) {
    stop("No usable climate variables found after aggregation.")
  }

  bin_unit <- dmea_unit_string(step, unit)

  ev$event_time_aligned <- lubridate::floor_date(
    ev$event_time,
    unit = bin_unit
  )

  lag_values_units <- seq.int(-lag_before, lag_after, by = 1L) * step
  lag_values_seconds <- lag_values_units * dmea_step_seconds(1, unit)

  exp <- ev[
    rep(seq_len(nrow(ev)), each = length(lag_values_units)),
    ,
    drop = FALSE
  ]

  exp$lag <- rep(lag_values_units, times = nrow(ev))
  exp$lag_seconds <- rep(lag_values_seconds, times = nrow(ev))
  exp$TIME <- exp$event_time_aligned + exp$lag_seconds

  epoch_wide <- dplyr::left_join(
    exp,
    clim_tbl,
    by = "TIME"
  )

  epoch_long <- tidyr::pivot_longer(
    epoch_wide,
    cols = dplyr::all_of(vars_use),
    names_to = "variable",
    values_to = "value"
  )

  out <- list(
    events = ev,
    epoch_data = epoch_long,
    climate_data = clim_tbl,
    settings = list(
      vars = vars_use,
      lag_before = lag_before,
      lag_after = lag_after,
      step = step,
      unit = unit,
      lag_values = lag_values_units,
      agg_fun = agg_fun,
      var_funs = var_funs,
      Year = Year,
      DOY = DOY,
      restrict_null_to_window = isTRUE(restrict_null_to_window),
      n_events_before_filter = n_events_before_filter,
      n_events_after_filter = nrow(ev)
    )
  )

  class(out) <- c("dm_epoch", "dm_epoch_extract")

  out
}


#' Test superposed epoch composites against a null distribution
#'
#' @description
#' Tests observed superposed epoch composites against empirical null
#' distributions generated by randomly sampled anchor times.
#'
#' If the input object was created with \code{Year} and/or \code{DOY} filters
#' in [dm_epoch_extract()] and \code{restrict_null_to_window = TRUE}, null
#' anchors are sampled from the same calendar-year and day-of-year window.
#'
#' @param x Object returned by [dm_epoch_extract()].
#' @param statistic Composite statistic. One of \code{"mean"} or
#'   \code{"median"}.
#' @param null Null model. One of \code{"same_doy_window"},
#'   \code{"same_month"}, or \code{"random_time"}.
#' @param n_iter Number of null iterations.
#' @param doy_window DOY window used for \code{null = "same_doy_window"}.
#' @param match_hour Logical. For hourly or minute data, keep the same hour
#'   when possible.
#' @param match_minute Logical. For minute data, keep the same minute when
#'   possible.
#' @param conf_level Confidence level for null envelopes.
#' @param seed Optional random seed.
#'
#' @return
#' An object of class \code{c("dm_epoch", "dm_epoch_test")}.
#'
#' @importFrom tibble as_tibble tibble
#' @importFrom dplyr %>% group_by summarise mutate left_join bind_rows
#' @importFrom stats sd quantile
#' @export
dm_epoch_test <- function(x,
                          statistic = c("mean", "median"),
                          null = c("same_doy_window", "same_month", "random_time"),
                          n_iter = 1000,
                          doy_window = 15,
                          match_hour = TRUE,
                          match_minute = TRUE,
                          conf_level = 0.95,
                          seed = NULL) {
  variable <- value <- null_stat <- extreme <- NULL

  if (!inherits(x, "dm_epoch_extract")) {
    stop("'x' must be output of dm_epoch_extract().")
  }

  statistic <- match.arg(statistic)
  null <- match.arg(null)

  if (!is.numeric(n_iter) ||
      length(n_iter) != 1 ||
      is.na(n_iter) ||
      n_iter < 1) {
    stop("'n_iter' must be a positive integer.")
  }

  n_iter <- as.integer(n_iter)

  if (!is.numeric(conf_level) ||
      length(conf_level) != 1 ||
      is.na(conf_level) ||
      conf_level <= 0 ||
      conf_level >= 1) {
    stop("'conf_level' must be between 0 and 1.")
  }

  if (!is.null(seed)) {
    set.seed(seed)
  }

  ep <- tibble::as_tibble(x$epoch_data)
  ev <- tibble::as_tibble(x$events)
  clim_tbl <- tibble::as_tibble(x$climate_data)

  vars_use <- x$settings$vars
  lag_values <- x$settings$lag_values
  unit <- x$settings$unit

  tz_use <- attr(clim_tbl$TIME, "tzone")

  if (is.null(tz_use) || length(tz_use) == 0) {
    tz_use <- "UTC"
  }

  observed <- ep %>%
    dplyr::group_by(variable, lag) %>%
    dplyr::summarise(
      observed = dmea_stat_fun(value, statistic = statistic),
      n = sum(!is.na(value)),
      .groups = "drop"
    )

  null_list <- vector("list", n_iter)

  clim_times_all <- as.POSIXct(
    clim_tbl$TIME,
    origin = "1970-01-01",
    tz = tz_use
  )

  use_restricted_null <- FALSE

  if (!is.null(x$settings$restrict_null_to_window)) {
    use_restricted_null <- isTRUE(x$settings$restrict_null_to_window)
  }

  if (isTRUE(use_restricted_null) &&
      (!is.null(x$settings$Year) || !is.null(x$settings$DOY))) {

    null_keep <- dmea_time_filter_index(
      times = clim_times_all,
      Year = x$settings$Year,
      DOY = x$settings$DOY
    )

    clim_times <- clim_times_all[null_keep]

    if (length(clim_times) == 0) {
      stop(
        "No climate timestamps remain for null-anchor sampling after applying ",
        "the Year/DOY filter. Set restrict_null_to_window = FALSE in ",
        "dm_epoch_extract() if you want to sample null anchors from the full ",
        "climate record."
      )
    }

  } else {
    clim_times <- clim_times_all
  }

  for (ii in seq_len(n_iter)) {
    anchors <- do.call(
      c,
      lapply(ev$event_time_aligned, function(tt) {
        dmea_sample_anchor_one(
          event_time = as.POSIXct(tt, origin = "1970-01-01", tz = tz_use),
          clim_times = clim_times,
          null = null,
          doy_window = doy_window,
          match_hour = match_hour,
          match_minute = match_minute,
          unit = unit
        )
      })
    )

    anchors <- as.POSIXct(
      anchors,
      origin = "1970-01-01",
      tz = tz_use
    )

    null_ep <- dmea_extract_from_anchors(
      anchor_times = anchors,
      clim_tbl = clim_tbl,
      lag_values = lag_values * dmea_step_seconds(1, unit),
      vars = vars_use
    )

    if (nrow(null_ep) == 0) {
      null_sum <- tibble::tibble(
        iter = ii,
        variable = character(0),
        lag = numeric(0),
        null_stat = numeric(0)
      )
    } else {
      null_sum <- null_ep %>%
        dplyr::mutate(
          lag = .data$lag / dmea_step_seconds(1, unit)
        ) %>%
        dplyr::group_by(variable, lag) %>%
        dplyr::summarise(
          null_stat = dmea_stat_fun(value, statistic = statistic),
          .groups = "drop"
        )

      null_sum$iter <- ii

      null_sum <- null_sum[
        ,
        c("iter", "variable", "lag", "null_stat"),
        drop = FALSE
      ]
    }

    null_list[[ii]] <- null_sum
  }

  null_long <- dplyr::bind_rows(null_list)

  alpha <- (1 - conf_level) / 2

  null_summary <- null_long %>%
    dplyr::group_by(variable, lag) %>%
    dplyr::summarise(
      null_mean = mean(null_stat, na.rm = TRUE),
      null_sd = stats::sd(null_stat, na.rm = TRUE),
      ci_low = stats::quantile(
        null_stat,
        probs = alpha,
        na.rm = TRUE,
        names = FALSE
      ),
      ci_high = stats::quantile(
        null_stat,
        probs = 1 - alpha,
        na.rm = TRUE,
        names = FALSE
      ),
      .groups = "drop"
    )

  summary_tbl <- dplyr::left_join(
    observed,
    null_summary,
    by = c("variable", "lag")
  )

  pvals <- null_long %>%
    dplyr::left_join(
      summary_tbl[
        ,
        c("variable", "lag", "observed", "null_mean"),
        drop = FALSE
      ],
      by = c("variable", "lag")
    ) %>%
    dplyr::mutate(
      extreme = abs(.data$null_stat - .data$null_mean) >=
        abs(.data$observed - .data$null_mean)
    ) %>%
    dplyr::group_by(variable, lag) %>%
    dplyr::summarise(
      p_value = mean(extreme, na.rm = TRUE),
      .groups = "drop"
    )

  summary_tbl <- dplyr::left_join(
    summary_tbl,
    pvals,
    by = c("variable", "lag")
  )

  summary_tbl$significant <- with(
    summary_tbl,
    observed < ci_low | observed > ci_high
  )

  out <- list(
    events = ev,
    epoch_data = ep,
    climate_data = clim_tbl,
    summary = summary_tbl,
    null_distribution = null_long,
    settings = c(
      x$settings,
      list(
        statistic = statistic,
        null = null,
        n_iter = n_iter,
        doy_window = doy_window,
        match_hour = match_hour,
        match_minute = match_minute,
        conf_level = conf_level,
        seed = seed
      )
    )
  )

  class(out) <- c("dm_epoch", "dm_epoch_test")

  out
}


#' Summarize a dm_epoch object
#'
#' @param object Object of class \code{"dm_epoch"}.
#' @param ... Unused.
#'
#' @return
#' An object of class \code{"summary.dm_epoch"}.
#'
#' @importFrom tibble as_tibble
#' @importFrom dplyr %>% group_by summarise
#' @export
summary.dm_epoch <- function(object, ...) {
  variable <- significant <- NULL

  if (!inherits(object, "dm_epoch")) {
    stop("'object' must inherit from 'dm_epoch'.")
  }

  if (inherits(object, "dm_epoch_test")) {
    tab <- tibble::as_tibble(object$summary)

    out <- list(
      n_events = nrow(object$events),
      variables = unique(tab$variable),
      lag_range = range(tab$lag, na.rm = TRUE),
      significant_counts = tab %>%
        dplyr::group_by(variable) %>%
        dplyr::summarise(
          n_significant = sum(significant %in% TRUE, na.rm = TRUE),
          .groups = "drop"
        ),
      summary_table = tab,
      settings = object$settings
    )
  } else {
    tab <- tibble::as_tibble(object$epoch_data)

    out <- list(
      n_events = nrow(object$events),
      variables = unique(tab$variable),
      lag_range = range(tab$lag, na.rm = TRUE),
      settings = object$settings
    )
  }

  class(out) <- "summary.dm_epoch"

  out
}


#' Print summary of a dm_epoch object
#'
#' @param x Object of class \code{"summary.dm_epoch"}.
#' @param ... Unused.
#'
#' @return
#' The input object, invisibly.
#'
#' @export
print.summary.dm_epoch <- function(x, ...) {
  cat("dm_epoch summary\n")
  cat("----------------\n")
  cat("Number of events:", x$n_events, "\n")
  cat("Variables:", paste(x$variables, collapse = ", "), "\n")
  cat("Lag range:", paste(x$lag_range, collapse = " to "), "\n")

  if (!is.null(x$settings$Year)) {
    cat("Year filter:", paste(x$settings$Year, collapse = ", "), "\n")
  }

  if (!is.null(x$settings$DOY)) {
    cat("DOY filter:", paste(x$settings$DOY, collapse = " to "), "\n")
  }

  if (!is.null(x$settings$n_events_before_filter) &&
      !is.null(x$settings$n_events_after_filter)) {
    cat(
      "Events before/after Year-DOY filtering:",
      x$settings$n_events_before_filter,
      "/",
      x$settings$n_events_after_filter,
      "\n"
    )
  }

  if (!is.null(x$significant_counts)) {
    cat("\nSignificant lags by variable:\n")
    print(x$significant_counts)
  }

  invisible(x)
}


#' Plot a dm_epoch object
#'
#' @param x Object of class \code{"dm_epoch"}.
#' @param y Unused.
#' @param type Plot type. One of \code{"composite"}, \code{"heatmap"}, or
#'   \code{"difference"}.
#' @param variables Optional climate variables to plot.
#' @param facet Logical. If \code{TRUE}, facet by variable.
#' @param show_ci Logical. If \code{TRUE} and \code{x} is a
#'   \code{"dm_epoch_test"} object, show the null confidence ribbon.
#' @param legend_position Legend position passed to [ggplot2::theme()].
#' @param ... Unused.
#'
#' @return
#' A \code{ggplot} object.
#'
#' @method plot dm_epoch
#' @importFrom tibble as_tibble
#' @importFrom dplyr %>% group_by summarise
#' @importFrom ggplot2 ggplot aes geom_ribbon geom_line geom_hline geom_vline
#'   geom_tile theme_bw theme element_text labs facet_wrap
#' @export
plot.dm_epoch <- function(x,
                          y = NULL,
                          type = c("composite", "heatmap", "difference"),
                          variables = NULL,
                          facet = TRUE,
                          show_ci = TRUE,
                          legend_position = "right",
                          ...) {
  variable <- value <- observed <- ci_low <- ci_high <- null_mean <- NULL
  diff_obs_null <- event_id <- NULL

  if (!inherits(x, "dm_epoch")) {
    stop("'x' must inherit from 'dm_epoch'.")
  }

  if (!requireNamespace("ggplot2", quietly = TRUE)) {
    stop("Package 'ggplot2' is required for plot.dm_epoch().")
  }

  type <- match.arg(type)

  lag_lab <- paste0("Lag (", x$settings$unit, ")")

  if (inherits(x, "dm_epoch_test")) {
    tab <- tibble::as_tibble(x$summary)
    ep <- tibble::as_tibble(x$epoch_data)
  } else {
    ep <- tibble::as_tibble(x$epoch_data)

    tab <- ep %>%
      dplyr::group_by(variable, lag) %>%
      dplyr::summarise(
        observed = mean(value, na.rm = TRUE),
        n = sum(!is.na(value)),
        .groups = "drop"
      )
  }

  if (!is.null(variables)) {
    tab <- tab[tab$variable %in% variables, , drop = FALSE]
    ep <- ep[ep$variable %in% variables, , drop = FALSE]
  }

  if (nrow(tab) == 0) {
    stop("No data remain after filtering by variable.")
  }

  if (type == "composite") {
    p <- ggplot2::ggplot(
      tab,
      ggplot2::aes(
        x = lag,
        y = observed,
        colour = variable,
        group = variable
      )
    )

    if (inherits(x, "dm_epoch_test") &&
        isTRUE(show_ci) &&
        all(c("ci_low", "ci_high") %in% names(tab))) {
      p <- p +
        ggplot2::geom_ribbon(
          data = tab,
          mapping = ggplot2::aes(
            x = lag,
            ymin = ci_low,
            ymax = ci_high,
            fill = variable,
            group = variable
          ),
          inherit.aes = FALSE,
          alpha = 0.18,
          colour = NA
        )
    }

    if (inherits(x, "dm_epoch_test") &&
        "null_mean" %in% names(tab)) {
      p <- p +
        ggplot2::geom_line(
          data = tab,
          mapping = ggplot2::aes(
            x = lag,
            y = null_mean,
            colour = variable,
            group = variable
          ),
          linetype = 2,
          linewidth = 0.6,
          alpha = 0.8,
          inherit.aes = FALSE
        )
    }

    p <- p +
      ggplot2::geom_hline(yintercept = 0, linetype = 2) +
      ggplot2::geom_vline(xintercept = 0, linetype = 2) +
      ggplot2::geom_line(linewidth = 0.9) +
      ggplot2::theme_bw() +
      ggplot2::theme(
        legend.position = legend_position,
        axis.title = ggplot2::element_text(size = 14),
        axis.text = ggplot2::element_text(size = 11)
      ) +
      ggplot2::labs(
        x = lag_lab,
        y = paste0(
          "Composite ",
          if (!is.null(x$settings$statistic)) {
            x$settings$statistic
          } else {
            "mean"
          }
        ),
        colour = "Variable",
        fill = "Variable",
        title = "Superposed epoch composite"
      )

    if (isTRUE(facet)) {
      p <- p +
        ggplot2::facet_wrap(
          stats::as.formula("~ variable"),
          scales = "free_y",
          ncol = 1
        )
    }

    return(p)
  }

  if (type == "difference") {
    if (!inherits(x, "dm_epoch_test")) {
      stop("type = 'difference' requires output from dm_epoch_test().")
    }

    if (!"null_mean" %in% names(tab)) {
      stop("No null mean available in the dm_epoch_test object.")
    }

    tab$diff_obs_null <- tab$observed - tab$null_mean

    p <- ggplot2::ggplot(
      tab,
      ggplot2::aes(
        x = lag,
        y = diff_obs_null,
        colour = variable,
        group = variable
      )
    ) +
      ggplot2::geom_hline(yintercept = 0, linetype = 2) +
      ggplot2::geom_vline(xintercept = 0, linetype = 2) +
      ggplot2::geom_line(linewidth = 0.9) +
      ggplot2::theme_bw() +
      ggplot2::theme(
        legend.position = legend_position,
        axis.title = ggplot2::element_text(size = 14),
        axis.text = ggplot2::element_text(size = 11)
      ) +
      ggplot2::labs(
        x = lag_lab,
        y = "Observed - null mean",
        colour = "Variable",
        title = "Difference between observed composite and null mean"
      )

    if (isTRUE(facet)) {
      p <- p +
        ggplot2::facet_wrap(
          stats::as.formula("~ variable"),
          scales = "free_y",
          ncol = 1
        )
    }

    return(p)
  }

  if (type == "heatmap") {
    ep2 <- ep
    ep2$event_id <- factor(
      ep2$event_id,
      levels = rev(sort(unique(ep2$event_id)))
    )

    p <- ggplot2::ggplot(
      ep2,
      ggplot2::aes(x = lag, y = event_id, fill = value)
    ) +
      ggplot2::geom_tile() +
      ggplot2::geom_vline(xintercept = 0, linetype = 2) +
      ggplot2::theme_bw() +
      ggplot2::theme(
        legend.position = legend_position,
        axis.title = ggplot2::element_text(size = 14),
        axis.text = ggplot2::element_text(size = 11)
      ) +
      ggplot2::labs(
        x = lag_lab,
        y = "Event ID",
        fill = "Value",
        title = "Epoch heatmap"
      )

    if (isTRUE(facet)) {
      p <- p +
        ggplot2::facet_wrap(
          stats::as.formula("~ variable"),
          scales = "free_x",
          ncol = 1
        )
    }

    return(p)
  }

  stop("Unknown plot type.")
}


#' Null-coalescing helper
#'
#' @param a First object.
#' @param b Fallback object.
#'
#' @return \code{a} if not \code{NULL}, otherwise \code{b}.
#'
#' @keywords internal
#' @noRd
`%||%` <- function(a, b) {
  if (!is.null(a)) {
    a
  } else {
    b
  }
}

Try the dendRoAnalyst package in your browser

Any scripts or data that you put into this service are public.

dendRoAnalyst documentation built on May 20, 2026, 5:07 p.m.