Nothing
# =========================
# Superposed epoch analysis
# =========================
#' Extract event times from phase.zg() or phase.sc() output
#'
#' @description
#' Extracts event times from objects returned by \code{phase.zg()} or
#' \code{phase.sc()}. The resulting table can be passed directly to
#' [dm_epoch_extract()] for superposed epoch analysis.
#'
#' @param x Object of class \code{"ZG_output"} or \code{"SC_output"}.
#' @param event Event type to extract. Use \code{"all"} to return all supported
#' events. For ZG output, supported events are \code{"TWD_start"},
#' \code{"TWD_end"}, \code{"GRO_start"}, \code{"GRO_end"},
#' \code{"MaxTWD"}, \code{"phase_start"}, and \code{"phase_end"}.
#' For SC output, supported events are \code{"Shrinkage_start"},
#' \code{"Shrinkage_end"}, \code{"Expansion_start"}, \code{"Expansion_end"},
#' \code{"Increment_start"}, \code{"Increment_end"}, \code{"phase_start"},
#' and \code{"phase_end"}.
#' @param phase Optional phase filter for generic events \code{"phase_start"}
#' or \code{"phase_end"}. For ZG output, use \code{"TWD"} or \code{"GRO"}.
#' For SC output, use \code{"Shrinkage"}, \code{"Expansion"}, or
#' \code{"Increment"}.
#' @param min_duration Optional minimum \code{Duration_h} filter.
#' @param min_magnitude Optional minimum \code{Magnitude} filter.
#' @param min_max_twd Optional minimum \code{max.twd} filter for ZG output.
#' @param remove_na_times Logical. If \code{TRUE}, rows with missing
#' \code{event_time} are removed.
#'
#' @return
#' A tibble of events with class \code{c("dm_events", ...)}.
#'
#' @importFrom tibble as_tibble tibble
#' @importFrom dplyr bind_rows
#' @export
dm_event_times <- function(x,
event = "all",
phase = NULL,
min_duration = NULL,
min_magnitude = NULL,
min_max_twd = NULL,
remove_na_times = TRUE) {
if (inherits(x, "ZG_output")) {
cyc <- tibble::as_tibble(x$ZG_cycle)
if (!all(c("Phases", "Start", "End") %in% names(cyc))) {
stop("ZG_output does not contain the expected ZG_cycle columns.")
}
cyc$phase_label <- ifelse(cyc$Phases == 1L, "TWD", "GRO")
if (!is.null(min_duration) && "Duration_h" %in% names(cyc)) {
cyc <- cyc[is.na(cyc$Duration_h) | cyc$Duration_h >= min_duration, , drop = FALSE]
}
if (!is.null(min_magnitude) && "Magnitude" %in% names(cyc)) {
cyc <- cyc[is.na(cyc$Magnitude) | cyc$Magnitude >= min_magnitude, , drop = FALSE]
}
if (!is.null(min_max_twd) && "max.twd" %in% names(cyc)) {
cyc <- cyc[is.na(cyc$max.twd) | cyc$max.twd >= min_max_twd, , drop = FALSE]
}
allowed <- c(
"all",
"TWD_start", "TWD_end", "GRO_start", "GRO_end",
"MaxTWD",
"phase_start", "phase_end"
)
if (!event %in% allowed) {
stop("Unsupported event for ZG_output.")
}
out_list <- list()
if (event %in% c("all", "TWD_start")) {
z <- cyc[cyc$Phases == 1L, , drop = FALSE]
if (nrow(z) > 0) {
z$event_type <- "TWD_start"
z$event_time <- z$Start
out_list[[length(out_list) + 1L]] <- z
}
}
if (event %in% c("all", "TWD_end")) {
z <- cyc[cyc$Phases == 1L, , drop = FALSE]
if (nrow(z) > 0) {
z$event_type <- "TWD_end"
z$event_time <- z$End
out_list[[length(out_list) + 1L]] <- z
}
}
if (event %in% c("all", "GRO_start")) {
z <- cyc[cyc$Phases == 2L, , drop = FALSE]
if (nrow(z) > 0) {
z$event_type <- "GRO_start"
z$event_time <- z$Start
out_list[[length(out_list) + 1L]] <- z
}
}
if (event %in% c("all", "GRO_end")) {
z <- cyc[cyc$Phases == 2L, , drop = FALSE]
if (nrow(z) > 0) {
z$event_type <- "GRO_end"
z$event_time <- z$End
out_list[[length(out_list) + 1L]] <- z
}
}
if (event %in% c("all", "MaxTWD")) {
if (!"Max.twd.time" %in% names(cyc)) {
stop("ZG_cycle does not contain column 'Max.twd.time'.")
}
z <- cyc[cyc$Phases == 1L, , drop = FALSE]
if (nrow(z) > 0) {
z$event_type <- "MaxTWD"
z$event_time <- z$Max.twd.time
out_list[[length(out_list) + 1L]] <- z
}
}
if (event %in% c("phase_start", "phase_end")) {
if (is.null(phase)) {
stop("For event = 'phase_start' or 'phase_end', please supply phase = 'TWD' or 'GRO'.")
}
phase <- match.arg(phase, c("TWD", "GRO"))
ph_num <- ifelse(phase == "TWD", 1L, 2L)
z <- cyc[cyc$Phases == ph_num, , drop = FALSE]
if (nrow(z) > 0) {
z$event_type <- paste0(
phase,
"_",
ifelse(event == "phase_start", "start", "end")
)
z$event_time <- if (event == "phase_start") z$Start else z$End
out_list[[length(out_list) + 1L]] <- z
}
}
if (length(out_list) == 0) {
out <- tibble::tibble()
} else {
out <- dplyr::bind_rows(out_list)
}
} else if (inherits(x, "SC_output")) {
cyc <- tibble::as_tibble(x$SC_cycle)
if (!all(c("Phases", "Start", "End") %in% names(cyc))) {
stop("SC_output does not contain the expected SC_cycle columns.")
}
phase_map <- c(
"1" = "Shrinkage",
"2" = "Expansion",
"3" = "Increment"
)
cyc$phase_label <- unname(phase_map[as.character(cyc$Phases)])
if (!is.null(min_duration) && "Duration_h" %in% names(cyc)) {
cyc <- cyc[is.na(cyc$Duration_h) | cyc$Duration_h >= min_duration, , drop = FALSE]
}
if (!is.null(min_magnitude) && "Magnitude" %in% names(cyc)) {
cyc <- cyc[is.na(cyc$Magnitude) | abs(cyc$Magnitude) >= min_magnitude, , drop = FALSE]
}
allowed <- c(
"all",
"Shrinkage_start", "Shrinkage_end",
"Expansion_start", "Expansion_end",
"Increment_start", "Increment_end",
"phase_start", "phase_end"
)
if (!event %in% allowed) {
stop("Unsupported event for SC_output.")
}
out_list <- list()
for (ph in c("Shrinkage", "Expansion", "Increment")) {
ph_num <- which(c("Shrinkage", "Expansion", "Increment") == ph)
if (event %in% c("all", paste0(ph, "_start"))) {
z <- cyc[cyc$Phases == ph_num, , drop = FALSE]
if (nrow(z) > 0) {
z$event_type <- paste0(ph, "_start")
z$event_time <- z$Start
out_list[[length(out_list) + 1L]] <- z
}
}
if (event %in% c("all", paste0(ph, "_end"))) {
z <- cyc[cyc$Phases == ph_num, , drop = FALSE]
if (nrow(z) > 0) {
z$event_type <- paste0(ph, "_end")
z$event_time <- z$End
out_list[[length(out_list) + 1L]] <- z
}
}
}
if (event %in% c("phase_start", "phase_end")) {
if (is.null(phase)) {
stop("For event = 'phase_start' or 'phase_end', please supply phase = 'Shrinkage', 'Expansion', or 'Increment'.")
}
phase <- match.arg(phase, c("Shrinkage", "Expansion", "Increment"))
ph_num <- which(c("Shrinkage", "Expansion", "Increment") == phase)
z <- cyc[cyc$Phases == ph_num, , drop = FALSE]
if (nrow(z) > 0) {
z$event_type <- paste0(
phase,
"_",
ifelse(event == "phase_start", "start", "end")
)
z$event_time <- if (event == "phase_start") z$Start else z$End
out_list[[length(out_list) + 1L]] <- z
}
}
if (length(out_list) == 0) {
out <- tibble::tibble()
} else {
out <- dplyr::bind_rows(out_list)
}
} else {
stop("'x' must be output of phase.zg() or phase.sc().")
}
if (nrow(out) == 0) {
out <- tibble::tibble(
event_id = integer(0),
event_time = as.POSIXct(character(0)),
event_type = character(0),
phase = character(0)
)
} else {
out$phase <- out$phase_label
if (isTRUE(remove_na_times)) {
out <- out[!is.na(out$event_time), , drop = FALSE]
}
out <- out[order(out$event_time), , drop = FALSE]
out$event_id <- seq_len(nrow(out))
out <- out[
,
c(
"event_id", "event_time", "event_type", "phase",
setdiff(names(out), c("event_id", "event_time", "event_type", "phase"))
),
drop = FALSE
]
}
class(out) <- c("dm_events", class(out))
out
}
# -------------------------
# internal helpers
# -------------------------
#' Parse date-time values for SEA
#'
#' @param x A vector of date-time, date, or character values.
#'
#' @return A \code{POSIXct} vector.
#'
#' @importFrom lubridate ymd_hms parse_date_time
#' @keywords internal
dmea_parse_time <- function(x) {
if (inherits(x, "POSIXct")) {
return(as.POSIXct(x))
}
if (inherits(x, "POSIXt")) {
return(as.POSIXct(x))
}
if (inherits(x, "Date")) {
return(as.POSIXct(x))
}
out <- suppressWarnings(lubridate::ymd_hms(x, quiet = TRUE))
if (all(is.na(out))) {
out <- suppressWarnings(
lubridate::parse_date_time(
x,
orders = c(
"ymd HMS", "ymd HM", "ymd",
"dmy HMS", "dmy HM", "dmy",
"mdy HMS", "mdy HM", "mdy"
),
quiet = TRUE
)
)
}
out
}
#' Create a lubridate period string
#'
#' @param step Numeric step size.
#' @param unit Unit. One of \code{"mins"}, \code{"hours"}, or \code{"days"}.
#'
#' @return A character period string for [lubridate::floor_date()].
#'
#' @keywords internal
dmea_unit_string <- function(step, unit) {
if (unit == "mins") {
return(paste(step, "minutes"))
}
if (unit == "hours") {
return(paste(step, "hours"))
}
if (unit == "days") {
return(paste(step, "days"))
}
stop("Unsupported unit.")
}
#' Convert SEA step size to seconds
#'
#' @param step Numeric step size.
#' @param unit Unit. One of \code{"mins"}, \code{"hours"}, or \code{"days"}.
#'
#' @return Numeric number of seconds.
#'
#' @keywords internal
dmea_step_seconds <- function(step, unit) {
if (unit == "mins") {
return(step * 60)
}
if (unit == "hours") {
return(step * 3600)
}
if (unit == "days") {
return(step * 86400)
}
stop("Unsupported unit.")
}
#' Apply an aggregation function
#'
#' @param x Numeric vector.
#' @param fun Function or character string. Supported character values are
#' \code{"mean"}, \code{"sum"}, \code{"max"}, \code{"min"}, and
#' \code{"median"}.
#'
#' @return Numeric scalar.
#'
#' @keywords internal
dmea_apply_fun <- function(x, fun) {
if (all(is.na(x))) {
return(NA_real_)
}
if (is.function(fun)) {
return(fun(x))
}
fun <- match.arg(fun, c("mean", "sum", "max", "min", "median"))
switch(
fun,
mean = mean(x, na.rm = TRUE),
sum = sum(x, na.rm = TRUE),
max = max(x, na.rm = TRUE),
min = min(x, na.rm = TRUE),
median = median(x, na.rm = TRUE)
)
}
#' Filter timestamps by calendar year and day-of-year
#'
#' @description
#' Internal helper used by the SEA functions to select timestamps by calendar
#' year and day-of-year.
#'
#' @param times A vector of date-time values.
#' @param Year Optional numeric vector of calendar years to retain.
#' @param DOY Optional numeric vector of length one or two. If length one, only
#' that day-of-year is retained. If length two, the inclusive DOY range is
#' retained. If the first value is larger than the second, the range is treated
#' as a cross-year DOY window, for example \code{DOY = c(300, 60)} keeps
#' \code{DOY >= 300} or \code{DOY <= 60}.
#'
#' @return A logical vector with the same length as \code{times}.
#'
#' @importFrom lubridate year yday
#' @keywords internal
dmea_time_filter_index <- function(times, Year = NULL, DOY = NULL) {
tt <- dmea_parse_time(times)
keep <- !is.na(tt)
if (!is.null(Year)) {
if (!is.numeric(Year) || length(Year) < 1L || any(is.na(Year))) {
stop("'Year' must be a non-empty numeric vector without NA values.")
}
if (any(Year != floor(Year))) {
stop("'Year' must contain whole calendar years.")
}
Year <- as.integer(Year)
keep <- keep & lubridate::year(tt) %in% Year
}
if (!is.null(DOY)) {
if (!is.numeric(DOY) || !length(DOY) %in% c(1L, 2L) || any(is.na(DOY))) {
stop("'DOY' must be a numeric vector of length one or two without NA values.")
}
if (any(DOY != floor(DOY))) {
stop("'DOY' must contain whole day-of-year values.")
}
if (any(DOY < 1 | DOY > 366)) {
stop("'DOY' values must be between 1 and 366.")
}
DOY <- as.integer(DOY)
yy <- lubridate::yday(tt)
if (length(DOY) == 1L) {
keep <- keep & yy == DOY[1L]
} else {
doy_start <- DOY[1L]
doy_end <- DOY[2L]
if (doy_start <= doy_end) {
keep <- keep & yy >= doy_start & yy <= doy_end
} else {
keep <- keep & (yy >= doy_start | yy <= doy_end)
}
}
}
keep
}
#' Filter a table by calendar year and day-of-year
#'
#' @param dat A data frame or tibble.
#' @param time_col Name of the timestamp column.
#' @param Year Optional numeric vector of calendar years.
#' @param DOY Optional numeric vector of day-of-year values.
#' @param label Text used in error messages.
#'
#' @return A filtered tibble.
#'
#' @importFrom tibble as_tibble
#' @keywords internal
dmea_filter_time_table <- function(dat,
time_col,
Year = NULL,
DOY = NULL,
label = "rows") {
dat <- tibble::as_tibble(dat)
if (!time_col %in% names(dat)) {
stop("Column '", time_col, "' not found in ", label, ".")
}
keep <- dmea_time_filter_index(
times = dat[[time_col]],
Year = Year,
DOY = DOY
)
dat[keep, , drop = FALSE]
}
#' Prepare climate data for superposed epoch analysis
#'
#' @param clim Climate data frame. First column must contain timestamps.
#' @param vars Climate variables to use, or \code{"all"}.
#' @param step Step size in units.
#' @param unit One of \code{"hours"}, \code{"days"}, or \code{"mins"}.
#' @param agg_fun Aggregation function applied to all variables when
#' \code{var_funs} is not supplied.
#' @param var_funs Optional named vector or named list of aggregation functions
#' for individual variables.
#'
#' @return A tibble with aggregated climate data.
#'
#' @importFrom tibble as_tibble
#' @importFrom lubridate floor_date
#' @importFrom dplyr %>% group_by summarise across all_of cur_column rename
#' @keywords internal
dmea_prepare_climate <- function(clim,
vars = "all",
step = 1,
unit = c("hours", "days", "mins"),
agg_fun = "mean",
var_funs = NULL) {
TIME_BIN <- NULL
unit <- match.arg(unit)
clim <- tibble::as_tibble(clim)
names(clim)[1] <- "TIME"
clim$TIME <- dmea_parse_time(clim$TIME)
if (any(is.na(clim$TIME))) {
stop("Some climate timestamps could not be parsed.")
}
clim <- clim[order(clim$TIME), , drop = FALSE]
if (identical(vars, "all")) {
vars <- names(clim)[vapply(clim, is.numeric, logical(1))]
vars <- setdiff(vars, "TIME")
}
if (length(vars) == 0) {
stop("No climate variables selected.")
}
miss <- setdiff(vars, names(clim))
if (length(miss) > 0) {
stop("These climate variables were not found: ", paste(miss, collapse = ", "))
}
period_unit <- dmea_unit_string(step, unit)
clim$TIME_BIN <- lubridate::floor_date(
clim$TIME,
unit = period_unit
)
if (is.null(var_funs)) {
var_funs <- stats::setNames(rep(agg_fun, length(vars)), vars)
} else {
if (is.null(names(var_funs))) {
if (length(var_funs) == 1) {
var_funs <- stats::setNames(rep(var_funs, length(vars)), vars)
} else {
stop("'var_funs' must be named by climate variable, or length 1.")
}
} else {
tmp <- stats::setNames(rep(agg_fun, length(vars)), vars)
shared <- intersect(names(var_funs), vars)
tmp[shared] <- unlist(var_funs[shared], use.names = FALSE)
var_funs <- tmp
}
}
out <- clim %>%
dplyr::group_by(TIME_BIN) %>%
dplyr::summarise(
dplyr::across(
dplyr::all_of(vars),
~ dmea_apply_fun(.x, var_funs[dplyr::cur_column()][[1]])
),
.groups = "drop"
) %>%
dplyr::rename(TIME = TIME_BIN)
out
}
#' Extract climate values around anchor times
#'
#' @param anchor_times Anchor times.
#' @param clim_tbl Aggregated climate table from [dmea_prepare_climate()].
#' @param lag_values Lag values in seconds.
#' @param vars Climate variables to extract.
#'
#' @return A long-format tibble.
#'
#' @importFrom tibble tibble
#' @importFrom dplyr left_join all_of
#' @importFrom tidyr pivot_longer
#' @keywords internal
dmea_extract_from_anchors <- function(anchor_times,
clim_tbl,
lag_values,
vars) {
if (length(anchor_times) == 0) {
return(tibble::tibble())
}
tz_use <- attr(clim_tbl$TIME, "tzone")
if (is.null(tz_use) || length(tz_use) == 0) {
tz_use <- "UTC"
}
anchor_times <- as.POSIXct(
anchor_times,
origin = "1970-01-01",
tz = tz_use
)
exp <- tibble::tibble(
event_id = rep(seq_along(anchor_times), each = length(lag_values)),
anchor_time = rep(anchor_times, each = length(lag_values)),
lag = rep(lag_values, times = length(anchor_times))
)
exp$TIME <- as.POSIXct(
exp$anchor_time + exp$lag,
origin = "1970-01-01",
tz = tz_use
)
join <- dplyr::left_join(
exp,
clim_tbl[, c("TIME", vars), drop = FALSE],
by = "TIME"
)
out <- tidyr::pivot_longer(
join,
cols = dplyr::all_of(vars),
names_to = "variable",
values_to = "value"
)
out
}
#' Sample one null anchor time
#'
#' @param event_time Original event time.
#' @param clim_times Candidate climate timestamps.
#' @param null Null model. One of \code{"same_doy_window"},
#' \code{"same_month"}, or \code{"random_time"}.
#' @param doy_window DOY window used for \code{null = "same_doy_window"}.
#' @param match_hour Logical. For hourly or minute data, keep the same hour
#' when possible.
#' @param match_minute Logical. For minute data, keep the same minute when
#' possible.
#' @param unit One of \code{"hours"}, \code{"days"}, or \code{"mins"}.
#'
#' @return One sampled \code{POSIXct} timestamp.
#'
#' @importFrom lubridate month yday hour minute
#' @keywords internal
dmea_sample_anchor_one <- function(event_time,
clim_times,
null = c("same_doy_window", "same_month", "random_time"),
doy_window = 15,
match_hour = TRUE,
match_minute = TRUE,
unit = c("hours", "days", "mins")) {
null <- match.arg(null)
unit <- match.arg(unit)
candidates <- clim_times
tz_use <- attr(clim_times, "tzone")
if (is.null(tz_use) || length(tz_use) == 0) {
tz_use <- "UTC"
}
if (null == "same_month") {
candidates <- candidates[
lubridate::month(candidates) == lubridate::month(event_time)
]
}
if (null == "same_doy_window") {
ev_doy <- lubridate::yday(event_time)
cdoy <- lubridate::yday(candidates)
dd <- abs(cdoy - ev_doy)
dd <- pmin(dd, 366 - dd)
candidates <- candidates[dd <= doy_window]
}
if (unit %in% c("hours", "mins") && isTRUE(match_hour)) {
hh <- lubridate::hour(event_time)
candidates2 <- candidates[lubridate::hour(candidates) == hh]
if (length(candidates2) > 0) {
candidates <- candidates2
}
}
if (unit == "mins" && isTRUE(match_minute)) {
mm <- lubridate::minute(event_time)
candidates2 <- candidates[lubridate::minute(candidates) == mm]
if (length(candidates2) > 0) {
candidates <- candidates2
}
}
if (length(candidates) == 0) {
candidates <- clim_times
}
out <- sample(candidates, size = 1)
as.POSIXct(
out,
origin = "1970-01-01",
tz = tz_use
)
}
#' Composite statistic helper
#'
#' @param x Numeric vector.
#' @param statistic Statistic. One of \code{"mean"} or \code{"median"}.
#'
#' @return Numeric scalar.
#'
#' @keywords internal
dmea_stat_fun <- function(x, statistic = c("mean", "median")) {
statistic <- match.arg(statistic)
if (all(is.na(x))) {
return(NA_real_)
}
if (statistic == "mean") {
return(mean(x, na.rm = TRUE))
}
median(x, na.rm = TRUE)
}
#' Build an epoch table around event times
#'
#' @description
#' Builds a long-format epoch table by extracting climate values before and
#' after event times.
#'
#' The optional \code{Year} and \code{DOY} arguments allow the user to restrict
#' which events are included in the superposed epoch analysis. The full climate
#' table is retained for lag extraction so that values before and after selected
#' events are still available.
#'
#' @param events Output of [dm_event_times()] or a data frame containing an
#' \code{event_time} column.
#' @param clim Climate data frame. The first column must contain timestamps.
#' @param vars Climate variables to use, or \code{"all"}.
#' @param lag_before Number of lag steps before the event.
#' @param lag_after Number of lag steps after the event.
#' @param step Step size in units.
#' @param unit One of \code{"hours"}, \code{"days"}, or \code{"mins"}.
#' @param agg_fun Aggregation function applied to all climate variables when
#' \code{var_funs} is not supplied.
#' @param var_funs Optional named vector or named list of aggregation functions
#' for each variable.
#' @param Year Optional numeric vector of calendar years to include in the SEA.
#' Events whose \code{event_time} falls outside these years are removed before
#' epoch extraction.
#' @param DOY Optional numeric vector of length one or two giving the
#' day-of-year window to include in the SEA. If length one, only that DOY is
#' retained. If length two, the inclusive range is retained. Cross-year DOY
#' windows are supported, for example \code{DOY = c(300, 60)}.
#' @param restrict_null_to_window Logical. If \code{TRUE}, null anchor times in
#' [dm_epoch_test()] are sampled only from the same \code{Year}/\code{DOY}
#' window. The full climate table is still retained for extracting
#' \code{lag_before} and \code{lag_after} windows. Default is \code{TRUE}.
#'
#' @return
#' An object of class \code{c("dm_epoch", "dm_epoch_extract")}.
#'
#' @examples
#' \donttest{
#' # events <- dm_event_times(zg, event = "GRO_start")
#' # ep <- dm_epoch_extract(
#' # events = events,
#' # clim = climate,
#' # vars = c("Rain", "temp", "vpd"),
#' # Year = c(2022, 2023, 2024),
#' # DOY = c(100, 300),
#' # lag_before = 24,
#' # lag_after = 24,
#' # unit = "hours"
#' # )
#' }
#'
#' @importFrom tibble as_tibble
#' @importFrom lubridate floor_date
#' @importFrom dplyr left_join all_of
#' @importFrom tidyr pivot_longer
#' @export
dm_epoch_extract <- function(events,
clim,
vars = "all",
lag_before = 24,
lag_after = 24,
step = 1,
unit = c("hours", "days", "mins"),
agg_fun = "mean",
var_funs = NULL,
Year = NULL,
DOY = NULL,
restrict_null_to_window = TRUE) {
unit <- match.arg(unit)
if (!("event_time" %in% names(events))) {
stop("'events' must contain an 'event_time' column.")
}
if (!is.numeric(lag_before) ||
!is.numeric(lag_after) ||
length(lag_before) != 1 ||
length(lag_after) != 1 ||
is.na(lag_before) ||
is.na(lag_after) ||
lag_before < 0 ||
lag_after < 0) {
stop("'lag_before' and 'lag_after' must be non-negative scalars.")
}
if (!is.numeric(step) ||
length(step) != 1 ||
is.na(step) ||
step <= 0) {
stop("'step' must be a positive number.")
}
ev <- tibble::as_tibble(events)
ev$event_time <- dmea_parse_time(ev$event_time)
if (any(is.na(ev$event_time))) {
stop("Some event times could not be parsed.")
}
n_events_before_filter <- nrow(ev)
ev <- dmea_filter_time_table(
dat = ev,
time_col = "event_time",
Year = Year,
DOY = DOY,
label = "events"
)
if (nrow(ev) == 0) {
stop("No events remain after filtering by Year and/or DOY.")
}
if ("event_id" %in% names(ev)) {
ev$original_event_id <- ev$event_id
}
ev$event_id <- seq_len(nrow(ev))
clim_tbl <- dmea_prepare_climate(
clim = clim,
vars = vars,
step = step,
unit = unit,
agg_fun = agg_fun,
var_funs = var_funs
)
vars_use <- setdiff(names(clim_tbl), "TIME")
if (length(vars_use) == 0) {
stop("No usable climate variables found after aggregation.")
}
bin_unit <- dmea_unit_string(step, unit)
ev$event_time_aligned <- lubridate::floor_date(
ev$event_time,
unit = bin_unit
)
lag_values_units <- seq.int(-lag_before, lag_after, by = 1L) * step
lag_values_seconds <- lag_values_units * dmea_step_seconds(1, unit)
exp <- ev[
rep(seq_len(nrow(ev)), each = length(lag_values_units)),
,
drop = FALSE
]
exp$lag <- rep(lag_values_units, times = nrow(ev))
exp$lag_seconds <- rep(lag_values_seconds, times = nrow(ev))
exp$TIME <- exp$event_time_aligned + exp$lag_seconds
epoch_wide <- dplyr::left_join(
exp,
clim_tbl,
by = "TIME"
)
epoch_long <- tidyr::pivot_longer(
epoch_wide,
cols = dplyr::all_of(vars_use),
names_to = "variable",
values_to = "value"
)
out <- list(
events = ev,
epoch_data = epoch_long,
climate_data = clim_tbl,
settings = list(
vars = vars_use,
lag_before = lag_before,
lag_after = lag_after,
step = step,
unit = unit,
lag_values = lag_values_units,
agg_fun = agg_fun,
var_funs = var_funs,
Year = Year,
DOY = DOY,
restrict_null_to_window = isTRUE(restrict_null_to_window),
n_events_before_filter = n_events_before_filter,
n_events_after_filter = nrow(ev)
)
)
class(out) <- c("dm_epoch", "dm_epoch_extract")
out
}
#' Test superposed epoch composites against a null distribution
#'
#' @description
#' Tests observed superposed epoch composites against empirical null
#' distributions generated by randomly sampled anchor times.
#'
#' If the input object was created with \code{Year} and/or \code{DOY} filters
#' in [dm_epoch_extract()] and \code{restrict_null_to_window = TRUE}, null
#' anchors are sampled from the same calendar-year and day-of-year window.
#'
#' @param x Object returned by [dm_epoch_extract()].
#' @param statistic Composite statistic. One of \code{"mean"} or
#' \code{"median"}.
#' @param null Null model. One of \code{"same_doy_window"},
#' \code{"same_month"}, or \code{"random_time"}.
#' @param n_iter Number of null iterations.
#' @param doy_window DOY window used for \code{null = "same_doy_window"}.
#' @param match_hour Logical. For hourly or minute data, keep the same hour
#' when possible.
#' @param match_minute Logical. For minute data, keep the same minute when
#' possible.
#' @param conf_level Confidence level for null envelopes.
#' @param seed Optional random seed.
#'
#' @return
#' An object of class \code{c("dm_epoch", "dm_epoch_test")}.
#'
#' @importFrom tibble as_tibble tibble
#' @importFrom dplyr %>% group_by summarise mutate left_join bind_rows
#' @importFrom stats sd quantile
#' @export
dm_epoch_test <- function(x,
statistic = c("mean", "median"),
null = c("same_doy_window", "same_month", "random_time"),
n_iter = 1000,
doy_window = 15,
match_hour = TRUE,
match_minute = TRUE,
conf_level = 0.95,
seed = NULL) {
variable <- value <- null_stat <- extreme <- NULL
if (!inherits(x, "dm_epoch_extract")) {
stop("'x' must be output of dm_epoch_extract().")
}
statistic <- match.arg(statistic)
null <- match.arg(null)
if (!is.numeric(n_iter) ||
length(n_iter) != 1 ||
is.na(n_iter) ||
n_iter < 1) {
stop("'n_iter' must be a positive integer.")
}
n_iter <- as.integer(n_iter)
if (!is.numeric(conf_level) ||
length(conf_level) != 1 ||
is.na(conf_level) ||
conf_level <= 0 ||
conf_level >= 1) {
stop("'conf_level' must be between 0 and 1.")
}
if (!is.null(seed)) {
set.seed(seed)
}
ep <- tibble::as_tibble(x$epoch_data)
ev <- tibble::as_tibble(x$events)
clim_tbl <- tibble::as_tibble(x$climate_data)
vars_use <- x$settings$vars
lag_values <- x$settings$lag_values
unit <- x$settings$unit
tz_use <- attr(clim_tbl$TIME, "tzone")
if (is.null(tz_use) || length(tz_use) == 0) {
tz_use <- "UTC"
}
observed <- ep %>%
dplyr::group_by(variable, lag) %>%
dplyr::summarise(
observed = dmea_stat_fun(value, statistic = statistic),
n = sum(!is.na(value)),
.groups = "drop"
)
null_list <- vector("list", n_iter)
clim_times_all <- as.POSIXct(
clim_tbl$TIME,
origin = "1970-01-01",
tz = tz_use
)
use_restricted_null <- FALSE
if (!is.null(x$settings$restrict_null_to_window)) {
use_restricted_null <- isTRUE(x$settings$restrict_null_to_window)
}
if (isTRUE(use_restricted_null) &&
(!is.null(x$settings$Year) || !is.null(x$settings$DOY))) {
null_keep <- dmea_time_filter_index(
times = clim_times_all,
Year = x$settings$Year,
DOY = x$settings$DOY
)
clim_times <- clim_times_all[null_keep]
if (length(clim_times) == 0) {
stop(
"No climate timestamps remain for null-anchor sampling after applying ",
"the Year/DOY filter. Set restrict_null_to_window = FALSE in ",
"dm_epoch_extract() if you want to sample null anchors from the full ",
"climate record."
)
}
} else {
clim_times <- clim_times_all
}
for (ii in seq_len(n_iter)) {
anchors <- do.call(
c,
lapply(ev$event_time_aligned, function(tt) {
dmea_sample_anchor_one(
event_time = as.POSIXct(tt, origin = "1970-01-01", tz = tz_use),
clim_times = clim_times,
null = null,
doy_window = doy_window,
match_hour = match_hour,
match_minute = match_minute,
unit = unit
)
})
)
anchors <- as.POSIXct(
anchors,
origin = "1970-01-01",
tz = tz_use
)
null_ep <- dmea_extract_from_anchors(
anchor_times = anchors,
clim_tbl = clim_tbl,
lag_values = lag_values * dmea_step_seconds(1, unit),
vars = vars_use
)
if (nrow(null_ep) == 0) {
null_sum <- tibble::tibble(
iter = ii,
variable = character(0),
lag = numeric(0),
null_stat = numeric(0)
)
} else {
null_sum <- null_ep %>%
dplyr::mutate(
lag = .data$lag / dmea_step_seconds(1, unit)
) %>%
dplyr::group_by(variable, lag) %>%
dplyr::summarise(
null_stat = dmea_stat_fun(value, statistic = statistic),
.groups = "drop"
)
null_sum$iter <- ii
null_sum <- null_sum[
,
c("iter", "variable", "lag", "null_stat"),
drop = FALSE
]
}
null_list[[ii]] <- null_sum
}
null_long <- dplyr::bind_rows(null_list)
alpha <- (1 - conf_level) / 2
null_summary <- null_long %>%
dplyr::group_by(variable, lag) %>%
dplyr::summarise(
null_mean = mean(null_stat, na.rm = TRUE),
null_sd = stats::sd(null_stat, na.rm = TRUE),
ci_low = stats::quantile(
null_stat,
probs = alpha,
na.rm = TRUE,
names = FALSE
),
ci_high = stats::quantile(
null_stat,
probs = 1 - alpha,
na.rm = TRUE,
names = FALSE
),
.groups = "drop"
)
summary_tbl <- dplyr::left_join(
observed,
null_summary,
by = c("variable", "lag")
)
pvals <- null_long %>%
dplyr::left_join(
summary_tbl[
,
c("variable", "lag", "observed", "null_mean"),
drop = FALSE
],
by = c("variable", "lag")
) %>%
dplyr::mutate(
extreme = abs(.data$null_stat - .data$null_mean) >=
abs(.data$observed - .data$null_mean)
) %>%
dplyr::group_by(variable, lag) %>%
dplyr::summarise(
p_value = mean(extreme, na.rm = TRUE),
.groups = "drop"
)
summary_tbl <- dplyr::left_join(
summary_tbl,
pvals,
by = c("variable", "lag")
)
summary_tbl$significant <- with(
summary_tbl,
observed < ci_low | observed > ci_high
)
out <- list(
events = ev,
epoch_data = ep,
climate_data = clim_tbl,
summary = summary_tbl,
null_distribution = null_long,
settings = c(
x$settings,
list(
statistic = statistic,
null = null,
n_iter = n_iter,
doy_window = doy_window,
match_hour = match_hour,
match_minute = match_minute,
conf_level = conf_level,
seed = seed
)
)
)
class(out) <- c("dm_epoch", "dm_epoch_test")
out
}
#' Summarize a dm_epoch object
#'
#' @param object Object of class \code{"dm_epoch"}.
#' @param ... Unused.
#'
#' @return
#' An object of class \code{"summary.dm_epoch"}.
#'
#' @importFrom tibble as_tibble
#' @importFrom dplyr %>% group_by summarise
#' @export
summary.dm_epoch <- function(object, ...) {
variable <- significant <- NULL
if (!inherits(object, "dm_epoch")) {
stop("'object' must inherit from 'dm_epoch'.")
}
if (inherits(object, "dm_epoch_test")) {
tab <- tibble::as_tibble(object$summary)
out <- list(
n_events = nrow(object$events),
variables = unique(tab$variable),
lag_range = range(tab$lag, na.rm = TRUE),
significant_counts = tab %>%
dplyr::group_by(variable) %>%
dplyr::summarise(
n_significant = sum(significant %in% TRUE, na.rm = TRUE),
.groups = "drop"
),
summary_table = tab,
settings = object$settings
)
} else {
tab <- tibble::as_tibble(object$epoch_data)
out <- list(
n_events = nrow(object$events),
variables = unique(tab$variable),
lag_range = range(tab$lag, na.rm = TRUE),
settings = object$settings
)
}
class(out) <- "summary.dm_epoch"
out
}
#' Print summary of a dm_epoch object
#'
#' @param x Object of class \code{"summary.dm_epoch"}.
#' @param ... Unused.
#'
#' @return
#' The input object, invisibly.
#'
#' @export
print.summary.dm_epoch <- function(x, ...) {
cat("dm_epoch summary\n")
cat("----------------\n")
cat("Number of events:", x$n_events, "\n")
cat("Variables:", paste(x$variables, collapse = ", "), "\n")
cat("Lag range:", paste(x$lag_range, collapse = " to "), "\n")
if (!is.null(x$settings$Year)) {
cat("Year filter:", paste(x$settings$Year, collapse = ", "), "\n")
}
if (!is.null(x$settings$DOY)) {
cat("DOY filter:", paste(x$settings$DOY, collapse = " to "), "\n")
}
if (!is.null(x$settings$n_events_before_filter) &&
!is.null(x$settings$n_events_after_filter)) {
cat(
"Events before/after Year-DOY filtering:",
x$settings$n_events_before_filter,
"/",
x$settings$n_events_after_filter,
"\n"
)
}
if (!is.null(x$significant_counts)) {
cat("\nSignificant lags by variable:\n")
print(x$significant_counts)
}
invisible(x)
}
#' Plot a dm_epoch object
#'
#' @param x Object of class \code{"dm_epoch"}.
#' @param y Unused.
#' @param type Plot type. One of \code{"composite"}, \code{"heatmap"}, or
#' \code{"difference"}.
#' @param variables Optional climate variables to plot.
#' @param facet Logical. If \code{TRUE}, facet by variable.
#' @param show_ci Logical. If \code{TRUE} and \code{x} is a
#' \code{"dm_epoch_test"} object, show the null confidence ribbon.
#' @param legend_position Legend position passed to [ggplot2::theme()].
#' @param ... Unused.
#'
#' @return
#' A \code{ggplot} object.
#'
#' @method plot dm_epoch
#' @importFrom tibble as_tibble
#' @importFrom dplyr %>% group_by summarise
#' @importFrom ggplot2 ggplot aes geom_ribbon geom_line geom_hline geom_vline
#' geom_tile theme_bw theme element_text labs facet_wrap
#' @export
plot.dm_epoch <- function(x,
y = NULL,
type = c("composite", "heatmap", "difference"),
variables = NULL,
facet = TRUE,
show_ci = TRUE,
legend_position = "right",
...) {
variable <- value <- observed <- ci_low <- ci_high <- null_mean <- NULL
diff_obs_null <- event_id <- NULL
if (!inherits(x, "dm_epoch")) {
stop("'x' must inherit from 'dm_epoch'.")
}
if (!requireNamespace("ggplot2", quietly = TRUE)) {
stop("Package 'ggplot2' is required for plot.dm_epoch().")
}
type <- match.arg(type)
lag_lab <- paste0("Lag (", x$settings$unit, ")")
if (inherits(x, "dm_epoch_test")) {
tab <- tibble::as_tibble(x$summary)
ep <- tibble::as_tibble(x$epoch_data)
} else {
ep <- tibble::as_tibble(x$epoch_data)
tab <- ep %>%
dplyr::group_by(variable, lag) %>%
dplyr::summarise(
observed = mean(value, na.rm = TRUE),
n = sum(!is.na(value)),
.groups = "drop"
)
}
if (!is.null(variables)) {
tab <- tab[tab$variable %in% variables, , drop = FALSE]
ep <- ep[ep$variable %in% variables, , drop = FALSE]
}
if (nrow(tab) == 0) {
stop("No data remain after filtering by variable.")
}
if (type == "composite") {
p <- ggplot2::ggplot(
tab,
ggplot2::aes(
x = lag,
y = observed,
colour = variable,
group = variable
)
)
if (inherits(x, "dm_epoch_test") &&
isTRUE(show_ci) &&
all(c("ci_low", "ci_high") %in% names(tab))) {
p <- p +
ggplot2::geom_ribbon(
data = tab,
mapping = ggplot2::aes(
x = lag,
ymin = ci_low,
ymax = ci_high,
fill = variable,
group = variable
),
inherit.aes = FALSE,
alpha = 0.18,
colour = NA
)
}
if (inherits(x, "dm_epoch_test") &&
"null_mean" %in% names(tab)) {
p <- p +
ggplot2::geom_line(
data = tab,
mapping = ggplot2::aes(
x = lag,
y = null_mean,
colour = variable,
group = variable
),
linetype = 2,
linewidth = 0.6,
alpha = 0.8,
inherit.aes = FALSE
)
}
p <- p +
ggplot2::geom_hline(yintercept = 0, linetype = 2) +
ggplot2::geom_vline(xintercept = 0, linetype = 2) +
ggplot2::geom_line(linewidth = 0.9) +
ggplot2::theme_bw() +
ggplot2::theme(
legend.position = legend_position,
axis.title = ggplot2::element_text(size = 14),
axis.text = ggplot2::element_text(size = 11)
) +
ggplot2::labs(
x = lag_lab,
y = paste0(
"Composite ",
if (!is.null(x$settings$statistic)) {
x$settings$statistic
} else {
"mean"
}
),
colour = "Variable",
fill = "Variable",
title = "Superposed epoch composite"
)
if (isTRUE(facet)) {
p <- p +
ggplot2::facet_wrap(
stats::as.formula("~ variable"),
scales = "free_y",
ncol = 1
)
}
return(p)
}
if (type == "difference") {
if (!inherits(x, "dm_epoch_test")) {
stop("type = 'difference' requires output from dm_epoch_test().")
}
if (!"null_mean" %in% names(tab)) {
stop("No null mean available in the dm_epoch_test object.")
}
tab$diff_obs_null <- tab$observed - tab$null_mean
p <- ggplot2::ggplot(
tab,
ggplot2::aes(
x = lag,
y = diff_obs_null,
colour = variable,
group = variable
)
) +
ggplot2::geom_hline(yintercept = 0, linetype = 2) +
ggplot2::geom_vline(xintercept = 0, linetype = 2) +
ggplot2::geom_line(linewidth = 0.9) +
ggplot2::theme_bw() +
ggplot2::theme(
legend.position = legend_position,
axis.title = ggplot2::element_text(size = 14),
axis.text = ggplot2::element_text(size = 11)
) +
ggplot2::labs(
x = lag_lab,
y = "Observed - null mean",
colour = "Variable",
title = "Difference between observed composite and null mean"
)
if (isTRUE(facet)) {
p <- p +
ggplot2::facet_wrap(
stats::as.formula("~ variable"),
scales = "free_y",
ncol = 1
)
}
return(p)
}
if (type == "heatmap") {
ep2 <- ep
ep2$event_id <- factor(
ep2$event_id,
levels = rev(sort(unique(ep2$event_id)))
)
p <- ggplot2::ggplot(
ep2,
ggplot2::aes(x = lag, y = event_id, fill = value)
) +
ggplot2::geom_tile() +
ggplot2::geom_vline(xintercept = 0, linetype = 2) +
ggplot2::theme_bw() +
ggplot2::theme(
legend.position = legend_position,
axis.title = ggplot2::element_text(size = 14),
axis.text = ggplot2::element_text(size = 11)
) +
ggplot2::labs(
x = lag_lab,
y = "Event ID",
fill = "Value",
title = "Epoch heatmap"
)
if (isTRUE(facet)) {
p <- p +
ggplot2::facet_wrap(
stats::as.formula("~ variable"),
scales = "free_x",
ncol = 1
)
}
return(p)
}
stop("Unknown plot type.")
}
#' Null-coalescing helper
#'
#' @param a First object.
#' @param b Fallback object.
#'
#' @return \code{a} if not \code{NULL}, otherwise \code{b}.
#'
#' @keywords internal
#' @noRd
`%||%` <- function(a, b) {
if (!is.null(a)) {
a
} else {
b
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.