Nothing
#' @title Calculate tree-, species-, or site-level statistics from clim.twd output
#'
#' @description
#' Calculates grouped statistics from the output of \code{clim.twd()}.
#'
#' The function can summarize relative dendrometer change trajectories and
#' event-level metrics:
#' \itemize{
#' \item by tree,
#' \item by species,
#' \item by site,
#' \item or by species within site.
#' }
#'
#' It can also restrict IDs to specific months, years, or custom day-of-year
#' windows based on the adverse-phase start date of each ID.
#'
#' @param x An object of class \code{"clim_twd_output"} returned by
#' \code{clim.twd()}.
#' @param tree_info Optional metadata table describing trees. It must contain a
#' column named \code{tree}, and may additionally contain \code{species} and
#' \code{site}. A named character vector is also accepted for species-only
#' mapping, where names are trees and values are species.
#' @param response Character. Which response table from \code{clim.twd()} should
#' be summarized:
#' \describe{
#' \item{`"adverse_change"`}{Only the adverse phase.}
#' \item{`"normal_change"`}{Only the following normal phase.}
#' \item{`"full_period_change"`}{Adverse + following normal phase, reset to 0
#' at each adverse start.}
#' \item{`"continuous_full_period_change"`}{Adverse + following normal phase,
#' continuing from the previous full period instead of resetting to 0.}
#' }
#' @param group_by Character. Grouping level for trajectory summaries. One of
#' \code{"tree"}, \code{"species"}, \code{"site"}, or \code{"species_site"}.
#' @param center Character. Central tendency used for grouped trajectories and
#' grouped period metrics. One of \code{"mean"} or \code{"median"}.
#' @param conf_level Numeric confidence/limit level. Default is \code{0.95}.
#' Empirical limits are returned as the lower and upper quantiles corresponding
#' to this level.
#' @param months Optional numeric vector of months (1--12). Only IDs whose
#' adverse-phase start falls in these months are retained.
#' @param years Optional numeric vector of years. Only IDs whose adverse-phase
#' start year falls in these years are retained.
#' @param doy_window Optional numeric vector of length 2 defining the allowed
#' day-of-year window for adverse-phase start. Wrapped windows are supported,
#' for example \code{c(300, 50)}.
#' @param year_window Optional numeric vector of length 2 defining the allowed
#' year range for adverse-phase start.
#' @param include_tree_series Logical. If \code{TRUE}, the filtered long-format
#' tree-level data are stored in the output.
#'
#' @return
#' A list of class \code{"clim_twd_stats"} with:
#' \describe{
#' \item{trajectory_summary}{Grouped time-series summary for each ID and date,
#' including central tendency, SD bands, and empirical 95\% limits.}
#' \item{id_summary}{Grouped per-ID summary derived from
#' \code{x$period_info}.}
#' \item{selected_ids}{IDs retained after temporal filtering.}
#' \item{phase_table}{Filtered phase table.}
#' \item{tree_info}{Metadata table used for grouping.}
#' \item{long_data}{Filtered long-format tree-level response table, if
#' \code{include_tree_series = TRUE}.}
#' \item{settings}{List of settings used to generate the summaries.}
#' }
#'
#' @details
#' \strong{Temporal filtering} is based on the \code{adverse_start} date in
#' \code{x$phase_table}. Multiple filters are combined, so for example users can
#' simultaneously restrict to:
#' \itemize{
#' \item specific months,
#' \item specific years,
#' \item a day-of-year window,
#' \item and a year range.
#' }
#'
#' If \code{group_by = "species"}, \code{"site"}, or \code{"species_site"},
#' \code{tree_info} must provide the required columns.
#'
#' @examples
#' \donttest{
#' rel_out <- clim.twd(
#' df = gf_nepa17,
#' Clim = ktm_rain17,
#' thresholdClim = "<10",
#' thresholdDays = ">5",
#' showPlot = FALSE
#' )
#'
#' # tree-level statistics
#' st1 <- clim.twd.stats(
#' rel_out,
#' response = "full_period_change",
#' group_by = "tree"
#' )
#'
#' summary(st1)
#' plot(st1, type = "trajectory")
#'
#' # species metadata
#' tree_info <- data.frame(
#' tree = c("T2", "T3"),
#' species = c("Pinus", "Pinus"),
#' site = c("Ktm", "Ktm")
#' )
#'
#' # species-level summaries restricted to IDs starting in months 6 to 8
#' st2 <- clim.twd.stats(
#' rel_out,
#' tree_info = tree_info,
#' response = "full_period_change",
#' group_by = "species",
#' center = "median",
#' months = 6:8
#' )
#'
#' summary(st2)
#' plot(st2, type = "trajectory", band = "limit95")
#'
#' # species within site
#' st3 <- clim.twd.stats(
#' rel_out,
#' tree_info = tree_info,
#' response = "continuous_full_period_change",
#' group_by = "species_site",
#' doy_window = c(150, 250),
#' year_window = c(2017, 2018)
#' )
#' }
#'
#' @importFrom dplyr mutate group_by summarise ungroup arrange filter select
#' left_join bind_rows all_of n_distinct %>%
#' @importFrom tidyr pivot_longer
#' @importFrom tibble tibble as_tibble
#' @importFrom lubridate month year yday
#' @export
clim.twd.stats <- function(x,
tree_info = NULL,
response = c("adverse_change", "normal_change",
"full_period_change", "continuous_full_period_change"),
group_by = c("tree", "species", "site", "species_site"),
center = c("mean", "median"),
conf_level = 0.95,
months = NULL,
years = NULL,
doy_window = NULL,
year_window = NULL,
include_tree_series = TRUE) {
IDs <- TIME <- phase_type <- tree <- species <- site <- group_label <- value <- NULL
adverse_start <- center_value <- adverse_end <- normal_start <- normal_end <- NULL
full_end <- no_adverse_for_tree <- lag_to_below_zero <- lag_to_max_adverse_decline <- NULL
lag_to_return_zero <- max_adverse_decline_value <- change_end_adverse <- change_end_full_period <- NULL
continuous_start_full_period <- continuous_end_full_period <- NULL
if (!inherits(x, "clim_twd_output")) {
stop("'x' must be of class 'clim_twd_output'.")
}
response <- match.arg(response)
group_by <- match.arg(group_by)
center <- match.arg(center)
if (!is.numeric(conf_level) || length(conf_level) != 1 || is.na(conf_level) ||
conf_level <= 0 || conf_level >= 1) {
stop("'conf_level' must be a single number between 0 and 1.")
}
if (is.null(x[[response]]) || nrow(x[[response]]) == 0) {
stop("Selected response table is empty: ", response)
}
if (is.null(x$phase_table) || nrow(x$phase_table) == 0) {
stop("The supplied clim.twd object has no phase_table.")
}
# -----------------------------
# helpers
# -----------------------------
agg_fun <- if (center == "mean") {
function(z) if (all(is.na(z))) NA_real_ else mean(z, na.rm = TRUE)
} else {
function(z) if (all(is.na(z))) NA_real_ else stats::median(z, na.rm = TRUE)
}
get_limits <- function(z, conf_level = 0.95) {
if (all(is.na(z))) {
return(c(NA_real_, NA_real_))
}
alpha <- (1 - conf_level) / 2
stats::quantile(z, probs = c(alpha, 1 - alpha), na.rm = TRUE, names = FALSE)
}
within_doy_window <- function(doy, win) {
if (length(win) != 2 || any(is.na(win))) {
stop("'doy_window' must be a numeric vector of length 2.")
}
if (win[1] <= win[2]) {
doy >= win[1] & doy <= win[2]
} else {
doy >= win[1] | doy <= win[2]
}
}
# -----------------------------
# metadata table
# -----------------------------
response_df <- x[[response]]
phase_table <- x$phase_table
period_info <- x$period_info
tree_cols <- setdiff(names(response_df), c("TIME", "IDs", "phase_type"))
if (is.null(tree_info)) {
info_tbl <- tibble::tibble(
tree = tree_cols,
species = NA_character_,
site = NA_character_
)
} else if (is.character(tree_info) && !is.null(names(tree_info))) {
info_tbl <- tibble::tibble(
tree = names(tree_info),
species = unname(tree_info),
site = NA_character_
)
} else if (is.data.frame(tree_info)) {
info_tbl <- tibble::as_tibble(tree_info)
if (!("tree" %in% names(info_tbl))) {
stop("'tree_info' data frame must contain a column named 'tree'.")
}
if (!("species" %in% names(info_tbl))) info_tbl$species <- NA_character_
if (!("site" %in% names(info_tbl))) info_tbl$site <- NA_character_
info_tbl <- info_tbl %>% dplyr::select(tree, species, site)
} else {
stop("'tree_info' must be NULL, a named character vector, or a data frame with at least a 'tree' column.")
}
# validate grouping requirements
if (group_by == "species" && all(is.na(info_tbl$species))) {
stop("group_by = 'species' requires species information in 'tree_info'.")
}
if (group_by == "site" && all(is.na(info_tbl$site))) {
stop("group_by = 'site' requires site information in 'tree_info'.")
}
if (group_by == "species_site" && (all(is.na(info_tbl$species)) || all(is.na(info_tbl$site)))) {
stop("group_by = 'species_site' requires both species and site information in 'tree_info'.")
}
# -----------------------------
# filter IDs by time windows
# -----------------------------
phase_tbl2 <- phase_table %>%
dplyr::mutate(
start_month = lubridate::month(adverse_start),
start_year = lubridate::year(adverse_start),
start_doy = lubridate::yday(adverse_start)
)
keep <- rep(TRUE, nrow(phase_tbl2))
if (!is.null(months)) {
keep <- keep & phase_tbl2$start_month %in% months
}
if (!is.null(years)) {
keep <- keep & phase_tbl2$start_year %in% years
}
if (!is.null(doy_window)) {
keep <- keep & within_doy_window(phase_tbl2$start_doy, doy_window)
}
if (!is.null(year_window)) {
if (length(year_window) != 2 || any(is.na(year_window))) {
stop("'year_window' must be a numeric vector of length 2.")
}
ylo <- min(year_window)
yhi <- max(year_window)
keep <- keep & phase_tbl2$start_year >= ylo & phase_tbl2$start_year <= yhi
}
phase_tbl2 <- phase_tbl2[keep, , drop = FALSE]
selected_ids <- phase_tbl2$IDs
if (length(selected_ids) == 0) {
stop("No IDs remain after applying the requested temporal filters.")
}
# -----------------------------
# long-format response data
# -----------------------------
long_dat <- response_df %>%
dplyr::filter(IDs %in% selected_ids) %>%
tidyr::pivot_longer(
cols = dplyr::all_of(tree_cols),
names_to = "tree",
values_to = "value"
) %>%
dplyr::left_join(info_tbl, by = "tree")
long_dat$group_label <- switch(
group_by,
tree = long_dat$tree,
species = long_dat$species,
site = long_dat$site,
species_site = paste(long_dat$species, long_dat$site, sep = " @ ")
)
if (all(is.na(long_dat$group_label))) {
stop("Grouping produced only missing labels. Check 'tree_info' and 'group_by'.")
}
# -----------------------------
# trajectory summary
# -----------------------------
trajectory_summary <- long_dat %>%
dplyr::group_by(IDs, TIME, phase_type, group_label) %>%
dplyr::summarise(
n_trees = sum(!is.na(value)),
center_value = agg_fun(value),
sd = if (sum(!is.na(value)) <= 1) NA_real_ else stats::sd(value, na.rm = TRUE),
sd_lower = center_value - sd,
sd_upper = center_value + sd,
limit95_lower = get_limits(value, conf_level)[1],
limit95_upper = get_limits(value, conf_level)[2],
.groups = "drop"
) %>%
dplyr::arrange(IDs, TIME, group_label)
# -----------------------------
# grouped ID summary from period_info
# -----------------------------
period_info2 <- period_info %>%
dplyr::filter(IDs %in% selected_ids) %>%
dplyr::left_join(info_tbl, by = "tree")
period_info2$group_label <- switch(
group_by,
tree = period_info2$tree,
species = period_info2$species,
site = period_info2$site,
species_site = paste(period_info2$species, period_info2$site, sep = " @ ")
)
safe_agg <- function(z) {
if (all(is.na(z))) return(NA_real_)
agg_fun(z)
}
id_summary <- period_info2 %>%
dplyr::group_by(IDs, group_label) %>%
dplyr::summarise(
n_trees = dplyr::n(),
adverse_start = min(adverse_start, na.rm = TRUE),
adverse_end = min(adverse_end, na.rm = TRUE),
normal_start = suppressWarnings(min(normal_start, na.rm = TRUE)),
normal_end = suppressWarnings(min(normal_end, na.rm = TRUE)),
full_end = min(full_end, na.rm = TRUE),
prop_no_adverse = mean(no_adverse_for_tree, na.rm = TRUE),
lag_to_below_zero = safe_agg(lag_to_below_zero),
lag_to_max_adverse_decline = safe_agg(lag_to_max_adverse_decline),
lag_to_return_zero = safe_agg(lag_to_return_zero),
max_adverse_decline_value = safe_agg(max_adverse_decline_value),
change_end_adverse = safe_agg(change_end_adverse),
change_end_full_period = safe_agg(change_end_full_period),
continuous_start_full_period = safe_agg(continuous_start_full_period),
continuous_end_full_period = safe_agg(continuous_end_full_period),
.groups = "drop"
) %>%
dplyr::mutate(
normal_start = ifelse(is.infinite(normal_start), NA, normal_start),
normal_end = ifelse(is.infinite(normal_end), NA, normal_end)
) %>%
dplyr::arrange(IDs, group_label)
out <- list(
call = match.call(),
response = response,
group_by = group_by,
center = center,
conf_level = conf_level,
selected_ids = selected_ids,
phase_table = phase_tbl2,
tree_info = info_tbl,
trajectory_summary = trajectory_summary,
id_summary = id_summary,
long_data = if (isTRUE(include_tree_series)) long_dat else NULL,
settings = list(
months = months,
years = years,
doy_window = doy_window,
year_window = year_window
)
)
class(out) <- "clim_twd_stats"
out
}
#' @title Summarize clim.twd.stats output
#'
#' @param object An object of class \code{"clim_twd_stats"}.
#' @param ... Further arguments passed to or from other methods.
#'
#' @return An object of class \code{"summary.clim_twd_stats"}.
#'
#' @method summary clim_twd_stats
#' @export
summary.clim_twd_stats <- function(object, ...) {
if (!inherits(object, "clim_twd_stats")) {
stop("'object' must be of class 'clim_twd_stats'.")
}
overview <- tibble::tibble(
response = object$response,
group_by = object$group_by,
center = object$center,
conf_level = object$conf_level,
n_ids = length(unique(object$selected_ids)),
n_groups = length(unique(object$trajectory_summary$group_label)),
n_rows_trajectory = nrow(object$trajectory_summary),
n_rows_id_summary = nrow(object$id_summary)
)
out <- list(
overview = overview,
phase_table = object$phase_table,
trajectory_head = utils::head(object$trajectory_summary, 12),
id_summary = object$id_summary
)
class(out) <- "summary.clim_twd_stats"
out
}
#' @title Print summary of clim.twd.stats output
#'
#' @param x An object of class \code{"summary.clim_twd_stats"}.
#' @param ... Further arguments passed to or from other methods.
#'
#' @return The input object, invisibly.
#'
#' @method print summary.clim_twd_stats
#' @export
print.summary.clim_twd_stats <- function(x, ...) {
if (!inherits(x, "summary.clim_twd_stats")) {
stop("'x' must be of class 'summary.clim_twd_stats'.")
}
cat("Summary of clim.twd.stats output\n")
cat("--------------------------------\n\n")
cat("Overview:\n")
print(x$overview)
cat("\nFiltered phase table:\n")
print(x$phase_table)
cat("\nTrajectory summary preview:\n")
print(x$trajectory_head)
cat("\nID summary:\n")
print(x$id_summary)
invisible(x)
}
#' @title Plot method for clim.twd.stats output
#'
#' @description
#' Plots grouped trajectories or grouped ID-level metrics from
#' \code{clim.twd.stats()}.
#'
#' @param x An object of class \code{"clim_twd_stats"}.
#' @param y Unused.
#' @param type One of \code{"trajectory"} or \code{"id_metric"}.
#' @param ids Optional numeric vector of IDs to plot.
#' @param groups Optional character vector of group labels to plot.
#' @param band One of \code{"none"}, \code{"sd"}, \code{"limit95"}, or \code{"both"}.
#' Used for \code{type = "trajectory"}.
#' @param metric Metric column from \code{x$id_summary} for \code{type = "id_metric"}.
#' @param facet_by One of \code{"IDs"}, \code{"group"}, or \code{"none"}.
#' @param legend_position Legend position.
#' @param main Optional title.
#' @param ... Further arguments passed to or from other methods.
#'
#' @return A \code{ggplot2} object.
#'
#' @method plot clim_twd_stats
#' @importFrom dplyr filter bind_rows %>%
#' @importFrom ggplot2 ggplot aes geom_line geom_ribbon geom_col facet_wrap
#' theme_minimal theme element_text labs position_dodge
#' @export
plot.clim_twd_stats <- function(x,
y = NULL,
type = c("trajectory", "id_metric"),
ids = NULL,
groups = NULL,
band = c("none", "sd", "limit95", "both"),
metric = c("lag_to_below_zero",
"lag_to_max_adverse_decline",
"lag_to_return_zero",
"max_adverse_decline_value",
"change_end_adverse",
"change_end_full_period",
"continuous_end_full_period"),
facet_by = c("IDs", "group", "none"),
legend_position = "bottom",
main = NULL,
...) {
IDs <- TIME <- group_label <- center_value <- sd_lower <- sd_upper <- NULL
limit95_lower <- limit95_upper <- group_label <- value <- NULL
if (!inherits(x, "clim_twd_stats")) {
stop("'x' must be of class 'clim_twd_stats'.")
}
if (!requireNamespace("ggplot2", quietly = TRUE)) {
stop("Package 'ggplot2' is required for plotting.")
}
type <- match.arg(type)
band <- match.arg(band)
facet_by <- match.arg(facet_by)
metric <- match.arg(metric)
if (type == "trajectory") {
dat <- x$trajectory_summary
if (!is.null(ids)) dat <- dat %>% dplyr::filter(IDs %in% ids)
if (!is.null(groups)) dat <- dat %>% dplyr::filter(group_label %in% groups)
if (nrow(dat) == 0) stop("No trajectory rows remain after filtering.")
p <- ggplot2::ggplot(dat, ggplot2::aes(x = TIME, y = center_value, colour = group_label, fill = group_label))
if (band %in% c("sd", "both")) {
p <- p + ggplot2::geom_ribbon(
ggplot2::aes(ymin = sd_lower, ymax = sd_upper),
alpha = 0.15,
colour = NA
)
}
if (band %in% c("limit95", "both")) {
p <- p + ggplot2::geom_ribbon(
ggplot2::aes(ymin = limit95_lower, ymax = limit95_upper),
alpha = 0.10,
colour = NA
)
}
p <- p +
ggplot2::geom_line() +
ggplot2::theme_minimal() +
ggplot2::theme(
legend.position = legend_position,
axis.title = ggplot2::element_text(size = 14),
axis.text = ggplot2::element_text(size = 12)
) +
ggplot2::labs(
title = if (is.null(main)) {
paste("Grouped trajectory:", x$group_by, "(", x$center, ")")
} else main,
x = "Time",
y = "Relative change",
colour = x$group_by,
fill = x$group_by
)
if (facet_by == "IDs" && length(unique(dat$IDs)) > 1) {
p <- p + ggplot2::facet_wrap(~ IDs, scales = "free_x", ncol = 1)
} else if (facet_by == "group" && length(unique(dat$group_label)) > 1) {
p <- p + ggplot2::facet_wrap(~ group_label, scales = "free_x", ncol = 1)
}
return(p)
}
dat <- x$id_summary
if (!is.null(ids)) dat <- dat %>% dplyr::filter(IDs %in% ids)
if (!is.null(groups)) dat <- dat %>% dplyr::filter(group_label %in% groups)
if (nrow(dat) == 0) stop("No ID summary rows remain after filtering.")
p <- ggplot2::ggplot(
dat,
ggplot2::aes(x = group_label, y = .data[[metric]], fill = group_label)
) +
ggplot2::geom_col() +
ggplot2::theme_minimal() +
ggplot2::theme(
legend.position = legend_position,
axis.title = ggplot2::element_text(size = 14),
axis.text = ggplot2::element_text(size = 12, angle = 45, hjust = 1)
) +
ggplot2::labs(
title = if (is.null(main)) paste("ID metric:", metric) else main,
x = x$group_by,
y = metric,
fill = x$group_by
)
if (facet_by == "IDs" && length(unique(dat$IDs)) > 1) {
p <- p + ggplot2::facet_wrap(~ IDs, scales = "free_y", ncol = 1)
}
p
}
#' @title Statistical testing for clim.twd output
#'
#' @description
#' Performs hypothesis tests on parameters derived from \code{clim.twd()} output.
#'
#' This function is designed for scenarios such as:
#' \itemize{
#' \item species comparison as a whole,
#' \item species comparison by year,
#' \item site comparison as a whole,
#' \item site comparison by year,
#' \item species-within-site comparison,
#' \item ID-wise comparisons.
#' }
#'
#' The function uses the \code{period_info} table from \code{clim.twd()} and
#' tests selected parameters across groups such as trees, species, sites, or
#' species-site combinations.
#'
#' @param x An object of class \code{"clim_twd_output"} returned by
#' \code{clim.twd()}.
#' @param tree_info Optional metadata table describing trees. It must contain a
#' column named \code{tree}, and may additionally contain \code{species} and
#' \code{site}. A named character vector is also accepted for species-only
#' mapping, where names are trees and values are species.
#' @param parameter Character. Parameter from \code{x$period_info} to test.
#' Supported examples include:
#' \describe{
#' \item{`"lag_to_below_zero"`}{Lag from adverse start until relative change
#' first drops below zero.}
#' \item{`"lag_to_max_adverse_decline"`}{Lag from adverse start until maximum
#' adverse decline is reached.}
#' \item{`"lag_to_return_zero"`}{Lag from adverse start until relative change
#' returns to zero or above.}
#' \item{`"max_adverse_decline_value"`}{Most negative relative change during
#' the adverse phase.}
#' \item{`"change_end_adverse"`}{Relative change at the end of the adverse phase.}
#' \item{`"change_end_full_period"`}{Relative change at the end of the full period.}
#' \item{`"continuous_end_full_period"`}{Continuous end value across periods.}
#' }
#' @param compare_by Character. Grouping variable to compare. One of
#' \code{"tree"}, \code{"species"}, \code{"site"}, or \code{"species_site"}.
#' @param within Character. Defines the comparison scenario:
#' \describe{
#' \item{`"all"`}{Compare groups across all retained IDs.}
#' \item{`"year"`}{Compare groups separately within each adverse-start year.}
#' \item{`"ID"`}{Compare groups separately within each ID.}
#' }
#' @param test_method Character. One of \code{"auto"}, \code{"t_test"},
#' \code{"welch_t"}, \code{"wilcox"}, \code{"anova"}, or \code{"kruskal"}.
#' With \code{"auto"}, the function uses:
#' \itemize{
#' \item Welch t-test or one-way ANOVA when group distributions look
#' approximately normal,
#' \item Wilcoxon or Kruskal-Wallis otherwise.
#' }
#' @param ids Optional numeric vector of IDs to retain before testing.
#' @param years Optional numeric vector of years to retain based on
#' \code{adverse_start}.
#' @param months Optional numeric vector of months (1--12) to retain based on
#' \code{adverse_start}.
#' @param doy_window Optional numeric vector of length 2 defining the allowed
#' day-of-year window of \code{adverse_start}. Wrapped windows such as
#' \code{c(300, 50)} are supported.
#' @param year_window Optional numeric vector of length 2 defining the allowed
#' year range for \code{adverse_start}.
#' @param pairwise Logical. If \code{TRUE}, pairwise post-hoc tests are computed
#' for strata with more than two groups when applicable.
#' @param p_adjust_method Character. P-value adjustment method for pairwise tests.
#' Default is \code{"BH"}.
#' @param min_n_per_group Minimum number of non-missing observations required per
#' group before testing. Default is \code{2}.
#'
#' @return
#' A list of class \code{"clim_twd_test"} containing:
#' \describe{
#' \item{data_used}{Filtered test data used for the analyses.}
#' \item{test_results}{Main test results by stratum.}
#' \item{pairwise_results}{Pairwise comparison table where available.}
#' \item{group_summary}{Descriptive summaries by stratum and group.}
#' \item{settings}{Settings used for the analysis.}
#' }
#'
#' @examples
#' \donttest{
#' rel_out <- clim.twd(
#' df = gf_nepa17,
#' Clim = ktm_rain17,
#' thresholdClim = "<10",
#' thresholdDays = ">5",
#' showPlot = FALSE
#' )
#'
#' tree_info <- data.frame(
#' tree = c("T2", "T3"),
#' species = c("Pinus", "Pinus"),
#' site = c("Ktm", "Ktm")
#' )
#'
#' # species comparison across all retained IDs
#' tst1 <- clim.twd.test(
#' rel_out,
#' tree_info = tree_info,
#' parameter = "max_adverse_decline_value",
#' compare_by = "species",
#' within = "all"
#' )
#'
#' summary(tst1)
#' plot(tst1)
#'
#' # species comparison by year
#' tst2 <- clim.twd.test(
#' rel_out,
#' tree_info = tree_info,
#' parameter = "lag_to_return_zero",
#' compare_by = "species",
#' within = "year"
#' )
#'
#' # site comparison by year
#' tst3 <- clim.twd.test(
#' rel_out,
#' tree_info = tree_info,
#' parameter = "change_end_full_period",
#' compare_by = "site",
#' within = "year",
#' test_method = "kruskal"
#' )
#' }
#'
#' @importFrom dplyr mutate group_by summarise ungroup arrange filter select
#' left_join bind_rows n_distinct all_of %>%
#' @importFrom tibble tibble as_tibble
#' @importFrom lubridate year month yday
#' @importFrom ggplot2 ggplot aes geom_boxplot geom_jitter facet_wrap theme_minimal
#' theme element_text labs
#' @importFrom stats shapiro.test t.test wilcox.test kruskal.test aov
#' pairwise.t.test pairwise.wilcox.test
#' @export
clim.twd.test <- function(x,
tree_info = NULL,
parameter = c(
"lag_to_below_zero",
"lag_to_max_adverse_decline",
"lag_to_return_zero",
"max_adverse_decline_value",
"change_end_adverse",
"change_end_full_period",
"continuous_end_full_period"
),
compare_by = c("tree", "species", "site", "species_site"),
within = c("all", "year", "ID"),
test_method = c("auto", "t_test", "welch_t", "wilcox", "anova", "kruskal"),
ids = NULL,
years = NULL,
months = NULL,
doy_window = NULL,
year_window = NULL,
pairwise = TRUE,
p_adjust_method = "BH",
min_n_per_group = 2) {
tree <- species <- site <- group_label <- stratum <- n_group <- .n_group <- NULL
adverse_start <- IDs <- value <- start_year <- start_month <- start_doy <- NULL
if (!inherits(x, "clim_twd_output")) {
stop("'x' must be of class 'clim_twd_output'.")
}
if (is.null(x$period_info) || nrow(x$period_info) == 0) {
stop("The supplied object has no 'period_info' table.")
}
parameter <- match.arg(parameter)
compare_by <- match.arg(compare_by)
within <- match.arg(within)
test_method <- match.arg(test_method)
if (!parameter %in% names(x$period_info)) {
stop("Parameter not found in x$period_info: ", parameter)
}
# -----------------------------
# helpers
# -----------------------------
within_doy_window <- function(doy, win) {
if (length(win) != 2 || any(is.na(win))) {
stop("'doy_window' must be a numeric vector of length 2.")
}
if (win[1] <= win[2]) {
doy >= win[1] & doy <= win[2]
} else {
doy >= win[1] | doy <= win[2]
}
}
safe_shapiro <- function(z) {
z <- z[is.finite(z)]
if (length(z) < 3 || length(z) > 5000) return(NA_real_)
out <- try(stats::shapiro.test(z), silent = TRUE)
if (inherits(out, "try-error")) return(NA_real_)
out$p.value
}
infer_method <- function(dat, compare_by) {
vals_split <- split(dat$value, dat$group_label)
vals_split <- vals_split[vapply(vals_split, function(z) sum(is.finite(z)) > 0, logical(1))]
ng <- length(vals_split)
if (ng < 2) return(NA_character_)
shp <- vapply(vals_split, safe_shapiro, numeric(1))
normal_like <- all(is.na(shp) | shp > 0.05)
if (ng == 2) {
if (normal_like) return("welch_t")
return("wilcox")
}
if (normal_like) return("anova")
"kruskal"
}
run_main_test <- function(dat, method) {
ng <- length(unique(dat$group_label))
if (ng < 2) {
return(tibble::tibble(
method = NA_character_,
statistic = NA_real_,
p_value = NA_real_,
df = NA_real_,
n_groups = ng,
note = "Fewer than 2 groups remained."
))
}
if (method == "t_test") {
if (ng != 2) {
return(tibble::tibble(
method = method,
statistic = NA_real_,
p_value = NA_real_,
df = NA_real_,
n_groups = ng,
note = "t_test requires exactly 2 groups."
))
}
fit <- stats::t.test(value ~ group_label, data = dat, var.equal = TRUE)
return(tibble::tibble(
method = method,
statistic = unname(fit$statistic),
p_value = fit$p.value,
df = unname(fit$parameter),
n_groups = ng,
note = NA_character_
))
}
if (method == "welch_t") {
if (ng != 2) {
return(tibble::tibble(
method = method,
statistic = NA_real_,
p_value = NA_real_,
df = NA_real_,
n_groups = ng,
note = "welch_t requires exactly 2 groups."
))
}
fit <- stats::t.test(value ~ group_label, data = dat, var.equal = FALSE)
return(tibble::tibble(
method = method,
statistic = unname(fit$statistic),
p_value = fit$p.value,
df = unname(fit$parameter),
n_groups = ng,
note = NA_character_
))
}
if (method == "wilcox") {
if (ng != 2) {
return(tibble::tibble(
method = method,
statistic = NA_real_,
p_value = NA_real_,
df = NA_real_,
n_groups = ng,
note = "wilcox requires exactly 2 groups."
))
}
fit <- stats::wilcox.test(value ~ group_label, data = dat, exact = FALSE)
return(tibble::tibble(
method = method,
statistic = unname(fit$statistic),
p_value = fit$p.value,
df = NA_real_,
n_groups = ng,
note = NA_character_
))
}
if (method == "anova") {
fit <- stats::aov(value ~ group_label, data = dat)
sm <- summary(fit)[[1]]
return(tibble::tibble(
method = method,
statistic = sm[1, "F value"],
p_value = sm[1, "Pr(>F)"],
df = sm[1, "Df"],
n_groups = ng,
note = NA_character_
))
}
if (method == "kruskal") {
fit <- stats::kruskal.test(value ~ group_label, data = dat)
return(tibble::tibble(
method = method,
statistic = unname(fit$statistic),
p_value = fit$p.value,
df = unname(fit$parameter),
n_groups = ng,
note = NA_character_
))
}
tibble::tibble(
method = method,
statistic = NA_real_,
p_value = NA_real_,
df = NA_real_,
n_groups = ng,
note = "Unknown method."
)
}
run_pairwise_test <- function(dat, method, p_adjust_method) {
ng <- length(unique(dat$group_label))
if (ng < 3) return(NULL)
if (method %in% c("anova", "t_test", "welch_t")) {
pw <- stats::pairwise.t.test(
x = dat$value,
g = dat$group_label,
p.adjust.method = p_adjust_method,
pool.sd = FALSE
)
} else if (method %in% c("kruskal", "wilcox")) {
pw <- stats::pairwise.wilcox.test(
x = dat$value,
g = dat$group_label,
p.adjust.method = p_adjust_method,
exact = FALSE
)
} else {
return(NULL)
}
mat <- pw$p.value
if (is.null(mat)) return(NULL)
out <- list()
rn <- rownames(mat)
cn <- colnames(mat)
idx <- 1L
for (i in seq_len(nrow(mat))) {
for (j in seq_len(ncol(mat))) {
if (!is.na(mat[i, j])) {
out[[idx]] <- tibble::tibble(
group1 = rn[i],
group2 = cn[j],
p_value = mat[i, j]
)
idx <- idx + 1L
}
}
}
if (length(out) == 0) return(NULL)
dplyr::bind_rows(out)
}
# -----------------------------
# metadata
# -----------------------------
period_info <- x$period_info
if (is.null(tree_info)) {
info_tbl <- tibble::tibble(
tree = unique(period_info$tree),
species = NA_character_,
site = NA_character_
)
} else if (is.character(tree_info) && !is.null(names(tree_info))) {
info_tbl <- tibble::tibble(
tree = names(tree_info),
species = unname(tree_info),
site = NA_character_
)
} else if (is.data.frame(tree_info)) {
info_tbl <- tibble::as_tibble(tree_info)
if (!("tree" %in% names(info_tbl))) {
stop("'tree_info' data frame must contain a column named 'tree'.")
}
if (!("species" %in% names(info_tbl))) info_tbl$species <- NA_character_
if (!("site" %in% names(info_tbl))) info_tbl$site <- NA_character_
info_tbl <- info_tbl %>% dplyr::select(tree, species, site)
} else {
stop("'tree_info' must be NULL, a named character vector, or a data frame with at least a 'tree' column.")
}
if (compare_by == "species" && all(is.na(info_tbl$species))) {
stop("compare_by = 'species' requires species information in 'tree_info'.")
}
if (compare_by == "site" && all(is.na(info_tbl$site))) {
stop("compare_by = 'site' requires site information in 'tree_info'.")
}
if (compare_by == "species_site" && (all(is.na(info_tbl$species)) || all(is.na(info_tbl$site)))) {
stop("compare_by = 'species_site' requires both species and site information in 'tree_info'.")
}
dat <- period_info %>%
dplyr::left_join(info_tbl, by = "tree") %>%
dplyr::mutate(
start_year = lubridate::year(adverse_start),
start_month = lubridate::month(adverse_start),
start_doy = lubridate::yday(adverse_start),
value = .data[[parameter]]
)
if (!is.null(ids)) {
dat <- dat %>% dplyr::filter(IDs %in% ids)
}
if (!is.null(years)) {
dat <- dat %>% dplyr::filter(start_year %in% years)
}
if (!is.null(months)) {
dat <- dat %>% dplyr::filter(start_month %in% months)
}
if (!is.null(doy_window)) {
dat <- dat %>% dplyr::filter(within_doy_window(start_doy, doy_window))
}
if (!is.null(year_window)) {
if (length(year_window) != 2 || any(is.na(year_window))) {
stop("'year_window' must be a numeric vector of length 2.")
}
ylo <- min(year_window)
yhi <- max(year_window)
dat <- dat %>% dplyr::filter(start_year >= ylo, start_year <= yhi)
}
if (nrow(dat) == 0) {
stop("No rows remain after filtering.")
}
dat$group_label <- switch(
compare_by,
tree = dat$tree,
species = dat$species,
site = dat$site,
species_site = paste(dat$species, dat$site, sep = " @ ")
)
dat <- dat %>% dplyr::filter(!is.na(group_label), !is.na(value))
if (nrow(dat) == 0) {
stop("No rows remain after removing missing groups or missing parameter values.")
}
dat$stratum <- switch(
within,
all = "all",
year = as.character(dat$start_year),
ID = paste0("ID_", dat$IDs)
)
# drop groups with too few observations within each stratum
dat <- dat %>%
dplyr::group_by(stratum, group_label) %>%
dplyr::mutate(.n_group = sum(is.finite(value))) %>%
dplyr::ungroup() %>%
dplyr::filter(.n_group >= min_n_per_group) %>%
dplyr::select(-.n_group)
if (nrow(dat) == 0) {
stop("No rows remain after applying 'min_n_per_group'.")
}
# descriptive group summary
group_summary <- dat %>%
dplyr::group_by(stratum, group_label) %>%
dplyr::summarise(
n = sum(is.finite(value)),
mean = if (all(is.na(value))) NA_real_ else mean(value, na.rm = TRUE),
median = if (all(is.na(value))) NA_real_ else stats::median(value, na.rm = TRUE),
sd = if (sum(is.finite(value)) <= 1) NA_real_ else stats::sd(value, na.rm = TRUE),
min = if (all(is.na(value))) NA_real_ else min(value, na.rm = TRUE),
max = if (all(is.na(value))) NA_real_ else max(value, na.rm = TRUE),
.groups = "drop"
) %>%
dplyr::arrange(stratum, group_label)
# main tests and pairwise tests
strata <- unique(dat$stratum)
main_list <- list()
pair_list <- list()
idx_main <- 1L
idx_pair <- 1L
for (ss in strata) {
dss <- dat %>% dplyr::filter(stratum == ss)
# groups still valid
ng <- length(unique(dss$group_label))
if (ng < 2) {
main_list[[idx_main]] <- tibble::tibble(
stratum = ss,
parameter = parameter,
compare_by = compare_by,
method = NA_character_,
statistic = NA_real_,
p_value = NA_real_,
df = NA_real_,
n_groups = ng,
note = "Fewer than 2 valid groups."
)
idx_main <- idx_main + 1L
next
}
mth <- if (test_method == "auto") infer_method(dss, compare_by) else test_method
res_main <- run_main_test(dss, mth)
res_main <- cbind(
tibble::tibble(
stratum = ss,
parameter = parameter,
compare_by = compare_by
),
res_main
)
main_list[[idx_main]] <- res_main
idx_main <- idx_main + 1L
if (isTRUE(pairwise)) {
pw <- run_pairwise_test(dss, mth, p_adjust_method = p_adjust_method)
if (!is.null(pw) && nrow(pw) > 0) {
pw <- cbind(
tibble::tibble(
stratum = ss,
parameter = parameter,
compare_by = compare_by,
method = mth
),
pw
)
pair_list[[idx_pair]] <- pw
idx_pair <- idx_pair + 1L
}
}
}
test_results <- dplyr::bind_rows(main_list)
pairwise_results <- dplyr::bind_rows(pair_list)
out <- list(
call = match.call(),
data_used = dat,
test_results = test_results,
pairwise_results = pairwise_results,
group_summary = group_summary,
settings = list(
parameter = parameter,
compare_by = compare_by,
within = within,
test_method = test_method,
ids = ids,
years = years,
months = months,
doy_window = doy_window,
year_window = year_window,
pairwise = pairwise,
p_adjust_method = p_adjust_method,
min_n_per_group = min_n_per_group
)
)
class(out) <- "clim_twd_test"
out
}
#' @title Summarize clim.twd.test output
#'
#' @param object An object of class \code{"clim_twd_test"}.
#' @param ... Further arguments passed to or from other methods.
#'
#' @return An object of class \code{"summary.clim_twd_test"}.
#'
#' @method summary clim_twd_test
#' @export
summary.clim_twd_test <- function(object, ...) {
if (!inherits(object, "clim_twd_test")) {
stop("'object' must be of class 'clim_twd_test'.")
}
overview <- tibble::tibble(
parameter = object$settings$parameter,
compare_by = object$settings$compare_by,
within = object$settings$within,
test_method = object$settings$test_method,
n_rows_data = nrow(object$data_used),
n_strata = length(unique(object$data_used$stratum)),
n_groups = length(unique(object$data_used$group_label)),
n_main_tests = nrow(object$test_results),
n_pairwise_tests = if (is.null(object$pairwise_results)) 0L else nrow(object$pairwise_results)
)
out <- list(
overview = overview,
group_summary = object$group_summary,
test_results = object$test_results,
pairwise_head = utils::head(object$pairwise_results, 20)
)
class(out) <- "summary.clim_twd_test"
out
}
#' @title Print summary of clim.twd.test output
#'
#' @param x An object of class \code{"summary.clim_twd_test"}.
#' @param ... Further arguments passed to or from other methods.
#'
#' @return The input object, invisibly.
#'
#' @method print summary.clim_twd_test
#' @export
print.summary.clim_twd_test <- function(x, ...) {
if (!inherits(x, "summary.clim_twd_test")) {
stop("'x' must be of class 'summary.clim_twd_test'.")
}
cat("Summary of clim.twd.test output\n")
cat("--------------------------------\n\n")
cat("Overview:\n")
print(x$overview)
cat("\nGroup summary:\n")
print(x$group_summary)
cat("\nMain test results:\n")
print(x$test_results)
if (!is.null(x$pairwise_head) && nrow(x$pairwise_head) > 0) {
cat("\nPairwise comparisons (preview):\n")
print(x$pairwise_head)
}
invisible(x)
}
#' @title Plot method for clim.twd.test output
#'
#' @description
#' Plots the data used in \code{clim.twd.test()} as grouped boxplots with jittered
#' observations, faceted by stratum when applicable.
#'
#' @param x An object of class \code{"clim_twd_test"}.
#' @param y Unused.
#' @param show_points Logical. If \code{TRUE}, add jittered points.
#' @param facet Logical. If \code{TRUE}, facet by stratum when multiple strata exist.
#' @param legend_position Legend position.
#' @param main Optional plot title.
#' @param ... Further arguments passed to or from other methods.
#'
#' @return A \code{ggplot2} object.
#'
#' @method plot clim_twd_test
#' @export
plot.clim_twd_test <- function(x,
y = NULL,
show_points = TRUE,
facet = TRUE,
legend_position = "none",
main = NULL,
...) {
group_label <- value <- NULL
if (!inherits(x, "clim_twd_test")) {
stop("'x' must be of class 'clim_twd_test'.")
}
if (!requireNamespace("ggplot2", quietly = TRUE)) {
stop("Package 'ggplot2' is required for plotting.")
}
dat <- x$data_used
if (is.null(dat) || nrow(dat) == 0) {
stop("No data available for plotting.")
}
p <- ggplot2::ggplot(dat, ggplot2::aes(x = group_label, y = value, fill = group_label)) +
ggplot2::geom_boxplot(outlier.shape = NA)
if (isTRUE(show_points)) {
p <- p + ggplot2::geom_jitter(width = 0.15, alpha = 0.7, size = 1.5)
}
p <- p +
ggplot2::theme_minimal() +
ggplot2::theme(
legend.position = legend_position,
axis.title = ggplot2::element_text(size = 14),
axis.text = ggplot2::element_text(size = 12, angle = 45, hjust = 1)
) +
ggplot2::labs(
title = if (is.null(main)) {
paste0(
"Parameter: ", x$settings$parameter,
" | Compare by: ", x$settings$compare_by,
" | Within: ", x$settings$within
)
} else main,
x = x$settings$compare_by,
y = x$settings$parameter
)
if (isTRUE(facet) && length(unique(dat$stratum)) > 1) {
p <- p + ggplot2::facet_wrap(~ stratum, scales = "free_y", ncol = 1)
}
p
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.