R/stats.clim.twd.R

Defines functions plot.clim_twd_test print.summary.clim_twd_test summary.clim_twd_test clim.twd.test plot.clim_twd_stats print.summary.clim_twd_stats summary.clim_twd_stats clim.twd.stats

Documented in clim.twd.stats clim.twd.test plot.clim_twd_stats plot.clim_twd_test print.summary.clim_twd_stats print.summary.clim_twd_test summary.clim_twd_stats summary.clim_twd_test

#' @title Calculate tree-, species-, or site-level statistics from clim.twd output
#'
#' @description
#' Calculates grouped statistics from the output of \code{clim.twd()}.
#'
#' The function can summarize relative dendrometer change trajectories and
#' event-level metrics:
#' \itemize{
#'   \item by tree,
#'   \item by species,
#'   \item by site,
#'   \item or by species within site.
#' }
#'
#' It can also restrict IDs to specific months, years, or custom day-of-year
#' windows based on the adverse-phase start date of each ID.
#'
#' @param x An object of class \code{"clim_twd_output"} returned by
#'   \code{clim.twd()}.
#' @param tree_info Optional metadata table describing trees. It must contain a
#'   column named \code{tree}, and may additionally contain \code{species} and
#'   \code{site}. A named character vector is also accepted for species-only
#'   mapping, where names are trees and values are species.
#' @param response Character. Which response table from \code{clim.twd()} should
#'   be summarized:
#'   \describe{
#'     \item{`"adverse_change"`}{Only the adverse phase.}
#'     \item{`"normal_change"`}{Only the following normal phase.}
#'     \item{`"full_period_change"`}{Adverse + following normal phase, reset to 0
#'     at each adverse start.}
#'     \item{`"continuous_full_period_change"`}{Adverse + following normal phase,
#'     continuing from the previous full period instead of resetting to 0.}
#'   }
#' @param group_by Character. Grouping level for trajectory summaries. One of
#'   \code{"tree"}, \code{"species"}, \code{"site"}, or \code{"species_site"}.
#' @param center Character. Central tendency used for grouped trajectories and
#'   grouped period metrics. One of \code{"mean"} or \code{"median"}.
#' @param conf_level Numeric confidence/limit level. Default is \code{0.95}.
#'   Empirical limits are returned as the lower and upper quantiles corresponding
#'   to this level.
#' @param months Optional numeric vector of months (1--12). Only IDs whose
#'   adverse-phase start falls in these months are retained.
#' @param years Optional numeric vector of years. Only IDs whose adverse-phase
#'   start year falls in these years are retained.
#' @param doy_window Optional numeric vector of length 2 defining the allowed
#'   day-of-year window for adverse-phase start. Wrapped windows are supported,
#'   for example \code{c(300, 50)}.
#' @param year_window Optional numeric vector of length 2 defining the allowed
#'   year range for adverse-phase start.
#' @param include_tree_series Logical. If \code{TRUE}, the filtered long-format
#'   tree-level data are stored in the output.
#'
#' @return
#' A list of class \code{"clim_twd_stats"} with:
#' \describe{
#'   \item{trajectory_summary}{Grouped time-series summary for each ID and date,
#'   including central tendency, SD bands, and empirical 95\% limits.}
#'   \item{id_summary}{Grouped per-ID summary derived from
#'   \code{x$period_info}.}
#'   \item{selected_ids}{IDs retained after temporal filtering.}
#'   \item{phase_table}{Filtered phase table.}
#'   \item{tree_info}{Metadata table used for grouping.}
#'   \item{long_data}{Filtered long-format tree-level response table, if
#'   \code{include_tree_series = TRUE}.}
#'   \item{settings}{List of settings used to generate the summaries.}
#' }
#'
#' @details
#' \strong{Temporal filtering} is based on the \code{adverse_start} date in
#' \code{x$phase_table}. Multiple filters are combined, so for example users can
#' simultaneously restrict to:
#' \itemize{
#'   \item specific months,
#'   \item specific years,
#'   \item a day-of-year window,
#'   \item and a year range.
#' }
#'
#' If \code{group_by = "species"}, \code{"site"}, or \code{"species_site"},
#' \code{tree_info} must provide the required columns.
#'
#' @examples
#' \donttest{
#' rel_out <- clim.twd(
#'   df = gf_nepa17,
#'   Clim = ktm_rain17,
#'   thresholdClim = "<10",
#'   thresholdDays = ">5",
#'   showPlot = FALSE
#' )
#'
#' # tree-level statistics
#' st1 <- clim.twd.stats(
#'   rel_out,
#'   response = "full_period_change",
#'   group_by = "tree"
#' )
#'
#' summary(st1)
#' plot(st1, type = "trajectory")
#'
#' # species metadata
#' tree_info <- data.frame(
#'   tree = c("T2", "T3"),
#'   species = c("Pinus", "Pinus"),
#'   site = c("Ktm", "Ktm")
#' )
#'
#' # species-level summaries restricted to IDs starting in months 6 to 8
#' st2 <- clim.twd.stats(
#'   rel_out,
#'   tree_info = tree_info,
#'   response = "full_period_change",
#'   group_by = "species",
#'   center = "median",
#'   months = 6:8
#' )
#'
#' summary(st2)
#' plot(st2, type = "trajectory", band = "limit95")
#'
#' # species within site
#' st3 <- clim.twd.stats(
#'   rel_out,
#'   tree_info = tree_info,
#'   response = "continuous_full_period_change",
#'   group_by = "species_site",
#'   doy_window = c(150, 250),
#'   year_window = c(2017, 2018)
#' )
#' }
#'
#' @importFrom dplyr mutate group_by summarise ungroup arrange filter select
#'   left_join bind_rows all_of n_distinct %>%
#' @importFrom tidyr pivot_longer
#' @importFrom tibble tibble as_tibble
#' @importFrom lubridate month year yday
#' @export
clim.twd.stats <- function(x,
                           tree_info = NULL,
                           response = c("adverse_change", "normal_change",
                                        "full_period_change", "continuous_full_period_change"),
                           group_by = c("tree", "species", "site", "species_site"),
                           center = c("mean", "median"),
                           conf_level = 0.95,
                           months = NULL,
                           years = NULL,
                           doy_window = NULL,
                           year_window = NULL,
                           include_tree_series = TRUE) {

  IDs <- TIME <- phase_type <- tree <- species <- site <- group_label <- value <- NULL
  adverse_start <- center_value <- adverse_end <- normal_start <- normal_end <- NULL
  full_end <- no_adverse_for_tree <- lag_to_below_zero <- lag_to_max_adverse_decline <- NULL
  lag_to_return_zero <- max_adverse_decline_value <- change_end_adverse <- change_end_full_period <- NULL
  continuous_start_full_period <- continuous_end_full_period <- NULL

  if (!inherits(x, "clim_twd_output")) {
    stop("'x' must be of class 'clim_twd_output'.")
  }

  response <- match.arg(response)
  group_by <- match.arg(group_by)
  center <- match.arg(center)

  if (!is.numeric(conf_level) || length(conf_level) != 1 || is.na(conf_level) ||
      conf_level <= 0 || conf_level >= 1) {
    stop("'conf_level' must be a single number between 0 and 1.")
  }

  if (is.null(x[[response]]) || nrow(x[[response]]) == 0) {
    stop("Selected response table is empty: ", response)
  }

  if (is.null(x$phase_table) || nrow(x$phase_table) == 0) {
    stop("The supplied clim.twd object has no phase_table.")
  }

  # -----------------------------
  # helpers
  # -----------------------------
  agg_fun <- if (center == "mean") {
    function(z) if (all(is.na(z))) NA_real_ else mean(z, na.rm = TRUE)
  } else {
    function(z) if (all(is.na(z))) NA_real_ else stats::median(z, na.rm = TRUE)
  }

  get_limits <- function(z, conf_level = 0.95) {
    if (all(is.na(z))) {
      return(c(NA_real_, NA_real_))
    }
    alpha <- (1 - conf_level) / 2
    stats::quantile(z, probs = c(alpha, 1 - alpha), na.rm = TRUE, names = FALSE)
  }

  within_doy_window <- function(doy, win) {
    if (length(win) != 2 || any(is.na(win))) {
      stop("'doy_window' must be a numeric vector of length 2.")
    }
    if (win[1] <= win[2]) {
      doy >= win[1] & doy <= win[2]
    } else {
      doy >= win[1] | doy <= win[2]
    }
  }

  # -----------------------------
  # metadata table
  # -----------------------------
  response_df <- x[[response]]
  phase_table <- x$phase_table
  period_info <- x$period_info

  tree_cols <- setdiff(names(response_df), c("TIME", "IDs", "phase_type"))

  if (is.null(tree_info)) {
    info_tbl <- tibble::tibble(
      tree = tree_cols,
      species = NA_character_,
      site = NA_character_
    )
  } else if (is.character(tree_info) && !is.null(names(tree_info))) {
    info_tbl <- tibble::tibble(
      tree = names(tree_info),
      species = unname(tree_info),
      site = NA_character_
    )
  } else if (is.data.frame(tree_info)) {
    info_tbl <- tibble::as_tibble(tree_info)

    if (!("tree" %in% names(info_tbl))) {
      stop("'tree_info' data frame must contain a column named 'tree'.")
    }
    if (!("species" %in% names(info_tbl))) info_tbl$species <- NA_character_
    if (!("site" %in% names(info_tbl))) info_tbl$site <- NA_character_

    info_tbl <- info_tbl %>% dplyr::select(tree, species, site)
  } else {
    stop("'tree_info' must be NULL, a named character vector, or a data frame with at least a 'tree' column.")
  }

  # validate grouping requirements
  if (group_by == "species" && all(is.na(info_tbl$species))) {
    stop("group_by = 'species' requires species information in 'tree_info'.")
  }
  if (group_by == "site" && all(is.na(info_tbl$site))) {
    stop("group_by = 'site' requires site information in 'tree_info'.")
  }
  if (group_by == "species_site" && (all(is.na(info_tbl$species)) || all(is.na(info_tbl$site)))) {
    stop("group_by = 'species_site' requires both species and site information in 'tree_info'.")
  }

  # -----------------------------
  # filter IDs by time windows
  # -----------------------------
  phase_tbl2 <- phase_table %>%
    dplyr::mutate(
      start_month = lubridate::month(adverse_start),
      start_year = lubridate::year(adverse_start),
      start_doy = lubridate::yday(adverse_start)
    )

  keep <- rep(TRUE, nrow(phase_tbl2))

  if (!is.null(months)) {
    keep <- keep & phase_tbl2$start_month %in% months
  }
  if (!is.null(years)) {
    keep <- keep & phase_tbl2$start_year %in% years
  }
  if (!is.null(doy_window)) {
    keep <- keep & within_doy_window(phase_tbl2$start_doy, doy_window)
  }
  if (!is.null(year_window)) {
    if (length(year_window) != 2 || any(is.na(year_window))) {
      stop("'year_window' must be a numeric vector of length 2.")
    }
    ylo <- min(year_window)
    yhi <- max(year_window)
    keep <- keep & phase_tbl2$start_year >= ylo & phase_tbl2$start_year <= yhi
  }

  phase_tbl2 <- phase_tbl2[keep, , drop = FALSE]
  selected_ids <- phase_tbl2$IDs

  if (length(selected_ids) == 0) {
    stop("No IDs remain after applying the requested temporal filters.")
  }

  # -----------------------------
  # long-format response data
  # -----------------------------
  long_dat <- response_df %>%
    dplyr::filter(IDs %in% selected_ids) %>%
    tidyr::pivot_longer(
      cols = dplyr::all_of(tree_cols),
      names_to = "tree",
      values_to = "value"
    ) %>%
    dplyr::left_join(info_tbl, by = "tree")

  long_dat$group_label <- switch(
    group_by,
    tree = long_dat$tree,
    species = long_dat$species,
    site = long_dat$site,
    species_site = paste(long_dat$species, long_dat$site, sep = " @ ")
  )

  if (all(is.na(long_dat$group_label))) {
    stop("Grouping produced only missing labels. Check 'tree_info' and 'group_by'.")
  }

  # -----------------------------
  # trajectory summary
  # -----------------------------
  trajectory_summary <- long_dat %>%
    dplyr::group_by(IDs, TIME, phase_type, group_label) %>%
    dplyr::summarise(
      n_trees = sum(!is.na(value)),
      center_value = agg_fun(value),
      sd = if (sum(!is.na(value)) <= 1) NA_real_ else stats::sd(value, na.rm = TRUE),
      sd_lower = center_value - sd,
      sd_upper = center_value + sd,
      limit95_lower = get_limits(value, conf_level)[1],
      limit95_upper = get_limits(value, conf_level)[2],
      .groups = "drop"
    ) %>%
    dplyr::arrange(IDs, TIME, group_label)

  # -----------------------------
  # grouped ID summary from period_info
  # -----------------------------
  period_info2 <- period_info %>%
    dplyr::filter(IDs %in% selected_ids) %>%
    dplyr::left_join(info_tbl, by = "tree")

  period_info2$group_label <- switch(
    group_by,
    tree = period_info2$tree,
    species = period_info2$species,
    site = period_info2$site,
    species_site = paste(period_info2$species, period_info2$site, sep = " @ ")
  )

  safe_agg <- function(z) {
    if (all(is.na(z))) return(NA_real_)
    agg_fun(z)
  }

  id_summary <- period_info2 %>%
    dplyr::group_by(IDs, group_label) %>%
    dplyr::summarise(
      n_trees = dplyr::n(),
      adverse_start = min(adverse_start, na.rm = TRUE),
      adverse_end = min(adverse_end, na.rm = TRUE),
      normal_start = suppressWarnings(min(normal_start, na.rm = TRUE)),
      normal_end = suppressWarnings(min(normal_end, na.rm = TRUE)),
      full_end = min(full_end, na.rm = TRUE),
      prop_no_adverse = mean(no_adverse_for_tree, na.rm = TRUE),
      lag_to_below_zero = safe_agg(lag_to_below_zero),
      lag_to_max_adverse_decline = safe_agg(lag_to_max_adverse_decline),
      lag_to_return_zero = safe_agg(lag_to_return_zero),
      max_adverse_decline_value = safe_agg(max_adverse_decline_value),
      change_end_adverse = safe_agg(change_end_adverse),
      change_end_full_period = safe_agg(change_end_full_period),
      continuous_start_full_period = safe_agg(continuous_start_full_period),
      continuous_end_full_period = safe_agg(continuous_end_full_period),
      .groups = "drop"
    ) %>%
    dplyr::mutate(
      normal_start = ifelse(is.infinite(normal_start), NA, normal_start),
      normal_end = ifelse(is.infinite(normal_end), NA, normal_end)
    ) %>%
    dplyr::arrange(IDs, group_label)

  out <- list(
    call = match.call(),
    response = response,
    group_by = group_by,
    center = center,
    conf_level = conf_level,
    selected_ids = selected_ids,
    phase_table = phase_tbl2,
    tree_info = info_tbl,
    trajectory_summary = trajectory_summary,
    id_summary = id_summary,
    long_data = if (isTRUE(include_tree_series)) long_dat else NULL,
    settings = list(
      months = months,
      years = years,
      doy_window = doy_window,
      year_window = year_window
    )
  )

  class(out) <- "clim_twd_stats"
  out
}


#' @title Summarize clim.twd.stats output
#'
#' @param object An object of class \code{"clim_twd_stats"}.
#' @param ... Further arguments passed to or from other methods.
#'
#' @return An object of class \code{"summary.clim_twd_stats"}.
#'
#' @method summary clim_twd_stats
#' @export
summary.clim_twd_stats <- function(object, ...) {
  if (!inherits(object, "clim_twd_stats")) {
    stop("'object' must be of class 'clim_twd_stats'.")
  }

  overview <- tibble::tibble(
    response = object$response,
    group_by = object$group_by,
    center = object$center,
    conf_level = object$conf_level,
    n_ids = length(unique(object$selected_ids)),
    n_groups = length(unique(object$trajectory_summary$group_label)),
    n_rows_trajectory = nrow(object$trajectory_summary),
    n_rows_id_summary = nrow(object$id_summary)
  )

  out <- list(
    overview = overview,
    phase_table = object$phase_table,
    trajectory_head = utils::head(object$trajectory_summary, 12),
    id_summary = object$id_summary
  )

  class(out) <- "summary.clim_twd_stats"
  out
}


#' @title Print summary of clim.twd.stats output
#'
#' @param x An object of class \code{"summary.clim_twd_stats"}.
#' @param ... Further arguments passed to or from other methods.
#'
#' @return The input object, invisibly.
#'
#' @method print summary.clim_twd_stats
#' @export
print.summary.clim_twd_stats <- function(x, ...) {
  if (!inherits(x, "summary.clim_twd_stats")) {
    stop("'x' must be of class 'summary.clim_twd_stats'.")
  }

  cat("Summary of clim.twd.stats output\n")
  cat("--------------------------------\n\n")

  cat("Overview:\n")
  print(x$overview)

  cat("\nFiltered phase table:\n")
  print(x$phase_table)

  cat("\nTrajectory summary preview:\n")
  print(x$trajectory_head)

  cat("\nID summary:\n")
  print(x$id_summary)

  invisible(x)
}


#' @title Plot method for clim.twd.stats output
#'
#' @description
#' Plots grouped trajectories or grouped ID-level metrics from
#' \code{clim.twd.stats()}.
#'
#' @param x An object of class \code{"clim_twd_stats"}.
#' @param y Unused.
#' @param type One of \code{"trajectory"} or \code{"id_metric"}.
#' @param ids Optional numeric vector of IDs to plot.
#' @param groups Optional character vector of group labels to plot.
#' @param band One of \code{"none"}, \code{"sd"}, \code{"limit95"}, or \code{"both"}.
#'   Used for \code{type = "trajectory"}.
#' @param metric Metric column from \code{x$id_summary} for \code{type = "id_metric"}.
#' @param facet_by One of \code{"IDs"}, \code{"group"}, or \code{"none"}.
#' @param legend_position Legend position.
#' @param main Optional title.
#' @param ... Further arguments passed to or from other methods.
#'
#' @return A \code{ggplot2} object.
#'
#' @method plot clim_twd_stats
#' @importFrom dplyr filter bind_rows %>%
#' @importFrom ggplot2 ggplot aes geom_line geom_ribbon geom_col facet_wrap
#'   theme_minimal theme element_text labs position_dodge
#' @export
plot.clim_twd_stats <- function(x,
                                y = NULL,
                                type = c("trajectory", "id_metric"),
                                ids = NULL,
                                groups = NULL,
                                band = c("none", "sd", "limit95", "both"),
                                metric = c("lag_to_below_zero",
                                           "lag_to_max_adverse_decline",
                                           "lag_to_return_zero",
                                           "max_adverse_decline_value",
                                           "change_end_adverse",
                                           "change_end_full_period",
                                           "continuous_end_full_period"),
                                facet_by = c("IDs", "group", "none"),
                                legend_position = "bottom",
                                main = NULL,
                                ...) {

  IDs <- TIME <- group_label <- center_value <- sd_lower <- sd_upper <- NULL
  limit95_lower <- limit95_upper <- group_label <- value <- NULL

  if (!inherits(x, "clim_twd_stats")) {
    stop("'x' must be of class 'clim_twd_stats'.")
  }

  if (!requireNamespace("ggplot2", quietly = TRUE)) {
    stop("Package 'ggplot2' is required for plotting.")
  }

  type <- match.arg(type)
  band <- match.arg(band)
  facet_by <- match.arg(facet_by)
  metric <- match.arg(metric)

  if (type == "trajectory") {
    dat <- x$trajectory_summary

    if (!is.null(ids)) dat <- dat %>% dplyr::filter(IDs %in% ids)
    if (!is.null(groups)) dat <- dat %>% dplyr::filter(group_label %in% groups)

    if (nrow(dat) == 0) stop("No trajectory rows remain after filtering.")

    p <- ggplot2::ggplot(dat, ggplot2::aes(x = TIME, y = center_value, colour = group_label, fill = group_label))

    if (band %in% c("sd", "both")) {
      p <- p + ggplot2::geom_ribbon(
        ggplot2::aes(ymin = sd_lower, ymax = sd_upper),
        alpha = 0.15,
        colour = NA
      )
    }

    if (band %in% c("limit95", "both")) {
      p <- p + ggplot2::geom_ribbon(
        ggplot2::aes(ymin = limit95_lower, ymax = limit95_upper),
        alpha = 0.10,
        colour = NA
      )
    }

    p <- p +
      ggplot2::geom_line() +
      ggplot2::theme_minimal() +
      ggplot2::theme(
        legend.position = legend_position,
        axis.title = ggplot2::element_text(size = 14),
        axis.text = ggplot2::element_text(size = 12)
      ) +
      ggplot2::labs(
        title = if (is.null(main)) {
          paste("Grouped trajectory:", x$group_by, "(", x$center, ")")
        } else main,
        x = "Time",
        y = "Relative change",
        colour = x$group_by,
        fill = x$group_by
      )

    if (facet_by == "IDs" && length(unique(dat$IDs)) > 1) {
      p <- p + ggplot2::facet_wrap(~ IDs, scales = "free_x", ncol = 1)
    } else if (facet_by == "group" && length(unique(dat$group_label)) > 1) {
      p <- p + ggplot2::facet_wrap(~ group_label, scales = "free_x", ncol = 1)
    }

    return(p)
  }

  dat <- x$id_summary

  if (!is.null(ids)) dat <- dat %>% dplyr::filter(IDs %in% ids)
  if (!is.null(groups)) dat <- dat %>% dplyr::filter(group_label %in% groups)

  if (nrow(dat) == 0) stop("No ID summary rows remain after filtering.")

  p <- ggplot2::ggplot(
    dat,
    ggplot2::aes(x = group_label, y = .data[[metric]], fill = group_label)
  ) +
    ggplot2::geom_col() +
    ggplot2::theme_minimal() +
    ggplot2::theme(
      legend.position = legend_position,
      axis.title = ggplot2::element_text(size = 14),
      axis.text = ggplot2::element_text(size = 12, angle = 45, hjust = 1)
    ) +
    ggplot2::labs(
      title = if (is.null(main)) paste("ID metric:", metric) else main,
      x = x$group_by,
      y = metric,
      fill = x$group_by
    )

  if (facet_by == "IDs" && length(unique(dat$IDs)) > 1) {
    p <- p + ggplot2::facet_wrap(~ IDs, scales = "free_y", ncol = 1)
  }

  p
}

#' @title Statistical testing for clim.twd output
#'
#' @description
#' Performs hypothesis tests on parameters derived from \code{clim.twd()} output.
#'
#' This function is designed for scenarios such as:
#' \itemize{
#'   \item species comparison as a whole,
#'   \item species comparison by year,
#'   \item site comparison as a whole,
#'   \item site comparison by year,
#'   \item species-within-site comparison,
#'   \item ID-wise comparisons.
#' }
#'
#' The function uses the \code{period_info} table from \code{clim.twd()} and
#' tests selected parameters across groups such as trees, species, sites, or
#' species-site combinations.
#'
#' @param x An object of class \code{"clim_twd_output"} returned by
#'   \code{clim.twd()}.
#' @param tree_info Optional metadata table describing trees. It must contain a
#'   column named \code{tree}, and may additionally contain \code{species} and
#'   \code{site}. A named character vector is also accepted for species-only
#'   mapping, where names are trees and values are species.
#' @param parameter Character. Parameter from \code{x$period_info} to test.
#'   Supported examples include:
#'   \describe{
#'     \item{`"lag_to_below_zero"`}{Lag from adverse start until relative change
#'     first drops below zero.}
#'     \item{`"lag_to_max_adverse_decline"`}{Lag from adverse start until maximum
#'     adverse decline is reached.}
#'     \item{`"lag_to_return_zero"`}{Lag from adverse start until relative change
#'     returns to zero or above.}
#'     \item{`"max_adverse_decline_value"`}{Most negative relative change during
#'     the adverse phase.}
#'     \item{`"change_end_adverse"`}{Relative change at the end of the adverse phase.}
#'     \item{`"change_end_full_period"`}{Relative change at the end of the full period.}
#'     \item{`"continuous_end_full_period"`}{Continuous end value across periods.}
#'   }
#' @param compare_by Character. Grouping variable to compare. One of
#'   \code{"tree"}, \code{"species"}, \code{"site"}, or \code{"species_site"}.
#' @param within Character. Defines the comparison scenario:
#'   \describe{
#'     \item{`"all"`}{Compare groups across all retained IDs.}
#'     \item{`"year"`}{Compare groups separately within each adverse-start year.}
#'     \item{`"ID"`}{Compare groups separately within each ID.}
#'   }
#' @param test_method Character. One of \code{"auto"}, \code{"t_test"},
#'   \code{"welch_t"}, \code{"wilcox"}, \code{"anova"}, or \code{"kruskal"}.
#'   With \code{"auto"}, the function uses:
#'   \itemize{
#'     \item Welch t-test or one-way ANOVA when group distributions look
#'     approximately normal,
#'     \item Wilcoxon or Kruskal-Wallis otherwise.
#'   }
#' @param ids Optional numeric vector of IDs to retain before testing.
#' @param years Optional numeric vector of years to retain based on
#'   \code{adverse_start}.
#' @param months Optional numeric vector of months (1--12) to retain based on
#'   \code{adverse_start}.
#' @param doy_window Optional numeric vector of length 2 defining the allowed
#'   day-of-year window of \code{adverse_start}. Wrapped windows such as
#'   \code{c(300, 50)} are supported.
#' @param year_window Optional numeric vector of length 2 defining the allowed
#'   year range for \code{adverse_start}.
#' @param pairwise Logical. If \code{TRUE}, pairwise post-hoc tests are computed
#'   for strata with more than two groups when applicable.
#' @param p_adjust_method Character. P-value adjustment method for pairwise tests.
#'   Default is \code{"BH"}.
#' @param min_n_per_group Minimum number of non-missing observations required per
#'   group before testing. Default is \code{2}.
#'
#' @return
#' A list of class \code{"clim_twd_test"} containing:
#' \describe{
#'   \item{data_used}{Filtered test data used for the analyses.}
#'   \item{test_results}{Main test results by stratum.}
#'   \item{pairwise_results}{Pairwise comparison table where available.}
#'   \item{group_summary}{Descriptive summaries by stratum and group.}
#'   \item{settings}{Settings used for the analysis.}
#' }
#'
#' @examples
#' \donttest{
#' rel_out <- clim.twd(
#'   df = gf_nepa17,
#'   Clim = ktm_rain17,
#'   thresholdClim = "<10",
#'   thresholdDays = ">5",
#'   showPlot = FALSE
#' )
#'
#' tree_info <- data.frame(
#'   tree = c("T2", "T3"),
#'   species = c("Pinus", "Pinus"),
#'   site = c("Ktm", "Ktm")
#' )
#'
#' # species comparison across all retained IDs
#' tst1 <- clim.twd.test(
#'   rel_out,
#'   tree_info = tree_info,
#'   parameter = "max_adverse_decline_value",
#'   compare_by = "species",
#'   within = "all"
#' )
#'
#' summary(tst1)
#' plot(tst1)
#'
#' # species comparison by year
#' tst2 <- clim.twd.test(
#'   rel_out,
#'   tree_info = tree_info,
#'   parameter = "lag_to_return_zero",
#'   compare_by = "species",
#'   within = "year"
#' )
#'
#' # site comparison by year
#' tst3 <- clim.twd.test(
#'   rel_out,
#'   tree_info = tree_info,
#'   parameter = "change_end_full_period",
#'   compare_by = "site",
#'   within = "year",
#'   test_method = "kruskal"
#' )
#' }
#'
#' @importFrom dplyr mutate group_by summarise ungroup arrange filter select
#'   left_join bind_rows n_distinct all_of %>%
#' @importFrom tibble tibble as_tibble
#' @importFrom lubridate year month yday
#' @importFrom ggplot2 ggplot aes geom_boxplot geom_jitter facet_wrap theme_minimal
#'   theme element_text labs
#' @importFrom stats shapiro.test t.test wilcox.test kruskal.test aov
#'   pairwise.t.test pairwise.wilcox.test
#' @export
clim.twd.test <- function(x,
                          tree_info = NULL,
                          parameter = c(
                            "lag_to_below_zero",
                            "lag_to_max_adverse_decline",
                            "lag_to_return_zero",
                            "max_adverse_decline_value",
                            "change_end_adverse",
                            "change_end_full_period",
                            "continuous_end_full_period"
                          ),
                          compare_by = c("tree", "species", "site", "species_site"),
                          within = c("all", "year", "ID"),
                          test_method = c("auto", "t_test", "welch_t", "wilcox", "anova", "kruskal"),
                          ids = NULL,
                          years = NULL,
                          months = NULL,
                          doy_window = NULL,
                          year_window = NULL,
                          pairwise = TRUE,
                          p_adjust_method = "BH",
                          min_n_per_group = 2) {

  tree <- species <- site <- group_label <- stratum <- n_group <- .n_group <- NULL
  adverse_start <- IDs <- value <- start_year <- start_month <- start_doy <- NULL

  if (!inherits(x, "clim_twd_output")) {
    stop("'x' must be of class 'clim_twd_output'.")
  }

  if (is.null(x$period_info) || nrow(x$period_info) == 0) {
    stop("The supplied object has no 'period_info' table.")
  }

  parameter <- match.arg(parameter)
  compare_by <- match.arg(compare_by)
  within <- match.arg(within)
  test_method <- match.arg(test_method)

  if (!parameter %in% names(x$period_info)) {
    stop("Parameter not found in x$period_info: ", parameter)
  }

  # -----------------------------
  # helpers
  # -----------------------------
  within_doy_window <- function(doy, win) {
    if (length(win) != 2 || any(is.na(win))) {
      stop("'doy_window' must be a numeric vector of length 2.")
    }
    if (win[1] <= win[2]) {
      doy >= win[1] & doy <= win[2]
    } else {
      doy >= win[1] | doy <= win[2]
    }
  }

  safe_shapiro <- function(z) {
    z <- z[is.finite(z)]
    if (length(z) < 3 || length(z) > 5000) return(NA_real_)
    out <- try(stats::shapiro.test(z), silent = TRUE)
    if (inherits(out, "try-error")) return(NA_real_)
    out$p.value
  }

  infer_method <- function(dat, compare_by) {
    vals_split <- split(dat$value, dat$group_label)
    vals_split <- vals_split[vapply(vals_split, function(z) sum(is.finite(z)) > 0, logical(1))]

    ng <- length(vals_split)
    if (ng < 2) return(NA_character_)

    shp <- vapply(vals_split, safe_shapiro, numeric(1))
    normal_like <- all(is.na(shp) | shp > 0.05)

    if (ng == 2) {
      if (normal_like) return("welch_t")
      return("wilcox")
    }

    if (normal_like) return("anova")
    "kruskal"
  }

  run_main_test <- function(dat, method) {
    ng <- length(unique(dat$group_label))
    if (ng < 2) {
      return(tibble::tibble(
        method = NA_character_,
        statistic = NA_real_,
        p_value = NA_real_,
        df = NA_real_,
        n_groups = ng,
        note = "Fewer than 2 groups remained."
      ))
    }

    if (method == "t_test") {
      if (ng != 2) {
        return(tibble::tibble(
          method = method,
          statistic = NA_real_,
          p_value = NA_real_,
          df = NA_real_,
          n_groups = ng,
          note = "t_test requires exactly 2 groups."
        ))
      }
      fit <- stats::t.test(value ~ group_label, data = dat, var.equal = TRUE)
      return(tibble::tibble(
        method = method,
        statistic = unname(fit$statistic),
        p_value = fit$p.value,
        df = unname(fit$parameter),
        n_groups = ng,
        note = NA_character_
      ))
    }

    if (method == "welch_t") {
      if (ng != 2) {
        return(tibble::tibble(
          method = method,
          statistic = NA_real_,
          p_value = NA_real_,
          df = NA_real_,
          n_groups = ng,
          note = "welch_t requires exactly 2 groups."
        ))
      }
      fit <- stats::t.test(value ~ group_label, data = dat, var.equal = FALSE)
      return(tibble::tibble(
        method = method,
        statistic = unname(fit$statistic),
        p_value = fit$p.value,
        df = unname(fit$parameter),
        n_groups = ng,
        note = NA_character_
      ))
    }

    if (method == "wilcox") {
      if (ng != 2) {
        return(tibble::tibble(
          method = method,
          statistic = NA_real_,
          p_value = NA_real_,
          df = NA_real_,
          n_groups = ng,
          note = "wilcox requires exactly 2 groups."
        ))
      }
      fit <- stats::wilcox.test(value ~ group_label, data = dat, exact = FALSE)
      return(tibble::tibble(
        method = method,
        statistic = unname(fit$statistic),
        p_value = fit$p.value,
        df = NA_real_,
        n_groups = ng,
        note = NA_character_
      ))
    }

    if (method == "anova") {
      fit <- stats::aov(value ~ group_label, data = dat)
      sm <- summary(fit)[[1]]
      return(tibble::tibble(
        method = method,
        statistic = sm[1, "F value"],
        p_value = sm[1, "Pr(>F)"],
        df = sm[1, "Df"],
        n_groups = ng,
        note = NA_character_
      ))
    }

    if (method == "kruskal") {
      fit <- stats::kruskal.test(value ~ group_label, data = dat)
      return(tibble::tibble(
        method = method,
        statistic = unname(fit$statistic),
        p_value = fit$p.value,
        df = unname(fit$parameter),
        n_groups = ng,
        note = NA_character_
      ))
    }

    tibble::tibble(
      method = method,
      statistic = NA_real_,
      p_value = NA_real_,
      df = NA_real_,
      n_groups = ng,
      note = "Unknown method."
    )
  }

  run_pairwise_test <- function(dat, method, p_adjust_method) {
    ng <- length(unique(dat$group_label))
    if (ng < 3) return(NULL)

    if (method %in% c("anova", "t_test", "welch_t")) {
      pw <- stats::pairwise.t.test(
        x = dat$value,
        g = dat$group_label,
        p.adjust.method = p_adjust_method,
        pool.sd = FALSE
      )
    } else if (method %in% c("kruskal", "wilcox")) {
      pw <- stats::pairwise.wilcox.test(
        x = dat$value,
        g = dat$group_label,
        p.adjust.method = p_adjust_method,
        exact = FALSE
      )
    } else {
      return(NULL)
    }

    mat <- pw$p.value
    if (is.null(mat)) return(NULL)

    out <- list()
    rn <- rownames(mat)
    cn <- colnames(mat)

    idx <- 1L
    for (i in seq_len(nrow(mat))) {
      for (j in seq_len(ncol(mat))) {
        if (!is.na(mat[i, j])) {
          out[[idx]] <- tibble::tibble(
            group1 = rn[i],
            group2 = cn[j],
            p_value = mat[i, j]
          )
          idx <- idx + 1L
        }
      }
    }

    if (length(out) == 0) return(NULL)
    dplyr::bind_rows(out)
  }

  # -----------------------------
  # metadata
  # -----------------------------
  period_info <- x$period_info

  if (is.null(tree_info)) {
    info_tbl <- tibble::tibble(
      tree = unique(period_info$tree),
      species = NA_character_,
      site = NA_character_
    )
  } else if (is.character(tree_info) && !is.null(names(tree_info))) {
    info_tbl <- tibble::tibble(
      tree = names(tree_info),
      species = unname(tree_info),
      site = NA_character_
    )
  } else if (is.data.frame(tree_info)) {
    info_tbl <- tibble::as_tibble(tree_info)
    if (!("tree" %in% names(info_tbl))) {
      stop("'tree_info' data frame must contain a column named 'tree'.")
    }
    if (!("species" %in% names(info_tbl))) info_tbl$species <- NA_character_
    if (!("site" %in% names(info_tbl))) info_tbl$site <- NA_character_
    info_tbl <- info_tbl %>% dplyr::select(tree, species, site)
  } else {
    stop("'tree_info' must be NULL, a named character vector, or a data frame with at least a 'tree' column.")
  }

  if (compare_by == "species" && all(is.na(info_tbl$species))) {
    stop("compare_by = 'species' requires species information in 'tree_info'.")
  }
  if (compare_by == "site" && all(is.na(info_tbl$site))) {
    stop("compare_by = 'site' requires site information in 'tree_info'.")
  }
  if (compare_by == "species_site" && (all(is.na(info_tbl$species)) || all(is.na(info_tbl$site)))) {
    stop("compare_by = 'species_site' requires both species and site information in 'tree_info'.")
  }

  dat <- period_info %>%
    dplyr::left_join(info_tbl, by = "tree") %>%
    dplyr::mutate(
      start_year = lubridate::year(adverse_start),
      start_month = lubridate::month(adverse_start),
      start_doy = lubridate::yday(adverse_start),
      value = .data[[parameter]]
    )

  if (!is.null(ids)) {
    dat <- dat %>% dplyr::filter(IDs %in% ids)
  }
  if (!is.null(years)) {
    dat <- dat %>% dplyr::filter(start_year %in% years)
  }
  if (!is.null(months)) {
    dat <- dat %>% dplyr::filter(start_month %in% months)
  }
  if (!is.null(doy_window)) {
    dat <- dat %>% dplyr::filter(within_doy_window(start_doy, doy_window))
  }
  if (!is.null(year_window)) {
    if (length(year_window) != 2 || any(is.na(year_window))) {
      stop("'year_window' must be a numeric vector of length 2.")
    }
    ylo <- min(year_window)
    yhi <- max(year_window)
    dat <- dat %>% dplyr::filter(start_year >= ylo, start_year <= yhi)
  }

  if (nrow(dat) == 0) {
    stop("No rows remain after filtering.")
  }

  dat$group_label <- switch(
    compare_by,
    tree = dat$tree,
    species = dat$species,
    site = dat$site,
    species_site = paste(dat$species, dat$site, sep = " @ ")
  )

  dat <- dat %>% dplyr::filter(!is.na(group_label), !is.na(value))

  if (nrow(dat) == 0) {
    stop("No rows remain after removing missing groups or missing parameter values.")
  }

  dat$stratum <- switch(
    within,
    all = "all",
    year = as.character(dat$start_year),
    ID = paste0("ID_", dat$IDs)
  )

  # drop groups with too few observations within each stratum
  dat <- dat %>%
    dplyr::group_by(stratum, group_label) %>%
    dplyr::mutate(.n_group = sum(is.finite(value))) %>%
    dplyr::ungroup() %>%
    dplyr::filter(.n_group >= min_n_per_group) %>%
    dplyr::select(-.n_group)

  if (nrow(dat) == 0) {
    stop("No rows remain after applying 'min_n_per_group'.")
  }

  # descriptive group summary
  group_summary <- dat %>%
    dplyr::group_by(stratum, group_label) %>%
    dplyr::summarise(
      n = sum(is.finite(value)),
      mean = if (all(is.na(value))) NA_real_ else mean(value, na.rm = TRUE),
      median = if (all(is.na(value))) NA_real_ else stats::median(value, na.rm = TRUE),
      sd = if (sum(is.finite(value)) <= 1) NA_real_ else stats::sd(value, na.rm = TRUE),
      min = if (all(is.na(value))) NA_real_ else min(value, na.rm = TRUE),
      max = if (all(is.na(value))) NA_real_ else max(value, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    dplyr::arrange(stratum, group_label)

  # main tests and pairwise tests
  strata <- unique(dat$stratum)
  main_list <- list()
  pair_list <- list()
  idx_main <- 1L
  idx_pair <- 1L

  for (ss in strata) {
    dss <- dat %>% dplyr::filter(stratum == ss)

    # groups still valid
    ng <- length(unique(dss$group_label))
    if (ng < 2) {
      main_list[[idx_main]] <- tibble::tibble(
        stratum = ss,
        parameter = parameter,
        compare_by = compare_by,
        method = NA_character_,
        statistic = NA_real_,
        p_value = NA_real_,
        df = NA_real_,
        n_groups = ng,
        note = "Fewer than 2 valid groups."
      )
      idx_main <- idx_main + 1L
      next
    }

    mth <- if (test_method == "auto") infer_method(dss, compare_by) else test_method

    res_main <- run_main_test(dss, mth)
    res_main <- cbind(
      tibble::tibble(
        stratum = ss,
        parameter = parameter,
        compare_by = compare_by
      ),
      res_main
    )
    main_list[[idx_main]] <- res_main
    idx_main <- idx_main + 1L

    if (isTRUE(pairwise)) {
      pw <- run_pairwise_test(dss, mth, p_adjust_method = p_adjust_method)
      if (!is.null(pw) && nrow(pw) > 0) {
        pw <- cbind(
          tibble::tibble(
            stratum = ss,
            parameter = parameter,
            compare_by = compare_by,
            method = mth
          ),
          pw
        )
        pair_list[[idx_pair]] <- pw
        idx_pair <- idx_pair + 1L
      }
    }
  }

  test_results <- dplyr::bind_rows(main_list)
  pairwise_results <- dplyr::bind_rows(pair_list)

  out <- list(
    call = match.call(),
    data_used = dat,
    test_results = test_results,
    pairwise_results = pairwise_results,
    group_summary = group_summary,
    settings = list(
      parameter = parameter,
      compare_by = compare_by,
      within = within,
      test_method = test_method,
      ids = ids,
      years = years,
      months = months,
      doy_window = doy_window,
      year_window = year_window,
      pairwise = pairwise,
      p_adjust_method = p_adjust_method,
      min_n_per_group = min_n_per_group
    )
  )

  class(out) <- "clim_twd_test"
  out
}


#' @title Summarize clim.twd.test output
#'
#' @param object An object of class \code{"clim_twd_test"}.
#' @param ... Further arguments passed to or from other methods.
#'
#' @return An object of class \code{"summary.clim_twd_test"}.
#'
#' @method summary clim_twd_test
#' @export
summary.clim_twd_test <- function(object, ...) {
  if (!inherits(object, "clim_twd_test")) {
    stop("'object' must be of class 'clim_twd_test'.")
  }

  overview <- tibble::tibble(
    parameter = object$settings$parameter,
    compare_by = object$settings$compare_by,
    within = object$settings$within,
    test_method = object$settings$test_method,
    n_rows_data = nrow(object$data_used),
    n_strata = length(unique(object$data_used$stratum)),
    n_groups = length(unique(object$data_used$group_label)),
    n_main_tests = nrow(object$test_results),
    n_pairwise_tests = if (is.null(object$pairwise_results)) 0L else nrow(object$pairwise_results)
  )

  out <- list(
    overview = overview,
    group_summary = object$group_summary,
    test_results = object$test_results,
    pairwise_head = utils::head(object$pairwise_results, 20)
  )

  class(out) <- "summary.clim_twd_test"
  out
}


#' @title Print summary of clim.twd.test output
#'
#' @param x An object of class \code{"summary.clim_twd_test"}.
#' @param ... Further arguments passed to or from other methods.
#'
#' @return The input object, invisibly.
#'
#' @method print summary.clim_twd_test
#' @export
print.summary.clim_twd_test <- function(x, ...) {
  if (!inherits(x, "summary.clim_twd_test")) {
    stop("'x' must be of class 'summary.clim_twd_test'.")
  }

  cat("Summary of clim.twd.test output\n")
  cat("--------------------------------\n\n")

  cat("Overview:\n")
  print(x$overview)

  cat("\nGroup summary:\n")
  print(x$group_summary)

  cat("\nMain test results:\n")
  print(x$test_results)

  if (!is.null(x$pairwise_head) && nrow(x$pairwise_head) > 0) {
    cat("\nPairwise comparisons (preview):\n")
    print(x$pairwise_head)
  }

  invisible(x)
}


#' @title Plot method for clim.twd.test output
#'
#' @description
#' Plots the data used in \code{clim.twd.test()} as grouped boxplots with jittered
#' observations, faceted by stratum when applicable.
#'
#' @param x An object of class \code{"clim_twd_test"}.
#' @param y Unused.
#' @param show_points Logical. If \code{TRUE}, add jittered points.
#' @param facet Logical. If \code{TRUE}, facet by stratum when multiple strata exist.
#' @param legend_position Legend position.
#' @param main Optional plot title.
#' @param ... Further arguments passed to or from other methods.
#'
#' @return A \code{ggplot2} object.
#'
#' @method plot clim_twd_test
#' @export
plot.clim_twd_test <- function(x,
                               y = NULL,
                               show_points = TRUE,
                               facet = TRUE,
                               legend_position = "none",
                               main = NULL,
                               ...) {
  group_label <- value <- NULL

  if (!inherits(x, "clim_twd_test")) {
    stop("'x' must be of class 'clim_twd_test'.")
  }

  if (!requireNamespace("ggplot2", quietly = TRUE)) {
    stop("Package 'ggplot2' is required for plotting.")
  }

  dat <- x$data_used
  if (is.null(dat) || nrow(dat) == 0) {
    stop("No data available for plotting.")
  }

  p <- ggplot2::ggplot(dat, ggplot2::aes(x = group_label, y = value, fill = group_label)) +
    ggplot2::geom_boxplot(outlier.shape = NA)

  if (isTRUE(show_points)) {
    p <- p + ggplot2::geom_jitter(width = 0.15, alpha = 0.7, size = 1.5)
  }

  p <- p +
    ggplot2::theme_minimal() +
    ggplot2::theme(
      legend.position = legend_position,
      axis.title = ggplot2::element_text(size = 14),
      axis.text = ggplot2::element_text(size = 12, angle = 45, hjust = 1)
    ) +
    ggplot2::labs(
      title = if (is.null(main)) {
        paste0(
          "Parameter: ", x$settings$parameter,
          " | Compare by: ", x$settings$compare_by,
          " | Within: ", x$settings$within
        )
      } else main,
      x = x$settings$compare_by,
      y = x$settings$parameter
    )

  if (isTRUE(facet) && length(unique(dat$stratum)) > 1) {
    p <- p + ggplot2::facet_wrap(~ stratum, scales = "free_y", ncol = 1)
  }

  p
}

Try the dendRoAnalyst package in your browser

Any scripts or data that you put into this service are public.

dendRoAnalyst documentation built on May 20, 2026, 5:07 p.m.