R/events_phase_plots.R

Defines functions plot_event_climate_relation plot_event_climate_box dm_event_climate_test dm_event_climate_summary dm_event_climate .dm_sig_stars .dm_filter_event_table .dm_safe_agg_event .dm_subset_event_window .dm_match_event_row .dm_check_numeric_vars .dm_prepare_event_clim

Documented in dm_event_climate dm_event_climate_summary dm_event_climate_test plot_event_climate_box plot_event_climate_relation

# =========================================================
# Helpers
# =========================================================

.dm_prepare_event_clim <- function(clim_df) {
  if (inherits(clim_df, "dm_clim") || inherits(clim_df, "subdaily_clim")) {
    dat <- tibble::as_tibble(clim_df)
  } else if (is.data.frame(clim_df) ||
             (is.character(clim_df) && length(clim_df) == 1 && file.exists(clim_df))) {
    dat <- tibble::as_tibble(read.climate(clim_df, verbose = FALSE))
  } else {
    stop(
      "'clim_df' must be one of: output of read.climate(), output of dm_subdaily_clim(), ",
      "a raw climate data frame, or a valid climate file path."
    )
  }

  if (!"TIME" %in% names(dat)) {
    stop("Climate input must contain a 'TIME' column.")
  }

  dat$TIME <- as.POSIXct(dat$TIME)
  dat <- dat[order(dat$TIME), , drop = FALSE]

  d <- diff(as.numeric(dat$TIME)) / 3600
  d <- d[is.finite(d) & d > 0]
  if (length(d) == 0) {
    stop("Could not infer temporal resolution from climate time column.")
  }

  res_h <- stats::median(d)
  if (res_h >= 24) {
    stop(
      "Event based climate extraction requires subdaily climate data. ",
      "Use climate data with timestamps finer than 24 hours."
    )
  }

  dat
}

.dm_check_numeric_vars <- function(dat, vars, arg_name) {
  allowed <- setdiff(names(dat)[vapply(dat, is.numeric, logical(1))], "TIME")
  if (is.null(vars)) return(character(0))
  miss <- setdiff(vars, allowed)
  if (length(miss) > 0) {
    stop(sprintf(
      "Unknown or non-numeric variable(s) in %s: %s",
      arg_name, paste(miss, collapse = ", ")
    ))
  }
  vars
}

.dm_match_event_row <- function(time_vec, target_time, rule = c("nearest", "previous", "next")) {
  rule <- match.arg(rule)

  if (is.na(target_time)) return(NA_integer_)

  if (rule == "nearest") {
    return(which.min(abs(as.numeric(time_vec) - as.numeric(target_time))))
  }

  if (rule == "previous") {
    idx <- which(time_vec <= target_time)
    return(if (length(idx) == 0) NA_integer_ else max(idx))
  }

  idx <- which(time_vec >= target_time)
  if (length(idx) == 0) NA_integer_ else min(idx)
}

.dm_subset_event_window <- function(dat, event_time, hours_back) {
  start_time <- event_time - as.difftime(hours_back, units = "hours")
  dat[dat$TIME > start_time & dat$TIME <= event_time, , drop = FALSE]
}

.dm_safe_agg_event <- function(x, FUN) {
  if (length(x) == 0 || all(is.na(x))) return(NA_real_)
  FUN(x, na.rm = TRUE)
}

.dm_filter_event_table <- function(dat, time_col = "event_time", Year = NULL, DOY = NULL) {
  tt <- as.POSIXct(dat[[time_col]])

  keep <- rep(TRUE, length(tt))

  if (!is.null(Year)) {
    keep <- keep & lubridate::year(tt) %in% Year
  }

  if (!is.null(DOY)) {
    if (length(DOY) != 2) stop("'DOY' must be a numeric vector of length 2.")
    keep <- keep & lubridate::yday(tt) >= min(DOY) & lubridate::yday(tt) <= max(DOY)
  }

  out <- dat[keep, , drop = FALSE]
  if (nrow(out) == 0) stop("No event data available for the selected Year or DOY window.")
  out
}

.dm_sig_stars <- function(p) {
  if (is.na(p)) return("NA")
  if (p < 0.0001) return("****")
  if (p < 0.001) return("***")
  if (p < 0.01) return("**")
  if (p < 0.05) return("*")
  "ns"
}


# =========================================================
# 1) Extract event times and climate before events
# =========================================================

#' @title Extract climate at and before phase events
#'
#' @description
#' Extracts climate at event times and summarizes climate in windows preceding
#' those events. Event times can be phase starts, phase ends, or
#' \code{Max.twd.time} for \code{phase.zg()} output.
#'
#' @param x Output of \code{phase.zg()} or \code{phase.sc()}.
#' @param clim_df Subdaily climate input. This can be a standardized climate
#'   object from \code{read.climate()}, a raw climate data frame, or a valid
#'   climate file path.
#' @param event One of \code{"phase_start"}, \code{"phase_end"}, or
#'   \code{"max_twd"}.
#' @param phase Phase selector. For \code{ZG_output}: \code{"all"},
#'   \code{"TWD"}, or \code{"GRO"}. For \code{SC_output}: \code{"all"},
#'   \code{"Shrinkage"}, \code{"Expansion"}, or \code{"Increment"}.
#' @param windows Numeric vector of antecedent windows in hours, e.g.
#'   \code{c(0, 1, 3, 6, 12, 24, 48)}. A value of \code{0} extracts climate at
#'   the event time.
#' @param mean_vars,max_vars,min_vars,sum_vars,median_vars Climate variables to
#'   summarize in antecedent windows.
#' @param instant_vars Climate variables to extract at the exact event time.
#'   If \code{NULL}, all selected variables are used.
#' @param instant_match Matching rule for event-time extraction:
#'   \code{"nearest"}, \code{"previous"}, or \code{"next"}.
#'
#' @return A tibble with one row per event and climate summaries in the
#'   requested antecedent windows.
#' @examples
#' \donttest{
#' data(gf_nepa17)
#' data(ktm_clim_hourly)
#'
#' zg <- phase.zg(df = gf_nepa17[1:800, ], TreeNum = 1)
#'
#' evt_zg <- dm_event_climate(
#'   zg,
#'   ktm_clim_hourly,
#'   event = "phase_start",
#'   phase = "all",
#'   windows = c(0, 1, 3, 6, 12, 24),
#'   mean_vars = c("temp", "VPD", "RH"),
#'   max_vars  = c("temp", "VPD"),
#'   sum_vars  = c("prec")
#' )
#'
#' head(evt_zg)
#'
#' evt_maxtwd <- dm_event_climate(
#'   zg,
#'   ktm_clim_hourly,
#'   event = "max_twd",
#'   windows = c(0, 1, 3, 6, 12, 24),
#'   mean_vars = c("temp", "VPD", "RH"),
#'   max_vars  = c("VPD"),
#'   sum_vars  = c("prec")
#' )
#'
#' head(evt_maxtwd)
#' }
#' @export
dm_event_climate <- function(x,
                             clim_df,
                             event = c("phase_start", "phase_end", "max_twd"),
                             phase = "all",
                             windows = c(0, 1, 3, 6, 12, 24, 48),
                             mean_vars = NULL,
                             max_vars = NULL,
                             min_vars = NULL,
                             sum_vars = NULL,
                             median_vars = NULL,
                             instant_vars = NULL,
                             instant_match = c("nearest", "previous", "next")) {

  event <- match.arg(event)
  instant_match <- match.arg(instant_match)

  if (!inherits(x, "ZG_output") && !inherits(x, "SC_output")) {
    stop("'x' must be an object returned by phase.zg() or phase.sc().")
  }

  clim <- .dm_prepare_event_clim(clim_df)
  numeric_vars <- setdiff(names(clim)[vapply(clim, is.numeric, logical(1))], "TIME")

  if (all(c(
    is.null(mean_vars), is.null(max_vars), is.null(min_vars),
    is.null(sum_vars), is.null(median_vars), is.null(instant_vars)
  ))) {
    mean_vars <- numeric_vars
  }

  mean_vars   <- .dm_check_numeric_vars(clim, mean_vars, "mean_vars")
  max_vars    <- .dm_check_numeric_vars(clim, max_vars, "max_vars")
  min_vars    <- .dm_check_numeric_vars(clim, min_vars, "min_vars")
  sum_vars    <- .dm_check_numeric_vars(clim, sum_vars, "sum_vars")
  median_vars <- .dm_check_numeric_vars(clim, median_vars, "median_vars")

  if (is.null(instant_vars)) {
    instant_vars <- unique(c(mean_vars, max_vars, min_vars, sum_vars, median_vars))
  }
  instant_vars <- .dm_check_numeric_vars(clim, instant_vars, "instant_vars")

  windows <- sort(unique(as.numeric(windows)))
  if (any(!is.finite(windows)) || any(windows < 0)) {
    stop("'windows' must contain non negative numeric values.")
  }

  # -----------------------------------------------------
  # Build event table
  # -----------------------------------------------------
  if (inherits(x, "ZG_output")) {
    cyc <- tibble::as_tibble(x$ZG_cycle)
    cyc$Phase <- factor(cyc$Phases, levels = c(1, 2), labels = c("TWD", "GRO"))

    if (event == "phase_start") {
      evt <- cyc
      evt$event_time <- as.POSIXct(evt$Start)
      evt$event_type <- paste0(as.character(evt$Phase), "_start")
    } else if (event == "phase_end") {
      evt <- cyc
      evt$event_time <- as.POSIXct(evt$End)
      evt$event_type <- paste0(as.character(evt$Phase), "_end")
    } else {
      evt <- cyc[cyc$Phases == 1L & !is.na(cyc$Max.twd.time), , drop = FALSE]
      evt$event_time <- as.POSIXct(evt$Max.twd.time)
      evt$event_type <- "MaxTWD"
      evt$Phase <- factor("TWD", levels = c("TWD", "GRO"))
    }

    if (phase != "all") {
      evt <- evt[as.character(evt$Phase) == phase, , drop = FALSE]
    }

  } else {
    if (event == "max_twd") {
      stop("'max_twd' is only available for ZG_output.")
    }

    cyc <- tibble::as_tibble(x$SC_cycle)
    cyc$Phase <- factor(cyc$Phases, levels = c(1, 2, 3),
                        labels = c("Shrinkage", "Expansion", "Increment"))

    if (event == "phase_start") {
      evt <- cyc
      evt$event_time <- as.POSIXct(evt$Start)
      evt$event_type <- paste0(as.character(evt$Phase), "_start")
    } else {
      evt <- cyc
      evt$event_time <- as.POSIXct(evt$End)
      evt$event_type <- paste0(as.character(evt$Phase), "_end")
    }

    if (phase != "all") {
      evt <- evt[as.character(evt$Phase) == phase, , drop = FALSE]
    }
  }

  if (nrow(evt) == 0) {
    stop("No events available for the selected event phase combination.")
  }

  # -----------------------------------------------------
  # Extract climate for each event
  # -----------------------------------------------------
  res <- lapply(seq_len(nrow(evt)), function(i) {
    out <- evt[i, , drop = FALSE]

    et <- as.POSIXct(out$event_time)

    # exact climate at event
    idx0 <- .dm_match_event_row(clim$TIME, et, rule = instant_match)
    out$clim_match_time <- as.POSIXct(NA)
    out$clim_match_diff_min <- NA_real_

    if (!is.na(idx0)) {
      out$clim_match_time <- clim$TIME[idx0]
      out$clim_match_diff_min <- as.numeric(difftime(clim$TIME[idx0], et, units = "mins"))

      for (v in instant_vars) {
        out[[paste0(v, "_at_event")]] <- clim[[v]][idx0]
      }
    } else {
      for (v in instant_vars) {
        out[[paste0(v, "_at_event")]] <- NA_real_
      }
    }

    # antecedent windows
    for (w in windows[windows > 0]) {
      sub <- .dm_subset_event_window(clim, et, w)
      out[[paste0("n_clim_prev_", w, "h")]] <- nrow(sub)

      for (v in mean_vars) {
        out[[paste0(v, "_mean_prev_", w, "h")]] <- .dm_safe_agg_event(sub[[v]], mean)
      }
      for (v in max_vars) {
        out[[paste0(v, "_max_prev_", w, "h")]] <- .dm_safe_agg_event(sub[[v]], max)
      }
      for (v in min_vars) {
        out[[paste0(v, "_min_prev_", w, "h")]] <- .dm_safe_agg_event(sub[[v]], min)
      }
      for (v in sum_vars) {
        out[[paste0(v, "_sum_prev_", w, "h")]] <- .dm_safe_agg_event(sub[[v]], sum)
      }
      for (v in median_vars) {
        out[[paste0(v, "_median_prev_", w, "h")]] <- .dm_safe_agg_event(sub[[v]], stats::median)
      }
    }

    out
  })

  out <- dplyr::bind_rows(res)
  out
}


# =========================================================
# 2) Summarize event climate output
# =========================================================

#' @title Summarize event-based climate results
#'
#' @description
#' Summarizes a climate variable extracted by \code{dm_event_climate()} by
#' phase, event type, month, or month-of-year.
#'
#' @param event_data Output of \code{dm_event_climate()}.
#' @param climate_var Name of the climate-derived column to summarize.
#' @param group_var Grouping variable. One of \code{"Phase"} or \code{"event_type"}.
#' @param by One of \code{"none"}, \code{"month"}, \code{"month_of_year"}, or \code{"year"}.
#' @param Year Optional numeric year or vector of years for filtering.
#' @param DOY Optional numeric vector of length 2 for filtering.
#'
#' @return A tibble of summary statistics.
#' @examples
#' \donttest{
#' data(gf_nepa17)
#' data(ktm_clim_hourly)
#'
#' zg <- phase.zg(df = gf_nepa17[1:800, ], TreeNum = 1)
#' evt_zg <- dm_event_climate(
#'   zg,
#'   ktm_clim_hourly,
#'   event = "phase_start",
#'   windows = c(0, 3, 6, 12, 24),
#'   mean_vars = c("temp", "VPD", "RH"),
#'   max_vars  = c("VPD"),
#'   sum_vars  = c("prec")
#' )
#'
#' smry <- dm_event_climate_summary(
#'   evt_zg,
#'   climate_var = "VPD_mean_prev_6h",
#'   group_var = "Phase",
#'   by = "month_of_year"
#' )
#'
#' head(smry)
#' }
#'
#' @export
dm_event_climate_summary <- function(event_data,
                                     climate_var,
                                     group_var = c("Phase", "event_type"),
                                     by = c("none", "month", "month_of_year", "year"),
                                     Year = NULL,
                                     DOY = NULL) {
  .by <- n <- NULL

  group_var <- match.arg(group_var)
  by <- match.arg(by)

  dat <- tibble::as_tibble(event_data)
  if (!"event_time" %in% names(dat)) stop("'event_data' must contain 'event_time'.")
  if (!climate_var %in% names(dat)) stop(sprintf("'%s' not found.", climate_var))
  if (!group_var %in% names(dat)) stop(sprintf("'%s' not found.", group_var))

  dat <- .dm_filter_event_table(dat, time_col = "event_time", Year = Year, DOY = DOY)

  tt <- as.POSIXct(dat$event_time)
  dat$.by <- "all"

  if (by == "month") {
    dat$.by <- format(lubridate::floor_date(tt, "month"), "%Y-%m")
  } else if (by == "month_of_year") {
    dat$.by <- factor(
      as.character(lubridate::month(tt, label = TRUE, abbr = TRUE)),
      levels = month.abb
    )
  } else if (by == "year") {
    dat$.by <- factor(lubridate::year(tt))
  }

  dat <- dat[!is.na(dat[[climate_var]]) & !is.na(dat[[group_var]]), , drop = FALSE]

  dat %>%
    dplyr::group_by(.by, .data[[group_var]]) %>%
    dplyr::summarise(
      n = dplyr::n(),
      mean = mean(.data[[climate_var]], na.rm = TRUE),
      median = stats::median(.data[[climate_var]], na.rm = TRUE),
      sd = stats::sd(.data[[climate_var]], na.rm = TRUE),
      se = sd / sqrt(n),
      min = min(.data[[climate_var]], na.rm = TRUE),
      max = max(.data[[climate_var]], na.rm = TRUE),
      .groups = "drop"
    ) %>%
    dplyr::rename(group = !!group_var, by_group = .by)
}


# =========================================================
# 3) Test climate differences between event groups
# =========================================================

#' @title Test differences in event-based climate between groups
#'
#' @description
#' Tests whether an event-based climate variable differs between groups such as
#' phases or event types.
#'
#' @param event_data Output of \code{dm_event_climate()}.
#' @param climate_var Name of the climate-derived column to test.
#' @param group_var Grouping variable. One of \code{"Phase"} or \code{"event_type"}.
#' @param by One of \code{"none"}, \code{"month"}, \code{"month_of_year"}, or \code{"year"}.
#' @param Year Optional numeric year or vector of years for filtering.
#' @param DOY Optional numeric vector of length 2 for filtering.
#' @param method One of \code{"auto"}, \code{"anova"}, \code{"kruskal"},
#'   \code{"t.test"}, or \code{"wilcox"}.
#'
#' @return A list with summary statistics, overall tests, and pairwise tests.
#'
#' @examples
#' \donttest{
#' data(gf_nepa17)
#' data(ktm_clim_hourly)
#'
#' zg <- phase.zg(df = gf_nepa17[1:800, ], TreeNum = 1)
#' evt_zg <- dm_event_climate(
#'   zg,
#'   ktm_clim_hourly,
#'   event = "phase_start",
#'   windows = c(0, 3, 6, 12, 24),
#'   mean_vars = c("temp", "VPD", "RH"),
#'   max_vars  = c("VPD"),
#'   sum_vars  = c("prec")
#' )
#'
#' tst <- dm_event_climate_test(
#'   evt_zg,
#'   climate_var = "VPD_mean_prev_6h",
#'   group_var = "Phase",
#'   by = "month_of_year",
#'   method = "auto"
#' )
#'
#' tst$summary
#' tst$overall_tests
#' head(tst$pairwise_tests)
#' }
#'
#' @export
dm_event_climate_test <- function(event_data,
                                  climate_var,
                                  group_var = c("Phase", "event_type"),
                                  by = c("none", "month", "month_of_year", "year"),
                                  Year = NULL,
                                  DOY = NULL,
                                  method = c("auto", "anova", "kruskal", "t.test", "wilcox")) {

  group_var <- match.arg(group_var)
  by <- match.arg(by)
  method <- match.arg(method)

  dat <- tibble::as_tibble(event_data)
  if (!"event_time" %in% names(dat)) stop("'event_data' must contain 'event_time'.")
  if (!climate_var %in% names(dat)) stop(sprintf("'%s' not found.", climate_var))
  if (!group_var %in% names(dat)) stop(sprintf("'%s' not found.", group_var))

  dat <- .dm_filter_event_table(dat, time_col = "event_time", Year = Year, DOY = DOY)

  tt <- as.POSIXct(dat$event_time)
  dat$.by <- "all"

  if (by == "month") {
    dat$.by <- format(lubridate::floor_date(tt, "month"), "%Y-%m")
  } else if (by == "month_of_year") {
    dat$.by <- factor(
      as.character(lubridate::month(tt, label = TRUE, abbr = TRUE)),
      levels = month.abb
    )
  } else if (by == "year") {
    dat$.by <- factor(lubridate::year(tt))
  }

  dat <- dat[!is.na(dat[[climate_var]]) & !is.na(dat[[group_var]]), , drop = FALSE]
  if (nrow(dat) == 0) stop("No complete observations available after filtering.")

  summary_tab <- dm_event_climate_summary(
    event_data = dat,
    climate_var = climate_var,
    group_var = group_var,
    by = "none"
  )

  run_one <- function(dd) {
    g <- dd[[group_var]]
    y <- dd[[climate_var]]
    ng <- length(unique(g))

    if (ng < 2) {
      return(list(
        overall = tibble::tibble(method = NA_character_, p.value = NA_real_),
        pairwise = NULL
      ))
    }

    meth <- method
    if (meth == "auto") {
      meth <- if (ng == 2) "wilcox" else "kruskal"
    }

    overall_p <- NA_real_
    overall_method <- meth
    pairwise <- NULL

    if (meth == "anova") {
      fit <- stats::lm(y ~ g)
      overall_p <- stats::anova(fit)[["Pr(>F)"]][1]
      if (ng > 2) {
        pw <- stats::pairwise.t.test(y, g, p.adjust.method = "BH")
        pairwise <- as.data.frame(as.table(pw$p.value))
        names(pairwise) <- c("group1", "group2", "p.value")
        pairwise <- pairwise[!is.na(pairwise$p.value), , drop = FALSE]
      }
    } else if (meth == "kruskal") {
      overall_p <- stats::kruskal.test(y ~ g)$p.value
      if (ng > 2) {
        pw <- stats::pairwise.wilcox.test(y, g, p.adjust.method = "BH", exact = FALSE)
        pairwise <- as.data.frame(as.table(pw$p.value))
        names(pairwise) <- c("group1", "group2", "p.value")
        pairwise <- pairwise[!is.na(pairwise$p.value), , drop = FALSE]
      }
    } else if (meth == "t.test") {
      if (ng != 2) stop("t.test requires exactly 2 groups.")
      overall_p <- stats::t.test(y ~ g)$p.value
      lev <- unique(as.character(g))
      pairwise <- tibble::tibble(group1 = lev[1], group2 = lev[2], p.value = overall_p)
    } else if (meth == "wilcox") {
      if (ng == 2) {
        overall_p <- stats::wilcox.test(y ~ g, exact = FALSE)$p.value
        lev <- unique(as.character(g))
        pairwise <- tibble::tibble(group1 = lev[1], group2 = lev[2], p.value = overall_p)
      } else {
        overall_p <- stats::kruskal.test(y ~ g)$p.value
        pw <- stats::pairwise.wilcox.test(y, g, p.adjust.method = "BH", exact = FALSE)
        pairwise <- as.data.frame(as.table(pw$p.value))
        names(pairwise) <- c("group1", "group2", "p.value")
        pairwise <- pairwise[!is.na(pairwise$p.value), , drop = FALSE]
      }
    }

    list(
      overall = tibble::tibble(method = overall_method, p.value = overall_p),
      pairwise = pairwise
    )
  }

  sp <- split(dat, dat$.by)
  test_list <- lapply(sp, run_one)

  overall_tests <- dplyr::bind_rows(
    lapply(names(test_list), function(nm) {
      cbind(by_group = nm, test_list[[nm]]$overall)
    })
  )

  pairwise_tests <- dplyr::bind_rows(
    lapply(names(test_list), function(nm) {
      pw <- test_list[[nm]]$pairwise
      if (is.null(pw)) return(NULL)
      cbind(by_group = nm, pw)
    })
  )

  list(
    summary = summary_tab,
    overall_tests = tibble::as_tibble(overall_tests),
    pairwise_tests = tibble::as_tibble(pairwise_tests),
    climate_var = climate_var,
    group_var = group_var,
    by = by
  )
}


# =========================================================
# 4) Box violin plots with optional test annotation
# =========================================================

#' @title Plot event-based climate distributions
#'
#' @description
#' Draws boxplots or violins of event-based climate variables across phases or
#' event types, with optional significance annotation.
#'
#' @param event_data Output of \code{dm_event_climate()}.
#' @param climate_var Name of the climate-derived column to plot.
#' @param group_var Grouping variable. One of \code{"Phase"} or \code{"event_type"}.
#' @param facet_by One of \code{"none"}, \code{"month"}, \code{"month_of_year"}, or \code{"year"}.
#' @param Year Optional numeric year or vector of years for filtering.
#' @param DOY Optional numeric vector of length 2 for filtering.
#' @param geom One of \code{"boxplot"}, \code{"violin"}, or \code{"both"}.
#' @param add_test Logical. If \code{TRUE}, adds per-panel test results.
#' @param test_method Passed to \code{dm_event_climate_test()}.
#' @param point_alpha Point transparency.
#'
#' @return A \code{ggplot2} object.
#'
#' @examples
#' \donttest{
#' data(gf_nepa17)
#' data(ktm_clim_hourly)
#'
#' zg <- phase.zg(df = gf_nepa17[1:800, ], TreeNum = 1)
#' evt_zg <- dm_event_climate(
#'   zg,
#'   ktm_clim_hourly,
#'   event = "phase_start",
#'   windows = c(0, 3, 6, 12, 24),
#'   mean_vars = c("temp", "VPD", "RH"),
#'   max_vars  = c("VPD"),
#'   sum_vars  = c("prec")
#' )
#'
#' if (any(!is.na(evt_zg$VPD_mean_prev_6h))) {
#'   plot_event_climate_box(
#'     evt_zg,
#'     climate_var = "VPD_mean_prev_6h",
#'     group_var = "Phase",
#'     facet_by = "month_of_year",
#'     geom = "both",
#'     add_test = TRUE
#'   )
#' }
#' }
#'
#' @export
plot_event_climate_box <- function(event_data,
                                   climate_var,
                                   group_var = c("Phase", "event_type"),
                                   facet_by = c("none", "month", "month_of_year", "year"),
                                   Year = NULL,
                                   DOY = NULL,
                                   geom = c("boxplot", "violin", "both"),
                                   add_test = TRUE,
                                   test_method = c("auto", "anova", "kruskal", "t.test", "wilcox"),
                                   point_alpha = 0.5) {
  group <- label <- value <- NULL

  group_var <- match.arg(group_var)
  facet_by <- match.arg(facet_by)
  geom <- match.arg(geom)
  test_method <- match.arg(test_method)

  dat <- tibble::as_tibble(event_data)
  if (!"event_time" %in% names(dat)) stop("'event_data' must contain 'event_time'.")
  if (!climate_var %in% names(dat)) stop(sprintf("'%s' not found.", climate_var))
  if (!group_var %in% names(dat)) stop(sprintf("'%s' not found.", group_var))

  dat <- .dm_filter_event_table(dat, time_col = "event_time", Year = Year, DOY = DOY)

  tt <- as.POSIXct(dat$event_time)
  dat$group <- dat[[group_var]]
  dat$value <- dat[[climate_var]]
  dat$.facet <- "all"

  if (facet_by == "month") {
    dat$.facet <- format(lubridate::floor_date(tt, "month"), "%Y-%m")
  } else if (facet_by == "month_of_year") {
    dat$.facet <- factor(
      as.character(lubridate::month(tt, label = TRUE, abbr = TRUE)),
      levels = month.abb
    )
  } else if (facet_by == "year") {
    dat$.facet <- factor(lubridate::year(tt))
  }

  dat <- dat[!is.na(dat$value) & !is.na(dat$group), , drop = FALSE]
  if (nrow(dat) == 0) stop("No complete observations available after filtering.")

  p <- ggplot2::ggplot(dat, ggplot2::aes(x = group, y = value))

  if (geom %in% c("violin", "both")) {
    p <- p + ggplot2::geom_violin(fill = "grey80", alpha = 0.8, trim = FALSE)
  }

  if (geom %in% c("boxplot", "both")) {
    p <- p + ggplot2::geom_boxplot(
      width = if (geom == "both") 0.18 else 0.6,
      outlier.shape = NA,
      fill = if (geom == "both") "white" else "grey80"
    )
  }

  p <- p +
    ggplot2::geom_point(
      position = ggplot2::position_jitter(width = 0.12, height = 0),
      size = 1.2,
      alpha = point_alpha,
      color = "grey25"
    ) +
    ggplot2::labs(
      x = group_var,
      y = climate_var
    ) +
    ggplot2::theme_minimal()

  if (facet_by != "none") {
    p <- p + ggplot2::facet_wrap(~ .facet, scales = "free_y")
  }

  if (isTRUE(add_test)) {
    tst <- dm_event_climate_test(
      event_data = dat,
      climate_var = climate_var,
      group_var = group_var,
      by = facet_by,
      method = test_method
    )

    ann <- tst$overall_tests
    ann$label <- paste0(
      ann$method, ": p = ", format.pval(ann$p.value, digits = 3, eps = 1e-4),
      " (", vapply(ann$p.value, .dm_sig_stars, character(1)), ")"
    )

    if (facet_by == "none") {
      ann$.facet <- "all"
    } else {
      ann$.facet <- ann$by_group
    }

    p <- p +
      ggplot2::geom_text(
        data = ann,
        ggplot2::aes(x = Inf, y = Inf, label = label),
        inherit.aes = FALSE,
        hjust = 1.05,
        vjust = 1.1,
        size = 3.3
      )
  }

  print(p)
  invisible(p)
}


# =========================================================
# 5) Scatterplot with R2, slope, p_value
# =========================================================

#' @title Plot event-based climate-response relationships
#'
#' @description
#' Draws a scatterplot between an event-based climate variable and a response
#' variable and annotates the fitted linear relationship with R², slope,
#' and p-value.
#'
#' @param event_data Output of \code{dm_event_climate()}.
#' @param climate_var Name of the climate-derived column.
#' @param response_var Name of the response variable.
#' @param group_var Optional grouping variable for color.
#' @param Year Optional numeric year or vector of years for filtering.
#' @param DOY Optional numeric vector of length 2 for filtering.
#' @param add_smooth Logical. If \code{TRUE}, adds linear fit line.
#' @param point_alpha Point transparency.
#'
#' @return A \code{ggplot2} object.
#'
#' @examples
#' \donttest{
#' data(gf_nepa17)
#' data(ktm_clim_hourly)
#'
#' zg <- phase.zg(df = gf_nepa17[1:800, ], TreeNum = 1)
#' evt_maxtwd <- dm_event_climate(
#'   zg,
#'   ktm_clim_hourly,
#'   event = "max_twd",
#'   windows = c(0, 3, 6, 12, 24),
#'   mean_vars = c("temp", "VPD", "RH"),
#'   max_vars  = c("VPD"),
#'   sum_vars  = c("prec")
#' )
#'
#' if (all(c("VPD_mean_prev_6h", "max.twd") %in% names(evt_maxtwd)) &&
#'     any(is.finite(evt_maxtwd$VPD_mean_prev_6h)) &&
#'     any(is.finite(evt_maxtwd$max.twd))) {
#'   plot_event_climate_relation(
#'     evt_maxtwd,
#'     climate_var = "VPD_mean_prev_6h",
#'     response_var = "max.twd",
#'     group_var = "Phase"
#'   )
#' }
#' }
#'
#' @export
plot_event_climate_relation <- function(event_data,
                                        climate_var,
                                        response_var,
                                        group_var = NULL,
                                        Year = NULL,
                                        DOY = NULL,
                                        add_smooth = TRUE,
                                        point_alpha = 0.7) {

  dat <- tibble::as_tibble(event_data)
  if (!"event_time" %in% names(dat)) stop("'event_data' must contain 'event_time'.")
  if (!climate_var %in% names(dat)) stop(sprintf("'%s' not found.", climate_var))
  if (!response_var %in% names(dat)) stop(sprintf("'%s' not found.", response_var))

  dat <- .dm_filter_event_table(dat, time_col = "event_time", Year = Year, DOY = DOY)

  ok <- is.finite(dat[[climate_var]]) & is.finite(dat[[response_var]])
  dat <- dat[ok, , drop = FALSE]
  if (nrow(dat) < 3) stop("Not enough complete observations for regression.")

  fit <- stats::lm(dat[[response_var]] ~ dat[[climate_var]])
  sm <- summary(fit)

  slope <- unname(stats::coef(fit)[2])
  pval <- unname(sm$coefficients[2, 4])
  r2 <- unname(sm$r.squared)

  stat_label <- paste0(
    "R2 = ", formatC(r2, digits = 3, format = "f"),
    "   slope = ", formatC(slope, digits = 3, format = "f"),
    "   p = ", format.pval(pval, digits = 3, eps = 1e-4)
  )

  p <- ggplot2::ggplot(dat, ggplot2::aes(x = .data[[climate_var]], y = .data[[response_var]]))

  if (!is.null(group_var) && group_var %in% names(dat)) {
    p <- p + ggplot2::geom_point(ggplot2::aes(color = .data[[group_var]]), alpha = point_alpha)
  } else {
    p <- p + ggplot2::geom_point(alpha = point_alpha)
  }

  if (isTRUE(add_smooth)) {
    p <- p + ggplot2::geom_smooth(method = "lm", se = TRUE, color = "black")
  }

  p <- p +
    ggplot2::labs(
      x = climate_var,
      y = response_var,
      subtitle = stat_label,
      color = group_var
    ) +
    ggplot2::theme_minimal()

  print(p)
  invisible(p)
}

Try the dendRoAnalyst package in your browser

Any scripts or data that you put into this service are public.

dendRoAnalyst documentation built on May 20, 2026, 5:07 p.m.