R/dm.detrend.R
In dendRoAnalyst: A Tool for Processing and Analyzing Dendrometer Data

Documented in dm.detrend.fit

#' Detrend and standardize dendrometer series from growth-fit residuals
#'
#' @description
#' Creates detrended standardized dendrometer series from objects returned by
#' [dm.growth.fit()] or [dm.growth.fit.double()].
#'
#' The function compares the original daily dendrometer series
#' (`x$original_daily_data`) with the fitted growth curve from `x$fitted_data`.
#' Because fitted values are stored on the processed scale used for modelling,
#' the function first reconstructs fitted values on the original daily scale by
#' adding back the seasonal baseline (the first non-missing original daily value
#' of each series within each vegetation season).
#'
#' Residuals are then calculated as:
#' \deqn{residual = observed_{daily} - fitted_{original\ scale}}
#'
#' To obtain a detrended standardized series with mean equal to 1 and no negative
#' values, residuals are transformed within each \code{series × season_label} as:
#' \deqn{z = \frac{residual - \min(residual) + \epsilon}{\mathrm{mean}(residual - \min(residual) + \epsilon)}}
#'
#' where \eqn{\epsilon > 0} is a small constant ensuring strictly positive values.
#' If all residuals within a season are identical, the standardized series
#' becomes a constant vector of 1.
#'
#' @param x An object of class \code{"dm_growth_fit"} returned by
#'   [dm.growth.fit()] or [dm.growth.fit.double()].
#' @param series Optional character vector of dendrometer series to retain.
#'   Default is \code{NULL}, meaning all available series are used.
#' @param seasons Optional character vector of vegetation-season labels to
#'   retain. Default is \code{NULL}, meaning all available seasons are used.
#' @param epsilon Small positive constant added after shifting residuals by their
#'   seasonal minimum. Default is \code{1e-6}. Use \code{0} to allow exact zeros.
#' @param keep_na_rows Logical. If \code{TRUE}, rows are retained even when the
#'   observed or fitted value is missing; detrended values remain \code{NA}.
#'   If \code{FALSE} (default), rows with missing observed or fitted values are
#'   removed from the long output before standardization.
#'
#' @return
#' A list of class \code{"dm_detrended"} with elements:
#' \describe{
#'   \item{call}{The matched function call.}
#'   \item{detrended_data}{Wide-format tibble with metadata columns and one
#'     detrended standardized series column per dendrometer.}
#'   \item{detrended_long}{Long-format tibble containing original daily values,
#'     reconstructed fitted values on the original scale, residuals, shifted
#'     residuals, and detrended standardized values.}
#'   \item{parameters}{Per-series and per-season summary table containing
#'     baseline values and residual standardization parameters.}
#'   \item{source_fit_statistics}{The original \code{x$fit_statistics} table.}
#' }
#'
#' @details
#' The function uses \code{x$original_daily_data}, which must be present in the
#' input object. If the object was created by an older version of
#' [dm.growth.fit()] or [dm.growth.fit.double()] that did not store
#' \code{original_daily_data}, the function will stop with an informative error.
#'
#' Standardization is carried out separately within each vegetation season. This
#' means the mean of the detrended standardized series is 1 for each
#' \code{series × season_label}, not necessarily across the entire dataset.
#'
#' @examples
#' \donttest{
#' data(gf_nepa17)
#'
#' fit1 <- dm.growth.fit(
#'   df = gf_nepa17,
#'   TreeNum = 1:2,
#'   method = "gompertz",
#'   year_mode = "yearly",
#'   verbose = FALSE
#' )
#'
#' det1 <- dm.detrend.fit(fit1)
#' head(det1$detrended_data)
#' head(det1$parameters)
#'
#' fit2 <- dm.growth.fit.double(
#'   df = gf_nepa17,
#'   TreeNum = 1,
#'   method = "gompertz",
#'   year_mode = "yearly",
#'   verbose = FALSE
#' )
#'
#' det2 <- dm.detrend.fit(fit2)
#' head(det2$detrended_long)
#' }
#'
#' @importFrom dplyr mutate filter select rename left_join distinct group_by summarise ungroup arrange across all_of bind_rows %>%
#' @importFrom tidyr pivot_longer pivot_wider
#' @importFrom tibble as_tibble tibble
#' @export
dm.detrend.fit <- function(x,
                           series = NULL,
                           seasons = NULL,
                           epsilon = 1e-6,
                           keep_na_rows = FALSE) {
  TIME <- doy <- season_label <- season_start <- season_end <- season_day <- NULL
  series_name <- observed <- fitted_processed <- fitted_original <- residual <- NULL
  residual_shifted <- detrended_std <- baseline <- fit_id <- n_obs <- residual_min <- NULL
  shift_added <- shifted_mean <- NULL

  cl <- match.call()

  if (!inherits(x, "dm_growth_fit")) {
    stop("'x' must be an object of class 'dm_growth_fit'.")
  }

  if (is.null(x$original_daily_data)) {
    stop(
      "The input object does not contain 'original_daily_data'. ",
      "Please refit using the updated dm.growth.fit() or dm.growth.fit.double()."
    )
  }

  if (!is.numeric(epsilon) || length(epsilon) != 1 || is.na(epsilon) || epsilon < 0) {
    stop("'epsilon' must be a single non-negative numeric value.")
  }

  if (!is.logical(keep_na_rows) || length(keep_na_rows) != 1 || is.na(keep_na_rows)) {
    stop("'keep_na_rows' must be TRUE or FALSE.")
  }

  orig <- tibble::as_tibble(x$original_daily_data)
  fitd <- tibble::as_tibble(x$fitted_data)

  meta_cols_orig <- intersect(
    c("TIME", "doy", "season_label", "season_start", "season_end", "season_day"),
    names(orig)
  )

  meta_cols_fit <- intersect(
    c("TIME", "doy", "season_label", "season_start", "season_end", "season_day", "season_length", "any_observed"),
    names(fitd)
  )

  series_cols <- intersect(
    setdiff(names(orig), meta_cols_orig),
    setdiff(names(fitd), meta_cols_fit)
  )

  if (length(series_cols) == 0) {
    stop("No matching dendrometer series were found in original_daily_data and fitted_data.")
  }

  if (!is.null(series)) {
    miss <- setdiff(series, series_cols)
    if (length(miss) > 0) {
      warning("These series were not found and were ignored: ", paste(miss, collapse = ", "))
    }
    series_cols <- intersect(series_cols, series)
    if (length(series_cols) == 0) {
      stop("No requested series remained after filtering.")
    }
  }

  orig_long <- orig %>%
    dplyr::select(dplyr::all_of(meta_cols_orig), dplyr::all_of(series_cols)) %>%
    tidyr::pivot_longer(
      cols = dplyr::all_of(series_cols),
      names_to = "series_name",
      values_to = "observed"
    )

  fit_long <- fitd %>%
    dplyr::select(dplyr::all_of(meta_cols_fit), dplyr::all_of(series_cols)) %>%
    tidyr::pivot_longer(
      cols = dplyr::all_of(series_cols),
      names_to = "series_name",
      values_to = "fitted_processed"
    )

  dat <- orig_long %>%
    dplyr::left_join(
      fit_long %>%
        dplyr::select(TIME, season_label, season_day, series_name, fitted_processed),
      by = c("TIME", "season_label", "season_day", "series_name")
    ) %>%
    dplyr::arrange(series_name, season_label, TIME)

  if (!is.null(seasons)) {
    dat <- dat %>% dplyr::filter(.data$season_label %in% seasons)
  }

  if (nrow(dat) == 0) {
    stop("No rows remain after filtering by series and/or seasons.")
  }

  # Baseline = first non-missing original daily value within each series × season
  baseline_tbl <- dat %>%
    dplyr::group_by(.data$series_name, .data$season_label) %>%
    dplyr::summarise(
      baseline = {
        idx <- which(!is.na(.data$observed))[1]
        if (length(idx) == 0 || is.na(idx)) NA_real_ else .data$observed[idx]
      },
      .groups = "drop"
    )

  dat <- dat %>%
    dplyr::left_join(baseline_tbl, by = c("series_name", "season_label")) %>%
    dplyr::mutate(
      fitted_original = .data$fitted_processed + .data$baseline,
      residual = .data$observed - .data$fitted_original
    )

  # Determine fit_id
  if (!is.null(x$fit_parameters) &&
      "fit_id" %in% names(x$fit_parameters) &&
      length(unique(stats::na.omit(x$fit_parameters$fit_id))) == 1 &&
      unique(stats::na.omit(x$fit_parameters$fit_id)) == "pooled") {
    dat$fit_id <- "pooled"
  } else {
    dat$fit_id <- as.character(dat$season_label)
  }

  if (!isTRUE(keep_na_rows)) {
    dat <- dat %>%
      dplyr::filter(!is.na(.data$observed), !is.na(.data$fitted_original))
  }

  if (nrow(dat) == 0) {
    stop("No rows available after removing missing observed/fitted values.")
  }

  # Standardize residuals within each series × season
  dat <- dat %>%
    dplyr::group_by(.data$series_name, .data$season_label) %>%
    dplyr::mutate(
      residual_min = if (all(is.na(.data$residual))) NA_real_ else min(.data$residual, na.rm = TRUE),
      shift_added = if (all(is.na(.data$residual))) NA_real_ else -residual_min + epsilon,
      residual_shifted = ifelse(is.na(.data$residual), NA_real_, .data$residual + .data$shift_added),
      shifted_mean = if (all(is.na(.data$residual_shifted))) NA_real_ else mean(.data$residual_shifted, na.rm = TRUE),
      detrended_std = dplyr::if_else(
        is.finite(.data$shifted_mean) & .data$shifted_mean > 0,
        .data$residual_shifted / .data$shifted_mean,
        NA_real_
      )
    ) %>%
    dplyr::ungroup()

  # If residuals are constant, detrended_std becomes exactly 1
  dat <- dat %>%
    dplyr::mutate(
      detrended_std = ifelse(is.nan(.data$detrended_std) | is.infinite(.data$detrended_std), NA_real_, .data$detrended_std)
    )

  params <- dat %>%
    dplyr::group_by(.data$series_name, .data$season_label, .data$fit_id) %>%
    dplyr::summarise(
      n_obs = sum(!is.na(.data$observed) & !is.na(.data$fitted_original)),
      baseline = dplyr::first(.data$baseline),
      residual_min = dplyr::first(.data$residual_min),
      shift_added = dplyr::first(.data$shift_added),
      shifted_mean = dplyr::first(.data$shifted_mean),
      mean_detrended = if (all(is.na(.data$detrended_std))) NA_real_ else mean(.data$detrended_std, na.rm = TRUE),
      min_detrended = if (all(is.na(.data$detrended_std))) NA_real_ else min(.data$detrended_std, na.rm = TRUE),
      max_detrended = if (all(is.na(.data$detrended_std))) NA_real_ else max(.data$detrended_std, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    dplyr::rename(series = .data$series_name)

  # Wide output
  meta_keep <- intersect(
    c("TIME", "doy", "season_label", "season_start", "season_end", "season_day"),
    names(dat)
  )

  detrended_data <- dat %>%
    dplyr::select(dplyr::all_of(meta_keep), .data$series_name, .data$detrended_std) %>%
    dplyr::distinct() %>%
    tidyr::pivot_wider(
      names_from = .data$series_name,
      values_from = .data$detrended_std
    ) %>%
    dplyr::arrange(.data$season_label, .data$TIME)

  detrended_long <- dat %>%
    dplyr::select(
      TIME, doy, season_label, season_start, season_end, season_day,
      fit_id, series_name, observed, baseline, fitted_processed, fitted_original,
      residual, residual_shifted, detrended_std
    ) %>%
    dplyr::rename(series = .data$series_name) %>%
    dplyr::arrange(.data$series, .data$season_label, .data$TIME)

  out <- list(
    call = cl,
    detrended_data = detrended_data,
    detrended_long = detrended_long,
    parameters = params,
    source_fit_statistics = x$fit_statistics
  )

  class(out) <- "dm_detrended"
  out
}