Nothing
#' @title Standardize dendrometer series within seasonal years
#'
#' @description
#' Standardizes one or more dendrometer series within seasonal years so that
#' multiple trees can be brought to a comparable scale while preserving their
#' within-season temporal pattern.
#'
#' Seasonal years are defined as:
#' \itemize{
#' \item \code{"NH"}: Northern Hemisphere year, from \strong{01 Jan} to \strong{31 Dec}
#' \item \code{"SH"}: Southern Hemisphere year, from \strong{01 Jul} to \strong{30 Jun} of the next year
#' \item \code{"CS"}: Custom season defined by \code{CS_doys = c(doy1, doy2)}
#' }
#'
#' For \code{season_type = "CS"}:
#' \itemize{
#' \item if \code{CS_doys[1] <= CS_doys[2]}, the season stays within one calendar year
#' \item if \code{CS_doys[1] > CS_doys[2]}, the season wraps across years
#' }
#'
#' Standardization is applied separately for each dendrometer series and each
#' seasonal year.
#'
#' @param df A data frame whose first column contains date-time
#' (\code{yyyy-mm-dd HH:MM:SS}, \code{POSIXct}, or \code{Date}) and all
#' remaining columns are dendrometer series.
#' @param season_type One of \code{"NH"}, \code{"SH"}, or \code{"CS"}.
#' @param CS_doys Optional numeric vector of length 2 defining the start and end
#' DOY for \code{season_type = "CS"}.
#' @param method Standardization method. One of:
#' \itemize{
#' \item \code{"center"}: subtract seasonal reference value
#' \item \code{"amplitude"}: subtract reference and divide by seasonal range
#' \item \code{"robust_amplitude"}: subtract reference and divide by
#' \code{Q_high - Q_low}
#' \item \code{"minmax"}: rescale to \code{(x - min)/(max - min)}
#' \item \code{"zscore"}: seasonal z-score
#' \item \code{"robust_zscore"}: robust seasonal z-score using median and MAD
#' \item \code{"percentile"}: convert seasonal values to percentiles in \eqn{[0,1]}
#' }
#' @param ref_type Reference-value definition for methods that need a reference
#' (\code{"center"}, \code{"amplitude"}, \code{"robust_amplitude"}). One of:
#' \itemize{
#' \item \code{"first_value"}: first non-missing value in the seasonal year
#' \item \code{"first_n_days"}: mean of the first \code{ref_n_days} unique days
#' \item \code{"ref_window"}: mean of values within \code{ref_doys}
#' }
#' @param ref_n_days Number of initial days used when \code{ref_type = "first_n_days"}.
#' @param ref_doys Optional numeric vector of length 2 defining a DOY reference
#' window when \code{ref_type = "ref_window"}.
#' @param q_low Lower quantile used by \code{method = "robust_amplitude"}.
#' @param q_high Upper quantile used by \code{method = "robust_amplitude"}.
#'
#' @return
#' A list of class \code{"dm_standardized"} containing:
#' \itemize{
#' \item \code{data}: wide data frame with columns
#' \code{TIME}, \code{season_year}, \code{in_season}, and standardized dendrometer series
#' \item \code{parameters}: tibble with one row per tree and seasonal year,
#' summarizing the reference value and scaling denominator used
#' \item \code{metadata}: method and seasonal-year settings
#' }
#'
#' For \code{season_type = "CS"}, observations outside the custom season are
#' retained in \code{data}, but their standardized values are set to \code{NA}
#' and \code{in_season = FALSE}.
#'
#' @examples
#' \donttest{
#' data(gf_nepa17)
#'
#' # Northern Hemisphere seasonal-year standardization
#' out_nh <- dm_standardize(
#' df = gf_nepa17,
#' season_type = "NH",
#' method = "robust_amplitude"
#' )
#'
#' # Southern Hemisphere seasonal-year standardization
#' out_sh <- dm_standardize(
#' df = gf_nepa17,
#' season_type = "SH",
#' method = "center"
#' )
#'
#' # Custom season within one year
#' out_cs1 <- dm_standardize(
#' df = gf_nepa17,
#' season_type = "CS",
#' CS_doys = c(100, 280),
#' method = "robust_amplitude"
#' )
#'
#' # Custom season wrapping across years
#' out_cs2 <- dm_standardize(
#' df = gf_nepa17,
#' season_type = "CS",
#' CS_doys = c(250, 120),
#' method = "percentile"
#' )
#'
#' head(out_nh$data)
#' head(out_nh$parameters)
#' }
#'
#' @importFrom lubridate ymd_hms ymd year yday month
#' @importFrom dplyr bind_rows
#' @importFrom tibble tibble as_tibble
#' @importFrom stats quantile median mad
#' @export
dm_standardize <- function(df,
season_type = c("NH", "SH", "CS"),
CS_doys = NULL,
method = c("center", "amplitude", "robust_amplitude",
"minmax", "zscore", "robust_zscore", "percentile"),
ref_type = c("first_value", "first_n_days", "ref_window"),
ref_n_days = 7,
ref_doys = NULL,
q_low = 0.05,
q_high = 0.95) {
season_type <- match.arg(season_type)
method <- match.arg(method)
ref_type <- match.arg(ref_type)
# ----------------------------
# checks
# ----------------------------
if (!is.data.frame(df) || ncol(df) < 2) {
stop("'df' must be a data frame with a time column and at least one dendrometer series.")
}
if (season_type == "CS") {
if (is.null(CS_doys) || length(CS_doys) != 2 || any(is.na(CS_doys))) {
stop("For season_type = 'CS', 'CS_doys' must be a numeric vector of length 2.")
}
if (any(CS_doys < 1 | CS_doys > 366)) {
stop("'CS_doys' values must be between 1 and 366.")
}
CS_doys <- as.integer(CS_doys)
} else {
if (!is.null(CS_doys)) {
warning("'CS_doys' is ignored unless season_type = 'CS'.")
}
}
if (!is.numeric(ref_n_days) || length(ref_n_days) != 1 || is.na(ref_n_days) || ref_n_days < 1) {
stop("'ref_n_days' must be a positive number.")
}
ref_n_days <- as.integer(ref_n_days)
if (ref_type == "ref_window") {
if (is.null(ref_doys) || length(ref_doys) != 2 || any(is.na(ref_doys))) {
stop("For ref_type = 'ref_window', 'ref_doys' must be a numeric vector of length 2.")
}
if (any(ref_doys < 1 | ref_doys > 366)) {
stop("'ref_doys' values must be between 1 and 366.")
}
ref_doys <- as.integer(ref_doys)
} else {
if (!is.null(ref_doys)) {
warning("'ref_doys' is ignored unless ref_type = 'ref_window'.")
}
}
if (!is.numeric(q_low) || !is.numeric(q_high) || length(q_low) != 1 || length(q_high) != 1 ||
is.na(q_low) || is.na(q_high) || q_low < 0 || q_high > 1 || q_low >= q_high) {
stop("'q_low' and 'q_high' must satisfy 0 <= q_low < q_high <= 1.")
}
# ----------------------------
# parse time
# ----------------------------
parse_time <- function(x) {
if (inherits(x, "POSIXct")) return(as.POSIXct(x))
if (inherits(x, "Date")) return(as.POSIXct(x))
out <- suppressWarnings(lubridate::ymd_hms(x, quiet = TRUE))
if (all(is.na(out))) {
out_date <- suppressWarnings(lubridate::ymd(x, quiet = TRUE))
if (!all(is.na(out_date))) out <- as.POSIXct(out_date)
}
if (all(is.na(out))) out <- suppressWarnings(as.POSIXct(x))
out
}
TIME <- parse_time(df[[1]])
if (all(is.na(TIME))) {
stop("Could not parse the first column of 'df' as date-time.")
}
dat <- tibble::as_tibble(df)
names(dat)[1] <- "TIME"
dat$TIME <- TIME
dat <- dat[order(dat$TIME), , drop = FALSE]
tree_names <- names(dat)[-1]
# ----------------------------
# helper functions
# ----------------------------
in_doy_window <- function(doy, doy_window) {
start <- doy_window[1]
end <- doy_window[2]
if (start <= end) {
doy >= start & doy <= end
} else {
doy >= start | doy <= end
}
}
assign_season_year <- function(time, season_type, CS_doys = NULL) {
yy <- lubridate::year(time)
dd <- lubridate::yday(time)
mm <- lubridate::month(time)
if (season_type == "NH") {
return(list(
season_year = yy,
in_season = rep(TRUE, length(time))
))
}
if (season_type == "SH") {
sy <- ifelse(mm >= 7, yy, yy - 1L)
return(list(
season_year = sy,
in_season = rep(TRUE, length(time))
))
}
# custom season
d1 <- CS_doys[1]
d2 <- CS_doys[2]
if (d1 <= d2) {
in_season <- dd >= d1 & dd <= d2
sy <- yy
} else {
in_season <- dd >= d1 | dd <= d2
sy <- ifelse(dd >= d1, yy, yy - 1L)
}
list(
season_year = sy,
in_season = in_season
)
}
get_reference_value <- function(vals, time, doy, ref_type, ref_n_days, ref_doys) {
ok <- !is.na(vals)
if (!any(ok)) return(NA_real_)
if (ref_type == "first_value") {
return(vals[which(ok)[1]])
}
if (ref_type == "first_n_days") {
dts <- as.Date(time)
unique_days <- sort(unique(dts[ok]))
if (length(unique_days) == 0) return(NA_real_)
keep_days <- unique_days[seq_len(min(ref_n_days, length(unique_days)))]
return(mean(vals[dts %in% keep_days], na.rm = TRUE))
}
# ref_window
keep <- in_doy_window(doy, ref_doys) & ok
if (!any(keep)) return(NA_real_)
mean(vals[keep], na.rm = TRUE)
}
safe_divide <- function(num, denom) {
out <- rep(NA_real_, length(num))
ok <- !is.na(num)
if (!is.finite(denom) || is.na(denom)) return(out)
if (denom == 0) {
out[ok] <- 0
return(out)
}
out[ok] <- num[ok] / denom
out
}
percentile_scale <- function(x) {
out <- rep(NA_real_, length(x))
ok <- !is.na(x)
n <- sum(ok)
if (n == 0) return(out)
if (n == 1) {
out[ok] <- 0.5
return(out)
}
r <- rank(x[ok], ties.method = "average")
out[ok] <- (r - 1) / (n - 1)
out
}
# ----------------------------
# season-year assignment
# ----------------------------
season_info <- assign_season_year(dat$TIME, season_type = season_type, CS_doys = CS_doys)
dat$season_year <- season_info$season_year
dat$in_season <- season_info$in_season
dat$DOY <- lubridate::yday(dat$TIME)
# prepare output
out_data <- tibble::tibble(
TIME = dat$TIME,
season_year = dat$season_year,
in_season = dat$in_season
)
param_list <- list()
# ----------------------------
# standardize each tree by season-year
# ----------------------------
for (tr in tree_names) {
x <- dat[[tr]]
std_x <- rep(NA_real_, length(x))
season_levels <- sort(unique(dat$season_year[dat$in_season]))
for (sy in season_levels) {
idx <- which(dat$season_year == sy & dat$in_season)
vals <- x[idx]
tsub <- dat$TIME[idx]
dsub <- dat$DOY[idx]
n_total <- length(vals)
n_nonmiss <- sum(!is.na(vals))
ref_val <- NA_real_
denom <- NA_real_
center_val <- NA_real_
if (n_nonmiss > 0) {
if (method %in% c("center", "amplitude", "robust_amplitude")) {
ref_val <- get_reference_value(
vals = vals,
time = tsub,
doy = dsub,
ref_type = ref_type,
ref_n_days = ref_n_days,
ref_doys = ref_doys
)
center_val <- ref_val
}
if (method == "center") {
std_x[idx] <- vals - ref_val
denom <- NA_real_
} else if (method == "amplitude") {
denom <- max(vals, na.rm = TRUE) - min(vals, na.rm = TRUE)
std_x[idx] <- safe_divide(vals - ref_val, denom)
} else if (method == "robust_amplitude") {
qq <- stats::quantile(vals, probs = c(q_low, q_high), na.rm = TRUE, names = FALSE)
denom <- qq[2] - qq[1]
std_x[idx] <- safe_divide(vals - ref_val, denom)
} else if (method == "minmax") {
vmin <- min(vals, na.rm = TRUE)
vmax <- max(vals, na.rm = TRUE)
denom <- vmax - vmin
std_x[idx] <- safe_divide(vals - vmin, denom)
center_val <- vmin
} else if (method == "zscore") {
mu <- mean(vals, na.rm = TRUE)
denom <- stats::sd(vals, na.rm = TRUE)
center_val <- mu
std_x[idx] <- safe_divide(vals - mu, denom)
} else if (method == "robust_zscore") {
med <- stats::median(vals, na.rm = TRUE)
denom <- stats::mad(vals, center = med, constant = 1.4826, na.rm = TRUE)
center_val <- med
std_x[idx] <- safe_divide(vals - med, denom)
} else if (method == "percentile") {
std_x[idx] <- percentile_scale(vals)
center_val <- NA_real_
denom <- NA_real_
}
}
param_list[[length(param_list) + 1L]] <- tibble::tibble(
tree = tr,
season_year = sy,
season_type = season_type,
method = method,
ref_type = if (method %in% c("center", "amplitude", "robust_amplitude")) ref_type else NA_character_,
ref_n_days = if (method %in% c("center", "amplitude", "robust_amplitude") &&
ref_type == "first_n_days") ref_n_days else NA_integer_,
ref_doys_start = if (method %in% c("center", "amplitude", "robust_amplitude") &&
ref_type == "ref_window") ref_doys[1] else NA_integer_,
ref_doys_end = if (method %in% c("center", "amplitude", "robust_amplitude") &&
ref_type == "ref_window") ref_doys[2] else NA_integer_,
q_low = if (method == "robust_amplitude") q_low else NA_real_,
q_high = if (method == "robust_amplitude") q_high else NA_real_,
n_total = n_total,
n_nonmiss = n_nonmiss,
season_start = min(tsub),
season_end = max(tsub),
reference_value = ref_val,
center_value = center_val,
denominator = denom
)
}
out_data[[tr]] <- std_x
}
out <- list(
data = out_data,
parameters = dplyr::bind_rows(param_list),
metadata = list(
season_type = season_type,
CS_doys = if (season_type == "CS") CS_doys else NULL,
method = method,
ref_type = ref_type,
ref_n_days = ref_n_days,
ref_doys = ref_doys,
q_low = q_low,
q_high = q_high,
trees = tree_names,
time_range = range(dat$TIME, na.rm = TRUE)
)
)
class(out) <- "dm_standardized"
out
}
#' @title Plot method for standardized dendrometer output
#'
#' @description
#' S3 plotting method for objects returned by \code{dm_standardize()}.
#'
#' @param x Object of class \code{"dm_standardized"} returned by
#' \code{dm_standardize()}.
#' @param y Unused.
#' @param trees Character vector of dendrometer series to plot, or \code{"all"}
#' for all series.
#' @param type Plot type. One of:
#' \itemize{
#' \item \code{"series"}: standardized time series over calendar time
#' \item \code{"seasonal"}: standardized seasonal trajectories by seasonal day
#' \item \code{"heatmap"}: heatmap of standardized values by seasonal day and seasonal year
#' \item \code{"boxplot"}: distribution of standardized values
#' }
#' @param x_axis For \code{type = "series"}, x-axis style:
#' \code{"time"}, \code{"doy"}, or \code{"season_doy"}.
#' @param facet_by Faceting option. One of \code{"tree"}, \code{"season_year"},
#' or \code{"none"}.
#' @param legend_by Legend grouping. One of \code{"tree"}, \code{"season_year"},
#' or \code{"none"}.
#' @param in_season_only Logical. If \code{TRUE}, only observations inside the
#' defined season are plotted.
#' @param box_group For \code{type = "boxplot"}, grouping variable on the x-axis:
#' \code{"tree"} or \code{"season_year"}.
#' @param alpha Line or point transparency.
#' @param line_size Line width.
#' @param point_size Point size.
#' @param ... Unused.
#'
#' @return A \code{ggplot2} object, returned invisibly.
#'
#' @examples
#' \donttest{
#' data(gf_nepa17)
#'
#' out_std <- dm_standardize(
#' df = gf_nepa17,
#' season_type = "NH",
#' method = "robust_amplitude"
#' )
#'
#' plot(out_std)
#' plot(out_std, type = "seasonal")
#' plot(out_std, type = "seasonal", facet_by = "tree", legend_by = "season_year")
#' plot(out_std, type = "seasonal", facet_by = "season_year", legend_by = "tree")
#' plot(out_std, type = "heatmap", facet_by = "tree")
#' plot(out_std, type = "boxplot", facet_by = "tree", legend_by = "season_year")
#' }
#'
#' @method plot dm_standardized
#' @importFrom dplyr mutate filter %>%
#' @importFrom tidyr pivot_longer
#' @importFrom tibble as_tibble
#' @importFrom lubridate yday
#' @importFrom ggplot2 ggplot aes geom_line geom_point geom_tile geom_boxplot
#' facet_wrap theme_bw theme element_text labs scale_x_date
#' @export
plot.dm_standardized <- function(x,
y = NULL,
trees = "all",
type = c("series", "seasonal", "heatmap", "boxplot"),
x_axis = c("time", "doy", "season_doy"),
facet_by = c("tree", "season_year", "none"),
legend_by = c("season_year", "tree", "none"),
in_season_only = TRUE,
box_group = c("tree", "season_year"),
alpha = 0.8,
line_size = 0.45,
point_size = 1.6,
...) {
TIME <- value <- tree <- season_year <- season_doy <- DOY <- X <- group_var <- legend_var <- in_season <- NULL
if (!inherits(x, "dm_standardized")) {
stop("Input must be an object of class 'dm_standardized'.")
}
type <- match.arg(type)
x_axis <- match.arg(x_axis)
facet_by <- match.arg(facet_by)
legend_by <- match.arg(legend_by)
box_group <- match.arg(box_group)
if (!is.list(x) || is.null(x$data) || !is.data.frame(x$data)) {
stop("The object does not contain a valid 'data' element.")
}
dat <- tibble::as_tibble(x$data)
tree_names <- setdiff(names(dat), c("TIME", "season_year", "in_season"))
if (length(tree_names) == 0) {
stop("No dendrometer series found in 'x$data'.")
}
if (length(trees) == 1 && identical(trees, "all")) {
keep_trees <- tree_names
} else {
if (!is.character(trees)) {
stop("'trees' must be 'all' or a character vector of dendrometer column names.")
}
miss <- setdiff(trees, tree_names)
if (length(miss) > 0) {
stop(sprintf(
"The following trees are not available in the standardized object: %s",
paste(miss, collapse = ", ")
))
}
keep_trees <- trees
}
dat_long <- tidyr::pivot_longer(
dat,
cols = all_of(keep_trees),
names_to = "tree",
values_to = "value"
)
if (isTRUE(in_season_only)) {
dat_long <- dat_long %>% dplyr::filter(in_season %in% TRUE)
}
if (nrow(dat_long) == 0) {
stop("No observations available for plotting after filtering.")
}
make_season_start <- function(season_year, season_type, CS_doys = NULL) {
season_year <- as.integer(season_year)
if (season_type == "NH") {
return(as.Date(paste0(season_year, "-01-01")))
}
if (season_type == "SH") {
return(as.Date(paste0(season_year, "-07-01")))
}
as.Date(paste0(season_year, "-01-01")) + (CS_doys[1] - 1L)
}
season_type <- x$metadata$season_type
CS_doys <- x$metadata$CS_doys
dat_long$DOY <- lubridate::yday(dat_long$TIME)
season_start <- make_season_start(
season_year = dat_long$season_year,
season_type = season_type,
CS_doys = CS_doys
)
dat_long$season_doy <- as.integer(as.Date(dat_long$TIME) - season_start) + 1L
if (x_axis == "time") {
dat_long$X <- as.Date(dat_long$TIME)
x_lab <- "Time"
} else if (x_axis == "doy") {
dat_long$X <- dat_long$DOY
x_lab <- "Day of year"
} else {
dat_long$X <- dat_long$season_doy
x_lab <- "Seasonal day"
}
dat_long$season_year <- as.factor(dat_long$season_year)
if (legend_by == "tree") {
dat_long$legend_var <- dat_long$tree
legend_title <- "Tree"
} else if (legend_by == "season_year") {
dat_long$legend_var <- dat_long$season_year
legend_title <- "Season year"
} else {
dat_long$legend_var <- "All"
legend_title <- NULL
}
title_base <- paste0(
"Standardized dendrometer data (",
x$metadata$method, ", ",
x$metadata$season_type, ")"
)
if (type == "series") {
p <- ggplot2::ggplot(
dat_long,
ggplot2::aes(
x = X,
y = value,
group = interaction(tree, season_year),
color = legend_var
)
) +
ggplot2::geom_line(linewidth = line_size, alpha = alpha)
if (facet_by == "tree") {
p <- p + ggplot2::facet_wrap(~ tree, scales = "free_y", ncol = 1)
} else if (facet_by == "season_year") {
p <- p + ggplot2::facet_wrap(~ season_year, scales = "free_y", ncol = 1)
}
p <- p +
ggplot2::labs(
title = title_base,
x = x_lab,
y = "Standardized value",
color = legend_title
) +
ggplot2::theme_bw() +
ggplot2::theme(
axis.title = ggplot2::element_text(size = 14),
axis.text = ggplot2::element_text(size = 11)
)
if (legend_by == "none") {
p <- p + ggplot2::theme(legend.position = "none")
}
if (x_axis == "time") {
p <- p + ggplot2::scale_x_date(date_labels = "%b-%Y", date_breaks = "2 months")
}
print(p)
return(invisible(p))
}
if (type == "seasonal") {
p <- ggplot2::ggplot(
dat_long,
ggplot2::aes(
x = season_doy,
y = value,
group = interaction(tree, season_year),
color = legend_var
)
) +
ggplot2::geom_line(linewidth = line_size, alpha = alpha)
if (facet_by == "tree") {
p <- p + ggplot2::facet_wrap(~ tree, scales = "free_y", ncol = 1)
} else if (facet_by == "season_year") {
p <- p + ggplot2::facet_wrap(~ season_year, scales = "free_y", ncol = 1)
}
p <- p +
ggplot2::labs(
title = title_base,
x = "Seasonal day",
y = "Standardized value",
color = legend_title
) +
ggplot2::theme_bw() +
ggplot2::theme(
axis.title = ggplot2::element_text(size = 14),
axis.text = ggplot2::element_text(size = 11)
)
if (legend_by == "none") {
p <- p + ggplot2::theme(legend.position = "none")
}
print(p)
return(invisible(p))
}
if (type == "heatmap") {
p <- ggplot2::ggplot(
dat_long,
ggplot2::aes(x = season_doy, y = season_year, fill = value)
) +
ggplot2::geom_tile()
if (facet_by == "tree") {
p <- p + ggplot2::facet_wrap(~ tree, ncol = 1)
} else if (facet_by == "season_year") {
# not meaningful to facet by season_year when y is season_year
warning("For type = 'heatmap', facet_by = 'season_year' is not very informative because season_year is already on the y-axis.")
}
p <- p +
ggplot2::labs(
title = title_base,
x = "Seasonal day",
y = "Season year",
fill = "Std. value"
) +
ggplot2::theme_bw() +
ggplot2::theme(
axis.title = ggplot2::element_text(size = 14),
axis.text = ggplot2::element_text(size = 11)
)
print(p)
return(invisible(p))
}
if (type == "boxplot") {
if (box_group == "tree") {
dat_long$group_var <- dat_long$tree
x_lab2 <- "Tree"
} else {
dat_long$group_var <- dat_long$season_year
x_lab2 <- "Season year"
}
p <- ggplot2::ggplot(
dat_long,
ggplot2::aes(x = group_var, y = value, fill = legend_var)
) +
ggplot2::geom_boxplot()
if (facet_by == "tree" && box_group == "season_year") {
p <- p + ggplot2::facet_wrap(~ tree, scales = "free_y", ncol = 1)
} else if (facet_by == "season_year" && box_group == "tree") {
p <- p + ggplot2::facet_wrap(~ season_year, scales = "free_y", ncol = 1)
}
p <- p +
ggplot2::labs(
title = title_base,
x = x_lab2,
y = "Standardized value",
fill = legend_title
) +
ggplot2::theme_bw() +
ggplot2::theme(
axis.title = ggplot2::element_text(size = 14),
axis.text = ggplot2::element_text(size = 11)
)
if (legend_by == "none") {
p <- p + ggplot2::theme(legend.position = "none")
}
print(p)
return(invisible(p))
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.