Nothing
#' @title Detection and interpolation of missing values in dendrometer data
#'
#' @description
#' Detects missing timestamps (based on provided resolution), inserts rows with \code{NA},
#' and optionally interpolates missing values using either cubic spline interpolation
#' (\code{\link[zoo:na.spline]{na.spline}}) or seasonal decomposition interpolation
#' (\code{\link[forecast:na.interp]{na.interp}}).
#'
#' Optionally, the function can test the reliability of interpolation for increasing
#' gap lengths (e.g., 6 hours to 3 days) using real data. This allows the user to assess
#' how well a given interpolation method recovers missing values over different durations.
#'
#' @param df A data frame with the first column as datetime (\code{"yyyy-mm-dd HH:MM:SS"},
#' or POSIXct/Date), followed by dendrometer values.
#' @param resolution Integer. Temporal resolution in minutes (e.g., 60 for hourly data).
#' @param fill Logical. If \code{TRUE}, missing values will be interpolated.
#' @param method Character. Interpolation method (only used if \code{fill = TRUE}):
#' \itemize{
#' \item \code{"spline"} --- cubic spline (\code{zoo::na.spline}).
#' \item \code{"seasonal"} --- seasonal decomposition (\code{forecast::na.interp}).
#' }
#' @param assess Logical. If \code{TRUE}, run a simulation-based test to evaluate how well
#' interpolation performs at different gap lengths (on existing data).
#' @param assess_lengths_hours Integer vector. The gap durations (in hours) to test during
#' assessment. Default: \code{seq(6, 72, by = 6)}.
#' @param assess_samples_per_length Integer. Number of artificial gaps per gap length
#' per series. Default: \code{10}.
#' @param assess_buffer_hours Integer. Minimum number of hours of valid data required
#' before and after the artificial gap to allow valid assessment. Default: \code{6}.
#' @param assess_seed Integer or \code{NULL}. Random seed to ensure reproducibility
#' of sampling windows. Default: \code{NULL}.
#' @param verbose Logical. If \code{TRUE}, prints messages during filling and testing.
#'
#' @details
#' The assessment simulates interpolation over artificial gaps of different durations.
#' For each dendrometer series and each gap length:
#' \enumerate{
#' \item Random windows (with clean data) are selected.
#' \item Data in the window is temporarily set to \code{NA}.
#' \item The gap is interpolated using the selected method.
#' \item The predicted values are compared to the original (true) values using:
#' \itemize{
#' \item MAPE --- Mean Absolute Percentage Error.
#' \item MdAPE --- Median Absolute Percentage Error.
#' \item RMSE_pct --- Root Mean Square Error (\%).
#' \item Bias_pct --- Mean Percentage Error (\%).
#' \item Max_diff_abs --- Mean absolute difference in daily maximum.
#' \item Min_diff_abs --- Mean absolute difference in daily minimum.
#' \item Time_max_diff_h --- Mean absolute difference in time of daily maximum (hours).
#' \item Time_min_diff_h --- Mean absolute difference in time of daily minimum (hours).
#' }
#' }
#'
#' @return
#' A list of class \code{"dm_na_interpolation"} with components:
#' \describe{
#' \item{\code{$data}}{Data frame containing the original or gap-filled series.}
#' \item{\code{$gap_info}}{Table describing timestamp gaps and internal NA runs.}
#' \item{\code{$assessment}}{A tibble summarizing interpolation accuracy for each
#' series and gap length, or \code{NULL} when \code{assess = FALSE}.}
#' \item{\code{$filled}}{Logical indicating whether interpolation was performed.}
#' \item{\code{$assessed}}{Logical indicating whether interpolation assessment was performed.}
#' \item{\code{$method}}{Interpolation method used.}
#' \item{\code{$resolution}}{Temporal resolution in minutes.}
#' }
#'
#' @examples
#' \donttest{
#' library(dendRoAnalyst)
#' data(nepa17)
#'
#' #res0 <- dm.na.interpolation(nepa17[1:1000, ], resolution = 60)
#' #plot(res0)
#' #plot(res0, type = "gaps")
#'
#' #res1 <- dm.na.interpolation(
#' # nepa17[1:1000, ],
#' # resolution = 60,
#' # fill = TRUE,
#' # method = "spline"
#' #)
#' #plot(res1)
#' #plot(res1, type = "gaps")
#' #plot(res1, type = "interpolation", original = nepa17[1:1000, ])
#'
#' #res2 <- dm.na.interpolation(
#' # nepa17[1:1000, ],
#' # resolution = 60,
#' # fill = TRUE,
#' # method = "seasonal",
#' # assess = TRUE
#' # )
#' #plot(res2)
#' #plot(res2, type = "assessment", metric = "MdAPE")
#' }
#'
#' @importFrom stats ts median
#' @importFrom lubridate ymd_hms minutes
#' @importFrom dplyr mutate rename select bind_cols first lead last left_join summarise group_by arrange bind_rows relocate
#' @importFrom tibble as_tibble tibble
#' @importFrom zoo na.spline
#' @importFrom forecast na.interp
#' @importFrom utils head
#' @export
dm.na.interpolation <- function(df, resolution, fill = FALSE, method = "spline",
assess = FALSE,
assess_lengths_hours = seq(6, 72, by = 6),
assess_samples_per_length = 10,
assess_buffer_hours = 6,
assess_seed = NULL,
verbose = FALSE) {
to_posix <- function(x) {
if (inherits(x, "POSIXct")) return(x)
if (inherits(x, "Date")) return(as.POSIXct(x))
lubridate::ymd_hms(x)
}
TIME <- time_min <- diff_time <- start_time <- end_time <- type <- series <- prev_time <- next_time <- NULL
start_missing <- end_missing <- n_consecutive <- duration_min <- duration_hr <- Series <- n_cases <- n_points <- MAPE <- MdAPE <- NULL
RMSE_pct <- Bias_pct <- Max_diff_abs <- Min_diff_abs <- Time_max_diff_h <- Time_min_diff_h <- NULL
make_output <- function(data, gap_info, assessment = NULL,
filled = FALSE, assessed = FALSE) {
out <- list(
data = tibble::as_tibble(data),
gap_info = tibble::as_tibble(gap_info),
assessment = if (is.null(assessment)) NULL else tibble::as_tibble(assessment),
filled = filled,
assessed = assessed,
method = method,
resolution = resolution
)
class(out) <- "dm_na_interpolation"
out
}
replace_na_with_spline <- function(column) {
if (!is.numeric(column) || !anyNA(column)) return(column)
if (sum(!is.na(column)) < 2) return(column)
sp <- tryCatch(
zoo::na.spline(column),
error = function(e) column
)
if (length(sp) == length(column)) {
column[is.na(column)] <- sp[is.na(column)]
}
column
}
replace_na_with_seasonality <- function(column, s) {
if (!is.numeric(column) || !anyNA(column)) return(column)
if (sum(!is.na(column)) < 2) return(column)
tryCatch(
as.numeric(forecast::na.interp(stats::ts(column, frequency = s))),
error = function(e) column
)
}
pct_errors <- function(truth, fit) {
pe <- 100 * (fit - truth) / truth
ape <- abs(pe)
c(
MAPE = mean(ape, na.rm = TRUE),
MdAPE = stats::median(ape, na.rm = TRUE),
RMSE_pct = sqrt(mean(pe^2, na.rm = TRUE)),
Bias_pct = mean(pe, na.rm = TRUE)
)
}
quiet <- function(x) suppressWarnings(suppressMessages(x))
mean_or_na <- function(x) {
if (length(x) == 0 || all(is.na(x))) return(NA_real_)
mean(x, na.rm = TRUE)
}
find_na_runs <- function(time_vec, x, series_name) {
na_flag <- is.na(x)
if (!any(na_flag)) {
return(tibble::tibble(
series = character(0),
start_time = as.POSIXct(character()),
end_time = as.POSIXct(character()),
n_consecutive = integer(0)
))
}
r <- rle(na_flag)
ends <- cumsum(r$lengths)
starts <- c(1, utils::head(ends, -1) + 1)
sel <- which(r$values)
if (!length(sel)) {
return(tibble::tibble(
series = character(0),
start_time = as.POSIXct(character()),
end_time = as.POSIXct(character()),
n_consecutive = integer(0)
))
}
tibble::tibble(
series = series_name,
start_time = time_vec[starts[sel]],
end_time = time_vec[ends[sel]],
n_consecutive = r$lengths[sel]
)
}
if (!(method %in% c("spline", "seasonal"))) {
stop("The method must be either 'spline' or 'seasonal'.")
}
df <- tibble::as_tibble(df)
names(df)[1] <- "TIME"
df$TIME <- to_posix(df$TIME)
if (any(is.na(df$TIME))) {
stop("The datetime column could not be fully converted to POSIXct.")
}
df2 <- df %>%
dplyr::mutate(
time_min = as.integer(difftime(TIME, dplyr::first(TIME), units = "mins")),
diff_time = dplyr::lead(time_min) - time_min
)
gap_rows <- which(df2$diff_time > resolution)
if (length(gap_rows) > 0L) {
ts_gap_info <- tibble::tibble(
type = "timestamp_gap",
series = "ALL",
prev_time = df2$TIME[gap_rows],
next_time = df2$TIME[gap_rows] + lubridate::minutes(df2$diff_time[gap_rows]),
start_missing = df2$TIME[gap_rows] + lubridate::minutes(resolution),
end_missing = df2$TIME[gap_rows] + lubridate::minutes(df2$diff_time[gap_rows] - resolution),
n_consecutive = as.integer(df2$diff_time[gap_rows] / resolution - 1L),
duration_min = as.integer(df2$diff_time[gap_rows] - resolution),
duration_hr = (df2$diff_time[gap_rows] - resolution) / 60
)
} else {
ts_gap_info <- tibble::tibble(
type = character(0),
series = character(0),
prev_time = as.POSIXct(character()),
next_time = as.POSIXct(character()),
start_missing = as.POSIXct(character()),
end_missing = as.POSIXct(character()),
n_consecutive = integer(0),
duration_min = integer(0),
duration_hr = numeric(0)
)
}
df <- dplyr::select(df2, -time_min, -diff_time)
if (nrow(ts_gap_info) > 0L) {
x1 <- dplyr::first(df$TIME)
x2 <- dplyr::last(df$TIME)
full_time <- tibble::tibble(
TIME = seq(from = x1, to = x2, by = paste(resolution, "mins"))
)
df_work <- dplyr::left_join(full_time, df, by = "TIME")
} else {
df_work <- df
}
na_runs_list <- lapply(names(df)[-1], function(nm) {
find_na_runs(df$TIME, df[[nm]], nm)
})
na_runs_info <- dplyr::bind_rows(na_runs_list)
if (nrow(na_runs_info)) {
na_runs_info <- na_runs_info %>%
dplyr::mutate(
type = "internal_na",
prev_time = as.POSIXct(NA),
next_time = as.POSIXct(NA),
duration_min = NA_integer_,
duration_hr = NA_real_
) %>%
dplyr::rename(
start_missing = start_time,
end_missing = end_time
) %>%
dplyr::relocate(
type, series, prev_time, next_time, start_missing, end_missing,
n_consecutive, duration_min, duration_hr
)
}
gap_info <- dplyr::bind_rows(ts_gap_info, na_runs_info)
if (nrow(ts_gap_info) == 0L && nrow(na_runs_info) == 0L) {
message("There is no gap in your dendrometer data.")
return(make_output(
data = df,
gap_info = gap_info,
assessment = NULL,
filled = FALSE,
assessed = FALSE
))
}
if (nrow(ts_gap_info) == 0L && nrow(na_runs_info) > 0L) {
message("There is no gap in timestamp but there are internal NA values.")
print(gap_info)
if (!fill) {
return(make_output(
data = df,
gap_info = gap_info,
assessment = NULL,
filled = FALSE,
assessed = FALSE
))
}
}
if (nrow(ts_gap_info) > 0L) {
print(gap_info)
if (!fill) {
return(make_output(
data = df_work,
gap_info = gap_info,
assessment = NULL,
filled = FALSE,
assessed = FALSE
))
}
}
if (verbose) {
message("Filling missing values using method = '", method, "'.")
}
if (nrow(ts_gap_info) > 0L) {
if (max(ts_gap_info$n_consecutive, na.rm = TRUE) >= (1440 / resolution)) {
warning("There is a timestamp gap lasting more than 24 h; interpolation may be unreliable.")
}
}
if (method == "spline") {
df_vals <- lapply(df_work[-1], replace_na_with_spline)
} else {
s <- 1440 / resolution
df_vals <- lapply(df_work[-1], function(col) replace_na_with_seasonality(col, s))
}
df_filled <- dplyr::bind_cols(df_work[1], tibble::as_tibble(df_vals))
if (!assess) {
return(make_output(
data = df_filled,
gap_info = gap_info,
assessment = NULL,
filled = TRUE,
assessed = FALSE
))
}
if (!is.null(assess_seed)) {
set.seed(assess_seed)
}
if (verbose) {
message(
"Running self test on artificial gaps: ",
paste(assess_lengths_hours, collapse = ", "),
" hours"
)
}
time_dates <- as.Date(df_filled$TIME)
time_hours <- as.numeric(format(df_filled$TIME, "%H")) +
as.numeric(format(df_filled$TIME, "%M")) / 60 +
as.numeric(format(df_filled$TIME, "%S")) / 3600
circular_hour_diff <- function(x, y) {
d <- abs(x - y)
pmin(d, 24 - d)
}
fast_daily_extrema <- function(date_vec, hour_vec, value_vec) {
udates <- unique(date_vec)
out <- lapply(udates, function(dd) {
idx <- which(date_vec == dd)
vv <- value_vec[idx]
hh <- hour_vec[idx]
if (length(vv) == 0 || all(is.na(vv))) {
return(data.frame(
DATE = dd,
Min = NA_real_,
Time_min_h = NA_real_,
Max = NA_real_,
Time_max_h = NA_real_
))
}
i_min <- which.min(replace(vv, is.na(vv), Inf))[1]
i_max <- which.max(replace(vv, is.na(vv), -Inf))[1]
data.frame(
DATE = dd,
Min = min(vv, na.rm = TRUE),
Time_min_h = hh[i_min],
Max = max(vv, na.rm = TRUE),
Time_max_h = hh[i_max]
)
})
do.call(rbind, out)
}
daily_case_differences_fast <- function(idx_days, truth, fit) {
out_na <- c(
Max_diff_abs = NA_real_,
Min_diff_abs = NA_real_,
Time_max_diff_h = NA_real_,
Time_min_diff_h = NA_real_
)
if (!any(idx_days)) {
return(out_na)
}
truth_daily <- fast_daily_extrema(
date_vec = time_dates[idx_days],
hour_vec = time_hours[idx_days],
value_vec = truth[idx_days]
)
fit_daily <- fast_daily_extrema(
date_vec = time_dates[idx_days],
hour_vec = time_hours[idx_days],
value_vec = fit[idx_days]
)
merged <- merge(
truth_daily,
fit_daily,
by = "DATE",
suffixes = c("_truth", "_fit")
)
if (nrow(merged) == 0) {
return(out_na)
}
c(
Max_diff_abs = mean(abs(merged$Max_fit - merged$Max_truth), na.rm = TRUE),
Min_diff_abs = mean(abs(merged$Min_fit - merged$Min_truth), na.rm = TRUE),
Time_max_diff_h = mean(
circular_hour_diff(merged$Time_max_h_fit, merged$Time_max_h_truth),
na.rm = TRUE
),
Time_min_diff_h = mean(
circular_hour_diff(merged$Time_min_h_fit, merged$Time_min_h_truth),
na.rm = TRUE
)
)
}
impute_with_gap <- function(series, gap_idx, meth) {
tmp <- series
tmp[gap_idx] <- NA_real_
if (meth == "spline") {
tryCatch(
as.numeric(quiet(zoo::na.spline(tmp))),
error = function(e) tmp
)
} else {
s <- 1440 / resolution
tryCatch(
as.numeric(quiet(forecast::na.interp(stats::ts(tmp, frequency = s)))),
error = function(e) tmp
)
}
}
pts_per_hour <- max(1L, round(60 / resolution))
buffer_pts <- max(1L, round(assess_buffer_hours * pts_per_hour))
n_total <- nrow(df_filled)
max_rows <- (ncol(df_filled) - 1L) *
length(assess_lengths_hours) *
assess_samples_per_length
stat_rows <- vector("list", max_rows)
row_id <- 0L
for (j in seq.int(2, ncol(df_filled))) {
series_name <- names(df_filled)[j]
y_full <- df_filled[[j]]
ok_full <- is.finite(y_full)
cs_ok <- c(0L, cumsum(ok_full))
for (gap_h in assess_lengths_hours) {
gap_pts <- max(1L, round(gap_h * pts_per_hour))
start_min <- buffer_pts + 1L
start_max <- n_total - gap_pts - buffer_pts + 1L
if (start_max < start_min) {
if (verbose) {
message("No valid windows for ", series_name, " at ", gap_h, " h.")
}
next
}
starts <- seq.int(start_min, start_max)
win_start <- starts - buffer_pts
win_end <- starts + gap_pts - 1L + buffer_pts
required_n <- gap_pts + 2L * buffer_pts
finite_counts <- cs_ok[win_end + 1L] - cs_ok[win_start]
valid_starts <- starts[finite_counts == required_n]
if (length(valid_starts) == 0L) {
if (verbose) {
message("No valid windows for ", series_name, " at ", gap_h, " h.")
}
next
}
starts_to_use <- if (length(valid_starts) > assess_samples_per_length) {
sample(valid_starts, assess_samples_per_length, replace = FALSE)
} else {
valid_starts
}
for (i in starts_to_use) {
idx_gap <- i:(i + gap_pts - 1L)
y_imputed <- impute_with_gap(y_full, idx_gap, method)
errs_all <- pct_errors(y_full[idx_gap], y_imputed[idx_gap])
affected_dates <- unique(time_dates[idx_gap])
idx_days <- time_dates %in% affected_dates
daily_errs <- daily_case_differences_fast(
idx_days = idx_days,
truth = y_full,
fit = y_imputed
)
row_id <- row_id + 1L
stat_rows[[row_id]] <- data.frame(
Series = series_name,
gap_h = gap_h,
n_cases = 1L,
n_points = length(idx_gap),
MAPE = errs_all["MAPE"],
MdAPE = errs_all["MdAPE"],
RMSE_pct = errs_all["RMSE_pct"],
Bias_pct = errs_all["Bias_pct"],
Max_diff_abs = daily_errs["Max_diff_abs"],
Min_diff_abs = daily_errs["Min_diff_abs"],
Time_max_diff_h = daily_errs["Time_max_diff_h"],
Time_min_diff_h = daily_errs["Time_min_diff_h"],
stringsAsFactors = FALSE
)
}
}
}
if (row_id == 0L) {
warning("Assessment produced no results (insufficient clean windows).")
assessment <- tibble::tibble()
} else {
raw_stats <- do.call(rbind, stat_rows[seq_len(row_id)])
assessment <- as.data.frame(raw_stats) %>%
dplyr::group_by(Series, gap_h) %>%
dplyr::summarise(
n_cases = sum(n_cases),
n_points = sum(n_points),
MAPE = mean_or_na(MAPE),
MdAPE = mean_or_na(MdAPE),
RMSE_pct = mean_or_na(RMSE_pct),
Bias_pct = mean_or_na(Bias_pct),
Max_diff_abs = mean_or_na(Max_diff_abs),
Min_diff_abs = mean_or_na(Min_diff_abs),
Time_max_diff_h = mean_or_na(Time_max_diff_h),
Time_min_diff_h = mean_or_na(Time_min_diff_h),
.groups = "drop"
) %>%
dplyr::arrange(Series, gap_h)
}
make_output(
data = df_filled,
gap_info = gap_info,
assessment = assessment,
filled = TRUE,
assessed = TRUE
)
}
#' @title Plot method for dendrometer NA interpolation results
#'
#' @description
#' S3 plot method for objects returned by \code{\link{dm.na.interpolation}}.
#'
#' @param x Object returned by \code{dm.na.interpolation()}.
#' @param type Character. Plot type:
#' \code{"gaps"}, \code{"interpolation"}, or \code{"assessment"}.
#' If omitted, the default is:
#' \itemize{
#' \item \code{"assessment"} when assessment exists,
#' \item \code{"interpolation"} when filled data exist,
#' \item \code{"gaps"} when gaps exist but data were not filled,
#' \item otherwise \code{"interpolation"}.
#' }
#' @param series Optional character vector of series names to plot.
#' @param metric Character assessment metric used when \code{type = "assessment"}.
#' @param original Optional original input data frame used before interpolation.
#' @param start Optional start datetime for subsetting.
#' @param end Optional end datetime for subsetting.
#' @param free_y Logical. If \code{TRUE}, facets use free y scales.
#' @param ncol Integer. Number of facet columns.
#' @param facet Logical. If \code{TRUE}, facet assessment plots by series.
#' @param empty_gaps Character. Behavior when \code{type = "gaps"} but no gaps are
#' present: \code{"plot"} for an informative empty plot, or \code{"error"}.
#' @param ... Further arguments.
#'
#' @return A \code{ggplot2} object.
#'
#' @examples
#' \donttest{
#' library(dendRoAnalyst)
#' data(nepa17)
#'
#' ## No gaps: defaults to time-series plot
#' #res0 <- dm.na.interpolation(nepa17[1:1000, ], resolution = 60)
#' #plot(res0)
#' #plot(res0, type = "gaps")
#' #plot(res0, type = "gaps", empty_gaps = "error")
#'
#' ## Filled data
#' #res1 <- dm.na.interpolation(
#' # nepa17[1:1000, ],
#' # resolution = 60,
#' # fill = TRUE,
#' # method = "spline"
#' #)
#' #plot(res1)
#' #plot(res1, type = "gaps")
#' #plot(res1, type = "interpolation", original = nepa17[1:1000, ])
#'
#' ## Assessed data
#' #res2 <- dm.na.interpolation(
#' # nepa17[1:1000, ],
#' # resolution = 60,
#' # fill = TRUE,
#' # method = "seasonal",
#' # assess = TRUE
#' #)
#' #plot(res2)
#' #plot(res2, type = "assessment", metric = "MdAPE")
#' }
#'
#' @method plot dm_na_interpolation
#' @export
plot.dm_na_interpolation <- function(x,
type = NULL,
series = NULL,
metric = "MdAPE",
original = NULL,
start = NULL,
end = NULL,
free_y = TRUE,
ncol = 1,
facet = TRUE,
empty_gaps = c("plot", "error"),
...) {
if (!inherits(x, "dm_na_interpolation")) {
stop("x must be an object returned by dm.na.interpolation().")
}
empty_gaps <- match.arg(empty_gaps)
has_assessment <- isTRUE(x$assessed) &&
!is.null(x$assessment) &&
nrow(x$assessment) > 0
has_gaps <- !is.null(x$gap_info) &&
nrow(x$gap_info) > 0
if (is.null(type)) {
if (has_assessment) {
type <- "assessment"
} else if (isTRUE(x$filled)) {
type <- "interpolation"
} else if (has_gaps) {
type <- "gaps"
} else {
type <- "interpolation"
}
}
type <- match.arg(type, choices = c("gaps", "interpolation", "assessment"))
if (type == "gaps") {
return(plot_dm_gaps(
x = x,
series = series,
empty_gaps = empty_gaps
))
}
if (type == "interpolation") {
return(plot_dm_interpolation(
x = x,
original = original,
series = series,
start = start,
end = end,
free_y = free_y,
ncol = ncol
))
}
if (type == "assessment") {
if (!has_assessment) {
stop("Assessment plot is only available when dm.na.interpolation(..., assess = TRUE) produced results.")
}
return(plot_dm_assessment(
x = x,
metric = metric,
series = series,
facet = facet
))
}
}
#' @title Plot detected gaps in dendrometer data
#'
#' @param x Object returned by \code{dm.na.interpolation()}.
#' @param series Optional character vector of series names to plot.
#' @param empty_gaps Character. Behavior when no gaps are present:
#' \code{"plot"} for an informative empty plot, or \code{"error"}.
#'
#' @return A \code{ggplot2} object.
#' @examples
#' \donttest{
#' #res <- dm.na.interpolation(nepa17[1:1000, ], resolution = 60)
#' #plot_dm_gaps(res)
#' }
#' @export
plot_dm_gaps <- function(x, series = NULL, empty_gaps = c("plot", "error")) {
empty_gaps <- match.arg(empty_gaps)
start_missing <- end_missing <- type <- NULL
parts <- .dm_interp_extract_parts(x)
data <- parts$data
gap_info <- parts$gap_info
if (is.null(gap_info) || nrow(gap_info) == 0) {
if (empty_gaps == "error") {
stop("No gap information available.")
}
return(.dm_interp_empty_plot("No gaps detected in the data."))
}
series_all <- names(data)[-1]
if (is.null(series)) {
series <- series_all
} else {
series <- intersect(series, series_all)
if (!length(series)) {
stop("None of the requested 'series' were found in the result.")
}
}
gap_plot <- .dm_interp_expand_gap_info(gap_info, series_all = series_all)
gap_plot <- gap_plot[gap_plot$series %in% series, , drop = FALSE]
if (nrow(gap_plot) == 0) {
if (empty_gaps == "error") {
stop("No gaps available for the selected series.")
}
return(.dm_interp_empty_plot("No gaps available for the selected series."))
}
gap_plot$series <- factor(gap_plot$series, levels = rev(series))
ggplot2::ggplot(gap_plot) +
ggplot2::geom_segment(
ggplot2::aes(
x = start_missing,
xend = end_missing,
y = series,
yend = series,
colour = type
),
linewidth = 5,
alpha = 0.6,
lineend = "butt"
) +
ggplot2::geom_point(
ggplot2::aes(x = start_missing, y = series, colour = type),
size = 1.5
) +
ggplot2::labs(
x = "Time",
y = "Series",
colour = "Gap type",
title = "Detected gaps in dendrometer data"
) +
ggplot2::theme_bw()
}
#' @title Plot original and interpolated dendrometer series
#'
#' @param x Object returned by \code{dm.na.interpolation()}.
#' @param original Optional original input data frame used in
#' \code{dm.na.interpolation()}.
#' @param series Optional character vector of series names to plot.
#' @param start Optional start datetime for plotting subset.
#' @param end Optional end datetime for plotting subset.
#' @param free_y Logical. If \code{TRUE}, each facet gets its own y-scale.
#' @param ncol Integer. Number of facet columns.
#'
#' @return A \code{ggplot2} object.
#' @examples
#' \donttest{
#' #res <- dm.na.interpolation(
#' # nepa17[1:1000, ],
#' # resolution = 60,
#' # fill = TRUE,
#' # method = "spline"
#' #)
#' #plot_dm_interpolation(res, original = nepa17[1:1000, ])
#' }
#' @export
plot_dm_interpolation <- function(x, original = NULL, series = NULL,
start = NULL, end = NULL,
free_y = TRUE, ncol = 1) {
TIME <- xmin <- xmax <- type <- filled <- NULL
parts <- .dm_interp_extract_parts(x)
plot_data <- parts$data
gap_info <- parts$gap_info
plot_data <- tibble::as_tibble(plot_data)
names(plot_data)[1] <- "TIME"
series_all <- names(plot_data)[-1]
if (is.null(series)) {
series <- series_all
} else {
series <- intersect(series, series_all)
if (!length(series)) {
stop("None of the requested 'series' were found in the result.")
}
}
plot_data <- plot_data[, c("TIME", series), drop = FALSE]
if (!is.null(start)) {
plot_data <- plot_data[plot_data$TIME >= as.POSIXct(start), , drop = FALSE]
}
if (!is.null(end)) {
plot_data <- plot_data[plot_data$TIME <= as.POSIXct(end), , drop = FALSE]
}
filled_long <- tidyr::pivot_longer(
plot_data,
cols = -TIME,
names_to = "series",
values_to = "filled"
)
if (!is.null(original)) {
original <- tibble::as_tibble(original)
names(original)[1] <- "TIME"
miss_series <- setdiff(series, names(original))
if (length(miss_series)) {
stop(
"These series are missing in 'original': ",
paste(miss_series, collapse = ", ")
)
}
original <- dplyr::left_join(
plot_data["TIME"],
original[, c("TIME", series), drop = FALSE],
by = "TIME"
)
if (!is.null(start)) {
original <- original[original$TIME >= as.POSIXct(start), , drop = FALSE]
}
if (!is.null(end)) {
original <- original[original$TIME <= as.POSIXct(end), , drop = FALSE]
}
original_long <- tidyr::pivot_longer(
original,
cols = -TIME,
names_to = "series",
values_to = "original"
)
plot_df <- dplyr::left_join(
filled_long,
original_long,
by = c("TIME", "series")
)
plot_df$imputed <- is.na(plot_df$original) & !is.na(plot_df$filled)
} else {
plot_df <- filled_long
plot_df$original <- NA_real_
plot_df$imputed <- FALSE
if (!is.null(gap_info) && nrow(gap_info) > 0) {
gap_plot <- .dm_interp_expand_gap_info(gap_info, series_all = series_all)
gap_plot <- gap_plot[gap_plot$series %in% series, , drop = FALSE]
if (!is.null(start)) {
gap_plot <- gap_plot[gap_plot$end_missing >= as.POSIXct(start), , drop = FALSE]
}
if (!is.null(end)) {
gap_plot <- gap_plot[gap_plot$start_missing <= as.POSIXct(end), , drop = FALSE]
}
for (i in seq_len(nrow(gap_plot))) {
sel <- plot_df$series == gap_plot$series[i] &
plot_df$TIME >= gap_plot$start_missing[i] &
plot_df$TIME <= gap_plot$end_missing[i]
plot_df$imputed[sel] <- TRUE
}
}
}
gap_rect <- NULL
if (!is.null(gap_info) && nrow(gap_info) > 0) {
gap_rect <- .dm_interp_expand_gap_info(gap_info, series_all = series_all)
gap_rect <- gap_rect[gap_rect$series %in% series, , drop = FALSE]
if (!is.null(start)) {
gap_rect <- gap_rect[gap_rect$end_missing >= as.POSIXct(start), , drop = FALSE]
}
if (!is.null(end)) {
gap_rect <- gap_rect[gap_rect$start_missing <= as.POSIXct(end), , drop = FALSE]
}
if (nrow(gap_rect) > 0) {
step_sec <- .dm_interp_time_step_seconds(plot_data$TIME)
gap_rect$xmin <- gap_rect$start_missing - step_sec / 2
gap_rect$xmax <- gap_rect$end_missing + step_sec / 2
}
}
title_txt <- if (isTRUE(x$filled)) {
"Original and interpolated dendrometer series"
} else {
"Dendrometer series"
}
p <- ggplot2::ggplot(plot_df, ggplot2::aes(x = TIME)) +
ggplot2::theme_bw() +
ggplot2::labs(
x = "Time",
y = "Dendrometer value",
title = title_txt
)
if (!is.null(gap_rect) && nrow(gap_rect) > 0) {
p <- p +
ggplot2::geom_rect(
data = gap_rect,
ggplot2::aes(
xmin = xmin, xmax = xmax,
ymin = -Inf, ymax = Inf,
fill = type
),
inherit.aes = FALSE,
alpha = 0.12
)
}
if (any(!is.na(plot_df$original))) {
p <- p +
ggplot2::geom_line(
ggplot2::aes(y = original),
linewidth = 0.35,
colour = "grey45",
na.rm = TRUE
) +
ggplot2::geom_line(
ggplot2::aes(y = filled),
linewidth = 0.45,
colour = "#1F78B4",
na.rm = TRUE
) +
ggplot2::geom_point(
data = plot_df[plot_df$imputed, , drop = FALSE],
ggplot2::aes(y = filled),
colour = "#D7301F",
size = 1.1,
na.rm = TRUE
)
} else {
p <- p +
ggplot2::geom_line(
ggplot2::aes(y = filled),
linewidth = 0.45,
colour = "#1F78B4",
na.rm = TRUE
)
if (any(plot_df$imputed)) {
p <- p +
ggplot2::geom_point(
data = plot_df[plot_df$imputed, , drop = FALSE],
ggplot2::aes(y = filled),
colour = "#D7301F",
size = 1.1,
na.rm = TRUE
)
}
}
p +
ggplot2::facet_wrap(
~series,
scales = if (free_y) "free_y" else "fixed",
ncol = ncol
) +
ggplot2::labs(fill = "Gap type")
}
#' @title Plot interpolation assessment metrics
#'
#' @param x Object returned by \code{dm.na.interpolation()} with
#' \code{assess = TRUE}.
#' @param metric Character. One of:
#' \code{"MAPE"}, \code{"MdAPE"}, \code{"RMSE_pct"}, \code{"Bias_pct"},
#' \code{"Max_diff_abs"}, \code{"Min_diff_abs"},
#' \code{"Time_max_diff_h"}, \code{"Time_min_diff_h"}.
#' @param series Optional character vector of series names to plot.
#' @param facet Logical. If \code{TRUE}, create one panel per series.
#'
#' @return A \code{ggplot2} object.
#' @examples
#' \donttest{
#' #res <- dm.na.interpolation(
#' # nepa17[1:1000, ],
#' # resolution = 60,
#' # fill = TRUE,
#' # method = "seasonal",
#' # assess = TRUE
#' #)
#' #plot_dm_assessment(res, metric = "MdAPE")
#' }
#' @export
plot_dm_assessment <- function(x, metric = "MdAPE", series = NULL, facet = TRUE) {
gap_h <- value <- Series <- NULL
parts <- .dm_interp_extract_parts(x)
assessment <- parts$assessment
if (is.null(assessment) || nrow(assessment) == 0) {
stop("No assessment found. Run dm.na.interpolation(..., assess = TRUE).")
}
valid_metrics <- c(
"MAPE", "MdAPE", "RMSE_pct", "Bias_pct",
"Max_diff_abs", "Min_diff_abs",
"Time_max_diff_h", "Time_min_diff_h"
)
if (!(metric %in% valid_metrics)) {
stop(
"Invalid metric. Choose one of: ",
paste(valid_metrics, collapse = ", ")
)
}
assessment <- tibble::as_tibble(assessment)
if (!is.null(series)) {
assessment <- assessment[assessment$Series %in% series, , drop = FALSE]
if (nrow(assessment) == 0) {
stop("No matching series found in assessment.")
}
}
assessment$value <- assessment[[metric]]
p <- ggplot2::ggplot(
assessment,
ggplot2::aes(x = gap_h, y = value, group = Series, colour = Series)
) +
ggplot2::geom_line(linewidth = 0.5, na.rm = TRUE) +
ggplot2::geom_point(size = 1.8, na.rm = TRUE) +
ggplot2::theme_bw() +
ggplot2::labs(
x = "Artificial gap length (hours)",
y = metric,
colour = "Series",
title = paste("Interpolation assessment:", metric)
)
if (facet) {
p + ggplot2::facet_wrap(~Series, scales = "free_y")
} else {
p
}
}
# -------------------------------------------------------------------------
# Internal helpers
# ----------------------------------------------------------------
.dm_interp_extract_parts <- function(x) {
if (!inherits(x, "dm_na_interpolation")) {
stop("x must be an object returned by dm.na.interpolation().")
}
list(
data = tibble::as_tibble(x$data),
gap_info = x$gap_info,
assessment = x$assessment
)
}
.dm_interp_expand_gap_info <- function(gap_info, series_all) {
if (is.null(gap_info) || nrow(gap_info) == 0) {
return(tibble::tibble(
type = character(0),
series = character(0),
start_missing = as.POSIXct(character()),
end_missing = as.POSIXct(character()),
n_consecutive = integer(0)
))
}
out <- list()
k <- 0L
ts_rows <- gap_info[gap_info$type == "timestamp_gap", , drop = FALSE]
if (nrow(ts_rows) > 0) {
for (i in seq_len(nrow(ts_rows))) {
for (s in series_all) {
k <- k + 1L
out[[k]] <- tibble::tibble(
type = ts_rows$type[i],
series = s,
start_missing = ts_rows$start_missing[i],
end_missing = ts_rows$end_missing[i],
n_consecutive = ts_rows$n_consecutive[i]
)
}
}
}
na_rows <- gap_info[gap_info$type == "internal_na", , drop = FALSE]
if (nrow(na_rows) > 0) {
na_rows <- na_rows[na_rows$series %in% series_all, , drop = FALSE]
if (nrow(na_rows) > 0) {
for (i in seq_len(nrow(na_rows))) {
k <- k + 1L
out[[k]] <- tibble::tibble(
type = na_rows$type[i],
series = na_rows$series[i],
start_missing = na_rows$start_missing[i],
end_missing = na_rows$end_missing[i],
n_consecutive = na_rows$n_consecutive[i]
)
}
}
}
if (!length(out)) {
return(tibble::tibble(
type = character(0),
series = character(0),
start_missing = as.POSIXct(character()),
end_missing = as.POSIXct(character()),
n_consecutive = integer(0)
))
}
dplyr::bind_rows(out)
}
.dm_interp_time_step_seconds <- function(time_vec) {
if (length(time_vec) < 2) {
return(0)
}
stats::median(diff(as.numeric(time_vec)), na.rm = TRUE)
}
.dm_interp_empty_plot <- function(label = "No gaps detected in the data.") {
ggplot2::ggplot() +
ggplot2::annotate("text", x = 0, y = 0, label = label, size = 5) +
ggplot2::xlim(-1, 1) +
ggplot2::ylim(-1, 1) +
ggplot2::labs(
title = "Detected gaps in dendrometer data",
x = NULL,
y = NULL
) +
ggplot2::theme_void()
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.