Nothing
#' Calculate a per-sample local threshold for charcoal peak identification
#'
#' Determines a sliding-window threshold value for each sample, based on the
#' distribution of C_peak within a window centred on that sample. The noise
#' component within each window is modelled as a zero-mean Gaussian (residuals),
#' a one-mean Gaussian (ratios), or via a Gaussian mixture model (GMM).
#' Mirrors \code{CharThreshLocal.m} from the MATLAB v2.0 codebase.
#'
#' @param charcoal Named list with \code{peak} (C_peak series), \code{ybpI}
#' (resampled ages), and \code{accI}.
#' @param smoothing Named list with \code{yr} (window width in years).
#' @param peak_analysis Named list with \code{cPeak}, \code{threshMethod},
#' \code{threshValues} (length 4).
#' @param site Character string (site name; unused in R, kept for API
#' symmetry).
#' @param results Named list (unused in R, kept for API symmetry).
#' @param plot_data 0/1 flag; ignored in R (no diagnostic plots).
#'
#' @return Named list \code{char_thresh} with elements:
#' \describe{
#' \item{pos}{Numeric matrix [N x 4]: per-sample positive threshold for each
#' of the four \code{threshValues}, after Lowess smoothing.}
#' \item{neg}{Numeric matrix [N x 4]: per-sample negative threshold, after
#' Lowess smoothing.}
#' \item{SNI}{Numeric vector [N]: signal-to-noise index time series, Lowess-
#' smoothed and clamped to \eqn{\geq 0}.}
#' \item{GOF}{Numeric vector [N]: KS goodness-of-fit p-values per sample
#' (NA where fewer than 4 noise samples exist).}
#' }
#'
#' @details
#' ## Window selection
#' The half-window is \code{hw = round(0.5 * smoothing$yr / r)} samples, where
#' \code{r = mean(diff(charcoal$ybpI))} is the record resolution. The window
#' is shifted (not shrunk) at record boundaries, matching MATLAB's loop logic.
#'
#' ## NaN bridging within windows
#' NaN entries in \code{charcoal$peak} (from record gaps) are replaced with the
#' neutral value (0 for residuals, 1 for ratios) before distribution fitting.
#' This preserves window size while preventing gap samples from biasing the fit,
#' matching the v2.0 bug fix documented in \code{CharThreshLocal.m}.
#'
#' ## Small-sample fallback
#' If fewer than 4 non-neutral samples exist in the window, a simple Gaussian
#' is fitted (same as method 2) and the KS test is skipped.
#'
#' ## GMM (threshMethod = 3)
#' MATLAB's \code{GaussianMixture(X, 2, 2, false)} is replicated by
#' \code{gaussian_mixture_em(X)} from \code{utils_gaussian_mixture.R}. This
#' uses the same first/last-point initialisation and loose convergence
#' criterion (\eqn{\epsilon = 0.03 \log N}) as the original Bowman CLUSTER
#' EM, substantially improving agreement with MATLAB threshold values compared
#' to \code{mclust}. The noise component is identified as the Gaussian with
#' the smaller mean. Poor-fit fallback (mu1 == mu2) re-fits with K = 3 and
#' takes the two components with the smallest means, mirroring the MATLAB v2.0
#' bug fix (the fallback passes the local window X, not the full record).
#'
#' ## KS goodness-of-fit
#' MATLAB evaluates the fitted normal CDF at 101 equally-spaced bin centres
#' and calls \code{kstest()} with a custom CDF table. R uses
#' \code{stats::ks.test(noise, "pnorm", mu, sigma)}, which evaluates the
#' continuous CDF at each observation. P-values may differ by a small amount
#' for small samples; the statistic converges as sample size grows.
#'
#' ## Post-loop smoothing
#' After the per-sample loop, \code{pos}, \code{neg}, and \code{SNI} are
#' smoothed with \code{char_lowess(span = smoothing$yr / r, iter = 0)}.
#' SNI is then clamped to \eqn{\geq 0}.
#'
#' @seealso [char_thresh_global()], [char_smooth()], [CharAnalysis()]
# Requires: gaussian_mixture_em() from utils_gaussian_mixture.R
# (source that file before sourcing this one, or ensure it is on the search path)
char_thresh_local <- function(charcoal, smoothing, peak_analysis,
site = NULL, results = NULL,
plot_data = 0L) {
r <- mean(diff(charcoal$ybpI)) # yr per sample
hw <- round(0.5 * smoothing$yr / r) # half-window in samples
N <- length(charcoal$peak)
n_tv <- length(peak_analysis$threshValues)
P <- peak_analysis$threshValues[4L] # percentile used for SNI
# Neutral value replaces NaN gap entries before distribution fitting
# (0 for residuals, 1 for ratios)
neutral_val <- if (peak_analysis$cPeak == 1L) 0 else 1
# Diagnostic data for CharPlotFig2_ThreshDiagnostics: capture up to 25 evenly-spaced samples.
# Mirrors MATLAB CharThreshLocal.m lines 228-229.
plot_step <- max(round(N / 25L), 1L)
in_plot_set <- seq(hw * 2L, N, by = plot_step)
diag_data <- vector("list", 0L)
# Preallocate output arrays (NaN-initialised matching MATLAB)
char_thresh <- list()
char_thresh$pos <- matrix(NA_real_, nrow = N, ncol = n_tv)
char_thresh$neg <- matrix(NA_real_, nrow = N, ncol = n_tv)
char_thresh$SNI <- rep(NA_real_, N)
char_thresh$GOF <- rep(NA_real_, N)
# ===========================================================================
# PER-SAMPLE LOOP
# ===========================================================================
for (i in seq_len(N)) {
# Reset GMM variables each iteration to prevent stale values from a
# previous iteration persisting when the current sample falls back to
# single-Gaussian (R does not scope loop bodies as new environments).
mu_both <- NULL
sigma_both <- NULL
fit <- NULL
# ---- Window selection (shifted at boundaries) ---------------------------
# Mirrors MATLAB CharThreshLocal.m lines 86-92 (1-indexed)
if (i <= hw) {
X <- charcoal$peak[seq_len(hw + i)]
} else if (i > N - hw) {
X <- charcoal$peak[(i - hw):N]
} else {
X <- charcoal$peak[(i - hw):(i + hw)]
}
# Replace NaN (gap) values with neutral value to preserve window size.
# v2.0 bug fix: stripping NaN changes window size and distribution shape
# near record gaps; substitution of neutral value avoids this.
X[is.na(X)] <- neutral_val
# ---- Small-sample fallback: < 4 non-neutral samples --------------------
# If the window is nearly all neutral (gap region), fit a simple Gaussian
# instead of a GMM and skip the KS test.
if (sum(X != neutral_val) < 4L) {
if (peak_analysis$cPeak == 1L) {
neg_vals <- X[X <= 0]
sigma_hat_i <- if (length(neg_vals) == 0L) {
stats::sd(X)
} else {
stats::sd(c(neg_vals, abs(neg_vals)))
}
mu_hat_i <- 0
} else {
sub_vals <- X[X <= 1] - 1
sigma_hat_i <- stats::sd(c(sub_vals, abs(sub_vals)) + 1)
mu_hat_i <- 1
}
char_thresh$pos[i, ] <- stats::qnorm(peak_analysis$threshValues,
mu_hat_i, sigma_hat_i)
char_thresh$neg[i, ] <- stats::qnorm(1 - peak_analysis$threshValues,
mu_hat_i, sigma_hat_i)
t_pos <- stats::qnorm(P, mu_hat_i, sigma_hat_i)
sig_i <- X[X > t_pos]
noise_i <- X[X <= t_pos]
char_thresh$SNI[i] <- if (length(sig_i) > 0L && length(noise_i) > 2L) {
(1 / length(sig_i)) *
sum((sig_i - mean(noise_i)) / stats::sd(noise_i)) *
((length(noise_i) - 2L) / length(noise_i))
} else {
0
}
next # KS test skipped for small-sample windows
}
# ---- Estimate local noise distribution ----------------------------------
if (peak_analysis$threshMethod == 2L) {
if (peak_analysis$cPeak == 1L) {
# Residuals: zero-mean Gaussian
neg_vals <- X[X <= 0]
sigma_hat_i <- stats::sd(c(neg_vals, abs(neg_vals)))
mu_hat_i <- 0
} else {
# Ratios: one-mean Gaussian
sub_vals <- X[X <= 1] - 1
sigma_hat_i <- stats::sd(c(sub_vals, abs(sub_vals)) + 1)
mu_hat_i <- 1
}
} else if (peak_analysis$threshMethod == 3L) {
if (sum(X) == 0) {
# All-zero window: fall back to simple Gaussian
# (mirrors MATLAB lines 157-173)
if (peak_analysis$cPeak == 1L) {
mu_hat_i <- 0
neg_vals <- X[X <= 0]
sigma_hat_i <- if (length(neg_vals) == 0L) stats::sd(X)
else stats::sd(c(neg_vals, abs(neg_vals)))
} else {
mu_hat_i <- 1
sub_vals <- X[X <= 1] - 1
sigma_hat_i <- stats::sd(c(sub_vals, abs(sub_vals)) + 1)
}
} else {
# GMM: replicate MATLAB's GaussianMixture(X, 2, 2, false) using the
# direct R port gaussian_mixture_em() from utils_gaussian_mixture.R.
# This matches MATLAB's first/last-point initialisation and loose
# convergence criterion, substantially reducing threshold differences.
fit <- tryCatch(
gaussian_mixture_em(X, k = 2L),
error = function(e) NULL
)
if (is.null(fit)) {
# EM failed: fall back to simple Gaussian (method 2 behaviour)
if (peak_analysis$cPeak == 1L) {
neg_vals <- X[X <= 0]
sigma_hat_i <- if (length(neg_vals) == 0L) stats::sd(X)
else stats::sd(c(neg_vals, abs(neg_vals)))
mu_hat_i <- 0
} else {
sub_vals <- X[X <= 1] - 1
sigma_hat_i <- stats::sd(c(sub_vals, abs(sub_vals)) + 1)
mu_hat_i <- 1
}
} else {
# gaussian_mixture_em returns mu and sigma sorted ascending:
# mu[1] = background component, mu[2] = peak component.
mu_both <- fit$mu
sigma_both <- fit$sigma
if (mu_both[1L] == mu_both[2L]) {
# Poor GMM fit: re-fit with K = 3, take two smallest-mean components.
# Mirrors the v2.0 bug fix (re-fits on local window X, not full record).
warning("char_thresh_local: poor GMM fit at sample ", i,
" (mu1 == mu2). Re-fitting with K = 3.")
fit3 <- tryCatch(
gaussian_mixture_em(X, k = 3L),
error = function(e) NULL
)
if (!is.null(fit3)) {
# fit3$mu is already sorted ascending; take the two smallest
mu_both <- fit3$mu[1:2]
sigma_both <- fit3$sigma[1:2]
}
# If fit3 also failed, keep mu_both/sigma_both from original fit.
}
# Noise component = Gaussian with the smaller mean (index 1,
# already sorted ascending by gaussian_mixture_em).
mu_hat_i <- mu_both[1L]
sigma_hat_i <- sigma_both[1L]
}
}
}
# ---- Compute local threshold values from inverse normal CDF ------------
char_thresh$pos[i, ] <- stats::qnorm(peak_analysis$threshValues,
mu_hat_i, sigma_hat_i)
char_thresh$neg[i, ] <- stats::qnorm(1 - peak_analysis$threshValues,
mu_hat_i, sigma_hat_i)
# ---- Signal-to-noise index (Kelly et al.) --------------------------------
t_pos <- stats::qnorm(P, mu_hat_i, sigma_hat_i)
sig_i <- X[X > t_pos]
noise_i <- X[X <= t_pos]
char_thresh$SNI[i] <- if (length(sig_i) > 0L && length(noise_i) > 2L) {
(1 / length(sig_i)) *
sum((sig_i - mean(noise_i)) / stats::sd(noise_i)) *
((length(noise_i) - 2L) / length(noise_i))
} else {
0
}
# ---- Goodness-of-fit: one-sample KS test --------------------------------
# Noise subsample is all window values at or below the threshold.
# MATLAB builds a 101-point CDF table then calls kstest(); R uses
# ks.test() against the continuous normal CDF directly. P-values converge
# for moderate sample sizes; small-sample differences are expected.
noise_for_ks <- X[X <= t_pos]
if (length(noise_for_ks) > 3L) {
ks_result <- suppressWarnings(
stats::ks.test(noise_for_ks, "pnorm", mu_hat_i, sigma_hat_i)
)
char_thresh$GOF[i] <- ks_result$p.value
}
# ---- Capture diagnostic data for CharPlotFig2_ThreshDiagnostics ----------
# Store per-sample fit info for up to 25 evenly-spaced samples so the
# figure function can reconstruct local window histograms + PDF overlays
# without re-running the loop. Mirrors MATLAB lines 228-290.
if (i %in% in_plot_set && length(diag_data) < 25L) {
# Recover second GMM component if available; otherwise replicate first.
# mu_both and fit are reset to NULL at the top of each iteration, so
# a non-NULL value here reliably indicates the GMM succeeded this sample.
if (!is.null(mu_both) && length(mu_both) >= 2L && !is.null(fit)) {
mu2 <- mu_both[2L]
sig2 <- sigma_both[2L]
prop1 <- fit$prop[1L]
prop2 <- fit$prop[2L]
} else {
mu2 <- mu_hat_i
sig2 <- sigma_hat_i
prop1 <- 1
prop2 <- 0
}
diag_data[[length(diag_data) + 1L]] <- list(
i = i,
yr_bp = charcoal$ybpI[i],
X = X,
mu1 = mu_hat_i, sig1 = sigma_hat_i, prop1 = prop1,
mu2 = mu2, sig2 = sig2, prop2 = prop2,
t_pos = t_pos,
sni = char_thresh$SNI[i], # pre-smoothing value
gof = char_thresh$GOF[i]
)
}
} # end per-sample loop
# ===========================================================================
# SMOOTH THRESHOLD AND SNI SERIES WITH LOWESS
# Mirrors MATLAB CharThreshLocal.m lines 296-304.
# span = threshYr / r (same span used for background smoothing)
# ===========================================================================
span <- smoothing$yr / r
char_thresh$SNI <- char_lowess(char_thresh$SNI, span = span, iter = 0L)
char_thresh$SNI[char_thresh$SNI < 0] <- 0
for (i in seq_len(n_tv)) {
char_thresh$pos[, i] <- char_lowess(char_thresh$pos[, i],
span = span, iter = 0L)
char_thresh$neg[, i] <- char_lowess(char_thresh$neg[, i],
span = span, iter = 0L)
}
char_thresh$diag <- diag_data
char_thresh
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.