Nothing
## -----------------------------------------------------------------------------
#| message: false
#| echo: false
library(resemble)
library(prospectr)
Sys.setenv(OMP_NUM_THREADS = 2)
## -----------------------------------------------------------------------------
#| label: data-prep
#| message: false
library(resemble)
library(prospectr)
data(NIRsoil)
# Wavelengths
wavs <- as.numeric(colnames(NIRsoil$spc))
# Preprocess: detrend + first derivative
NIRsoil$spc_pr <- savitzkyGolay(
detrend(NIRsoil$spc, wav = wavs),
m = 1, p = 1, w = 7
)
# Split into training and test sets
# Note: missing values in the response are allowed in liblex
train_x <- NIRsoil$spc_pr[NIRsoil$train == 1, ]
train_y <- NIRsoil$Ciso[NIRsoil$train == 1]
test_x <- NIRsoil$spc_pr[NIRsoil$train == 0, ]
test_y <- NIRsoil$Ciso[NIRsoil$train == 0]
cat("Training set:", nrow(train_x), "observations\n")
cat("Test set:", nrow(test_x), "observations\n")
## -----------------------------------------------------------------------------
#| label: neighbors-example1
# Fixed neighborhood sizes to evaluate
neighbors_k(k = seq(40, 120, by = 20))
## -----------------------------------------------------------------------------
#| label: neighbors-example2
# Threshold-based selection with bounds
neighbors_diss(
threshold = seq(0.05, 0.5, length.out = 10),
k_min = 40,
k_max = 150
)
## -----------------------------------------------------------------------------
#| label: diss-example
# Moving-window correlation dissimilarity (recommended for spectra)
diss_correlation(ws = 37, scale = TRUE)
# PCA-based Mahalanobis distance with optimized components
diss_pca()
## -----------------------------------------------------------------------------
#| label: fit-method-example
# waPLS with modified PLS algorithm and scaling
fit_wapls(
min_ncomp = 3,
max_ncomp = 15,
scale = TRUE,
method = "mpls"
)
## -----------------------------------------------------------------------------
#| label: control-example
#| eval: false
# # Build library with hyperparameter tuning
# liblex_control(mode = "build", tune = TRUE)
## -----------------------------------------------------------------------------
#| eval: false
# ciso_lib_k <- liblex(
# Xr = train_x,
# Yr = train_y,
# neighbors = neighbors_k(seq(40, 80, by = 20)),
# diss_method = diss_correlation(ws = 37, scale = TRUE),
# fit_method = fit_wapls(
# min_ncomp = 3,
# max_ncomp = 15,
# scale = TRUE,
# method = "mpls"
# ),
# control = liblex_control(tune = TRUE)
# )
#
# ciso_lib_k
## -----------------------------------------------------------------------------
#| label: liblex-fixed-k
#| cache: true
#| echo: false
#| message: false
#| warning: false
ciso_lib_k <- liblex(
Xr = train_x,
Yr = train_y,
neighbors = neighbors_k(seq(40, 80, by = 20)),
diss_method = diss_correlation(ws = 37, scale = TRUE),
fit_method = fit_wapls(
min_ncomp = 3,
max_ncomp = 15,
scale = TRUE,
method = "mpls"
),
control = liblex_control(tune = TRUE),
verbose = FALSE
)
ciso_lib_k
## -----------------------------------------------------------------------------
#| label: fig-liblex
#| fig-cap: "Top: Best model obtained for each neighborhood size (based on the RMSE). Bottom: Centroids of the neighborhoods, usled later in prediction to selet the models to be used."
#| fig-align: "center"
#| fig-width: 7.5
#| fig-height: 6.5
plot(ciso_lib_k)
## -----------------------------------------------------------------------------
#| label: liblex-threshold
#| cache: true
#| echo: false
ciso_lib_thr <- liblex(
Xr = train_x,
Yr = train_y,
neighbors = neighbors_diss(
threshold = seq(0.05, 0.5, length.out = 5),
k_min = 40,
k_max = 150
),
diss_method = diss_correlation(ws = 37, scale = TRUE),
fit_method = fit_wapls(
min_ncomp = 3,
max_ncomp = 15,
scale = TRUE,
method = "mpls"
),
control = liblex_control(tune = TRUE),
verbose = FALSE
)
ciso_lib_thr
## -----------------------------------------------------------------------------
#| cache: true
#| eval: false
# ciso_lib_thr <- liblex(
# Xr = train_x,
# Yr = train_y,
# neighbors = neighbors_diss(
# threshold = seq(0.05, 0.5, length.out = 5),
# k_min = 40,
# k_max = 150
# ),
# diss_method = diss_correlation(ws = 37, scale = TRUE),
# fit_method = fit_wapls(
# min_ncomp = 3,
# max_ncomp = 15,
# scale = TRUE,
# method = "mpls"
# ),
# control = liblex_control(tune = TRUE)
# )
#
# ciso_lib_thr
## -----------------------------------------------------------------------------
#| label: fig-centroids
#| fig-cap: "Neighborhood centroids (blue) compared to test spectra (red)"
#| fig-width: 8
#| fig-height: 5
#| fig-align: center
wavs_pr <- as.numeric(colnames(ciso_lib_k$scaling$local_x_center))
matplot(
wavs_pr,
t(test_x),
col = rgb(1, 0, 0, 0.3),
lty = 1,
type = "l",
xlab = "Wavelength (nm)",
ylab = "First derivative detrended absorbance"
)
matlines(
wavs_pr,
t(ciso_lib_k$scaling$local_x_center),
col = rgb(0, 0, 1, 0.3),
lty = 1
)
grid(lty = 1)
legend(
"topright",
legend = c("Samples to predict", "Neighborhood centroids"),
col = c(rgb(1, 0, 0, 0.8), rgb(0, 0, 1, 0.8)),
lty = 1,
lwd = 2,
bty = "n"
)
## -----------------------------------------------------------------------------
#| label: fig-regression-coefficients
#| fig-cap: "Regression coefficients (top) and intercept distribution (bottom) of the local experts"
#| fig-width: 8
#| fig-height: 7
#| fig-align: center
par(mfrow = c(2, 1), mar = c(4, 4, 2, 1))
# Regression coefficients across wavelengths
models <- ciso_lib_k$coefficients
matplot(
x = wavs_pr,
y = t(models$B),
type = "l",
lty = 1,
col = rgb(0.23, 0.51, 0.96, 0.15),
xlab = "Wavelength (nm)",
ylab = "Regression coefficient",
main = paste0("Regression coefficients (", nrow(models$B), " experts)")
)
# Add mean coefficient profile
lines(wavs_pr, colMeans(models$B), col = "red", lwd = 2)
grid(lty = 1)
legend(
"topright",
legend = c("Individual experts", "Mean"),
col = c(rgb(0.23, 0.51, 0.96, 0.6), "red"),
lty = 1,
lwd = c(1, 2),
bty = "n"
)
# Distribution of intercepts
plot(
density(models$B0, na.rm = TRUE),
col = rgb(0.23, 0.51, 0.96),
lwd = 2,
xlab = "Intercept value",
ylab = "Density",
main = "Distribution of intercepts"
)
polygon(
density(models$B0, na.rm = TRUE),
col = rgb(0.23, 0.51, 0.96, 0.2),
border = NA
)
abline(v = mean(models$B0, na.rm = TRUE), col = "red", lty = 2, lwd = 2)
grid(lty = 1)
legend(
"topright",
legend = c("Density", "Mean"),
col = c(rgb(0.23, 0.51, 0.96), "red"),
lty = c(1, 2),
lwd = 2,
bty = "n"
)
par(mfrow = c(1, 1))
## -----------------------------------------------------------------------------
#| label: kmeans-anchors
# Select 350 representative anchors using k-means sampling
# on the first 20 principal components
set.seed(1124)
kms <- naes(
train_x,
k = 350,
pc = 20,
iter.max = 100,
.center = TRUE,
.scale = TRUE
)
anchor_km <- kms$model
cat("Selected", length(anchor_km), "anchors via k-means\n")
## -----------------------------------------------------------------------------
#| label: liblex-anchored
#| cache: true
#| echo: false
ciso_lib_anchored <- liblex(
Xr = train_x,
Yr = train_y,
neighbors = neighbors_k(seq(40, 80, by = 20)),
diss_method = diss_correlation(ws = 37, scale = TRUE),
fit_method = fit_wapls(
min_ncomp = 3,
max_ncomp = 15,
scale = TRUE,
method = "mpls"
),
anchor_indices = anchor_km,
control = liblex_control(tune = TRUE),
verbose = FALSE
)
ciso_lib_anchored
## -----------------------------------------------------------------------------
#| cache: true
#| eval: false
# ciso_lib_anchored <- liblex(
# Xr = train_x,
# Yr = train_y,
# neighbors = neighbors_k(seq(40, 80, by = 20)),
# diss_method = diss_correlation(ws = 37, scale = TRUE),
# fit_method = fit_wapls(
# min_ncomp = 3,
# max_ncomp = 15,
# scale = TRUE,
# method = "mpls"
# ),
# anchor_indices = anchor_km,
# control = liblex_control(tune = TRUE)
# )
#
# ciso_lib_anchored
## -----------------------------------------------------------------------------
# Number of experts in the anchored library
n_experts <- nrow(ciso_lib_anchored$coefficients$B)
cat("Number of experts in the anchored library:", n_experts, "\n")
## -----------------------------------------------------------------------------
#| label: fig-centroids-anchored
#| fig-cap: "Neighborhood centroids from anchored library compared to test spectra"
#| fig-width: 8
#| fig-height: 5
#| fig-align: center
wavs_pr <- as.numeric(colnames(ciso_lib_anchored$scaling$local_x_center))
n_experts <- nrow(ciso_lib_anchored$scaling$local_x_center)
n_test <- nrow(test_x)
# Plot test spectra first (background)
matplot(
wavs_pr,
t(test_x),
col = rgb(0.8, 0.2, 0.2, 0.2),
lty = 1,
type = "l",
xlab = "Wavelength (nm)",
ylab = "First derivative detrended absorbance",
main = paste0("Coverage: ", n_experts, " experts vs ", n_test, " test samples")
)
# Overlay centroids
matlines(
wavs_pr,
t(ciso_lib_anchored$scaling$local_x_center),
col = rgb(0.23, 0.51, 0.96, 0.3),
lty = 1
)
grid(lty = 1)
legend(
"topright",
legend = c(
paste0("Test samples (n = ", n_test, ")"),
paste0("Centroids (n = ", n_experts, ")")
),
col = c(
rgb(0.8, 0.2, 0.2, 0.5),
rgb(0.23, 0.51, 0.96, 0.5)
),
lty = 1,
lwd = c(1, 1, 2, 2),
bty = "n"
)
## -----------------------------------------------------------------------------
#| label: predict-basic
ciso_pred <- predict(ciso_lib_k, newdata = test_x, verbose = FALSE)
# Prediction output structure
names(ciso_pred)
# Main predictions with uncertainty
head(ciso_pred$predictions)
## -----------------------------------------------------------------------------
#| label: weighting-options
#| eval: false
# # Gaussian kernel (default)
# predict(ciso_lib_k, newdata = test_x, weighting = "gaussian")
#
# # Tricubic kernel (similar to LOCAL algorithm)
# predict(ciso_lib_k, newdata = test_x, weighting = "tricube")
#
# # Equal weights
# predict(ciso_lib_k, newdata = test_x, weighting = "none")
## -----------------------------------------------------------------------------
#| label: enforce-indices
#| eval: false
# # Always include specific experts in predictions
# predict(ciso_lib_k, newdata = test_x, enforce_indices = c(1, 5, 10))
## -----------------------------------------------------------------------------
#| label: evaluation
pred_values <- ciso_pred$predictions$pred
pred_sd <- ciso_pred$predictions$pred_sd
# Performance metrics (excluding missing values)
complete <- !is.na(test_y)
r2 <- cor(pred_values[complete], test_y[complete])^2
rmse <- sqrt(mean((pred_values[complete] - test_y[complete])^2))
cat("R²: ", round(r2, 3), "\n")
cat("RMSE:", round(rmse, 3), "\n")
## -----------------------------------------------------------------------------
#| label: uncertainty-filter
# Filter predictions by uncertainty threshold
unc_threshold <- quantile(pred_sd[complete], 0.75)
reliable <- complete & (pred_sd < unc_threshold)
r2_filtered <- cor(pred_values[reliable], test_y[reliable])^2
rmse_filtered <- sqrt(mean((pred_values[reliable] - test_y[reliable])^2))
cat("After filtering high-uncertainty predictions:\n")
cat(" Retained:", sum(reliable), "/", sum(complete), "observations\n")
cat(" R²: ", round(r2_filtered, 3), "\n")
cat(" RMSE: ", round(rmse_filtered, 3), "\n")
## -----------------------------------------------------------------------------
#| label: fig-validation
#| fig-cap: "Predicted versus observed values"
#| fig-width: 6
#| fig-height: 5
#| fig-align: center
lims <- range(pred_values[complete], test_y[complete], na.rm = TRUE)
plot(
pred_values[complete],
test_y[complete],
pch = 16,
col = rgb(0, 0, 0, 0.5),
xlab = "Predicted",
ylab = "Observed",
xlim = lims,
ylim = lims
)
abline(0, 1, col = "red", lwd = 2)
grid(lty = 1)
## -----------------------------------------------------------------------------
#| label: parallel-example
#| eval: false
# library(doParallel)
#
# # Register parallel backend
# n_cores <- min(parallel::detectCores() - 1, 4)
# cl <- makeCluster(n_cores)
# registerDoParallel(cl)
#
# # Build library with parallel processing
# ciso_lib_parallel <- liblex(
# Xr = train_x,
# Yr = train_y,
# neighbors = neighbors_k(seq(40, 120, by = 20)),
# diss_method = diss_correlation(ws = 37, scale = TRUE),
# fit_method = fit_wapls(
# min_ncomp = 3,
# max_ncomp = 15,
# scale = TRUE,
# method = "mpls"
# ),
# control = liblex_control(
# tune = TRUE,
# allow_parallel = TRUE,
# chunk_size = 10
# )
# )
#
# # Clean up
# stopCluster(cl)
# registerDoSEQ()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.