Nothing
## -----------------------------------------------------------------------------
#| label: tbl-preprocessing
#| tbl-cap: "Pre-processing functions available in the package."
#| echo: false
library(prospectr)
preprocessing_fns <- data.frame(
`Function` = c(
"`movav`", "`savitzkyGolay`", "`gapDer`", "`baseline`",
"`continuumRemoval`", "`detrend`", "`standardNormalVariate`",
"`msc`", "`binning`", "`resample`", "`resample2`",
"`blockScale`", "`blockNorm`"
),
`Description` = c(
"Simple moving (or running) average filter",
"Savitzky--Golay smoothing and derivative",
"Gap--segment derivative",
"Baseline removal (similar to `continuumRemoval`)",
"Computes continuum--removed values",
"Detrend normalisation",
"Standard Normal Variate (SNV) transformation",
"Multiplicative Scatter Correction",
"Average a signal in column bins",
"Resample a signal to new band positions",
"Resample a signal using new FWHM values",
"Block scaling",
"Sum of squares block weighting"
),
check.names = FALSE
)
knitr::kable(preprocessing_fns)
## -----------------------------------------------------------------------------
#| label: load-NIRsoil
#| message: false
library(prospectr)
data(NIRsoil)
# NIRsoil is a data.frame with 825 observations and 5 variables:
# Nt (Total Nitrogen), Ciso (Carbon), CEC (Cation Exchange Capacity),
# train (0/1 indicator for training (1) and validation (0) samples),
# spc (spectral matrix)
str(NIRsoil)
## -----------------------------------------------------------------------------
#| label: fig-movav
#| fig-cap: "Effect of a moving average with window size of 11 bands on a raw spectrum."
#| fig-height: 4
#| fig-width: 6
#| echo: true
# Add some noise
noisy <- NIRsoil$spc + rnorm(length(NIRsoil$spc), 0, 0.001)
# Plot the first spectrum
plot(
x = as.numeric(colnames(NIRsoil$spc)),
y = noisy[1, ],
type = "l",
lwd = 1.5,
xlab = "Wavelength",
ylab = "Absorbance"
)
# Window size of 11 bands; note the 5 first and last bands are lost
X <- movav(X = noisy, w = 11)
lines(x = as.numeric(colnames(X)), y = X[1, ], lwd = 1.5, col = "red")
grid()
legend(
"topleft",
legend = c("Raw", "Moving average"),
lty = 1,
col = c("black", "red")
)
## -----------------------------------------------------------------------------
#| label: savitzky-golay
#| echo: true
# p = polynomial order
# w = window size (must be odd)
# m = m-th derivative (0 = smoothing)
# Accepts vectors, data frames, or matrices (observations row-wise)
sgvec <- savitzkyGolay(X = NIRsoil$spc[1, ], p = 3, w = 11, m = 0)
sg <- savitzkyGolay(X = NIRsoil$spc, p = 3, w = 11, m = 0)
# Bands at the edges of the spectral matrix are lost
dim(NIRsoil$spc)
dim(sg)
## -----------------------------------------------------------------------------
#| label: tbl-derivatives
#| tbl-cap: "Advantages and drawbacks of derivative spectra."
#| echo: false
der_table <- data.frame(
Advantage = c(
"Reduces baseline offset",
"Can resolve overlapping absorptions",
"Compensates for instrumental drift",
"Enhances small spectral absorptions",
"Often increases predictive accuracy for complex datasets"
),
Drawback = c(
"Risk of overfitting the calibration model",
"Increases noise; smoothing required",
"Increases uncertainty in model coefficients",
"Complicates spectral interpretation",
"Removes the baseline"
)
)
knitr::kable(der_table)
## -----------------------------------------------------------------------------
#| label: fig-derivatives
#| fig-cap: "First and second derivatives of a raw spectrum."
#| fig-height: 4
#| fig-width: 6
#| echo: true
d1 <- t(diff(t(NIRsoil$spc), differences = 1)) # first derivative
d2 <- t(diff(t(NIRsoil$spc), differences = 2)) # second derivative
plot(
as.numeric(colnames(d1)), d1[1, ],
type = "l", lwd = 1.5,
xlab = "Wavelength", ylab = ""
)
lines(as.numeric(colnames(d2)), d2[1, ], lwd = 1.5, col = "red")
grid()
legend(
"topleft",
legend = c("1st derivative", "2nd derivative"),
lty = 1, col = c("black", "red")
)
## -----------------------------------------------------------------------------
#| label: fig-gap-derivative
#| fig-cap: "Comparison of first derivative (finite difference) and
#| gap derivative (lag = 10 bands) for the first spectrum."
#| fig-height: 4
#| fig-width: 6
#| echo: true
# m = derivative order; w = gap size; s = segment size
gsd1 <- gapDer(X = NIRsoil$spc, m = 1, w = 11, s = 5)
plot(
as.numeric(colnames(d1)), d1[1, ],
type = "l", lwd = 1.5,
xlab = "Wavelength (nm)", ylab = "1st derivative"
)
par(new = TRUE)
plot(
as.numeric(colnames(gsd1)), gsd1[1, ],
type = "l", lwd = 1.5, col = "red",
xaxt = "n", yaxt = "n",
xlab = "", ylab = ""
)
axis(4, col = "red")
mtext("Gap-segment derivative", side = 4, line = 3, col = "red")
grid()
legend(
"topleft",
legend = c("1st derivative", "Gap-segment 1st derivative"),
lty = 1, col = c("black", "red")
)
par(new = FALSE)
## -----------------------------------------------------------------------------
#| label: fig-snv
#| fig-cap: "Raw absorbance spectra (left) and SNV-transformed spectra (right)
#| for the first five observations."
#| fig-height: 4
#| fig-width: 8
#| echo: true
snv <- standardNormalVariate(X = NIRsoil$spc)
wav <- as.numeric(colnames(NIRsoil$spc))
par(mfrow = c(1, 2))
matplot(
wav, t(NIRsoil$spc[1:5, ]),
type = "l", lty = 1, lwd = 1.5,
col = c("black", "grey20", "grey40", "grey60", "grey80"),
xlab = "Wavelength (nm)", ylab = "Absorbance",
main = "Raw"
)
grid()
matplot(
wav, t(snv[1:5, ]),
type = "l", lty = 1, lwd = 1.5,
col = c("red", "tomato", "coral", "salmon", "lightsalmon"),
xlab = "Wavelength (nm)", ylab = "SNV (Absorbance)",
main = "SNV"
)
grid()
par(mfrow = c(1, 1))
## -----------------------------------------------------------------------------
#| label: fig-msc
#| fig-cap: "Effect of MSC on a raw spectrum (reference = mean spectrum)."
#| fig-height: 4
#| fig-width: 6
#| echo: true
# Reference spectrum is the column mean of the full matrix
msc_spc <- msc(X = NIRsoil$spc, ref_spectrum = colMeans(NIRsoil$spc))
plot(
as.numeric(colnames(NIRsoil$spc)), NIRsoil$spc[1, ],
type = "l", lwd = 1.5,
xlab = "Wavelength (nm)", ylab = "Absorbance"
)
lines(as.numeric(colnames(msc_spc)), msc_spc[1, ], lwd = 1.5, col = "red")
grid()
legend(
"topleft",
legend = c("Raw", "MSC"),
lty = 1, col = c("black", "red")
)
## -----------------------------------------------------------------------------
#| label: msc-transfer
#| eval: false
#| echo: true
# # Training and validation partitions
# Xr <- NIRsoil$spc[NIRsoil$train == 1, ]
# Xu <- NIRsoil$spc[NIRsoil$train == 0, ]
#
# # Fit MSC on the training set (reference = mean of Xr by default)
# Xr_msc <- msc(Xr)
#
# # Apply the same reference spectrum to the validation set
# Xu_msc <- msc(Xu, ref_spectrum = attr(Xr_msc, "Reference spectrum"))
## -----------------------------------------------------------------------------
#| label: fig-detrend
#| fig-cap: "Effect of SNV-Detrend on a raw spectrum. Note the different
#| y-axis scales: raw absorbance (left) and detrended signal (right)."
#| fig-height: 4
#| fig-width: 6
#| echo: true
wav <- as.numeric(colnames(NIRsoil$spc))
dt <- detrend(X = NIRsoil$spc, wav = wav)
# Dual y-axis: raw and detrended spectra are on different scales
plot(
wav, NIRsoil$spc[1, ],
type = "l", lwd = 1.5,
xlab = "Wavelength (nm)", ylab = "Absorbance"
)
par(new = TRUE)
plot(
wav, dt[1, ],
type = "l", lwd = 1.5, col = "red",
xaxt = "n", yaxt = "n",
xlab = "", ylab = ""
)
axis(4, col = "red")
mtext("Detrended", side = 4, line = 3, col = "red")
grid()
legend(
"topleft",
legend = c("Raw", "SNV-Detrend"),
lty = 1, col = c("black", "red")
)
par(new = FALSE)
## -----------------------------------------------------------------------------
#| label: fig-baseline
#| fig-cap: "Raw absorbance spectra (left) and baseline-corrected spectra (right)
#| for the first three observations."
#| fig-height: 4
#| fig-width: 8
#| echo: true
wav <- as.numeric(colnames(NIRsoil$spc))
bs <- baseline(NIRsoil$spc, wav)
fitted_baselines <- attr(bs, "baselines")
par(mfrow = c(1, 2))
# Raw spectra with fitted baselines
matplot(
wav, t(NIRsoil$spc[1:3, ]),
type = "l", lty = 1, lwd = 1.5,
col = c("black", "grey40", "grey70"),
xlab = "Wavelength (nm)", ylab = "Absorbance",
main = "Raw and baseline"
)
matlines(wav, t(fitted_baselines[1:3, ]), lty = 2, col = c("black", "grey40", "grey70"))
grid()
# Baseline-corrected spectra
matplot(
wav, t(bs[1:3, ]),
type = "l", lty = 1, lwd = 1.5,
col = c("red", "tomato", "salmon"),
xlab = "Wavelength (nm)", ylab = "Absorbance",
main = "Baseline corrected"
)
grid()
par(mfrow = c(1, 1))
## -----------------------------------------------------------------------------
#| label: block-scale
#| echo: true
# type = "soft" or "hard"
# Returns a list with the scaled matrix (Xscaled) and the divisor (f)
bsc <- blockScale(X = NIRsoil$spc, type = "hard")$Xscaled
sum(apply(bsc, 2, var)) # should equal 1
## -----------------------------------------------------------------------------
#| label: block-norm
#| echo: true
# targetnorm = desired Frobenius norm (sum of squares) for X
bn <- blockNorm(X = NIRsoil$spc, targetnorm = 1)$Xscaled
sum(bn^2) # should equal 1
## -----------------------------------------------------------------------------
#| label: fig-resample
#| fig-cap: "Original NIR spectrum and resampled spectrum at a coarser
#| resolution (every 10 nm) using interpolation."
#| fig-height: 4
#| fig-width: 6
#| echo: true
wav <- as.numeric(colnames(NIRsoil$spc))
wav_coarse <- seq(min(wav), max(wav), by = 10)
rs <- resample(X = NIRsoil$spc, wav = wav, new.wav = wav_coarse)
plot(
wav, NIRsoil$spc[1, ],
type = "l", lwd = 1.5,
xlab = "Wavelength (nm)", ylab = "Absorbance"
)
lines(
wav_coarse, rs[1, ], lwd = 1.5, col = "red", type = "b", pch = 19, cex = 0.5
)
grid()
legend(
"topleft",
legend = c("Original", "Resampled (10 nm)"),
lty = 1, col = c("black", "red")
)
## -----------------------------------------------------------------------------
#| label: fig-continuum-removal
#| fig-cap: "Raw absorbance spectra (left) and continuum-removed spectra (right)
#| for the first three observations."
#| fig-height: 4
#| fig-width: 8
#| echo: true
# type = "A" for absorbance, "R" for reflectance (default)
cr <- continuumRemoval(X = NIRsoil$spc, type = "A")
wav <- as.numeric(colnames(NIRsoil$spc))
par(mfrow = c(1, 2))
matplot(
wav, t(NIRsoil$spc[1:3, ]),
type = "l", lty = 1, lwd = 1.5,
col = c("black", "grey40", "grey70"),
xlab = "Wavelength (nm)", ylab = "Absorbance",
main = "Raw"
)
grid()
matplot(
wav, t(cr[1:3, ]),
type = "l", lty = 1, lwd = 1.5,
col = c("red", "tomato", "salmon"),
xlab = "Wavelength (nm)", ylab = "Continuum-removed",
main = "Continuum removal"
)
grid()
par(mfrow = c(1, 1))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.