Nothing
## ----logo, echo = FALSE, out.width = '20%', fig.align = 'right', out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px; display: inline-block; float: right;margin: 50px 0px 0px 20px;"'----
knitr::include_graphics("logo.jpg")
## ----eval = TRUE--------------------------------------------------------------
citation(package = "prospectr")
## ----NIRsoil, tidy = TRUE, message = FALSE------------------------------------
library(prospectr)
data(NIRsoil)
# NIRsoil is a data.frame with 825 obs and 5 variables:
# Nt (Total Nitrogen), Ciso (Carbon), CEC (Cation Exchange Capacity),
# train (vector of 0,1 indicating training (1) and validation (0) samples),
# spc (spectral matrix)
str(NIRsoil)
## ----movin, fig.cap = "Effect of a moving average with window size of 11 bands on a raw spectrum", fig.height = 4, fig.width = 6, fig.retina = 1, out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
# add some noise
noisy <- NIRsoil$spc + rnorm(length(NIRsoil$spc), 0, 0.001)
# Plot the first spectrum
plot(x = as.numeric(colnames(NIRsoil$spc)),
y = noisy[1, ],
type = "l",
lwd = 1.5,
xlab = "Wavelength",
ylab = "Absorbance")
X <- movav(X = noisy, w = 11) # window size of 11 bands
# Note that the 5 first and last bands are lost in the process
lines(x = as.numeric(colnames(X)), y = X[1,], lwd = 1.5, col = "red")
grid()
legend("topleft",
legend = c("raw", "moving average"),
lty = c(1, 1), col = c("black", "red"))
## ----savits, tidy=TRUE--------------------------------------------------------
# p = polynomial order
# w = window size (must be odd)
# m = m-th derivative (0 = smoothing)
# The function accepts vectors, data.frames or matrices.
# For a matrix input, observations should be arranged row-wise
sgvec <- savitzkyGolay(X = NIRsoil$spc[1,], p = 3, w = 11, m = 0)
sg <- savitzkyGolay(X = NIRsoil$spc, p = 3, w = 11, m = 0)
# note that bands at the edges of the spectral matrix are lost !
dim(NIRsoil$spc)
dim(sg)
## ----d1,fig.cap="Effect of first derivative and second derivative", fig.height = 4, fig.width = 6, fig.retina = 1, out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
# X = wavelength
# Y = spectral matrix
# n = order
d1 <- t(diff(t(NIRsoil$spc), differences = 1)) # first derivative
d2 <- t(diff(t(NIRsoil$spc), differences = 2)) # second derivative
plot(as.numeric(colnames(d1)),
d1[1,],
type = "l",
lwd = 1.5,
xlab = "Wavelength",
ylab = "")
lines(as.numeric(colnames(d2)), d2[1,], lwd = 1.5, col = "red")
grid()
legend("topleft",
legend = c("1st der", "2nd der"),
lty = c(1, 1),
col = c("black", "red"))
## ----gapder,fig.cap="Effect of 1st-order gap derivative ", fig.height = 4, fig.width = 6, fig.retina = 1, out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
# first derivative with a gap of 10 bands
gd1 <- t(diff(t(NIRsoil$spc), differences = 1, lag = 10))
## ----gapseg,fig.cap="Effect of 1st-order gap-segment derivative ", fig.height = 4, fig.width = 6, fig.retina = 1, out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
# m = order of the derivative
# w = gap size
# s = segment size
# first derivative with a gap of 5 bands
gsd1 <- gapDer(X = NIRsoil$spc, m = 1, w = 11, s = 5)
plot(as.numeric(colnames(d1)),
d1[1,],
type = "l",
lwd = 1.5,
xlab = "Wavelength",
ylab = "")
lines(as.numeric(colnames(gsd1)), gsd1[1,], lwd = 1.5, col = "red")
grid()
legend("topleft",
legend = c("1st der","gap-segment 1st der"),
lty = c(1,1),
col = c("black", "red"))
## ----snv, eval=TRUE, fig.cap="Effect of SNV on raw spectra", fig.height = 4, fig.width = 6, fig.retina = 1, out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
snv <- standardNormalVariate(X = NIRsoil$spc)
## ----msc, fig.cap="Effect of MSC on raw spectra", fig.height = 4, fig.width = 6, fig.retina = 1, out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
# X = input spectral matrix
msc_spc <- msc(X = NIRsoil$spc, ref_spectrum = colMeans(NIRsoil$spc))
plot(as.numeric(colnames(NIRsoil$spc)),
NIRsoil$spc[1,],
type = "l",
xlab = "Wavelength, nm", ylab = "Absorbance",
lwd = 1.5)
lines(as.numeric(colnames(NIRsoil$spc)),
msc_spc[1,],
lwd = 1.5, col = "red")
axis(4, col = "red")
grid()
legend("topleft",
legend = c("raw", "MSC signal"),
lty = c(1, 1),
col = c("black", "red"))
par(new = FALSE)
## ----eval = FALSE-------------------------------------------------------------
# # a reference set of spectra
# Xr <- NIRsoil$spc[NIRsoil$train == 1, ]
#
# # an "unseen" set of spectra
# Xu <- NIRsoil$spc[NIRsoil$train == 0, ]
#
# # apply msc to Xr
# Xr_msc <- msc(Xr)
#
# # apply the same msc to Xu
# attr(Xr_msc, "Reference spectrum") # use this info from the previous object
#
# Xu_msc <- msc(Xu, ref_spectrum = attr(Xr_msc, "Reference spectrum"))
## ----detrend, fig.cap="Effect of SNV-Detrend on raw spectra",fig.height = 4, fig.width = 6, fig.retina = 1, out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
# X = input spectral matrix
# wav = band centers
dt <- detrend(X = NIRsoil$spc, wav = as.numeric(colnames(NIRsoil$spc)))
plot(as.numeric(colnames(NIRsoil$spc)),
NIRsoil$spc[1,],
type = "l",
xlab = "Wavelength",
ylab = "Absorbance",
lwd = 1.5)
par(new = TRUE)
plot(dt[1,],
xaxt = "n",
yaxt = "n",
xlab = "",
ylab = "",
lwd = 1.5,
col = "red",
type = "l")
axis(4, col = "red")
grid()
legend("topleft",
legend = c("raw", "detrend signal"),
lty = c(1, 1),
col = c("black", "red"))
par(new = FALSE)
## ----baselined, fig.cap = "Absorbance spectra, baseline spectra and baseline corrected spectra", fig.height = 4, fig.width = 6, fig.retina = 1, out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
data(NIRsoil)
wav <- as.numeric(colnames(NIRsoil$spc))
# plot of the 3 first absorbance spectra
matplot(wav,
t(NIRsoil$spc[1:3, ]),
type = "l",
ylim = c(0, .6),
xlab = "Wavelength /nm",
ylab = "Absorbance")
grid()
bs <- baseline(NIRsoil$spc, wav)
matlines(wav, t(bs[1:3, ]))
fitted_baselines <- attr(bs, "baselines")
matlines(wav, t(fitted_baselines[1:3, ]))
## ----bscale, tidy = TRUE------------------------------------------------------
# X = spectral matrix
# type = "soft" or "hard"
# The ouptut is a list with the scaled matrix (Xscaled) and the divisor (f)
bs <- blockScale(X = NIRsoil$spc, type = "hard")$Xscaled
sum(apply(bs, 2, var)) # this works!
## ----bnorm, tidy = TRUE-------------------------------------------------------
# X = spectral matrix
# targetnorm = desired norm for X
bn <- blockNorm(X = NIRsoil$spc,targetnorm = 1)$Xscaled
sum(bn^2) # this works!
## ----cr, fig.cap = "Absorbance and continuum-removed absorbance spectra", fig.height = 4, fig.width = 6, fig.retina = 1, out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
# type of data: 'R' for reflectance (default), 'A' for absorbance
cr <- continuumRemoval(X = NIRsoil$spc, type = "A")
# plot of the 10 first abs spectra
matplot(as.numeric(colnames(NIRsoil$spc)),
t(NIRsoil$spc[1:3,]),
type = "l",
lty = 1,
ylim = c(0,.6),
xlab="Wavelength /nm",
ylab="Absorbance")
matlines(as.numeric(colnames(NIRsoil$spc)), lty = 1, t(cr[1:3, ]))
grid()
## ----naes, fig.cap = "Selection of 5 samples by k-means sampling", fig.height = 4.5, fig.width = 4, fig.retina = 1, fig.align ='center', out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
# X = the input matrix
# k = number of calibration samples to be selected
# pc = if pc is specified, k-mean is performed in the pc space
# (here we will use only the two 1st pcs)
# iter.max = maximum number of iterations allowed for the k-means clustering.
kms <- naes(X = NIRsoil$spc, k = 5, pc = 2, iter.max = 100)
# Plot the pcs scores and clusters
plot(kms$pc, col = rgb(0, 0, 0, 0.3), pch = 19, main = "k-means")
grid()
# Add the selected points
points(kms$pc[kms$model, ], col = "red", pch = 19)
## ----ken, fig.cap="Selection of 40 calibration samples with the Kennard-Stone algorithm", fig.height = 4.5, fig.width = 4, fig.retina = 1, fig.align ='center', out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
# Create a dataset for illustrating how the calibration sampling
# algorithms work
X <- data.frame(x1 = rnorm(1000), x2 = rnorm(1000))
plot(X, col = rgb(0, 0, 0, 0.3), pch = 19, main = "Kennard-Stone (synthetic)")
grid()
# kenStone produces a list with row index of the points selected for calibration
ken <- kenStone(X, k = 40)
# plot selected points
points(X[ken$model,], col = "red", pch = 19, cex = 1.4)
## ----ken2, fig.cap="Kennard-Stone sampling on the NIRsoil dataset", fig.height = 4.5, fig.width = 4, fig.retina = 1, fig.align = 'center', out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
# Test with the NIRsoil dataset
# one can also use the mahalanobis distance (metric argument)
# computed in the pc space (pc argument)
ken_mahal <- kenStone(X = NIRsoil$spc, k = 20, metric = "mahal", pc = 2)
# The pc components in the output list stores the pc scores
plot(ken_mahal$pc[,1],
ken_mahal$pc[,2],
col = rgb(0, 0, 0, 0.3),
pch = 19,
xlab = "PC1",
ylab = "PC2",
main = "Kennard-Stone")
grid()
# This is the selected points in the pc space
points(ken_mahal$pc[ken_mahal$model, 1],
ken_mahal$pc[ken_mahal$model,2],
pch = 19, col = "red")
## ----ksinitalization, tidy = TRUE---------------------------------------------
# Indices of the initialization samples
initialization_ind <- c(486, 702, 722, 728)
ken_mahal_init <- kenStone(
X = NIRsoil$spc,
k = 20,
metric = "mahal",
pc = 2,
init = initialization_ind)
ken_mahal_init$model
## ----ken3, fig.cap="Kennard-Stone sampling with initialization samples", fig.height = 4.5, fig.width = 4, fig.retina = 1, fig.align = 'center', out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
# The pc components in the output list stores the pc scores
plot(ken_mahal_init$pc[,1],
ken_mahal_init$pc[,2],
col = rgb(0, 0, 0, 0.3),
pch = 19,
xlab = "PC1",
ylab = "PC2",
main = "Kennard-Stone with 4 initialization samples")
grid()
# This is the selected points in the pc space
points(ken_mahal$pc[ken_mahal$model, 1],
ken_mahal$pc[ken_mahal$model, 2],
pch = 19, col = "red")
# Our initialization samples
points(ken_mahal$pc[initialization_ind, 1],
ken_mahal$pc[initialization_ind, 2],
pch = 19, cex = 1.5, col = "blue")
## ----duplex, fig.cap="Selection of 15 calibration and validation samples with the DUPLEX algorithm", fig.height = 4.5, fig.width = 4, fig.retina = 1, fig.align = 'center', out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
dup <- duplex(X = X, k = 15) # k is the number of selected samples
plot(X, col = rgb(0, 0, 0, 0.3), pch = 19, main = "DUPLEX")
grid()
# calibration samples
points(X[dup$model, 1], X[dup$model, 2], col = "red", pch = 19)
# validation samples
points(X[dup$test,1], X[dup$test,2], col = "dodgerblue", pch = 19)
legend("topright",
legend = c("calibration", "validation"),
pch = 19,
col = c("red", "dodgerblue"))
## ----shenk, fig.cap="Selection of samples with the SELECT algorithm", fig.height = 4.5, fig.width = 4, fig.retina = 1, fig.align = 'center', out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
shenk <- shenkWest(X = NIRsoil$spc, d.min = 0.6, pc = 2)
plot(shenk$pc, col = rgb(0, 0, 0, 0.3), pch = 19, main = "SELECT")
grid()
points(shenk$pc[shenk$model,], col = "red", pch = 19)
## ----puchwein, fig.cap="Samples selected by the Puchwein algorithm", fig.height = 4.5, fig.width = 4, fig.retina = 1, fig.align = 'center', out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
pu <- puchwein(X = NIRsoil$spc, k = 0.2, pc =2)
plot(pu$pc, col = rgb(0, 0, 0, 0.3), pch = 19, main = "puchwein")
grid()
points(pu$pc[pu$model,],col = "red", pch = 19) # selected samples
## ----puchwein2, fig.cap="How to find the optimal loop", eval = FALSE----------
# par(mfrow = c(2, 1))
# plot(pu$leverage$removed,pu$leverage$diff,
# type = "l",
# xlab = "# samples removed",
# ylab="Difference between th. and obs sum of leverages")
# # This basically shows that the first loop is optimal
# plot(pu$leverage$loop,nrow(NIRsoil) - pu$leverage$removed,
# xlab = "# loops",
# ylab = "# samples kept", type = "l")
# par(mfrow = c(1, 1))
## ----honigs, fig.cap="Spectra selected with the Honigs algorithm and bands used", fig.height = 4.5, fig.width = 4, fig.retina = 1, fig.align = 'center', out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
# type = "A" is for absorbance data
ho <- honigs(X = NIRsoil$spc, k = 10, type = "A")
# plot calibration spectra
matplot(as.numeric(colnames(NIRsoil$spc)),
t(NIRsoil$spc[ho$model,]),
type = "l",
xlab = "Wavelength", ylab = "Absorbance")
# add bands used during the selection process
abline(v = as.numeric(colnames(NIRsoil$spc))[ho$bands], lty = 2)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.