prospectr.R
In prospectr: Miscellaneous Functions for Processing and Sample Selection of Spectroscopic Data

## ----logo, echo = FALSE, out.width = '20%', fig.align = 'right', out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px; display: inline-block; float: right;margin: 50px 0px 0px 20px;"'----
knitr::include_graphics("logo.jpg")

## ----eval = TRUE--------------------------------------------------------------
citation(package = "prospectr")

## ----NIRsoil, tidy = TRUE, message = FALSE------------------------------------
library(prospectr)
data(NIRsoil)
# NIRsoil is a data.frame with 825 obs and 5 variables: 
# Nt (Total Nitrogen), Ciso (Carbon), CEC (Cation Exchange Capacity), 
# train (vector of 0,1 indicating training (1) and validation (0) samples),
# spc (spectral matrix)
str(NIRsoil)

## ----movin, fig.cap = "Effect of a moving average with window size of 11 bands on a raw spectrum", fig.height = 4, fig.width = 6, fig.retina = 1, out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
# add some noise
noisy <- NIRsoil$spc + rnorm(length(NIRsoil$spc), 0, 0.001) 
# Plot the first spectrum
plot(x = as.numeric(colnames(NIRsoil$spc)),
     y = noisy[1, ],
     type = "l",
     lwd = 1.5,
     xlab = "Wavelength", 
     ylab = "Absorbance") 
X <- movav(X = noisy, w = 11) # window size of 11 bands
# Note that the 5 first and last bands are lost in the process
lines(x = as.numeric(colnames(X)), y = X[1,], lwd = 1.5, col = "red")
grid()
legend("topleft", 
       legend = c("raw", "moving average"), 
       lty = c(1, 1), col = c("black", "red"))

## ----savits, tidy=TRUE--------------------------------------------------------
# p = polynomial order
# w = window size (must be odd)
# m = m-th derivative (0 = smoothing)
# The function accepts vectors, data.frames or matrices.
# For a matrix input, observations should be arranged row-wise
sgvec <- savitzkyGolay(X = NIRsoil$spc[1,], p = 3, w = 11, m = 0) 
sg <- savitzkyGolay(X = NIRsoil$spc, p = 3, w = 11, m = 0) 
# note that bands at the edges of the spectral matrix are lost !
dim(NIRsoil$spc)
dim(sg)

## ----d1,fig.cap="Effect of first derivative and second derivative", fig.height = 4, fig.width = 6, fig.retina = 1, out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
# X = wavelength
# Y = spectral matrix
# n = order
d1 <- t(diff(t(NIRsoil$spc), differences = 1)) # first derivative
d2 <- t(diff(t(NIRsoil$spc), differences = 2)) # second derivative
plot(as.numeric(colnames(d1)), 
     d1[1,], 
     type = "l", 
     lwd = 1.5, 
     xlab = "Wavelength", 
     ylab = "")
lines(as.numeric(colnames(d2)), d2[1,], lwd = 1.5, col = "red")
grid()
legend("topleft", 
       legend = c("1st der", "2nd der"), 
       lty = c(1, 1),
       col = c("black", "red"))

## ----gapder,fig.cap="Effect of 1st-order gap derivative ", fig.height = 4, fig.width = 6, fig.retina = 1, out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
# first derivative with a gap of 10 bands
gd1 <- t(diff(t(NIRsoil$spc), differences = 1, lag = 10)) 

## ----gapseg,fig.cap="Effect of 1st-order gap-segment derivative ", fig.height = 4, fig.width = 6, fig.retina = 1, out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
# m = order of the derivative
# w = gap size
# s = segment size
# first derivative with a gap of 5 bands
gsd1 <- gapDer(X = NIRsoil$spc, m = 1, w = 11, s = 5) 
plot(as.numeric(colnames(d1)), 
     d1[1,], 
     type = "l", 
     lwd = 1.5, 
     xlab = "Wavelength", 
     ylab = "")
lines(as.numeric(colnames(gsd1)), gsd1[1,], lwd = 1.5, col = "red")
grid()
legend("topleft",
       legend = c("1st der","gap-segment 1st der"), 
       lty = c(1,1), 
       col = c("black", "red"))

## ----snv, eval=TRUE, fig.cap="Effect of SNV on raw spectra", fig.height = 4, fig.width = 6, fig.retina = 1, out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
snv <- standardNormalVariate(X = NIRsoil$spc)

## ----msc, fig.cap="Effect of MSC on raw spectra", fig.height = 4, fig.width = 6, fig.retina = 1, out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
# X = input spectral matrix
msc_spc <- msc(X = NIRsoil$spc, ref_spectrum = colMeans(NIRsoil$spc))

plot(as.numeric(colnames(NIRsoil$spc)), 
     NIRsoil$spc[1,], 
     type = "l", 
     xlab = "Wavelength, nm", ylab = "Absorbance", 
     lwd = 1.5)
lines(as.numeric(colnames(NIRsoil$spc)), 
      msc_spc[1,], 
      lwd = 1.5, col = "red")
axis(4, col = "red")
grid()
legend("topleft", 
       legend = c("raw", "MSC signal"), 
       lty = c(1, 1),
       col = c("black", "red"))
par(new = FALSE)

## ----eval = FALSE-------------------------------------------------------------
#  # a reference set of spectra
#  Xr <- NIRsoil$spc[NIRsoil$train == 1, ]
#  
#  # an "unseen" set of spectra
#  Xu <- NIRsoil$spc[NIRsoil$train == 0, ]
#  
#  # apply msc to Xr
#  Xr_msc <- msc(Xr)
#  
#  # apply the same msc to Xu
#  attr(Xr_msc, "Reference spectrum") # use this info from the previous object
#  
#  Xu_msc <- msc(Xu, ref_spectrum = attr(Xr_msc, "Reference spectrum"))

## ----detrend, fig.cap="Effect of SNV-Detrend on raw spectra",fig.height = 4, fig.width = 6, fig.retina = 1, out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
# X = input spectral matrix
# wav = band centers
dt <- detrend(X = NIRsoil$spc, wav = as.numeric(colnames(NIRsoil$spc)))
plot(as.numeric(colnames(NIRsoil$spc)), 
     NIRsoil$spc[1,], 
     type = "l", 
     xlab = "Wavelength", 
     ylab = "Absorbance", 
     lwd = 1.5)
par(new = TRUE)
plot(dt[1,], 
     xaxt = "n", 
     yaxt = "n",
     xlab = "", 
     ylab = "", 
     lwd = 1.5, 
     col = "red", 
     type = "l")
axis(4, col = "red")
grid()
legend("topleft", 
       legend = c("raw", "detrend signal"), 
       lty = c(1, 1),
       col = c("black", "red"))
par(new = FALSE)

## ----baselined, fig.cap = "Absorbance spectra, baseline spectra and baseline corrected spectra", fig.height = 4, fig.width = 6, fig.retina = 1, out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
data(NIRsoil)

wav <- as.numeric(colnames(NIRsoil$spc))
# plot of the 3 first absorbance spectra
matplot(wav,
        t(NIRsoil$spc[1:3, ]),
        type = "l",
        ylim = c(0, .6),
        xlab = "Wavelength /nm",
        ylab = "Absorbance")
grid()
 
bs <- baseline(NIRsoil$spc, wav)
matlines(wav, t(bs[1:3, ]))
 
fitted_baselines <- attr(bs, "baselines")
matlines(wav, t(fitted_baselines[1:3, ]))

## ----bscale, tidy = TRUE------------------------------------------------------
# X = spectral matrix
# type = "soft" or "hard"
# The ouptut is a list with the scaled matrix (Xscaled) and the divisor (f)
bs <- blockScale(X = NIRsoil$spc, type = "hard")$Xscaled
sum(apply(bs, 2, var)) # this works!

## ----bnorm, tidy = TRUE-------------------------------------------------------
# X = spectral matrix
# targetnorm = desired norm for X
bn <- blockNorm(X = NIRsoil$spc,targetnorm = 1)$Xscaled
sum(bn^2) # this works!

## ----cr, fig.cap = "Absorbance and continuum-removed absorbance spectra", fig.height = 4, fig.width = 6, fig.retina = 1, out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
# type of data: 'R' for reflectance (default), 'A' for absorbance
cr <- continuumRemoval(X = NIRsoil$spc, type = "A")
# plot of the 10 first abs spectra
matplot(as.numeric(colnames(NIRsoil$spc)),
        t(NIRsoil$spc[1:3,]),
        type = "l",
        lty = 1,
        ylim = c(0,.6),
        xlab="Wavelength /nm", 
        ylab="Absorbance")
matlines(as.numeric(colnames(NIRsoil$spc)), lty = 1, t(cr[1:3, ]))
grid()

## ----naes, fig.cap = "Selection of 5 samples by k-means sampling", fig.height = 4.5, fig.width = 4, fig.retina = 1, fig.align ='center', out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
# X = the input matrix
# k = number of calibration samples to be selected
# pc = if pc is specified, k-mean is performed in the pc space 
# (here we will use only the two 1st pcs)
# iter.max =  maximum number of iterations allowed for the k-means clustering.
kms <- naes(X = NIRsoil$spc, k = 5, pc = 2, iter.max = 100)
# Plot the pcs scores and clusters
plot(kms$pc, col = rgb(0, 0, 0, 0.3), pch = 19, main = "k-means") 
grid()
# Add the selected points
points(kms$pc[kms$model, ], col = "red", pch = 19)

## ----ken, fig.cap="Selection of 40 calibration samples with the Kennard-Stone algorithm", fig.height = 4.5, fig.width = 4, fig.retina = 1, fig.align ='center', out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
# Create a dataset for illustrating how the calibration sampling 
# algorithms work
X <- data.frame(x1 = rnorm(1000), x2 = rnorm(1000))
plot(X, col = rgb(0, 0, 0, 0.3), pch = 19, main = "Kennard-Stone (synthetic)") 
grid()
# kenStone produces a list with row index of the points selected for calibration
ken <- kenStone(X, k = 40) 
# plot selected points
points(X[ken$model,], col = "red", pch = 19, cex = 1.4) 

## ----ken2, fig.cap="Kennard-Stone sampling on the NIRsoil dataset", fig.height = 4.5, fig.width = 4, fig.retina = 1, fig.align = 'center', out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----

# Test with the NIRsoil dataset
# one can also use the mahalanobis distance (metric argument)
# computed in the pc space (pc argument)
ken_mahal <- kenStone(X = NIRsoil$spc, k = 20, metric = "mahal", pc = 2)
# The pc components in the output list stores the pc scores
plot(ken_mahal$pc[,1], 
     ken_mahal$pc[,2], 
     col = rgb(0, 0, 0, 0.3), 
     pch = 19, 
     xlab = "PC1",
     ylab = "PC2",
     main = "Kennard-Stone") 
grid()
# This is the selected points in the pc space
points(ken_mahal$pc[ken_mahal$model, 1], 
       ken_mahal$pc[ken_mahal$model,2], 
       pch = 19, col = "red") 

## ----ksinitalization, tidy = TRUE---------------------------------------------
# Indices of the initialization samples
initialization_ind <- c(486, 702, 722, 728) 
ken_mahal_init <- kenStone(
  X = NIRsoil$spc, 
  k = 20, 
  metric = "mahal", 
  pc = 2, 
  init = initialization_ind)

ken_mahal_init$model

## ----ken3, fig.cap="Kennard-Stone sampling with initialization samples", fig.height = 4.5, fig.width = 4, fig.retina = 1, fig.align = 'center', out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----

# The pc components in the output list stores the pc scores
plot(ken_mahal_init$pc[,1], 
     ken_mahal_init$pc[,2], 
     col = rgb(0, 0, 0, 0.3), 
     pch = 19, 
     xlab = "PC1",
     ylab = "PC2",
     main = "Kennard-Stone with 4 initialization samples") 
grid()
# This is the selected points in the pc space
points(ken_mahal$pc[ken_mahal$model, 1], 
       ken_mahal$pc[ken_mahal$model, 2], 
       pch = 19, col = "red") 

# Our initialization samples
points(ken_mahal$pc[initialization_ind, 1], 
       ken_mahal$pc[initialization_ind, 2], 
       pch = 19, cex = 1.5, col = "blue") 


## ----duplex, fig.cap="Selection of 15 calibration and validation samples with the DUPLEX algorithm", fig.height = 4.5, fig.width = 4, fig.retina = 1, fig.align = 'center', out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
dup <- duplex(X = X, k = 15) # k is the number of selected samples
plot(X, col = rgb(0, 0, 0, 0.3), pch = 19, main = "DUPLEX") 
grid()
# calibration samples
points(X[dup$model, 1], X[dup$model, 2], col = "red", pch = 19) 
# validation samples
points(X[dup$test,1], X[dup$test,2], col = "dodgerblue", pch = 19) 
legend("topright", 
       legend = c("calibration", "validation"), 
       pch = 19, 
       col = c("red", "dodgerblue"))

## ----shenk, fig.cap="Selection of samples with the SELECT algorithm", fig.height = 4.5, fig.width = 4, fig.retina = 1, fig.align = 'center', out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
shenk <- shenkWest(X = NIRsoil$spc, d.min = 0.6, pc = 2)
plot(shenk$pc, col = rgb(0, 0, 0, 0.3), pch = 19, main = "SELECT") 
grid()
points(shenk$pc[shenk$model,], col = "red", pch = 19)

## ----puchwein, fig.cap="Samples selected by the Puchwein algorithm", fig.height = 4.5, fig.width = 4, fig.retina = 1, fig.align = 'center', out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
pu <- puchwein(X = NIRsoil$spc, k = 0.2, pc =2)
plot(pu$pc, col = rgb(0, 0, 0, 0.3), pch = 19, main = "puchwein") 
grid()
points(pu$pc[pu$model,],col = "red", pch = 19) # selected samples

## ----puchwein2, fig.cap="How to find the optimal loop", eval = FALSE----------
#  par(mfrow = c(2, 1))
#  plot(pu$leverage$removed,pu$leverage$diff,
#       type = "l",
#       xlab = "# samples removed",
#       ylab="Difference between th. and obs sum of leverages")
#  # This basically shows that the first loop is optimal
#  plot(pu$leverage$loop,nrow(NIRsoil) - pu$leverage$removed,
#       xlab = "# loops",
#       ylab = "# samples kept", type = "l")
#  par(mfrow = c(1, 1))

## ----honigs, fig.cap="Spectra selected with the Honigs algorithm and bands used", fig.height = 4.5, fig.width = 4, fig.retina = 1, fig.align = 'center', out.extra='style= "background-color: #FFFFFF; border: 10px solid transparent; padding:0px"'----
# type = "A" is for absorbance data
ho <- honigs(X = NIRsoil$spc, k = 10, type = "A") 
# plot calibration spectra
matplot(as.numeric(colnames(NIRsoil$spc)),
        t(NIRsoil$spc[ho$model,]),
        type = "l",
        xlab = "Wavelength", ylab = "Absorbance")
# add bands used during the selection process
abline(v = as.numeric(colnames(NIRsoil$spc))[ho$bands], lty = 2)
Any scripts or data that you put into this service are public.
prospectr documentation built on June 22, 2024, 11:08 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
prospectr
Miscellaneous Functions for Processing and Sample Selection of Spectroscopic Data

inst/doc/prospectr.R
In prospectr: Miscellaneous Functions for Processing and Sample Selection of Spectroscopic Data

Try the prospectr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

prospectr Miscellaneous Functions for Processing and Sample Selection of Spectroscopic Data

inst/doc/prospectr.R In prospectr: Miscellaneous Functions for Processing and Sample Selection of Spectroscopic Data

Try the prospectr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

prospectr
Miscellaneous Functions for Processing and Sample Selection of Spectroscopic Data

inst/doc/prospectr.R
In prospectr: Miscellaneous Functions for Processing and Sample Selection of Spectroscopic Data