# R/kenstone.R In inspectr: Storing, manipulating and analysis spectroscopy and associated data in R.

#### Defines functions .distToSubset.distToMeankenstone.matrix

```if (!isGeneric("kenstone"))
setGeneric("kenstone", function(x, size, ...)
standardGeneric("kenstone"))

setMethod("kenstone", "Spectra",
function(x, size, ...) {
idx <- kenstone.matrix(spectra(x), size = size, ...)
idx
}
)

## Kennard-Stone algorithm for calibration set selection
##
## -- use it on PCs to compress the data !! --
##
#
# 1. Select the object which is closest to the data mean
# and add it to the subset
#
# 2. Calculate the dissimilarity between the remaining objects
# in the data set and the objects in the subset
#
# 3. Select the object, which is the most dissimilar to the ones
# already included in the subset
#
# 4. return to 2 until the desired number of objects in the subset
# is reached
#
kenstone.matrix <- function(x, size, progress = TRUE, ...){
# Initialisation
k <- 1
sub <- vector(mode = 'numeric', length = size)
sub_mat <- matrix(NA, ncol = ncol(x), nrow = size)

# Compute data mean
mn <- colMeans(x)
# Get closest point to the mean
idx <- which.min(.distToMean(x, mn))
# Put point into subset and remove point from x
sub[k] <- idx
sub_mat[k,] <- x[idx,]
k <- k + 1
#   x <- x[-1*idx,]
x[idx, ] <- NA

# Get farest point from the first selected subset
idx <- which.max(.distToMean(x, sub_mat[1, ]))
# Put point into subset and remove point from x
sub[k] <- idx
sub_mat[k, ] <- x[idx, ]
k <- k + 1
#   x <- x[-1*idx,]
x[idx, ] <- NA

if (progress) {
i_pb <- 1
pb <- txtProgressBar(min = 1, max = size, style = 3)
}

# Sequentially select points
while(k <= size) {
#
# (1) compute the distance between each point in remaining in x
# and each point already in the subset
# (2) for each point in x, select the MINIMUM distance with each
# point already in the subset
# (3) the algorithm is selecting the point in x that have the
# BIGGEST minimum value (ie is most dissimilar to points already
# in the subset)
#
# I know it sounds crazy but it goes better with a pen and paper
#
idx <- which.max(apply(.distToSubset(x, sub_mat[1:(k-1),]), 2, min))
sub[k] <- idx
sub_mat[k,] <- x[idx,]
k <- k + 1
#     x <- x[-1*idx,]
x[idx, ] <- NA

if (progress) {
setTxtProgressBar(pb, k)
}
}

if (progress)
close(pb)

sub
}

# Distance of each point of a matrix x to
# a unique value
.distToMean <- function(x, mn){
apply(x, 1, function(x) dist(rbind(mn, x)))
}

# Distance of each point in a matrix x to
# each point in another matrix sub
.distToSubset <- function(x, sub) {
apply(x, 1, function(y) apply(sub, 1, function(x) dist(rbind(x, y))))
}
```

## Try the inspectr package in your browser

Any scripts or data that you put into this service are public.

inspectr documentation built on May 2, 2019, 5:45 p.m.