locw | R Documentation |
locw
is a generic function for building kNN locally weighted (LW) prediction models. See the help page of function lwplsr
for wrappers.
In kNN-LW models, the prediction is implemented in two sequential steps, hereafter referred to as weighting "1"
and weighting "2"
, respectively. For each new observation to predict, the two steps are as follows:
- Step (weighting) "1"
corresponds to a "binary" weighting. The k
nearest neighbors (in the training data set) of the observation to predict are selected and constitute the neighborhood. The prediction model (implemented in the next step) is only run on this neighborhood. This is equivalent to giving a weight = 1 to all the observations in the neighborhood, and a weight = 0 to the other training observations.
- Step (weighting) "2"
is a within-neighborhood weighting. Each of the k
nearest neighbors receives a statistical weight (possibly different from the usual 1/k
as in the standard PLS) that is entered as input in the prediction model. The weights depend on dissimilarities (calculated beforehand) between the new observation to predict and the k
neighbors.
In locw
, the prediction model used in step "2"
has to be defined in a separate function specified in argument fun
. If there are m
new observations to predict, a list of m
vectors (defining the m
neighborhoods) has to be provided as input to locw
in argument listnn
. Each of the m
vectors contains the indexes of the nearest neighbors (in the training set) of the observation to predict. The m
vectors are not necessarily of the same length, i.e. the neighborhood size can vary between observations to predict. Then locw
runs the prediction model successively for each of the m
neighborhoods, returning m
predictions.
locw(
Xr = NULL, Yr,
Xu = NULL, Yu = NULL,
listnn,
listw = NULL,
fun,
stor = TRUE,
print = TRUE,
...
)
Xr |
A |
Yr |
For quantitative responses: A |
Xu |
A |
Yu |
For quantitative responses: A |
listnn |
A list of |
listw |
A list of |
fun |
A function defining the prediction model to run on the |
stor |
Logical (default to |
print |
Logical (default = |
... |
Optional arguments to pass to function |
A list of outputs (see examples), such as:
y |
Responses for the test data. |
fit |
Predictions for the test data. |
r |
Residuals for the test data. |
fm |
A list of the local fitted models. |
Lesnoff, M., Metz, M., Roger, J.M. Comparison of locally weighted PLS strategies for regression and discrimination on agronomic NIR Data. Submitted to Journal of Chemometrics.
## Load the two example data sets used below:
## datcass is used in the quantitative-response section,
## datforages in the qualitative-response section.
data(datcass)
data(datforages)
############################# QUANTITATIVE RESPONSE
## Xr/yr are passed to locw as the training data, Xu/yu as the
## observations to predict and their observed responses.
Xr <- datcass$Xr
yr <- datcass$yr
Xu <- datcass$Xu
yu <- datcass$yu
## Apply the same detrend preprocessing to both predictor matrices
Xr <- detrend(Xr)
Xu <- detrend(Xu)
## Inspect the preprocessed matrices
headm(Xr)
headm(Xu)
## A locally weighted PLSR model where:
## The dissimilarities between the observations are defined by the Mahalanobis distance
## calculated from a global PLS score space of ncompdis = 10 components.
## - Weighting "1" = selection of k = 50 nearest neighbors
## - Weighting "2" = weights within each neighborhood calculated with "wdist"
ncompdis <- 10
## h is forwarded to wdist when the within-neighborhood weights are computed
h <- 2
k <- 50
## ncomp is forwarded (through "...") to the local model function (plsr)
ncomp <- 20
## Global PLS: only its scores (Tr, Tu) are used, to compute the dissimilarities
z <- pls(Xr, yr, Xu, ncomp = ncompdis)
## Neighbor search in the global score space; the result provides
## the neighbor indexes (listnn) and the corresponding dissimilarities (listd)
resn <- getknn(z$Tr, z$Tu, k = k, diss = "mahalanobis")
listnn <- resn$listnn
## One vector of weights per observation to predict, derived from its dissimilarities
listw <- lapply(resn$listd, wdist, h = h)
## Run the local models: one plsr fit per neighborhood in listnn
fm <- locw(
Xr, yr,
Xu, yu,
listnn = listnn,
listw = listw,
fun = plsr,
ncomp = ncomp,
print = TRUE
)
names(fm)
head(fm$y)
head(fm$fit)
head(fm$r)
## Prediction error summarized by ncomp and k, then the row(s) with the lowest rmsep
z <- mse(fm, ~ ncomp + k)
z[z$rmsep == min(z$rmsep), ]
plotmse(z, group = z$k)
## Without weighting "2"
## (listw is not supplied to locw, so its default NULL is used)
ncompdis <- 10
k <- 50
ncomp <- 20
z <- pls(Xr, yr, Xu, ncomp = ncompdis)
resn <- getknn(z$Tr, z$Tu, k = k, diss = "mahalanobis")
listnn <- resn$listnn
fm <- locw(
Xr, yr,
Xu, yu,
listnn = listnn,
fun = plsr,
ncomp = ncomp,
print = TRUE
)
z <- mse(fm, ~ ncomp + k)
z[z$rmsep == min(z$rmsep), ]
plotmse(z, group = z$k)
############################# QUALITATIVE RESPONSE
## Xr/yr are passed to locw as the training data, Xu/yu as the
## observations to predict and their observed classes.
Xr <- datforages$Xr
yr <- datforages$yr
Xu <- datforages$Xu
yu <- datforages$yu
## Apply the same preprocessing (snv followed by savgol) to both predictor matrices
Xr <- savgol(snv(Xr), n = 21, p = 2, m = 2)
Xu <- savgol(snv(Xu), n = 21, p = 2, m = 2)
headm(Xr)
headm(Xu)
## Class frequencies in the training and test responses
table(yr)
table(yu)
## A locally weighted PLS-QDA model where:
## The dissimilarities between the observations are defined by the Mahalanobis distance
## calculated from a global PLS score space of ncompdis = 10 components.
## - Weighting "1" = selection of k = 50 nearest neighbors
## - Weighting "2" = weights within each neighborhood calculated with "wdist"
ncompdis <- 10
## h is forwarded to wdist when the within-neighborhood weights are computed
h <- 2
k <- 50
## ncomp is forwarded (through "...") to the local model function (plsda)
ncomp <- 10
## Global PLS on the dummy-coded classes: only its scores (Tr, Tu) are used,
## to compute the dissimilarities
z <- pls(Xr, dummy(yr), Xu, ncomp = ncompdis)
resn <- getknn(z$Tr, z$Tu, k = k, diss = "mahalanobis")
listnn <- resn$listnn
listw <- lapply(resn$listd, wdist, h = h)
## da, lda and ncomp are forwarded to plsda through "..."
## (lda = FALSE presumably selects the QDA variant announced above -- to confirm
## against the plsda/daprob help pages)
fm <- locw(
Xr, yr,
Xu, yu,
listnn = listnn,
listw = listw,
fun = plsda,
da = daprob, lda = FALSE,
ncomp = ncomp,
print = TRUE
)
names(fm)
head(fm$y)
head(fm$fit)
head(fm$r)
## Classification error summarized by ncomp and k, then the row(s) with the lowest errp
z <- err(fm, ~ ncomp + k)
z[z$errp == min(z$errp), ]
plotmse(z, nam = "errp", group = z$k)
## A locally weighted PLSDA (non parametric) model
## on preliminary calculated global scores
## The global scores (25 components) are computed once; the first ncompdis
## columns are used for the neighbor search and the full score matrices
## are used as predictors in the local models.
zfm <- pls(Xr, dummy(yr), Xu, ncomp = 25)
ncompdis <- 10
h <- 2
k <- 100
ncomp <- 15
resn <- getknn(zfm$Tr[, 1:ncompdis], zfm$Tu[, 1:ncompdis],
k = k, diss = "mahalanobis")
listnn <- resn$listnn
listw <- lapply(resn$listd, wdist, h = h)
## NOTE(review): listw is computed just above but the call below passes
## listw = NULL, so these weights are not used here -- confirm this is intended.
fm <- locw(
zfm$Tr, yr,
zfm$Tu, yu,
listnn = listnn,
listw = NULL,
fun = plsda, dens = dkerngauss,
da = daprob,
ncomp = ncomp,
print = TRUE
)
z <- err(fm, ~ ncomp + k)
z[z$errp == min(z$errp), ]
plotmse(z, nam = "errp", group = z$k)
############################# OBJECTS RETURNED BY THE FUNCTION
## Small simulated data set: n = 8 observations, p = 6 predictors,
## and a two-column response matrix Y.
n <- 8
p <- 6
## Fixed seed so that the simulated data are reproducible
set.seed(1)
X <- matrix(rnorm(n * p, mean = 10), ncol = p, byrow = TRUE)
row.names(X) <- paste("AA", 1:n, sep = "")
y1 <- 100 * rnorm(nrow(X))
y2 <- 100 * rnorm(nrow(X))
Y <- cbind(y1, y2)
## Re-initialize the random number generator (drops the fixed seed set above)
set.seed(NULL)
Xr <- X
Yr <- Y
## Observations to predict = rows 1, 2 and 4 of the training data
Xu <- X[c(1, 2, 4), ] ; Yu <- Y[c(1, 2, 4), ]
## Global PLS scores, then 5 nearest neighbors (Mahalanobis distance on the scores)
z <- pls(Xr, Yr, Xu, ncomp = 3)
z <- getknn(z$Tr, z$Tu, k = 5, diss = "mahalanobis")
listnn <- z$listnn
listw <- lapply(z$listd, wdist, h = 2)
## stor = TRUE (the default in the usage above); the local fitted models are
## inspected through fm$fm further down
fm <- locw(
Xr, Yr,
Xu, Yu,
listnn = listnn,
fun = plsr,
listw = listw,
ncomp = 2,
stor = TRUE
)
names(fm)
## Test responses, predictions and residuals
fm[c("y", "fit", "r")]
########### Object fm$fm
## = list of the outputs for each predicted observation
## Length of the list = nrow(Xu)
names(fm$fm)
########### Observation i
## Pick the predicted observation to inspect (alternatives left commented out)
i <- 1
#i <- 2
#i <- 3
names(fm$fm[[i]])
fm$fm[[i]]
# Neighbors
fm$fm[[i]]$nn
# b-coefficients of the model
bcoef(fm$fm[[i]])
########### Score and orthogonal distances for the PLS models
lscordis(fm)
lodis(fm, Xr, Xu)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.