#' @title Uninformative variable elimination in PLS (UVE-PLS)
#'
#' @description Artificial noise variables are added to the predictor set before the PLSR
#' model is fitted. All the original variables having lower "importance" than the artificial
#' noise variables are eliminated before the procedure is repeated until a stop criterion is
#' reached.
#'
#' @param y vector of response values (\code{numeric} or \code{factor}).
#' @param X numeric predictor \code{matrix}.
#' @param ncomp integer number of components (default = 10).
#' @param N number of samples Mone Carlo simulations (default = 3).
#' @param ratio the proportion of the samples to use for calibration (default = 0.75).
#' @param MCUVE.threshold thresholding separate signal from noise (default = NA creates
#' automatic threshold from data).
#'
#' @return Returns a vector of variable numbers corresponding to the model
#' having lowest prediction error.
#'
#' @author Tahir Mehmood, Kristian Hovde Liland, Solve Sæbø.
#'
#' @references V. Centner, D. Massart, O. de Noord, S. de Jong, B. Vandeginste, C. Sterna,
#' Elimination of uninformative variables for multivariate calibration, Analytical Chemistry
#' 68 (1996) 3851-3858.
#'
#' @seealso \code{\link{VIP}} (SR/sMC/LW/RC), \code{\link{filterPLSR}}, \code{\link{shaving}},
#' \code{\link{stpls}}, \code{\link{truncation}},
#' \code{\link{bve_pls}}, \code{\link{ga_pls}}, \code{\link{ipw_pls}}, \code{\link{mcuve_pls}},
#' \code{\link{rep_pls}}, \code{\link{spa_pls}},
#' \code{\link{lda_from_pls}}, \code{\link{lda_from_pls_cv}}, \code{\link{setDA}}.
#'
#' @examples
#' data(gasoline, package = "pls")
#' with( gasoline, mcuve_pls(octane, NIR) )
#'
#' @export
mcuve_pls<- function(y,X,ncomp=10, N=3,ratio=0.75, MCUVE.threshold=NA){
# uninformative variable elimination(UVE)-PLS.
# Input: y: m x 1 (measured property)
# X: m x n (Sample matrix)
# ncomp: The max PC for cross-validation
# N: The number of Monte Carlo Simulation.
# ratio: The ratio of calibration samples to the total samples.
modeltype <- "prediction"
if (is.factor(y)) {
modeltype <- "classification"
y.orig <- as.numeric(y)
y <- model.matrix(~ y-1)
tb <- names(table(y.orig))
# tb<-as.numeric(names(table(y)))
} else {
y <- as.matrix(y)
}
# Strip X
X <- unclass(as.matrix(X))
Mx <- nrow(X)
Nx <- ncol(X)
W <- matrix(runif(Nx*Mx,0,1), Mx, Nx)
Z <- cbind(X, W)
K <- floor(Mx*ratio)
C <- matrix(0, N, Nx*2)
ncomp <- min(c(dim(X), ncomp))
for( i in 1:N){
temp <- sample(Mx)
calk <- temp[1:K]
Zcal <- Z[calk, ]; ycal <- y[calk,]
pls.object <- plsr(ycal ~ Zcal, ncomp=min(ncomp, (ncol(Zcal)-1)), validation = "LOO")
if (modeltype == "prediction"){
opt.comp <- which.min(pls.object$validation$PRESS[1,])
} else if (modeltype == "classification"){
classes <- lda_from_pls_cv(pls.object, Zcal, y.orig[calk], ncomp)
opt.comp <- which.max(colSums(classes==y.orig[calk]))
}
C[i,] <- pls.object$coefficients[,1,opt.comp]
}
U <- apply(C, 2, mean)
SD <- apply(C, 2, sd)
RI <- U/SD
if(is.na( MCUVE.threshold)) {
MCUVE.threshold <- max(abs(RI[-(1:Nx)])) # max.RI
}
UVE.selection <- which(abs(RI[(1:Nx)]) > MCUVE.threshold)
if(length(UVE.selection)<= (ncomp +1)) {
UVE.selection <- sort(abs(RI[(1:Nx)]), decreasing=TRUE, index.return = T)$ix [1:ncomp]
}
return(list(mcuve.selection=UVE.selection))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.