pls_algo <- function(W, P, Q, A, M, C, h) {
# Construction of w
AA <- t(A) %*% A
e <- eigen(AA)
q <- e$vectors[,which.max(e$values)]
w <- C %*% A %*% q
w <- w / sqrt(sum(w^2))
# Construction of p
p <- M %*% w
c <- as.numeric(t(w) %*% M %*% w)
p <- p / c
# Construction of q
q <- t(A) %*% w / c
# Assignment of w, p, q in their matrix
W[,h] <- w
P[,h] <- p
Q[,h] <- q
# Update of the first variables
A <- A - (c * p %*% t(q))
M <- M - (c * p %*% t(p))
C <- C - (w %*% t(p))
return(list(W=W, P=P, Q=Q, A=A, M=M, C=C))
}
#' Fonction pls
#'
#' @description Generate a plsda model according to formula and data provided. This model can be cross-validated.
#' @param X The predictors must be a set of quantitative variables.
#' @param Y The response must be a qualitative variable.
#' @param nc The number of component to compute. If cross-validation is activated it will be used as the maximum number of component to return.
#' @param cv If TRUE a cross-validation will be done. Default is FALSE.
#' @param nfold If cv parameter is TRUE, nfold will specify the kind of cross-validation. Default is 0.
#' A value of 0 will be equivalent to a "Leave One Out" cross-validation. If > 0 it will do a k-fold cross-validation with k = nfold.
#' @return A plsda object that contains the component matrix, the trained model as coefficients, the explained variance on X and Y, VIP values, quality of the model and cross-validation results if asked
#' @keywords fit, pls, plsda
#' @export
#' @importFrom stats cor sd
pls <- function(X, Y, nc, cv, nfold) {
X.init <- as.matrix(X)
Y.init <- as.matrix(Y)
# Scaling
X <- scale(X.init)
Y <- scale(Y.init)
# Initialization of general variables
n <- nrow(X)
px <- ncol(X)
qy <- ncol(Y)
namesX <- colnames(X)
namesY <- colnames(Y)
# Initialization of matrices
W <- matrix(0, px, nc)
P <- matrix(0, px, nc)
Q <- matrix(0, qy, nc)
# Initialization of the first variables for the algo
A <- t(X) %*% Y
M <- t(X) %*% X
C <- diag(nrow(A))
# Initializing Cross Validation
if (cv) {
RESS <- NULL
PRESS <- NULL
Q2 <- NULL
# Leave one out if nfold is 0
if (nfold == 0) {
nfold <- nrow(X)
}
# Shuffling data
shuffle <- sample(nrow(X))
X.cv <- X[shuffle,]
Y.cv <- Y[shuffle,]
# Creating folds
folds <- cut(seq(1, nrow(X)), breaks = nfold, labels=FALSE)
Xapp <- NULL
Yapp <- NULL
Xtest <- NULL
Ytest <- NULL
n.cv <- NULL
W.cv <- NULL
P.cv <- NULL
Q.cv <- NULL
A.cv <- NULL
M.cv <- NULL
C.cv <- NULL
# Initializing variables for each fold
for (i in 1:nfold) {
# Segmenting
indexes <- which(folds == i, arr.ind = TRUE)
# Scaling
Xapp[[i]] <- as.matrix(X.cv[-indexes,, drop=FALSE])
Yapp[[i]] <- as.matrix(Y.cv[-indexes,, drop=FALSE])
Xtest[[i]] <- as.matrix(X.cv[indexes,, drop=FALSE])
Ytest[[i]] <- as.matrix(Y.cv[indexes,, drop=FALSE])
# Initialization of general variables
n.cv[[i]] <- nrow(Xapp[[i]])
# Initialization of matrices
W.cv[[i]] <- matrix(0, px, nc)
P.cv[[i]] <- matrix(0, px, nc)
Q.cv[[i]] <- matrix(0, qy, nc)
# Initialization of the first variables for the algo
A.cv[[i]] <- t(Xapp[[i]]) %*% Yapp[[i]]
M.cv[[i]] <- t(Xapp[[i]]) %*% Xapp[[i]]
C.cv[[i]] <- diag(nrow(A.cv[[i]]))
}
}
# Loop : Algo PLS
for (h in 1:nc) {
# Cross validation
if (cv) {
# Evaluation of previous run
Y.hat <- X %*% (W %*% t(Q))
RESS[h] <- sum((Y.hat - Y)^2)
press.cv <- NULL
for (i in 1:nfold) {
# Applying PLS algo on training set
res <- pls_algo(W.cv[[i]], P.cv[[i]], Q.cv[[i]], A.cv[[i]], M.cv[[i]], C.cv[[i]], h)
W.cv[[i]] <- res$W
P.cv[[i]] <- res$P
Q.cv[[i]] <- res$Q
A.cv[[i]] <- res$A
M.cv[[i]] <- res$M
C.cv[[i]] <- res$C
# Evaluation on testing set
Y.hat.cv <- Xtest[[i]] %*% (W.cv[[i]] %*% t(Q.cv[[i]]))
press.cv[i] <- sum((Y.hat.cv - Ytest[[i]])^2)
}
PRESS[h] <- sum(press.cv)
if (h > 1) {
Q2[h-1] <- 1 - (PRESS[h] / RESS[h-1])
if (Q2[h-1] < 0) {
break
}
}
}
# Launching PLS algorithm
res <- pls_algo(W, P, Q, A, M, C, h)
W <- res$W
P <- res$P
Q <- res$Q
A <- res$A
M <- res$M
C <- res$C
}
# If Cross Validation, adaptation of results to the real number of components
if (cv) {
nc <- h - 1
W <- W[, 1:nc, drop=FALSE]
P <- P[, 1:nc, drop=FALSE]
Q <- Q[, 1:nc, drop=FALSE]
}
# Calculation of standards beta coefficients
B <- W %*% t(Q)
# Calculation of PLS regression coefficients
Br <- diag(1/apply(X.init, 2, sd)) %*% B %*% diag(apply(Y.init, 2, sd))
# Constant calculation
Ct <- as.vector(apply(Y.init,2,mean) - apply(X.init,2,mean) %*% Br)
# Calculation of matrix of components
Th <- X %*% W
colnames(Th) <- paste(rep("Comp", nc), 1:nc ,sep=" ")
# Final matrix of the coefficients
dimnames(Br)=list(namesX, namesY)
coeffs <- rbind(Constant = Ct, Br)
# Cross validation results
CV <- NULL
if (cv) {
CV <- cbind(PRESS[1:nc], RESS[1:nc], Q2[1:nc])
colnames(CV) <- c("PRESS", "RESS", "Q2")
}
# Detail of R2 and redundancy
# For X (not cumulative and cumulative)
Rx <- cor(X.init,Th)^2
colnames(Rx) <- paste(rep("Comp",nc), 1:nc, sep=" ")
if (nc == 1) {
Var.Explained.X <- rbind(Rx,Redundancy=mean(Rx))
Rx.cum <- as.matrix(apply(Rx,1,cumsum))
Var.Explained.X.Cum <- rbind(Rx.cum,Redundancy=mean(Rx.cum))
} else {
Var.Explained.X <- rbind(Rx,Redundancy=colMeans(Rx))
Rx.cum <- t(apply(Rx,1,cumsum))
Var.Explained.X.Cum <- rbind(Rx.cum,Redundancy=colMeans(Rx.cum))
}
# For Y (not cumulative and cumulated)
Ry <- cor(Y.init,Th)^2
colnames(Ry) <- paste(rep("Comp",nc), 1:nc, sep=" ")
if (nc == 1) {
Var.Explained.Y <- rbind(Ry, Redundancy=mean(Ry))
Ry.cum <- as.matrix(apply(Ry, 1, cumsum))
Var.Explained.Y.Cum <- rbind(Ry.cum, Redundancy=mean(Ry.cum))
} else {
Var.Explained.Y <- rbind(Ry, Redundancy=colMeans(Ry))
Ry.cum <- t(apply(Ry, 1, cumsum))
Var.Explained.Y.Cum <- rbind(Ry.cum, Redundancy=colMeans(Ry.cum))
}
# Quality of the overall model
quality <- rbind(Var.Explained.X.Cum[nrow(Var.Explained.X.Cum),], Var.Explained.Y.Cum[nrow(Var.Explained.Y.Cum),])
rownames(quality) <- c("R2Xcum","R2Ycum")
# Variable Importance in the Projection (VIP)
VIP <- matrix(0,ncol(X.init), nc)
for (i in 1:ncol(X.init)) {
for(j in 1:nc) {
VIP[i,j] <- sqrt(px/sum(Ry[,1:j]) * sum(as.matrix(Ry[,1:j]) %*% (W[i,1:j]^2)))
}
}
dimnames(VIP) <- list(namesX, paste(rep("Comp", nc), 1:nc, sep=" "))
# Variables returned
result <- structure(list(Comp.Matrix = Th,
Coeffs = coeffs,
Var.Explained.X = Var.Explained.X,
Var.Explained.X.Cum = Var.Explained.X.Cum,
Var.Explained.Y = Var.Explained.Y,
Var.Explained.Y.Cum = Var.Explained.Y.Cum,
Quality = quality,
VIP = VIP,
CV = CV))
class(result) <- "plsda"
return(result)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.