Nothing
#' L-PLS regression
#'
#' @param X1 \code{matrix} of size IxN (middle matrix)
#' @param X2 \code{matrix} of size IxJ (left matrix)
#' @param X3 \code{matrix} of size KxN (top matrix)
#' @param ncomp number of L-PLS components
#' @param doublecenter \code{logical} indicating if centering should be done both ways for X1 (default=TRUE)
#' @param scale \code{logical vector} of length three indicating if each of the matrices should be autoscaled.
#' @param type \code{character} indicating type of L-PLS ("exo"=default, "exo_ort" or "endo")
#' @param impute \code{logical} indicating if SVD-based imputation of missing data is required.
#' @param niter \code{numeric} giving number of iterations in component extraction loop.
#' @param subsetX2 \code{vector} defining optional sub-setting of X2 data.
#' @param subsetX3 \code{vector} defining optional sub-setting of X3 data.
#' @param ... Additional arguments, not used.
#'
#' @description Simultaneous decomposition of three blocks connected in an L pattern.
#'
#' @details Two versions of L-PLS
#' are available: exo- and endo-L-PLS which assume an outward or inward relationship between the
#' main block X1 and the two other blocks X2 and X3.
#'
#' The \code{exo_ort} algorithm returns orthogonal scores and should be chosen for visual
#' exploration in correlation loading plots. If exo-L-PLS with prediction is the main purpose
#' of the model then the non-orthogonal \code{exo} type L-PLS should be chosen for which the
#' predict function has prediction implemented.
#'
#' ![](LPLSsmall.png "L-PLS diagram")
#'
#' @return An object of type \code{lpls} and \code{multiblock} containing all results from the L-PLS
#' analysis. The object type \code{lpls} is associated with functions for correlation loading plots,
#' prediction and cross-validation. The type \code{multiblock} is associated with the default functions
#' for result presentation (\code{\link{multiblock_results}}) and plotting (\code{\link{multiblock_plots}}).
#'
#' @author Solve Sæbø (adapted by Kristian Hovde Liland)
#'
#' @references
#' * Martens, H., Anderssen, E., Flatberg, A.,Gidskehaug, L.H., Høy, M., Westad, F.,Thybo, A., and Martens, M. (2005). Regression of a data matrix on descriptors of both its rows and of its columns via latent variables: L-PLSR. Computational Statistics & Data Analysis, 48(1), 103 – 123.
#' * Sæbø, S., Almøy, T., Flatberg, A., Aastveit, A.H., and Martens, H. (2008). LPLS-regression: a method for prediction and classification under the influence of background information on predictor variables. Chemometrics and Intelligent Laboratory Systems, 91, 121–132.
#' * Sæbø, S., Martens, M. and Martens H. (2010) Three-block data modeling by endo- and exo-LPLS regression. In Handbook of Partial Least Squares: Concepts, Methods and Applications. Esposito Vinzi, V.; Chin, W.W.; Henseler, J.; Wang, H. (Eds.). Springer.
#'
#' @examples
#' # Simulate data set
#' sim <- lplsData(I = 30, N = 20, J = 5, K = 6, ncomp = 2)
#' X1 <- sim$X1; X2 <- sim$X2; X3 <- sim$X3
#' lp <- lpls(X1,X2,X3) # exo-L-PLS
#'
#' @seealso Overviews of available methods, \code{\link{multiblock}}, and methods organised by main structure: \code{\link{basic}}, \code{\link{unsupervised}}, \code{\link{asca}}, \code{\link{supervised}} and \code{\link{complex}}.
#' Functions for computation and extraction of results and plotting are found in \code{\link{lpls_results}}.
#' @export
lpls <- function(X1,X2,X3, ncomp = 2, doublecenter = TRUE, scale = c(FALSE,FALSE,FALSE),
type = c("exo"), impute = FALSE, niter = 25, subsetX2 = NULL, subsetX3 = NULL,...){
# Rename inputs from Smilde, Liland and Næs 2021 to Sæbø et al.
X1a <- X1
X1 <- X2
X2 <- X1a
X3 <- t(X3)
#The three matrices are assumed positioned in this manner
# Sæbø et al. | Smilde, Næs and Liland 2021
# _________ | _______
# | | | | |
# | | | | |
# | t(X3) | | | X3 |
# | | | | |
# |_________| | |_______|
# |
# |
# _______ _________ | _______ _________
# | | | | | | | | |
# | | | | | | | | |
# | X1 | | X2 | | | X1 | | X2 |
# | | | | | | | | |
# |_______| |_________| | |_______| |_________|
#
# if(type!="exo_ort") impute=TRUE
if(impute){
if(any(is.na(X1))) X1 <- svd.imp(X1)
if(any(is.na(X2))) X2 <- svd.imp(X2)
if(any(is.na(X3))) X3 <- svd.imp(X3)
}
# Extracting subsets
if(!is.null(subsetX2)){
X1 <- X1[subsetX2,,drop=F]
X2 <- X2[subsetX2,,drop=F]
}
if(!is.null(subsetX3)){
X3 <- X3[subsetX3,,drop=F]
X2 <- X2[,subsetX3,drop=F]
}
# For saving
X1save <- X1
X2save <- X2
X3save <- X3
# Dimensions
X1dim <- dim(X1)
X2dim <- dim(X2)
X3dim <- dim(X3)
# Centering and scaling
X1 <- scale(X1, scale=scale[2])
mX1 <- attr(X1, "scaled:center")
X3 <- scale(X3, scale=scale[3])
mX3 <- attr(X3, "scaled:center")
rowmX2 <- apply(X2,1,mean, na.rm=TRUE)
colmX2 <- apply(X2,2,mean, na.rm=TRUE)
grandmX2 <- mean(X2, na.rm=TRUE)
# Do double centering of X2?
if(doublecenter){
X2 <- X2 - t(matrix(rep(1, X2dim[2]), ncol = 1)%*%rowmX2) -
matrix(rep(1,X2dim[1]), ncol = 1)%*%colmX2 +
matrix(grandmX2, nrow=X2dim[1], ncol = X2dim[2])
} else {
X2 <- X2 - matrix(grandmX2,nrow=X2dim[1],ncol=X2dim[2])
}
# Column scaling of X2
X2 <- scale(X2, scale = scale[1])
attributes(X1save) <- attributes(X1)
attributes(X2save) <- attributes(X2)
attributes(X3save) <- attributes(X3)
dlist <- list(X1 = as.matrix(t(X1)), X2 = as.matrix(X2), X3 = as.matrix(X3))
T11 <- T12 <- T21 <- T22 <- T31 <- T32 <- numeric(0)
W21 <- W22 <- numeric(0)
P1 <- P3 <- P21 <- P22 <- numeric(0)
D <- diag(rep(1,ncomp))
X1totvar <- X2totvar <- X3totvar <- rep(0,ncomp+1)
for(a in 1:ncomp){
X1totvar[a] <- sum(dlist$X1^2, na.rm=TRUE)
X2totvar[a] <- sum(dlist$X2^2, na.rm=TRUE)
X3totvar[a] <- sum(dlist$X3^2, na.rm=TRUE)
latent <- extractscores(dlist,niter=niter)
# Score vectors
T11 <- cbind(T11, vecnorm(latent[[1]]$t1))
T12 <- cbind(T12, vecnorm(latent[[1]]$t2))
T21 <- cbind(T21, vecnorm(latent[[2]]$t1))
T22 <- cbind(T22, vecnorm(latent[[2]]$t2))
T31 <- cbind(T31, vecnorm(latent[[3]]$t1))
T32 <- cbind(T32, vecnorm(latent[[3]]$t2))
#Fitting an exo-LPLS model
if(type=="exo_ort"){
#Orthogonal scores
P1 <- cbind(P1, projectonto(dlist$X1,T21[,a]))
P3 <- cbind(P3, projectonto(dlist$X3,T22[,a]))
P21 <- cbind(P21, projectonto(dlist$X2,T21[,a]))
P22 <- cbind(P22, projectonto(dlist$X2,T22[,a]))
d <- drop(solve(crossprod(T21[,a]))%*%t(T21[,a])%*%P22[,a])
D[a,a] <- d
#Deflation
dlist$X1 <- dlist$X1 - P1[,a]%*%t(T21[,a])
dlist$X3 <- dlist$X3 - T22[,a]%*%t(P3[,a])
dlist$X2 <- dlist$X2 - T21[,a]%*%t(P21[,a]) - P22[,a]%*%t(T22[,a]) + T21[,a]%*%t(T22[,a])*drop(d)
}#end type=="exo"
if(type=="exo"){
#Simpler model for X2, but non-orthogonal scores
P1 <- projectonto(t(X1),T21)
P3 <- projectonto(t(X3),T22)
P21 <- projectonto(t(X2),T21)
P22 <- projectonto(X2,T22)
D <- solve(crossprod(T21))%*%t(T21)%*%P22
#Deflation
dlist$X1 <- t(X1 - T21%*%t(P1))
dlist$X3 <- X3 - T22%*%t(P3)
dlist$X2 <- X2 - T21%*%D%*%t(T22)
}#end type=="exo"
if(type=="endo"){
P1 <- projectonto(t(X1),T12)
P3 <- projectonto(t(X3),T31)
D <- solve(crossprod(T12))%*%t(T12)%*%projectonto(X2, T31)
#Deflation
dlist$X1 <- t(X1 - T12%*%t(P1))
dlist$X3 <- X3 - T31%*%t(P3)
dlist$X2 <- X2 - T12%*%D%*%t(T31)
}#end type=="endo"
}
X1totvar[ncomp+1] <-sum(dlist$X1^2, na.rm=TRUE)
X2totvar[ncomp+1] <-sum(dlist$X2^2, na.rm=TRUE)
X3totvar[ncomp+1] <-sum(dlist$X3^2, na.rm=TRUE)
X1varprop <- diff((X1totvar[1]-X1totvar)/(X1totvar[1]))
X2varprop <- diff((X2totvar[1]-X2totvar)/(X2totvar[1]))
X3varprop <- diff((X3totvar[1]-X3totvar)/(X3totvar[1]))
#Various output
B1 <- B3 <- Ca <- NULL
if(type!="endo"){
B1 <- T31%*%solve(t(P21)%*%T31)%*%t(P1)
B3 <- T12%*%solve(t(P22)%*%T12)%*%t(P3)
# options(warn=-1)
suppressWarnings({
R1 <- cor(X1,T21, use="pairwise.complete.obs")
R21 <- t(cor(T21,X2, use="pairwise.complete.obs"))
R22 <- t(cor(T22,t(X2), use="pairwise.complete.obs"))
R3 <- cor(X3,T22, use="pairwise.complete.obs")
R2rmean <- cor(rowmX2,T21, use="pairwise.complete.obs")
R2cmean <- cor(colmX2,T22, use="pairwise.complete.obs")
})
# options(warn=0)
} else if(type=="endo"){
V1 <- T11%*%solve(t(P1)%*%T11)
V3 <- T32%*%solve(t(P3)%*%T32)
Ca <- V1%*%D%*%t(V3)
# options(warn=-1)
suppressWarnings({
R1 <- cor(X1,T12, use="pairwise.complete.obs")
R21 <- t(cor(T12,X2, use="pairwise.complete.obs"))
R22 <- t(cor(T31,t(X2), use="pairwise.complete.obs"))
R3 <- cor(X3,T31, use="pairwise.complete.obs")
R2rmean <- cor(rowmX2,T12, use="pairwise.complete.obs")
R2cmean <- cor(colmX2,T31, use="pairwise.complete.obs")
})
# options(warn=0)
}
res <- list(call=match.call())
res$ncomp <- ncomp
res$coefficients <- list(B1=B1, B3=B3, C=Ca)
res$blockScores <- list(T11=T11, T12=T12, T21=T21, T22=T22, T31=T31, T32=T32)
res$blockLoadings<- list(P1=P1, P3=P3, P21=P21, P22=P22, D=D)
res$corloadings <- list(R1=R1, R21=R21, R22=R22, R3=R3, R2rmean=R2rmean, R2cmean=R2cmean)
res$means <- list(mX1=mX1, mX3=mX3, grandmX2=grandmX2, rowmX2=rowmX2, colmX2=colmX2)
res$data <- list(X1=X1save, X2=X2save, X3=X3save)
res$residuals <- dlist
res$options <- list(doublecenter=doublecenter, scale=scale, type=type)
res$vars <- list(X1varprop=X1varprop, X2varprop=X2varprop, X3varprop=X3varprop)
res$info <- list(method = "L-PLS",
scores = "Not used", loadings = "Not used",
blockScores = "Block scores", blockLoadings = "Block loadings")
class(res) <- c('lpls', 'multiblock','list')
return(res)
}
# ---------------------------------------------------------------
extractscores <- function(datalist, niter = 25, truncperc = NULL, truncvec = NULL){
nmat <- length(datalist)
dims <- lapply(datalist, dim)
dimok <- TRUE
for(ii in 1:(nmat-1)){
dimok <- ifelse(dims[[ii]][2]==dims[[(ii+1)]][1], TRUE, FALSE)
}
if(!dimok) stop("Dimension mismatch\n")
scorelist<-list()
for(ii in 1:nmat){
scorelist[[ii]]<-list(t1 = matrix(0, nrow = dims[[ii]][1], ncol = 1),
t2 = matrix(0, nrow = dims[[ii]][2], ncol = 1))
}
#Initiating the NIPALS algorithm
scorelist[[1]]$t1 <- as.matrix(rnorm(dims[[1]][1], 0, 1))
#NIPALS
for(k in 1:niter){
scorelist[[1]]$t2 <- projectonto(datalist[[1]], scorelist[[1]]$t1)
for(j in 2:nmat){
scorelist[[j]]$t2 <- projectonto(datalist[[j]], scorelist[[(j-1)]]$t2)
}
scorelist[[nmat]]$t1 <- projectonto(datalist[[nmat]], scorelist[[nmat]]$t2)
for(j in (nmat-1):1){
scorelist[[j]]$t1 <- projectonto(datalist[[j]], scorelist[[(j+1)]]$t1)
}
}
return(scorelist)
}
# ------------------------------------------------------------
projectonto <- function(A,b){
A.na <- any(is.na(A))
if(is.null(dim(A))) A <- matrix(A, ncol = 1)
if(is.null(dim(b))) b <- matrix(b, ncol = 1)
if(!any(dim(A)==dim(b)[1])) stop("Non-matching dimensions")
q <- dim(b)[2]
#Orthogonalize b if multiple b's
if(A.na && q>1){
svd1 <- svd(b)
b <- b%*%svd1$v
}
if(dim(A)[1]==dim(b)[1]) A <- t(A)
if(!A.na){
proj <- A%*%b%*%solve(crossprod(b))
} else {
miss <- which(is.na(A))
A[miss] <- 0
if(q==1){
bb <- t(b^2)
ones <- matrix(1,dim(A)[2],dim(A)[1])
ones[miss] <- 0
btbinv <- 1/drop(bb%*%ones)
proj <- (A%*%b)*btbinv
} else if(q>1){
proj <- numeric()
for(i in 1:q){
bb <- t(b[,i]^2)
ones <- matrix(1,dim(A)[2],dim(A)[1])
ones[miss] <- 0
btbinv <- 1/drop(bb%*%ones)
proj <- cbind(proj,(A%*%b[,i])*btbinv)
}
proj <- proj%*%t(svd1$v)
}
}
return(proj)
}
vecnorm <- function(vec){
vec / sqrt(drop(crossprod(vec)))
}
svd.imp <- function(X, max.niter=50, expl.min=0.98, interactive=FALSE, tol=1e-3, ploteval=FALSE){
ncomp = min(dim(X))
missing <- which(is.na(X))
X.scaled <- scale(X,scale=FALSE)
colmeans <- attr(X.scaled,"scaled:center")
meanmat <- matrix(1,nrow=dim(X)[1],ncol=1)%*%colmeans
X.imp <- X
impvals <- meanmat[missing]
X.imp[missing] <- impvals
initiate <- TRUE
j<-1
relchange <- 1
change <- numeric()
while(relchange > tol & j<=max.niter){
uvd <- svd(X.imp)
if(initiate){
ssx <- rep(0,ncomp)
for(i in 1:ncomp){
D <- matrix(0,i,i)
diag(D) <- uvd$d[1:i]
ssx[i] <- sum((uvd$u[,1:i,drop=F]%*%D%*%t(uvd$v[,1:i]))^2)
initiate <- FALSE
}
ssxtot <- sum(X.imp^2)
explvar <- ssx/ssxtot
if(interactive){
plot(1:ncomp,explvar)
ncomp <- readline("Choose number of components for imputation \n")
ncomp <- as.numeric(ncomp)
}else{
ncomp <- min(which(explvar > expl.min))
}
}
D <- matrix(0,ncomp,ncomp)
diag(D) <- uvd$d[1:ncomp]
Xhat <- uvd$u[,1:ncomp,drop=F]%*%D%*%t(uvd$v[,1:ncomp,drop=F])
impvallength <- sqrt(sum((impvals)^2))
change[j] <- relchange <- sqrt(sum((impvals-Xhat[missing])^2))
impvals <- Xhat[missing]
X.imp[missing] <- impvals
if(ploteval){
plot(0:j,c(1,change/impvallength),ylab="Change",main=paste("Relative change in imputed values using",ncomp,"components\n"),xlab="iteration")
}
j <- j+1
}
X.imp
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.