Nothing
### spls.in.R (2014-10)
###
### Sparse PLS regression called in m.rirls.spls
###
### Copyright 2015-10 Ghislain DURIF
###
### Adapted from R package "spls"
### Reference: Chun H and Keles S (2010)
### "Sparse partial least squares for simultaneous dimension reduction and variable selection",
### Journal of the Royal Statistical Society - Series B, Vol. 72, pp. 3--25.
###
### This file is part of the `plsgenomics' library for R and related languages.
### It is made available under the terms of the GNU General Public
### License, version 2, or at your option, any later version,
### incorporated herein by reference.
###
### This program is distributed in the hope that it will be
### useful, but WITHOUT ANY WARRANTY; without even the implied
### warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
### PURPOSE. See the GNU General Public License for more
### details.
###
### You should have received a copy of the GNU General Public
### License along with this program; if not, write to the Free
### Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
### MA 02111-1307, USA
spls.in <- function(Xtrain, Ytrain, lambda.l1, ncomp, weight.mat=NULL,
adapt=TRUE, center.X=TRUE, center.Y=TRUE,
scale.X=TRUE, scale.Y=TRUE, weighted.center=FALSE) {
#####################################################################
#### Initialisation
#####################################################################
ntrain <- nrow(Xtrain) # nb observations
p <- ncol(Xtrain) # nb covariates
index.p <- c(1:p)
Ytrain <- as.matrix(Ytrain)
q <- ncol(Ytrain)
# weighting matrix V
if(!is.null(weight.mat)) { # weighting in scalar product (in observation space of dimension n)
V <- as.matrix(weight.mat)
} else { # no weighting in scalar product
V <- diag(rep(1, ntrain), nrow=ntrain, ncol=ntrain)
}
#####################################################################
#### centering and scaling
#####################################################################
if (!weighted.center) {
# Xtrain mean
meanXtrain <- apply(Xtrain, 2, mean)
# Xtrain sd
sigmaXtrain <- apply(Xtrain, 2, sd)
# test if predictors with null variance
if ( any( sigmaXtrain < .Machine$double.eps )) {
stop("Some of the columns of the predictor matrix have zero variance.")
}
# centering & eventually scaling X
if(center.X && scale.X) {
sXtrain <- scale( Xtrain, center=meanXtrain, scale=sigmaXtrain)
} else if(center.X && !scale.X) {
sXtrain <- scale( Xtrain, center=meanXtrain, scale=FALSE)
} else {
sXtrain <- Xtrain
}
# Y mean
meanYtrain <- apply(Ytrain, 2, mean)
# Y sd
sigmaYtrain <- apply(Ytrain, 2, sd)
# test if predictors with null variance
if ( any( sigmaYtrain < .Machine$double.eps )) {
stop("The response matrix has zero variance.")
}
# centering & eventually scaling Y
if(center.Y && scale.Y) {
sYtrain <- scale( Ytrain, center=meanYtrain, scale=sigmaYtrain )
} else if(center.Y && !scale.Y) {
sYtrain <- scale( Ytrain, center=meanYtrain, scale=FALSE )
} else {
sYtrain <- Ytrain
}
} else { # weighted scaling
sumV <- sum(diag(V))
# X mean
meanXtrain <- matrix(diag(V), nrow=1) %*% Xtrain / sumV
# X sd
sigmaXtrain <- apply(Xtrain, 2, sd)
# test if predictors with null variance
if ( any( sigmaXtrain < .Machine$double.eps ) ) {
stop("Some of the columns of the predictor matrix have zero variance.")
}
# centering & eventually scaling X
sXtrain <- scale( Xtrain, center=meanXtrain, scale=FALSE )
# Y mean
meanYtrain <- matrix(diag(V), nrow=1) %*% Ytrain / sumV
# Y sd
sigmaYtrain <- apply(Ytrain, 2, sd)
# test if predictors with null variance
if ( any( sigmaYtrain < .Machine$double.eps ) ) {
stop("The response matrix have zero variance.")
}
# centering & eventually scaling Y
sYtrain <- scale( Ytrain, center=meanYtrain, scale=FALSE )
}
#####################################################################
#### Result objects
#####################################################################
betahat <- matrix(0, nrow=p, ncol=1)
betamat <- list()
X1 <- sXtrain
Y1 <- sYtrain
W <- matrix(data=NA, nrow=p, ncol=ncomp) # spls weight over each component
T <- matrix(data=NA, nrow=ntrain, ncol=ncomp) # spls components
P <- matrix(data=NA, nrow=ncomp, ncol=p) # regression of X over T
Q <- matrix(data=NA, nrow=ncomp, ncol=q) # regression of Y over T
#####################################################################
#### Main iteration
#####################################################################
if ( is.null(colnames(Xtrain)) ) {
Xnames <- index.p
} else {
Xnames <- colnames(Xtrain)
}
new2As <- list()
## SPLS
for (k in 1:ncomp) {
## define M
M <- t(X1) %*% (V %*% Y1)
#### soft threshold
Mnorm1 <- median( abs(M) )
M <- M / Mnorm1
## adpative version
if (adapt) {
wi <- 1/abs(M)
what <- ust.adapt(M, lambda.l1, wi)
} else {
## non adaptive version
what <- ust(M, lambda.l1)
}
#### construct active set A
A <- unique( index.p[ what!=0 | betahat[,1]!=0 ] )
new2A <- index.p[ what!=0 & betahat[,1]==0 ]
#### fit pls with selected predictors (meaning in A)
X.A <- sXtrain[ , A, drop=FALSE ]
plsfit <- wpls( Xtrain=X.A, Ytrain=sYtrain, weight.mat=V, ncomp=min(k,length(A)), type="pls1", center.X=FALSE, scale.X=FALSE, center.Y=FALSE, scale.Y=FALSE, weighted.center=FALSE )
#### output storage
# weights
w.k <- matrix(data=what, ncol=1)
w.k <- w.k / sqrt(as.numeric(t(w.k) %*% w.k))
W[,k] <- w.k
# components on total observation space
t.k <- (X1 %*% w.k) / as.numeric(t(w.k) %*% w.k)
T[,k] <- t.k
# regression of X over T
p.k <- (t(X1) %*% (V %*% t.k)) / as.numeric(t(t.k) %*% (V %*% t.k))
P[k,] <- t(p.k)
# regression of Y over T
q.k <- (t(Y1) %*% (V %*% t.k)) / as.numeric(t(t.k) %*% (V %*% t.k))
Q[k,] <- t(q.k)
## update
Y1 <- sYtrain - plsfit$T %*% plsfit$Q
X1 <- sXtrain
X1[,A] <- sXtrain[,A] - plsfit$T %*% plsfit$P
betahat <- matrix( 0, p, q )
betahat[A,] <- matrix( plsfit$coeff, length(A), q )
betamat[[k]] <- betahat # for cv.spls
# variables that join the active set
new2As[[k]] <- new2A
}
##### return objects
## components in lower subspace of selected variables
T.low <- plsfit$T
#### betahat for non centered and non scaled data
if((!scale.X) || (weighted.center)) { # if X non scaled, betahat don't have to be corrected regards sd.x
sd.X <- rep(1, p)
} else { # if X is scaled, it has to
sd.X <- sigmaXtrain
}
if((!scale.Y) || (weighted.center)) {
sd.Y <- 1
} else {
sd.Y <- sigmaYtrain
}
betahat.nc <- sd.Y * betahat / sd.X
intercept <- meanYtrain - ( sd.Y * (drop( (meanXtrain / sd.X) %*% betahat)) )
betahat.nc <- as.matrix(c(intercept, betahat.nc))
#### return object
result <- list( betahat=betahat, betahat.nc=betahat.nc,
X.score=T, X.score.low=T.low, X.loading=P, Y.loading=Q, X.weight=W,
A=A, lenA=length(A))
return(result)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.