Nothing
### logit.spls.R (2014-10)
###
### Ridge Iteratively Reweighted Least Squares followed by Adaptive Sparse PLS regression for binary response
###
### Copyright 2014-10 Ghislain DURIF
###
### Adapted from rpls function in plsgenomics package, copyright 2006-01 Sophie Lambert-Lacroix
###
### This file is part of the `plsgenomics' library for R and related languages.
### It is made available under the terms of the GNU General Public
### License, version 2, or at your option, any later version,
### incorporated herein by reference.
###
### This program is distributed in the hope that it will be
### useful, but WITHOUT ANY WARRANTY; without even the implied
### warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
### PURPOSE. See the GNU General Public License for more
### details.
###
### You should have received a copy of the GNU General Public
### License along with this program; if not, write to the Free
### Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
### MA 02111-1307, USA
#' @title
#' Classification procedure for binary response based on a logistic model,
#' solved by a combination of the Ridge Iteratively Reweighted Least Squares
#' (RIRLS) algorithm and the Adaptive Sparse PLS (SPLS) regression
#' @aliases logit.spls
#'
#' @description
#' The function \code{logit.spls} performs compression and variable selection
#' in the context of binary classification (with possible prediction)
#' using Durif et al. (2017) algorithm based on Ridge IRLS and sparse PLS.
#'
#' @details
#' The columns of the data matrices \code{Xtrain} and \code{Xtest} may
#' not be standardized, since standardizing can be performed by the function
#' \code{logit.spls} as a preliminary step.
#'
#' The procedure described in Durif et al. (2017) is used to compute
#' latent sparse components that are used in a logistic regression model.
#' In addition, when a matrix \code{Xtest} is supplied, the procedure
#' predicts the response associated to these new values of the predictors.
#'
#' @param Xtrain a (ntrain x p) data matrix of predictor values.
#' \code{Xtrain} must be a matrix. Each row corresponds to an observation
#' and each column to a predictor variable.
#' @param Ytrain a (ntrain) vector of (continuous) responses. \code{Ytrain}
#' must be a vector or a one column matrix, and contains the response variable
#' for each observation. \code{Ytrain} should take values in \{0,1\}.
#' @param lambda.ridge a positive real value. \code{lambda.ridge} is the Ridge
#' regularization parameter for the RIRLS algorithm (see details).
#' @param lambda.l1 a positive real value, in [0,1]. \code{lambda.l1} is the
#' sparse penalty parameter for the dimension reduction step by sparse PLS
#' (see details).
#' @param ncomp a positive integer. \code{ncomp} is the number of
#' PLS components. If \code{ncomp=0},then the Ridge regression is performed
#' without any dimension reduction (no SPLS step).
#' @param Xtest a (ntest x p) matrix containing the predictor values for the
#' test data set. \code{Xtest} may also be a vector of length p
#' (corresponding to only one test observation). Default value is NULL,
#' meaning that no prediction is performed.
#' @param adapt a boolean value, indicating whether the sparse PLS selection
#' step sould be adaptive or not (see details).
#' @param maxIter a positive integer. \code{maxIter} is the maximal number of
#' iterations in the Newton-Raphson parts in the RIRLS algorithm (see details).
#' @param svd.decompose a boolean parameter. \code{svd.decompose} indicates
#' wether or not the predictor matrix \code{Xtrain} should be decomposed by
#' SVD (singular values decomposition) for the RIRLS step (see details).
#' @param center.X a boolean value indicating whether the data matrices
#' \code{Xtrain} and \code{Xtest} (if provided) should be centered or not.
#' @param scale.X a boolean value indicating whether the data matrices
#' \code{Xtrain} and \code{Xtest} (if provided) should be scaled or not
#' (\code{scale.X=TRUE} implies \code{center.X=TRUE}) in the spls step.
#' @param weighted.center a boolean value indicating whether the centering
#' should take into account the weighted l2 metric or not in the SPLS step.
#'
#' @return An object of class \code{logit.spls} with the following attributes
#' \item{Coefficients}{the (p+1) vector containing the linear coefficients
#' associated to the predictors and intercept in the logistic model
#' explaining the response Y.}
#' \item{hatY}{the (ntrain) vector containing the estimated response value on
#' the train set \code{Xtrain}.}
#' \item{hatYtest}{the (ntest) vector containing the predicted labels
#' for the observations from \code{Xtest} (if provided).}
#' \item{DeletedCol}{the vector containing the indexes of columns with null
#' variance in \code{Xtrain} that were skipped in the procedure.}
#' \item{A}{the active set of predictors selected by the procedures.
#' \code{A} is a subset of 1:p.}
#' \item{Anames}{Vector of selected predictor names, i.e. the names of the
#' columns from \code{Xtrain} that are in \code{A}.}
#' \item{converged}{a \{0,1\} value indicating whether the RIRLS algorithm did
#' converge in less than \code{maxIter} iterations or not.}
#' \item{X.score}{a (n x ncomp) matrix being the observations coordinates or
#' scores in the new component basis produced by the SPLS step (sparse PLS).
#' Each column t.k of \code{X.score} is a SPLS component.}
#' \item{X.weight}{a (p x ncomp) matrix being the coefficients of predictors
#' in each components produced by sparse PLS. Each column w.k of
#' \code{X.weight} verifies t.k = Xtrain x w.k (as a matrix product).}
#' \item{Xtrain}{the design matrix.}
#' \item{sXtrain}{the scaled predictor matrix.}
#' \item{Ytrain}{the response observations.}
#' \item{sPseudoVar}{the scaled pseudo-response produced by the RIRLS
#' algorithm.}
#' \item{lambda.ridge}{the Ridge hyper-parameter used to fit the model.}
#' \item{lambda.l1}{the sparse hyper-parameter used to fit the model.}
#' \item{ncomp}{the number of components used to fit the model.}
#' \item{V}{the (ntrain x ntrain) matrix used to weight the metric in the
#' sparse PLS step. \code{V} is the inverse of the covariance matrix of the
#' pseudo-response produced by the RIRLS step.}
#' \item{proba}{the (ntrain) vector of estimated probabilities for the
#' observations in code \code{Xtrain}, that are used to estimate the
#' \code{hatY} labels.}
#' \item{proba.test}{the (ntest) vector of predicted probabilities for the
#' new observations in \code{Xtest}, that are used to predict the
#' \code{hatYtest} labels.}
#'
#' @references
#' Durif G., Modolo L., Michaelsson J., Mold J. E., Lambert-Lacroix S.,
#' Picard F. (2017). High Dimensional Classification with combined Adaptive
#' Sparse PLS and Logistic Regression, (in prep),
#' available on (\url{http://arxiv.org/abs/1502.05933}).
#'
#' @author
#' Ghislain Durif (\url{http://thoth.inrialpes.fr/people/gdurif/}).
#'
#' @seealso \code{\link{spls}}, \code{\link{logit.spls.cv}}
#'
#' @examples
#' \dontrun{
#' ### load plsgenomics library
#' library(plsgenomics)
#'
#' ### generating data
#' n <- 100
#' p <- 100
#' sample1 <- sample.bin(n=n, p=p, kstar=20, lstar=2,
#' beta.min=0.25, beta.max=0.75,
#' mean.H=0.2, sigma.H=10, sigma.F=5)
#' X <- sample1$X
#' Y <- sample1$Y
#'
#' ### splitting between learning and testing set
#' index.train <- sort(sample(1:n, size=round(0.7*n)))
#' index.test <- (1:n)[-index.train]
#'
#' Xtrain <- X[index.train,]
#' Ytrain <- Y[index.train,]
#' Xtest <- X[index.test,]
#' Ytest <- Y[index.test,]
#'
#' ### fitting the model, and predicting new observations
#' model1 <- logit.spls(Xtrain=Xtrain, Ytrain=Ytrain, lambda.ridge=2,
#' lambda.l1=0.5, ncomp=2, Xtest=Xtest, adapt=TRUE,
#' maxIter=100, svd.decompose=TRUE)
#'
#' str(model1)
#'
#' ### prediction error rate
#' sum(model1$hatYtest!=Ytest) / length(index.test)
#' }
#'
#' @export
logit.spls <- function(Xtrain, Ytrain, lambda.ridge, lambda.l1, ncomp,
Xtest=NULL, adapt=TRUE, maxIter=100, svd.decompose=TRUE,
center.X=TRUE, scale.X=FALSE, weighted.center=TRUE) {
#####################################################################
#### Initialisation
#####################################################################
Xtrain <- as.matrix(Xtrain)
ntrain <- nrow(Xtrain) # nb observations
p <- ncol(Xtrain) # nb covariates
index.p <- c(1:p)
if(is.factor(Ytrain)) {
Ytrain <- as.numeric(levels(Ytrain))[Ytrain]
}
Ytrain <- as.integer(Ytrain)
Ytrain <- as.matrix(Ytrain)
q <- ncol(Ytrain)
one <- matrix(1,nrow=1,ncol=ntrain)
cnames <- NULL
if(!is.null(colnames(Xtrain))) {
cnames <- colnames(Xtrain)
} else {
cnames <- paste0(1:p)
}
#####################################################################
#### Tests on type input
#####################################################################
# if multicategorical response
if(length(table(Ytrain)) > 2) {
warning("message from logit.spls: multicategorical response")
results = multinom.spls(Xtrain=Xtrain, Ytrain=Ytrain, lambda.ridge=lambda.ridge,
lambda.l1=lambda.l1, ncomp=ncomp, Xtest=Xtest, adapt=adapt,
maxIter=maxIter, svd.decompose=svd.decompose,
center.X=center.X, scale.X=scale.X, weighted.center=weighted.center)
return(results)
}
# On Xtrain
if ((!is.matrix(Xtrain)) || (!is.numeric(Xtrain))) {
stop("Message from logit.spls: Xtrain is not of valid type")
}
if (ncomp > p) {
warning("Message from logit.spls: ncomp>p is not valid, ncomp is set to p")
ncomp <- p
}
if (p==1) {
# stop("Message from logit.spls: p=1 is not valid")
warning("Message from logit.spls: p=1 is not valid, ncomp is set to 0")
ncomp <- 0
}
# On Xtest if necessary
if (!is.null(Xtest)) {
if (is.vector(Xtest)==TRUE) {
Xtest <- matrix(Xtest,ncol=p)
}
Xtest <- as.matrix(Xtest)
ntest <- nrow(Xtest)
if ((!is.matrix(Xtest)) || (!is.numeric(Xtest))) {
stop("Message from logit.spls: Xtest is not of valid type")
}
if (p != ncol(Xtest)) {
stop("Message from logit.spls: columns of Xtest and columns of Xtrain must be equal")
}
}
# On Ytrain
if ((!is.matrix(Ytrain)) || (!is.numeric(Ytrain))) {
stop("Message from logit.spls: Ytrain is not of valid type")
}
if (q != 1) {
stop("Message from logit.spls: Ytrain must be univariate")
}
if (nrow(Ytrain)!=ntrain) {
stop("Message from logit.spls: the number of observations in Ytrain is not equal to the Xtrain row number")
}
# On Ytrain value
if (sum(is.na(Ytrain))!=0) {
stop("Message from logit.spls: NA values in Ytrain")
}
if (sum(!(Ytrain %in% c(0,1)))!=0) {
stop("Message from logit.spls: Ytrain is not of valid type")
}
if (sum(as.numeric(table(Ytrain))==0)!=0) {
stop("Message from logit.spls: there are empty classes")
}
# On hyper parameter: lambda.ridge, lambda.l1
if ((!is.numeric(lambda.ridge)) || (lambda.ridge<0) || (!is.numeric(lambda.l1)) || (lambda.l1<0)) {
stop("Message from logit.spls: lambda is not of valid type")
}
# ncomp type
if ((!is.numeric(ncomp)) || (round(ncomp)-ncomp!=0) || (ncomp<0) || (ncomp>p)) {
stop("Message from logit.spls: ncomp is not of valid type")
}
# maxIter
if ((!is.numeric(maxIter)) || (round(maxIter)-maxIter!=0) || (maxIter<1)) {
stop("Message from logit.spls: maxIter is not of valid type")
}
#####################################################################
#### Move into the reduced space
#####################################################################
r <- min(p, ntrain)
DeletedCol <- NULL
### Standardize the Xtrain matrix
# standard deviation (biased one) of Xtrain
sigma2train <- apply(Xtrain, 2, var) * (ntrain-1)/(ntrain)
# test on sigma2train
# predictor with null variance ?
if (sum(sigma2train < .Machine$double.eps)!=0){
# predicteur with non null variance < 2 ?
if (sum(sigma2train < .Machine$double.eps)>(p-2)){
stop("Message from logit.spls: the procedure stops because number of predictor variables with no null variance is less than 1.")
}
warning("There are covariables with nul variance")
# remove predictor with null variance
Xtrain <- Xtrain[,which(sigma2train >= .Machine$double.eps)]
if (!is.null(Xtest)) {
Xtest <- Xtest[,which(sigma2train>= .Machine$double.eps)]
}
# list of removed predictors
DeletedCol <- index.p[which(sigma2train < .Machine$double.eps)]
# removed null standard deviation
sigma2train <- sigma2train[which(sigma2train >= .Machine$double.eps)]
# new number of predictor
p <- ncol(Xtrain)
r <- min(p,ntrain)
}
if(!is.null(DeletedCol)) {
cnames <- cnames[-DeletedCol]
}
# mean of Xtrain
meanXtrain <- apply(Xtrain,2,mean)
# center and scale Xtrain
if(center.X && scale.X) {
sXtrain <- scale(Xtrain, center=meanXtrain, scale=sqrt(sigma2train))
} else if(center.X && !scale.X) {
sXtrain <- scale(Xtrain, center=meanXtrain, scale=FALSE)
} else {
sXtrain <- Xtrain
}
sXtrain.nosvd <- sXtrain # keep in memory if svd decomposition
# Compute the svd when necessary -> case p > ntrain (high dim)
if ((p > ntrain) && (svd.decompose) && (p>1)) {
# svd de sXtrain
svd.sXtrain <- svd(t(sXtrain))
# number of singular value non null
r <- length(svd.sXtrain$d[abs(svd.sXtrain$d)>10^(-13)])
V <- svd.sXtrain$u[,1:r]
D <- diag(c(svd.sXtrain$d[1:r]))
U <- svd.sXtrain$v[,1:r]
sXtrain <- U %*% D
rm(D)
rm(U)
rm(svd.sXtrain)
}
# center and scale Xtest if necessary
if (!is.null(Xtest)) {
meanXtest <- apply(Xtest,2,mean)
sigma2test <- apply(Xtest,2,var)
if(center.X && scale.X) {
sXtest <- scale(Xtest, center=meanXtrain, scale=sqrt(sigma2train))
} else if(center.X && !scale.X) {
sXtest <- scale(Xtest, center=meanXtrain, scale=FALSE)
} else {
sXtest <- Xtest
}
sXtest.nosvd <- sXtest # keep in memory if svd decomposition
# if svd decomposition
if ((p > ntrain) && (svd.decompose)) {
sXtest <- sXtest%*%V
}
}
#####################################################################
#### Ridge IRLS step
#####################################################################
fit <- wirrls(Y=Ytrain, Z=cbind(rep(1,ntrain),sXtrain), Lambda=lambda.ridge, NbrIterMax=maxIter, WKernel=diag(rep(1,ntrain)))
converged=fit$Cvg
# Check WIRRLS convergence
if (converged==0) {
warning("Message from logit.spls : Ridge IRLS did not converge; try another lambda.ridge value")
}
# if ncomp == 0 then wirrls without spls step
if (ncomp==0) {
BETA <- fit$Coefficients
}
#####################################################################
#### weighted SPLS step
#####################################################################
# if ncomp > 0
if (ncomp!=0) {
#Compute ponderation matrix V and pseudo variable z
#Pseudovar = Eta + W^-1 Psi
# Eta = X * betahat (covariate summary)
Eta <- cbind(rep(1, ntrain), sXtrain) %*% fit$Coefficients
## Run SPLS on Xtrain without svd decomposition
sXtrain = sXtrain.nosvd
if (!is.null(Xtest)) {
sXtest = sXtest.nosvd
}
# mu = h(Eta)
mu = 1 / (1 + exp(-Eta))
# ponderation matrix : V
diagV <- mu * (1-mu)
V <- diag(c(diagV))
# inv de V
diagVinv = 1/ifelse(diagV!=0, diagV, diagV+0.00000001)
Vinv = diag(c(diagVinv))
Psi <- Ytrain-mu
pseudoVar = Eta + Vinv %*% Psi
# V-Center the sXtrain and pseudo variable
sumV=sum(diagV)
# Weighted centering of Pseudo variable
VmeanPseudoVar <- sum(V %*% Eta + Psi ) / sumV
# Weighted centering of sXtrain
VmeansXtrain <- t(diagV)%*%sXtrain/sumV
# SPLS(X, pseudo-var, weighting = V)
resSPLS = spls(Xtrain=sXtrain, Ytrain=pseudoVar, ncomp=ncomp,
weight.mat=V, lambda.l1=lambda.l1, adapt=adapt,
center.X=center.X, scale.X=scale.X,
center.Y=TRUE, scale.Y=FALSE,
weighted.center=weighted.center)
BETA = resSPLS$betahat.nc
} else {
resSPLS = list(A=NULL, X.score=NULL, X.weight=NULL, sXtrain=sXtrain, sYtrain=NULL, V=NULL)
}
#####################################################################
#### classification step
#####################################################################
hatY <- numeric(ntrain)
hatY <- cbind(rep(1,ntrain),sXtrain) %*% BETA
hatY <- as.numeric(hatY>0)
proba <- numeric(ntrain)
proba <- inv.logit( cbind(rep(1,ntrain),sXtrain) %*% BETA )
if (!is.null(Xtest)) {
hatYtest <- cbind(rep(1,ntest),sXtest) %*% BETA
hatYtest <- as.numeric(hatYtest>0)
proba.test = inv.logit( cbind(rep(1,ntest),sXtest) %*% BETA )
} else {
hatYtest <- NULL
proba.test <- NULL
}
#####################################################################
#### Conclude
#####################################################################
Coefficients=BETA
if(p > 1) {
Coefficients[-1] <- diag(c(1/sqrt(sigma2train)))%*%BETA[-1]
} else {
Coefficients[-1] <- (1/sqrt(sigma2train))%*%BETA[-1]
}
Coefficients[1] <- BETA[1] - meanXtrain %*% Coefficients[-1]
#### RETURN
Anames <- cnames[resSPLS$A]
result <- list(Coefficients=Coefficients, hatY=hatY, hatYtest=hatYtest, DeletedCol=DeletedCol,
A=resSPLS$A, Anames=Anames, converged=converged,
X.score=resSPLS$X.score, X.weight=resSPLS$X.weight,
sXtrain=resSPLS$sXtrain, sPseudoVar=resSPLS$sYtrain,
lambda.ridge=lambda.ridge, lambda.l1=lambda.l1, ncomp=ncomp,
V=resSPLS$V, proba=proba, proba.test=proba.test, Xtrain=Xtrain, Ytrain=Ytrain)
class(result) <- "logit.spls"
return(result)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.