R/spls.R

####### Finding the number of components using bootstrap for a given eta
#' Bootstrap-based selection of the number of sparse PLS components for a given eta
#'
#' @param x Matrix of predictors.
#' @param y Vector or matrix of responses.
#' @param fold Number of folds for cross-validation.
#' @param eta Thresholding parameter. eta should be between 0 and 1.
#' @param R Number of resamplings.
#' @param maxnt Maximum number of components allowed in a spls model.
#' @param kappa Parameter to control the effect of the concavity of the 
#' objective function and the closeness of original and surrogate 
#' direction vectors. kappa is relevant only when responses are multivariate. 
#' kappa should be between 0 and 0.5. Default is 0.5.
#' @param select PLS algorithm for variable selection. Alternatives are 
#' "pls2" or "simpls". Default is "pls2".
#' @param fit PLS algorithm for model fitting. Alternatives are "kernelpls", 
#' "widekernelpls", "simpls", or "oscorespls". Default is "simpls".
#' @param scale.x Scale predictors by dividing each predictor variable by 
#' its sample standard deviation?
#' @param scale.y Scale responses by dividing each response variable by its 
#' sample standard deviation?
#' @param plot.it Plot the results.
#' @param typeBCa Include computation for BCa type interval.
#' @param verbose Displays information on the algorithm.
#'
#' @return A list of three elements: mspemat, a matrix of cross-validated 
#' MSPE values; eta.opt, the selected value of eta (numeric); K.opt, the 
#' selected number of components (numeric).
#' 
#' @export
#'
#' @author Jérémy Magnanensi, Frédéric Bertrand\cr
#' \email{frederic.bertrand@@utt.fr}\cr
#' \url{https://fbertran.github.io/homepage/}
#' 
#' @references A new bootstrap-based stopping criterion in PLS component construction,
#' J. Magnanensi, M. Maumy-Bertrand, N. Meyer and F. Bertrand (2016), in The Multiple Facets of Partial Least Squares and Related Methods, 
#' \doi{10.1007/978-3-319-40643-5_18}\cr
#' 
#' A new universal resample-stable bootstrap-based stopping criterion for PLS component construction,
#' J. Magnanensi, F. Bertrand, M. Maumy-Bertrand and N. Meyer, (2017), Statistics and Computing, 27, 757–774. 
#' \doi{10.1007/s11222-016-9651-4}\cr
#' 
#' New developments in Sparse PLS regression, J. Magnanensi, M. Maumy-Bertrand, 
#' N. Meyer and F. Bertrand, (2021), Frontiers in Applied Mathematics and Statistics, 
#' \doi{10.3389/fams.2021.693126}\cr
#' 
#' @examples
#' set.seed(314)
#' data(pine, package = "plsRglm")
#' Xpine<-pine[,1:10]
#' ypine<-log(pine[,11])
#' nbcomp.bootspls(x=Xpine,y=ypine,eta=.2, maxnt=1)
#' \donttest{
#' set.seed(314)
#' data(pine, package = "plsRglm")
#' Xpine<-pine[,1:10]
#' ypine<-log(pine[,11])
#' nbcomp.bootspls(x=Xpine,y=ypine,eta=c(.2,.6))
#' }
nbcomp.bootspls=function (x, y, fold = 10, eta, R=500, maxnt=10, kappa = 0.5, 
                   select = "pls2", fit = "simpls", scale.x = TRUE, 
                   scale.y = FALSE, plot.it = TRUE, typeBCa = TRUE, 
                   verbose=TRUE) 
{
  x <- as.matrix(x)
  n <- nrow(x)
  p <- ncol(x)
  ip <- c(1:p)
  y <- as.matrix(y)
  q <- ncol(y)
  type <- correctp.withoutK(x, y, eta, kappa, select, fit)
  eta <- type$eta
  kappa <- type$kappa
  select <- type$select
  fit <- type$fit
  foldi <- split(sample(1:n), rep(1:fold, length = n))
  mspemat <- matrix(0, length(eta), 1)
  K.opti=rep(0,length(eta))
  ncolBoot <- 5+2*typeBCa
  for (i in 1:length(eta)) {
    if(verbose){cat(paste("eta =", eta[i], "\n"))}
    ### Finding the number of components
    inter=0
    indK=1
    if(verbose){print(indK)}
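    # Fit a sparse PLS model with indK components, bootstrap the coefficients
    # of the regression of centered y on the component scores, and compute
    # the corresponding confidence intervals.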
    resK=spls.Cboot(x, y, eta = eta[i], kappa = kappa, 
                    K = indK, select = select, fit = fit, scale.x = scale.x, 
                    scale.y = scale.y, verbose = FALSE)
    compTsim=resK$tt
    databoot=cbind(scale(y,scale=F),compTsim)
    bootcomp<-boot::boot(data=databoot, statistic=coefs.plsR.CSim, sim="ordinary", stype="i", R=R)
    confYT=t(as.matrix(plsRglm::confints.bootpls(bootcomp, typeBCa = typeBCa)))
    
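    # Add components one at a time: keep going as long as the bootstrap CI
    # bound for the newest component's coefficient stays above 0 and no more
    # than maxnt components have already been extracted.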
    while ((confYT[1,ncolBoot]>0) & (indK<maxnt+1)) {
      indK=indK+1
      if(verbose){print(indK)}
      resK<-spls.Cboot(x, y, eta = eta[i], kappa = kappa, 
                       K = indK, select = select, fit = fit, scale.x = scale.x, 
                       scale.y = scale.y, verbose = FALSE)
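      # Re-check the indK-component model by bootstrapping models with
      # 1, 2, ..., indK components; stop as soon as a CI bound fails to
      # stay above 0.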
      ind=1
      compTsim=resK$tt[,ind]
      databoot=cbind(scale(y,scale=F),compTsim)
      bootcomp<-boot::boot(data=databoot, statistic=coefs.plsR.CSim, sim="ordinary", stype="i", R=R)
      confC=t(as.matrix(plsRglm::confints.bootpls(bootcomp, typeBCa = typeBCa)))
      while ((confC[1,ncolBoot]>0) && (ind<indK)){
        ind=ind+1
        compTsim=resK$tt[,1:ind]
        databoot=cbind(scale(y,scale=F),compTsim)
        bootcomp<-boot::boot(data=databoot, statistic=coefs.plsR.CSim, sim="ordinary", stype="i", R=R)
        confC=t(as.matrix(plsRglm::confints.bootpls(bootcomp, typeBCa = typeBCa)))
      }
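      # If a component before the last one failed the test, force the outer
      # loop to stop by replacing the CI matrix with zeros.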
      if (ind!=indK){
        #confYT=matrix(0, indK, ncolBoot+1)
        confYT=matrix(0, 1, ncolBoot+1)
      }else{
        confYT=confC
      }
    }
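    # The last component tested did not pass the criterion (or maxnt was
    # reached), so indK - 1 components are retained for this eta.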
    K.opti[i]=indK - 1
    
    
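    # Cross-validated MSPE for this eta with the selected number of components.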
    mspemati <- matrix(0, fold, 1)
    for (j in 1:fold) {
      omit <- foldi[[j]]
      object <- spls::spls(x[-omit, , drop = FALSE], y[-omit, 
                                                       , drop = FALSE], eta = eta[i], kappa = kappa, 
                           K = K.opti[i], select = select, fit = fit, scale.x = scale.x, 
                           scale.y = scale.y, trace = FALSE)
      newx <- x[omit, , drop = FALSE]
      newx <- scale(newx, object$meanx, object$normx)
      betamat <- object$betamat
      pred <- newx %*% betamat[[K.opti[i]]] + matrix(1, nrow(newx), 
                                                     1) %*% object$mu
      mspemati[j, ] <- mean(apply((y[omit, 
      ] - pred)^2, 2, mean))
    }
    mspemat[i, ] <- apply(mspemati, 2, mean)
  }
  minpmse <- min(mspemat)
  rownames(mspemat) <- paste("eta=",eta,"; K=",K.opti)
  colnames(mspemat) <- ""
  eta.opt <- max(eta[mspemat == minpmse])
  K.opt <- K.opti[eta == max(eta[mspemat == minpmse])]
  if(verbose){cat(paste("\nOptimal parameters: eta = ", eta.opt, ", ", 
            sep = ""))}
  if(verbose){cat(paste("K = ", K.opt, "\n", sep = ""))}
  if (plot.it) {
    oldpar <- par(no.readonly = TRUE)
    on.exit(par(oldpar))
    par(mar=c(5,7,4,2))
    spls::heatmap.spls(t(mspemat), main = "CV MSPE Plot", 
                       coln = 16, as = "n")
  }
  rownames(mspemat) <- paste("eta=", eta,", K=", K.opti)
  cv <- list(mspemat = mspemat, eta.opt = eta.opt, K.opt = K.opt)
  return(cv)
}

####### Finding the number of components using bootstrap for a given eta (parallel version)
#' Bootstrap-based selection of the number of sparse PLS components for a given eta (parallel version)
#'
#' @param x Matrix of predictors.
#' @param y Vector or matrix of responses.
#' @param fold Number of folds for cross-validation.
#' @param eta Thresholding parameter. eta should be between 0 and 1.
#' @param R Number of resamplings.
#' @param maxnt Maximum number of components allowed in a spls model.
#' @param kappa Parameter to control the effect of the concavity of the 
#' objective function and the closeness of original and surrogate 
#' direction vectors. kappa is relevant only when responses are multivariate. 
#' kappa should be between 0 and 0.5. Default is 0.5.
#' @param select PLS algorithm for variable selection. Alternatives are 
#' "pls2" or "simpls". Default is "pls2".
#' @param fit PLS algorithm for model fitting. Alternatives are "kernelpls", 
#' "widekernelpls", "simpls", or "oscorespls". Default is "simpls".
#' @param scale.x Scale predictors by dividing each predictor variable by 
#' its sample standard deviation?
#' @param scale.y Scale responses by dividing each response variable by its 
#' sample standard deviation?
#' @param plot.it Plot the results.
#' @param typeBCa Include computation for BCa type interval.
#' @param ncpus Number of cpus for parallel computing.
#' @param verbose Displays information on the algorithm.
#'
#' @return A list of three elements: mspemat, a matrix of cross-validated 
#' MSPE values; eta.opt, the selected value of eta (numeric); K.opt, the 
#' selected number of components (numeric).
#' @export
#'
#' @author Jérémy Magnanensi, Frédéric Bertrand\cr
#' \email{frederic.bertrand@@utt.fr}\cr
#' \url{https://fbertran.github.io/homepage/}
#' 
#' @references A new bootstrap-based stopping criterion in PLS component construction,
#' J. Magnanensi, M. Maumy-Bertrand, N. Meyer and F. Bertrand (2016), in The Multiple Facets of Partial Least Squares and Related Methods, 
#' \doi{10.1007/978-3-319-40643-5_18}\cr
#' 
#' A new universal resample-stable bootstrap-based stopping criterion for PLS component construction,
#' J. Magnanensi, F. Bertrand, M. Maumy-Bertrand and N. Meyer, (2017), Statistics and Computing, 27, 757–774. 
#' \doi{10.1007/s11222-016-9651-4}\cr
#' 
#' New developments in Sparse PLS regression, J. Magnanensi, M. Maumy-Bertrand, 
#' N. Meyer and F. Bertrand, (2021), Frontiers in Applied Mathematics and Statistics, 
#' \doi{10.3389/fams.2021.693126}\cr
#' 
#' @examples
#' set.seed(314)
#' data(pine, package = "plsRglm")
#' Xpine<-pine[,1:10]
#' ypine<-log(pine[,11])
#' nbcomp.bootspls.para(x=Xpine,y=ypine,eta=.2, maxnt=1)
#' \donttest{
#' set.seed(314)
#' data(pine, package = "plsRglm")
#' Xpine<-pine[,1:10]
#' ypine<-log(pine[,11])
#' nbcomp.bootspls.para(x=Xpine,y=ypine,eta=c(.2,.6))
#' }
nbcomp.bootspls.para=function (x, y, fold = 10, eta, R=500, maxnt=10, kappa = 0.5, 
                         select = "pls2", fit = "simpls", scale.x = TRUE, 
                         scale.y = FALSE, plot.it = TRUE, typeBCa = TRUE, 
                         ncpus=1, verbose=TRUE) 
{
  lll<-NA
  x <- as.matrix(x)
  n <- nrow(x)
  p <- ncol(x)
  ip <- c(1:p)
  y <- as.matrix(y)
  q <- ncol(y)
  ncolBoot <- 5+2*typeBCa
  type <- correctp.withoutK(x, y, eta, kappa, select, fit)
  eta <- type$eta
  kappa <- type$kappa
  select <- type$select
  fit <- type$fit
  foldi <- split(sample(1:n), rep(1:fold, length = n))
  #cl <- makeCluster(6)
  #registerDoParallel(cl)
  doParallel::registerDoParallel(cores=ncpus)
  requireNamespace("foreach",quietly = TRUE)
  par.K.opti=foreach::foreach(lll=1:length(eta), .combine="cbind", 
                              .export=c("x","y", "eta", "kappa", "select", 
                                        "fit", "scale.x", "scale.y", "maxnt")) %dopar% {
    if(verbose){print(paste("eta =", eta[lll]))}
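    # Each worker runs the bootstrap-based stopping procedure for one value
    # of eta, then computes the cross-validated MSPE for the selected number
    # of components.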
    ### Finding the number of components
    inter=0
    indK=1
    #if(verbose){print(indK)}
    resK=spls.Cboot(x, y, eta = eta[lll], kappa = kappa, 
                    K = indK, select = select, fit = fit, scale.x = scale.x, 
                    scale.y = scale.y, verbose = FALSE)
    compTsim=resK$tt
    databoot=cbind(scale(y,scale=F),compTsim)
    bootcomp<-boot::boot(data=databoot, statistic=coefs.plsR.CSim, sim="ordinary", stype="i", R=R)
    confYT=t(as.matrix(plsRglm::confints.bootpls(bootcomp, typeBCa = typeBCa)))
    
    while ((confYT[1,ncolBoot]>0) & (indK<maxnt+1)) {
      indK=indK+1
      if(verbose){print(indK)}
      resK<-spls.Cboot(x, y, eta = eta[lll], kappa = kappa, 
                       K = indK, select = select, fit = fit, scale.x = scale.x, 
                       scale.y = scale.y, verbose = FALSE)
      ind=1
      compTsim=resK$tt[,ind]
      databoot=cbind(scale(y,scale=F),compTsim)
      bootcomp<-boot::boot(data=databoot, statistic=coefs.plsR.CSim, sim="ordinary", stype="i", R=R)
      confC=t(as.matrix(plsRglm::confints.bootpls(bootcomp, typeBCa = typeBCa)))
      while ((confC[1,ncolBoot]>0) && (ind<indK)){
        ind=ind+1
        compTsim=resK$tt[,1:ind]
        databoot=cbind(scale(y,scale=F),compTsim)
        bootcomp<-boot::boot(data=databoot, statistic=coefs.plsR.CSim, sim="ordinary", stype="i", R=R)
        confC=t(as.matrix(plsRglm::confints.bootpls(bootcomp, typeBCa = typeBCa)))
      }
      if (ind!=indK){
        #confYT=matrix(0, indK, ncolBoot+1)
        confYT=matrix(0, 1, ncolBoot+1)
      }else{
        confYT=confC
      }
    }

    mspemati <- matrix(0, fold, 1)
    for (j in 1:fold) {
      omit <- foldi[[j]]
      object <- spls::spls(x[-omit, , drop = FALSE], y[-omit, 
                                                       , drop = FALSE], eta = eta[lll], kappa = kappa, 
                           K = indK - 1, select = select, fit = fit, scale.x = scale.x, 
                           scale.y = scale.y, trace = FALSE)
      newx <- x[omit, , drop = FALSE]
      newx <- scale(newx, object$meanx, object$normx)
      betamat <- object$betamat
      pred <- newx %*% betamat[[indK - 1]] + matrix(1, nrow(newx), 
                                                    1) %*% object$mu
      mspemati[j, ] <- mean(apply((y[omit, 
      ] - pred)^2, 2, mean))
    }
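    # Each worker returns c(selected K, mean CV MSPE); results for the
    # different eta values are bound column-wise via .combine = "cbind".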
    c(indK - 1, apply(mspemati, 2, mean))
  }
  doParallel::stopImplicitCluster()
  
  if(!is.matrix(par.K.opti)){par.K.opti <- matrix(par.K.opti,ncol=1)}
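  # Row 1 of par.K.opti holds the selected K for each eta, row 2 the
  # corresponding CV MSPE.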
  K.opti=par.K.opti[1,]
  mspemat=as.matrix(par.K.opti[2,],ncol=1)
  minpmse <- min(mspemat)
  rownames(mspemat) <- paste("eta=",eta,"; K=",K.opti)
  colnames(mspemat) <- ""
  eta.opt <- max(eta[mspemat == minpmse])
  K.opt <- K.opti[eta == max(eta[mspemat == minpmse])]
  if(verbose){cat(paste("\nOptimal parameters: eta = ", eta.opt, ", ", 
            sep = ""))}
  if(verbose){cat(paste("K = ", K.opt, "\n", sep = ""))}
  if (plot.it) {
    oldpar <- par(no.readonly = TRUE)
    on.exit(par(oldpar))
    par(mar=c(5,7,4,2))
    spls::heatmap.spls(t(mspemat), main = "CV MSPE Plot", 
                       coln = 16, as = "n")
  }
  cv <- list(mspemat = mspemat, eta.opt = eta.opt, K.opt = K.opt)
  return(cv)
}
