R/internal-bootPLS.R

#' @title Internal bootPLS functions
#' 
#' @name internal-bootPLS
#' 
#' @description These are not to be called by the user.
#' 
#' @aliases ust spls.dv correctp correctp.withoutK spls.Cboot cv.split wpls sgpls.T
#' @author Jérémy Magnanensi, Frédéric Bertrand\cr
#' \email{frederic.bertrand@@utt.fr}\cr
#' \url{https://fbertran.github.io/homepage/}
#' 
#' @references A new bootstrap-based stopping criterion in PLS component construction,
#' J. Magnanensi, M. Maumy-Bertrand, N. Meyer and F. Bertrand (2016), in The Multiple Facets of Partial Least Squares and Related Methods, 
#' \doi{10.1007/978-3-319-40643-5_18}\cr
#' 
#' A new universal resample-stable bootstrap-based stopping criterion for PLS component construction,
#' J. Magnanensi, F. Bertrand, M. Maumy-Bertrand and N. Meyer, (2017), Statistics and Computing, 27, 757–774. 
#' \doi{10.1007/s11222-016-9651-4}\cr
#' 
#' New developments in Sparse PLS regression, J. Magnanensi, M. Maumy-Bertrand, 
#' N. Meyer and F. Bertrand, (2021), Frontiers in Applied Mathematics and Statistics, 
#' \doi{10.3389/fams.2021.693126}.
#' 
#' @keywords internal
NULL


### For spls
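## ust(): univariate soft thresholding of a weight vector b. Entries whose
## absolute value is below eta * max(abs(b)) are set to zero; the remaining
## entries are shrunk towards zero by that threshold.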
ust<-function (b, eta)
{
  b.ust <- matrix(0, length(b), 1)
  if (eta < 1) {
    valb <- abs(b) - eta * max(abs(b))
    b.ust[valb >= 0] <- valb[valb >= 0] * sign(b)[valb >= 0]
  }
  return(b.ust)
}

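## spls.dv(): sparse direction vector for one SPLS component, computed from
## Z = t(X) %*% Y by alternating between the surrogate direction a and the
## soft-thresholded vector c (kappa = 0.5 uses the simplified SVD update,
## 0 < kappa < 0.5 the penalised form solved with uniroot()).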
spls.dv<-function (Z, eta, kappa, eps, maxstep)
{
  p <- nrow(Z)
  q <- ncol(Z)
  Znorm1 <- median(abs(Z))
  Z <- Z/Znorm1
  if (q == 1) {
    c <- ust(Z, eta)
  }
  if (q > 1) {
    M <- Z %*% t(Z)
    dis <- 10
    i <- 1
    if (kappa == 0.5) {
      c <- matrix(10, p, 1)
      c.old <- c
      while (dis > eps & i <= maxstep) {
        mcsvd <- svd(M %*% c)
        a <- mcsvd$u %*% t(mcsvd$v)
        c <- ust(M %*% a, eta)
        dis <- max(abs(c - c.old))
        c.old <- c
        i <- i + 1
      }
    }
    if (kappa > 0 & kappa < 0.5) {
      kappa2 <- (1 - kappa)/(1 - 2 * kappa)
      c <- matrix(10, p, 1)
      c.old <- c
      h <- function(lambda) {
        alpha <- solve(M + lambda * diag(p)) %*% M %*% c
        obj <- t(alpha) %*% alpha - 1/kappa2^2
        return(obj)
      }
      if (h(eps) * h(1e+30) > 0) {
        while (h(eps) <= 1e+05) {
          M <- 2 * M
          c <- 2 * c
        }
      }
      while (dis > eps & i <= maxstep) {
        if (h(eps) * h(1e+30) > 0) {
          while (h(eps) <= 1e+05) {
            M <- 2 * M
            c <- 2 * c
          }
        }
        lambdas <- uniroot(h, c(eps, 1e+30))$root
        a <- kappa2 * solve(M + lambdas * diag(p)) %*%
          M %*% c
        c <- ust(M %*% a, eta)
        dis <- max(abs(c - c.old))
        c.old <- c
        i <- i + 1
      }
    }
  }
  return(c)
}

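## correctp(): validates the SPLS hyperparameters. It stops on an invalid eta
## or K and falls back to default values (with a warning) for kappa, select
## and fit.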
correctp=function (x, y, eta, K, kappa, select, fit) 
{
  if (min(eta) < 0 | max(eta) >= 1) {
    if (max(eta) == 1) {
      stop("eta should be strictly less than 1!")
    }
    if (length(eta) == 1) {
      stop("eta should be between 0 and 1!")
    }
    else {
      stop("eta should be between 0 and 1! \n  Choose appropriate range of eta!")
    }
  }
  if (max(K) > ncol(x)) {
    stop("K cannot exceed the number of predictors! Pick a smaller K!")
  }
  if (max(K) >= nrow(x)) {
    stop("K must be smaller than the sample size! Pick a smaller K!")
  }
  if (min(K) <= 0 | !all(K%%1 == 0)) {
    if (length(K) == 1) {
      stop("K should be a positive integer!")
    }
    else {
      stop("K should be a positive integer! \n  Choose appropriate range of K!")
    }
  }
  if (kappa > 0.5 | kappa < 0) {
    warning("kappa should be between 0 and 0.5! kappa=0.5 is used. \n\n")
    kappa <- 0.5
  }
  if (select != "pls2" & select != "simpls") {
    warning("Invalid PLS algorithm for variable selection.\n")
    warning("pls2 algorithm is used. \n\n")
    select <- "pls2"
  }
  fits <- c("simpls", "kernelpls", "widekernelpls", "oscorespls")
  if (!any(fit == fits)) {
    warning("Invalid PLS algorithm for model fitting\n")
    warning("simpls algorithm is used. \n\n")
    fit <- "simpls"
  }
  list(K = K, eta = eta, kappa = kappa, select = select, fit = fit)
}

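## correctp.withoutK(): same checks as correctp(), for callers that do not
## tune the number of components K.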
correctp.withoutK=function (x, y, eta, kappa, select, fit) 
{
  if (min(eta) < 0 | max(eta) >= 1) {
    if (max(eta) == 1) {
      stop("eta should be strictly less than 1!")
    }
    if (length(eta) == 1) {
      stop("eta should be between 0 and 1!")
    }
    else {
      stop("eta should be between 0 and 1! \n  Choose appropriate range of eta!")
    }
  }
  if (kappa > 0.5 | kappa < 0) {
    warning("kappa should be between 0 and 0.5! kappa=0.5 is used. \n\n")
    kappa <- 0.5
  }
  if (select != "pls2" & select != "simpls") {
    warning("Invalid PLS algorithm for variable selection.\n")
    warning("pls2 algorithm is used. \n\n")
    select <- "pls2"
  }
  fits <- c("simpls", "kernelpls", "widekernelpls", "oscorespls")
  if (!any(fit == fits)) {
    warning("Invalid PLS algorithm for model fitting\n")
    warning("simpls algorithm is used. \n\n")
    fit <- "simpls"
  }
  list(eta = eta, kappa = kappa, select = select, fit = fit)
}

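## spls.Cboot(): sparse PLS regression fit (a light adaptation of spls::spls())
## that, besides the sparse coefficient estimates, returns the Y-loadings
## (coeffC) and the component scores (tt) used by the bootstrap routines.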
spls.Cboot=function (x, y, K, eta, kappa = 0.5, select = "pls2", fit = "simpls", 
                     scale.x = TRUE, scale.y = FALSE, eps = 1e-04, maxstep = 100, 
                     verbose = FALSE) 
{
  x <- as.matrix(x)
  n <- nrow(x)
  p <- ncol(x)
  ip <- c(1:p)
  y <- as.matrix(y)
  q <- ncol(y)
  one <- matrix(1, 1, n)
  mu <- one %*% y/n
  y <- scale(y, drop(mu), FALSE)
  meanx <- drop(one %*% x)/n
  x <- scale(x, meanx, FALSE)
  if (scale.x) {
    normx <- sqrt(drop(one %*% (x^2))/(n - 1))
    if (any(normx < .Machine$double.eps)) {
      stop("Some of the columns of the predictor matrix have zero variance.")
    }
    x <- scale(x, FALSE, normx)
  }
  else {
    normx <- rep(1, p)
  }
  if (scale.y) {
    normy <- sqrt(drop(one %*% (y^2))/(n - 1))
    if (any(normy < .Machine$double.eps)) {
      stop("Some of the columns of the response matrix have zero variance.")
    }
    y <- scale(y, FALSE, normy)
  }
  else {
    normy <- rep(1, q)
  }
  betahat <- matrix(0, p, q)
  betamat <- list()
  x1 <- x
  y1 <- y
  type <- correctp(x, y, eta, K, kappa, select, fit)
  eta <- type$eta
  K <- type$K
  kappa <- type$kappa
  select <- type$select
  fit <- type$fit
  if (is.null(colnames(x))) {
    xnames <- c(1:p)
  }
  else {
    xnames <- colnames(x)
  }
  new2As <- list()
  if (verbose) {cat("The variables that join the set of selected variables at each step:\n")}
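  ## For each component: compute the sparse direction vector, enlarge the
  ## active set A, refit an ordinary PLS on the selected predictors, and
  ## deflate y (select = "pls2") or the selected columns of x ("simpls").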
  for (k in 1:K) {
    Z <- t(x1) %*% y1
    what <- spls.dv(Z, eta, kappa, eps, maxstep)
    A <- unique(ip[what != 0 | betahat[, 1] != 0])
    new2A <- ip[what != 0 & betahat[, 1] == 0]
    xA <- x[, A, drop = FALSE]
    plsfit <- pls::plsr(y ~ xA, ncomp = min(k, length(A)), 
                        method = fit, scale = FALSE)
    betahat <- matrix(0, p, q)
    betahat[A, ] <- matrix(coef(plsfit), length(A), q)
    betamat[[k]] <- betahat
    pj <- plsfit$projection
    if (select == "pls2") {
      y1 <- y - x %*% betahat
    }
    if (select == "simpls") {
      pw <- pj %*% solve(t(pj) %*% pj) %*% t(pj)
      x1 <- x
      x1[, A] <- x[, A, drop = FALSE] - x[, A, drop = FALSE] %*% 
        pw
    }
    new2As[[k]] <- new2A
    if (verbose) {
      if (length(new2A) <= 10) {
        cat(paste("- ", k, "th step (K=", k, "):\n", 
                  sep = ""))
        cat(xnames[new2A])
        cat("\n")
      }
      else {
        cat(paste("- ", k, "th step (K=", k, "):\n", 
                  sep = ""))
        nlines <- ceiling(length(new2A)/10)
        for (i in 0:(nlines - 2)) {
          cat(xnames[new2A[(10 * i + 1):(10 * (i + 1))]])
          cat("\n")
        }
        cat(xnames[new2A[(10 * (nlines - 1) + 1):length(new2A)]])
        cat("\n")
      }
    }
  }
  coeffC <- pls::Yloadings(plsfit)[,1:min(K, length(A))]
  tt <- pls::scores(plsfit)[,1:min(K, length(A))]
  if (!is.null(colnames(x))) {
    rownames(betahat) <- colnames(x)
  }
  if (q > 1 & !is.null(colnames(y))) {
    colnames(betahat) <- colnames(y)
  }
  object <- list(x = x, y = y, coeffC = coeffC, tt = tt, betahat = betahat, A = A, betamat = betamat, 
                 new2As = new2As, mu = mu, meanx = meanx, normx = normx, 
                 normy = normy, eta = eta, K = K, kappa = kappa, select = select, 
                 fit = fit, projection = pj)
  class(object) <- "spls"
  object
}
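
## Illustrative sketch only (not run), assuming a numeric predictor matrix x
## and a numeric response y are available:
##   fit <- spls.Cboot(x, y, K = 3, eta = 0.5)
##   fit$tt      # scores of the retained components
##   fit$coeffC  # corresponding Y-loadings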

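## cv.split(): splits the observations into cross-validation folds stratified
## on the levels of y, so that each fold keeps roughly the class proportions
## of the full sample.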
cv.split=function (y, fold) 
{
  n <- length(y)
  group <- table(y)
  x <- c()
  for (i in 1:length(group)) {
    x.group <- c(1:n)[y == names(group)[i]]
    x <- c(x, sample(x.group))
  }
  foldi <- split(x, rep(1:fold, length = n))
  return(foldi)
}

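## wpls(): weighted PLS with observation weights V (as produced by the IRLS
## step of sgpls.T); returns the weight vectors W, scores T, Y-loadings Q and
## X-loadings P.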
wpls=function (x, y, V, K = ncol(x), type = "pls1", center.x = TRUE, 
               scale.x = FALSE) 
{
  n <- nrow(x)
  p <- ncol(x)
  q <- ncol(y)
  x1 <- x
  y1 <- y
  W <- matrix(0, p, K)
  T <- matrix(0, n, K)
  Q <- matrix(0, q, K)
  P <- matrix(0, p, K)
  for (k in 1:K) {
    w <- t(x1) %*% as.matrix(V * y1)
    w <- w/sqrt(sum(w^2))
    W[, k] <- w
    t <- x1 %*% w
    T[, k] <- t
    coef.q <- sum(t * V * y1)/sum(t * V * t)
    Q[, k] <- coef.q
    coef.p <- t(as.matrix(t * V)) %*% x1/sum(t * V * t)
    P[, k] <- coef.p
    if (type == "pls1") {
      y1 <- y1 - t %*% coef.q
      x1 <- x1 - t %*% coef.p
    }
    if (type == "simpls") {
      pj <- w
      pw <- pj %*% solve(t(pj) %*% pj) %*% t(pj)
      x1 <- x1 - x1 %*% pw
    }
  }
  list(W = W, T = T, Q = Q, P = P)
}

### sgpls.T(): sparse generalized (logistic) PLS fit, modified to also return the scores T and the Y-loadings Q

sgpls.T=function (x, y, K, eta, scale.x = TRUE, eps = 1e-05, denom.eps = 1e-20, 
                  zero.eps = 1e-05, maxstep = 100, br = TRUE, ftype = "iden") 
{
  x <- as.matrix(x)
  n <- nrow(x)
  p <- ncol(x)
  ip <- c(1:p)
  y <- as.matrix(y)
  q <- ncol(y)
  one <- matrix(1, 1, n)
  mu <- apply(x, 2, mean)
  x0 <- scale(x, mu, FALSE)
  if (scale.x) {
    sigma <- apply(x, 2, sd)
    x0 <- scale(x0, FALSE, sigma)
  }
  else {
    sigma <- rep(1, ncol(x))
    x0 <- x0
  }
  beta1hat <- matrix(0, p, q)
  beta1hat.old <- beta1hat + 1000
  beta0hat <- 0
  re <- 100
  min.re <- 1000
  nstep <- 0
  nstep.min <- 0
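  ## IRLS loop: refresh the logistic weights V and the working response y0
  ## from the current fit, rebuild the active set A via ust(), and refit the
  ## weighted PLS until the coefficients stabilise or maxstep is reached.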
  while (re > eps & nstep < maxstep) {
    if (nstep == 0) {
      p0 <- (y + 0.5)/2
      V <- as.vector(p0 * (1 - p0))
      A <- c(1:p)
    }
    else {
      exp.xb <- exp(beta0hat + x0 %*% beta1hat)
      p0 <- exp.xb/(1 + exp.xb)
      p0[exp.xb == Inf] <- 1 - zero.eps
      p0[p0 < zero.eps] <- zero.eps
      p0[p0 > (1 - zero.eps)] <- 1 - zero.eps
      V <- as.vector(p0 * (1 - p0))
    }
    switch(ftype, hat = {
      H <- hat(sweep(cbind(rep(1, n), x0), 1, sqrt(V), 
                     "*"), intercept = FALSE)
    }, iden = {
      H <- rep(1, n)
    })
    if (nstep == 0) {
      y0 <- beta0hat + x0 %*% beta1hat + (y - p0)/V
    }
    else {
      V <- V * (H * br + 1)
      y0 <- beta0hat + x0 %*% beta1hat + (y + H * br/2 - 
                                            (H * br + 1) * p0)/V
    }
    y1 <- y0
    y1 <- y1 - mean(y1)
    x1 <- x0
    A.old <- c()
    for (k in 1:K) {
      Z <- t(x1) %*% as.matrix(V * y1)
      Znorm1 <- median(abs(Z))
      Z <- Z/Znorm1
      what <- ust(Z, eta)
      A <- sort(unique(c(A.old, ip[what != 0])))
      x0A <- x0[, A, drop = FALSE]
      plsfit <- wpls(x0A, y0, V, K = min(k, length(A)), 
                     type = "pls1", center.x = FALSE, scale.x = FALSE)
      A.old <- A
      y1 <- y0 - plsfit$T %*% t(plsfit$Q)
      x1 <- x0
      x1[, A] <- x0[, A] - plsfit$T %*% t(plsfit$P)
    }
    x0A <- x0[, A, drop = FALSE]
    plsfit <- wpls(x0A, y0, V, K = min(K, length(A)), type = "pls1", 
                   center.x = FALSE, scale.x = FALSE)
    W <- plsfit$W
    T <- plsfit$T
    P <- plsfit$P
    Q <- plsfit$Q
    beta1hat.old <- beta1hat
    beta1hat <- matrix(0, p, q)
    beta1hat[A, ] <- W %*% solve(t(P) %*% W) %*% t(Q)
    beta0hat <- weighted.mean((y0 - T %*% t(Q)), sqrt(V))
    re <- mean(abs(beta1hat - beta1hat.old))/mean(abs(beta1hat.old) + 
                                                    denom.eps)
    nstep <- nstep + 1
    if (re < min.re & nstep > 1) {
      min.re <- re
      nstep.min <- nstep
      beta1hat.min <- beta1hat
      beta0hat.min <- beta0hat
      A.min <- A
      W.min <- W
    }
  }
  if (re > eps) {
    if (nstep.min > 0) {
      converged <- FALSE
      beta1hat <- beta1hat.min
      beta0hat <- beta0hat.min
      A <- A.min
      W <- W.min
    }
  }
  betahat <- matrix(c(beta0hat, beta1hat))
  if (!is.null(colnames(x))) {
    rownames(betahat) <- 1:nrow(betahat)
    rownames(betahat)[1] <- "intercept"
    rownames(betahat)[2:nrow(betahat)] <- colnames(x)
  }
  else {
    rownames(betahat) <- c(0, paste("x", 1:p, sep = ""))
    rownames(betahat)[1] <- "intercept"
  }
  object <- list(x = x, y = y, x0 = x0, eta = eta, K = K, CoeffC=Q, tt=T, betahat = betahat, 
                 A = A, W = W, mu = mu, sigma = sigma)
  class(object) <- "sgpls"
  object
}
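
## Illustrative sketch only (not run), assuming x is a numeric matrix and y a
## binary 0/1 response:
##   gfit <- sgpls.T(x, y, K = 2, eta = 0.5)
##   gfit$tt      # scores of the weighted PLS fit
##   gfit$CoeffC  # corresponding Y-loadings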
