Nothing
#' Data-Driven Sparse Partial Least Squares
#'
#' The main function of the package. It does both start the ddsPLS algorithm,
#' using bootstrap analysis. Also it estimates automatically the number of
#' components and the regularization coefficients.
#' One regularization parameter per component only is needed to select both in
#' \code{x} and in \code{y}. Build the optimal model, of the class
#' \code{ddsPLS}.
#' Among the different parameters, the \code{lambda} is the vector of parameters that are
#' tested by the algorithm along each component for each bootstrap sample. The total number
#' of bootstrap samples is fixed by the parameter \code{n_B}, for this parameter, the more
#' the merrier, even if costs more in computation time.
#' This gives access to 3 S3 methods (\code{\link{summary.ddsPLS}}, \code{\link{plot.ddsPLS}} and \code{\link{predict.ddsPLS}}).
#'
#' @param X matrix, the covariate matrix (n,p).
#' @param Y matrix, the response matrix (n,q).
#' @param criterion character, whether \code{diffR2Q2} to be minimized, default,
#' or \code{Q2} to be maximized.
#' @param doBoot logical, whether performing bootstrap operations, default to
#' \code{TRUE}. If equal to
#' \code{FALSE}, a model with is built on the parameters \code{lambda} and the
#' number of components is the length of this vector.
#' In that context, the parameter \code{n_B} is ignored. If equal to
#' \code{TRUE}, the ddsPLS algorithm, through bootstrap validation,
#' is started using \code{lambda} as a grid and \code{n_B} as
#' the total number of bootstrap samples to simulate per component.
#' @param LD boolean. Wether or not to consider low dimensional dataset. If
#' sequal to \code{TRUE}, no low value is estimated for \code{lambda} (\code{lambda0}=0). Else,
#' \code{lambda} is estimated thanks to in order to prevent from including too
#' much variables in the current component.
#' @param lambdas vector, the to be tested values for \code{lambda}.
#' Each value for \code{lambda} can be interpreted in terms of correlation
#' allowed in the model.
#' More precisely, a covariate `x[j]` is not selected if its empirical
#' correlation with all the response variables `y[1..q]` is below \code{lambda}.
#' A response variable `y[k]` is not selected if its empirical correlation
#' with all the covariates `x[1..p]` is below \code{lambda}.
#' Default to \code{seq(0,1,length.out = 30)}.
#' @param n_B integer, the number of to be simulated bootstrap samples.
#' Default to \code{50}.
#' @param n_lambdas integer, the number of lambda values. Taken into account
#' only if \code{lambdas} is \code{NULL}. Default to 100.
#' @param lambda_roof real, the maximum value to be tested by the algorithm for
#' \code{lambda}. This is automatically fixed by the algorithm.
#' @param lowQ2 real, the minimum value of Q^2_B to accept the
#' current lambda value. Default to \code{0.0}.
#' @param NCORES integer, the number of cores used. Default to \code{1}.
#' @param verbose boolean, whether to print current results. Defaut to
#' \code{FALSE}.
#' @param errorMin real, not to be used.
#'
#' @return
#' \item{model}{a list containing the PLS parameters:}
#' \itemize{
#' \item \code{$P}: Loadings for \code{X}.
#' \item \code{$C}: Loadings for \code{Y}.
#' \item \code{$t}: Scores.
#' \item \code{$V}: Weights for \code{Y}.
#' \item \code{$U}: Loadings for \code{X}.
#' \item \code{$U_star}: Loadings for \code{X} in original base: $U_star=U(P'U)^{-1}$.
#' \item \code{$B}: Regression matrix of \code{Y} on \code{X}.
#' \item \code{$muY}: Empirical mean of \code{Y}.
#' \item \code{$muX}: Empirical mean of \code{X}.
#' \item \code{$sdY}: Empirical standard deviation of \code{Y}.
#' \item \code{$sdX}: Empirical standard deviation of \code{X}.
#' }
#' \item{results}{a list containing the ddsPLS descriptors after bootstrap
#' operations:}
#' \itemize{
#' \item \code{$PropQ2hPos}: A list of size \code{R}+1 where \code{R} is the
#' evaluated number of components. Each element is a vector of length
#' \code{n_lambdas}. Each value is the proportion of times the \code{Q2h}
#' statistics is positive among the \code{n_B} estimated ddsPLS models.
#' \item \code{$Q2h}: A list of size \code{R}+1 where \code{R} is the
#' evaluated number of components. Each element is a
#' \code{(n_B,n_lambdas)}-matrix. Each value is the value for the statistics
#' \code{Q2h}.
#' \item \code{$Q2}: : A list of size \code{R}+1 where \code{R} is the
#' evaluated number of components. Each element is a
#' \code{(n_B,n_lambdas)}-matrix. Each value is the value for the statistics
#' \code{Q2}.
#' \item \code{$R2h}: : A list of size \code{R}+1 where \code{R} is the
#' evaluated number of components. Each element is a
#' \code{(n_B,n_lambdas)}-matrix. Each value is the value for the statistics
#' \code{R2h}.
#' \item \code{$R2}: : A list of size \code{R}+1 where \code{R} is the
#' evaluated number of components. Each element is a
#' \code{(n_B,n_lambdas)}-matrix. Each value is the value for the statistics
#' \code{R2}.
#' \item \code{$V}: Empirical means and variances of the weights for
#' \code{Y} for each component.
#' \item \code{$U}: Empirical means and variances of the weights for
#' \code{X} for each component.
#' \item \code{$U_star}: Empirical means and variances of the loadings for
#' \code{X} in original base for each component.
#' \item \code{$C}: Empirical means and variances of the loadings for
#' \code{Y} for each component.
#' \item \code{$P}: Empirical means and variances of the loadings for
#' \code{X} for each component.
#' \item \code{$t}: Empirical means and variances of the score for each
#' component.
#' \item \code{$R2mean_diff_Q2mean}: Differences of the empirical means of
#' the statistics R2 and Q2.
#' \item \code{$Q2hmean}: Empirical means of the statistic Q2h.
#' \item \code{$Q2mean}: Empirical means of the statistic Q2.
#' \item \code{$R2hmean}: Empirical means of the statistic R2h.
#' \item \code{$R2mean}: Empirical means of the statistic R2.
#' \item \code{$R2sd}: Empirical standard deviations of the statistic R2.
#' \item \code{$R2hsd}: Empirical standard deviations of the statistic R2h.
#' \item \code{$Q2sd}: Empirical standard deviations of the statistic Q2.
#' \item \code{$Q2hsd}: Empirical standard deviations of the statistic Q2h.
#' \item \code{$R2_diff_Q2sd}: Differences of the empirical standard
#' deviations of the statistics R2 and Q2.
#' \item \code{$lambdas}: Values tested for \code{lambdas}.
#' }
#' \item{varExplained_in_X}{a list containing the explained variances in
#' \code{X} per component (\code{Comp}) or cumulated (\code{Cumu}).}
#' \item{varExplained}{a list containing the explained variances in
#' \code{Y} per component (\code{Comp}), cumulated (\code{Cumu}), or per
#' dimension of \code{Y} separately. The three last objects detail the
#' explained variances per dimension of \code{Y} per component
#' (\code{PerYPerComp$Comp}) or cumulated (\code{PerYPerComp$Cumu}).}
#' \item{R}{The evaluated number of components.}
#' \item{lambda}{The \code{R} values evaluated for \code{lambda}.}
#' \item{lambda_optim}{a list containing 3 matrices with boolean values
#' corresponding to wether or not each to be tested value for \code{lambda} has
#' been indeed tested.}
#' \item{Q2, Q2h, R2, R2h}{vector. The \code{R} values evaluated for \code{Q2},
#' \code{Q2h}, \code{R2} and \code{R2h}.}
#' \item{lowQ2}{The input parameter of the same name.}
#' \item{X}{The input parameter of the same name.}
#' \item{doBoot}{The input parameter of the same name.}
#' \item{Y_est}{The estimated values for the response variable.}
#' \item{Y_obs}{The observed values for the response variable.}
#' \item{Selection}{A list of two elements of the indices corresponding with
#' the variables selected in \code{X} and in \code{Y}.}
#' \item{call}{The call given to the function.}
#' \item{criterion}{The input parameter of the same name.}
#'
#' @export list
#' @importFrom foreach %dopar%
#' @importFrom foreach %do%
#' @importFrom foreach foreach
#' @importFrom stats cov var
#'
#' @examples
#' n <- 100 ; d <- 2 ; p <- 20 ; q <- 2
#' phi <- matrix(rnorm(n*d),n,d)
#' a <- rep(1,p/4) ; b <- rep(1,p/2)
#' X <- phi%*%matrix(c(1*a,0*a,0*b,1*a,3*b,0*a),nrow = d,byrow = TRUE) +
#' matrix(rnorm(n*p,sd = 1/4),n,p)
#' Y <- phi%*%matrix(c(1,0,0,0),nrow = d,byrow = TRUE) +
#' matrix(rnorm(n*q,sd = 1/4),n,q)
#' res <- ddsPLS(X,Y,verbose=TRUE)
#'
#' @seealso \code{\link{summary.ddsPLS}}, \code{\link{plot.ddsPLS}}, \code{\link{predict.ddsPLS}}
#'
#' @useDynLib ddsPLS
ddsPLS <- function(X,Y,criterion="diffR2Q2",
doBoot=TRUE,LD=FALSE,
lambdas=NULL,n_B=50,n_lambdas=100,lambda_roof=NULL,
lowQ2=0.0,NCORES=1,errorMin=1e-9,verbose=FALSE){
bootstrapWrap <- function(U,V,X,Y,lambdas,lambda_prev,
R,n_B,doBoot=TRUE,n,p,q,n_lambdas,lambda0.){
res <- bootstrap_Rcpp(U,V,X,Y,lambdas,lambda_prev,
R,n_B,doBoot,n,p,q,n_lambdas,lambda0.)
res
}
getLambdas <- function(xSC,ySC,n,p,q){
getLambda0 <- function(xSC,ySC,n,p,q){
Sig_est <- matrix(rep(cov(xSC,ySC),n),nrow = n,byrow = TRUE)
theta <- colMeans((xSC*matrix(rep(ySC,p),n,byrow = FALSE)-Sig_est)^2)
mean(sqrt(log(max(p,q))*theta/n))
}
mean(unlist(lapply(1:q,function(j){getLambda0(xSC,ySC[,j],n,p,q)})))
}
if(!is.na(sum(X))){
if(!is.matrix(Y))
{
Y <- t(t(Y))
}
if(criterion %in% c("diffR2Q2","Q2")){
call <- match.call()
n <- nrow(Y)
p <- ncol(X)
q <- ncol(Y)
# Standardize X and Y train and test.
sdY <- apply(Y,2,sd)
muY <- apply(Y,2,mean)
id_na_y_mean <- which(is.na(sdY))
if(length(id_na_y_mean)>0){
muY[id_na_y_mean] = sdY[id_na_y_mean] <- 0
}
Y_init = scale(Y);
Y_init[which(is.na(Y_init))] <- 0
RSS0 <- sum(scale(Y,scale = FALSE)^2)
RSS0_y <- apply(Y,2,function(yy){sum(scale(yy,scale = FALSE)^2)})
sdX <- apply(X,2,sd)
muX <- apply(X,2,mean)
id_na_x_mean <- which(is.na(sdX))
if(length(id_na_x_mean)>0){
muX[id_na_x_mean] = sdX[id_na_x_mean] <- 0
}
X_init = scale(X);
X_init[which(is.na(X_init))] <- 0
sd_y_x_inv <- matrix(0,p,q)
for(j in 1:q){
sd_y_x_inv[,j] <- sdY[j]
}
for(i in 1:p){
if(sdX[i]!=0){
sd_y_x_inv[i,] <- 1/sdX[i]
}else{
sd_y_x_inv[i,] <- 0
}
}
# Create lambda values
useL0 <- FALSE
lambda0 <- NULL
if(is.null(lambdas)){
if(is.null(lambda_roof)){
lambdas <- seq(0,max(abs(cov(X_init,Y_init))),length.out = n_lambdas)
}else{
lambdas <- seq(0,lambda_roof,length.out = n_lambdas+1)[-n_lambdas-1]
}
if(!LD){
lambda0 <- c(lambda0,getLambdas(X_init,Y_init,n,p,q))
}else{
lambda0 <- 0
}
useL0 <- TRUE
}else{
lambda0 <- 0
}
n_lambdas <- length(lambdas)
# First covariance work
COVInit = crossprod(Y_init,X_init)/(n-1);
maxCOVInit = max(abs(COVInit))
lambda_prev <- rep(0,n)
TEST <- rep(0,n_lambdas)
test_lambdas <- list()
nb_ValsOk <- 0
if(doBoot){
U_out <- matrix(0,p,n); V0 <- matrix(0,q,n)
B_previous <- matrix(0,p,q)
R <- 1
test <- TRUE
R2Best <- rep(0,n)
R2hBest <- rep(0,n)
Q2Best <- rep(0,n)
Q2hBest <- rep(0,n)
explainedVar <- rep(0,n)
Results <- list()
varExplained = varExplainedTot = varExplained_y = varExplainedTot_y <- NULL
Results$R2 = Results$R2h = Results$Q2 = Results$Q2h = Results$PropQ2hPos <- list()
Results$R2mean = Results$R2hmean = Results$Q2mean = Results$Q2hmean =
Results$R2mean_diff_Q2mean = Results$t = Results$P = Results$C =
Results$U_star = Results$U = Results$V <- list()
h <- 0 ; bestID <- 0;
Q2_previous <- -1e9 ; bestVal <- -1e9
if (verbose) {
cat(" ______________\n");
cat(" | ddsPLS |\n");
cat("=====================----------------=====================\n");
}
while (test){
if (verbose) {
cat(paste("Should we build component " ,h+1 , " ? Bootstrap pending...\n",sep=""))
}
if(h>0 & useL0){
## Look at values for lambda_0
X_here <- X_init
Y_here <- Y_init
for(hr in 1:h){
tr <- resOUT$t[,hr]
pr <- resOUT$P[,hr]
cr <- resOUT$C[,hr]
X_here <- X_here - tcrossprod(tr,pr)
Y_here <- Y_here - tcrossprod(tr,cr)
}
lambda0 <- c(lambda0,getLambdas(X_here,Y_here,n,p,q))
useL0 <- TRUE
}
if(!useL0) lambda0 <- rep(0,h+1)
NCORES_w <- min(NCORES,n_B)
n_B_i <- ceiling(n_B/NCORES)
`%my_do%` <- ifelse(NCORES_w!=1,{
out<-`%dopar%`;cl <- makeCluster(NCORES_w)
registerDoParallel(cl);out},{out <- `%do%`;out})
res <- foreach(i_B=1:NCORES_w,.packages = "ddsPLS",
.combine='c',.multicombine=TRUE) %my_do% {
bootstrapWrap(U_out,V0,X_init,Y_init,lambdas,lambda_prev,
R=h+1,n_B_i,doBoot=TRUE,n,p,q,n_lambdas,
lambda0.=lambda0)
}
if(NCORES_w!=1) stopCluster(cl)
## Criterion qulity descriptors
Results$R2[[h+1]] <- do.call(rbind,res[which(names(res)=="R2")])
Results$R2h[[h+1]] <- do.call(rbind,res[which(names(res)=="R2h")])
Results$Q2[[h+1]] <- do.call(rbind,res[which(names(res)=="Q2")])
Results$Q2h[[h+1]] <- do.call(rbind,res[which(names(res)=="Q2h")])
Results$PropQ2hPos[[h+1]] <- apply(Results$Q2h[[h+1]],2,function(v){sum(v>=0)/length(v)})
Results$R2mean[[h+1]] <- colMeans(Results$R2[[h+1]])
Results$R2hmean[[h+1]] <- colMeans(Results$R2h[[h+1]])
Results$Q2mean[[h+1]] <- colMeans(Results$Q2[[h+1]])
Results$Q2hmean[[h+1]] <- colMeans(Results$Q2h[[h+1]])
Results$R2mean_diff_Q2mean[[h+1]] <- Results$R2mean[[h+1]]-Results$Q2mean[[h+1]]
Results$R2sd[[h+1]] <- apply(Results$R2[[h+1]],2,sd)
Results$R2hsd[[h+1]] <- apply(Results$R2h[[h+1]],2,sd)
Results$Q2sd[[h+1]] <- apply(Results$Q2[[h+1]],2,sd)
Results$Q2hsd[[h+1]] <- apply(Results$Q2h[[h+1]],2,sd)
Results$R2_diff_Q2sd[[h+1]] <- apply(Results$R2[[h+1]]-Results$Q2[[h+1]],2,sd)
TEST <- (Results$Q2hmean[[h+1]]>lowQ2)*
(Results$Q2mean[[h+1]]>Q2_previous)*
(lambdas>=lambda0[h+1])==1#*(Results$PropQ2hPos[[h+1]]==1)
nb_ValsOk = sum(TEST)
test_lambdas[[h+1]] <- TEST
# resOUT <- NULL
test_t2 <- FALSE
if (nb_ValsOk>0){
if(criterion=="diffR2Q2" & !LD){
bestVal = min(Results$R2mean_diff_Q2mean[[h+1]][TEST])
bestID = which(Results$R2mean_diff_Q2mean[[h+1]]==bestVal)[1]
}else if(criterion=="Q2" | LD){
bestVal = max(Results$Q2hmean[[h+1]][TEST])
bestID = which(Results$Q2hmean[[h+1]]==bestVal)[1]
}
test_total <- TRUE
while(test_total){
lambda_prev[h+1] = lambdas[bestID]
resMozna <- modelddsPLSCpp_Rcpp(U_out,V0,X_init,Y_init,
lambda_prev,R=h+1,n,p,q,lambda0)
test_t2 <- sum((resMozna$t[,h+1])^2)>errorMin
if(test_t2){
test_total <- FALSE
}else{
bestIDNext <- bestID - 1
test_neighboor <- bestIDNext<=1
if(!test_neighboor){
bestID <- bestIDNext
test_total <- TRUE
}else{
test_total <- FALSE
}
}
}
if(test_t2){
resMozna -> resOUT
resMozna <- NULL
h <- h + 1
for(hh in 1:h){
P_test <- resOUT$P[,hh]
if(which.max(P_test)!=which.max(abs(P_test))){
resOUT$P[,hh] <- -resOUT$P[,hh]
resOUT$C[,hh] <- -resOUT$C[,hh]
resOUT$t[,hh] <- -resOUT$t[,hh]
resOUT$V[,hh] <- -resOUT$V[,hh]
resOUT$U[,hh] <- -resOUT$U[,hh]
resOUT$U_star[,hh] <- -resOUT$U_star[,hh]
}
}
Q2Best[h] = Results$Q2mean[[h]][bestID]
Q2hBest[h] = Results$Q2hmean[[h]][bestID]
R2Best[h] = Results$R2mean[[h]][bestID]
R2hBest[h] = Results$R2hmean[[h]][bestID]
Q2_previous = Q2Best[h]
U_out[,1:h] = resOUT$U[,1:h]
V0[,1:h] = resOUT$V[,1:h]
## Look at variabilities in P
TT <- do.call(rbind,res[which(names(res)=="t")])[,c(1:n)+(bestID-1)*n,drop=FALSE]
PP <- do.call(rbind,res[which(names(res)=="P")])[,c(1:p)+(bestID-1)*p,drop=FALSE]
CC <- do.call(rbind,res[which(names(res)=="C")])[,c(1:q)+(bestID-1)*q,drop=FALSE]
UUSSTTAARR <- do.call(rbind,res[which(names(res)=="U_star")])[,c(1:p)+(bestID-1)*p,drop=FALSE]
UU <- do.call(rbind,res[which(names(res)=="U")])[,c(1:p)+(bestID-1)*p,drop=FALSE]
VV <- do.call(rbind,res[which(names(res)=="V")])[,c(1:q)+(bestID-1)*q,drop=FALSE]
for(i_B in 1:n_B){
sign_i_B <- sign(VV[i_B,which.max(abs(VV[i_B,]))])
if(sign_i_B<0){
VV[i_B,] <- VV[i_B,]*(-1)
PP[i_B,] <- PP[i_B,]*(-1)
CC[i_B,] <- CC[i_B,]*(-1)
UUSSTTAARR[i_B,] <- UUSSTTAARR[i_B,]*(-1)
UU[i_B,] <- UU[i_B,]*(-1)
TT[i_B,] <- TT[i_B,]*(-1)
}
}
TT_boot_mean <- colMeans(TT)
TT_boot_var <- apply(TT,2,var)
VV_boot_mean <- colMeans(VV)
VV_boot_var <- apply(VV,2,var)
CC_boot_mean <- colMeans(CC)
CC_boot_var <- apply(CC,2,var)
PP_boot_mean <- colMeans(PP)
PP_boot_var <- apply(PP,2,var)
UU_boot_mean <- colMeans(UU)
UU_boot_var <- apply(UU,2,var)
UUSSTTAARR_boot_mean <- colMeans(UUSSTTAARR)
UUSSTTAARR_boot_var <- apply(UUSSTTAARR,2,var)
rm(VV,CC,PP,UU,UUSSTTAARR)
Results$t[[h]] <- list(mean=TT_boot_mean,var=TT_boot_var)
Results$U[[h]] <- list(mean=UU_boot_mean,var=UU_boot_var)
Results$V[[h]] <- list(mean=VV_boot_mean,var=VV_boot_var)
Results$C[[h]] <- list(mean=CC_boot_mean,var=CC_boot_var)
Results$P[[h]] <- list(mean=PP_boot_mean,var=PP_boot_var)
Results$U_star[[h]] <- list(mean=UUSSTTAARR_boot_mean,var=UUSSTTAARR_boot_var)
## Next
B_out <- resOUT$B
for (i in 1:p){
if(sdX[i]>errorMin){
B_out[i,] <- B_out[i,]/sdX[i]
}
}
for (j in 1:q){
B_out[,j] <- B_out[,j]*sdY[j]
}
B_out -> resOUT$B
out0 <- list(model=list(muX=muX,muY=muY,B=B_previous),R=1);class(out0)="ddsPLS"
out1 <- list(model=list(muX=muX,muY=muY,B=B_out),R=1);class(out1)="ddsPLS"
Y_est_0 <- predict(out0,X,doDiagnosis=FALSE)$Y_est
Y_est_1 <- predict(out1,X,doDiagnosis=FALSE)$Y_est
cor2_0 <- unlist(lapply(1:q,function(j){
1-sum((Y_est_0[,j]-Y[,j])^2)/sum((muY[j]-Y[,j])^2)
}))
cor2_1 <- unlist(lapply(1:q,function(j){
1-sum((Y_est_1[,j]-Y[,j])^2)/sum((muY[j]-Y[,j])^2)
}))
## Compute regression on current component only
t_r <- resOUT$t[,h]
Pi_r <- (abs(resOUT$V[,h])>1e-20)*1
Y_est_r <- tcrossprod(t_r)%*%Y/sum(t_r^2)
for(j in 1:q){
if(Pi_r[j]==0){
Y_est_r[,j] <- muY[j]
}else{
Y_est_r[,j] <- Y_est_r[,j] + muY[j]
}
}
cor2_r <- unlist(lapply(1:q,function(j){
1-sum((Y_est_r[,j]-Y[,j])^2)/sum((muY[j]-Y[,j])^2)
}))
##
B_previous <- B_out
varExplained <- c(varExplained,mean(cor2_r)*100)#cor2_1-cor2_0)*100)
varExplainedTot <- c(varExplainedTot,mean(cor2_1)*100)
varExplained_y <- rbind(varExplained_y,cor2_r*100)#(cor2_1-cor2_0)*100)
varExplainedTot_y <- rbind(varExplainedTot_y,(cor2_1)*100)
if (verbose) {
ress <- data.frame(
list(
" "=" ",
"lambda"=round(lambdas[bestID],2),
"R2"=round(Results$R2mean[[h]][bestID],2),
"R2h"=round(Results$R2hmean[[h]][bestID],2),
"Q2"=round(Results$Q2mean[[h]][bestID],2),
"Q2h"=round(Results$Q2hmean[[h]][bestID],2),
"VarExpl"=paste(round(varExplained[h]),"%",sep=""),
"VarExpl Tot"=paste(round(varExplainedTot[h]),"%",sep="")
)
)
rownames(ress) <- ""
colnames(ress)[1] <- " "
print(ress)
cat(paste(" ...component ",h," built!","\n",sep=""))
}
}
}
if (!test_t2 | nb_ValsOk<=0){
if(verbose) cat(paste(" ...component ",h+1," not built!","\n",sep=""))
test = FALSE;
if(h==0){
if(verbose){
if(sum(Results$Q2hmean[[h+1]]>lowQ2)==0){
cat(" ...no Q2r large enough for tested lambda.\n")
}
}
}
}
}
if(verbose) {
cat("===================== =====================\n");
cat(" ================\n");
}
lambda_sol=R2Sol=R2hSol=Q2Sol=Q2hSol <- rep(0,h)
for (r in 0:h) {
lambda_sol[r] = lambda_prev[r];
R2Sol[r] = R2Best[r];
R2hSol[r] = R2hBest[r];
Q2Sol[r] = Q2Best[r];
Q2hSol[r] = Q2hBest[r];
}
out <- list()
if(h>0){
out$model <- resOUT
out$model$muY <- muY
out$model$muX <- muX
out$model$sdY <- sdY
out$model$sdX <- sdX
Results$lambdas <- lambdas
out$results <- Results
out$varExplained_in_X <- list()
norm_X_tot <- (n-1)*p
norm_comp <- colSums(out$model$t^2)
var_comp <- norm_comp/norm_X_tot
out$varExplained_in_X$Comp <- var_comp*100
out$varExplained_in_X$Cumu <- cumsum(var_comp)*100
out$varExplained <- list()
out$varExplained$Comp <- varExplained
out$varExplained$Cumu <- varExplainedTot
}else{
out$model = NULL
# out$results <- NULL
# out$resultsNotBuilt <- Results
# out$resultsNotBuilt$lambdas <- lambdas
out$results <- Results
out$results$lambdas <- lambdas
out$model$muY <- muY
out$model$muX <- muX
out$model$sdY <- sdY
out$model$sdX <- sdX
out$results$Q2hmean = out$results$Q2mean =
out$results$R2hmean = out$results$R2mean = 0
}
out$R = h
out$lambda = lambda_sol
out$lambda_optim <- test_lambdas
out$Q2 = Q2Sol
out$Q2h = Q2hSol
out$R2 = R2Sol
out$R2h = R2hSol
out$lowQ2=lowQ2
out$X <- X
out$doBoot <- doBoot
class(out) <- "ddsPLS"
if(h>0){
out$Y_est <- predict(out,X,doDiagnosis=FALSE)$Y_est
out$Y_obs <- Y
colnames(out$Y_est) = colnames(Y)
rownames(out$model$V) = colnames(Y)
rownames(out$model$U) = colnames(X)
rownames(out$model$C) = colnames(Y)
rownames(out$model$P) = colnames(X)
rownames(out$model$U_star) = colnames(X)
rownames(out$model$B) = colnames(X)
colnames(out$model$B) = colnames(Y)
out$varExplained_in_X <- list()
norm_X_tot <- (n-1)*p
norm_comp <- colSums(out$model$t^2)
var_comp <- norm_comp/norm_X_tot
out$varExplained_in_X$Comp <- var_comp*100
out$varExplained_in_X$Cumu <- cumsum(var_comp)*100
out$varExplained$PerY <- varExplainedTot_y[h,,drop=FALSE]
out$varExplained$PerYPerComp <- list()
out$varExplained$PerYPerComp$Comp <- varExplained_y
out$varExplained$PerYPerComp$Cumu <- varExplainedTot_y
selX <- (rowSums(abs(out$model$B))>1e-9)*1
selY <- (colSums(abs(out$model$B))>1e-9)*1
out$Selection <- list(X=which(selX==1),Y=which(selY==1))
out$call <- call
}else{
out$Y_est <- predict(out,X,doDiagnosis=FALSE)$Y_est
out$Y_obs <- Y
colnames(out$Y_est) = colnames(Y)
}
out$criterion=criterion
if (verbose & h>0) {
plot(out)
}
}else{
R <- length(lambdas)
B_total <- matrix(0,p,q)
if(R!=0){
U_out <- matrix(0,p,R); V0 <- matrix(0,q,R)
varExplained=varExplainedTot <- rep(0,R)
varExplained_y=varExplainedTot_y <- matrix(0,R,q)
lambda0 <- rep(0,R)
for(r in 1:R){
resr <- modelddsPLSCpp_Rcpp(U_out,V0,X_init,Y_init,lambdas,
R=r,n,p,q,lambda0)
U_out[,r] = resr$U[,r]
V0[,r] = resr$V[,r]
# Compute regressions
## Compute regression on current component only
t_r <- resr$t[,r]
Pi_r <- (abs(resr$V[,r])>1e-20)*1
Y_est_r <- tcrossprod(t_r)%*%Y/sum(t_r^2)
for(j in 1:q){
if(Pi_r[j]==0){
Y_est_r[,j] <- muY[j]
}else{
Y_est_r[,j] <- Y_est_r[,j] + muY[j]
}
}
cor2_r <- unlist(lapply(1:q,function(j){
1-sum((Y_est_r[,j]-Y[,j])^2)/sum((muY[j]-Y[,j])^2)
}))
B_1 <- resr$B
for (i in 1:p){
if(sdX[i]>errorMin){
B_1[i,] <- B_1[i,]/sdX[i]
}
}
for (j in 1:q){
B_1[,j] <- B_1[,j]*sdY[j]
}
out1 <- list(model=list(muX=muX,muY=muY,B=B_1),R=1);class(out1)="ddsPLS"
Y_est_1 <- predict(out1,X,doDiagnosis=FALSE)$Y_est
cor2_1 <- unlist(lapply(1:q,function(j){
1-sum((Y_est_1[,j]-Y[,j])^2)/sum((muY[j]-Y[,j])^2)
}))
# Compute explained variances
varExplained[r] <- mean(cor2_r)*100
varExplainedTot[r] <- mean(cor2_1)*100
varExplained_y[r,] <- cor2_r*100
varExplainedTot_y[r,] <- cor2_1*100
B_total <- B_total + B_1
}
resr$B <- B_total
idBad <- which(sqrt(colSums(resr$t^2))<1e-9)
if(length(idBad)>0){
resr$U[,idBad] <- 0
resr$V[,idBad] <- 0
}
}else{
resr <- list(U=NULL,V=NULL,t=NULL,B=B_total)
varExplained <- NULL
varExplainedTot <- NULL
varExplained_y <- NULL
varExplainedTot_y <- NULL
}
out <- list()
out$model <- resr
out$criterion=criterion
out$model$muY <- muY
out$model$muX <- muX
out$model$sdY <- sdY
out$model$sdX <- sdX
out$R <- R
out$lambda = lambdas
class(out) <- "ddsPLS"
out$varExplained_in_X <- list()
norm_X_tot <- (n-1)*p
norm_comp <- colSums(out$model$t^2)
var_comp <- norm_comp/norm_X_tot
out$varExplained_in_X$Comp <- var_comp*100
out$varExplained_in_X$Cumu <- cumsum(var_comp)*100
out$varExplained <- list()
out$varExplained$PerY <- varExplainedTot_y
out$varExplained$Comp <- varExplained
out$varExplained$Cumu <- varExplainedTot
out$varExplained$PerYPerComp <- list()
out$varExplained$PerYPerComp$Comp <- varExplained_y
out$varExplained$PerYPerComp$Cumu <- varExplainedTot_y
out$X <- X
out$Y_est <- predict(out,X,doDiagnosis=FALSE)$Y_est
out$Y_obs <- Y
colnames(out$Y_est) = colnames(Y)
rownames(out$model$V) = colnames(Y)
rownames(out$model$U) = colnames(X)
rownames(out$model$C) = colnames(Y)
rownames(out$model$P) = colnames(X)
rownames(out$model$U_star) = colnames(X)
rownames(out$model$B) = colnames(X)
colnames(out$model$B) = colnames(Y)
selX <- (rowSums(abs(out$model$B))>1e-9)*1
selY <- (colSums(abs(out$model$B))>1e-9)*1
out$Selection <- list(X=which(selX==1),Y=which(selY==1))
out$call <- call
}
}else{
out <- NULL
cat("Please select a correct type of criterion among `diffR2Q2` (default), `Q2`.")
}
out$doBoot <- doBoot
}
else{
out <- NULL
cat("There are some missing values in X, please consider dealing with those
before applying ddsPLS.")
}
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.