R/opls-methods.R

#### opls (MultiDataSet) ####

#' @rdname opls
#' @export
setMethod("opls", signature(x = "MultiDataSet"),
          function(x,
                   y = NULL,
                   fig.pdfC = c("none", "interactive", "myfile.pdf")[2],                   
                   info.txtC = c("none", "interactive", "myfile.txt")[2],
                   ...) {

            if (!(info.txtC %in% c("none", "interactive")))
              sink(info.txtC, append = TRUE)
            
            infTxtC <- info.txtC
            if (infTxtC != "none")
              infTxtC <- "interactive"
            
            if (!(fig.pdfC %in% c("none", "interactive")))
              grDevices::pdf(fig.pdfC)
            
            figPdfC <- fig.pdfC
            if (figPdfC != "none")
              figPdfC <- "none"
            
            oplsMsetLs <- vector(mode = "list",
                                 length = length(names(x)))
            names(oplsMsetLs) <- names(x)
            
            for (setC in names(x)) {
              
              if (info.txtC != "none")
                cat("\n\nBuilding the model for the '", setC, "' dataset:\n", sep = "")

              plotL <- TRUE

              setOpls <- tryCatch(ropls::opls(x[[setC]],
                                              y = y,
                                              info.txtC = infTxtC,
                                              fig.pdfC = figPdfC,
                                              ...),
                                  error = function(e) NULL)
              
              if (is.null(setOpls)) {
                
                if (info.txtC != "none")
                  cat("No model could be built for the '",
                      setC,
                      "' dataset.\n",
                      sep = "")
                
                setOpls <- new("opls")
                setOpls@eset <- x[[setC]]
                plotL <- FALSE
                
              } else if (grepl("PLS", setOpls@typeC) &&
                         ropls::getSummaryDF(setOpls)["Total", "pQ2"] > 0.05) {
                
                if (info.txtC != "none")                  
                  cat("No model was included for the '",
                      setC,
                      "' dataset because pQ2 was above 5%.\n",
                      sep = "")
                
                setOpls <- new("opls")
                setOpls@eset <- x[[setC]]
                plotL <- FALSE
                
              }
              
              oplsMsetLs[[setC]] <- setOpls

              if (fig.pdfC != "none" && plotL)
                ropls::plot(oplsMsetLs[[setC]],
                            parSetNameC = setC,
                            plotSubC = paste0("[", setC, "]"),
                            fig.pdfC = fig.pdfC)
              
            }
            
            if (!(fig.pdfC %in% c("none", "interactive")))
              grDevices::dev.off()
            
            if (!(info.txtC %in% c("none", "interactive")))
              sink()
            
            oplsMset <- new("oplsMultiDataSet")
            oplsMset@oplsLs <- oplsMsetLs
            
            return(invisible(oplsMset))
            
          })

#### opls (ExpressionSet) ####

#' @rdname opls
#' @export
setMethod("opls", signature(x = "ExpressionSet"),
          function(x, y = NULL, ...) {
            
            if (is.null(y)) {

              opl <- ropls::opls(t(exprs(x)), ...)

            } else {
              if (!is.character(y)) {
                stop("'y' must be a character when the 'opls' method is applied to an 'ExpressionSet' instance")
              } else {
                if (!(y %in% colnames(pData(x)))) {
                  stop("'y' must be the name of a column of the sampleMetadata slot of the 'ExpressionSet' instance")
                } else {
                  rspFcVcn <- pData(x)[, y]
                  opl <- ropls::opls(t(exprs(x)), rspFcVcn, ...)
                }
              }
            }
            
            modC <- opl@typeC
            sumDF <- getSummaryDF(opl)
            scoreMN <- getScoreMN(opl)
            loadingMN <- getLoadingMN(opl)
            
            vipVn <- coeMN <- orthoScoreMN <- orthoLoadingMN <- orthoVipVn <- NULL
            
            if (grepl("PLS", modC)) {
              
              vipVn <- getVipVn(opl)
              coeMN <- coef(opl)
              
              if (grepl("OPLS", modC)) {
                orthoScoreMN <- getScoreMN(opl, orthoL = TRUE)
                orthoLoadingMN <- getLoadingMN(opl, orthoL = TRUE)
                orthoVipVn <- getVipVn(opl, orthoL = TRUE)
              }
              
            }
            
            rspModC <- gsub("-", "", modC)
            if (rspModC != "PCA")
              rspModC <- paste0(make.names(y), "_", rspModC)
            
            pdaDF <- pData(x)
            fdaDF <- fData(x)
            
            ## x-scores
            
            if (sumDF[, "pre"] > 0) {
              
              scx1Vn <- .genVec(x, "sample", "numeric")
              scx1Vn[rownames(scoreMN)] <- scoreMN[, 1]
              pdaDF[, paste0(rspModC, "_xscor-p1")] <- scx1Vn
              
            }
            
            if (grepl("OPLS", modC) && sumDF[, "ort"] > 0) {
              
              scx2Vn <- .genVec(x, "sample", "numeric")
              scx2Vn[rownames(orthoScoreMN)] <- orthoScoreMN[, 1]
              pdaDF[, paste0(rspModC, "_xscor-o1")] <- scx2Vn
              
            } else if (sumDF[, "pre"] > 1) {
              
              scx2Vn <- .genVec(x, "sample", "numeric")
              scx2Vn[rownames(scoreMN)] <- scoreMN[, 2]
              pdaDF[, paste0(rspModC, "_xscor-p2")] <- scx2Vn
              
            }
            
            ## prediction
            
            if (modC != "PCA") {
              
              fitMCN <- as.matrix(fitted(opl))
              
              for (colI in 1:ncol(fitMCN)) {
                
                fitVcn <- .genVec(x, "sample", mode(fitMCN))
                fitVcn[rownames(fitMCN)] <- fitMCN[, colI]
                
                pdaDF[, paste0(rspModC,
                               ifelse(!is.null(colnames(fitMCN)[colI]),
                                      paste0(colnames(fitMCN)[colI], "_"),
                                      ""),
                               "_fitted")] <- fitVcn
                
              }
            }
            
            ## x-loadings
            
            if (sumDF[, "pre"] > 0) {
              
              loa1Vn <- .genVec(x, "feature", "numeric")
              loa1Vn[rownames(loadingMN)] <- loadingMN[, 1]
              fdaDF[, paste0(rspModC, "_xload-p1")] <- loa1Vn
              
            }
            
            if (grepl("OPLS", modC) && sumDF[, "ort"] > 0) {
              
              loa2Vn <- .genVec(x, "feature", "numeric")
              loa2Vn[rownames(orthoLoadingMN)] <- orthoLoadingMN[, 1]
              fdaDF[, paste0(rspModC, "_xload-o1")] <- loa2Vn
              
            } else if (sumDF[, "pre"] > 1) {
              
              loa2Vn <- .genVec(x, "feature", "numeric")
              loa2Vn[rownames(loadingMN)] <- loadingMN[, 2]
              fdaDF[, paste0(rspModC, "_xload-p2")] <- loa2Vn
              
            }
            
            ## VIP
            
            if (!is.null(vipVn)) {
              
              pvipVn <- .genVec(x, "feature", "numeric")
              pvipVn[names(vipVn)] <- vipVn
              fdaDF[, paste0(rspModC,
                             "_VIP",
                             ifelse(!is.null(orthoVipVn),
                                    "-pred",
                                    ""))] <- pvipVn
              if (!is.null(orthoVipVn)) {
                ovipVn <- .genVec(x, "feature", "numeric")
                ovipVn[names(orthoVipVn)] <- orthoVipVn
                fdaDF[, paste0(rspModC,
                               "_VIP-ortho")] <- ovipVn
              }
            }
            
            ## coefficients
            
            if (!is.null(coeMN)) {
              
              for (colI in 1:ncol(coeMN)) {
                
                coeVn <- .genVec(x, "feature", "numeric")
                coeVn[rownames(coeMN)] <- coeMN[, colI]
                
                if (ncol(coeMN) == 1) {
                  fdaDF[, paste0(rspModC, "_coef")] <- coeVn
                } else
                  fdaDF[, paste0(rspModC, "_", colnames(coeMN)[colI], "-coef")] <- coeVn
                
              }
            }
            
            pData(x) <- pdaDF
            fData(x) <- fdaDF
            
            opl@eset <- x
            
            return(invisible(opl))
            
          })

#### opls (data.frame) ####

#' @rdname opls
#' @export
setMethod("opls", signature(x = "data.frame"),
          function(x, ...) {
            if (!all(sapply(x, data.class) == "numeric")) {
              stop("'x' data frame must contain columns of 'numeric' vectors only", call. = FALSE)
            } else
              x <- as.matrix(x)
            opl <- opls(x, ...)
            return(invisible(opl))
          })

####    opls  (matrix)  ####

#' PCA, PLS(-DA), and OPLS(-DA)
#'
#' PCA, PLS, and OPLS regression, classification, and cross-validation with the
#' NIPALS algorithm
#'
#' @name opls
#' @aliases opls opls,ExpressionSet-method opls,data.frame-method
#' opls,matrix-method
#' @docType methods
#' @param x Numerical data frame or matrix (observations x variables; NAs are
#' allowed); or ExpressionSet object with non empty exprs, for PCA, and
#' phenoData@data, for (O)PLS(-DA), slots
#' @param y Response to be modelled: Either 1) 'NULL' for PCA (default) or 2) a
#' numerical vector (same length as 'x' row number) for single response (O)PLS,
#' or 3) a numerical matrix (same row number as 'x') for multiple response PLS,
#' 4) a factor (same length as 'x' row number) for (O)PLS-DA, or 5) a character
#' indicating the name of the column of the phenoData@data to be used, when x
#' is an ExpressionSet object. Note that, for convenience, character vectors
#' are also accepted for (O)PLS-DA as well as single column numerical (resp.
#' character) matrix for (O)PLS (respectively (O)PLS-DA). NAs are allowed in
#' numeric responses.
#' @param predI Integer: number of components (predictive componenents in case
#' of PLS and OPLS) to extract; for OPLS, predI is (automatically) set to 1; if
#' set to NA [default], autofit is performed: a maximum of 10 components are
#' extracted until (i) PCA case: the variance is less than the mean variance of
#' all components (note that this rule requires all components to be computed
#' and can be quite time-consuming for large datasets) or (ii) PLS case: either
#' R2Y of the component is < 0.01 (N4 rule) or Q2Y is < 0 (for more than 100
#' observations) or 0.05 otherwise (R1 rule)
#' @param orthoI Integer: number of orthogonal components (for OPLS only); when
#' set to 0 [default], PLS will be performed; otherwise OPLS will be peformed;
#' when set to NA, OPLS is performed and the number of orthogonal components is
#' automatically computed by using the cross-validation (with a maximum of 9
#' orthogonal components).
#' @param algoC Default algorithm is 'svd' for PCA (in case of no missing
#' values in 'x'; 'nipals' otherwise) and 'nipals' for PLS and OPLS; when
#' asking to use 'svd' for PCA on an 'x' matrix containing missing values, NAs
#' are set to half the minimum of non-missing values and a warning is generated
#' @param crossvalI Integer: number of cross-validation segments (default is
#' 7); The number of samples (rows of 'x') must be at least >= crossvalI
#' @param log10L Should the 'x' matrix be log10 transformed? Zeros are set to 1
#' prior to transformation
#' @param permI Integer: number of random permutations of response labels to
#' estimate R2Y and Q2Y significance by permutation testing [default is 20 for
#' single response models (without train/test partition), and 0 otherwise]
#' @param scaleC Character: either no centering nor scaling ('none'),
#' mean-centering only ('center'), mean-centering and pareto scaling
#' ('pareto'), or mean-centering and unit variance scaling ('standard')
#' [default]
#' @param subset Integer vector: indices of the observations to be used for
#' training (in a classification scheme); use NULL [default] for no partition
#' of the dataset; use 'odd' for a partition of the dataset in two equal sizes
#' (with respect to the classes proportions)
#' @param plotSubC Character: Graphic subtitle
#' @param fig.pdfC Character: File name with '.pdf' extension for the figure;
#' if 'interactive' (default), figures will be displayed interactively; if 'none',
#' no figure will be generated
#' @param info.txtC Character: File name with '.txt' extension for the printed
#' results (call to sink()'); if 'interactive' (default), messages will be
#' printed on the screen; if 'none', no verbose will be generated

#' @param printL Logical: deprecated: use the 'info.txtC' argument instead
#' @param plotL Logical: deprecated: use the 'fig.pdfC' argument instead
#' @param .sinkC Character: deprecated: use the 'info.txtC' argument instead
#' @param ... Currently not used.
#' @return An S4 object of class 'opls' containing the following slots:
#' \itemize{
#' \item typeC Character: model type (PCA, PLS, PLS-DA, OPLS, or OPLS-DA)
#' \item descriptionMC Character matrix: Description of the data set (number
#' of samples, variables, etc.)
#' \item modelDF Data frame with the model overview (number of components, R2X, R2X(cum), R2Y, R2Y(cum), Q2, Q2(cum),
#' significance, iterations)
#' \item summaryDF Data frame with the model summary (cumulated R2X, R2Y and Q2); RMSEE is the square root of the mean
#' error between the actual and the predicted responses
#' \item subsetVi Integer vector: Indices of observations in the training data set
#' \item pcaVarVn PCA: Numerical vector of variances of length: predI
#' \item vipVn PLS(-DA): Numerical vector of Variable Importance in Projection; OPLS(-DA): Numerical vector of Variable Importance for Prediction (VIP4,p from Galindo-Prieto et al, 2014)
#' \item orthoVipVn OPLS(-DA): Numerical vector of Variable Importance for Orthogonal Modeling (VIP4,o from Galindo-Prieto et al, 2014)
#' \item xMeanVn Numerical vector: variable means of the 'x' matrix
#' \item xSdVn Numerical vector: variable standard deviations of the 'x' matrix
#' \item yMeanVn (O)PLS: Numerical vector: variable means of the 'y' response (transformed into a dummy matrix in case it is of 'character' mode initially)
#' \item ySdVn (O)PLS: Numerical vector: variable standard deviations of the 'y' response (transformed into a dummy matrix in case it is of 'character' mode initially)
#' \item xZeroVarVi Numerical vector: indices of variables with variance < 2.22e-16 which were excluded from 'x' before building the model
#' \item scoreMN Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F
#' \item loadingMN Numerical matrix of x loadings (P; dimensions: ncol(x) x predI) X = TP' + E
#' \item weightMN (O)PLS: Numerical matrix of x weights (W; same dimensions as loadingMN)
#' \item orthoScoreMN OPLS: Numerical matrix of orthogonal scores (Tortho; dimensions: nrow(x) x number of orthogonal components)
#' \item orthoLoadingMN OPLS: Numerical matrix of orthogonal loadings (Portho; dimensions: ncol(x) x number of orthogonal components)
#' \item orthoWeightMN OPLS: Numerical matrix of orthogonal weights (same dimensions as orthoLoadingMN)
#' \item cMN (O)PLS: Numerical matrix of Y weights (C; dimensions: number of responses or number of classes in case of qualitative response) x number of predictive components; Y = TC' + F
#' \item coMN) (O)PLS: Numerical matrix of Y orthogonal weights; dimensions: number of responses or number of classes in case of qualitative response with more than 2 classes x number of orthogonal components
#' \item uMN (O)PLS: Numerical matrix of Y scores (U; same dimensions as scoreMN); Y = UC' + G
#' \item weightStarMN Numerical matrix of projections (W*; same dimensions as loadingMN); whereas columns of weightMN are derived from successively deflated matrices, columns of weightStarMN relate to the original 'x' matrix: T = XW*; W*=W(P'W)inv
#' \item suppLs List of additional objects to be used internally by the 'print', 'plot', and 'predict' methods
#' }
#' @author Etienne Thevenot, \email{etienne.thevenot@@cea.fr}
#' @references Eriksson et al. (2006). Multi- and Megarvariate Data Analysis.
#' Umetrics Academy.  Rosipal and Kramer (2006). Overview and recent advances
#' in partial least squares Tenenhaus (1990). La regression PLS : theorie et
#' pratique. Technip.  Wehrens (2011). Chemometrics with R. Springer.  Wold et
#' al. (2001). PLS-regression: a basic tool of chemometrics
#' @examples
#'
#' ## PCA
#'
#' data(foods) ## see Eriksson et al. (2001); presence of 3 missing values (NA)
#' head(foods)
#' foodMN <- as.matrix(foods[, colnames(foods) != "Country"])
#' rownames(foodMN) <- foods[, "Country"]
#' head(foodMN)
#' foo.pca <- opls(foodMN)
#'
#' ## PLS with a single response
#'
#' data(cornell) ## see Tenenhaus, 1998
#' head(cornell)
#' cornell.pls <- opls(as.matrix(cornell[, grep("x", colnames(cornell))]),
#'                     cornell[, "y"])
#'
#' ## Complementary graphics
#'
#' plot(cornell.pls, typeVc = c("outlier", "predict-train", "xy-score", "xy-weight"))
#'
#' #### PLS with multiple (quantitative) responses
#'
#' data(lowarp) ## see Eriksson et al. (2001); presence of NAs
#' head(lowarp)
#' lowarp.pls <- opls(as.matrix(lowarp[, c("glas", "crtp", "mica", "amtp")]),
#'                    as.matrix(lowarp[, grepl("^wrp", colnames(lowarp)) |
#'                                       grepl("^st", colnames(lowarp))]))
#'
#' ## PLS-DA
#'
#' data(sacurine)
#' attach(sacurine)
#' sacurine.plsda <- opls(dataMatrix, sampleMetadata[, "gender"])
#'
#' ## OPLS-DA
#'
#' sacurine.oplsda <- opls(dataMatrix, sampleMetadata[, "gender"], predI = 1, orthoI = NA)
#'
#' ## Application to an ExpressionSet
#' 
#' sacSet <- Biobase::ExpressionSet(assayData = t(dataMatrix), 
#'                                  phenoData = new("AnnotatedDataFrame", 
#'                                                  data = sampleMetadata), 
#'                                  featureData = new("AnnotatedDataFrame", 
#'                                                    data = variableMetadata),
#'                                  experimentData = new("MIAME", 
#'                                                       title = "sacurine"))
#'                                                       
#' sacPlsda <- opls(sacSet, "gender")
#' sacSet <- getEset(sacPlsda)
#' head(Biobase::pData(sacSet))
#' head(Biobase::fData(sacSet))
#' 
#' detach(sacurine)
#' 
#' ## Application to a MultiDataSet
#' 
#' # Loading the 'NCI60_4arrays' from the 'omicade4' package
#' data("NCI60_4arrays", package = "omicade4")
#' # Selecting two of the four datasets
#' setNamesVc <- c("agilent", "hgu95")
#' # Creating the MultiDataSet instance
#' nciMset <- MultiDataSet::createMultiDataSet()
#' # Adding the two datasets as ExpressionSet instances
#' for (setC in setNamesVc) {
#'   # Getting the data
#'   exprMN <- as.matrix(NCI60_4arrays[[setC]])
#'   pdataDF <- data.frame(row.names = colnames(exprMN),
#'                         cancer = substr(colnames(exprMN), 1, 2),
#'                         stringsAsFactors = FALSE)
#'   fdataDF <- data.frame(row.names = rownames(exprMN),
#'                         name = rownames(exprMN),
#'                         stringsAsFactors = FALSE)
#'   # Building the ExpressionSet
#'   eset <- Biobase::ExpressionSet(assayData = exprMN,
#'                                  phenoData = new("AnnotatedDataFrame",
#'                                                  data = pdataDF),
#'                                  featureData = new("AnnotatedDataFrame",
#'                                                    data = fdataDF),
#'                                  experimentData = new("MIAME",
#'                                                       title = setC))
#'   # Adding to the MultiDataSet
#'   nciMset <- MultiDataSet::add_eset(nciMset, eset, dataset.type = setC,
#'                                     GRanges = NA, warnings = FALSE)
#' }
#' # Summary of the MultiDataSet
#' nciMset
#' # Principal Component Analysis of each data set
#' nciPca <- ropls::opls(nciMset)
#' # Coloring the Score plot according to cancer types
#' ropls::plot(nciPca, y = "cancer", typeVc = "x-score")
#' # Getting the updated MultiDataSet (now including scores and loadings)
#' nciMset <- ropls::getMset(nciPca)
#' # Restricting to the 'ME' and 'LE' cancer types
#' sampleNamesVc <- Biobase::sampleNames(nciMset[["agilent"]])
#' cancerTypeVc <- Biobase::pData(nciMset[["agilent"]])[, "cancer"]
#' nciMset <- nciMset[sampleNamesVc[cancerTypeVc %in% c("ME", "LE")], ]
#' # Building PLS-DA models for the cancer type, and getting back the updated MultiDataSet
#' nciPlsda <- ropls::opls(nciMset, "cancer", predI = 2)
#' nciMset <- ropls::getMset(nciPlsda)
#' # Viewing the new variable metadata (including VIP and coefficients)
#' lapply(Biobase::fData(nciMset), head)
#' @rdname opls
#' @export
setMethod("opls", signature(x = "matrix"),
          function(x,
                   y = NULL,
                   predI = NA,
                   orthoI = 0,
                   
                   algoC = c("default", "nipals", "svd")[1],
                   crossvalI = 7,
                   log10L = FALSE,
                   permI = 20,
                   scaleC = c("none", "center", "pareto", "standard")[4],
                   subset = NULL,

                   plotSubC = NA,                   
                   fig.pdfC = c("none", "interactive", "myfile.pdf")[2],                   
                   info.txtC = c("none", "interactive", "myfile.txt")[2],
                   
                   printL = TRUE,
                   plotL = TRUE,
                   .sinkC = NULL,
                   ...) {
            
            if (!printL) {
              warning("'printL' argument is deprecated; use 'info.txtC' instead.",
                      call. = FALSE)
              info.txtC <- "none"
            }
            
            if (!plotL) {
              warning("'plotL' argument is deprecated; use 'fig.pdfC' instead.",
                      call. = FALSE)
              fig.pdfC <- "none"
            }
            
            if (!is.null(.sinkC)) {
              warning("'.sinkC' argument is deprecated; use 'info.txtC' instead.",
                      call. = FALSE)
              info.txtC <- .sinkC
            }
            
            if (is.null(info.txtC)) {
              warning("'info.txtC = NULL' argument value is deprecated; use 'info.txtC = 'none'' instead.",
                      call. = FALSE)
              info.txtC <- 'none'
            }
            
            if (is.na(info.txtC)) {
              warning("'info.txtC = NA' argument value is deprecated; use 'info.txtC = 'interactive'' instead.",
                      call. = FALSE)
              info.txtC <- 'interactive'
            }
            
            if (is.null(fig.pdfC)) {
              warning("'fig.pdfC = NULL' argument value is deprecated; use 'fig.pdfC = 'none'' instead.",
                      call. = FALSE)
              fig.pdfC <- 'none'
            }
            
            if (is.na(fig.pdfC)) {
              warning("'fig.pdfC = NA' argument value is deprecated; use 'fig.pdfC = 'interactive'' instead.",
                      call. = FALSE)
              fig.pdfC <- 'interactive'
            }
            
            
            if (!(info.txtC %in% c("none", "interactive")))
              sink(info.txtC, append = TRUE)
            
            ## Checking arguments
            
            ## x -> xMN
            
            if (is.data.frame(x)) {
              if (!all(sapply(x, data.class) == "numeric")) {
                stop("'x' data frame must contain columns of 'numeric' vectors only", call. = FALSE)
              } else
                x <- as.matrix(x)
            } else if (is.matrix(x)) {
              if (mode(x) != "numeric")
                stop("'x' matrix must be of 'numeric' mode", call. = FALSE)
            } else
              stop("'x' must be either a data.frame or a matrix", call. = FALSE)
            
            if (any(apply(x, 2, function(colVn) all(is.na(colVn)))))
              stop("'x' contains columns with 'NA' only", call. = FALSE)
            
            if (any(is.infinite(x)))
              stop("'x' contains infinite values", call. = FALSE)
            
            if (any(is.nan(x)))
              stop("'x' contains NaN values", call. = FALSE)
            
            xMN <- x
            
            ## y -> yMCN
            
            yLevelVc <- NULL
            
            if (!is.null(y)) {
              
              if (is.vector(y)) {
                
                if (!(mode(y) %in% c("character", "numeric")))
                  stop("'y' vector must be of 'character' or 'numeric' type", call. = FALSE)
                
                if (length(y) != nrow(xMN))
                  stop("'y' vector length must be equal to the number of rows of 'x'", call. = FALSE)
                
                yMCN <- matrix(y, ncol = 1)
                
              } else if (is.factor(y)) {
                
                if (length(y) != nrow(xMN))
                  stop("'y' factor length must be equal to the number of rows of 'x'", call. = FALSE)
                
                yLevelVc <- levels(y)
                
                yMCN <- matrix(as.character(y), ncol = 1)
                
              } else if (is.matrix(y)) {
                
                if (!(mode(y) %in% c("character", "numeric")))
                  stop("'y' matrix must be of 'character' or 'numeric' type", call. = FALSE)
                
                if (nrow(y) != nrow(xMN))
                  stop("'x' and 'y' must have the same number of rows", call. = FALSE)
                
                if (ncol(y) > 1 && mode(y) != "numeric")
                  stop("Multiple response 'y' matrices must be of numeric type", call. = FALSE)
                
                yMCN <- y
                
              } else
                stop("'y' must be either a vector, a factor, or a matrix", call. = FALSE)
              
            } else
              yMCN <- NULL
            
            
            ## NA in Y only possible for multi-response regression (i.e., Y is a numeric matrix)
            
            if (!is.null(yMCN) &&
               ncol(yMCN) == 1 &&
               any(is.na(drop(yMCN))))
              stop("In case of single response modeling, 'y' must not contain missing values", call. = FALSE)
            
            if (!is.logical(log10L))
              stop("'log10L' must be a logical", call. = FALSE)
            
            if (permI < 0 || (permI - floor(permI)) > 1e-10)
              stop("'permI' must be an integer", call. = FALSE)
            
            if (permI > 0 && (is.null(yMCN) || ncol(yMCN) > 1)) {
              ## warning("Permutation testing available for single response (O)PLS(-DA) models only", call. = FALSE)
              permI <- 0
            }
            
            if (permI > 0 && !is.null(subset)) {
              permI <- 0
              warning("'permI' set to 0 because train/test partition is selected", call. = FALSE)
            }
            
            if (!(algoC %in% c('default', 'nipals', 'svd')))
              stop("'algoC' must be either 'default', 'nipals', or 'svd'", call. = FALSE)
            
            if (algoC == "default")
              algoC <- ifelse(is.null(yMCN) && !any(is.na(c(xMN))), "svd", "nipals")
            
            if (!is.null(yMCN) && algoC != "nipals")
              stop("'nipals' algorithm must be used for (O)PLS(-DA)", call. = FALSE)
            
            if ((is.na(orthoI) || orthoI > 0) && is.null(yMCN))
              stop("'y' response cannot be NULL for OPLS(-DA) modeling", call. = FALSE)
            
            if (!is.null(yMCN)) {
              if (is.na(orthoI) || orthoI > 0) {
                if (ncol(yMCN) > 1) {
                  stop("OPLS regression only available for a single 'y' response", call. = FALSE)
                } else if (mode(yMCN) == "character" && length(unique(drop(yMCN))) > 2)
                  stop("OPLS-DA only available for binary classification (use PLS-DA for multiple classes)", call. = FALSE)
              }
            }
            
            if (is.na(orthoI) || orthoI > 0)
              if (is.na(predI) || predI > 1) {
                predI <- 1
                warning("OPLS: number of predictive components ('predI' argument) set to 1", call. = FALSE)
              }
            
            if (!is.na(predI) && !is.na(orthoI) && ((predI + orthoI) > min(dim(xMN))))
              stop("The sum of 'predI' (", predI, ") and 'orthoI' (", orthoI, ") exceeds the minimum dimension of the 'x' data matrix (", min(dim(xMN)), ")" , call. = FALSE)
            
            if (!(length(scaleC) == 1 && scaleC %in% c('none', 'center', 'pareto', 'standard')))
              stop("'scaleC' must be either 'none', 'center', 'pareto', or 'standard'", call. = FALSE)
            
            if (!is.null(subset) && (is.null(yMCN) || ncol(yMCN) > 1))
              stop("train/test partition with 'subset' only available for (O)PLS(-DA) models of a single 'y' response", call. = FALSE)
            
            if (!is.null(subset) &&
               !(mode(subset) == 'character' && subset == 'odd') &&
               !all(subset %in% 1:nrow(xMN)))
              stop("'subset' must be either set to 'odd' or an integer vector of 'x' row numbers", call. = FALSE)
            
            if (crossvalI > nrow(xMN))
              stop("'crossvalI' must be less than the row number of 'x'", call. = FALSE)
            
            
            ## Constants
            
            epsN <- .Machine[["double.eps"]] ## [1] 2.22e-16
            
            
            ## Character to numeric convertion function (for classification)
            
            if (!is.null(yMCN) && mode(yMCN) == "character") {
              
              if (!is.null(yLevelVc)) {
                claVc <- yLevelVc
              } else
                claVc <- sort(unique(drop(yMCN)))
              
              if (length(claVc) == 2) {
                ## binary response kept as a single vector for OPLS-DA computations
                .char2numF <- function(inpMCN,
                                       c2nL = TRUE) {
                  
                  if (c2nL) {
                    
                    outMCN <- inpMCN == claVc[2]
                    mode(outMCN) <- "numeric"
                    
                  } else {
                    
                    outMCN <- matrix(claVc[as.numeric(inpMCN > 0.5) + 1],
                                     ncol = 1,
                                     dimnames = dimnames(inpMCN))
                    
                  }
                  
                  return(outMCN)
                  
                }
                
              } else
                .char2numF <- function(inpMCN,
                                       c2nL = TRUE) {
                  
                  if (c2nL) {
                    
                    outMCN  <- t(sapply(drop(inpMCN),
                                        function(claC) as.numeric(claVc == claC)))
                    colnames(outMCN) <- claVc
                    
                    
                  } else {
                    
                    outMCN <- t(t(apply(inpMCN, 1,
                                        function(rowVn) claVc[which(rowVn == max(rowVn))[1]])))
                    colnames(outMCN) <- "y1"
                    
                  }
                  
                  return(outMCN)
                  
                }
              
            } else
              .char2numF <- NULL
            
            
            ####   Computations   ####
            
            
            ## rownames and colnames
            
            if (is.null(rownames(xMN)))
              if (!is.null(yMCN) && !is.null(rownames(yMCN))) {
                rownames(xMN) <- rownames(yMCN)
              } else
                rownames(xMN) <- paste0("s", 1:nrow(xMN))
            if (is.null(colnames(xMN)))
              colnames(xMN) <- paste0("x", 1:ncol(xMN))
            
            if (!is.null(yMCN)) {
              if (is.null(rownames(yMCN)))
                rownames(yMCN) <- rownames(xMN)
              if (is.null(colnames(yMCN)))
                colnames(yMCN) <- paste0("y", 1:ncol(yMCN))
            }
            
            ## Log10 transformation
            
            if (log10L)
              xMN <- .log10F(xMN)
            
            
            ## Test indices
            
            obsIniVi <- 1:nrow(xMN)
            
            if (!is.null(subset)) {
              subsetL <- TRUE
              if (length(subset) == 1 && subset == "odd") {
                
                if (mode(yMCN) == "numeric")
                  subsetVi <- seq(1, nrow(xMN), by = 2)
                else {
                  subsetVi <- integer()
                  for (claC in unique(drop(yMCN)))
                    subsetVi <- c(subsetVi,
                                  which(drop(yMCN) == claC)[seq(1, sum(drop(yMCN) == claC), by = 2)])
                  subsetVi <- sort(subsetVi)
                }
              } else
                subsetVi <- subset
              if (crossvalI > length(subsetVi))
                stop("'crossvalI' must be less than the number of samples in the subset", call. = FALSE)
            } else {
              subsetL <- FALSE
              subsetVi <- numeric()
            }
            
            
            ## Filtering out zero variance variables
            
            xVarIndLs <- list()
            xVarIndLs[[1]] <- 1:nrow(xMN)
            
            if (subsetL) {
              xVarIndLs[[1]] <- subsetVi
            } ## else if(!is.null(yMCN) && ncol(yMCN) == 1 && nrow(xMN) >= 2 * crossvalI)
            ##   for(cvkN in 1:crossvalI)
            ##       xVarIndLs <- c(xVarIndLs, list(setdiff(1:nrow(xMN), cvaOutLs[[cvkN]])))
            
            xVarVarLs <- lapply(xVarIndLs,
                                function(xVarVi) {
                                  apply(xMN[xVarVi, , drop = FALSE],
                                        2,
                                        function(colVn) var(colVn, na.rm = TRUE))
                                })
            
            xZeroVarVi <- integer()
            for (k in 1:length(xVarVarLs))
              xZeroVarVi <- union(xZeroVarVi, which(xVarVarLs[[k]] < epsN))
            
            if (length(xZeroVarVi) > 0) {
              names(xZeroVarVi) <- colnames(xMN)[xZeroVarVi]
              xMN <- xMN[, -xZeroVarVi, drop = FALSE]
              warning("The variance of the ",
                      length(xZeroVarVi),
                      " following variables is less than ",
                      signif(epsN, 2),
                      " in the full or partial (cross-validation) dataset: these variables will be removed:\n",
                      paste(names(xZeroVarVi), collapse = ", "),
                      call. = FALSE)
            }
            
            
            ## Core
            
            opl <- .coreF(xMN = xMN,
                          yMCN = yMCN,
                          orthoI = orthoI,
                          predI = predI,
                          scaleC = scaleC,
                          algoC = algoC,
                          crossvalI = crossvalI,
                          subsetL = subsetL,
                          subsetVi = subsetVi,
                          .char2numF = .char2numF)
  
            if (is.null(y)) {          
              opl@suppLs["y"] <- list(NULL)
            } else
              opl@suppLs[["y"]] <- y
            
            if (is.null(opl@suppLs[["yMCN"]])) {
              opl@typeC <- "PCA"
            } else {
              if (ncol(opl@suppLs[["yMCN"]]) > 1 || mode(opl@suppLs[["yMCN"]]) == "numeric")
                opl@typeC <- "PLS"
              else
                opl@typeC <- "PLS-DA"
            }
            if (opl@summaryDF[, "ort"] > 0)
              opl@typeC <- paste("O", opl@typeC, sep = "")
            
            opl@xZeroVarVi <- xZeroVarVi
            ## opl@suppLs[["yLevelVc"]] <- yLevelVc
            
            
            ## Permutation testing (Szymanska et al, 2012)
            
            if (permI > 0) {
              
              modSumVc <- colnames(opl@summaryDF)
              
              permMN <- matrix(0,
                               nrow = 1 + permI,
                               ncol = length(modSumVc),
                               dimnames = list(NULL, modSumVc))
              
              perSimVn <- numeric(1 + permI)
              perSimVn[1] <- 1
              
              
              permMN[1, ] <- as.matrix(opl@summaryDF)
              
              for (k in 1:permI) {
                
                yVcn <- drop(opl@suppLs[["yMCN"]])
                if (!subsetL) {
                  yPerVcn <- sample(yVcn)
                } else {
                  yPerVcn <- numeric(nrow(xMN))
                  refVi <- opl@subsetVi
                  tesVi <- setdiff(1:nrow(xMN), refVi)
                  yPerVcn[refVi] <- sample(yVcn[refVi])
                  yPerVcn[tesVi] <- yVcn[tesVi]
                }
                yPerMCN <- matrix(yPerVcn, ncol = 1)
                
                perOpl <- .coreF(xMN = xMN,
                                 yMCN = yPerMCN,
                                 orthoI = opl@summaryDF[, "ort"],
                                 predI = opl@summaryDF[, "pre"],
                                 scaleC = scaleC,
                                 algoC = algoC,
                                 crossvalI = crossvalI,
                                 subsetL = subsetL,
                                 subsetVi = opl@subsetVi,
                                 .char2numF = .char2numF)
                
                permMN[1 + k, ] <- as.matrix(perOpl@summaryDF)
                
                perSimVn[1 + k] <- .similarityF(opl@suppLs[["yMCN"]], yPerMCN,
                                                .char2numF = .char2numF,
                                                charL = mode(opl@suppLs[["yMCN"]]) == "character")
                
              }
              
              permMN <- cbind(permMN, sim = perSimVn)
              
              perPvaVn <- c(pR2Y = (1 + length(which(permMN[-1, "R2Y(cum)"] >= permMN[1, "R2Y(cum)"]))) / (nrow(permMN) - 1),
                            pQ2 = (1 + length(which(permMN[-1, "Q2(cum)"] >= permMN[1, "Q2(cum)"]))) / (nrow(permMN) - 1))
              opl@summaryDF[, "pR2Y"] <- perPvaVn["pR2Y"]
              opl@summaryDF[, "pQ2"] <- perPvaVn["pQ2"]
              
              opl@suppLs[["permMN"]] <- permMN
              
            }
            
            ####   Numerical results   ####
            
            opl@descriptionMC <- rbind(samples = ifelse(!subsetL,
                                                        nrow(xMN),
                                                        length(subsetVi)),
                                       X_variables = ncol(xMN),
                                       near_zero_excluded_X_variables = length(opl@xZeroVarVi))
            
            totN <- length(c(xMN))
            nasN <- sum(is.na(c(xMN)))
            
            if (!is.null(opl@suppLs[["yMCN"]])) {
              
              opl@descriptionMC <- rbind(opl@descriptionMC,
                                         Y_variables = ncol(opl@suppLs[["yMCN"]]))
              totN <- totN + length(c(opl@suppLs[["yMCN"]]))
              nasN <- nasN + sum(is.na(c(opl@suppLs[["yMCN"]])))
              
            }
            
            opl@descriptionMC <- rbind(opl@descriptionMC,
                                       missing_values = paste0(nasN, " (", round(nasN / totN * 100), "%)"))
            
            
            ## Raw summary
            
            opl@suppLs[["topLoadI"]] <- 3
            
            if (ncol(xMN) > opl@suppLs[["topLoadI"]]) {
              xVarVn <- apply(xMN, 2, var)
              names(xVarVn) <- 1:length(xVarVn)
              xVarVn <- sort(xVarVn)
              xVarSorVin <- as.numeric(names(xVarVn[seq(1, length(xVarVn), length = opl@suppLs[["topLoadI"]])]))
              opl@suppLs[["xSubIncVarMN"]] <- xMN[, xVarSorVin, drop = FALSE]
            } else
              opl@suppLs[["xSubIncVarMN"]] <- xMN
            
            if (ncol(xMN) <= 100) {
              
              xCorMN <- cor(xMN, use = "pairwise.complete.obs")
              xCorMN[lower.tri(xCorMN, diag = TRUE)] <- 0
              
              if (ncol(xMN) > opl@suppLs[["topLoadI"]]) {
                
                xCorNexDF <- which(abs(xCorMN) >= sort(abs(xCorMN), decreasing = TRUE)[opl@suppLs[["topLoadI"]] + 1],
                                   arr.ind = TRUE)
                
                xCorDisMN <- matrix(0,
                                    nrow = nrow(xCorNexDF),
                                    ncol = nrow(xCorNexDF),
                                    dimnames = list(colnames(xMN)[xCorNexDF[, "row"]],
                                                    colnames(xMN)[xCorNexDF[, "col"]]))
                
                for (k in 1:nrow(xCorDisMN))
                  xCorDisMN[k, k] <- xCorMN[xCorNexDF[k, "row"], xCorNexDF[k, "col"]]
                
              } else
                xCorDisMN <- xCorMN
              
              opl@suppLs[["xCorMN"]] <- xCorDisMN
              
              rm(xCorDisMN)
              
            }
            
            ## Printing
            
            if (info.txtC != "none") {
              show(opl)
            }

            ## Plotting

            if (fig.pdfC != "none")
              plot(opl, typeVc = "summary", plotSubC = plotSubC, fig.pdfC = fig.pdfC)
          
            ## Closing connection
            
            if (!(info.txtC %in% c("none", "interactive")))
              sink()
            
            ## Returning
            
            return(invisible(opl))
            
          })


####    show    ####

#' Show method for 'opls' objects
#'
#' Displays information about the dataset and the model.
#'
#' @aliases show.opls show,opls-method
#' @param object An S4 object of class \code{opls}, created by the \code{opls}
#' function.
#' @return Invisible.
#' @author Philippe Rinaudo and Etienne Thevenot (CEA)
#' @examples
#'
#' data(sacurine)
#' attach(sacurine)
#' sacurine.plsda <- opls(dataMatrix, sampleMetadata[, "gender"])
#'
#' show(sacurine.plsda)
#'
#' detach(sacurine)
#'
#' @rdname show
#' @export
setMethod("show", "opls",
          function(object) {
            
            cat(object@typeC, "\n", sep = "")
            
            cat(object@descriptionMC["samples", ],
                " samples x ",
                object@descriptionMC["X_variables", ],
                " variables",
                ifelse(grepl("PLS", object@typeC),
                       paste0(" and ", ncol(object@suppLs[["yMCN"]]), " response", ifelse(ncol(object@suppLs[["yMCN"]]) > 1, "s", "")),
                       ""), "\n", sep = "")
            
            cat(object@suppLs[["scaleC"]], " scaling of predictors",
                ifelse(object@typeC == "PCA",
                       "",
                       paste0(" and ",
                              ifelse(mode(object@suppLs[["yMCN"]]) == "character" && object@suppLs[["scaleC"]] != "standard",
                                     "standard scaling of ",
                                     ""),
                              "response(s)")), "\n", sep = "")
            
            if (substr(object@descriptionMC["missing_values", ], 1, 1) != "0")
              cat(object@descriptionMC["missing_values", ], " NAs\n", sep = "")
            
            if (substr(object@descriptionMC["near_zero_excluded_X_variables", ], 1, 1) != "0")
              cat(object@descriptionMC["near_zero_excluded_X_variables", ],
                  " excluded variables (near zero variance)\n", sep = "")
            
            optDigN <- options()[["digits"]]
            options(digits = 3)
            print(object@summaryDF)
            options(digits = optDigN)
            
          }) ## show


####    print    ####

#' Print method for 'opls' objects
#'
#' Displays information about the dataset and the model.
#'
#' @aliases print.opls print,opls-method
#' @param x An S4 object of class \code{opls}, created by the \code{opls}
#' function.
#' @param ... Currently not used.
#' @return Invisible.
#' @examples
#'
#' data(sacurine)
#' attach(sacurine)
#' sacurine.plsda <- opls(dataMatrix, sampleMetadata[, "gender"])
#'
#' print(sacurine.plsda)
#'
#' detach(sacurine)
#'
#' @rdname print
#' @export
setMethod("print", "opls",
          function(x, ...) {
            
            cat("\n1) Data set:\n", sep = "")
            
            cat(x@descriptionMC["samples", ],
                " samples x ",
                x@descriptionMC["X_variables", ],
                " variables",
                ifelse(grepl("PLS", x@typeC),
                       paste0(" and ", ncol(x@suppLs[["yMCN"]]), " response", ifelse(ncol(x@suppLs[["yMCN"]]) > 1, "s", "")),
                       ""),
                "\n", sep = "")
            
            cat(x@descriptionMC["missing_values", ], " NAs\n", sep = "")
            
            cat(x@descriptionMC["near_zero_excluded_X_variables", ], " excluded variables (near zero variance)\n", sep = "")
            
            cat(x@suppLs[["scaleC"]], " x", ifelse(x@typeC != "PCA", " and y", ""), " scaling\n", sep = "")
            
            cat("Summary of the ", x@suppLs[["topLoadI"]], " increasing variance spaced raw variables:\n", sep = "")
            print(summary(x@suppLs[["xSubIncVarMN"]]))
            
            if (!is.null(x@suppLs[["xCorMN"]])) {
              cat("Correlations between the X-variables:\n")
              print(signif(x@suppLs[["xCorMN"]], 2))
              cat("\n", sep = "")
            }
            
            cat("\n2) Model: ", x@typeC, "\n", sep = "")
            
            cat("Correlations between variables and first 2 components:\n", sep = "")
            
            if (x@summaryDF[, "pre"] + x@summaryDF[, "ort"] < 2) {
              
              warning("A single component model has been selected by cross-validation", call. = FALSE)
              
              tCompMN <- x@scoreMN
              pCompMN <- x@loadingMN
              
            } else {
              
              if (x@summaryDF[, "ort"] > 0) {
                tCompMN <- cbind(x@scoreMN[, 1], x@orthoScoreMN[, 1])
                pCompMN <- cbind(x@loadingMN[, 1], x@orthoLoadingMN[, 1])
                colnames(pCompMN) <- colnames(tCompMN) <- c("h1", "o1")
              } else {
                tCompMN <- x@scoreMN[, 1:2]
                pCompMN <- x@loadingMN[, 1:2]
              }
              
            }
            
            cxtCompMN <- cor(x@suppLs[["xModelMN"]], tCompMN,
                             use = "pairwise.complete.obs")
            
            if (x@suppLs[["topLoadI"]] * 4 < ncol(x@suppLs[["xModelMN"]])) {
              
              pexVi <- integer(x@suppLs[["topLoadI"]] * ncol(pCompMN) * 2) ## 'ex'treme values
              
              for (k in 1:ncol(pCompMN)) {
                
                pkVn <-  pCompMN[, k]
                
                pexVi[1:(2 * x@suppLs[["topLoadI"]]) + 2 * x@suppLs[["topLoadI"]] * (k - 1)] <- c(order(pkVn)[1:x@suppLs[["topLoadI"]]],
                                                                                                  rev(order(pkVn, decreasing = TRUE)[1:x@suppLs[["topLoadI"]]]))
                
              }
              
            } else
              pexVi <- 1:ncol(x@suppLs[["xModelMN"]])
            
            pxtCompMN <- cbind(pCompMN,
                               cxtCompMN)
            
            if (ncol(pCompMN) == 1) {
              colnames(pxtCompMN)[2] <- paste0("cor_", colnames(pxtCompMN)[2])
            } else
              colnames(pxtCompMN)[3:4] <- paste0("cor_", colnames(pxtCompMN)[3:4])
            
            topLoadMN <- pxtCompMN
            
            topLoadMN <- topLoadMN[pexVi, , drop = FALSE]
            
            if (x@suppLs[["topLoadI"]] * 4 < ncol(x@suppLs[["xModelMN"]]) &&
               ncol(pCompMN) > 1) {
              
              topLoadMN[(2 * x@suppLs[["topLoadI"]] + 1):(4 * x@suppLs[["topLoadI"]]), c(1, 3)] <- NA
              topLoadMN[1:(2 * x@suppLs[["topLoadI"]]), c(2, 4)] <- NA
              
            }
            print(signif(topLoadMN, 2))
            
            message("")
            
            optDigN <- options()[["digits"]]
            options(digits = 3)
            print(x@modelDF)
            options(digits = optDigN)
            
          })





####    fitted    ####

#' Fitted method for 'opls' objects
#'
#' Returns predictions of the (O)PLS(-DA) model on the training dataset
#'
#' @aliases fitted.opls fitted,opls-method
#' @param object An S4 object of class \code{opls}, created by the \code{opls}
#' function.
#' @param ... Currently not used.
#' @return Predictions (either a vector, factor, or matrix depending on the y
#' response used for training the model)
#' @author Etienne Thevenot, \email{etienne.thevenot@@cea.fr}
#' @examples
#'
#' data(sacurine)
#' attach(sacurine)
#' sacurine.plsda <- opls(dataMatrix, sampleMetadata[, "gender"])
#'
#' fitted(sacurine.plsda)
#'
#' detach(sacurine)
#'
#' @rdname fitted
#' @export
setMethod("fitted", "opls",
          function(object, ...) {
            
            if (!is.null(object@suppLs[["yPreMN"]])) {
              
              if (mode(object@suppLs[["yMCN"]]) == "character") {
                
                yPredMCN <- object@suppLs[[".char2numF"]](object@suppLs[["yPreMN"]],
                                                          c2nL = FALSE)
                
                if (is.vector(object@suppLs[["y"]])) {
                  fit <- c(yPredMCN)
                  names(fit) <- rownames(yPredMCN)
                } else if (is.factor(object@suppLs[["y"]])) {
                  fit <- c(yPredMCN)
                  names(fit) <- rownames(yPredMCN)
                  fit <- factor(fit, levels = levels(object@suppLs[["y"]]))
                } else if (is.matrix(object@suppLs[["y"]])) {
                  fit <- yPredMCN
                } else
                  stop() ## this case should not happen
                
              } else {
                
                yPredMCN <- object@suppLs[["yPreMN"]]
                
                if (is.vector(object@suppLs[["y"]])) {
                  fit <- c(yPredMCN)
                  names(fit) <- rownames(yPredMCN)
                } else if (is.matrix(object@suppLs[["y"]])) {
                  fit <- yPredMCN
                } else
                  stop() ## this case should not happen
                
              }
              
              return(fit)
              
            } else
              return(NULL)
            
          }) ## fitted


####    tested    ####

#' Tested method for (O)PLS models
#'
#' Returns predictions of the (O)PLS(-DA) model on the out of the box samples
#' (when a 'subset' of samples has been selected when training the model)
#'
#' @aliases tested tested,opls-method
#' @param object An S4 object of class \code{opls}, created by \code{opls}
#' function.
#' @param ... Currently not used.
#' @return Predictions (either a vector, factor, or matrix depending on the y
#' response used for training the model)
#' @author Etienne Thevenot, \email{etienne.thevenot@@cea.fr}
#' @examples
#'
#' data(sacurine)
#' attach(sacurine)
#'
#' testedorMN <- dataMatrix
#' responseFc <- sampleMetadata[, "gender"]
#'
#' sacurine.plsda <- opls(testedorMN,
#'                        responseFc,
#'                        subset = "odd")
#'
#' trainVi <- getSubsetVi(sacurine.plsda)
#'
#' table(responseFc[trainVi], fitted(sacurine.plsda))
#'
#' detach(sacurine)
#'
#' @rdname tested
#' @export
setMethod("tested", "opls",
          function(object) {
            
            if(!is.null(object@suppLs[["yTesMN"]])) {
              
              if(mode(object@suppLs[["yMCN"]]) == "character") {
                
                yTestMCN <- object@suppLs[[".char2numF"]](object@suppLs[["yTesMN"]],
                                                          c2nL = FALSE)
                if(is.vector(object@suppLs[["y"]])) {
                  test <- c(yTestMCN)
                  names(test) <- rownames(yTestMCN)
                } else if(is.factor(object@suppLs[["y"]])) {
                  test <- c(yTestMCN)
                  names(test) <- rownames(yTestMCN)
                  test <- factor(test, levels = levels(object@suppLs[["y"]]))
                } else if(is.matrix(object@suppLs[["y"]])) {
                  test <- yTestMCN
                } else
                  stop() ## this case should not happen
                
              } else {
                
                yTestMCN <- object@suppLs[["yTesMN"]]
                
                if(is.vector(object@suppLs[["y"]])) {
                  test <- c(yTestMCN)
                  names(test) <- rownames(yTestMCN)
                } else if(is.matrix(object@suppLs[["y"]])) {
                  test <- yTestMCN
                } else
                  stop() ## this case should not happen
                
              }
              
              return(test)
              
            } else
              stop("Test results only available for (O)PLS(-DA) models", call. = FALSE)
            
            
          })


####    coef    ####

#' Coefficients method for (O)PLS models
#'
#' Coefficients of the (O)PLS(-DA) regression model
#'
#' @aliases coef.opls coef,opls-method
#' @param object An S4 object of class \code{opls}, created by \code{opls}
#' function.
#' @param ... Currently not used.
#' @return Numeric matrix of coefficients (number of rows equals the number of
#' variables, and the number of columns equals the number of responses)
#' @author Etienne Thevenot, \email{etienne.thevenot@@cea.fr}
#' @examples
#'
#' data(sacurine)
#' attach(sacurine)
#'
#' sacurine.plsda <- opls(dataMatrix,
#'                        sampleMetadata[, "gender"])
#'
#' head(coef(sacurine.plsda))
#'
#' detach(sacurine)
#'
#' @rdname coef
#' @export
setMethod("coef", "opls",
          function(object, ...) {
            return(object@coefficientMN)
          })


####    residuals    ####

#' Residuals method for (O)PLS models
#'
#' Returns the residuals from the (O)PLS(-DA) regression models
#'
#' @aliases residuals.opls residuals,opls-method
#' @param object An S4 object of class \code{opls}, created by \code{opls}
#' function.
#' @param ... Currently not used.
#' @return Numeric matrix or vector (same dimensions as the modeled y
#' response); if y is a character vector or a factor (in case of
#' classification), the residuals equal 0 (predicted class identical to the
#' true class) or 1 (prediction error)
#' @author Etienne Thevenot, \email{etienne.thevenot@@cea.fr}
#' @examples
#'
#' data(sacurine)
#' attach(sacurine)
#'
#' sacurine.pls <- opls(dataMatrix,
#'                      sampleMetadata[, "age"])
#'
#' head(residuals(sacurine.pls))
#'
#' detach(sacurine)
#'
#' @rdname residuals
#' @export
setGeneric("residuals", getGeneric("residuals", package = "stats"))
setMethod("residuals", "opls",
          function(object, ...) {
            
            if (grepl("PLS", object@typeC)) {
              
              fit <- fitted(object)
              
              if (length(object@subsetVi) == 0) {
                trainVi <- 1:length(fit)
              } else
                trainVi <- object@subsetVi
              
              if (mode(object@suppLs[["yMCN"]]) == "numeric") {
                y <- object@suppLs[["y"]]
                if (is.matrix(y))
                  y <- y[trainVi, , drop = FALSE]
                else
                  y <- y[trainVi]
                return(y - fit)
              } else
                return(as.numeric(as.character(c(object@suppLs[["y"]])[trainVi]) != as.character(c(fit))))
              
            } else
              stop("'residuals' defined for (O)PLS(-DA) regression models only", call. = FALSE)
            
          }) ## residuals


####    predict    ####

#' Predict method for (O)PLS models
#'
#' Returns predictions of the (O)PLS(-DA) model on a new dataset
#'
#' @aliases predict.opls predict,opls-method
#' @param object An S4 object of class \code{opls}, created by \code{opls}
#' function.
#' @param newdata Either a data frame or a matrix, containing numeric columns
#' only, with the same number of columns (variables) as the 'x' used for model
#' training with 'opls'.
#' @param ... Currently not used.
#' @return Predictions (either a vector, factor, or matrix depending on the y
#' response used for training the model)
#' @author Etienne Thevenot, \email{etienne.thevenot@@cea.fr}
#' @examples
#'
#' data(sacurine)
#' attach(sacurine)
#'
#' predictorMN <- dataMatrix
#' responseFc <- sampleMetadata[, "gender"]
#'
#' sacurine.plsda <- opls(predictorMN,
#'                        responseFc,
#'                        subset = "odd")
#'
#' trainVi <- getSubsetVi(sacurine.plsda)
#'
#' table(responseFc[trainVi], fitted(sacurine.plsda))
#'
#' table(responseFc[-trainVi],
#'       predict(sacurine.plsda, predictorMN[-trainVi, ]))
#'
#' detach(sacurine)
#'
#' @rdname predict
#' @export
setMethod("predict", "opls",
          function(object, newdata, ...) {
            
            if(object@typeC == "PCA")
              stop("Predictions currently available for (O)PLS(-DA) models only (not PCA)",
                   call. = FALSE)
            
            if(missing(newdata)) {
              
              return(fitted(object))
              
            } else {
              
              if(is.data.frame(newdata)) {
                if(!all(sapply(newdata, data.class) == "numeric")) {
                  stop("'newdata' data frame must contain numeric columns only", call. = FALSE)
                } else
                  newdata <- as.matrix(newdata)
              } else if(is.matrix(newdata)) {
                if(mode(newdata) != "numeric")
                  stop("'newdata' matrix must be of 'numeric' mode", call. = FALSE)
              } else
                stop("'newdata' must be either a data.frame or a matrix", call. = FALSE)
              
              if(ncol(newdata) != as.numeric(object@descriptionMC["X_variables", ])) {
                if(length(object@xZeroVarVi) == 0) {
                  stop("'newdata' number of variables is ",
                       ncol(newdata),
                       " whereas the number of variables used for model training was ",
                       as.numeric(object@descriptionMC["X_variables", ]),
                       ".",
                       call. = FALSE)
                } else if(ncol(newdata) - as.numeric(object@descriptionMC["X_variables", ]) ==
                          as.numeric(object@descriptionMC["near_zero_excluded_X_variables", ])) {
                  warning(as.numeric(object@descriptionMC["near_zero_excluded_X_variables", ]),
                          " near zero variance variables excluded during the model training will be removed from 'newdata'.",
                          call. = FALSE)
                  newdata <- newdata[, -object@xZeroVarVi, drop = FALSE]
                } else {
                  stop("'newdata' number of variables (",
                       ncol(newdata),
                       ") does not correspond to the number of initial variables (",
                       as.numeric(object@descriptionMC["X_variables", ]),
                       ") minus the number of near zero variance variables excluded during the training (",
                       as.numeric(object@descriptionMC["near_zero_excluded_X_variables", ]),
                       ").",
                       call. = FALSE)
                }
              }
              
              xteMN <- scale(newdata, object@xMeanVn, object@xSdVn)
              
              if(object@summaryDF[, "ort"] > 0) {
                
                for(noN in 1:object@summaryDF[, "ort"]) {
                  if(object@suppLs[["naxL"]]) {
                    xtoMN <- matrix(0, nrow = nrow(xteMN), ncol = 1)
                    for(i in 1:nrow(xtoMN)) {
                      comVl <- complete.cases(xteMN[i, ])
                      xtoMN[i, ] <- crossprod(xteMN[i, comVl], object@orthoWeightMN[comVl, noN]) / drop(crossprod(object@orthoWeightMN[comVl, noN]))
                    }
                  } else
                    xtoMN <- xteMN %*% object@orthoWeightMN[, noN]
                  
                  xteMN <- xteMN - tcrossprod(xtoMN, object@orthoLoadingMN[, noN])
                }
                
              }
              
              if(object@suppLs[["naxL"]]) {
                yTesScaMN <- matrix(0, nrow = nrow(xteMN), ncol = ncol(object@coefficientMN),
                                    dimnames = list(rownames(xteMN), colnames(object@coefficientMN)))
                for(j in 1:ncol(yTesScaMN))
                  for(i in 1:nrow(yTesScaMN)) {
                    comVl <- complete.cases(xteMN[i, ])
                    yTesScaMN[i, j] <- crossprod(xteMN[i, comVl], object@coefficientMN[comVl, j])
                  }
              } else
                yTesScaMN <- xteMN %*% object@coefficientMN
              
              ## if(object@suppLs[["nayL"]])
              ##     yTesScaMN <- yTesScaMN[!is.na(yMCN[testVi, ]), , drop = FALSE]
              
              yTesMN <- scale(scale(yTesScaMN,
                                    FALSE,
                                    1 / object@ySdVn),
                              -object@yMeanVn,
                              FALSE)
              attr(yTesMN, "scaled:center") <- NULL
              attr(yTesMN, "scaled:scale") <- NULL
              
              if(is.factor(fitted(object))) {
                
                yTestMCN <- object@suppLs[[".char2numF"]](yTesMN,
                                                          c2nL = FALSE)
                predMCNFcVcn <- as.character(yTestMCN)
                names(predMCNFcVcn) <- rownames(newdata)
                predMCNFcVcn <- factor(predMCNFcVcn, levels = levels(object@suppLs[["y"]]))
                
              } else if(is.vector(fitted(object))) {
                
                if(is.character(fitted(object))) {
                  
                  yTestMCN <- object@suppLs[[".char2numF"]](yTesMN,
                                                            c2nL = FALSE)
                  predMCNFcVcn <- as.character(yTestMCN)
                  names(predMCNFcVcn) <- rownames(newdata)
                  
                } else {
                  
                  predMCNFcVcn <- as.numeric(yTesMN)
                  names(predMCNFcVcn) <- rownames(newdata)
                  
                }
                
              } else if (is.matrix(fitted(object))) {
                
                if (mode(fitted(object)) == "character") {
                  predMCNFcVcn  <- object@suppLs[[".char2numF"]](yTesMN,
                                                                 c2nL = FALSE)
                } else
                  predMCNFcVcn <- yTesMN
                
                rownames(predMCNFcVcn) <- rownames(newdata)
                
              }
              
              return(predMCNFcVcn)
              
            }
            
          })


####    getEset    ####

#' getEset method
#'
#' Extracts the complemented ExpressionSet when opls has been applied to an ExpressionSet
#'
#' @aliases getEset getEset, opls-method
#' @param object An S4 object of class \code{opls}, created by \code{opls}
#' function.
#' @param ... Currently not used
#' @return An S4 object of class \code{ExpressionSet} which contains the dataMatrix (t(exprs(eset))),
#' and the sampleMetadata (pData(eset)) and variableMetadata (fData(eset)) with the additional columns
#' containing the scores, predictions, loadings, VIP, coefficients etc.
#' @author Etienne Thevenot, \email{etienne.thevenot@@cea.fr}
#' @examples
#'
#' data(sacurine)
#' attach(sacurine)
#' 
#' sacSet <- Biobase::ExpressionSet(assayData = t(dataMatrix), 
#'                                  phenoData = new("AnnotatedDataFrame", 
#'                                                  data = sampleMetadata), 
#'                                  featureData = new("AnnotatedDataFrame", 
#'                                                    data = variableMetadata),
#'                                  experimentData = new("MIAME", 
#'                                                       title = "sacurine"))
#'                                                       
#' sacPlsda <- opls(sacSet, "gender")
#' sacSet <- getEset(sacPlsda)
#' head(Biobase::pData(sacSet))
#' head(Biobase::fData(sacSet))
#' 
#' detach(sacurine)
#'
#' @rdname getEset
#' @export
setMethod("getEset", "opls",
          function(object) {
            return(object@eset)
          })


####    getLoadingMN    ####

#' getLoadingMN method for PCA/(O)PLS(-DA) models
#'
#' (Orthogonal) loadings of the PCA/(O)PLS(-DA) model
#'
#' @aliases getLoadingMN getLoadingMN,opls-method
#' @param object An S4 object of class \code{opls}, created by \code{opls}
#' function.
#' @param orthoL Logical: Should the orthogonal loading matrix be returned
#' @param ... Currently not used.
#' (default is FALSE and the predictive loading matrix is returned)
#' @return Numeric matrix with a number of rows equal to the number of
#' variables and a number of columns equal to the number of components
#' @author Etienne Thevenot, \email{etienne.thevenot@@cea.fr}
#' @examples
#'
#' data(sacurine)
#' attach(sacurine)
#'
#' sacurine.plsda <- opls(dataMatrix,
#'                        sampleMetadata[, "gender"])
#'
#' getLoadingMN(sacurine.plsda)
#'
#' detach(sacurine)
#'
#' @rdname getLoadingMN
#' @export
setMethod("getLoadingMN", "opls",
          function(object, orthoL = FALSE) {
            if(orthoL)
              return(object@orthoLoadingMN)
            else
              return(object@loadingMN)
          })


####    getPcaVarVn    ####

#' getPcaVarVn method for PCA models
#'
#' Variance of the components (score vectors)
#'
#' @aliases getPcaVarVn getPcaVarVn,opls-method
#' @param object An S4 object of class \code{opls}, created by \code{opls}
#' function.
#' @param ... Currently not used.
#' @return Numeric vector with the same length as the number of components
#' @author Etienne Thevenot, \email{etienne.thevenot@@cea.fr}
#' @examples
#'
#' data(sacurine)
#' attach(sacurine)
#'
#' sacurine.pca <- opls(dataMatrix)
#'
#' getPcaVarVn(sacurine.pca)
#'
#' detach(sacurine)
#'
#' @rdname getPcaVarVn
#' @export
setMethod("getPcaVarVn", "opls",
          function(object) {
            return(object@pcaVarVn)
          })


####    getScoreMN    ####

#' getScoreMN method for PCA/(O)PLS(-DA) models
#'
#' (Orthogonal) scores of the (O)PLS(-DA) model
#'
#' @aliases getScoreMN getScoreMN,opls-method
#' @param object An S4 object of class \code{opls}, created by \code{opls}
#' function.
#' @param orthoL Logical: Should the orthogonal score matrix be returned
#' (default is FALSE and the predictive score matrix is returned)
#' @param ... Currently not used.
#' @return Numeric matrix with a number of rows equal to the number of samples
#' and a number of columns equal to the number of components
#' @author Etienne Thevenot, \email{etienne.thevenot@@cea.fr}
#' @examples
#'
#' data(sacurine)
#' attach(sacurine)
#'
#' sacurine.plsda <- opls(dataMatrix,
#'                        sampleMetadata[, "gender"])
#'
#' getScoreMN(sacurine.plsda)
#'
#' detach(sacurine)
#'
#' @rdname getScoreMN
#' @export
setMethod("getScoreMN", "opls",
          function(object, orthoL = FALSE) {
            if (orthoL)
              return(object@orthoScoreMN)
            else
              return(object@scoreMN)
          })




####    getSubsetVi    ####

#' getSubsetVi method for (O)PLS(-DA) models
#'
#' Extracts the indices of the samples used for building the model (when a
#' subset argument has been specified)
#'
#' @aliases getSubsetVi getSubsetVi,opls-method
#' @param object An S4 object of class \code{opls}, created by \code{opls}
#' function.
#' @param ... Currently not used.
#' @return Integer vector with the indices of the samples used for training
#' @author Etienne Thevenot, \email{etienne.thevenot@@cea.fr}
#' @examples
#'
#' data(sacurine)
#' attach(sacurine)
#'
#' predictorMN <- dataMatrix
#' responseFc <- sampleMetadata[, "gender"]
#'
#' sacurine.plsda <- opls(predictorMN,
#'                        responseFc,
#'                        subset = "odd")
#'
#' trainVi <- getSubsetVi(sacurine.plsda)
#'
#' table(responseFc[trainVi], fitted(sacurine.plsda))
#'
#' detach(sacurine)
#'
#' @rdname getSubsetVi
#' @export
setMethod("getSubsetVi", "opls",
          function(object) {
            return(object@subsetVi)
          })


####    getSummaryDF    ####

#' getSummaryDF method for PCA/(O)PLS models
#'
#' Summary of model metrics
#'
#' @aliases getSummaryDF getSummaryDF,opls-method
#' @param object An S4 object of class \code{opls}, created by \code{opls}
#' function.
#' @param ... Currently not used.
#' @return Data frame
#' @author Etienne Thevenot, \email{etienne.thevenot@@cea.fr}
#' @examples
#'
#' data(sacurine)
#' attach(sacurine)
#'
#' sacurine.plsda <- opls(dataMatrix,
#'                        sampleMetadata[, "gender"])
#'
#' getSummaryDF(sacurine.plsda)
#'
#' detach(sacurine)
#'
#' @rdname getSummaryDF
#' @export
setMethod("getSummaryDF", "opls",
          function(object) {
            return(object@summaryDF)
          })


####    getVipVn    ####

#' getVipVn method for (O)PLS(-DA) models
#'
#' (Orthogonal) VIP of the (O)PLS(-DA) model
#'
#'
#' @aliases getVipVn getVipVn,opls-method
#' @param object An S4 object of class \code{opls}, created by \code{opls}
#' function.
#' @param orthoL Logical: Should the orthogonal VIP be returned (default is
#' FALSE and the predictive VIP is returned)
#' @param ... Currently not used.
#' @return Numeric vector with a length equal to the number of variables and a
#' number of columns equal to the number of components
#' @author Etienne Thevenot, \email{etienne.thevenot@@cea.fr}
#' @references Galindo-Prieto B., Eriksson L. and Trygg J. (2014). Variable
#' influence on projection (VIP) for orthogonal projections to latent
#' structures (OPLS). Journal of Chemometrics 28, 623-632.
#' @examples
#'
#' data(sacurine)
#' attach(sacurine)
#'
#' sacurine.plsda <- opls(dataMatrix,
#'                        sampleMetadata[, "gender"])
#'
#' getVipVn(sacurine.plsda)
#'
#' detach(sacurine)
#'
#' @rdname getVipVn
#' @export
setMethod("getVipVn", "opls",
          function(object, orthoL = FALSE) {
            if(orthoL)
              return(object@orthoVipVn)
            else
              return(object@vipVn)
          })


####    getWeightMN    ####

#' getWeightMN method for (O)PLS(-DA) models
#'
#' (Orthogonal) weights of the (O)PLS(-DA) model
#'
#'
#' @aliases getWeightMN getWeightMN,opls-method
#' @param object An S4 object of class \code{opls}, created by \code{opls}
#' function.
#' @param orthoL Logical: Should the orthogonal weight matrix be returned
#' @param ... Currently not used.
#' (default is FALSE and the predictive weight matrix is returned)
#' @return Numeric matrix with a number of rows equal to the number of
#' variables and a number of columns equal to the number of components
#' @author Etienne Thevenot, \email{etienne.thevenot@@cea.fr}
#' @examples
#'
#' data(sacurine)
#' attach(sacurine)
#'
#' sacurine.plsda <- opls(dataMatrix,
#'                        sampleMetadata[, "gender"])
#'
#' getWeightMN(sacurine.plsda)
#'
#' detach(sacurine)
#'
#' @rdname getWeightMN
#' @export
setMethod("getWeightMN", "opls",
          function(object, orthoL = FALSE) {
            if(orthoL)
              return(object@orthoWeightMN)
            else
              return(object@weightMN)
          })


#' Checking the consistency of an ExpressionSet instance with W4M format
#'
#' @param eset An S4 object of class ExpressionSet.
#' @param ... Currently not used.
#' @return Invisible TRUE logical in case of success (otherwise generates an
#' error)
#' @author Etienne Thevenot, \email{etienne.thevenot@@cea.fr}
#' @examples
#' sacSet <- fromW4M(file.path(path.package("ropls"), "extdata"))
#' print(checkW4M(sacSet))
#' @rdname checkW4M
setMethod("checkW4M", "ExpressionSet",
          function(eset, ...) {
            
            datMN <- t(exprs(eset))
            samDF <- pData(eset)
            varDF <- fData(eset)
            
            chkL <- .checkW4mFormatF(datMN, samDF, varDF)
            
            if(!chkL) {
              stop("Problem with the sample or variable names in the tables to be imported from (exported to) W4M", call. = FALSE)
            } else
              return(TRUE)
          })

#' Exporting ExpressionSet instance into 3 tabulated files.
#'
#' The 3 .tsv files are written with the indicated \code{file} prefix, and
#' '_dataMatrix.tsv', '_sampleMetadata.tsv', and '_variableMetadata.tsv'
#' suffices, respectively. Note that the \code{dataMatrix} is transposed before
#' export (e.g., the samples are written column wise in the 'dataMatrix.tsv'
#' exported file).
#'
#' @param eset An S4 object of class \code{ExpressionSet}
#' function.
#' @param filePrefixC Character: common prefix (including repository full path)
#' of the three file names: for example, the 'c:/mydata/setname' value will
#' result in writting the 'c:/mydata/setname_dataMatrix.tsv',
#' 'c:/mydata/setname_sampleMetadata.tsv', and
#' 'c:/mydata/setname_variableMetadata.tsv' files.
#' @param verboseL Logical: should comments be printed?
#' @param ... Currently not used.
#' @return No object returned.
#' @author Etienne Thevenot, \email{etienne.thevenot@@cea.fr}
#' @examples
#'  sacSet <- fromW4M(file.path(path.package("ropls"), "extdata"))
#'  toW4M(sacSet)
#' @rdname toW4M
setMethod("toW4M", "ExpressionSet",
          function(eset, filePrefixC = paste0(getwd(), "/out_"), verboseL = TRUE, ...){
            
            if(checkW4M(eset)) {
              
              datMN <- exprs(eset)
              datDF <- cbind.data.frame(dataMatrix = rownames(datMN),
                                        as.data.frame(datMN))
              
              filDatC <- paste0(filePrefixC, "dataMatrix.tsv")
              filSamC <- paste0(filePrefixC, "sampleMetadata.tsv")
              filVarC <- paste0(filePrefixC, "variableMetadata.tsv")
              
              write.table(datDF,
                          file = filDatC,
                          quote = FALSE,
                          row.names = FALSE,
                          sep = "\t")
              
              samDF <- pData(eset)
              samDF <- cbind.data.frame(sampleMetadata = rownames(samDF),
                                        samDF)
              write.table(samDF,
                          file = filSamC,
                          quote = FALSE,
                          row.names = FALSE,
                          sep = "\t")
              
              varDF <- fData(eset)
              varDF <- cbind.data.frame(variableMetadata = rownames(varDF),
                                        varDF)
              write.table(varDF,
                          file = filVarC,
                          quote = FALSE,
                          row.names = FALSE,
                          sep = "\t")
              
              if (verboseL) {
                cat("The following 3 files:\n")
                print(basename(filDatC))
                print(basename(filSamC))
                print(basename(filVarC))
                cat("have been written in the following directory:\n")
                print(dirname(filDatC))
              }
              
            }
            
          })

Try the ropls package in your browser

Any scripts or data that you put into this service are public.

ropls documentation built on Nov. 8, 2020, 7:46 p.m.