R/RcppExports.R

Defines functions pca_nipals gaussian_process_cv predict_gaussian_process gaussian_process opls_gs opls_cv_cpp reconstruction_error project_opls predict_opls opls_get_basics opls opls_get_all opls_for_projection get_local_pls_weights get_weights get_column_sums get_column_means overall_var get_column_sds get_col_largest_sd which_min_vector which_min moving_cor_diss fast_diss_vector fast_diss

Documented in fast_diss fast_diss_vector gaussian_process gaussian_process_cv get_col_largest_sd get_column_means get_column_sds get_column_sums get_local_pls_weights get_weights moving_cor_diss opls opls_cv_cpp opls_for_projection opls_get_all opls_get_basics opls_gs overall_var pca_nipals predict_gaussian_process predict_opls project_opls reconstruction_error which_min which_min_vector

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' @title A fast distance algorithm for two matrices written in C++
#' @description Computes the distances between two data matrices using one of
#' the following methods: "euclid", "cor" or "cosine". This R function is a
#' thin wrapper that forwards its arguments to the compiled routine
#' \code{_resemble_fast_diss}.
#' @usage 
#' fast_diss(X, Y, method)
#' @param X a matrix.
#' @param Y a matrix.
#' @param method a character string indicating the distance method. Possible
#' values are "euclid", "cor" and "cosine".
#' @return a distance matrix.
#' @keywords internal
#' @useDynLib resemble
#' @author Antoine Stevens and Leonardo Ramirez-Lopez
fast_diss <- function(X, Y, method) {
    .Call('_resemble_fast_diss', PACKAGE = 'resemble', X, Y, method)
}

#' @title A fast algorithm of (squared) Euclidean cross-distance for vectors written in C++
#' @description A fast (parallel for linux) algorithm of (squared) Euclidean
#' cross-distance for vectors written in C++. This R function is a thin
#' wrapper that forwards its argument to the compiled routine
#' \code{_resemble_fast_diss_vector}.
#' @usage 
#' fast_diss_vector(X)
#' @param X a vector.
#' @return a vector of distances (lower triangle of the distance matrix,
#' stored by column).
#' @details Used internally in ortho_projection.
#' @author Antoine Stevens
#' @keywords internal 
#' @useDynLib resemble
fast_diss_vector <- function(X) {
    .Call('_resemble_fast_diss_vector', PACKAGE = 'resemble', X)
}

#' @title Moving/rolling correlation distance of two matrices
#' @description Computes a moving window correlation distance between two
#' data matrices. This R function is a thin wrapper that forwards its
#' arguments to the compiled routine \code{_resemble_moving_cor_diss}.
#' @usage 
#' moving_cor_diss(X, Y, w)
#' @param X a matrix.
#' @param Y a matrix.
#' @param w the window size (must be odd).
#' @return a matrix of correlation distances.
#' @keywords internal
#' @useDynLib resemble
#' @author Leonardo Ramirez-Lopez and Antoine Stevens
moving_cor_diss <- function(X, Y, w) {
    .Call('_resemble_moving_cor_diss', PACKAGE = 'resemble', X, Y, w)
}

#' @title A function to compute the row-wise indices of the minimum values of a square distance matrix
#' @description For internal use only. This R function is a thin wrapper that
#' forwards its argument to the compiled routine \code{_resemble_which_min}.
#' @usage 
#' which_min(X)
#' @param X a square matrix of distances.
#' @return a vector with the indices of the minimum value in each row of the
#' input matrix.
#' @details Used internally to find the nearest neighbors.
#' @keywords internal
#' @useDynLib resemble
#' @author Antoine Stevens 
which_min <- function(X) {
    .Call('_resemble_which_min', PACKAGE = 'resemble', X)
}

#' @title A function to compute the indices of the minimum values of a distance vector
#' @description For internal use only. This R function is a thin wrapper that
#' forwards its argument to the compiled routine
#' \code{_resemble_which_min_vector}.
#' @usage 
#' which_min_vector(X)
#' @param X a vector of distances.
#' @return a vector with the indices of the nearest neighbors.
#' @details 
#' Used internally to find the nearest neighbors. 
#' It searches in a lower (or upper) triangular matrix, therefore the input
#' data must be in this format. 
#' Historical note: the C++ statement 
#' \code{int len = (sqrt(X.size()*8+1)+1)/2} generated an error in CRAN
#' since \code{sqrt} cannot be applied to integers.
#' @keywords internal
#' @useDynLib resemble
#' @author Antoine Stevens 
which_min_vector <- function(X) {
    .Call('_resemble_which_min_vector', PACKAGE = 'resemble', X)
}

#' @title Function for identifying the column in a matrix with the largest standard deviation
#' @description Identifies the column with the largest standard deviation. 
#' For internal use only! This R function is a thin wrapper that forwards its
#' argument to the compiled routine \code{_resemble_get_col_largest_sd}.
#' @usage get_col_largest_sd(X)
#' @param X a matrix.
#' @return a value indicating the index of the column with the largest
#' standard deviation. 
#' @author Leonardo Ramirez-Lopez
#' @keywords internal 
#' @useDynLib resemble
get_col_largest_sd <- function(X) {
    .Call('_resemble_get_col_largest_sd', PACKAGE = 'resemble', X)
}

#' @title Function for computing the standard deviation of each column in a matrix
#' @description Computes the standard deviation of each column in a matrix. 
#' For internal use only! This R function is a thin wrapper that forwards its
#' argument to the compiled routine \code{_resemble_get_column_sds}.
#' @usage get_column_sds(X)
#' @param X a matrix.
#' @return a vector of standard deviation values. 
#' @author Leonardo Ramirez-Lopez
#' @keywords internal 
#' @useDynLib resemble
get_column_sds <- function(X) {
    .Call('_resemble_get_column_sds', PACKAGE = 'resemble', X)
}

# NOTE(review): the original @return read "a vector of standard deviation
# values", which looks like a copy-paste slip from get_column_sds. The exact
# shape of the returned object is not visible from this wrapper; confirm it
# against the C++ source.
#' @title Function for computing the overall variance of a matrix
#' @description Computes the variance of a matrix. For internal use only! 
#' This R function is a thin wrapper that forwards its argument to the
#' compiled routine \code{_resemble_overall_var}.
#' @usage overall_var(X)
#' @param X a matrix.
#' @return the overall variance of \code{X}. 
#' @author Leonardo Ramirez-Lopez
#' @keywords internal 
#' @useDynLib resemble
overall_var <- function(X) {
    .Call('_resemble_overall_var', PACKAGE = 'resemble', X)
}

#' @title Function for computing the mean of each column in a matrix
#' @description Computes the mean of each column in a matrix. For internal
#' use only! This R function is a thin wrapper that forwards its argument to
#' the compiled routine \code{_resemble_get_column_means}.
#' @usage get_column_means(X)
#' @param X a matrix.
#' @return a vector of mean values. 
#' @author Leonardo Ramirez-Lopez
#' @keywords internal 
#' @useDynLib resemble
get_column_means <- function(X) {
    .Call('_resemble_get_column_means', PACKAGE = 'resemble', X)
}

# NOTE(review): the original @return read "a vector of standard deviation
# values", which looks like a copy-paste slip from get_column_sds; it was
# aligned with the title and description below. Confirm against the C++ source.
#' @title Function for computing the sum of each column in a matrix
#' @description Computes the sum of each column in a matrix. For internal use
#' only! This R function is a thin wrapper that forwards its argument to the
#' compiled routine \code{_resemble_get_column_sums}.
#' @usage get_column_sums(X)
#' @param X a matrix.
#' @return a vector with the sum of each column. 
#' @author Leonardo Ramirez-Lopez
#' @keywords internal 
#' @useDynLib resemble
get_column_sums <- function(X) {
    .Call('_resemble_get_column_sums', PACKAGE = 'resemble', X)
}

#' @title Computes the weights for pls regressions
#' @description
#' This is an internal function that computes the weights required for
#' obtaining each vector of pls scores. The implementation is done in C++ for
#' improved performance; this R function is a thin wrapper that forwards its
#' arguments to the compiled routine \code{_resemble_get_weights}.
#' @param X a numeric matrix of spectral data.
#' @param Y a matrix of one column with the response variable.
#' @param algorithm a character string indicating what method to use. Options are:
#' \code{'pls'} for pls (using covariance between X and Y), 
#' \code{'mpls'} for modified pls (using correlation between X and Y as in 
#' Shenk and Westerhaus, 1991; Westerhaus 2014) or
#' \code{'xls'} for extended pls (as implemented in BUCHI NIRWise PLUS software).
#' Default is \code{'pls'}.
#' @param xls_min_w an integer indicating the minimum window size for the "xls"
#' method. Only used if \code{algorithm = 'xls'}. Default is 3 (as in BUCHI NIRWise PLUS software).
#' @param xls_max_w an integer indicating the maximum window size for the "xls"
#' method. Only used if \code{algorithm = 'xls'}. Default is 15 (as in BUCHI NIRWise PLUS software).
#' @author Leonardo Ramirez-Lopez and Claudio Orellano
#' @references
#' Shenk, J. S., & Westerhaus, M. O. (1991). Populations structuring of 
#' near infrared spectra and modified partial least squares regression. 
#' Crop Science, 31(6), 1548-1555.
#' 
#' Westerhaus, M. (2014). Eastern Analytical Symposium Award for outstanding 
#' achievements in near infrared spectroscopy: my contributions to 
#' near infrared spectroscopy. NIR news, 25(8), 16-20.
#' @return a `matrix` of one column containing the weights.
#' @keywords internal 
#' @useDynLib resemble
get_weights <- function(X, Y, algorithm = "pls", xls_min_w = 3L, xls_max_w = 15L) {
    .Call('_resemble_get_weights', PACKAGE = 'resemble', X, Y, algorithm, xls_min_w, xls_max_w)
}

# NOTE(review): the original description of `xloadings` was empty ("."). The
# text below is inferred from the argument name and from how `xloadings` is
# documented in reconstruction_error; confirm against the C++ source.
#' @title Internal Cpp function for computing the weights of the PLS components 
#' necessary for weighted average PLS
#' @description For internal use only! This R function is a thin wrapper that
#' forwards its arguments to the compiled routine
#' \code{_resemble_get_local_pls_weights}.
#' @usage
#' get_local_pls_weights(projection_mat, 
#'           xloadings, 
#'           coefficients, 
#'           new_x, 
#'           min_component, 
#'           max_component, 
#'           scale, 
#'           Xcenter, 
#'           Xscale)
#' @param projection_mat the projection matrix generated by the \code{opls} function.
#' @param xloadings the matrix of X loadings.
#' @param coefficients the matrix of regression coefficients.
#' @param new_x a matrix containing the one new spectrum to be predicted.
#' @param min_component an integer indicating the minimum number of pls components.
#' @param max_component an integer indicating the maximum number of pls components.
#' @param scale a logical indicating whether the matrix of predictors used to create the regression model was scaled.
#' @param Xcenter a matrix of one row with the values that must be used for centering \code{new_x}.
#' @param Xscale if \code{scale = TRUE} a matrix of one row with the values that must be used for scaling \code{new_x}.
#' @return a matrix of one row with the weights for each component between the
#' specified minimum and maximum. 
#' @author Leonardo Ramirez-Lopez
#' @keywords internal 
#' @useDynLib resemble
get_local_pls_weights <- function(projection_mat, xloadings, coefficients, new_x, min_component, max_component, scale, Xcenter, Xscale) {
    .Call('_resemble_get_local_pls_weights', PACKAGE = 'resemble', projection_mat, xloadings, coefficients, new_x, min_component, max_component, scale, Xcenter, Xscale)
}

# NOTE(review): the original text documented 'cumvar' and 0.99 as the defaults
# of pcSelmethod and pcSelvalue, and conditioned pcSelmethod on a `regression`
# argument. The signature below defaults to "var" and 0.01 and has no
# `regression` argument, so the text was aligned with the signature. Confirm
# the intended behaviour against the C++ source.
#' @title Orthogonal scores algorithm of partial least squares (opls) projection
#' @description Computes orthogonal scores partial least squares (opls) 
#' projection with the NIPALS algorithm. It allows multiple response variables.
#' Although the main use of the function is for projection, it also retrieves 
#' regression coefficients. NOTE: For internal use only! This R function is a
#' thin wrapper that forwards its arguments to the compiled routine
#' \code{_resemble_opls_for_projection}.
#' @usage 
#' opls_for_projection(X, Y, ncomp, scale,
#'                     maxiter, tol,
#'                     pcSelmethod = "var",
#'                     pcSelvalue = 0.01, 
#'                     algorithm = "pls", 
#'                     xls_min_w = 3, 
#'                     xls_max_w = 15)
#' @param X a matrix of predictor variables.
#' @param Y a matrix of either a single or multiple response variables.
#' @param ncomp the number of pls components.
#' @param scale logical indicating whether \code{X} must be scaled.
#' @param maxiter maximum number of iterations.
#' @param tol limit for convergence of the algorithm in the nipals algorithm.
#' @param pcSelmethod the method for selecting the number of components. 
#' Options are: \code{'manual'}, \code{'cumvar'} (for selecting the number of 
#' principal components based on a given cumulative amount of explained 
#' variance) and \code{'var'} (for selecting the number of principal components 
#' based on a given amount of explained variance). Default is \code{'var'}.
#' @param pcSelvalue a numerical value that complements the selected method 
#' (\code{pcSelmethod}). 
#' If \code{'cumvar'} is chosen, \code{pcSelvalue} must be a value 
#' (larger than 0 and below 1) indicating the maximum amount of cumulative 
#' variance that the retained components should explain. 
#' If \code{'var'} is chosen (default), \code{pcSelvalue} must be a value
#' (larger than 0 and below 1) indicating that components that explain
#' (individually) a variance lower than this threshold must be excluded. 
#' If \code{'manual'} is chosen, \code{pcSelvalue} has no effect and the number
#' of components retrieved is the one specified in \code{ncomp}. 
#' Default is 0.01.
#' @param algorithm (for weights computation) a character string indicating 
#' what method to use. Options are:
#' \code{'pls'} for pls (using covariance between X and Y), 
#' \code{'mpls'} for modified pls (using correlation between X and Y) or
#' \code{'xls'} for extended pls (as implemented in BUCHI NIRWise PLUS software).
#' @param xls_min_w (for weights computation) an integer indicating the minimum window size for the "xls"
#' method. Only used if \code{algorithm = 'xls'}. Default is 3 (as in BUCHI NIRWise PLUS software).
#' @param xls_max_w (for weights computation) an integer indicating the maximum window size for the "xls"
#' method. Only used if \code{algorithm = 'xls'}. Default is 15 (as in BUCHI NIRWise PLUS software).
#' @return a list containing the following elements:
#' \itemize{
#' \item{\code{coefficients}}{ the matrix of regression coefficients.}
#' \item{\code{bo}}{ a matrix of one row containing the intercepts for 
#' each component.}
#' \item{\code{scores}}{ the matrix of scores.}
#' \item{\code{X_loadings}}{ the matrix of X loadings.}
#' \item{\code{Y_loadings}}{ the matrix of Y loadings.}
#' \item{\code{projection_mat}}{ the projection matrix.}
#' \item{\code{Y}}{ the \code{Y} input.}
#' \item{\code{variance}}{ a \code{list} containing two objects: \code{x_var} 
#' and \code{y_var}. 
#' These objects contain information on the explained variance for the \code{X} 
#' and \code{Y} matrices respectively.}
#' \item{\code{transf}}{ a \code{list} containing two objects: \code{Xcenter} 
#' and \code{Xscale}.}
#' \item{\code{weights}}{ the matrix of weights.}
#' }
#' @author Leonardo Ramirez-Lopez
#' @keywords internal 
#' @useDynLib resemble
opls_for_projection <- function(X, Y, ncomp, scale, maxiter, tol, pcSelmethod = "var", pcSelvalue = 0.01, algorithm = "pls", xls_min_w = 3L, xls_max_w = 15L) {
    .Call('_resemble_opls_for_projection', PACKAGE = 'resemble', X, Y, ncomp, scale, maxiter, tol, pcSelmethod, pcSelvalue, algorithm, xls_min_w, xls_max_w)
}

# NOTE(review): the original text described the `vip` element as "the
# projection matrix", which looks copy-pasted from `projection_mat`, and the
# description mentioned a "sensivity radius" while the return list documents a
# `selectivity_ratio`. `vip` presumably stands for variable importance in
# projection; confirm both against the C++ source.
#' @title Orthogonal scores algorithm of partial least squares (opls_get_all)
#' @description Computes orthogonal scores partial least squares (opls_get_all) 
#' regressions with the NIPALS algorithm. It retrieves a comprehensive set of
#' pls outputs (e.g. vip and selectivity ratio). It allows multiple response 
#' variables. NOTE: For internal use only! This R function is a thin wrapper
#' that forwards its arguments to the compiled routine
#' \code{_resemble_opls_get_all}.
#' @usage 
#' opls_get_all(X, 
#'              Y, 
#'              ncomp, 
#'              scale, 
#'              maxiter, 
#'              tol, 
#'              algorithm = "pls", 
#'              xls_min_w = 3, 
#'              xls_max_w = 15)
#' @param X a matrix of predictor variables.
#' @param Y a matrix of either a single or multiple response variables.
#' @param ncomp the number of pls components.
#' @param scale logical indicating whether \code{X} must be scaled.
#' @param maxiter maximum number of iterations.
#' @param tol limit for convergence of the algorithm in the nipals algorithm.
#' @param algorithm (for weights computation) a character string indicating 
#' what method to use. Options are:
#' \code{'pls'} for pls (using covariance between X and Y), 
#' \code{'mpls'} for modified pls (using correlation between X and Y) or
#' \code{'xls'} for extended pls (as implemented in BUCHI NIRWise PLUS software).
#' @param xls_min_w (for weights computation) an integer indicating the minimum window size for the "xls"
#' method. Only used if \code{algorithm = 'xls'}. Default is 3 (as in BUCHI NIRWise PLUS software).
#' @param xls_max_w (for weights computation) an integer indicating the maximum window size for the "xls"
#' method. Only used if \code{algorithm = 'xls'}. Default is 15 (as in BUCHI NIRWise PLUS software).
#' @return a list containing the following elements:
#' \itemize{
#' \item{\code{ncomp}}{ the number of components used.}
#' \item{\code{coefficients}}{ the matrix of regression coefficients.}
#' \item{\code{bo}}{ a matrix of one row containing the intercepts for each component.}
#' \item{\code{scores}}{ the matrix of scores.}
#' \item{\code{X_loadings}}{ the matrix of X loadings.}
#' \item{\code{Y_loadings}}{ the matrix of Y loadings.}
#' \item{\code{vip}}{ the vip values.}
#' \item{\code{selectivity_ratio}}{ the matrix of selectivity ratio (see Rajalahti, Tarja, et al. 2009).}
#' \item{\code{Y}}{ the \code{Y} input.}
#' \item{\code{variance}}{ a \code{list} containing two objects: \code{x_var} and \code{y_var}. 
#' These objects contain information on the explained variance for the \code{X} and \code{Y} matrices respectively.}
#' \item{\code{transf}}{ a \code{list} containing two objects: \code{Xcenter} and \code{Xscale}.}
#' \item{\code{weights}}{ the matrix of weights.}} 
#' @author Leonardo Ramirez-Lopez
#' @keywords internal 
#' @useDynLib resemble
opls_get_all <- function(X, Y, ncomp, scale, maxiter, tol, algorithm = "pls", xls_min_w = 3L, xls_max_w = 15L) {
    .Call('_resemble_opls_get_all', PACKAGE = 'resemble', X, Y, ncomp, scale, maxiter, tol, algorithm, xls_min_w, xls_max_w)
}

#' @title Orthogonal scores algorithm of partial least squares (opls)
#' @description Computes orthogonal scores partial least squares (opls) 
#' regressions with the NIPALS algorithm. It allows multiple response variables. 
#' It does not return the variance information of the components. NOTE: For 
#' internal use only! This R function is a thin wrapper that forwards its
#' arguments to the compiled routine \code{_resemble_opls}.
#' @usage 
#' opls(X, 
#'      Y, 
#'      ncomp, 
#'      scale, 
#'      maxiter, 
#'      tol, 
#'      algorithm = "pls", 
#'      xls_min_w = 3, 
#'      xls_max_w = 15)
#'      
#' @param X a matrix of predictor variables.
#' @param Y a matrix of either a single or multiple response variables.
#' @param ncomp the number of pls components.
#' @param scale logical indicating whether \code{X} must be scaled.
#' @param maxiter maximum number of iterations.
#' @param tol limit for convergence of the algorithm in the nipals algorithm.
#' @param algorithm (for weights computation) a character string indicating 
#' what method to use. Options are:
#' \code{'pls'} for pls (using covariance between X and Y), 
#' \code{'mpls'} for modified pls (using correlation between X and Y) or
#' \code{'xls'} for extended pls (as implemented in BUCHI NIRWise PLUS software).
#' @param xls_min_w (for weights computation) an integer indicating the minimum window size for the "xls"
#' method. Only used if \code{algorithm = 'xls'}. Default is 3 (as in BUCHI NIRWise PLUS software).
#' @param xls_max_w (for weights computation) an integer indicating the maximum window size for the "xls"
#' method. Only used if \code{algorithm = 'xls'}. Default is 15 (as in BUCHI NIRWise PLUS software).
#' @return a list containing the following elements:
#' \itemize{
#' \item{\code{coefficients}}{ the matrix of regression coefficients.}
#' \item{\code{bo}}{ a matrix of one row containing the intercepts for each component.}
#' \item{\code{scores}}{ the matrix of scores.}
#' \item{\code{X_loadings}}{ the matrix of X loadings.}
#' \item{\code{Y_loadings}}{ the matrix of Y loadings.}
#' \item{\code{projection_mat}}{ the projection matrix.}
#' \item{\code{Y}}{ the \code{Y} input.}
#' \item{\code{transf}}{ a \code{list} containing two objects: \code{Xcenter} and \code{Xscale}.}
#' \item{\code{weights}}{ the matrix of weights.}} 
#' @author Leonardo Ramirez-Lopez
#' @keywords internal 
#' @useDynLib resemble
opls <- function(X, Y, ncomp, scale, maxiter, tol, algorithm = "pls", xls_min_w = 3L, xls_max_w = 15L) {
    .Call('_resemble_opls', PACKAGE = 'resemble', X, Y, ncomp, scale, maxiter, tol, algorithm, xls_min_w, xls_max_w)
}

#' @title Fast orthogonal scores algorithm of partial least squares (opls)
#' @description Computes orthogonal scores partial least squares (opls) 
#' regressions with the NIPALS algorithm. It allows multiple response variables. 
#' In contrast to the \code{opls} function, this one does not compute
#' unnecessary data for (local) regression.
#' For internal use only! This R function is a thin wrapper that forwards its
#' arguments to the compiled routine \code{_resemble_opls_get_basics}.
#' @usage 
#' opls_get_basics(X, Y, ncomp, scale, 
#'                 maxiter, tol, 
#'                 algorithm = "pls", 
#'                 xls_min_w = 3, 
#'                 xls_max_w = 15)
#' @param X a matrix of predictor variables.
#' @param Y a matrix of either a single or multiple response variables.
#' @param ncomp the number of pls components.
#' @param scale logical indicating whether \code{X} must be scaled.
#' @param maxiter maximum number of iterations.
#' @param tol limit for convergence of the algorithm in the nipals algorithm.
#' @param algorithm (for weights computation) a character string indicating 
#' what method to use. Options are:
#' \code{'pls'} for pls (using covariance between X and Y), 
#' \code{'mpls'} for modified pls (using correlation between X and Y) or
#' \code{'xls'} for extended pls (as implemented in BUCHI NIRWise PLUS software).
#' @param xls_min_w (for weights computation) an integer indicating the minimum window size for the "xls"
#' method. Only used if \code{algorithm = 'xls'}. Default is 3 (as in BUCHI NIRWise PLUS software).
#' @param xls_max_w (for weights computation) an integer indicating the maximum window size for the "xls"
#' method. Only used if \code{algorithm = 'xls'}. Default is 15 (as in BUCHI NIRWise PLUS software).
#' @return a list containing the following elements:
#' \itemize{
#' \item{\code{coefficients}}{ the matrix of regression coefficients.}
#' \item{\code{bo}}{ a matrix of one row containing the intercepts for each component.}
#' \item{\code{Y_loadings}}{ the matrix of Y loadings.}
#' \item{\code{projection_mat}}{ the projection matrix.}
#' \item{\code{transf}}{ a \code{list} containing two objects: \code{Xcenter} and \code{Xscale}.}
#' } 
#' @author Leonardo Ramirez-Lopez
#' @keywords internal 
#' @useDynLib resemble
opls_get_basics <- function(X, Y, ncomp, scale, maxiter, tol, algorithm = "pls", xls_min_w = 3L, xls_max_w = 15L) {
    .Call('_resemble_opls_get_basics', PACKAGE = 'resemble', X, Y, ncomp, scale, maxiter, tol, algorithm, xls_min_w, xls_max_w)
}

#' @title Prediction function for the \code{opls} and \code{fopls} functions
#' @description Predicts response values based on a model generated by either
#' the \code{opls} or the \code{fopls} functions. 
#' For internal use only! This R function is a thin wrapper that forwards its
#' arguments to the compiled routine \code{_resemble_predict_opls}.
#' @usage predict_opls(bo, b, ncomp, newdata, scale, Xscale)
#' @param bo a numeric value indicating the intercept.
#' @param b the matrix of regression coefficients.
#' @param ncomp an integer value indicating how many components must be used in the prediction.
#' @param newdata a matrix containing the predictor variables.
#' @param scale a logical indicating whether the matrix of predictors used to create the regression model was scaled.
#' @param Xscale if \code{scale = TRUE} a matrix of one row with the values that must be used for scaling \code{newdata}.
#' @return a matrix of predicted values.
#' @author Leonardo Ramirez-Lopez
#' @keywords internal 
#' @useDynLib resemble
predict_opls <- function(bo, b, ncomp, newdata, scale, Xscale) {
    .Call('_resemble_predict_opls', PACKAGE = 'resemble', bo, b, ncomp, newdata, scale, Xscale)
}

#' @title Projection function for the \code{opls} function
#' @description Projects new spectra onto a PLS space based on a model
#' generated by either the \code{opls} or the \code{opls2} functions. 
#' For internal use only! This R function is a thin wrapper that forwards its
#' arguments to the compiled routine \code{_resemble_project_opls}.
#' @usage project_opls(projection_mat, ncomp, newdata, scale, Xcenter, Xscale)
#' @param projection_mat the projection matrix generated by the \code{opls} function.
#' @param ncomp an integer value indicating how many components must be used in the projection.
#' @param newdata a matrix containing the predictor variables.
#' @param scale a logical indicating whether the matrix of predictors used to create the regression model was scaled.
#' @param Xcenter a matrix of one row with the values that must be used for centering \code{newdata}.
#' @param Xscale if \code{scale = TRUE} a matrix of one row with the values that must be used for scaling \code{newdata}.
#' @return a matrix corresponding to the new spectra projected onto the PLS space.
#' @author Leonardo Ramirez-Lopez
#' @keywords internal 
#' @useDynLib resemble
project_opls <- function(projection_mat, ncomp, newdata, scale, Xcenter, Xscale) {
    .Call('_resemble_project_opls', PACKAGE = 'resemble', projection_mat, ncomp, newdata, scale, Xcenter, Xscale)
}

#' @title Projection to pls and then reconstruction
#' @description Projects spectra onto a PLS space and then reconstructs them
#' back. This R function is a thin wrapper that forwards its arguments to the
#' compiled routine \code{_resemble_reconstruction_error}.
#' @usage reconstruction_error(x, 
#'                             projection_mat, 
#'                             xloadings, 
#'                             scale, 
#'                             Xcenter, 
#'                             Xscale, 
#'                             scale_back = FALSE)
#' @param x a matrix to project.
#' @param projection_mat the projection matrix generated by the \code{opls_get_basics} function.
#' @param xloadings the loadings matrix generated by the \code{opls_get_basics} function.
#' @param scale logical indicating if scaling is required.
#' @param Xcenter a matrix of one row with the centering values.
#' @param Xscale a matrix of one row with the scaling values.
#' @param scale_back logical indicating whether to compute the reconstruction
#' error after de-centering and de-scaling the data. Default is \code{FALSE}.
#' @return a matrix of 1 row and 1 column.
#' @author Leonardo Ramirez-Lopez
#' @keywords internal 
#' @useDynLib resemble
reconstruction_error <- function(x, projection_mat, xloadings, scale, Xcenter, Xscale, scale_back = FALSE) {
    .Call('_resemble_reconstruction_error', PACKAGE = 'resemble', x, projection_mat, xloadings, scale, Xcenter, Xscale, scale_back)
}

# NOTE(review): the original text spelled the third method both as
# 'completewapls1p' (in @param method and @param wapls_grid) and as
# "completewapls1" (in @return); the former was used throughout below. The
# second list in @return (statistics = FALSE) still mentions st_rmse_seg and
# rsq_seg exactly as the original did. Confirm both against the C++ source.
#' @title Internal Cpp function for performing leave-group-out cross-validations for pls regression 
#' @description For internal use only! This R function is a thin wrapper that
#' forwards its arguments to the compiled routine
#' \code{_resemble_opls_cv_cpp}.
#' @usage opls_cv_cpp(X, Y, scale, method, 
#'                   mindices, pindices, 
#'                   min_component, ncomp, 
#'                   new_x, 
#'                   maxiter, tol, 
#'                   wapls_grid, 
#'                   algorithm, 
#'                   statistics = TRUE)
#' @param X a matrix of predictor variables.
#' @param Y a matrix of a single response variable.
#' @param scale a logical indicating whether the matrix of predictors 
#' (\code{X}) must be scaled.
#' @param method the method used for regression. One of the following options: 
#' \code{'pls'} or \code{'wapls'} or \code{'completewapls1p'}.
#' @param mindices a matrix with \code{n} rows and \code{m} columns where 
#' \code{m} is equivalent to the number of resampling iterations. The elements 
#' of each column indicate the indices of the observations to be used for 
#' modeling at each iteration.
#' @param pindices a matrix with \code{k} rows and \code{m} columns where 
#' \code{m} is equivalent to the number of 
#' resampling iterations. The elements of each column indicate the indices of 
#' the observations to be used for predicting at each iteration.
#' @param min_component an integer indicating the number of minimum pls 
#' components (if the \code{method = 'pls'}).
#' @param ncomp an integer indicating the number of pls components.
#' @param new_x a matrix of one row corresponding to the observation to be 
#' predicted (if the \code{method = 'wapls'}).
#' @param maxiter maximum number of iterations.
#' @param tol limit for convergence of the algorithm in the nipals algorithm.
#' @param wapls_grid the grid on which the search for the best combination of 
#' minimum and maximum pls factors of \code{'wapls'} is based in case 
#' \code{method = 'completewapls1p'}.
#' @param algorithm either pls (\code{'pls'}) or modified pls (\code{'mpls'}). 
#' See the \code{get_weights} function.
#' @param statistics a logical value indicating whether the precision and 
#' accuracy statistics are to be returned, otherwise the predictions for each 
#' validation segment are retrieved.
#' @return 
#' if \code{statistics = TRUE} a list containing the following one-row matrices:
#' \itemize{
#' \item{\code{rmse_seg}}{ the RMSEs.}
#' \item{\code{st_rmse_seg}}{ the standardized RMSEs.}
#' \item{\code{rsq_seg}}{ the coefficients of determination.}
#' } 
#' 
#' if \code{statistics = FALSE} a list containing the following one-row matrices:
#' \itemize{
#' \item{\code{predictions}}{ the predictions of each of the validation 
#' segments in \code{pindices}. Each column in \code{pindices} contains the 
#' validation indices of a segment.}
#' \item{\code{st_rmse_seg}}{ the standardized RMSEs.}
#' \item{\code{rsq_seg}}{ the coefficients of determination.}
#' } 
#' 
#' If \code{method = "wapls"}, data of the pls weights are output in this 
#' list (\code{compweights}).
#'
#' If \code{method = "completewapls1p"}, data of all the combinations of 
#' components passed in \code{wapls_grid} are 
#' output in this list (\code{complete_compweights}).
#' 
#' @author Leonardo Ramirez-Lopez
#' @keywords internal 
#' @useDynLib resemble
opls_cv_cpp <- function(X, Y, scale, method, mindices, pindices, min_component, ncomp, new_x, maxiter, tol, wapls_grid, algorithm, statistics = TRUE) {
    .Call('_resemble_opls_cv_cpp', PACKAGE = 'resemble', X, Y, scale, method, mindices, pindices, min_component, ncomp, new_x, maxiter, tol, wapls_grid, algorithm, statistics)
}

#' @title Orthogonal scores algorithm of partial least squares (opls)
#' @description Computes orthogonal scores partial least squares (opls) 
#' regressions with the NIPALS algorithm. It allows multiple response variables. 
#' It does not return the variance information of the components. NOTE: For 
#' internal use only! This R function is a thin wrapper that forwards its
#' arguments to the compiled routine \code{_resemble_opls_gs}.
#' @usage 
#' opls_gs(Xr, 
#'         Yr,
#'         Xu, 
#'         ncomp,
#'         scale,     
#'         response = FALSE, 
#'         reconstruction = TRUE,
#'         similarity = TRUE,
#'         fresponse = TRUE,
#'         algorithm = "pls")
#'         
#' @param Xr a matrix of predictor variables for the training set.
#' @param Yr a matrix of a single response variable for the training set.
#' @param Xu a matrix of predictor variables for the test set.
#' @param ncomp the number of pls components.
#' @param scale logical indicating whether \code{X} must be scaled.
#' @param response logical indicating whether to compute the prediction of \code{Yu}.
#' @param reconstruction logical indicating whether to compute the reconstruction error of \code{Xu}.
#' @param similarity logical indicating whether to compute the distance score between \code{Xr} and \code{Xu} (in the pls space).
#' @param fresponse logical indicating whether to compute the score of the variance not explained for \code{Yu}.
#' @param algorithm (for weights computation) a character string indicating 
#' what method to use. Options are:
#' \code{'pls'} for pls (using covariance between X and Y) or
#' \code{'mpls'} for modified pls (using correlation between X and Y).
#' @return a list containing the following elements:
#' \itemize{
#' \item{\code{ncomp}}{ the number of components.}
#' \item{\code{pred_response}}{ the response predictions for \code{Xu}.}
#' \item{\code{rmse_reconstruction}}{ the rmse of the reconstruction for \code{Xu}.}
#' \item{\code{score_dissimilarity}}{ the distance score between \code{Xr} and \code{Xu}.}} 
#' @author Leonardo Ramirez-Lopez
#' @keywords internal 
#' @useDynLib resemble
opls_gs <- function(Xr, Yr, Xu, ncomp, scale, response = FALSE, reconstruction = TRUE, similarity = TRUE, fresponse = TRUE, algorithm = "pls") {
    .Call('_resemble_opls_gs', PACKAGE = 'resemble', Xr, Yr, Xu, ncomp, scale, response, reconstruction, similarity, fresponse, algorithm)
}

#' @title Gaussian process regression with linear kernel (gaussian_process)
#' @description Carries out a Gaussian process regression with a linear
#' kernel (dot product). For internal use only! This R function is a thin
#' wrapper that forwards its arguments to the compiled routine
#' \code{_resemble_gaussian_process}.
#' @usage gaussian_process(X, Y, noisev, scale) 
#' @param X a matrix of predictor variables.
#' @param Y a matrix with a single response variable.
#' @param noisev a value indicating the variance of the noise for Gaussian
#' process regression. Default is 0.001.
#' @param scale a logical indicating whether both the predictors 
#' and the response variable must be scaled to zero mean and unit variance.
#' Default is \code{TRUE}.
#' @return a list containing the following elements:
#' \itemize{
#' \item{\code{b}}{ the regression coefficients.}
#' \item{\code{Xz}}{ the (final transformed) matrix of predictor variables.}
#' \item{\code{alpha}}{ the alpha matrix.}
#' \item{\code{is.scaled}}{ logical indicating whether both the predictors and response variable were scaled to zero mean and unit variance.}
#' \item{\code{Xcenter}}{ if matrix of predictors was scaled, the centering vector used for \code{X}.}
#' \item{\code{Xscale}}{ if matrix of predictors was scaled, the scaling vector used for \code{X}.}
#' \item{\code{Ycenter}}{ if matrix of predictors was scaled, the centering vector used for \code{Y}.}
#' \item{\code{Yscale}}{ if matrix of predictors was scaled, the scaling vector used for \code{Y}.}
#' }
#' @author Leonardo Ramirez-Lopez
#' @keywords internal 
#' @useDynLib resemble
gaussian_process <- function(X, Y, noisev = 0.001, scale = TRUE) {
    .Call('_resemble_gaussian_process', PACKAGE = 'resemble', X, Y, noisev, scale)
}

#' @title Prediction function for the \code{gaussian_process} function (Gaussian process regression with dot product covariance)
#' @description Predicts response values based on a model generated by the
#' \code{gaussian_process} function (Gaussian process regression with dot
#' product covariance). For internal use only!
#' @usage predict_gaussian_process(Xz, alpha, newdata, scale, Xcenter, Xscale, Ycenter, Yscale)
#' @param Xz the (final transformed) matrix of predictor variables, i.e. the
#' \code{Xz} element returned by \code{gaussian_process}.
#' @param alpha the alpha matrix, i.e. the \code{alpha} element returned by
#' \code{gaussian_process}.
#' @param newdata a matrix containing the predictor variables.
#' @param scale a logical indicating whether the matrix of predictors used to
#' create the regression model (in the \code{gaussian_process} function) was
#' scaled.
#' @param Xcenter if \code{scale = TRUE}, a matrix of one row with the values
#' that must be used for centering \code{newdata}.
#' @param Xscale if \code{scale = TRUE}, a matrix of one row with the values
#' that must be used for scaling \code{newdata}.
#' @param Ycenter if \code{scale = TRUE}, a matrix of one row with the values
#' that must be used for accounting for the centering of the response variable.
#' @param Yscale if \code{scale = TRUE}, a matrix of one row with the values
#' that must be used for accounting for the scaling of the response variable.
#' @return a matrix of predicted values
#' @author Leonardo Ramirez-Lopez
#' @keywords internal
#' @useDynLib resemble
predict_gaussian_process <- function(Xz,
                                     alpha,
                                     newdata,
                                     scale,
                                     Xcenter,
                                     Xscale,
                                     Ycenter,
                                     Yscale) {
  # Thin wrapper: arguments are handed unchanged to the compiled routine.
  .Call(
    "_resemble_predict_gaussian_process",
    PACKAGE = "resemble",
    Xz, alpha, newdata, scale,
    Xcenter, Xscale, Ycenter, Yscale
  )
}

#' @title Internal Cpp function for performing leave-group-out cross
#' validations for gaussian process
#' @description For internal use only!
#' @usage gaussian_process_cv(X, Y, mindices, pindices, noisev = 0.001,
#' scale = TRUE, statistics = TRUE)
#' @param X a matrix of predictor variables.
#' @param Y a matrix of a single response variable.
#' @param mindices a matrix with \code{n} rows and \code{m} columns where
#' \code{m} is equivalent to the number of resampling iterations. The elements
#' of each column indicate the indices of the observations to be used for
#' modeling at each iteration.
#' @param pindices a matrix with \code{k} rows and \code{m} columns where
#' \code{m} is equivalent to the number of resampling iterations. The elements
#' of each column indicate the indices of the observations to be used for
#' predicting at each iteration.
#' @param noisev a value indicating the variance of the noise for Gaussian
#' process regression. Default is 0.001.
#' @param scale a logical indicating whether both the predictors
#' and the response variable must be scaled to zero mean and unit variance.
#' @param statistics a logical value indicating whether the precision and
#' accuracy statistics are to be returned, otherwise the predictions for each
#' validation segment are retrieved.
#' @return a list containing the following one-row matrices:
#' \describe{
#' \item{\code{rmse.seg}}{ the RMSEs.}
#' \item{\code{st.rmse.seg}}{ the standardized RMSEs.}
#' \item{\code{rsq.seg}}{ the coefficients of determination.}
#' }
#' @author Leonardo Ramirez-Lopez
#' @keywords internal
#' @useDynLib resemble
gaussian_process_cv <- function(X,
                                Y,
                                mindices,
                                pindices,
                                noisev = 0.001,
                                scale = TRUE,
                                statistics = TRUE) {
  # Thin wrapper: arguments are handed unchanged to the compiled routine.
  .Call(
    "_resemble_gaussian_process_cv",
    PACKAGE = "resemble",
    X, Y, mindices, pindices,
    noisev, scale, statistics
  )
}

#' @title Principal components based on the non-linear iterative partial least squares (nipals) algorithm
#' @description Computes principal components with the NIPALS algorithm.
#' For internal use only!
#' @usage
#' pca_nipals(X, ncomp, center, scale,
#'            maxiter, tol,
#'            pcSelmethod = "var",
#'            pcSelvalue = 0.01)
#' @param X a matrix of predictor variables.
#' @param ncomp the number of principal components.
#' @param center logical indicating whether \code{X} must be centered.
#' @param scale logical indicating whether \code{X} must be scaled.
#' @param maxiter maximum number of iterations.
#' @param tol limit for convergence of the algorithm in the nipals algorithm.
#' @param pcSelmethod the method for selecting the number of components.
#' Options are: \code{'cumvar'} (for selecting the number of principal
#' components based on a given cumulative amount of explained variance) and
#' \code{"var"} (for selecting the number of principal components based on a
#' given amount of explained variance). Default is \code{'var'}.
#' @param pcSelvalue a numerical value that complements the selected method
#' (\code{pcSelmethod}). If \code{"cumvar"} is chosen, it must be a value
#' (larger than 0 and below 1) indicating the maximum amount of cumulative
#' variance that the retained components should explain. If \code{"var"} is
#' chosen, it must be a value (larger than 0 and below 1) indicating that
#' components that explain (individually) a variance lower than this threshold
#' must be excluded. If \code{"manual"} is chosen, it must be a value
#' specifying the desired number of principal components to retain.
#' Default is 0.01.
#' @return a list containing the following elements:
#' \describe{
#' \item{\code{pc_scores}}{ a matrix of principal component scores.}
#' \item{\code{pc_loadings}}{ a matrix of principal component loadings.}
#' \item{\code{variance}}{ a matrix of the variance of the principal components.}
#' \item{\code{scale}}{ a \code{list} containing two objects: \code{center}
#' and \code{scale}, which correspond to the vectors used to center and scale
#' the input matrix.}
#' }
#' @author Leonardo Ramirez-Lopez
#' @keywords internal
#' @useDynLib resemble
pca_nipals <- function(X,
                       ncomp,
                       center,
                       scale,
                       maxiter,
                       tol,
                       pcSelmethod = "var",
                       pcSelvalue = 0.01) {
  # Thin wrapper: arguments are handed unchanged to the compiled routine.
  .Call(
    "_resemble_pca_nipals",
    PACKAGE = "resemble",
    X, ncomp, center, scale,
    maxiter, tol, pcSelmethod, pcSelvalue
  )
}

Try the resemble package in your browser

Any scripts or data that you put into this service are public.

resemble documentation built on April 21, 2023, 1:13 a.m.