R/regmodel.R
In mdatools: Multivariate Data Analysis for Chemometrics

Documented in crossval.regmodel getRegcoeffs.regmodel plotPredictions.regmodel plotRegcoeffs.regmodel plotRMSERatio.regmodel plotRMSE.regmodel plotYResiduals.regmodel print.regmodel summary.regmodel

regmodel <- function(...) {
}

#' Cross-validation of a regression model
#'
#' @description
#' Does cross-validation of a regression model
#'
#' @param obj
#' a regression model (object of class \code{regmodel})
#' @param x
#' a matrix with x values (predictors from calibration set)
#' @param y
#' a matrix with y values (responses from calibration set)
#' @param cv
#' number of segments (if cv = 1, full cross-validation will be used)
#' @param cal.fun
#' reference to function for model calibration
#' @param pred.fun
#' reference to function for getting predicted y-values (see description)
#' @param cv.scope
#' scope for center/scale operations inside CV loop: 'global' — using globally computed mean and std
#' or 'local' — recompute new for each local calibration set.
#'
#' @return
#' object of class \code{plsres} with results of cross-validation
#'
#' Function `pred.fun` must take four agruments: autoscaled x-values, array with regression
#' coefficients, vectors for centring and scaling of y-values (if used). The function must
#' return predicted y-values in original units (unscaled and uncentered).
#'
#' @export
crossval.regmodel <- function(obj, x, y, cv, cal.fun, pred.fun, cv.scope = 'local') {

   # get attributes
   x.attrs <- attributes(x)
   y.attrs <- attributes(y)

   # remove excluded rows
   if (length(x.attrs$exclrows) > 0) {
      x <- x[-x.attrs$exclrows, , drop = FALSE]
      y <- y[-x.attrs$exclrows, , drop = FALSE]
      attr(x, "exclrows") <- NULL
      attr(y, "exclrows") <- NULL
   }

   # remove excluded columns
   if (length(x.attrs$exclcols) > 0) {
      x <- x[, -x.attrs$exclcols, drop = FALSE]
      attr(x, "exclcols") <- NULL
   }

   if (length(y.attrs$exclcols) > 0) {
      y <- y[, -y.attrs$exclcols, drop = FALSE]
      attr(y, "exclcols") <- NULL
   }

   y.ref <- y

   # get main data parameters
   nvar <- ncol(x)
   nobj <- nrow(x)
   nresp <- ncol(y)
   ncomp <- obj$ncomp

   # get matrix with indices for cv segments
   cv_ind <- crossval(cv, nobj = nobj, resp = y[, 1])
   nseg <- max(cv_ind)
   nrep <- ncol(cv_ind)

   # prepare arrays for results
   yp.cv <- array(0, dim = c(nobj, ncomp, nresp))
   jk.coeffs <- array(0, dim = c(nvar, ncomp, nresp, nseg))

   # define values for global scaling
   xcenter <- obj$xcenter
   ycenter <- obj$ycenter
   xscale  <- obj$xscale
   yscale  <- obj$yscale

   if (cv.scope == 'global') {
      x <- prep.autoscale(x, xcenter, xscale)
      y <- prep.autoscale(y, ycenter, yscale)
   }

   # loop over segments and repetitions
   for (ir in seq_len(nrep)) {
      for (is in seq_len(nseg)) {
         ind <- which(cv_ind[, ir] == is)
         if (length(ind) == 0) next

         xc <- x[-ind, , drop = FALSE]
         yc <- y[-ind, , drop = FALSE]
         xt <- x[ind, , drop = FALSE]

         # redefine values for local scaling if selected
         if (cv.scope != 'global') {
            xcenter <- if (obj$center) apply(xc, 2, mean) else rep(0, ncol(xc))
            ycenter <- if (obj$center) apply(yc, 2, mean) else rep(0, ncol(yc))
            xscale <- if (obj$scale)  apply(xc, 2, sd) else rep(1, ncol(xc))
            yscale <- if (obj$scale)  apply(yc, 2, sd) else rep(1, ncol(yc))

            xc <- prep.autoscale(xc, xcenter, xscale)
            yc <- prep.autoscale(yc, ycenter, yscale)
            xt <- prep.autoscale(xt, xcenter, xscale)
         }


         # create a model
         m.loc <- cal.fun(xc, yc, ncomp, method = obj$method, center = FALSE, scale = FALSE, cv = TRUE)

         if (m.loc$ncomp < ncomp) {
             stop(
                "Local model inside cross-validation can not be computed with the same number of\n",
                "components as used for calibration. Limit the number by using parameter 'ncomp'\n",
                "and run the code again.\n", call. = FALSE
             )
         }

         yp <- pred.fun(xt, m.loc$coeffs$values, ycenter = ycenter, yscale = yscale)


         # if any have NA values quit
         if (any(is.na(yp))) {
            stop("NA results produced during cross-validation.")
         }

         # save results
         yp.cv[ind, , ] <- yp.cv[ind, , , drop = FALSE] + yp
         jk.coeffs[, , , is] <- jk.coeffs[, , , is, drop = FALSE] +
            array(m.loc$coeffs$values, dim = c(dim(m.loc$coeffs$values), 1))
      }
   }

   # average results over repetitions
   yp.cv <- yp.cv / nrep
   jk.coeffs <- jk.coeffs / nrep

   # set up names
   dimnames(jk.coeffs) <- list(
      colnames(x),
      colnames(obj$coeffs$values),
      colnames(y),
      seq_len(nseg)
   )

   dimnames(yp.cv) <- list(
      rownames(x),
      colnames(obj$coeffs$values),
      colnames(y)
   )

   # make pls results and return
   return(list(y.pred = yp.cv, y.ref = y.ref, jk.coeffs = jk.coeffs))
}

#' Regression coefficients for PLS model'
#'
#' @description
#' Returns a matrix with regression coefficients for
#' the PLS model which can be applied to a data directly
#'
#' @param obj
#' a PLS model (object of class \code{pls})
#' @param ncomp
#' number of components to return the coefficients for
#' @param ny
#' if y is multivariate which variables you want to see the coefficients for
#' @param full
#' if TRUE the method also shows p-values and t-values as well as confidence intervals for the
#' coefficients (if available)
#' @param alpha
#' significance level for confidence intervals (a number between 0 and 1, e.g. 0.05)
#' @param ...
#' other parameters
#'
#' @details
#' The method recalculates the regression coefficients found by the PLS algorithm
#' taking into account centering and scaling of predictors and responses, so the
#' matrix with coefficients can be applied directly to original data (yp = Xb).
#'
#' If number of components is not specified, the optimal number, selected by user
#' or identified by a model will be used.
#'
#' If Jack-knifing method was used to get statistics for the coefficient the method
#' returns all statistics as well (p-value, t-value, confidence interval). In this case user
#' has to specified a number of y-variable (if there are many) to get the statistics and
#' the coefficients for. The confidence interval is computed for unstandardized coefficients.
#'
#' @return
#' A matrix  with regression coefficients and (optinally) statistics.
#'
#' @export
getRegcoeffs.regmodel <- function(obj, ncomp = obj$ncomp.selected, ny = 1, full = FALSE,
   alpha = 0.05, ...) {

   if (length(ncomp) != 1 || ncomp <= 0 || ncomp > obj$ncomp) {
      stop("Wrong value for number of components.")
   }

   attrs <- mda.getattr(obj$coeffs$values)
   out <- obj$coeffs$values[, ncomp, ny]

   # get center values and scale factors
   sx <- if (is.logical(obj$xscale)) rep(1, length(out)) else obj$xscale
   mx <- if (is.logical(obj$xcenter)) rep(0, length(out)) else obj$xcenter

   sy <- if (is.logical(obj$yscale)) rep(1, 1) else obj$yscale[ny]
   my <- if (is.logical(obj$ycenter)) rep(0, 1) else obj$ycenter[ny]


   # rescale coefficients and find intercept
   s <- sy / sx
   out <- matrix(c(my - sum(s * out * mx), s * out), ncol = 1)
   colnames(out) <- "Estimated"
   rownames(out) <- c("Intercept", dimnames(obj$coeffs$values)[[1]])


   if (full && !is.null(obj$coeffs$se)) {
      t  <- c(NA, obj$coeffs$t.values[, ncomp, ny])
      p  <- c(NA, obj$coeffs$p.values[, ncomp, ny])

      # standard error also needs to be rescaled and then new CI is computed
      se <- c(NA, obj$coeffs$se[, ncomp, ny] * s)
      ci <- cbind(out + qt(alpha/2, obj$coeffs$DoF) * se, out + qt(1 - alpha/2, obj$coeffs$DoF) * se)

      out <- cbind(out, se, t, p, ci)
      colnames(out)[2:6] <- c("Std. err.", "t-value", "p-value", paste0(round(c(alpha / 2, 1 - alpha / 2) * 100, 2), "%"))
   }

   attr(out, "exclrows") <- attrs$exclrows
   attr(out, "name") <- paste("Regression coefficients for ", obj$coeffs$respnames[ny])

   return(out)
}

#' Summary method for regression model object
#'
#' @description
#' Shows performance statistics for the model.
#'
#' @param object
#' a regression model (object of class \code{regmodel})
#' @param ncomp
#' number of components to show summary for
#' @param ny
#' which y variables to show the summary for (can be a vector)
#' @param res
#' list of results to show summary for
#' @param ...
#' other arguments
#'
#' @export
summary.regmodel <- function(object, ncomp = object$ncomp.selected,
   ny = seq_len(object$res$cal$nresp), res = object$res, ...) {

   if (length(ncomp) != 1 || ncomp < 1 || ncomp > dim(object$res[["cal"]]$y.pred)[2]) {
      stop("Wrong value for 'ncomp' parameter.")
   }

   cat("\nRegression model (class regmodel) summary\n")
   cat("\nPerformance and validation:\n")
   fprintf("Number of selected components: %d\n", ncomp)

   for (y in ny) {
      sum_data <- do.call(rbind, lapply(res, as.matrix, ny = y, ncomp = ncomp))
      rownames(sum_data) <- capitalize(names(res))

      sum_data[, "R2"] <- round(sum_data[, "R2"], 3)
      sum_data[, "RMSE"] <- mdaplot.formatValues(sum_data[, "RMSE"], round.only = TRUE)
      sum_data[, "Slope"] <- round(sum_data[, "Slope"], 3)
      sum_data[, "Bias"] <- round(sum_data[, "Bias"], 4)
      sum_data[, "RPD"] <- round(sum_data[, "RPD"], 1)

      attr(sum_data, "name") <- sprintf("\nResponse variable #%d (%s)", y, res[[1]]$respnames[y])
      mda.show(sum_data)
   }
   cat("\n")
}

#' Print method for PLS model object
#'
#' @description
#' Prints information about the object structure
#'
#' @param x
#' a regression model (object of class \code{regmodel})
#' @param ...
#' other arguments
#'
#' @export
print.regmodel <- function(x, ...) {
   cat("\nRegression model (class regmodel)\n")
   cat("\nCall:\n")
   print(x$call)
   cat("\nMajor fields:\n")
   cat("$ncomp - number of calculated components\n")
   cat("$ncomp.selected - number of selected components\n")
   cat("$coeffs - object (regcoeffs) with regression coefficients\n")
   cat("$res - list with result objects\n")
   cat("\nTry summary(model) and plot(model) to see the model performance.\n")
}


################################
#  Plotting methods            #
################################


#' RMSE plot for regression model
#'
#' @description
#' Shows plot with root mean squared error values vs. number of components for PLS model.
#'
#' @param obj
#' a regression model (object of class \code{regmodel})
#' @param ny
#' number of response variable to make the plot for (if y is multivariate)
#' @param type
#' type of the plot("b", "l" or "h")
#' @param labels
#' what to show as labels (vector or name, e.g. "names", "values", "indices")
#' @param xticks
#' vector with ticks for x-axis values
#' @param res
#' list with result objects
#' @param ylab
#' label for y-axis
#' @param ...
#' other plot parameters (see \code{mdaplotg} for details)
#'
#' @export
plotRMSE.regmodel <- function(obj, ny = 1, type = "b", labels = "values",
   xticks = seq_len(obj$ncomp), res = obj$res, ylab = paste0("RMSE (", obj$res$cal$respnames[ny], ")"), ...) {

   plot_data <- lapply(res, plotRMSE, ny = ny, show.plot = FALSE)
   mdaplotg(plot_data, type = type, xticks = xticks, labels = labels, ylab = ylab, ...)
}


#' RMSECV/RMSEC ratio plot for regression model
#'
#' @description
#' Shows plot with RMSECV/RMSEC values vs. RMSECV for each component.
#'
#' @param obj
#' a regression model (object of class \code{regmodel})
#' @param ny
#' number of response variable to make the plot for (if y is multivariate)
#' @param type
#' type of the plot (use only "b" or "l")
#' @param show.labels
#' logical, show or not labels for plot points
#' @param labels
#' vector with point labels (by default number of components)
#' @param main
#' main plot title
#' @param xlab
#' label for x-axis
#' @param ylab
#' label for y-axis
#' @param ...
#' other plot parameters (see \code{mdaplot} for details)
#'
#' @export
plotRMSERatio.regmodel <- function(obj, ny = 1, type = "b", show.labels = TRUE, labels = seq_len(obj$ncomp),
   main = paste0("RMSECV/RMSEC ratio (", obj$res$cal$respnames[ny], ")"),
   ylab = "RMSECV/RMSEC ratio",
   xlab = "RMSECV", ...) {

   stopifnot("Cross-validation results are not found." = !is.null(obj$res$cv))
   stopifnot("Parameter 'ny' has a wrong value." = (length(ny) == 1 && ny >= 1 && ny <= nrow(obj$res$cal$rmse)))

   plot_data <- matrix(obj$res$cv$rmse[ny, ] / obj$res$cal$rmse[ny, ], nrow = 1)
   attr(plot_data, "xaxis.values") <- obj$res$cv$rmse[ny, ]
   attr(plot_data, "xaxis.name") <- xlab

   mdaplot(plot_data, type = type, xlab = xlab, ylab = ylab, main = main, show.labels = show.labels,
   labels = labels, ...)
}


#' Predictions plot for regression model
#'
#' @description
#' Shows plot with predicted vs. reference (measured) y values for selected components.
#'
#' @param obj
#' a regression model (object of class \code{regmodel})
#' @param ncomp
#' how many components to use (if NULL - user selected optimal value will be used)
#' @param ny
#' number of response variable to make the plot for (if y is multivariate)
#' @param legend.position
#' position of legend on the plot (if shown)
#' @param show.line
#' logical, show or not line fit for the plot points
#' @param res
#' list with result objects
#' @param ...
#' other plot parameters (see \code{mdaplotg} for details)
#'
#' @export
plotPredictions.regmodel <- function(obj, ncomp = obj$ncomp.selected, ny = 1,
   legend.position = "topleft", show.line = TRUE, res = obj$res, ...) {

   if (length(ncomp) != 1 || ncomp < 1 || ncomp > dim(obj$res[["cal"]]$y.pred)[2]) {
      stop("Wrong value for 'ncomp' parameter.")
   }

   plot_data <- lapply(res, plotPredictions.regres, ny = ny, ncomp = ncomp, show.plot = FALSE)
   attr(plot_data[[1]], "name") <- sprintf("Predictions (ncomp = %d)", ncomp)
   plots <- mdaplotg(plot_data, type = "p", legend.position = legend.position, ...)

   if (show.line) {
      for (p in plots) {
         plotRegressionLine(p)
      }
   }
}

#' Y residuals plot for regression model
#'
#' @description
#' Shows plot with y residuals (predicted vs. reference values) for selected components.
#'
#' @param obj
#' a regression model (object of class \code{regmodel})
#' @param ncomp
#' how many components to use (if NULL - user selected optimal value will be used)
#' @param ny
#' number of response variable to make the plot for (if y is multivariate)
#' @param show.lines
#' allows to show the horizonta line at 0 level
#' @param res
#' list with result objects
#' @param ...
#' other plot parameters (see \code{mdaplotg} for details)
#'
#' @export
plotYResiduals.regmodel <- function(obj, ncomp = obj$ncomp.selected, ny = 1, show.lines = c(NA, 0),
   res = obj$res, ...) {

   if (length(ncomp) != 1 || ncomp < 1 || ncomp > dim(obj$res[["cal"]]$y.pred)[2]) {
      stop("Wrong value for 'ncomp' parameter.")
   }

   plot_data <- lapply(res, plotResiduals, ny = ny, ncomp = ncomp, show.plot = FALSE)
   attr(plot_data[[1]], "name") <- sprintf("Y-residuals (ncomp = %d)", ncomp)
   mdaplotg(plot_data, show.lines = show.lines, ...)
}

#' Regression coefficient plot for regression model
#'
#' @description
#' Shows plot with regression coefficient values. Is a proxy for \code{link{plot.regcoeffs}} method.
#'
#' @param obj
#' a regression model (object of class \code{regmodel})
#' @param ncomp
#' number of components to show the plot for
#' @param ...
#' other plot parameters (see \code{link{plot.regcoeffs}} for details)
#'
#' @export
plotRegcoeffs.regmodel <- function(obj, ncomp = obj$ncomp.selected, ...) {
   plot(obj$coeffs, ncomp = ncomp, ...)
}

Any scripts or data that you put into this service are public.

mdatools documentation built on Aug. 13, 2023, 1:06 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

mdatools
Multivariate Data Analysis for Chemometrics

R/regmodel.R
In mdatools: Multivariate Data Analysis for Chemometrics

Defines functions plotRegcoeffs.regmodel plotYResiduals.regmodel plotPredictions.regmodel plotRMSERatio.regmodel plotRMSE.regmodel print.regmodel summary.regmodel getRegcoeffs.regmodel crossval.regmodel regmodel

Documented in crossval.regmodel getRegcoeffs.regmodel plotPredictions.regmodel plotRegcoeffs.regmodel plotRMSERatio.regmodel plotRMSE.regmodel plotYResiduals.regmodel print.regmodel summary.regmodel

Try the mdatools package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

mdatools Multivariate Data Analysis for Chemometrics

R/regmodel.R In mdatools: Multivariate Data Analysis for Chemometrics

Defines functions plotRegcoeffs.regmodel plotYResiduals.regmodel plotPredictions.regmodel plotRMSERatio.regmodel plotRMSE.regmodel print.regmodel summary.regmodel getRegcoeffs.regmodel crossval.regmodel regmodel

Documented in crossval.regmodel getRegcoeffs.regmodel plotPredictions.regmodel plotRegcoeffs.regmodel plotRMSERatio.regmodel plotRMSE.regmodel plotYResiduals.regmodel print.regmodel summary.regmodel

Try the mdatools package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

mdatools
Multivariate Data Analysis for Chemometrics

R/regmodel.R
In mdatools: Multivariate Data Analysis for Chemometrics