Nothing
#' @title ConsensusOPLSCV
#' @description Function for performing Kernel-OPLS cross-validation for a set
#' of Y-orthogonal components.
#' This function was first implemented in KOPLS1.1 MATLAB package, and was
#' modified in 2012 for the ConsensusOPLS method.
#'
#' @param K matrix. The kernel matrix (un-centered); see \code{koplsKernel} for
#' details.
#' @param Y matrix. The response matrix (un-centered/scaled). Could be binary
#' (for discriminant analysis) or real-valued (for classical OPLS analysis).
#' @param maxPcomp numeric. The number of Y-predictive components.
#' @param maxOcomp numeric. The number of Y-orthogonal components.
#' @param nfold numeric. Number of cross-validation rounds (integer).
#' @param cvType character. Type of cross-validation used. Either \code{nfold}
#' for n-fold cross-validation, \code{mccv} for Monte Carlo cross-validation or
#' \code{mccvb} for Monte Carlo class-balanced cross-validation. See also
#' \code{koplsCrossValSet} for details.
#' @param preProcK character. Pre-processing settings for the kernel matrix.
#' Either \code{mc} for mean-centering or \code{no} for no pre-processing.
#' @param preProcY character. Pre-processing parameter for Y. Either \code{mc}
#' for mean-centering, \code{uv} for mean-centering and scaling to
#' unit-variance, \code{pa} for mean-centering and Pareto-scaling or \code{no}
#' for no mean-centering and no scaling.
#' @param cvFrac numeric. Fraction of observations in the training set during
#' cross-validation.
#' @param modelType character. Type of model used for the ConsensusOPLS method,
#' \code{da} for discriminant analysis or \code{reg} for regression.
#' If \code{da} (default), sensitivity and specificity will be calculated.
#' @param verbose logical. Indicates whether the user wants to see the
#' progress bar printed. If \code{FALSE}, no output will be printed. If
#' \code{TRUE} (default) some output will be printed regarding the
#' cross-validation progress.
#'
#' @returns A list with the following entries (diagnostic parameters which can
#' be used to determine the optimal number of model components):
#' \item{cv}{ list. The cross-validation results.}
#' \item{Yhat}{ matrix. Predicted Y values.}
#' \item{AllYhat}{ matrix. All predicted Y values.}
#' \item{Tcv}{ matrix. Predictive score vector T for all cross-validation
#' rounds.}
#' \item{Q2Yhat}{ matrix. Total Q-square result for all Y-orthogonal
#' components.}
#' \item{Q2YhatVars}{ matrix. Q-square result per Y-variable for all
#' Y-orthogonal components.}
#' \item{cvTestIndex}{ matrix. Indices for the test set observations during
#' the cross-validation rounds.}
#' \item{cvTrainIndex}{ matrix. Indices for the training set observations
#' during the cross-validation rounds.}
#' \item{da}{ list. Cross-validation results specifically for discriminant
#' analysis:}
#' \item{totalResults}{ list. The results over all classes and averages.}
#' \item{classResults}{ list. The results for each class.}
#' \item{predClass}{ integer. Predicted class list per class and Y-orthogonal
#' components.}
#' \item{trueClass}{ integer. Predicted class list per class and Y-orthogonal
#' components.}
#' \item{sensSpec}{ integer. Sensitivity and specificity values per class and
#' Y-orthogonal components.}
#' \item{confusionMatrix}{ matrix. Confusion matrix during cross-validation
#' rounds.}
#' \item{nclass}{ integer. Number of classes in model.}
#' \item{decisionRule}{ character. Decision rule used: \code{max} or
#' \code{fixed}.}
#' \item{args}{ list. Arguments to the function:}
#' \item{maxOcomp}{ integer. Number of Y-orthogonal components.}
#' \item{maxPcomp}{ integer. Number of Y-predictive components.}
#' \item{class}{ character. Model class is \code{koplscv}.}
#' @import utils
#' @keywords internal
#' @noRd
#'
ConsensusOPLSCV <- function(K, Y, maxPcomp, maxOcomp,
modelType = "da",
cvType = 'nfold',
nfold = 5,
nMC = 100,
cvFrac = 4/5,
preProcK = "no",
preProcY = "no",
mc.cores = 1,
verbose = FALSE) {
# ----- Variable format control (part 1)
if (!is.matrix(K))
stop("K is not a matrix.")
if (!is.numeric(maxPcomp))
stop("maxPcomp is not numeric.")
if (!is.numeric(maxOcomp))
stop("maxOcomp is not numeric.")
if (!is.numeric(nfold))
stop("nfold is not numeric.")
if (!is.numeric(nMC))
stop("nfold is not numeric.")
if (!is.numeric(cvFrac))
stop("cvFrac is not numeric.")
if (!is.logical(verbose))
stop("verbose must be a logical value.")
cvType <- match.arg(cvType,
choices = c("nfold", "mccv", "mccvb"),
several.ok = F)
preProcK <- match.arg(preProcK,
choices = c("mc", "no"),
several.ok = F)
preProcY <- match.arg(preProcY,
choices = c("mc", "no", "uv", "pa"),
several.ok = F)
modelType <- match.arg(modelType,
choices = c("da", "reg"),
several.ok = F)
# ----- Default values control
if (cvType == "mccvb" && modelType != "da")
stop("Class balanced MC CV only applicable to `da` modelling.")
# ----- Variable format control (part 2)
if (modelType == "da") {
# Check the response matrix
if (! (all(Y %in% c(0, 1)) && ncol(Y) > 1))
stop("Y is not a dummy matrix.")
classVect <- koplsReDummy(Y = Y)
nclass <- length(unique(x = classVect))
} else {
if (! (is.numeric(Y) && (ncol(Y) == 1 || is.vector(Y)))) {
stop("Y should be a numeric vector.")
}
}
# ----- Convert Y-scaling to more explicit format
YcenterType <- "no"
YscaleType <- "no"
if (preProcY != "no") {
YcenterType <- "mc"
if (preProcY != "mc")
YscaleType <- preProcY
}
# ----- Parameters init
release <- ""
YhatDaSave <- list()
cvTestIndex <- list()
cvTrainingIndex <- list()
modelMain <- list()
if (verbose) {
cat("Please wait... The cross-validation process begins.")
flush.console()
}
AllYhat <- c()
nrun <- if (cvType=='nfold') nfold else nMC
for (icv in 0:nrun) {
# Update progression bar
if (verbose) {
cat("\r", " ", "\r")
progress <- round(icv * 100 / nrun, 0)
cat(sprintf("Progression : %.2f%% \r", progress))
flush.console()
}
# Set up Cross-Validation
cvSet <- koplsCrossValSet(K = K, Y = Y,
cvFrac = cvFrac, cvType = cvType,
nfold = nrun, nfoldRound = icv)
#, random.seed = 10403+icv)
# TODO: check when nfold > 1, i.e. repeated test samples
cvTestIndex[[length(cvTestIndex)+1]] <- cvSet$testIndex
cvTrainingIndex[[length(cvTrainingIndex)+1]] <- cvSet$trainingIndex
# Get Kernel matrices
# TODO: change so that this is done in the K matrix only once and
# selected by indices.
KtrTr <- cvSet$KTrTr
KteTe <- cvSet$KTeTe
KteTr <- cvSet$KTeTr
# Scale Y
YScaleObj <- koplsScale(X = cvSet$yTraining,
centerType = YcenterType,
scaleType = YscaleType)
YScaleObjTest <- koplsScaleApply(model = YScaleObj,
X = cvSet$yTest)
# Center kernel matrices
if (preProcK == "mc") {
KteTe <- koplsCenterKTeTe(KteTe = KteTe,
KteTr = KteTr,
KtrTr = KtrTr)
KteTr <- koplsCenterKTeTr(KteTr = KteTr,
KtrTr = KtrTr)
KtrTr <- koplsCenterKTrTr(KtrTr = KtrTr)
}
# Estimate K-OPLS model
model <- koplsModel(K = KtrTr, Y = YScaleObj$X,
A = maxPcomp, nox = maxOcomp,
preProcK = "no", preProcY = "no")
# Set up model stats
ssy <- sum(YScaleObjTest$X^2)
#ssyVars <- sum(YScaleObjTest$X^2)
#ssx <- sum(diag(KteTe))
if (icv == 1) {
ssyTot <- ssy
#ssyVarsTot <- ssyVars
#ssxTot <- ssx
} else if (icv > 1) {
ssyTot <- ssyTot+ssy
#ssyVarsTot <- ssyVarsTot+ssyVars
#ssxTot <- ssxTot+ssx
}
modelPredys <- mclapply(
1:(maxOcomp+1), mc.cores=mc.cores, function(nOcomp) {
# predict Yhat with (nOcomp-1) orthogonal components
modelPredy <- koplsPredict(KteTr = KteTr,
Ktest = KteTe,
Ktrain = KtrTr,
model = model,
nox = nOcomp-1,
rescaleY = FALSE)
return (list(modelPredy = modelPredy,
nOcomp = nOcomp))
})
AllYhatind <- do.call(cbind, lapply(modelPredys, function(mp) {
tmp <- koplsRescale(scaleS = YScaleObj,
varargin = mp$modelPredy$Yhat)
colnames(tmp$X) <- paste0(colnames(tmp$X),
ifelse(mp$nOcomp==1,
"_p",
paste0("_po", mp$nOcomp-1)))
return (tmp$X)
}))
if (icv==0) {
pressyTot <- setNames(rep(0, maxOcomp+1),
c("p", paste0("po", 1:maxOcomp)))
} else {
pressyTot <- pressyTot + sapply(modelPredys, function(mp) {
sum((YScaleObjTest$X - mp$modelPredy$Yhat)^2)
})
}
# pressyVarsTot <- sapply(modelPredys, function(mp) {
# colSums((YScaleObjTest$X - mp$modelPredy$Yhat)^2)
# TODO: clarify: colSums is for Y with variable per column, not for dummy
# })
# Save Yhat for all rounds
if (icv == 0) {
YhatDaSave <- replicate(maxOcomp+1, list())
}
# Add mean on Yhat by koplsRescale
for (mp in modelPredys) { #TODO: reform to lapply
tmp <- koplsRescale(scaleS = YScaleObj,
varargin = mp$modelPredy$Yhat)
YhatDaSave[[mp$nOcomp]][[length(YhatDaSave[[mp$nOcomp]])+1]] <-
tmp$X
# # Store Yhat at the highest number of orthogonal components
# if (mp$nOcomp == maxOcomp+1) {
# if (icv == 0) Yhat <- list()
# tmp <- koplsRescale(scaleS = YScaleObj,
# varargin = mp$modelPredy$Yhat)
# Yhat[[length(Yhat)+1]] <- tmp$X
# }
}
AllYhat <- rbind(AllYhat, AllYhatind) #TODO: use list
} # end icv
if (verbose) {
cat("Cross-validation is complete.")
flush.console()
}
#if (F){
if (modelType == "da") {
## CV DA metrics
daMetrics_list <- list()
daMetrics_list$sens <- daMetrics_list$spec <-
matrix(nrow=nclass, ncol=0)
daMetrics_list$observed <- matrix(classVect[unlist(cvTestIndex[-1])],
ncol=1)
daMetrics_list$predicted <- matrix(
NA,
nrow=nrow(daMetrics_list$observed),
ncol=maxOcomp+1,
dimnames=list(NULL, c("p", paste0("po", 1:maxOcomp))))
# Get sens/spec for each Y-orth component
for (i in 1:(maxOcomp + 1)) {
# Predicted class on test
predClass <- koplsMaxClassify(X = do.call(rbind,
YhatDaSave[[i]][-1]))
# Calculate sensitivity and specificity
daMetrics <- koplsSensSpec(
trueClass = classVect[unlist(cvTestIndex[-1])],
predClass = predClass,
labelClass = colnames(Y))
daMetrics_list$sens <- cbind(
daMetrics_list$sens,
matrix(daMetrics[, "sens"],
dimnames=list(rownames(daMetrics),
ifelse(i==1, "p", paste0("po", i-1)))))
daMetrics_list$spec <- cbind(
daMetrics_list$spec,
matrix(daMetrics[, "spec"],
dimnames=list(rownames(daMetrics),
ifelse(i==1, "p", paste0("po", i-1)))))
#daMetrics_list$tot_sens[i] <- daMetrics[nrow(daMetrics), "sens"]
#daMetrics_list$meanSens[i] <- daMetrics[nrow(daMetrics), "meanSens"]
#daMetrics_list$meanSpec[i] <- daMetrics[nrow(daMetrics), "meanSpec"]
daMetrics_list$confusMatrix[[i]] <- koplsConfusionMatrix(
trueClass = classVect[unlist(cvTestIndex[-1])],
predClass = predClass)
daMetrics_list$predicted[,i] <- predClass
}
daMetrics_list$nclass <- nclass
modelMain$cv$da <- daMetrics_list
## Reprediction DA metrics
daMetrics_list <- list()
daMetrics_list$sens <- daMetrics_list$spec <-
matrix(nrow=nclass, ncol=0)
daMetrics_list$observed <- matrix(classVect[cvTestIndex[[1]]], ncol=1)
daMetrics_list$predicted <- matrix(
NA,
nrow=nrow(daMetrics_list$observed),
ncol=maxOcomp+1,
dimnames=list(NULL, c("p", paste0("po", 1:maxOcomp))))
# Get sens/spec for each y-orth component
for (i in 1:(maxOcomp + 1)) {
# Predicted class on all samples
predClass <- koplsMaxClassify(X = YhatDaSave[[i]][[1]])
# Calculate sensitivity and specificity
daMetrics <- koplsSensSpec(trueClass = classVect[cvTestIndex[[1]]],
predClass = predClass,
labelClass = colnames(Y))
daMetrics_list$sens <- cbind(
daMetrics_list$sens,
matrix(daMetrics[, "sens"],
dimnames=list(rownames(daMetrics),
ifelse(i==1, "p", paste0("po", i-1)))))
daMetrics_list$spec <- cbind(
daMetrics_list$spec,
matrix(daMetrics[, "spec"],
dimnames=list(rownames(daMetrics),
ifelse(i==1, "p", paste0("po", i-1)))))
daMetrics_list$confusMatrix[[i]] <- koplsConfusionMatrix(
trueClass = classVect[cvTestIndex[[1]]],
predClass = predClass)
daMetrics_list$predicted[,i] <- predClass
}
daMetrics_list$nclass <- nclass
modelMain$cv$da <- daMetrics_list
} else if (modelType == "reg") {
## CV regression metrics
Metrics_list <- list()
Metrics_list$observed <- Y[unlist(cvTestIndex[-1]), , drop=F]
Metrics_list$predicted <- matrix(
NA,
nrow=nrow(Metrics_list$observed),
ncol=maxOcomp+1,
dimnames=list(NULL, c("p", paste0("po", 1:maxOcomp))))
# Get sens/spec for each Y-orth component
for (i in 1:(maxOcomp + 1)) {
# Predicted values
Metrics_list$predicted[, i] <- do.call(rbind, YhatDaSave[[i]][-1])
}
modelMain$cv$reg <- Metrics_list
## Reprediction metrics
Metrics_list <- list()
Metrics_list$observed <- Y
Metrics_list$predicted <- matrix(
NA,
nrow=nrow(Metrics_list$observed),
ncol=maxOcomp+1,
dimnames=list(NULL, c("p", paste0("po", 1:maxOcomp))))
# Get sens/spec for each Y-orth component
for (i in 1:(maxOcomp + 1)) {
# Predicted values
Metrics_list$predicted[,i] <- YhatDaSave[[i]][[1]]
}
modelMain$reg <- Metrics_list
}
#}
# modelMain$Model <- koplsModel(K = K, Y = Y, A = maxPcomp, nox = maxOcomp,
# preProcK = preProcK, preProcY = preProcY)
#modelMain$cv$Yhat <- do.call(rbind, Yhat[-1])
#colnames(modelMain$cv$Yhat) <- colnames(Y)
modelMain$cv$AllYhat <- AllYhat[-(1:nrow(Y)), , drop=F]
#modelMain$cv$Tcv <- modelMain$cv$Yhat %*% modelMain$Model$Cp %*% modelMain$Model$Bt[[maxOcomp + 1]] #TODO: why Bt[[maxOcomp + 1]]?
modelMain$cv$Q2Yhat <- 1 - pressyTot/ssyTot
#modelMain$cv$Q2YhatVars <- 1 - pressyVarsTot/ssyVarsTot
modelMain$cv$cvTestIndex <- cvTestIndex[-1]
#modelMain$cv$cvTrainingIndex <- cvTrainingIndex[-1]
class(modelMain) <- "koplscv"
return (modelMain)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.