#' PLSDA cross validation
#'
#' Perform PLSDA classification and feature selection.
#' Writes two output files: \code{"plsda_vip.csv"} and \code{"plsda_coef.csv"}.
#'
#' @param dataSet List, data set object generated by \code{\link[MSdata]{MS_to_MA}} function.
#' @param analSet List, containing the results of statistical analysis (can be just an empty list).
#' @param methodName Cross validation method \code{"LOOCV"} or \code{"CV"}
#' @param compNum The maximum number of components to search.
#' @param choice The determining performance measure, one of \code{"Q2"}, \code{"R2"}, \code{"Accuracy"}
#' @return Native \code{analSet} with one added \code{$plsda} element consisting of:
#' \itemize{
#' \item\code{$best.num} - the detected best number of components
#' \item\code{$choice} - value of \code{choice} argument
#' \item\code{$coef.mat} - matrix of VIPs (variable importance in projection)
#' \item\code{$vip.mat} - matrix of weighted sums of absolute regression coefficients
#' \item\code{$fit.info} - all performance measures calculated for different number of components
#' }
#' @seealso \code{\link{PLS.Anal}}, \code{\link{PLS.Loadings}}, \code{\link{PLSDA.Permut}}, \code{\link{PlotPLS}}
#' @export
# classification and feature selection
PLSDA.CV<-function(dataSet, analSet, methodName="CV", compNum=GetDefaultPLSCVComp(dataSet), choice="Q2"){
if (is.null(analSet$plsr)) stop("Please, conduct PLS.Anal and PLS.Loadings first.")
match.arg(methodName, c("LOOCV", "CV"))
match.arg(choice, c("Q2", "R2", "Accuracy"))
# get classification accuracy using caret
cls<-as.numeric(dataSet$cls)-1;
datmat<-as.matrix(dataSet$norm);
plsda.cls <- caret::train(dataSet$norm, dataSet$cls, "pls", trControl=caret::trainControl(method=ifelse(methodName == 'LOOCV', "LOOCV", 'CV')), tuneLength=compNum);
# use the classifical regression to get R2 and Q2 measure
plsda.reg <- pls::plsr(cls~datmat,method ='oscorespls', ncomp=compNum, validation= ifelse(methodName == 'LOOCV', "LOO", 'CV'));
fit.info <- pls::R2(plsda.reg, estimate = "all")$val[,1,];
# combine accuracy, R2 and Q2
accu <- plsda.cls$results[,2]
all.info <- rbind(accu, fit.info[,-1]);
rownames(all.info) <- c("Accuracy", "R2", "Q2");
# default use best number determined by Q2
if(choice == 'Q2'){
best.num <- which(all.info[3,] == max(all.info[3,]));
}else if(choice == "R2"){
best.num <- which(all.info[2,] == max(all.info[2,]));
}else{
best.num <- which(all.info[1,] == max(all.info[1,]));
}
# get coef. table, this can be error when class is very unbalanced
coef.mat <- try(caret::varImp(plsda.cls, scale=T)$importance);
if(class(coef.mat) == "try-error") {
coef.mat <- NULL;
}else{
if(dataSet$cls.num > 2){ # add an average coef for multiple class
coef.mean<-apply(coef.mat, 1, mean);
coef.mat <- cbind(coef.mean = coef.mean, coef.mat);
}
# rearange in decreasing order, keep as matrix, prevent dimesion dropping if only 1 col
inx.ord<- order(coef.mat[,1], decreasing=T);
coef.mat <- data.matrix(coef.mat[inx.ord, ,drop=FALSE]);
write.csv(signif(coef.mat,5), file="plsda_coef.csv"); # added 27 Jan 2014
}
# calculate VIP http://mevik.net/work/software/VIP.R
pls<-analSet$plsr;
b <- c(pls$Yloadings)[1:compNum];
T <- pls$scores[,1:compNum, drop = FALSE]
SS <- b^2 * colSums(T^2)
W <- pls$loading.weights[,1:compNum, drop = FALSE]
Wnorm2 <- colSums(W^2);
SSW <- sweep(W^2, 2, SS / Wnorm2, "*")
vips <- sqrt(nrow(SSW) * apply(SSW, 1, cumsum) / cumsum(SS));
if(compNum > 1){
vip.mat <- as.matrix(t(vips));
}else{
vip.mat <- as.matrix(vips);
}
colnames(vip.mat) <- paste("Comp.", 1:ncol(vip.mat));
write.csv(signif(vip.mat,5),file="plsda_vip.csv");
analSet$plsda<-list(best.num=best.num, choice=choice, coef.mat=coef.mat, vip.mat=vip.mat, fit.info=all.info);
return(analSet);
}
#' \code{PlotPLS.Classification} - plot PLSDA classification performance using different components
#' @rdname PlotPLS
#' @export
# Plot plsda classification performance using different components
PlotPLS.Classification<-function(dataSet, analSet, imgName="plsda_", format="png", dpi=72, width=NA){
if (is.null(analSet$plsr)) stop("Please, conduct PLS.Anal and PLSDA.CV first.")
if (is.null(analSet$plsda)) stop("Please, conduct PLSDA.CV first.")
res<-analSet$plsda$fit.info;
colnames(res) <- 1:ncol(res);
best.num <- analSet$plsda$best.num;
choice <- analSet$plsda$choice;
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 7;
}else if(width == 0){
w <- 7;
analSet$imgSet$pls.class<-imgName;
}else{
w <- width;
}
h <- w*5/7;
Cairo::Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mar=c(5,5,2,7)); # put legend on the right outside
barplot(res, beside = TRUE, col = c("lightblue", "mistyrose","lightcyan"), ylim= c(0,1.05), xlab="Number of components", ylab="Performance");
if(choice == "Q2"){
text((best.num-1)*3 + best.num + 2.5, res[3,best.num]+ 0.02, labels = "*", cex=2.5, col="red");
}else if(choice == "R2"){
text((best.num-1)*3 + best.num + 1.5, res[2,best.num]+ 0.02, labels = "*", cex=2.5, col="red");
}else{
text((best.num-1)*3 + best.num + 0.5, res[1,best.num]+ 0.02, labels = "*", cex=2.5, col="red");
}
# calculate the maximum y position, each bar is 1, place one space between the group
xpos <- ncol(res)*3 + ncol(res) + 1;
legend(xpos, 1.0, rownames(res), fill = c("lightblue", "mistyrose","lightcyan"), xpd=T);
dev.off();
frame()
grid::grid.raster(png::readPNG(imgName));
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.