#' Random Forests
#'
#' Perform Random Forest analysis. Uses \code{\link[randomForest]{randomForest}} function.
#' Writes results to \code{"randomforests_sigfeatures.csv"} file.
#'
#' @param dataSet List, data set object generated by \code{\link[MSdata]{MS_to_MA}} function.
#' @param analSet List, containing the results of statistical analysis (can be just an empty list).
#' @param treeNum Number of trees to grow.
#' @param tryNum Number of predictors to try for each node.
#' @return Native \code{analSet} with two added elements:
#' \itemize{
#' \item\code{$rf} - standard \code{\link[randomForest]{randomForest}} function output
#' \item\code{$rf.sigmat} - matrix of features significance (sorted by MeanDecreaseAccuracy).
#' }
#' @seealso \code{\link[randomForest]{randomForest}} for used statistical function\cr
#' \code{\link{PlotRF}} for plotting functions
#' @export
# random forests
RF.Anal<-function(dataSet, analSet, treeNum=500, tryNum=10){
rf_out<-randomForest::randomForest(dataSet$norm, dataSet$cls, ntree = treeNum, mtry = tryNum, importance = TRUE, proximity = TRUE);
# set up named sig table for display
impmat<-rf_out$importance;
impmat<-impmat[rev(order(impmat[,"MeanDecreaseAccuracy"])),]
sigmat<-impmat[,"MeanDecreaseAccuracy", drop=F];
sigmat<-signif(sigmat, 5);
write.csv(sigmat,file="randomforests_sigfeatures.csv");
analSet$rf<-rf_out;
analSet$rf.sigmat<-sigmat;
return(analSet);
}
#' Plot Random Forest
#'
#' Set of functions for plotting the results of Random Forest analysis.
#'
#' @param dataSet List, data set object generated by \code{\link[MSdata]{MS_to_MA}} function.
#' @param analSet List, containing the results of statistical analysis (can be just an empty list).
#' @param imgName Image file name prefix.
#' @param format Image format, one of: "png", "tiff", "pdf", "ps", "svg"
#' @param dpi Image resolution.
#' @param width Image width.
#' @seealso \code{\link{RF.Anal}} for analytical function
#' @name PlotRF
NULL
#' \code{PlotRF.Classification} - plot feature classification.
#' @rdname PlotRF
#' @export
# plot variable importance ranked by MeanDecreaseAccuracy
PlotRF.Classification<-function(dataSet, analSet, imgName="rf_cls_", format="png", dpi=72, width=NA){
if (is.null(analSet$rf)) stop("Please, conduct RF.Anal first.")
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 8;
}else if(width == 0){
w <- 8;
analSet$imgSet$rf.cls<-imgName;
}else{
w <- width;
}
h <- w*5/8;
Cairo::Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
#par(mfrow=c(2,1));
par(mar=c(4,4,3,2));
cols <- rainbow(length(levels(dataSet$cls))+1);
plot(analSet$rf, main="Random Forest classification", col=cols);
legend("topright", legend = c("Overall", levels(dataSet$cls)), lty=2, lwd=1, col=cols);
#PlotConfusion(analSet$rf$confusion);
dev.off();
frame()
grid::grid.raster(png::readPNG(imgName));
}
#' \code{PlotRF.VIP} - plot variable importance of features ranked by their contributions to classification accuracy (MeanDecreaseAccuracy).
#' @rdname PlotRF
#' @export
# plot variable importance ranked by MeanDecreaseAccuracy
PlotRF.VIP<-function(dataSet, analSet, imgName="rf_imp_", format="png", dpi=72, width=NA){
if (is.null(analSet$rf)) stop("Please, conduct RF.Anal first.")
vip.score <- rev(sort(analSet$rf$importance[,"MeanDecreaseAccuracy"]));
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 8;
}else if(width == 0){
w <- 7;
analSet$imgSet$rf.imp<-imgName;
}else{
w <- width;
}
h <- w*7/8;
Cairo::Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
PlotImpVar(dataSet, vip.score,"MeanDecreaseAccuracy");
dev.off();
frame()
grid::grid.raster(png::readPNG(imgName));
}
#' \code{PlotRF.Outlier} - plot outlying measures, only top 5 potential outliers are labeled.
#' @rdname PlotRF
#' @export
PlotRF.Outlier<-function(dataSet, analSet, imgName="rf_outiler_", format="png", dpi=72, width=NA){
if (is.null(analSet$rf)) stop("Please, conduct RF.Anal first.")
cols <- GetColorSchema(dataSet);
uniq.cols <- unique(cols);
legend.nm <- unique(as.character(dataSet$cls));
dist.res <- randomForest::outlier(analSet$rf);
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else if(width == 0){
w <- 7.2;
analSet$imgSet$rf.outlier<-imgName;
}else{
w <- width;
}
h <- w*7/9;
Cairo::Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
layout(matrix(c(1,2), 1, 2, byrow = TRUE), widths=c(4,1));
op<-par(mar=c(5,5,4,0));
plot(dist.res, type="h", col=cols, xlab="Samples", xaxt="n", ylab="Outlying Measures", bty="n");
# add sample names to top 5
rankres <- rank(-abs(dist.res), ties.method="random");
inx.x <- which(rankres < 6);
inx.y <- dist.res[inx.x];
nms <- names(dist.res)[inx.x];
text(inx.x, inx.y, nms, pos=ifelse(inx.y >= 0, 3, 1), xpd=T)
op<-par(mar=c(5,0,4,1));
plot.new();
plot.window(c(0,1), c(0,1));
legend("center", legend =legend.nm, pch=15, col=uniq.cols);
dev.off();
frame()
grid::grid.raster(png::readPNG(imgName));
}
# get the OOB error for the last signif
GetRFOOB<-function(analSet){
errors = analSet$rf$err.rate;
nrow = dim(errors)[1];
signif(errors[nrow, 1],3);
}
GetSigTable.RF<-function(dataSet, analSet){
GetSigTable(dataSet, analSet$rf.sigmat, "Random Forest");
}
# significance measure, double[][]
GetRFSigMat<-function(analSet){
return(CleanNumber(analSet$rf.sigmat))
}
GetRFSigRowNames<-function(analSet){
rownames(analSet$rf.sigmat);
}
GetRFSigColNames<-function(analSet){
colnames(analSet$rf.sigmat);
}
GetRFConf.Table<-function(analSet){
print(xtable::xtable(analSet$rf$confusion,
caption="Random Forest Classification Performance"), size="\\scriptsize");
}
# return double[][] confusion matrix
GetRFConfMat<-function(analSet){
signif(analSet$rf$confusion,3);
}
GetRFConfRowNames<-function(analSet){
rownames(analSet$rf$confusion);
}
GetRFConfColNames<-function(analSet){
colnames(analSet$rf$confusion);
}
# PlotConfusion <- function(clsConf){
# prior(clsConf) <- 100
# # The above rescales the confusion matrix such that columns sum to 100.
# opar <- par(mar=c(5.1, 6.1, 2, 2))
# x <- x.orig <- unclass(clsConf)
# x <- log(x + 0.5) * 2.33
# x[x < 0] <- NA
# x[x > 10] <- 10
# diag(x) <- -diag(x)
# image(1:ncol(x), 1:ncol(x),
# -(x[, nrow(x):1]), xlab='Actual', ylab='',
# col=grDevices::colorRampPalette(c(hsv(h = 0, s = 0.9, v = 0.9, alpha = 1),
# hsv(h = 0, s = 0, v = 0.9, alpha = 1),
# hsv(h = 2/6, s = 0.9, v = 0.9, alpha = 1)))(41),
# xaxt='n', yaxt='n', zlim=c(-10, 10))
# axis(1, at=1:ncol(x), labels=colnames(x), cex.axis=0.8)
# axis(2, at=ncol(x):1, labels=colnames(x), las=1, cex.axis=0.8)
# title(ylab='Predicted', line=4.5)
# abline(h = 0:ncol(x) + 0.5, col = 'gray')
# abline(v = 0:ncol(x) + 0.5, col = 'gray')
# text(1:6, rep(6:1, each=6), labels = sub('^0$', '', round(c(x.orig), 0)))
# box(lwd=2)
# par(opar) # reset par
# }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.