#' Classification using the xgboost algorithm.
#'
#' @title xgboost classification
#'
#' @param object An instance of class \code{"\linkS4class{MSnSet}"}.
#' @param assessRes An instance of class
#' \code{"\linkS4class{GenRegRes}"}, as generated by
#' \code{\link{xgboostOptimisation}}.
#' @param scores One of \code{"prediction"}, \code{"all"} or
#' \code{"none"} to report the score for the predicted class
#' only, for all classes or none.
#' @param max_depth If \code{assessRes} is missing, a \code{max_depth} must be
#' provided.
#' @param gamma If \code{assessRes} is missing, a \code{gamma} must
#' be provided.
#' @param nrounds The maximum number of boosting iterations passed to
#' \code{\link{xgb.train}}. Default is 1000.
#' @param fcol The feature meta-data containing marker definitions.
#' Default is \code{markers}.
#' @param ... Additional parameters passed to \code{\link{xgb.train}} from
#' package \code{xgboost}.
#'
#' @return An instance of class \code{"\linkS4class{MSnSet}"} with
#' \code{xgboost} and \code{xgboost.scores} (or \code{xgboost.all.scores},
#' when \code{scores = "all"}) feature variables storing the
#' classification results and scores respectively.
#'
#' @export
#'
#' @examples
#' library(pRolocExtra)
#' library(pRolocdata)
#' data(tan2009r1)
#' # reducing iterations
#' params <- xgboostOptimisation(tan2009r1, times = 3)
#' params
#' plot(params)
#' f1Count(params)
#' levelPlot(params)
#' getParams(params)
#' res <- xgboostClassification(tan2009r1, params)
#' getPredictions(res, fcol = "xgboost")
#' getPredictions(res, fcol = "xgboost", t = 0.75)
#' plot2D(res, fcol = "xgboost")
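#' ## A minimal sketch of the manual-parameter path: illustrative (not
#' ## optimised) values for 'max_depth' and 'gamma', with fewer boosting
#' ## rounds to keep the example fast
#' res2 <- xgboostClassification(tan2009r1, max_depth = 6, gamma = 0.1,
#'                               nrounds = 50)
#' getPredictions(res2, fcol = "xgboost")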
xgboostClassification <- function (object, assessRes,
scores = c("prediction", "all", "none"),
max_depth, gamma, nrounds = 1000, fcol = "markers", ...)
{
scores <- match.arg(scores)
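# resolve the hyperparameters: either taken from the optimisation results or
# set manually via 'max_depth' and 'gamma'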
if (missing(assessRes)) {
if (missing(max_depth) | missing(gamma))
stop("First run 'xgboostOptimisation' or set 'max_depth' and 'gamma' manually.")
params <- c(max_depth = max_depth, gamma = gamma)
}
else {
params <- pRoloc::getParams(assessRes)
if (is.na(params["max_depth"]))
stop("No 'max_depth' found.")
if (is.na(params["gamma"]))
stop("No 'gamma' found.")
}
# split the data into training (labelled markers) and test (unknown) sets
trainInd <- which(fData(object)[, fcol] != "unknown")
testInd <- which(fData(object)[, fcol] == "unknown")
trainSet <- pRoloc:::subsetAsDataFrame(object, fcol, train = TRUE)
testSet <- pRoloc:::subsetAsDataFrame(object, fcol, train = FALSE)
.trainSep <- pRoloc:::separateDataSet(trainSet, fcol)
.testSep <- pRoloc:::separateDataSet(testSet, fcol)
# convert the training and test sets to xgb.DMatrix objects, as required by xgboost
train_xg <- xgboost::xgb.DMatrix(data = as.matrix(.trainSep$theData),
label = as.integer(.trainSep$theLabels) - 1)
test_xg <- xgboost::xgb.DMatrix(data = as.matrix(.testSep$theData),
label = as.integer(.testSep$theLabels) - 1)
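# xgboost parameter set: 'max_depth' and 'gamma' are the tuned values, the
# remaining settings (learning rate, subsampling, regularisation) are fixed;
# multi-class soft probabilities are requested so that per-class scores can
# be reported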
params_xg <- list(
booster = "gbtree",
eta = 0.001,
max_depth = params["max_depth"],
gamma = params["gamma"],
subsample = 0.75,
colsample_bytree = 1,
lambda = 1, alpha = 0,
objective = "multi:softprob",
eval_metric = "mlogloss",
num_class = length(levels(.trainSep$theLabels))
)
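# train the model; training stops early if the training mlogloss has not
# improved for 10 rounds, and any additional arguments in '...' are passed
# on to xgb.train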
model <- xgboost::xgb.train(params = params_xg,
data = train_xg,
nrounds = nrounds,
early_stopping_rounds = 10,
watchlist = list(train = train_xg),
verbose = 0,
...)
# predict class probabilities for the unknowns and derive the most likely class
ans <- as.data.frame(predict(model, test_xg, reshape = TRUE))
colnames(ans) <- levels(.trainSep$theLabels)
predictedLabels <- factor(apply(ans, 1, function(x) colnames(ans)[which.max(x)]),
                          levels = levels(.trainSep$theLabels))
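# combine the known marker labels (training data) with the predicted labels
# (unknowns) into a single feature variable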
temp <- rep("", length(trainInd) + length(testInd))
i <- 1:length(trainInd)
temp[trainInd[i]] <- as.character(.trainSep$theLabels[i])
i <- 1:length(testInd)
temp[testInd[i]] <- as.character(predictedLabels[i])
fData(object)$xgboost <- temp
# report classification scores: either the full score matrix ("all") or only
# the score of the predicted class ("prediction"); markers keep a score of 1
# for their known class
if (scores == "all") {
nbLabels <- length(levels(.trainSep$theLabels))
tempScores <- matrix(rep(0, nbLabels * (length(trainInd) +
length(testInd))), ncol = nbLabels)
colnames(tempScores) <- levels(.trainSep$theLabels)
for(i in 1:length(trainInd)){
tempScores[trainInd[i], .trainSep$theLabels[i]] <- 1
}
i <- 1:length(testInd)
ans <- as.matrix(ans)
tempScores[testInd[i], ] <- ans[i, ]
scoreMat <- tempScores
colnames(scoreMat) <- paste0(colnames(scoreMat), ".xgboost.scores")
fData(object)$xgboost.all.scores <- scoreMat
}
else if (scores == "prediction") {
nbLabels <- length(levels(.trainSep$theLabels))
tempScores <- rep(0, length(trainInd) + length(testInd))
i <- 1:length(trainInd)
tempScores[trainInd[i]] <- 1
i <- 1:length(testInd)
tempScores[testInd[i]] <- apply(ans, 1, max)
fData(object)$xgboost.scores <- tempScores
}
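# record the classification parameters in the object's processing log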
object@processingData@processing <- c(processingData(object)@processing,
paste0("Performed xgboost prediction (",
paste(paste(names(params),
params, sep = "="), collapse = " "), ") ", date()))
if (validObject(object))
return(object)
}