#
# SVMBridge
# (C) 2015, by Aydin Demircioglu
#
# SVMBridge is free software: you can redistribtrainSVMute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# SVMBridge is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# Please do not use this software to destroy or spy on people, environment or things.
# All negative use is prohibited.
#
#' Train an SVM.
#'
#' This is the main routine that trains an SVM, e.g. it will make sure that all
#' data is on the disk, create the training arguments and then call the corresponding binary.
#'
#' @param method name of the SVM method/solver
#' @param trainDataFile Filename to read training data from. This cannot be used at the same time as trainDataX/trainDataY.
#' @param trainDataX Matrix comprising the data. This cannot be used at the same time as trainDataFile.
#' @param trainDataY Vector comprising the labels for the data. This cannot be used at the same time as trainDataFile.
#' @param extraParameter Extra parameters for solver, passed to the createTrainingArguments function of the wrapper.
#' @param modelFile Path to the model file to create. If none specified, a temporary file will be created.
#' @param verbose Be verbose?
#' @param subsamplingRate Rate to subsample the data.
#' @param subsamplingMethod Method to subsample the data with.
#' @param timeOut Value to time out at, so that e.g. solvers that do stall/do an endless loop can be stopped nonetheless.
#' @param readModelFile if TRUE, will read the model back (model is saved in the created object)
#' @param ... Further parameters that will be passed to the wrapper.
#'
#' @note trainDataFile, trainDataX/trainDataY are exclusive parameters, i.e. you cannot specify both.
#' @note timeOut might not work on all platforms yet.
#'
#' @return An SVM Object.
#'
#' @export
trainSVM = function(
method = NULL,
trainDataFile = NULL,
trainDataX = NULL,
trainDataY = NULL,
extraParameter = "",
modelFile = NULL,
subsamplingRate = NULL,
subsamplingMethod = "cutoff",
verbose = FALSE,
timeOut = -1,
readModelFile = TRUE,
...) {
#expand possible tilde characters in the path and get rid of backslashes
if(is.null(trainDataFile) == FALSE && grepl("~", trainDataFile) == TRUE){
checkmate::assertString (trainDataFile)
trainDataFile = expandTilde(path = trainDataFile, verbose = verbose)
}
checkmate::assertString (method)
# get the correct object
SVMObject = getSVMObject (method)
# check for the object
if (is.null (SVMObject) == TRUE) {
stop ("Could not find wrapper for given model. Did you include the wrapper of the method?")
}
# ask object for its path
trainBinaryPath = SVMObject$trainBinaryPath
if (is.null (trainBinaryPath) == TRUE) {
stop ("Sorry, no train binary was given. Cannot train an SVM without an executable.\n")
}
trainBinary = basename(trainBinaryPath)
# general modifications
if (verbose == TRUE) {
cat(" Path of binary for training is ", trainBinaryPath, "\n")
cat(" Binary for training is ", trainBinary, "\n")
}
# TODO: sanity checks for parameter
if (is.null (method) == TRUE) {
stop("No method name is given, this should never happen.")
}
# did the user specify a modelfile?
if (is.null(modelFile) == TRUE) {
# if no, we will write the model from memory in a temp file
modelFile = tempfile()
}
# take care of data. if traindataFile or traindataX is given,
# the corresponding filename must be empty.
# finally, everything is dumped to disk.
if ( (is.null(trainDataX) == FALSE) && (is.null(trainDataFile) == FALSE)) {
stop("Given a data frame as training data and specified a training file name. Confused. Stopping.")
}
if ( (is.null(trainDataX) == TRUE) && (is.null(trainDataFile) == TRUE)) {
stop("Neither specified training data path nor given training data. Stopping.")
}
if (is.null (trainDataFile) == FALSE)
checkmate::assertString (trainDataFile)
if (is.null (trainDataX) == FALSE) {
checkmate::assertMatrix (trainDataX)
checkmate::assertMatrix (trainDataY)
}
# we got
if (is.null(trainDataX) == FALSE) {
trainDataFile = tempfile()
if (verbose == TRUE)
cat(" Writing given data as ", trainDataFile, "\n")
#e1071::write.matrix.csr(trainDataX, trainDataFile, trainDataY)
writeSparseData (filename = trainDataFile, X = trainDataX, Y = trainDataY)
}
if (verbose == TRUE)
cat(" Train Data is now in ", trainDataFile, "\n")
# at this point we need to have a train data file on disk
checkmate::assertString (trainDataFile)
checkmate::assertFile (trainDataFile)
results = list()
# subsample the file
if (is.null(subsamplingRate) == FALSE) {
checkmate::assertString (subsamplingMethod)
checkmate::assertNumeric (subsamplingRate)
if (subsamplingMethod == "cutoff") {
# simple cut off
trainDataFile = subsampleDataByCutoff (trainDataFile, subsamplingRate = as.numeric(subsamplingRate))
} else {
# unknown method
stop("Unknown subsampling method.")
}
}
# create arguments for training
args = createTrainingArguments (SVMObject,
trainDataFile = trainDataFile,
modelFile = modelFile,
extraParameter = extraParameter,
...)
# now for some 'magic'.
# some solver might create their own training script,
# e.g. when calling matlab or similar.
# in this case the argument list will have an attribute
# called dynamicBinary. If this exists, it will override
# the software path as we know it.
if (!is.null(attributes(args)$dynamicBinary)) {
trainBinaryPath = attributes(args)$dynamicBinary
}
# if timeout is specified, we HARD kill the process.
# else things like SVMperf may run to the cluster walltime
# instead of stopping at the walltime specified.
# Careful! On windows systems, this function will not work since
# certain necessities are not preinstalled.
# For now, timeout does not (easily) work on windows, thus we skip timeouts there.
if (timeOut != -1) {
if(.Platform$OS.type == "unix") {
timeOutArgs = c(sprintf("%d", timeOut), trainBinaryPath, args)
timeOutPath = "/usr/bin/timeout"
if (verbose == TRUE)
cat(" Applying a hard timeout of ", timeOut, "%f seconds\n")
trainTime = microbenchmark::microbenchmark(s <- system3(timeOutPath, timeOutArgs, verbose = verbose), times = 1L)$time / 1e9
} else {
# non supported platform gets a warning
warning ("Timeout specified, but platform does not support timeouts!")
trainTime = microbenchmark::microbenchmark(s <- system3(timeOutPath, timeOutArgs, verbose = verbose), times = 1L)$time / 1e9
}
} else {
trainTime = microbenchmark::microbenchmark(s <- system3(trainBinaryPath, args, verbose = verbose), times = 1L)$time / 1e9
}
if (verbose == TRUE)
cat("Training took ", trainTime, " seconds\n")
results[["trainTime"]] = trainTime
if (readModelFile == TRUE) {
if (verbose == TRUE)
cat( " Will read model back from ", modelFile, "\n")
model = readModel (SVMObject, modelFile = modelFile, verbose = verbose)
results[["model"]] = model
}
return (results)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.