Nothing
#I HATE R
# General results: SVMLight lack of control over convergence time, by default much longer convergence in RBF case
# Tested: cv time, final model time, accuracy, #sv, cv scores similarity
library(e1071)
library(doMC)
registerDoMC() # parallel foreach
library(caret)
library(SparseM)
library(mlbench)
library(knitr)
source("caret.svm.models.R")
# Download libsvm datasets to data_local folder:
# australian diabetes german.numer sonar_scale
# breast-cancer fourclass heart splice
# Read binary datasets
datasets = lapply(list.files(path="data_local"), function(x){ ds=read.matrix.csr(paste("data_local",x,sep="/")); ds$name=x; ds})
get.results <- function(name, data, class.column.name, params, model, seed=777, ...){
set.seed(seed)
class.column.index <- which(colnames(data)==class.column.name)
inTraining <- createDataPartition(data[, class.column.index], p = .75, list = FALSE)
training <- data[ inTraining,]
testing <- data[-inTraining,]
fitControl <- trainControl(method = "cv",
## 10-fold CV...
number = 7,
## repeated 2 times
repeats = 1,
verboseIter=FALSE
)
model <- train(formula(paste(class.column.name,"~ .", sep=" ")), data = training,
method = model,
preProc = c("center", "scale"),
tuneLength = 8,
tuneGrid = params,
trControl = fitControl,
...)
class.column.index < which(colnames(data)==class.column.name)
pred <- predict(model$finalModel, predict(model$preProcess, subset(testing, select=-c(class.column.index))))
cf <- confusionMatrix(pred, testing[, class.column.index])
list(name=name,
trainAcc=max(model$results$Accuracy),
trainAccStd=model$results$AccuracySD[which.max(model$results$Accuracy)],
testAcc=as.numeric(cf$overall[1]),
trainTime=as.numeric(model$times$everything[1]),
finalModelTime=as.numeric(model$times$final[1]))
}
# Coarse CV grid
get.all.results <- function(ds, fit.svmlight=TRUE, fit.small.grid=TRUE){
subset <- c(1,2,4,5)
C <- 10^(-6:7)
C_poly <- 10^(-4:3)
Gamma <- 10^(-8:8)
degree <- c(2,3)
if(fit.small.grid){
C <- 10^(-5:4)
C_poly <- 10^(-4:3)
Gamma <- 10^(-6:7)
degree <- c(2,3)
}
model.names=list("gmum.r::svm.radial", "gmum.r::svm.linear", "gmum.r::svm.poly",
"kernLab::svm.radial", "kernLab::svm.linear", "kernLab::svm.poly")
model.calls=list(caret.gmumSvmRadial, caret.gmumSvmLinear, caret.gmumSvmPoly,
"svmRadial", "svmLinear", "svmPoly")
model.tuneGrids = list(expand.grid(C=C, gamma=Gamma), expand.grid(C=C),
expand.grid(C=C_poly, gamma=Gamma, degree=degree, coef0=c(0)),
expand.grid(C=C, sigma=Gamma), expand.grid(C=C),
expand.grid(C=C_poly, scale=Gamma, degree=degree))
model.names=model.names[subset]
model.calls=model.calls[subset]
model.tuneGrids = model.tuneGrids[subset]
df <- as.data.frame(as.matrix(ds$x))
df$y = ds$y
df$name
model.results <- list()
for(model.index in 1:length(model.names)){
name <- model.names[[model.index]]
if(grepl("^gmum", name)){
print("Fitting")
name.experiment <- paste("libsvm_",name,ds$name, sep="")
print(name.experiment)
model.results[[name.experiment]] =
get.results(name.experiment, df, "y", model.tuneGrids[[model.index]], model.calls[[model.index]], verbosity=0, lib='libsvm')
if(fit.svmlight){
print("Fitting")
name.experiment <- paste("svmlight_",name,ds$name, sep="")
print(name.experiment)
model.results[[name.experiment]] =
get.results(name.experiment, df, "y", model.tuneGrids[[model.index]], model.calls[[model.index]], verbosity=0, lib='svmlight')
}
}else{
print("Fitting")
print(name)
print(ds$name)
model.results[[name]] =
get.results(name, df, "y", model.tuneGrids[[model.index]], model.calls[[model.index]])
}
save(list="model.results", file=paste("comparative.tests.", ds$name, model.index, ".RData", sep=""))
}
M <- data.frame(trainAcc=sapply(model.results, function(x) x$trainAcc),
trainAccStd=sapply(model.results, function(x) x$trainAccStd),
testAcc=sapply(model.results, function(x) x$testAcc),
trainTime=sapply(model.results, function(x) x$trainTime),
finalModelTrainTime=sapply(model.results, function(x) x$finalModelTime)
)
row.names(M) <- sapply(model.results, function(x) x$name)
save(list="M", file=paste("comparative.tests.", ds$name, ".RData", sep=""))
M
}
for(ds in datasets){
M <- get.all.results(ds, fit.svmlight = TRUE, fit.small.grid = TRUE)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.