require(LiblineaR)
require(e1071)
require(SwarmSVM)
######## Data Preparation
local.file.name = tempfile()
download.file("http://www.sfu.ca/~hetongh/data/svmguide1.RData",local.file.name)
load(local.file.name)
svmguide1.t = svmguide1[[2]]
svmguide1 = svmguide1[[1]]
local.file.name = tempfile()
download.file("http://www.sfu.ca/~hetongh/data/ijcnn1.RData",local.file.name)
load(local.file.name)
ijcnn1.t = ijcnn1[[2]]
ijcnn1 = ijcnn1[[1]]
local.file.name = tempfile()
download.file("http://www.sfu.ca/~hetongh/data/usps.RData",local.file.name)
load(local.file.name)
usps.t = usps[[2]]
usps = usps[[1]]
local.file.name = tempfile()
download.file("http://www.sfu.ca/~hetongh/data/mnist.RData",local.file.name)
load(local.file.name)
mnist38 = mnist[[1]]
mnist38.t = mnist[[2]]
mnist49 = mnist[[3]]
mnist49.t = mnist[[4]]
mnistoe = mnist[[5]]
mnistoe.t = mnist[[6]]
######## Repeat Length
rep.len = 10
########################
######## Cluster SVM
########################
clusterSVM.cv = function(x, y, nfold = 5, ...) {
n = nrow(x)
ind = sample(n)
folds = list()
for (i in 1:(nfold-1)) {
folds[[i]] = ind[1:(n %/% nfold)]
ind = setdiff(ind,folds[[i]])
}
folds[[nfold]] = ind
score = rep(0,nfold)
for (i in 1:nfold) {
train.ind = setdiff(1:n, folds[[i]])
test.ind = folds[[i]]
csvm.obj = SwarmSVM::clusterSVM(x = x[train.ind, ], y = y[train.ind],
valid.x = x[test.ind, ], valid.y = y[test.ind],
...)
score[i] = csvm.obj$valid.score
}
return(mean(score))
}
repeatClusterSVM = function(train, valid, rep.len = 10) {
train = as.matrix(train)
valid = as.matrix(valid)
best.score = -Inf
set.seed(1024)
for (lmd in c(1,5,10,20,50,100)) {
temp.score = clusterSVM.cv(x = train[,-1], y = train[,1], lambda = lmd,
centers = 8, iter.max = 1000, verbose = 0,
cluster.method = "mlKmeans")
if (temp.score>best.score) {
best.score = temp.score
best.lambda = lmd
}
}
score = rep(0, rep.len)
total.time = rep(0, rep.len)
for (i in 1:rep.len) {
csvm.obj = SwarmSVM::clusterSVM(x = train[,-1], y = train[,1], seed = i,
valid.x = valid[,-1], valid.y = valid[,1],
centers = 8, iter.max = 1000, verbose = 0,
lambda = best.lambda,
cluster.method = "mlKmeans")
score[i] = csvm.obj$valid.score
total.time[i] = csvm.obj$time$total.time
}
gc()
result = c(mean(score), sd(score), mean(total.time), sd(total.time), best.lambda)
names(result) = c('Average Error', 'Standard Deviation',
'Average Time', 'Standard Deviation', 'Best Lambda')
result = round(result, 7)
return(result)
}
repeatClusterSVM(svmguide1, svmguide1.t, rep.len)
# Average Error Standard Deviation Average Time Standard Deviation Best Lambda
# 0.8143000 0.0091225 0.1589000 0.0143562 20.0000000
repeatClusterSVM(ijcnn1, ijcnn1.t, rep.len)
# Average Error Standard Deviation Average Time Standard Deviation Best Lambda
# 0.9444030 0.0022965 3.6295000 0.1568306 1.0000000
repeatClusterSVM(usps, usps.t, rep.len)
# Average Error Standard Deviation Average Time Standard Deviation Best Lambda
# 0.9552566 0.0011697 2.2441000 0.2853292 1.0000000
repeatClusterSVM(mnist38, mnist38.t, rep.len)
# Average Error Standard Deviation Average Time Standard Deviation Best Lambda
# 0.9855847 0.0012162 10.4385000 1.9813294 5.0000000
repeatClusterSVM(mnist49, mnist49.t, rep.len)
# Average Error Standard Deviation Average Time Standard Deviation Best Lambda
# 0.980663 0.001575 12.641600 1.430985 1.000000
repeatClusterSVM(mnistoe, mnistoe.t, rep.len)
# Average Error Standard Deviation Average Time Standard Deviation Best Lambda
# 0.960820 0.000319 61.526800 9.283494 100.000000
########################
######## LibLinear
########################
rep.len = 10
repeatLiblineaR = function(train, valid, rep.len = 10) {
train = as.matrix(train)
valid = as.matrix(valid)
best.score = -Inf
set.seed(1024)
for (cst in c(0.01,0.1,1,10,100)) {
temp.score = LiblineaR::LiblineaR(data = train[,-1], target = train[,1],
type = 1, verbose = F, cost = cst, cross = 5)
if (temp.score>best.score) {
best.score = temp.score
best.cost = cst
}
}
score = rep(0, rep.len)
total.time = rep(0, rep.len)
for (i in 1:rep.len) {
set.seed(i)
time.stamp = proc.time()
liblinear.obj = LiblineaR::LiblineaR(data = train[,-1], target = train[,1],
type = 1, verbose = F, cost = best.cost)
preds = predict(liblinear.obj, valid[,-1])$prediction
score[i] = sum(preds==valid[,1])/length(valid[,1])
total.time[i] = (proc.time()-time.stamp)[3]
}
result = c(mean(score), sd(score), mean(total.time), sd(total.time), best.cost)
names(result) = c('Average Error', 'Standard Deviation',
'Average Time', 'Standard Deviation', 'Best Cost')
result = round(result, 7)
return(result)
}
repeatLiblineaR(svmguide1, svmguide1.t, rep.len)
# Average Error Standard Deviation Average Time Standard Deviation Best Cost
# 0.8007000 0.0031265 3.6246000 0.0455417 100.0000000
repeatLiblineaR(ijcnn1, ijcnn1.t, rep.len)
# Average Error Standard Deviation Average Time Standard Deviation Best Cost
# 0.9210532 0.0008342 51.0651000 0.8289324 100.0000000
repeatLiblineaR(usps, usps.t, rep.len)
# Average Error Standard Deviation Average Time Standard Deviation Best Cost
# 0.9377180 0.0000000 2.2539000 0.3196156 10.0000000
repeatLiblineaR(mnist38, mnist38.t, rep.len)
# Average Error Standard Deviation Average Time Standard Deviation Best Cost
# 0.7561492 0.0002125 0.7620000 0.0664011 1.0000000
repeatLiblineaR(mnist49, mnist49.t, rep.len)
# Average Error Standard Deviation Average Time Standard Deviation Best Cost
# 0.9432446 0.0000000 0.7340000 0.0776645 1.0000000
repeatLiblineaR(mnistoe, mnistoe.t, rep.len)
# Average Error Standard Deviation Average Time Standard Deviation Best Cost
# 0.9021700 0.0000823 27.9471000 2.8855612 10.0000000
########################
######## Kernel SVM
########################
svm.cv = function(x, y, nfold = 5, ...) {
n = nrow(x)
ind = sample(n)
folds = list()
for (i in 1:(nfold-1)) {
folds[[i]] = ind[1:(n %/% nfold)]
ind = setdiff(ind,folds[[i]])
}
folds[[nfold]] = ind
score = rep(0,nfold)
for (i in 1:nfold) {
train.ind = setdiff(1:n, folds[[i]])
test.ind = folds[[i]]
svm.obj = e1071::svm(x = x[train.ind,], y = as.factor(y[train.ind]), ...)
preds = predict(svm.obj, x[test.ind,], probability = FALSE)
score[i] = sum(preds==y[test.ind])/length(y[test.ind])
}
return(mean(score))
}
rep.len = 10
repeatSVM = function(train, valid, rep.len = 10) {
best.score = -Inf
set.seed(1024)
for (gm in c(0.01,0.1,1,10,100)) {
for (cst in c(0.01,0.1,1,10,100)) {
cat('Begin cv on',gm,'\t',cst)
tp = proc.time()
temp.score = svm.cv(x = train[,-1], y = train[,1], nfold = 5,
gamma = gm, cost = cst, kernel = "radial")
cat('\t\tTime:',(proc.time()-tp)[3],'\t\tScore:',temp.score,'\n')
if (temp.score>best.score) {
best.score = temp.score
best.gamma = gm
best.cost = cst
}
}
}
score = rep(0, rep.len)
total.time = rep(0, rep.len)
for (i in 1:rep.len) {
set.seed(i)
time.stamp = proc.time()
svm.obj = e1071::svm(x = train[,-1], y = as.factor(train[,1]),
kernel = "radial", gamma = best.gamma, cost = best.cost)
preds = predict(svm.obj, valid[,-1])
presd = as.numeric(preds)-1
score[i] = sum(preds==valid[,1])/length(valid[,1])
total.time[i] = (proc.time()-time.stamp)[3]
}
result = c(mean(score), sd(score), mean(total.time), sd(total.time), best.gamma, best.cost)
names(result) = c('Average Error', 'Standard Deviation',
'Average Time', 'Standard Deviation', 'Best Gamma', 'Best Cost')
result = round(result, 7)
return(result)
}
repeatSVM(svmguide1, svmguide1.t, rep.len)
# Average Error Standard Deviation Average Time Standard Deviation Best Gamma Best Cost
# 8.7875e-01 0.0000e+00 8.5260e-01 2.5906e-03 1.0000e+01 1.0000e+02
repeatSVM(ijcnn1, ijcnn1.t, rep.len)
# Average Error Standard Deviation Average Time Standard Deviation Best Gamma Best Cost
# 0.9903164 0.0000000 131.4755000 2.2816740 10.0000000 10.0000000
repeatSVM(usps, usps.t, rep.len)
# Average Error Standard Deviation Average Time Standard Deviation Best Gamma Best Cost
# 0.9706029 0.0000000 9.3162000 0.0553530 1.0000000 100.0000000
repeatSVM(mnist38, mnist38.t, rep.len)
# Average Error Standard Deviation Average Time Standard Deviation Best Gamma Best Cost
# 0.9949597 0.0000000 77.8332000 0.3907411 1.0000000 10.0000000
repeatSVM(mnist49, mnist49.t, rep.len)
# Average Error Standard Deviation Average Time Standard Deviation Best Gamma Best Cost
# 0.9929684 0.0000000 57.6431000 0.1407752 1.0000000 100.0000000
repeatSVM(mnistoe, mnistoe.t, rep.len)
# Not finished
# It is too long to tune the parameters
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.