# Setting up the spam checker SVM model
#' @import caret
#' @import kernlab
#' @importFrom utils read.csv
#' @importFrom stats na.fail
.onLoad <- function(libname, pkgname) {
  # Package load hook. Reads the bundled spambase data, fits a radial-kernel
  # SVM ham/spam classifier on a random subsample, and publishes two objects
  # with `<<-`: `headers` (the column-name table) and `svmModel` (the fitted
  # caret model).
  #
  # Args:
  #   libname: library path passed in by R's load machinery (unused here).
  #   pkgname: package name, used to locate the files under "extdata".
  #
  # NOTE(review): `<<-` updates a binding that already exists in an enclosing
  # environment and otherwise creates the variable in the global environment.
  # Confirm whether the namespace pre-defines `headers` / `svmModel`; if not,
  # loading the package writes into the user's workspace.
  # NOTE(review): the full repeated cross-validation runs on every load and
  # consumes the session's random number stream.

  # mustWork = TRUE reports a missing data file directly instead of letting
  # read.csv() fail on an empty path.
  data_path <- system.file("extdata", "spambase.csv",
                           package = pkgname, mustWork = TRUE)
  names_path <- system.file("extdata", "names.csv",
                            package = pkgname, mustWork = TRUE)

  spambase <- utils::read.csv(data_path, header = FALSE)
  header_table <- utils::read.csv(names_path, header = FALSE)
  headers <<- header_table

  # The first column of the names file holds one column name per row.
  colnames(spambase) <- as.character(header_table[[1L]])

  # Cleanup steps: the outcome column `y` becomes a two-level factor.
  spambase$y <- as.factor(spambase$y)
  levels(spambase$y) <- c("ham", "spam")

  # Work on a random subsample of at most 1000 rows.
  sample_size <- min(1000L, nrow(spambase))
  spambaseSample <- spambase[sample(nrow(spambase), sample_size), ]

  # Only the 80% training partition is used below.
  trainIndex <- caret::createDataPartition(spambaseSample$y, p = 0.8,
                                           list = FALSE, times = 1)
  dataTrain <- spambaseSample[trainIndex, ]

  # Estimate the kernel width (sigma) from the training data.
  sigDist <- kernlab::sigest(y ~ ., data = dataTrain, frac = 1)

  # Tuning grid: sigma is fixed at the first estimate and only the cost C is
  # searched. `[[` drops the element's name; with `[` data.frame() would warn
  # "row names were found from a short variable and have been discarded".
  svmTuneGrid <- data.frame(.sigma = sigDist[[1L]], .C = 2^(-2:7))

  cvControl <- caret::trainControl(method = "repeatedcv", repeats = 5,
                                   classProbs = TRUE)

  svmModel <<- caret::train(y ~ .,
                            data = dataTrain,
                            method = "svmRadial",
                            preProcess = c("center", "scale"),
                            tuneGrid = svmTuneGrid,
                            trControl = cvControl)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.