devtools::load_all(".", quiet=TRUE)

Playing with MNIST

MNIST is a collection of handwritten digits, stored in matrix form along with their corresponding labels. It is, or rather was, the standard dataset for benchmarking classification algorithms. The package ships with a subset of MNIST creatively called mini-MNIST. Let us fit gpuClassifieR on mini-MNIST and assess its performance.

Loading data

Let us first load and organize the dataset:

data(mini_mnist)
train_feats <- mini_mnist$train$images
train_targets <- mini_mnist$train$labels
test_feats <- mini_mnist$test$images
test_targets <- mini_mnist$test$labels
M <- NCOL(train_feats)    ## Number of features
K <- NCOL(train_targets)  ## Number of classes
w_init <- mat.or.vec(M, K)  ## Initial weights: an M x K matrix of zeros
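
As a quick sanity check, the sketch below (my addition, not part of the original vignette) verifies that the labels are one-hot encoded, which the use of NCOL(train_targets) as the class count implies:

dim(train_feats)    ## examples x pixel features
dim(train_targets)  ## examples x classes
## Each one-hot label row should sum to exactly 1
stopifnot(all(rowSums(train_targets) == 1))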

Benchmark function

We start by defining our benchmark function. To keep the runs comparable, we fix the number of training iterations at 1000 and disregard convergence.

benchmark_fun <- function(n_sample, feats, targets, decay=0.0, maxiter=1000) {
    ## One zero-initialized model per backend
    w_init <- mat.or.vec(NCOL(feats), NCOL(targets))
    models <- lapply(list(R=w_init, C=w_init, CUDA=w_init), Classifier)
    ## Train each model on the first n_sample examples and record the elapsed time
    times <- mapply(function(X, Y) system.time(train(X, feats[1:n_sample, , drop=FALSE],
                                                     targets[1:n_sample, , drop=FALSE],
                                                     decay, 0.01, maxiter,
                                                     FALSE, -1, Y))[['elapsed']],
                    X=models, Y=names(models),
                    SIMPLIFY=FALSE)
    times
}
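
A single call returns a named list with one elapsed time per backend. As a hypothetical spot check (timings will vary with your hardware):

## Time all three backends on the first 100 training examples
str(benchmark_fun(100, train_feats, train_targets))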

Running the benchmark

We combine the training and test sets to get 2000 points, then time each backend while growing the number of training examples in steps of 100, from 100 up to 2000 (plus a single-example run). As before, the number of iterations is fixed at 1000 and gradient convergence is ignored.

feats <- rbind(train_feats, test_feats)
targets <- rbind(train_targets, test_targets)

## Run the benchmark at every sample size
sample_points <- c(1, seq(100, NROW(feats), 100))
times <- t(sapply(X=sample_points, function(X) benchmark_fun(X, feats, targets)))
## Reshape to long format and label the columns
times <- cbind(stack(as.data.frame(times)), rep(sample_points, 3))
colnames(times) <- c("time", "backend", "size")
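
The stack() call pivots the three backend columns into long format, one row per (backend, sample size) pair, which is the shape ggplot2 expects:

head(times)  ## columns: time, backend, size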
suppressMessages(library(ggplot2, quietly = TRUE))
time_plot <- ggplot(times, aes(x=size, y=time, group=backend, colour=backend)) +
    geom_line() + geom_point()
time_plot <- time_plot + xlab("sample size") + ylab("Elapsed time in seconds") +
    ggtitle("gpuClassifieR implementations benchmark")
plot(time_plot)

Benchmark against glmnet

Let's compare the performance of our C and CUDA implementations against glmnet. We first time how long it takes glmnet to fit a grid of decay coefficients (lambdas), then time our code on that same grid. We set the maximum number of iterations to 100 for both glmnet and our implementation.

suppressMessages(library(glmnet, quietly = TRUE))
## max.col() converts one-hot label rows to integer class indices
glmnet.time <- system.time({lambda_grid <- glmnet(train_feats,
                                                  max.col(train_targets),
                                                  family = "multinomial",
                                                  type.multinomial = "grouped",
                                                  lambda = NULL,  ## let glmnet choose its own grid
                                                  alpha = 0)$lambda})[['elapsed']]
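
Before reusing the grid, it is worth inspecting what glmnet produced; by default it computes up to 100 lambda values, with the exact range depending on the data:

length(lambda_grid)  ## number of decay values on the grid
range(lambda_grid)   ## smallest and largest lambda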


gCR.time <- list(C=NULL, CUDA=NULL)
for (backend in c('C', 'CUDA')) {
    model <- Classifier(w_init)
    ## Train once per lambda on glmnet's grid and record the total elapsed time
    gCR.time[[backend]] <- system.time({
        sapply(lambda_grid, function(X) train(model,
                                              train_feats,
                                              train_targets,
                                              decay=X,
                                              step_size=0.1,
                                              max_iter=100,
                                              backend=backend))
    })[['elapsed']]
}
times <- c(gCR.time, list(FORTRAN=glmnet.time))  ## glmnet's core solver is written in FORTRAN
times <- stack(as.data.frame(times))
times$packages <- c(rep("gpuClassifieR", 2), "glmnet")
names(times) <- c("Time", "Implementation", "Package")
compare_plot <- ggplot(data=times, aes(x=Package, y=Time, fill=Implementation, colour=Implementation)) +
    geom_bar(stat="identity", position=position_dodge()) +
    scale_y_continuous("Elapsed Time", breaks=seq(0, max(times$Time), 10)) +
    scale_x_discrete("Package")
compare_plot <- compare_plot + ggtitle("Comparative performances: glmnet vs gpuClassifieR")
compare_plot

