# inst/doc/image_classification_using_MNIST_CIFAR_data.R

## ---- eval = F, echo = T, warning = F, message = F, cache = T-----------------
#  
#  # using system('wget..') on a linux OS
#  
#  system("wget https://raw.githubusercontent.com/mlampros/DataSets/master/mnist.zip")
#  
#  mnist <- read.table(unz("mnist.zip", "mnist.csv"), nrows = 70000, header = T,
#  
#                      quote = "\"", sep = ",")
#  

## ---- eval = F, cache = T-----------------------------------------------------
#  X = mnist[, -ncol(mnist)]
#  dim(X)
#  
#  ## [1] 70000   784
#  
#  # the KernelKnn function requires numeric class labels that start from 1 (1, 2, ...), so the 0-9 digit labels are shifted by one
#  
#  y = mnist[, ncol(mnist)] + 1
#  table(y)
#  
#  ## y
#  ##    1    2    3    4    5    6    7    8    9   10
#  ## 6903 7877 6990 7141 6824 6313 6876 7293 6825 6958
#  

## ---- eval = T, echo = F------------------------------------------------------

 # Render the settings used for the MNIST run on the reduced pixel data as a
 # one-row, left-aligned table (this chunk is evaluated but not echoed).
 # The first column header used to read "irlba_singlular_vectors" (typo).
 knitr::kable(
   data.frame(
     irlba_singular_vectors = 40,
     k = 8,
     method = "braycurtis",
     kernel = "biweight_tricube_MULT"
   ),
   align = "l"
 )

## ---- eval = F, cache = T-----------------------------------------------------
#  
#  library(irlba)
#  
#  svd_irlb = irlba(as.matrix(X), nv = 40, nu = 40, verbose = F)            # irlba truncated svd
#  
#  new_x = as.matrix(X) %*% svd_irlb$v               # new_x using the 40 right singular vectors
#  

## ---- eval = F, cache = T, warning = FALSE, message = FALSE, results = 'hide'----
#  
#  library(KernelKnn)
#  
#  fit = KernelKnnCV(as.matrix(new_x), y, k = 8, folds = 4, method = 'braycurtis',
#  
#                    weights_function = 'biweight_tricube_MULT', regression = F,
#  
#                    threads = 6, Levels = sort(unique(y)))
#  
#  
#  # str(fit)
#  
#  
#  # evaluation metric
#  
#  acc = function (y_true, preds) {
#  
#    out = table(y_true, max.col(preds, ties.method = "random"))
#  
#    acc = sum(diag(out))/sum(out)
#  
#    acc
#  }
#  

## ---- eval = F, cache = F-----------------------------------------------------
#  
#  acc_fit = unlist(lapply(1:length(fit$preds),
#  
#                          function(x) acc(y[fit$folds[[x]]],
#  
#                                          fit$preds[[x]])))
#  acc_fit
#  
#  ## [1] 0.9742857 0.9749143 0.9761143 0.9741143
#  
#  cat('mean accuracy using cross-validation :', mean(acc_fit), '\n')
#  
#  ## mean accuracy using cross-validation : 0.9748571
#  

## ---- eval = F, cache = T-----------------------------------------------------
#  
#  library(OpenImageR)
#  
#  hog = HOG_apply(X, cells = 6, orientations = 9, rows = 28, columns = 28, threads = 6)
#  
#  ##
#  ## time to complete : 1.802997 secs
#  
#  dim(hog)
#  
#  ## [1] 70000   324
#  

## ---- eval = F, cache = T, warning = FALSE, message = FALSE, results = 'hide'----
#  
#  fit_hog = KernelKnnCV(hog, y, k = 20, folds = 4, method = 'braycurtis',
#  
#                    weights_function = 'biweight_tricube_MULT', regression = F,
#  
#                    threads = 6, Levels = sort(unique(y)))
#  
#  
#  #str(fit_hog)
#  

## ---- eval = F, cache = F-----------------------------------------------------
#  
#  acc_fit_hog = unlist(lapply(1:length(fit_hog$preds),
#  
#                              function(x) acc(y[fit_hog$folds[[x]]],
#  
#                                              fit_hog$preds[[x]])))
#  acc_fit_hog
#  
#  ## [1] 0.9833714 0.9840571 0.9846857 0.9838857
#  
#  cat('mean accuracy for hog-features using cross-validation :', mean(acc_fit_hog), '\n')
#  
#  ## mean accuracy for hog-features using cross-validation : 0.984
#  

## ---- eval = F, echo = T, warning = F, message = F, cache = T-----------------
#  
#  # using system('wget..') on a linux OS
#  
#  system("wget https://raw.githubusercontent.com/mlampros/DataSets/master/cifar_10.zip")
#  
#  cifar_10 <- read.table(unz("cifar_10.zip", "cifar_10.csv"), nrows = 60000, header = T,
#  
#                         quote = "\"", sep = ",")
#  

## ---- eval = F, cache = T-----------------------------------------------------
#  X = cifar_10[, -ncol(cifar_10)]
#  dim(X)
#  
#  ## [1] 60000  1024
#  
#  # the KernelKnn function requires numeric class labels that start from 1 (1, 2, ...); the CIFAR-10 labels already do, so no shift is needed
#  
#  y = cifar_10[, ncol(cifar_10)]
#  table(y)
#  
#  ## y
#  ##    1    2    3    4    5    6    7    8    9   10
#  ## 6000 6000 6000 6000 6000 6000 6000 6000 6000 6000
#  

## ---- eval = T, echo = F------------------------------------------------------

 # Render the settings used for the CIFAR-10 run on the reduced pixel data as a
 # one-row, left-aligned table (this chunk is evaluated but not echoed).
 # The first column header used to read "irlba_singlular_vectors" (typo).
 knitr::kable(
   data.frame(
     irlba_singular_vectors = 40,
     k = 8,
     method = "braycurtis",
     kernel = "biweight_tricube_MULT"
   ),
   align = "l"
 )

## ---- eval = F, cache = T-----------------------------------------------------
#  
#  svd_irlb = irlba(as.matrix(X), nv = 40, nu = 40, verbose = F)            # irlba truncated svd
#  
#  new_x = as.matrix(X) %*% svd_irlb$v               # new_x using the 40 right singular vectors
#  

## ---- eval = F, cache = T, warning = FALSE, message = FALSE, results = 'hide'----
#  
#  fit = KernelKnnCV(as.matrix(new_x), y, k = 8, folds = 4, method = 'braycurtis',
#  
#                    weights_function = 'biweight_tricube_MULT', regression = F,
#  
#                    threads = 6, Levels = sort(unique(y)))
#  
#  
#  # str(fit)
#  

## ---- eval = F, cache = F-----------------------------------------------------
#  
#  acc_fit = unlist(lapply(1:length(fit$preds),
#  
#                          function(x) acc(y[fit$folds[[x]]],
#  
#                                          fit$preds[[x]])))
#  acc_fit
#  
#  ## [1] 0.4080667 0.4097333 0.4040000 0.4102667
#  
#  cat('mean accuracy using cross-validation :', mean(acc_fit), '\n')
#  
#  ## mean accuracy using cross-validation : 0.4080167
#  

## ---- eval = F, cache = T-----------------------------------------------------
#  
#  hog = HOG_apply(X, cells = 6, orientations = 9, rows = 32,
#  
#                  columns = 32, threads = 6)
#  
#  ##
#  ## time to complete : 3.394621 secs
#  
#  dim(hog)
#  
#  ## [1] 60000   324
#  

## ---- eval = F, cache = T, warning = FALSE, message = FALSE, results = 'hide'----
#  
#  fit_hog = KernelKnnCV(hog, y, k = 20, folds = 4, method = 'braycurtis',
#  
#                    weights_function = 'biweight_tricube_MULT', regression = F,
#  
#                    threads = 6, Levels = sort(unique(y)))
#  
#  
#  # str(fit_hog)
#  

## ---- eval = F, cache = F-----------------------------------------------------
#  
#  acc_fit_hog = unlist(lapply(1:length(fit_hog$preds),
#  
#                              function(x) acc(y[fit_hog$folds[[x]]],
#  
#                                              fit_hog$preds[[x]])))
#  acc_fit_hog
#  
#  ## [1] 0.5807333 0.5884000 0.5777333 0.5861333
#  
#  cat('mean accuracy for hog-features using cross-validation :', mean(acc_fit_hog), '\n')
#  
#  ## mean accuracy for hog-features using cross-validation : 0.58325
#  

## ---- eval = F, echo = F------------------------------------------------------
#  
#  # remove the cache, mnist.zip and cifar_10.zip once the vignettes are built
#  
#  # unlink("image_classification_using_MNIST_CIFAR_data_cache", recursive = TRUE)                    # USE this chunk in case of 'eval = TRUE'
#  # unlink("mnist.zip", recursive = TRUE)
#  # unlink("cifar_10.zip", recursive = TRUE)

# Try the KernelKnn package in your browser

# Any scripts or data that you put into this service are public.

# KernelKnn documentation built on Jan. 7, 2023, 1:18 a.m.