In AlfonsoRReyes/rDeepThought:

Dogs vs. Cats classification with mxnet and R

Packages and prerequisites

In this tutorial, we mainly use the following three packages:

mxnet: model training
imager: image processing
abind: manipulations with arrays.

It is an end-to-end R solution for the dogs vs cats Kaggle competition (https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/) and it can be used as an example for fine-tuning. All the code has been test on Ubuntu 16.04.

knitr::opts_chunk$set(eval = FALSE)

library(imager)
library(mxnet)
library(abind)

Image processing

Renaming train files

files <- list.files("./train/")
old_names <- sapply(files, strsplit, split = ".", fixed = TRUE)
max_length <- max(sapply(old_names, function(x) nchar(x[[2]])))
zeros <- max_length - sapply(old_names, function(x) nchar(x[[2]]))
zeros <- sapply(zeros, function(x) paste(rep(0, x), collapse = ""))
new_names <- Map(function(x, y) {paste0("./train/", x[1], "/", y, x[2], ".jpg")},
                 x = old_names, y = zeros)

# Full names
files <- paste0("./train/", files)

dir.create("./train/cat")
dir.create("./train/dog")

# New names will be in 00001.jpg format
Map(function(x, y) file.rename(from = x, to = y), files, new_names)

Training images: 224x224, padded with empty space

files <- list.files("./train/", recursive = TRUE)
new_names <- paste0("./train_pad_224x224/", files)
files <- paste0("./train/", files)
dir.create("./train_pad_224x224/")
dir.create("./train_pad_224x224/cat")
dir.create("./train_pad_224x224/dog")

padImage <- function(x) {
  long_side <- max(dim(x)[1:2])
  short_side <- min(dim(x)[1:2])
  pad_img <- pad(x,
  nPix = long_side - short_side,
  axes = ifelse(dim(x)[1] < dim(x)[2], "x", "y"))
  return(pad_img)
}

Map(function(x, y) {
  pad_img <- padImage(load.image(x))
  res_img <- resize(pad_img,  size_x = 224, size_y = 224)
  imager::save.image(res_img, y)
  }, x = files, y = new_names)

Renaming test files

files <- list.files("./test/")
max_length <- max(sapply(files, nchar))
zeros <- max_length - sapply(files, nchar)
zeros <- sapply(zeros, function(x) paste(rep(0, x), collapse = ""))
newnames <- paste0("./test/", zeros, files)

files <- paste0("./test/", files)

Map(function(x, y) file.rename(from = x, to = y), files, newnames)

Test images: 224x224, padded with empty space

files <- list.files("./test/")
new_names <- paste0("./test_pad_224x224/", files)
files <- paste0("./test/", files)
dir.create("./test_pad_224x224/")

Map(function(x, y) {
  pad_img <- padImage(load.image(x))
  res_img <- resize(pad_img,  size_x = 224, size_y = 224)
  imager::save.image(res_img, y)
}, x = files, y = new_names)

Creating .rec files

cat_files <- list.files("train_pad_224x224/cat/", recursive=TRUE)
cat_files <- paste0("cat/", cat_files)

dog_files <- list.files("train_pad_224x224/dog/", recursive=TRUE)
dog_files <- paste0("dog/", dog_files)

train_ind <- sample(length(cat_files), length(cat_files) * 0.8)
train_data <- c(1:(length(train_ind) * 2))
train_data <- cbind(train_data, c(rep(0, length(train_ind)), rep(1, length(train_ind))))
train_data <- cbind(train_data, c(cat_files[train_ind], dog_files[train_ind]))
train_data <- train_data[sample(nrow(train_data)),]
write.table(train_data, "cats_dogs_train.lst", quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE)
im2rec("cats_dogs_train.lst", "train_pad_224x224/", "cats_dogs_train.rec")

val_ind <- c(1:length(cat_files))[!c(1:length(cat_files)) %in% train_ind]
val_data <- c(1:(length(val_ind) * 2))
val_data <- cbind(val_data, c(rep(0, length(val_ind)), rep(1, length(val_ind))))
val_data <- cbind(val_data, c(cat_files[val_ind], dog_files[val_ind]))
val_data <- val_data[sample(nrow(val_data)),]
write.table(val_data, "cats_dogs_val.lst", quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE)
im2rec("cats_dogs_val.lst", "train_pad_224x224/", "cats_dogs_val.rec")

The data iterator

get_iterator <- function(data_shape, train_data, val_data, batch_size = 128) {
    train <- mx.io.ImageRecordIter(path.imgrec = train_data,
                                   batch.size  = batch_size,
                                   data.shape  = data_shape,
                                   rand.crop   = TRUE,
                                   rand.mirror = TRUE)

    val <- mx.io.ImageRecordIter(path.imgrec = val_data,
                                 batch.size  = batch_size,
                                 data.shape  = data_shape,
                                 rand.crop   = FALSE,
                                 rand.mirror = FALSE)

  return(list(train = train, val = val))
}

data  <- get_iterator(data_shape = c(224, 224, 3),
                      train_data = "cats_dogs_train.rec",
                      val_data   = "cats_dogs_val.rec",
                      batch_size = 8)
train <- data$train
val   <- data$val

Load pretrained model

Here we use the pretrained model from http://data.dmlc.ml/models/imagenet/. There are 1000 classes in imagenet, and we need to replace the last fully connected layer with a new layer for 2 classes.

download.file('http://data.dmlc.ml/data/Inception.zip', destfile = 'Inception.zip')
unzip("Inception.zip")
inception_bn <- mx.model.load("./Inception-BN", iteration = 126)

symbol <- inception_bn$symbol
# check symbol$arguments for layer names
internals <- symbol$get.internals()
outputs <- internals$outputs

flatten <- internals$get.output(which(outputs == "flatten_output"))

new_fc <- mx.symbol.FullyConnected(data = flatten, 
                                   num_hidden = 2, 
                                   name = "fc1") 
# set name to original name in symbol$arguments
new_soft <- mx.symbol.SoftmaxOutput(data = new_fc, 
                                    name = "softmax")
# set name to original name in symbol$arguments

arg_params_new <- mx.model.init.params(symbol = new_soft,
                                       input.shape = list("data" = c(224, 224, 3, 8)),
                                       output.shape = NULL,
                                       initializer = mx.init.uniform(0.1),
                                       ctx = mx.cpu())$arg.params
fc1_weights_new <- arg_params_new[["fc1_weight"]]
fc1_bias_new <- arg_params_new[["fc1_bias"]]

arg_params_new <- inception_bn$arg.params

arg_params_new[["fc1_weight"]] <- fc1_weights_new 
arg_params_new[["fc1_bias"]] <- fc1_bias_new

Fine-tuning

model <- mx.model.FeedForward.create(
  symbol             = new_soft,
  X                  = train,
  eval.data          = val,
  ctx                = mx.gpu(0),
  eval.metric        = mx.metric.accuracy,
  num.round          = 2,
  learning.rate      = 0.05,
  momentum           = 0.9,
  wd                 = 0.00001,
  kvstore            = "local",
  array.batch.size   = 128,
  epoch.end.callback = mx.callback.save.checkpoint("inception_bn"),
  batch.end.callback = mx.callback.log.train.metric(150),
  initializer        = mx.init.Xavier(factor_type = "in", magnitude = 2.34),
  optimizer          = "sgd",
  arg.params         = arg_params_new,
  aux.params         = inception_bn$aux.params
)

Making predictions

preprocImage<- function(src, # URL or file location
                        height = 224,        
                        width = 224,  
                        num_channels = 3, # 3 for RGB, 1 for grayscale
                        mult_by = 1,      # set to 255 for normalized image
                        crop = FALSE) {   # no crop by default
  im <- load.image(src)

  if (crop) {
    shape <- dim(im)
    short_edge <- min(shape[1:2])
    xx <- floor((shape[1] - short_edge) / 2)
    yy <- floor((shape[2] - short_edge) / 2)
    im <- crop.borders(im, xx, yy)
  }

  resized <- resize(im,  size_x = width, size_y = height)
  arr <- as.array(resized) * mult_by
  dim(arr) <- c(width, height, num_channels, 1)
  return(arr)
}

files <- list.files("./test_pad_224x224/")
files <- paste0("./test_pad_224x224/", files)

files <- split(files, rep(1:1250, each = 10))
probs <- lapply(files, function(x) {
  images <- lapply(x, preprocImage, mult_by = 255)
  images <- do.call(abind, images)
  probs <- predict(model, X = images, ctx = mx.gpu(0))
})
saveRDS(probs, "probs.rds")
probs <- t(do.call(cbind, probs))

preds <- data.frame(id = 1:12500, label = probs[, 2])
write.csv(preds, "subm.csv", row.names = FALSE, quote = FALSE)