In this tutorial, we mainly use the following three packages:
mxnet
: model trainingimager
: image processingabind
: manipulations with arrays.It is an end-to-end R solution for the dogs vs cats Kaggle competition (https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/) and it can be used as an example for fine-tuning. All the code has been test on Ubuntu 16.04.
knitr::opts_chunk$set(eval = FALSE)
library(imager) library(mxnet) library(abind)
files <- list.files("./train/") old_names <- sapply(files, strsplit, split = ".", fixed = TRUE) max_length <- max(sapply(old_names, function(x) nchar(x[[2]]))) zeros <- max_length - sapply(old_names, function(x) nchar(x[[2]])) zeros <- sapply(zeros, function(x) paste(rep(0, x), collapse = "")) new_names <- Map(function(x, y) {paste0("./train/", x[1], "/", y, x[2], ".jpg")}, x = old_names, y = zeros) # Full names files <- paste0("./train/", files) dir.create("./train/cat") dir.create("./train/dog") # New names will be in 00001.jpg format Map(function(x, y) file.rename(from = x, to = y), files, new_names)
files <- list.files("./train/", recursive = TRUE) new_names <- paste0("./train_pad_224x224/", files) files <- paste0("./train/", files) dir.create("./train_pad_224x224/") dir.create("./train_pad_224x224/cat") dir.create("./train_pad_224x224/dog") padImage <- function(x) { long_side <- max(dim(x)[1:2]) short_side <- min(dim(x)[1:2]) pad_img <- pad(x, nPix = long_side - short_side, axes = ifelse(dim(x)[1] < dim(x)[2], "x", "y")) return(pad_img) } Map(function(x, y) { pad_img <- padImage(load.image(x)) res_img <- resize(pad_img, size_x = 224, size_y = 224) imager::save.image(res_img, y) }, x = files, y = new_names)
files <- list.files("./test/") max_length <- max(sapply(files, nchar)) zeros <- max_length - sapply(files, nchar) zeros <- sapply(zeros, function(x) paste(rep(0, x), collapse = "")) newnames <- paste0("./test/", zeros, files) files <- paste0("./test/", files) Map(function(x, y) file.rename(from = x, to = y), files, newnames)
files <- list.files("./test/") new_names <- paste0("./test_pad_224x224/", files) files <- paste0("./test/", files) dir.create("./test_pad_224x224/") Map(function(x, y) { pad_img <- padImage(load.image(x)) res_img <- resize(pad_img, size_x = 224, size_y = 224) imager::save.image(res_img, y) }, x = files, y = new_names)
cat_files <- list.files("train_pad_224x224/cat/", recursive=TRUE) cat_files <- paste0("cat/", cat_files) dog_files <- list.files("train_pad_224x224/dog/", recursive=TRUE) dog_files <- paste0("dog/", dog_files) train_ind <- sample(length(cat_files), length(cat_files) * 0.8) train_data <- c(1:(length(train_ind) * 2)) train_data <- cbind(train_data, c(rep(0, length(train_ind)), rep(1, length(train_ind)))) train_data <- cbind(train_data, c(cat_files[train_ind], dog_files[train_ind])) train_data <- train_data[sample(nrow(train_data)),] write.table(train_data, "cats_dogs_train.lst", quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE) im2rec("cats_dogs_train.lst", "train_pad_224x224/", "cats_dogs_train.rec") val_ind <- c(1:length(cat_files))[!c(1:length(cat_files)) %in% train_ind] val_data <- c(1:(length(val_ind) * 2)) val_data <- cbind(val_data, c(rep(0, length(val_ind)), rep(1, length(val_ind)))) val_data <- cbind(val_data, c(cat_files[val_ind], dog_files[val_ind])) val_data <- val_data[sample(nrow(val_data)),] write.table(val_data, "cats_dogs_val.lst", quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE) im2rec("cats_dogs_val.lst", "train_pad_224x224/", "cats_dogs_val.rec")
get_iterator <- function(data_shape, train_data, val_data, batch_size = 128) { train <- mx.io.ImageRecordIter(path.imgrec = train_data, batch.size = batch_size, data.shape = data_shape, rand.crop = TRUE, rand.mirror = TRUE) val <- mx.io.ImageRecordIter(path.imgrec = val_data, batch.size = batch_size, data.shape = data_shape, rand.crop = FALSE, rand.mirror = FALSE) return(list(train = train, val = val)) }
data <- get_iterator(data_shape = c(224, 224, 3), train_data = "cats_dogs_train.rec", val_data = "cats_dogs_val.rec", batch_size = 8) train <- data$train val <- data$val
Here we use the pretrained model from http://data.dmlc.ml/models/imagenet/. There are 1000 classes in imagenet, and we need to replace the last fully connected layer with a new layer for 2 classes.
download.file('http://data.dmlc.ml/data/Inception.zip', destfile = 'Inception.zip') unzip("Inception.zip") inception_bn <- mx.model.load("./Inception-BN", iteration = 126) symbol <- inception_bn$symbol # check symbol$arguments for layer names internals <- symbol$get.internals() outputs <- internals$outputs flatten <- internals$get.output(which(outputs == "flatten_output")) new_fc <- mx.symbol.FullyConnected(data = flatten, num_hidden = 2, name = "fc1") # set name to original name in symbol$arguments new_soft <- mx.symbol.SoftmaxOutput(data = new_fc, name = "softmax") # set name to original name in symbol$arguments arg_params_new <- mx.model.init.params(symbol = new_soft, input.shape = list("data" = c(224, 224, 3, 8)), output.shape = NULL, initializer = mx.init.uniform(0.1), ctx = mx.cpu())$arg.params fc1_weights_new <- arg_params_new[["fc1_weight"]] fc1_bias_new <- arg_params_new[["fc1_bias"]] arg_params_new <- inception_bn$arg.params arg_params_new[["fc1_weight"]] <- fc1_weights_new arg_params_new[["fc1_bias"]] <- fc1_bias_new
model <- mx.model.FeedForward.create( symbol = new_soft, X = train, eval.data = val, ctx = mx.gpu(0), eval.metric = mx.metric.accuracy, num.round = 2, learning.rate = 0.05, momentum = 0.9, wd = 0.00001, kvstore = "local", array.batch.size = 128, epoch.end.callback = mx.callback.save.checkpoint("inception_bn"), batch.end.callback = mx.callback.log.train.metric(150), initializer = mx.init.Xavier(factor_type = "in", magnitude = 2.34), optimizer = "sgd", arg.params = arg_params_new, aux.params = inception_bn$aux.params )
preprocImage<- function(src, # URL or file location height = 224, width = 224, num_channels = 3, # 3 for RGB, 1 for grayscale mult_by = 1, # set to 255 for normalized image crop = FALSE) { # no crop by default im <- load.image(src) if (crop) { shape <- dim(im) short_edge <- min(shape[1:2]) xx <- floor((shape[1] - short_edge) / 2) yy <- floor((shape[2] - short_edge) / 2) im <- crop.borders(im, xx, yy) } resized <- resize(im, size_x = width, size_y = height) arr <- as.array(resized) * mult_by dim(arr) <- c(width, height, num_channels, 1) return(arr) }
files <- list.files("./test_pad_224x224/") files <- paste0("./test_pad_224x224/", files) files <- split(files, rep(1:1250, each = 10)) probs <- lapply(files, function(x) { images <- lapply(x, preprocImage, mult_by = 255) images <- do.call(abind, images) probs <- predict(model, X = images, ctx = mx.gpu(0)) }) saveRDS(probs, "probs.rds") probs <- t(do.call(cbind, probs)) preds <- data.frame(id = 1:12500, label = probs[, 2]) write.csv(preds, "subm.csv", row.names = FALSE, quote = FALSE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.