Dogs vs. Cats classification with mxnet and R

1. Packages and prerequisites

Ubuntu 16, mxnet 0.9.4 (compiled with GPU support), imager for image processind, abind for manipulations with arrays. It is almost end-to-end R solution for Kaggle competition https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/, we will use Python only for creating .rec-files.

Thanks to jeremiedb, my code for fine-tuning is largely based on his answers.

knitr::opts_chunk$set(eval = FALSE)

library(imager)
library(mxnet)
library(abind)

2. Image processing

2.1. Renaming train files

files <- list.files("train")
old_names <- sapply(files, strsplit, split = ".", fixed = TRUE)
max_length <- max(sapply(old_names, function(x) nchar(x[[2]])))
zeros <- max_length - sapply(old_names, function(x) nchar(x[[2]]))
zeros <- sapply(zeros, function(x) paste(rep(0, x), collapse = ""))
new_names <- Map(function(x, y) {paste0("./train/", 
                                        x[1], 
                                        "/",
                                        y,
                                        x[2],
                                        ".jpg")},
                 x = old_names, y = zeros
                 )

# Full names
files <- paste0("./train/", files)

dir.create("./train/cat")
dir.create("./train/dog")

# New names will be in 00001.jpg format
Map(function(x, y) file.rename(from = x, to = y), files, new_names)

2.2. Train images: 224x224, padded with empty space

files <- list.files("train", recursive = TRUE)
new_names <- paste0("train_pad_224x224/", files)
files <- paste0("./train/", files)
dir.create("./train_pad_224x224/")
dir.create("./train_pad_224x224/cat")
dir.create("./train_pad_224x224/dog")

padImage <- function(x) {
    long_side <- max(dim(x)[1:2])
    short_side <- min(dim(x)[1:2])
    pad_img <- pad(x, 
                   nPix = long_side - short_side,
                   axes = ifelse(dim(x)[1] < dim(x)[2], "x", "y"))
    return(pad_img)
}

Map(function(x, y) {
    pad_img <- padImage(load.image(x))
    res_img <- resize(pad_img,  size_x = 224, size_y = 224)
    imager::save.image(res_img, y)
}, x = files, y = new_names)

2.3. Renaming test files

files <- list.files("test")
max_length <- max(sapply(files, nchar))
zeros <- max_length - sapply(files, nchar)
zeros <- sapply(zeros, function(x) paste(rep(0, x), collapse = ""))
newnames <- paste0("./test/", zeros, files)

files <- paste0("./test/", files)

Map(function(x, y) file.rename(from = x, to = y), files, newnames)

2.4. Test images: 224x224, padded with empty space

files <- list.files("test")
new_names <- paste0("test_pad_224x224/", files)
files <- paste0("./test/", files)
dir.create("./test_pad_224x224/")

Map(function(x, y) {
    pad_img <- padImage(load.image(x))
    res_img <- resize(pad_img,  size_x = 224, size_y = 224)
    imager::save.image(res_img, y)
}, x = files, y = new_names)

2.5. Creating .rec files

python ~/mxnet/tools/im2rec.py --list=1 --recursive=1 --train-ratio=0.8 cats_dogs train_pad_224x224
python ~/mxnet/tools/im2rec.py --num-thread=4 --pass-through=1 cats_dogs_train.lst train_pad_224x224
python ~/mxnet/tools/im2rec.py --num-thread=4 --pass-through=1 cats_dogs_val.lst train_pad_224x224

3. Iterators

get_iterator <- function(data_shape, 
                         train_data, 
                         val_data, 
                         batch_size = 128) {
    train <- mx.io.ImageRecordIter(
        path.imgrec = train_data,
        batch.size  = batch_size,
        data.shape  = data_shape,
        rand.crop   = TRUE,
        rand.mirror = TRUE)
  
    val <- mx.io.ImageRecordIter(
        path.imgrec = val_data,
        batch.size  = batch_size,
        data.shape  = data_shape,
        rand.crop   = FALSE,
        rand.mirror = FALSE
        )
 
  return(list(train = train, val = val))
}

data  <- get_iterator(data_shape = c(224, 224, 3),
         train_data = "/media/andrey/Data/KAGGLE/cats_dogs/cats_dogs_train.rec",
         val_data   = "/media/andrey/Data/KAGGLE/cats_dogs/cats_dogs_val.rec",
         batch_size = 8)
train <- data$train
val   <- data$val

4. Load pretrained model

Model from http://data.dmlc.ml/models/imagenet/ Last fully connected layes for 1000 classes replaced with new layer for 2 classes.

inception_bn <- mx.model.load("models/inception_bn/Inception-BN", 
                              iteration = 126)

symbol <- inception_bn$symbol
# check symbol$arguments for layer names
internals <- symbol$get.internals()
outputs <- internals$outputs

flatten <- internals$get.output(which(outputs == "flatten_output"))

new_fc <- mx.symbol.FullyConnected(data = flatten, 
                                   num_hidden = 2, 
                                   name = "fc1") 
                        # set name to original name in symbol$arguments
new_soft <- mx.symbol.SoftmaxOutput(data = new_fc, 
                                    name = "softmax")
                        # set name to original name in symbol$arguments

arg_params_new <- mxnet:::mx.model.init.params(
    symbol = new_soft, 
    input.shape = c(224, 224, 3, 8), 
    initializer = mxnet:::mx.init.uniform(0.1), 
    ctx = mx.gpu(0)
    )$arg.params
fc1_weights_new <- arg_params_new[["fc1_weight"]]
fc1_bias_new <- arg_params_new[["fc1_bias"]]

arg_params_new <- inception_bn$arg.params

arg_params_new[["fc1_weight"]] <- fc1_weights_new 
arg_params_new[["fc1_bias"]] <- fc1_bias_new

5. Train (fine-tune) model

model <- mx.model.FeedForward.create(
  symbol             = new_soft,
  X                  = train,
  eval.data          = val,
  ctx                = mx.gpu(0),
  eval.metric        = mx.metric.accuracy,
  num.round          = 1,
  learning.rate      = 0.05,
  momentum           = 0.9,
  wd                 = 0.00001,
  kvstore            = "local",
  array.batch.size   = 128,
  epoch.end.callback = mx.callback.save.checkpoint("inception_bn"),
  batch.end.callback = mx.callback.log.train.metric(150),
  initializer        = mx.init.Xavier(factor_type = "in", magnitude = 2.34),
  optimizer          = "sgd",
  arg.params         = arg_params_new,
  aux.params         = inception_bn$aux.params
)

model <- mx.model.load("inception_bn", 1)

Continue training with decreased speed (learning.rate = 0.03):

model <- mx.model.FeedForward.create(
  symbol             = model$symbol,
  X                  = train,
  eval.data          = val,
  ctx                = mx.gpu(0),
  eval.metric        = mx.metric.accuracy,
  num.round          = 5,
  learning.rate      = 0.03,
  momentum           = 0.9,
  wd                 = 0.00001,
  kvstore            = "local",
  array.batch.size   = 100,
  epoch.end.callback = mx.callback.save.checkpoint("inception_bn"),
  batch.end.callback = mx.callback.log.train.metric(150),
  initializer        = mx.init.Xavier(factor_type = "in", magnitude = 2.34),
  optimizer          = "sgd",
  arg.params         = model$arg.params, 
  aux.params         = model$aux.params
)

model <- mx.model.load("inception_bn", 1)

My R session crashed after each iteration, so I made some iterations manually.

6. Make predictions

preprocImage<- function(src,              # URL or file location
                        height = 224,        
                        width = 224,  
                        num_channels = 3, # 3 for RGB, 1 for grayscale
                        mult_by = 1,      # set to 255 for normalized image
                        crop = FALSE) {   # no crop by default
    
    im <- load.image(src)
    
    if (crop) {
        shape <- dim(im)
        short_edge <- min(shape[1:2])
        xx <- floor((shape[1] - short_edge) / 2)
        yy <- floor((shape[2] - short_edge) / 2) 
        im <- crop.borders(im, xx, yy)
    }
    
    resized <- resize(im,  size_x = width, size_y = height)
    arr <- as.array(resized) * mult_by
    dim(arr) <- c(width, height, num_channels, 1)
    return(arr)
}

files <- list.files("test_pad_224x224/")
files <- paste0("./test_pad_224x224/", files)

# ind <- seq(1, 12500, 1250) 
# probs <- numeric()
# for (i in ind) {
#    images <- lapply(files[i:i+1249], preprocImage, mult_by = 255)
#    images <- do.call(abind, images)
#    probs[i:i+1249] <- predict(model, X = images, ctx = mx.gpu(0))
# }

files <- split(files, rep(1:1250, each = 10))
probs <- lapply(files, function(x) {
    images <- lapply(x, preprocImage, mult_by = 255)
    images <- do.call(abind, images)
    probs <- predict(model, X = images, ctx = mx.gpu(0))
})
saveRDS(probs, "probs.rds")
probs <- t(do.call(cbind, probs))

preds <- data.frame(id = 1:12500, label = probs[, 2])
write.csv(preds, "subm.csv", row.names = FALSE, quote = FALSE)