
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(rTorch)
# these are the equivalents of import module
nn          <- torch$nn
transforms  <- torchvision$transforms
dsets       <- torchvision$datasets
builtins    <- import_builtins()

batch_size_train <-  64L

Load datasets

Load training dataset

local_folder <- '../datasets/raw_data'
# this will save the datasets to the local drive
train_dataset = dsets$MNIST(root=file.path(local_folder), 
                            train=TRUE, 
                            transform=transforms$ToTensor(),
                            download=TRUE)

train_dataset
builtins$len(train_dataset)

Introspection

Class and length of train_dataset

# R
class(train_dataset)
length(train_dataset)
# Python
builtins$type(train_dataset)
builtins$len(train_dataset)
reticulate::py_len(train_dataset)

Note that these apparently equivalent R and Python commands produce different results.

names(train_dataset)
reticulate::py_list_attributes(train_dataset)
# this is identical to Python len() function
train_dataset$`__len__`()
# this is not what we are looking for which is torch.Size([1, 28, 28])
d0 <- train_dataset$data[1, 1]
class(d0)
d0$size()
# this is identical to train_dataset.data.size() in Python
train_dataset$data$size()
# this is not a dimension we are looking for either
train_dataset$data[2, 1]$size()
# py = import_builtins()
enum_train_dataset <- builtins$enumerate(train_dataset)
class(enum_train_dataset)
# enum_train_dataset$`__count__`
reticulate::py_list_attributes(enum_train_dataset)
# this is not a number we were expecting
enum_train_dataset$`__sizeof__`()
train_dataset$data$nelement()  # total number of elements in the tensor
train_dataset$data$shape       # shape
train_dataset$data$size()      # size
# get index, label and image
# the pointer will move forward every time we run the chunk
obj   <- reticulate::iter_next(enum_train_dataset)
idx   <- obj[[1]]        # index number

image <- obj[[2]][[1]]
label <- obj[[2]][[2]]

cat(idx, label, class(label), "\t")
print(image$size())

Introspection of the training dataset

Inspecting a single image

So this is how a single image is represented in numbers. It is actually a 28 x 28 pixel image, which is why you end up with a 28x28 matrix of numbers.
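
To see some of those numbers, here is a minimal sketch that prints an arbitrary 10x10 patch from the middle of the first image (the window chosen is an assumption for illustration):

# peek at a 10x10 patch of the 28x28 pixel matrix (the first dim is the channel)
round(image$numpy()[1, 10:19, 10:19], 2)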

Inspecting the first element of the training dataset tuple

This means that to access the image, you need to access the first element of the tuple.

# Input Matrix
image$size()

# A 28x28 sized image of a digit
# torch.Size([1, 28, 28])

MNIST image from training dataset

class(image$numpy())
dim(image$numpy())

Plot one image

rotate <- function(x) t(apply(x, 2, rev))   # rotate the matrix so image() draws the digit upright

# read label for digit
label

# read tensor for image
img_tensor_2d <- image[1]
img_tensor_2d$shape       # shape of the 2D tensor: torch.Size([28, 28])

# convert tensor to numpy array
img_mat_2d <- img_tensor_2d$numpy()
dim(img_mat_2d)

# show digit image
image(rotate(img_mat_2d))
title(label)

Plot a second image

# iterate to the next tensor
obj <- reticulate::iter_next(enum_train_dataset)   # iterator
idx <- obj[[1]]
img <- obj[[2]][[1]]
lbl <- obj[[2]][[2]]

img_tensor_2d <- img[1]            # get 2D tensor
img_mat_2d <- img_tensor_2d$numpy()  # convert to 2D array

# show digit image
image(rotate(img_mat_2d))            # rotate and plot
title(lbl)                         # label as plot title

Loading the test dataset

test_dataset = dsets$MNIST(root = local_folder, 
                           train=FALSE, 
                           transform=transforms$ToTensor())

reticulate::py_len(test_dataset)

Introspection of the test dataset

# we'll get all the attributes of the class
reticulate::py_list_attributes(test_dataset)
# get the Python type
builtins$type(test_dataset$`__getitem__`(0L))   # in Python a tuple gets converted to a list
# in Python: type(test_dataset[0]) -> <class 'tuple'>
# the size of the first and last image tensor
test_dataset$`__getitem__`(0L)[[1]]$size()      # same as test_dataset[0][0].size()
test_dataset$`__getitem__`(9999L)[[1]]$size()  

This is the same as using reticulate's py_get_item() function:

# the size of the first and last image tensor
reticulate::py_get_item(test_dataset, 0L)[[1]]$size()     # same as test_dataset[0][0].size()
reticulate::py_get_item(test_dataset, 9999L)[[1]]$size()
# the label is the second list member
label <- test_dataset$`__getitem__`(0L)[[2]]  # in Python: test_dataset[0][1]
label

Plot one test image

# convert tensor to numpy array
.show_img <- test_dataset$`__getitem__`(0L)[[1]]$numpy() 
dim(.show_img)                                           # numpy 3D array

# reshape 3D array to 2D 
show_img <- np$reshape(.show_img, c(28L, 28L))
dim(show_img)
# another way to reshape the array
show_img <- np$reshape(test_dataset$`__getitem__`(0L)[[1]]$numpy(), c(28L, 28L))
dim(show_img)
# show in grays and rotate
image(rotate(show_img), col = gray.colors(64))
title(label)

Plot a second test image

# next image, index moves from (0L) to (1L), and so on
idx <- 1L
.show_img <- test_dataset$`__getitem__`(idx)[[1]]$numpy()
show_img  <- np$reshape(.show_img, c(28L, 28L))
label     <- test_dataset$`__getitem__`(idx)[[2]]

image(rotate(show_img), col = gray.colors(64))
title(label)

Plot the last test image

# the test images are indexed from 0 to py_len(test_dataset) - 1,
# so the first image is 0 and the last image is 9999
idx <- reticulate::py_len(test_dataset) - 1L
.show_img <- test_dataset$`__getitem__`(idx)[[1]]$numpy()
show_img  <- np$reshape(.show_img, c(28L, 28L))
label     <- test_dataset$`__getitem__`(idx)[[2]]

image(rotate(show_img), col = gray.colors(64))
title(label)

Defining epochs

When the model goes through the whole set of 60,000 images once, learning how to classify the digits 0-9, it is considered 1 epoch.

There is also the concept of batch size: the model looks at a batch of, say, 100 images and then updates its weights (parameters). Each such update, after one batch, is considered 1 iteration.

batch_size <- 100L

We arbitrarily set 3,000 iterations here, which means the model's weights will be updated 3,000 times.

n_iters <- 3000L

One epoch consists of 60,000 / 100 = 600 iterations. Because we would like to go through 3,000 iterations, we need 3000 / 600 = 5 epochs.

num_epochs = n_iters / (reticulate::py_len(train_dataset) / batch_size)
num_epochs = as.integer(num_epochs)
num_epochs

Create iterable objects: training and testing datasets

train_loader = torch$utils$data$DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size, 
                                           shuffle=TRUE)
# Iterable object
test_loader = torch$utils$data$DataLoader(dataset=test_dataset, 
                                          batch_size=batch_size, 
                                          shuffle=FALSE)

Check iterability

# note: collections.Iterable was removed in Python 3.10;
# collections.abc.Iterable works on Python 3.3 and later
collections_abc <- import("collections.abc")

builtins$isinstance(train_loader, collections_abc$Iterable)
builtins$isinstance(test_loader, collections_abc$Iterable)
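
As an extra sanity check, here is a sketch using the same iterator idiom as before to pull one batch from the training loader and confirm its shape:

# pull a single batch from the training loader
batch <- reticulate::iter_next(builtins$enumerate(train_loader))
batch_images <- batch[[2]][[1]]
batch_labels <- batch[[2]][[2]]
batch_images$size()    # expect torch.Size([100, 1, 28, 28])
batch_labels$size()    # expect torch.Size([100])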

Building the model

# Same as linear regression! 
main <- py_run_string(
"
import torch.nn as nn

class LogisticRegressionModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LogisticRegressionModel, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        out = self.linear(x)
        return out
")

# retrieve the logistic regression model class defined in Python above
LogisticRegressionModel <- main$LogisticRegressionModel
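
As an aside, because the model is a single linear layer, an equivalent network can be sketched directly in R with nn$Sequential, avoiding py_run_string; this alternative is an illustration, not how the vignette proceeds:

# sketch of an equivalent one-layer model assembled directly in R
seq_model <- nn$Sequential(nn$Linear(28L*28L, 10L))
seq_model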

Instantiate model class based on input and output dimensions

# feeding the model with 28x28 images
input_dim = 28L*28L

# classify digits 0-9, a total of 10 classes
output_dim = 10L

model = LogisticRegressionModel(input_dim, output_dim)

Instantiate Cross Entropy Loss class

# we need Cross Entropy Loss to calculate the loss before we backpropagate
criterion = nn$CrossEntropyLoss()  

Instantiate Optimizer class

Similar to what we've covered above, the optimizer uses the parameters' gradients to update them at each step.

# calculate parameters' gradients and update
learning_rate = 0.001

optimizer = torch$optim$SGD(model$parameters(), lr=learning_rate)  

Parameters introspection

You'll realize we have 2 sets of parameters: a 10x784 matrix, which is $A$, and a vector of length 10, which is $b$, in the equation $y = AX + b$, where $X$ is our input of size 784.

We'll go into detail subsequently on how these parameters interact with our input to produce the 10-element output.

# Type of parameter object
print(model$parameters())
model_parameters <- builtins$list(model$parameters())

# Number of parameter tensors: the weight matrix and the bias
print(builtins$len(model_parameters))

# FC 1 weight parameters: 10x784
model_parameters[[1]]$size()

# FC 1 bias parameters: length 10
model_parameters[[2]]$size()
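
To make the $y = AX + b$ algebra concrete, here is a small sketch that reproduces the forward pass by hand; the random input tensor is an assumption for illustration only:

# reproduce y = AX + b by hand for one fake flattened image
A <- model_parameters[[1]]     # weight matrix, shape 10x784
b <- model_parameters[[2]]     # bias, shape 10
x <- torch$randn(784L)         # assumption: random stand-in for an image
y <- torch$add(torch$matmul(A, x), b)
y$size()                       # torch.Size([10]), one score per digit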

Train the model, evaluating every 500 iterations

iter = 0

for (epoch in 1:num_epochs) {
  iter_train_dataset <- builtins$enumerate(train_loader) # convert to iterator
  for (obj in iterate(iter_train_dataset)) {
      # get the tensors for images and labels
      images <- obj[[2]][[1]]
      labels <- obj[[2]][[2]]

      # Reshape images to (batch_size, input_size)
      images <- images$view(-1L, 28L*28L)$requires_grad_()

      # Clear gradients w.r.t. parameters
      optimizer$zero_grad()

      # Forward pass to get output/logits
      outputs = model(images)

      # Calculate Loss: softmax --> cross entropy loss
      loss = criterion(outputs, labels)

      # Getting gradients w.r.t. parameters
      loss$backward()

      # Updating parameters
      optimizer$step()

      iter = iter + 1

      if (iter %% 500 == 0) {
          # Calculate Accuracy         
          correct = 0
          total = 0

          # Iterate through test dataset
          iter_test_dataset <- builtins$enumerate(test_loader) # convert to iterator
          for (obj2 in iterate(iter_test_dataset)) {
              # Load images to a Torch Variable
              images <- obj2[[2]][[1]]
              labels <- obj2[[2]][[2]]
              images <- images$view(-1L, 28L*28L)$requires_grad_()

              # Forward pass only to get logits/output
              outputs = model(images)

              # torch$max along dimension 1 returns the pair (values, indices)
              .predicted = torch$max(outputs$data, 1L)
              predicted <- .predicted[1L]   # extract the predicted class indices

              # Total number of labels
              total = total + labels$size(0L)

              # Total correct predictions
              correct = correct + sum((predicted$numpy() == labels$numpy()))
          }
          accuracy = 100 * correct / total

          # Print Loss
          cat(sprintf('Iteration: %5d. Loss: %f. Accuracy: %8.2f \n', 
                      iter, loss$item(), accuracy))
      }
  }
}  
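
One refinement worth noting (a sketch, not part of the original code): the evaluation pass does not need gradients, so it can be wrapped in torch$no_grad() to avoid tracking them and save memory:

# evaluate one test batch without gradient tracking
with(torch$no_grad(), {
    test_batch <- reticulate::iter_next(builtins$enumerate(test_loader))
    imgs <- test_batch[[2]][[1]]$view(-1L, 28L*28L)
    print(model(imgs)$size())   # torch.Size([100, 10])
})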

Break down accuracy calculation

As we've trained our model, we can extract the accuracy calculation portion to understand what's happening without re-training the model.

This will print the model's predictions in your notebook.

# Iterate through test dataset
iter_test <- 0
iter_test_dataset <- builtins$enumerate(test_loader) # convert to iterator
for (test_obj in iterate(iter_test_dataset)) {
    iter_test <- iter_test + 1
    # Load images to a Torch Variable
    images <- test_obj[[2]][[1]]
    labels <- test_obj[[2]][[2]]
    images <- images$view(-1L, 28L*28L)$requires_grad_()

    # Forward pass only to get logits/output
    outputs = model(images)

    if (iter_test == 1) {
        print('OUTPUTS')
        print(outputs)
    }
    # Get predictions from the maximum value
    .predicted = torch$max(outputs$data, 1L)
    predicted <- .predicted[1L]
}
print(predicted)

Printing output size

This produces a 100x10 matrix: each batch contains 100 images, and for each image the model outputs one score per class across the 10 classes, with the largest score indicating the digit the model is predicting.

# Iterate through test dataset
iter_test <- 0
iter_test_dataset <- builtins$enumerate(test_loader) # convert to iterator
for (test_obj in iterate(iter_test_dataset)) {
    iter_test <- iter_test + 1
    # Load images to a Torch Variable
    images <- test_obj[[2]][[1]]
    labels <- test_obj[[2]][[2]]
    images <- images$view(-1L, 28L*28L)$requires_grad_()

    # Forward pass only to get logits/output
    outputs = model(images)

    if (iter_test == 1) {
        print('OUTPUTS')
        print(outputs$size())
    }
    # Get predictions from the maximum value
    .predicted = torch$max(outputs$data, 1L)
    predicted <- .predicted[1L]
}
print(predicted$size())

The predicted and output tensors have the same number of elements along the batch dimension. This is expected, because predicted is calculated from the row-wise maximum values of the output.

Printing one output

Each row of the output is a 1x10 vector where the largest score marks what the model thinks the image is. Here, the entry for digit 7 has the largest score, indicating the model thinks the image is a 7.

number 0: -0.4181
number 1: -1.0784
...
number 7: 2.9352
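
To double-check the arg-max reading of that output, here is a minimal sketch that scores the first test image directly; remember that R indices are 1-based while the digits run 0-9:

# score the first test image and recover the predicted digit
img0   <- test_dataset$`__getitem__`(0L)[[1]]$view(-1L, 28L*28L)
scores <- model(img0)$data$numpy()
which.max(scores) - 1     # subtract 1 to map the 1-based index to a digit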
# Iterate through test dataset
iter_test <- 0
iter_test_dataset <- builtins$enumerate(test_loader) # convert to iterator
for (test_obj in iterate(iter_test_dataset)) {
    iter_test <- iter_test + 1
    # Load images to a Torch Variable
    images <- test_obj[[2]][[1]]
    labels <- test_obj[[2]][[2]]
    images <- images$view(-1L, 28L*28L)$requires_grad_()

    # Forward pass only to get logits/output
    outputs = model(images)

    if (iter_test == 1) {
        print('OUTPUTS')
        print(outputs[1])    # show first tensor of 100
        print(outputs[100])   # show last tensor of 100
    }
    # Get predictions from the maximum value for 100 tensors
    .predicted = torch$max(outputs$data, 1L)
    predicted <- .predicted[1L]
}
print(predicted)

Printing prediction output

Because our output is of size 100 (our batch size), our prediction will also be of size 100.

# Iterate through test dataset
iter_test <- 0
iter_test_dataset <- builtins$enumerate(test_loader) # convert to iterator
for (test_obj in iterate(iter_test_dataset)) {
    iter_test <- iter_test + 1
    # Load images to a Torch Variable
    images <- test_obj[[2]][[1]]
    labels <- test_obj[[2]][[2]]
    images <- images$view(-1L, 28L*28L)$requires_grad_()

    # Forward pass only to get logits/output
    outputs = model(images)

    # Get predictions from the maximum value for a batch
    .predicted = torch$max(outputs$data, 1L)
    predicted <- .predicted[1L]

    if (iter_test == 1) {
        print('PREDICTION')
        print(predicted$size())
    }
}

Print prediction value

We print our first prediction, which, as verified above, should be the digit 7.

# Iterate through test dataset
iter_test <- 0
iter_test_dataset <- builtins$enumerate(test_loader) # convert to iterator
for (test_obj in iterate(iter_test_dataset)) {
    iter_test <- iter_test + 1
    # Load images to a Torch Variable
    images <- test_obj[[2]][[1]]
    labels <- test_obj[[2]][[2]]
    images <- images$view(-1L, 28L*28L)$requires_grad_()

    # Forward pass only to get logits/output
    outputs = model(images)

    # Get predictions from the maximum value
    .predicted = torch$max(outputs$data, 1L)
    predicted <- .predicted[1L]

    if (iter_test == 1) {
        print('PREDICTION')
        print(predicted[1])
    }
}

Print prediction, label and label size

Here we show both what we are predicting and the actual value. In this case, we predict the right value: 7!

# Iterate through test dataset
iter_test <- 0
iter_test_dataset <- builtins$enumerate(test_loader) # convert to iterator
for (test_obj in iterate(iter_test_dataset)) {
    iter_test <- iter_test + 1
    # Load images to a Torch Variable
    images <- test_obj[[2]][[1]]
    labels <- test_obj[[2]][[2]]
    images <- images$view(-1L, 28L*28L)$requires_grad_()
    # Forward pass only to get logits/output
    outputs = model(images)
    # Get predictions from the maximum value
    .predicted = torch$max(outputs$data, 1L)
    predicted <- .predicted[1L]

    if (iter_test == 1) {
        print('PREDICTION')
        print(predicted[1])

        print('LABEL SIZE')
        print(labels$size())

        print('LABEL FOR IMAGE 0')
        print(labels[1]$item())  # extract the scalar part only
    }
}

Print second prediction and ground truth

It should be the digit 2.

# Iterate through test dataset
iter_test <- 0
iter_test_dataset <- builtins$enumerate(test_loader) # convert to iterator
for (test_obj in iterate(iter_test_dataset)) {
    iter_test <- iter_test + 1
    # Load images to a Torch Variable
    images <- test_obj[[2]][[1]]
    labels <- test_obj[[2]][[2]]
    images <- images$view(-1L, 28L*28L)$requires_grad_()
    # Forward pass only to get logits/output
    outputs = model(images)
    # Get predictions from the maximum value
    .predicted = torch$max(outputs$data, 1L)
    predicted <- .predicted[1L]

    if (iter_test == 1) {
        print('PREDICTION')
        print(predicted[2])        # the second prediction this time

        print('LABEL SIZE')
        print(labels$size())

        print('LABEL FOR IMAGE 1')
        print(labels[2]$item())    # ground truth for the second image
    }
}

Print accuracy

Now that we know what each object represents, we can understand how we arrived at our accuracy numbers.

One last thing to note: in the original Python code, correct.item() is needed because correct is a PyTorch tensor and its scalar value must be extracted before computing with total, which is an integer. In this R version, correct is already an R numeric, because we compare the numpy arrays directly.

# Iterate through test dataset
correct <- 0      # initialize the counters before the loop
total   <- 0
iter_test <- 0
iter_test_dataset <- builtins$enumerate(test_loader) # convert to iterator
for (test_obj in iterate(iter_test_dataset)) {
    iter_test <- iter_test + 1
    # Load images to a Torch Variable
    images <- test_obj[[2]][[1]]
    labels <- test_obj[[2]][[2]]
    images <- images$view(-1L, 28L*28L)$requires_grad_()
    # Forward pass only to get logits/output
    outputs = model(images)
    # Get predictions from the maximum value
    .predicted = torch$max(outputs$data, 1L)
    predicted <- .predicted[1L]
    # Total number of labels
    total = total + labels$size(0L)
    # Total correct predictions
    correct = correct + sum((predicted$numpy() == labels$numpy()))
}
accuracy = 100 * correct / total
print(accuracy)

Saving PyTorch model

This is how you save your model. Only the learned parameters (the state_dict) are written to disk, not the whole model object. Set save_model = FALSE if you prefer not to write the file.

save_model = TRUE
if (save_model) {
    # Saves only parameters
    torch$save(model$state_dict(), 'awesome_model.pkl')
}
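
To restore the parameters later, here is a sketch; the file name matches the one used above, and the fresh model must have the same architecture:

# reload the saved parameters into a fresh model instance
new_model <- LogisticRegressionModel(input_dim, output_dim)
new_model$load_state_dict(torch$load('awesome_model.pkl'))
new_model$eval()    # switch to evaluation mode before inference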

