knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(rTorch)
# Short aliases that play the role of Python's `import ... as ...`
nn <- torch$nn
transforms <- torchvision$transforms
dsets <- torchvision$datasets
builtins <- import_builtins()
batch_size_train <- 64L

# Folder on the local drive where the downloaded MNIST files are kept
local_folder <- "../datasets/raw_data"
train_dataset <- dsets$MNIST(
  root = file.path(local_folder),
  train = TRUE,
  transform = transforms$ToTensor(),
  download = TRUE
)
train_dataset
builtins$len(train_dataset)

train_dataset

# The object as seen through R's own functions
class(train_dataset)
length(train_dataset)

# The same object as seen through Python's builtins
builtins$type(train_dataset)
builtins$len(train_dataset)
reticulate::py_len(train_dataset)
Note that these two similar-looking sets of commands (R's `class()`/`length()` versus Python's `type()`/`len()`) produce different results.
names(train_dataset)

reticulate::py_list_attributes(train_dataset)

# Calling the dunder method directly is what Python's len() does
train_dataset$`__len__`()

# Indexing the raw data does not give the torch.Size([1, 28, 28]) we want
d0 <- train_dataset$data[1, 1]
class(d0)
d0$size()

# Equivalent of train_dataset.data.size() in Python
train_dataset$data$size()

# Another index, still not the dimension we are after
train_dataset$data[2, 1]$size()

# py = import_builtins()
# Wrap the dataset in a Python enumerate object so it can be stepped through
enum_train_dataset <- builtins$enumerate(train_dataset)
class(enum_train_dataset)
# enum_train_dataset$`__count__`
reticulate::py_list_attributes(enum_train_dataset)

# __sizeof__ is not the element count we were hoping for
enum_train_dataset$`__sizeof__`()

train_dataset$data$nelement()  # total number of elements in the tensor
train_dataset$data$shape       # shape
train_dataset$data$size()      # size

# Pull the next (index, (image, label)) item from the enumerator.
# The iterator advances every time this chunk is run.
obj <- reticulate::iter_next(enum_train_dataset)
idx <- obj[[1]]          # index number
image <- obj[[2]][[1]]
label <- obj[[2]][[2]]
cat(idx, label, class(label), "\t")
print(image$size())
So this is how a single image is represented in numbers. It's actually a 28 pixel x 28 pixel image which is why you would end up with this 28x28 matrix of numbers.
This means to access the image, you need to access the first element in the tuple.
# Input matrix
image$size()
# A 28x28 sized image of a digit
# torch.Size([1, 28, 28])

class(image$numpy())
dim(image$numpy())

# Reverse every column of `x`, then transpose the result, so the matrix
# is turned before being handed to image() for plotting.
rotate <- function(x) {
  flipped <- apply(x, 2, rev)
  t(flipped)
}
# label of the current digit
label
# first slice of the image tensor
img_tensor_2d <- image[1]
img_tensor_2d$shape  # shape of the 2D tensor: torch.Size([28, 28])
# tensor -> numpy array
img_mat_2d <- img_tensor_2d$numpy()
dim(img_mat_2d)
# plot the digit with its label as the title
image(rotate(img_mat_2d))
title(label)

# Advance the enumerator to the next sample
obj <- reticulate::iter_next(enum_train_dataset)
idx <- obj[[1]]
img <- obj[[2]][[1]]
lbl <- obj[[2]][[2]]

img_tensor_2d <- img[1]               # get 2D tensor
img_mat_2d <- img_tensor_2d$numpy()   # convert to 2D array

# plot the digit with its label as the title
image(rotate(img_mat_2d))
title(lbl)

# Test split of MNIST, read from the same local folder
test_dataset <- dsets$MNIST(
  root = local_folder,
  train = FALSE,
  transform = transforms$ToTensor()
)
reticulate::py_len(test_dataset)

# List every attribute of the dataset object
reticulate::py_list_attributes(test_dataset)

# Python type of a single item
builtins$type(test_dataset$`__getitem__`(0L))  # in Python a tuple gets converted to a list
# in Python: type(test_dataset[0]) -> <class 'tuple'>

# Size of the image tensor at index 0 and at index 9999
test_dataset$`__getitem__`(0L)[[1]]$size()     # same as test_dataset[0][0].size()
test_dataset$`__getitem__`(9999L)[[1]]$size()
This is the same as:
py_to_r(test_dataset)

# Size of the image tensor at index 0 and at index 9999, via py_get_item()
# py_get_item(test_dataset, 0L)[[1]]$size()
py_get_item(test_dataset, 0L)[[1]]$size()
py_get_item(test_dataset, 9999L)[[1]]$size()  # same as test_dataset[0][0].size()

# The label is the second member of the item
label <- test_dataset$`__getitem__`(0L)[[2]]  # in Python: test_dataset[0][1]
label

# tensor -> numpy array
.show_img <- test_dataset$`__getitem__`(0L)[[1]]$numpy()
dim(.show_img)  # numpy 3D array
# collapse the 3D array to 2D
show_img <- np$reshape(.show_img, c(28L, 28L))
dim(show_img)

# Same reshape done in a single expression
show_img <- np$reshape(
  test_dataset$`__getitem__`(0L)[[1]]$numpy(),
  c(28L, 28L)
)
dim(show_img)

# Plot in grays after rotating
image(rotate(show_img), col = gray.colors(64))
title(label)

# Next image: the index moves from (0L) to (1L), and so on
idx <- 1L
.show_img <- test_dataset$`__getitem__`(idx)[[1]]$numpy()
show_img <- np$reshape(.show_img, c(28L, 28L))
label <- test_dataset$`__getitem__`(idx)[[2]]
image(rotate(show_img), col = gray.colors(64))
title(label)

# Last image: the first index is 0, so the last one is length - 1 (9999)
idx <- reticulate::py_len(test_dataset) - 1L
.show_img <- test_dataset$`__getitem__`(idx)[[1]]$numpy()
show_img <- np$reshape(.show_img, c(28L, 28L))
label <- test_dataset$`__getitem__`(idx)[[2]]
image(rotate(show_img), col = gray.colors(64))
title(label)
When the model goes through the whole 60k images once, learning how to classify 0-9, it's considered 1 epoch.
However, there's a concept of batch size: the model looks at a batch of 100 images before updating its weights, thereby learning. Each time the model updates its weights (parameters) after looking at all the images in one batch, this is considered 1 iteration.
# Number of images per batch; passed to the train and test data loaders
batch_size <- 100L
We arbitrarily set 3000 iterations here which means the model would update 3000 times.
# Target number of iterations; the number of epochs is derived from it
n_iters <- 3000L
One epoch consists of 60,000 / 100 = 600 iterations. Because we would like to go through 3000 iterations, this implies we would have 3000 / 600 = 5 epochs as each epoch has 600 iterations.
# Epochs = requested iterations / iterations per epoch, truncated to integer
num_epochs <- as.integer(
  n_iters / (reticulate::py_len(train_dataset) / batch_size)
)
num_epochs

# Batches of the training set, reshuffled on each pass
train_loader <- torch$utils$data$DataLoader(
  dataset = train_dataset,
  batch_size = batch_size,
  shuffle = TRUE
)

# Batches of the test set, kept in dataset order
test_loader <- torch$utils$data$DataLoader(
  dataset = test_dataset,
  batch_size = batch_size,
  shuffle = FALSE
)
# Check that both loaders are Python iterables.
# `Iterable` lives in `collections.abc`; the old alias `collections.Iterable`
# was removed in Python 3.10, so it is looked up in the abc submodule.
# `collections` is still imported so that later code relying on it keeps working.
collections <- import("collections")
collections_abc <- import("collections.abc")
builtins$isinstance(train_loader, collections_abc$Iterable)
builtins$isinstance(test_loader, collections_abc$Iterable)
# Same as linear regression!
# The model class is defined in Python source and evaluated through
# py_run_string(); the class is then picked out of the returned module.
# Python is whitespace-sensitive, so the string must keep its line breaks
# and 4-space indentation exactly as written here.
main <- py_run_string(
"
import torch.nn as nn

class LogisticRegressionModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LogisticRegressionModel, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        out = self.linear(x)
        return out
")

# Handle to the Python class: a single linear layer whose forward pass
# returns the layer output unchanged
LogisticRegressionModel <- main$LogisticRegressionModel
# Each input is a flattened 28x28 image
input_dim <- 28L * 28L
# One output per digit class, 0-9
output_dim <- 10L

model <- LogisticRegressionModel(input_dim, output_dim)

# Cross entropy loss, evaluated before backpropagating
criterion <- nn$CrossEntropyLoss()
Similar to what we've covered above, this calculates the parameters' gradients and updates them subsequently.
# Stochastic gradient descent over the model's parameters
learning_rate <- 0.001
optimizer <- torch$optim$SGD(
  model$parameters(),
  lr = learning_rate
)
You'll realize we have 2 sets of parameters, 10x784 which is $A$ and 10x1 which is $b$ in the $y = AX + b$ equation, where $X$ is our input of size 784.
We'll go into details subsequently how these parameters interact with our input to produce our 10x1 output.
# What model$parameters() itself is
print(model$parameters())

# Materialise the parameters as a list so they can be counted and indexed
model_parameters <- builtins$list(model$parameters())

# How many parameter tensors there are
print(builtins$len(model_parameters))

# FC 1 Parameters
builtins$list(model_parameters)[[1]]$size()

# FC 1 Bias Parameters
builtins$list(model_parameters)[[2]]$size()

# Same count, computed in one expression
builtins$len(builtins$list(model$parameters()))
# Train the model for `num_epochs` passes over `train_loader`, and every
# 500 iterations measure accuracy over the whole of `test_loader`.
# `iter` counts parameter updates across all epochs.
iter <- 0L
# seq_len() yields an empty sequence when num_epochs is 0, whereas
# 1:num_epochs would run the body for epochs 1 and 0.
for (epoch in seq_len(num_epochs)) {
  iter_train_dataset <- builtins$enumerate(train_loader)  # convert to iterator
  for (obj in iterate(iter_train_dataset)) {
    # each item is unpacked as (batch index, (images, labels))
    images <- obj[[2]][[1]]
    labels <- obj[[2]][[2]]

    # Reshape images to (batch_size, input_size)
    images <- images$view(-1L, 28L * 28L)$requires_grad_()

    # Clear gradients w.r.t. parameters
    optimizer$zero_grad()

    # Forward pass to get output/logits
    outputs <- model(images)

    # Calculate Loss: softmax --> cross entropy loss
    loss <- criterion(outputs, labels)

    # Getting gradients w.r.t. parameters
    loss$backward()

    # Updating parameters
    optimizer$step()

    iter <- iter + 1L

    if (iter %% 500L == 0L) {
      # Calculate accuracy over the test set, starting from fresh counters
      correct <- 0
      total <- 0

      iter_test_dataset <- builtins$enumerate(test_loader)  # convert to iterator
      for (obj2 in iterate(iter_test_dataset)) {
        images <- obj2[[2]][[1]]
        labels <- obj2[[2]][[2]]
        images <- images$view(-1L, 28L * 28L)$requires_grad_()

        # Forward pass only to get logits/output
        outputs <- model(images)

        # torch$max() along dimension 1 gives (values, indices); the indices
        # are used as the predicted class.
        # NOTE(review): `[1L]` relies on 0-based indexing of the returned
        # Python object -- confirm.
        .predicted <- torch$max(outputs$data, 1L)
        predicted <- .predicted[1L]

        # Total number of labels
        total <- total + labels$size(0L)

        # Total correct predictions
        correct <- correct + sum((predicted$numpy() == labels$numpy()))
      }
      accuracy <- 100 * correct / total

      # Print the loss of the last training batch and the test accuracy
      cat(sprintf("Iteration: %5d. Loss: %f. Accuracy: %8.2f \n",
                  iter, loss$item(), accuracy))
    }
  }
}
As we've trained our model, we can extract the accuracy calculation portion to understand what's happening without re-training the model.
This would print out the output of the model's predictions on your notebook.
# Run the trained model over the test loader, printing the raw outputs
# of the first batch and, once the loop ends, the predictions of the last one.
iter_test <- 0
iter_test_dataset <- builtins$enumerate(test_loader)  # convert to iterator
for (test_obj in iterate(iter_test_dataset)) {
  iter_test <- iter_test + 1

  # each item is unpacked as (batch index, (images, labels))
  images <- test_obj[[2]][[1]]
  labels <- test_obj[[2]][[2]]
  images <- images$view(-1L, 28L * 28L)$requires_grad_()

  # Forward pass only to get logits/output
  outputs <- model(images)

  if (iter_test == 1) {
    print("OUTPUTS")
    print(outputs)
  }

  # Predictions come from the maximum along dimension 1
  .predicted <- torch$max(outputs$data, 1L)
  predicted <- .predicted[1L]
}
print(predicted)
This produces a 100x10 matrix because each iteration has a batch size of 100 and each prediction is made across the 10 classes, with the largest number indicating the digit the model is most likely predicting.
# Run the trained model over the test loader, printing the size of the
# first batch's outputs and, after the loop, the size of the last predictions.
iter_test <- 0
iter_test_dataset <- builtins$enumerate(test_loader)  # convert to iterator
for (test_obj in iterate(iter_test_dataset)) {
  iter_test <- iter_test + 1

  # each item is unpacked as (batch index, (images, labels))
  images <- test_obj[[2]][[1]]
  labels <- test_obj[[2]][[2]]
  images <- images$view(-1L, 28L * 28L)$requires_grad_()

  # Forward pass only to get logits/output
  outputs <- model(images)

  if (iter_test == 1) {
    print("OUTPUTS")
    print(outputs$size())
  }

  # Predictions come from the maximum along dimension 1
  .predicted <- torch$max(outputs$data, 1L)
  predicted <- .predicted[1L]
}
print(predicted$size())
The `predicted` and `outputs` tensors have the same number of 1D members. This is expected because `predicted` is calculated from the maximum values of `outputs`.
This would be a 1x10 matrix where the largest number is what the model thinks the image is. Here we can see that in the tensor, position 7 has the largest number, indicating the model thinks the image is 7.
number 0: -0.4181 number 1: -1.0784 ... number 7: 2.9352
# Run the trained model over the test loader, printing the first and the
# last output row of the first batch, then the last batch's predictions.
iter_test <- 0
iter_test_dataset <- builtins$enumerate(test_loader)  # convert to iterator
for (test_obj in iterate(iter_test_dataset)) {
  iter_test <- iter_test + 1

  # each item is unpacked as (batch index, (images, labels))
  images <- test_obj[[2]][[1]]
  labels <- test_obj[[2]][[2]]
  images <- images$view(-1L, 28L * 28L)$requires_grad_()

  # Forward pass only to get logits/output
  outputs <- model(images)

  if (iter_test == 1) {
    print("OUTPUTS")
    print(outputs[1])    # show first tensor of 100
    print(outputs[100])  # show last tensor of 100
  }

  # Predictions from the maximum value, for all 100 tensors of the batch
  .predicted <- torch$max(outputs$data, 1L)
  predicted <- .predicted[1L]
}
print(predicted)
Because our output is of size 100 (our batch size), our prediction would also be of size 100.
# Run the trained model over the test loader and print the size of the
# prediction tensor for the first batch.
iter_test <- 0
iter_test_dataset <- builtins$enumerate(test_loader)  # convert to iterator
for (test_obj in iterate(iter_test_dataset)) {
  iter_test <- iter_test + 1

  # each item is unpacked as (batch index, (images, labels))
  images <- test_obj[[2]][[1]]
  labels <- test_obj[[2]][[2]]
  images <- images$view(-1L, 28L * 28L)$requires_grad_()

  # Forward pass only to get logits/output
  outputs <- model(images)

  # Predictions from the maximum value, for the whole batch
  .predicted <- torch$max(outputs$data, 1L)
  predicted <- .predicted[1L]

  if (iter_test == 1) {
    print("PREDICTION")
    print(predicted$size())
  }
}
We are printing our prediction which, as verified above, should be digit 7.
# Run the trained model over the test loader and print the prediction
# for the first image of the first batch.
iter_test <- 0
iter_test_dataset <- builtins$enumerate(test_loader)  # convert to iterator
for (test_obj in iterate(iter_test_dataset)) {
  iter_test <- iter_test + 1

  # each item is unpacked as (batch index, (images, labels))
  images <- test_obj[[2]][[1]]
  labels <- test_obj[[2]][[2]]
  images <- images$view(-1L, 28L * 28L)$requires_grad_()

  # Forward pass only to get logits/output
  outputs <- model(images)

  # Predictions come from the maximum along dimension 1
  .predicted <- torch$max(outputs$data, 1L)
  predicted <- .predicted[1L]

  if (iter_test == 1) {
    print("PREDICTION")
    print(predicted[1])
  }
}
We are trying to show what we are predicting and the actual values. In this case, we're predicting the right value 7!
# Run the trained model over the test loader and, for the first batch,
# print the first prediction next to the size of the labels and the
# first label.
iter_test <- 0
iter_test_dataset <- builtins$enumerate(test_loader)  # convert to iterator
for (test_obj in iterate(iter_test_dataset)) {
  iter_test <- iter_test + 1

  # each item is unpacked as (batch index, (images, labels))
  images <- test_obj[[2]][[1]]
  labels <- test_obj[[2]][[2]]
  images <- images$view(-1L, 28L * 28L)$requires_grad_()

  # Forward pass only to get logits/output
  outputs <- model(images)

  # Predictions come from the maximum along dimension 1
  .predicted <- torch$max(outputs$data, 1L)
  predicted <- .predicted[1L]

  if (iter_test == 1) {
    print("PREDICTION")
    print(predicted[1])

    print("LABEL SIZE")
    print(labels$size())

    print("LABEL FOR IMAGE 0")
    print(labels[1]$item())  # extract the scalar part only
  }
}
It should be the digit 2.
# Run the trained model over the test loader and, for the first batch,
# print the prediction and the label of the SECOND image.
# This chunk previously repeated the first-image check verbatim; tensor
# elements are addressed from 1 in this file (`outputs[1]` is the first
# and `outputs[100]` the last of 100), so the second image is element 2.
iter_test <- 0
iter_test_dataset <- builtins$enumerate(test_loader)  # convert to iterator
for (test_obj in iterate(iter_test_dataset)) {
  iter_test <- iter_test + 1

  # each item is unpacked as (batch index, (images, labels))
  images <- test_obj[[2]][[1]]
  labels <- test_obj[[2]][[2]]
  images <- images$view(-1L, 28L * 28L)$requires_grad_()

  # Forward pass only to get logits/output
  outputs <- model(images)

  # Predictions come from the maximum along dimension 1
  .predicted <- torch$max(outputs$data, 1L)
  predicted <- .predicted[1L]

  if (iter_test == 1) {
    print("PREDICTION")
    print(predicted[2])

    print("LABEL SIZE")
    print(labels$size())

    # "IMAGE 1" keeps the 0-based image numbering used by the earlier
    # 'LABEL FOR IMAGE 0' message
    print("LABEL FOR IMAGE 1")
    print(labels[2]$item())  # extract the scalar part only
  }
}
Now we know what each object represents, we can understand how we arrived at our accuracy numbers.
One last thing to note is that `correct.item()` has this syntax because `correct` is a PyTorch tensor; to get its value so it can be computed with `total`, which is an integer, we need to do this.
# Compute the accuracy of the trained model over the whole test loader.
# The counters are initialised here so the chunk does not depend on
# whatever `correct` and `total` were left holding by earlier code.
correct <- 0
total <- 0

iter_test <- 0
iter_test_dataset <- builtins$enumerate(test_loader)  # convert to iterator
for (test_obj in iterate(iter_test_dataset)) {
  iter_test <- iter_test + 1

  # each item is unpacked as (batch index, (images, labels))
  images <- test_obj[[2]][[1]]
  labels <- test_obj[[2]][[2]]
  images <- images$view(-1L, 28L * 28L)$requires_grad_()

  # Forward pass only to get logits/output
  outputs <- model(images)

  # Predictions come from the maximum along dimension 1
  .predicted <- torch$max(outputs$data, 1L)
  predicted <- .predicted[1L]

  # Total number of labels
  total <- total + labels$size(0L)

  # Total correct predictions
  correct <- correct + sum((predicted$numpy() == labels$numpy()))
}

# Percentage of test labels predicted correctly
accuracy <- 100 * correct / total
print(accuracy)
This is how you save your model.
Feel free to just change save_model = TRUE
to save your model
# Switch that controls whether the trained model is written to disk
save_model <- TRUE

if (save_model) {
  # Only the parameters (the state dict) are saved, not the model object
  torch$save(model$state_dict(), "awesome_model.pkl")
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.