
#' nnCore
#'
#' A lightweight deep learning core in R.
#' This library was built to be a simple yet comprehensive neural
#' network library that is well commented, and to stay lightweight
#' in comparison to most bloated neural net libraries.
#' @name nnCore
#' @param formula A model formula such as Species ~ . describing the response and predictors.
#' @param data A data frame containing the training data.
#' @param hidden Number of hidden nodes, or one of "1", "2", "3" to select a heuristic (see below).
#' @param plotData Logical; if TRUE, accuracy and loss are recorded at every training iteration.
#' @keywords neuralnetwork nnCore
#' @export
#' @examples
#' nn <- nnCore$new(Species ~ ., data = iris, hidden = 6)
#' nn$train(9999, trace = 1e3, learn_rate = .0001)
#' nn$computeNN(iris[,-5])
#'
#' ## Creating a New Network
#' nn <- nnCore$new(Species ~ ., data = iris, hidden = 6)
#' In the formula Species ~ ., the column named "Species" is the response and . stands for the remaining columns in the data set. If you do not want to include all columns in the data set, name the specific columns separated by "+"s, e.g. Species ~ Sepal.Length + Sepal.Width.
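#' As a minimal sketch of that column-specific form (the nn2 name is just illustrative), restricting the model to two predictors:
#' nn2 <- nnCore$new(Species ~ Sepal.Length + Sepal.Width, data = iris, hidden = 6)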
#'
#' ## Training A Network
#' nn$train(9999, trace = 1e3, learn_rate = .0001)
#' 9999 is the number of training iterations. By default the number of iterations is 1e4, and training stops early if the loss drops below the tolerance threshold (0.01 by default) before that many iterations have run.
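#' As a sketch, all four training parameters can also be set explicitly (the values here are illustrative, not tuned):
#' nn$train(iterations = 5000, learn_rate = 1e-3, tolerance = 0.01, trace = 500)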
#'
#' ## Compute New Observations
#' nn$computeNN(iris[,-5])
#' iris[,-5] is simply the iris training set with the class column removed, so here the test data overlaps the training data, but any new observations can be scored with $computeNN. The function returns the predicted classifications.
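#' A minimal held-out sketch (the train/test split below is illustrative, not part of the library):
#' train_idx <- sample(nrow(iris), 120)
#' nn <- nnCore$new(Species ~ ., data = iris[train_idx, ], hidden = 6)
#' nn$train(5000, trace = 1e3, learn_rate = .0001)
#' preds <- nn$computeNN(iris[-train_idx, -5])
#' mean(preds == iris$Species[-train_idx])   # accuracy on the held-out rows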
#'
#' ## Linking and Predicting
#' The $ syntax of the R6 library allows commands to be strung together on one line. It is important to note that order matters: train before you predict.
#'
#' nn$train(9999, trace = 1e3, learn_rate = .0001)$predict(data.matrix(cbind(1, iris[,-5])))
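#' Since $train() returns the network invisibly, the same chain can be written with the $computeNN wrapper (an equivalent sketch of the line above):
#' nn$train(9999, trace = 1e3, learn_rate = .0001)$computeNN(iris[,-5])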
#' ## Hidden Layer Modifications
#' You can either enter a specific number of hidden nodes or pass one of three options as a character string to generate a generic number of nodes. This is valuable if you are generating neural networks within a program and do not have time to run a hyperparameter search. These three options are generic heuristics for determining a reasonable number of hidden nodes; choose "1", "2", or "3" to decide the method.
#'
#' "1"     # of hidden nodes = # of input nodes
#' "2"     # of hidden nodes = (# of input nodes + # of output nodes) / (2/3)
#' "3"     # of hidden nodes = sqrt(# of input nodes * # of observations)
#' ## Examples
#' This will use option one, which simply uses the number of input nodes:
#'
#' nn <- nnCore$new(Species ~ ., data = iris, hidden = "1")
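#' The other two heuristics are selected the same way (illustrative calls on the same iris data):
#' nn <- nnCore$new(Species ~ ., data = iris, hidden = "2")
#' nn <- nnCore$new(Species ~ ., data = iris, hidden = "3")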

if (!require("R6")) install.packages("R6")
if (!require("roxygen2")) install.packages("roxygen2")
library(R6)
nnCoreV3 <- R6Class("NeuralNetwork",
                    public = list(
                      X = NULL,  Y = NULL,
                      W1 = NULL, W2 = NULL, b1 = NULL, b2 = NULL,
                      output = NULL, accuracyTime = numeric(), lossTime = numeric(),
                      plotData2 = F, superScore = numeric(), superCounter = 1,

                      # hiddenSelect allows one to pick a generic method of determining the number of hidden nodes.
                      # These heuristics are helpful if one has automated the building of neural nets.
                      # Note: the training data must be passed in explicitly, since it is not stored on the object.
                      hiddenSelect = function(hidd, data){
                        if(hidd == "1"){
                          hiddenMode <- round(length(colnames(data)))
                        } else if(hidd == "2"){
                          hiddenMode <- round((round(length(colnames(data))) + 1)/(2/3))
                        } else if(hidd == "3"){
                          hiddenMode <- round(sqrt(length(colnames(data)) * length(data[,1])))
                        }
                        return(hiddenMode)
                      },
                      # initialize sets up the initial neuralnetwork structure, and generates
                      # weights etc.
                      initialize = function(formula, hidden, data = list(), plotData = F) {
                        self$plotData2 <- plotData
                        # Model and training data
                        mod <- model.frame(formula, data = data) # This allows us to use the ~ syntax
                        self$X <- model.matrix(attr(mod, 'terms'), data = mod)
                        self$Y <- model.response(mod)

                        # Dimensions
                        D <- ncol(self$X) # input dimensions (+ bias)
                        K <- length(unique(self$Y)) # number of classes
                        if(typeof(hidden) == "character"){
                          H <- self$hiddenSelect(hidden, data)  # This allows us to use predetermined hidden node options.
                        }else{
                          H <- hidden # number of hidden nodes (- bias)
                        }
                        # Initial weights and bias for the two layers
                        self$W1 <- .01 * matrix(rnorm(D * H), D, H)
                        self$W2 <- .01 * matrix(rnorm((H + 1) * K), H + 1, K)

                        # Create starting bias weights
                        self$b1 <- matrix(0,nrow = 1, ncol = H)
                        self$b2 <- matrix(0, nrow = 1, ncol = K)
                      },

                      # fit runs one forward pass and returns the class probabilities instead of storing them;
                      # used for predicting new test data.
                      fit = function(data = self$X) {
                        #h <- self$sigmoid(data %*% (self$W1+self$b1))
                        h <- self$sigmoid(sweep(data %*% self$W1 ,2, self$b1, '+'))
                        score <- cbind(1, h) %*% self$W2
                        self$superScore <- self$softmax(score)
                        self$superCounter = self$superCounter + 1
                        return(self$softmax(score))

                      },
                      # Runs fit() but saves the result for training.
                      # Typically this is organized the other way around, but to avoid copying
                      # the feedforward code into fit() it is set up this way.
                      feedforward = function(data = self$X) {
                        self$output <- self$fit(data)
                        invisible(self)
                      },
                      # Standard backpropagation function;
                      # updates the weights and biases from the stored training output.
                      backpropagate = function(lr = 1e-2) {

                        h <- self$sigmoid(sweep(self$X %*% self$W1, 2, self$b1, '+'))
                        Yid <- match(self$Y, sort(unique(self$Y)))

                        haty_y <- self$output - (col(self$output) == Yid) # E[y] - y
                        dW2 <- t(cbind(1, h)) %*% haty_y

                        dh <- haty_y %*% t(self$W2[-1, , drop = FALSE]) #y-hat =dscores
                        dW1 <- t(self$X) %*% (self$dsigmoid(h) * dh)

                        db1 <- colSums((self$dsigmoid(h) * dh))
                        db2 <- colSums(haty_y)
                        # Update Weights
                        self$W1 <- self$W1 - lr * dW1
                        self$W2 <- self$W2 - lr * dW2

                        # Update Biases
                        self$b1 <- self$b1 - lr * db1
                        self$b2 <- self$b2 - lr * db2

                        invisible(self)
                      },
                      # Uses argmax to estimate a single class for each observation,
                      # i.e. one prediction per test input.
                      predict = function(data = self$X) {
                        probs <- self$fit(data)
                        preds <- apply(probs, 1, which.max)
                        levels(self$Y)[preds]
                      },
                      # Computes the cross-entropy loss used to report improvements in the neural net during training.
                      # Also used for the early-stopping check in train().
                      compute_loss = function(probs = self$output) {
                        Yid <- match(self$Y, sort(unique(self$Y)))
                        correct_logprobs <- -log(probs[cbind(seq_along(Yid), Yid)])
                        sum(correct_logprobs)
                      },
                      # The main train function; think of this as the main() function.
                      # Calls feedforward() and backpropagate() in the correct order.
                      train = function(iterations = 1e4,
                                       learn_rate = 1e-2,
                                       tolerance = .01,
                                       trace = 100) {
                        for (i in seq_len(iterations)) {
                          self$feedforward()$backpropagate(learn_rate)

                          # By default V2 will not record accuracy every step, because it slows down the training.
                          # To record every step, change the plotData parameter upon initialization.
                          if(self$plotData2 == T){
                            self$accuracyTime[i] <- self$accuracy()   # Recording the points every iteration is too slow!
                            self$lossTime[i] <- self$compute_loss()/100
                          }

                          if (trace > 0 && i %% trace == 0){
                            #Print the accuracy and loss
                            message('Iteration ', i, '\tLoss ', self$compute_loss(),'\tAccuracy ', self$accuracy())
                          }

                          if (self$compute_loss() < tolerance){ #When the loss is under the tolerance stop training
                            break

                          }
                        }
                        invisible(self)
                      },
                      # Determines the training accuracy from the most recent stored output.
                      accuracy = function() {
                        predictions <- apply(self$output, 1, which.max)
                        predictions <- levels(self$Y)[predictions]
                        mean(predictions == self$Y)
                      },

                      # Creates a better interface for the user without interfering with the other uses of predict()
                      computeNN = function(data ) {
                        self$predict(data.matrix(cbind(1, data)))
                      },
                      # Plots the neural net's training accuracy. IMPORTANT: if the plotData parameter was not set to
                      # TRUE at initialization, this function will not work.
                      plotNN = function(){
                        plot(self$accuracyTime, xlab = "Iterations", ylab = "Accuracy", type = "o", pch =20, col= "blue")

                      },
                      # larger selection of activation functions to use
                      sigmoid = function(x) 1 / (1 + exp(-x)),
                      dsigmoid = function(x) x * (1 - x),
                      softmax = function(x) exp(x) / rowSums(exp(x))
                    )
)
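
# A minimal usage sketch for the class object defined above (nnCoreV3); the values are illustrative:
# nn <- nnCoreV3$new(Species ~ ., data = iris, hidden = 6, plotData = TRUE)
# nn$train(5000, trace = 1e3, learn_rate = 1e-4)
# nn$computeNN(iris[, -5])   # predicted classes for new observations
# nn$plotNN()                # accuracy curve; requires plotData = TRUE at initialization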