bis557: Coursework for BIS 557

#' Function to initialize weights and biases for a neural net
#'
#' @description Create list of weights to describe a dense neural network. Modified from CASL
#' @param sizes an integer vecotr
#' @return a list of initialized weights and biases
#' @export
nn_make_weights <- function (sizes)
{
    L <- length(sizes) - 1L
    weights <- vector("list", L)
    for (j in seq_len(L)) {
        w <- matrix(rnorm(sizes[j] * sizes[j + 1L]), ncol = sizes[j], nrow = sizes[j + 1L])
        weights[[j]] <- list(w = w, b = rnorm(sizes[j + 1L]))
    }
    return(weights)
}


#' ReLU function
#'
#' @description Applies a rectified linear unit (ReLU) to a vector or matrix. Reproduced from CASL.
#' @param v a numeric vector or matrix
#' @return a numeric vector or matrix
#' @export
util_ReLU <- function (v) 
{
    v[v < 0] <- 0
    return(v)
}


#' ReLU derivative function
#'
#' @description Applies derivative of the ReLU to a vector or matrix. Reproduced from CASL.
#' @param v a numeric vector or matrix
#' @return a numeric vector or matrix
#' @export
util_ReLU_p <- function (v) 
{
    p <- v * 0
    p[v > 0] <- 1
    return(p)
}


#' MSE derivative function
#'
#' @description Applies derivative of the MSE to a vector. Reproduced from CASL.
#' @param y a numeric vector 
#' @param a a numeric vector
#' @return a numeric vector
#' @export
util_mse_p <- function (y, a) 
{
    return(a - y)
}


#' Mean absolute error derivative function
#'
#' @description Applies derivative of the mean absolute error to a vector. 
#' @param y a numeric vector 
#' @param a a numeric vector
#' @return a numeric vector
#' @export
util_mae_p <- function (y, a) 
{
    return(a > y)
}


#' Forward propagation function for a neural net
#'
#' @description Applies forward propagation for a set of neural net weights and biases. Reproduced from CASL.
#' @param x a numeric vector 
#' @param weights a list created by nn_make_weights
#' @param sigma an activation function
#' @return a list of updated weights and responses
#' @export
nn_forward_prop <- function (x, weights, sigma) 
{
    L <- length(weights)
    z <- vector("list", L)
    a <- vector("list", L)
    for (j in seq_len(L)) {
        a_j1 <- if (j == 1)  x  else  a[[j - 1L]]
        z[[j]] <- weights[[j]]$w %*% a_j1 + weights[[j]]$b
        a[[j]] <- if (j != L)  sigma(z[[j]])  else  z[[j]]
    }
    return(list(z = z, a = a))
}


#' Backward propagation function for a neural net
#'
#' @description Applies backward propagation to calculate loss. Reproduced from CASL.
#' @param x a numeric vector 
#' @param y a numeric vector
#' @param weights a list created by nn_make_weights
#' @param f_obj a list created by nn_forward_prop
#' @param sigma_p derivative of activation function
#' @param f_p derivative of the loss function
#' @return a list of updated weights and responses
#' @export
nn_backward_prop <- function (x, y, weights, f_obj, sigma_p, f_p) 
{
    z <- f_obj$z
    a <- f_obj$a
    L <- length(weights)
    grad_z <- vector("list", L)
    grad_w <- vector("list", L)
    for (j in rev(seq_len(L))) {
        if (j == L) {
            grad_z[[j]] <- f_p(y, a[[j]])
        } else {
            grad_z[[j]] <- (t(weights[[j + 1]]$w) %*%
            grad_z[[j + 1]]) * sigma_p(z[[j]])
        }
        a_j1 <- if (j == 1)  x  else  a[[j - 1L]]
        grad_w[[j]] <- grad_z[[j]] %*% t(a_j1)
    }
    list(grad_z = grad_z, grad_w = grad_w)
}


#' Stochastic gradient descent (SGD) to estimate a neural net
#'
#' @description Apply stochastic gradient descent (SGD) to estimate NN. Modified from CASL.
#' @param X a numeric matrix
#' @param y a numeric vector
#' @param sizes an integer vector
#' @param epochs an integer value
#' @param eta a positive numeric value
#' @param weights optional list of starting weights
#' @param f_p derivative of the loss function
#' @return a list of trained weights 
#' @export
nn_sgd <- function (X, y, sizes, epochs, eta, weights = NULL, f_p = util_mse_p)
{
    if (is.null(weights))  weights <- nn_make_weights(sizes)

    for (epoch in seq_len(epochs)) {
        for (i in seq_len(nrow(X))) {
            f_obj <- nn_forward_prop(X[i,], weights, util_ReLU)
            b_obj <- nn_backward_prop(X[i,], y[i,], weights, f_obj, util_ReLU_p, f_p)
            for (j in seq_along(b_obj)) {
                weights[[j]]$b <- weights[[j]]$b - eta * b_obj$grad_z[[j]]
                weights[[j]]$w <- weights[[j]]$w - eta * b_obj$grad_w[[j]]
            }
        }
    }
    return(weights)
}


#' Prediction from a training neural net
#'
#' @description Predict values from a training neural network. Reproduced from CASL.
#' @param weights a list of weights describing the NN
#' @param X_test a numeric matrix
#' @return a numeric matrix of predicted values
#' @export
nn_predict <- function (weights, X_test)
{
    p <- length(weights[[length(weights)]]$b)
    y_hat <- matrix(0, ncol = p, nrow = nrow(X_test))
    for (i in seq_len(nrow(X_test))) {
        a <- nn_forward_prop(X_test[i,], weights, util_ReLU)$a
        y_hat[i, ] <- a[[length(a)]]
    }
    return(y_hat)
}