knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
For this project we will writing an R package that implements gradient descent algorithms for neural networks with one hidden layer.
Here are some significant formulas that have been used in this function:
Loss:
1.Regression: $L(w,v) = \frac{1}{2}\sum_{i=1}^n[f(x_i)-y_i]^2 = \frac{1}{2}\sum_{i=1}^n[w^TS(v^TX_i)-y_i]^2$
2.Binary classification: $L(w,v) = \frac{1}{n}\sum^{n}{i=1}\frac{1}{2}[f(x_i)-y_i]^2 = \frac{1}{n}\sum^{n}{i=1}log[1+exp(-\tilde{y}_if(x_i))]$
Gradient:
1.$\nabla_w\frac{1}{2}[w^TS(v^Tx_i)-y_i]^2 = \frac{\partial}{\partial b_i}\frac{1}{2}[b_i-y_i]^2\nabla_wb_i = \delta_i^wz_i$
2.$\nabla_v\frac{1}{2}[w^TS[v^Tx_i]-y_i]^2 = \frac{\partial}{\partial a_i}\frac{1}{2}[w^TS[a_i]-y_i]^2\nabla_va_i = \delta_i^vx_i$
Backpropagation:
1.$w^{t+1}\longleftarrow w^t - \alpha\nabla_wL(w^t,v^t)$
2.$v^{t+1}\longleftarrow v^t-\alpha\nabla_vl(w^t,v^t)$
The purpose of this section is to give users a general information of this package. We will briefly go over the main functions.
library(ElemStatLearn) library(NeuralNetwork) data(spam, package = "ElemStatLearn") data(SAheart, package = "ElemStatLearn") data(zip.train, package = "ElemStatLearn") zip.train <- zip.train[zip.train[, 1] %in% c(0, 1),] data(prostate, package = "ElemStatLearn") data(ozone, package = "ElemStatLearn") data.list <- list( spam = list( features = as.matrix(spam[, 1:57]), labels = ifelse(spam$spam == "spam", 1, 0), step.size = 0.06, is.01 = TRUE ), SAheart = list( features = as.matrix(SAheart[, c(1:4, 6:9)]), labels = ifelse(SAheart$chd == 1, 1, 0), step.size = 0.02, is.01 = TRUE ), zip.train = list( features = as.matrix(zip.train[,-1]), labels = ifelse(zip.train[, 1] == 1, 1, 0), step.size = 0.05, is.01 = TRUE ), prostate = list( features = as.matrix(prostate[, 1:8]), labels = prostate$lpsa, step.size = 0.02, is.01 = FALSE ), ozone = list( features = as.matrix(ozone[,-1]), labels = ozone[, 1], step.size = 0.02, is.01 = FALSE )) n.folds <- 5L n.hidden.units <- 100L max.iterations <- 500L # X.mat <- data.set$features # y.vec <- data.set$labels # # legend( # x = 0, # y = max( # cbind( # model.list$mean.validation.loss.vec, # model.list$mean.train.loss.vec # ) # ), # c("Validation loss", "Train loss"), # lty = 1:2, # xjust = 0, # yjust = 1 # )
We are going to run our code on the following data sets.
data.name = 1 data.set <- data.list[[data.name]] test.loss.mat <- matrix(0, nrow = n.folds, ncol = 2) step.size <- data.set$step.size #Check data type here: set.seed(1) fold.vec <- sample(rep(1:n.folds, l = length(data.set$labels))) for (i.fold in (1:n.folds)) { train.index <- fold.vec != i.fold test.index <- fold.vec == i.fold X.train <- data.set$features[train.index,] y.train <- data.set$labels[train.index] X.test <- data.set$features[test.index,] y.test <- data.set$labels[test.index] result.list <- NNetEarlyStoppingCV( X.mat = X.train, y.vec = y.train, max.iterations = max.iterations, step.size = step.size, n.hidden.units = n.hidden.units ) if (data.set$is.01) { # binary data NNet.predict <- ifelse(result.list$predict(X.test) > 0.5, 1,0) NNet.loss <- mean(ifelse(NNet.predict == y.test, 0, 1)) baseline.predict <- mean(y.test) } else{ # regression data NNet.predict <- result.list$predict(X.test) NNet.loss <- mean((NNet.predict - y.test) ^ 2) baseline.predict <- mean(y.test) } baseline.loss <- mean((baseline.predict - y.test) ^ 2) test.loss.mat[i.fold,] = c(NNet.loss, baseline.loss) } colnames(test.loss.mat) <- c("Nerual Network", "Baseline")
# plot result if (!data.set$is.01) { barplot( test.loss.mat, main = c("Square Regression: ", "spam"), xlab = "mean loss value", legend = (rownames(test.loss.mat)), beside = TRUE ) } else{ barplot( test.loss.mat, main = c("Binary Classification: ", "spam"), xlab = "mean loss value", legend = (rownames(test.loss.mat)), beside = TRUE ) } model.list <- NNetEarlyStoppingCV( X.mat = X.train, y.vec = y.train, max.iterations = max.iterations, step.size = step.size, n.hidden.units = n.hidden.units )
Comment on difference in accuracy: Neural network model is much better than baseline
matplot( x = seq(1, max.iterations), y = cbind(model.list$mean.validation.loss.vec, model.list$mean.train.loss.vec), xlab = "Iteration", ylab = "mean loss value", type = "l", lty = 1:2, pch = 15, col = c(17) ) dot.x <- model.list$selected.step dot.y <- model.list$mean.validation.loss.vec[dot.x] matpoints(x = dot.x, y = dot.y, col = 2, pch = 19) legend( x = 0, y = max(cbind(model.list$mean.validation.loss.vec, model.list$mean.train.loss.vec)), c("Validation loss", "Train loss"), lty = 1:2, xjust = 0, yjust = 1 )
What are the optimal regularization parameters?
Answer: iteration = r which.min(model.list$mean.validation.loss.vec)
data.name = 2 data.set <- data.list[[data.name]] test.loss.mat <- matrix(0, nrow = n.folds, ncol = 2) step.size <- data.set$step.size #Check data type here: set.seed(1) fold.vec <- sample(rep(1:n.folds, l = length(data.set$labels))) for (i.fold in (1:n.folds)) { train.index <- fold.vec != i.fold test.index <- fold.vec == i.fold X.train <- data.set$features[train.index,] y.train <- data.set$labels[train.index] X.test <- data.set$features[test.index,] y.test <- data.set$labels[test.index] result.list <- NNetEarlyStoppingCV( X.mat = X.train, y.vec = y.train, max.iterations = max.iterations, step.size = step.size, n.hidden.units = n.hidden.units ) if (data.set$is.01) { # binary data NNet.predict <- ifelse(result.list$predict(X.test) > 0.5, 1,0) NNet.loss <- mean(ifelse(NNet.predict == y.test, 0, 1)) baseline.predict <- mean(y.test) } else{ # regression data NNet.predict <- result.list$predict(X.test) NNet.loss <- mean((NNet.predict - y.test) ^ 2) baseline.predict <- mean(y.test) } baseline.loss <- mean((baseline.predict - y.test) ^ 2) test.loss.mat[i.fold,] = c(NNet.loss, baseline.loss) } colnames(test.loss.mat) <- c("Nerual Network", "Baseline")
# plot result if (!data.set$is.01) { barplot( test.loss.mat, main = c("Square Regression: ", "SAheart"), xlab = "mean loss value", legend = (rownames(test.loss.mat)), beside = TRUE ) } else{ barplot( test.loss.mat, main = c("Binary Classification: ", "SAheart"), xlab = "mean loss value", legend = (rownames(test.loss.mat)), beside = TRUE ) } model.list <- NNetEarlyStoppingCV( X.mat = X.train, y.vec = y.train, max.iterations = max.iterations, step.size = step.size, n.hidden.units = n.hidden.units )
Comment on difference in accuracy: Neural network model is much better than baseline
matplot( x = seq(1, max.iterations), y = cbind(model.list$mean.validation.loss.vec, model.list$mean.train.loss.vec), xlab = "Iteration", ylab = "mean loss value", type = "l", lty = 1:2, pch = 15, col = c(17) ) dot.x <- model.list$selected.step dot.y <- model.list$mean.validation.loss.vec[dot.x] matpoints(x = dot.x, y = dot.y, col = 2, pch = 19) legend( x = 0, y = max(cbind(model.list$mean.validation.loss.vec, model.list$mean.train.loss.vec)), c("Validation loss", "Train loss"), lty = 1:2, xjust = 0, yjust = 1 )
What are the optimal regularization parameters?
Answer: iteration = r which.min(model.list$mean.validation.loss.vec)
data.name = 3 data.set <- data.list[[data.name]] test.loss.mat <- matrix(0, nrow = n.folds, ncol = 2) step.size <- data.set$step.size #Check data type here: set.seed(1) fold.vec <- sample(rep(1:n.folds, l = length(data.set$labels))) for (i.fold in (1:n.folds)) { train.index <- fold.vec != i.fold test.index <- fold.vec == i.fold X.train <- data.set$features[train.index,] y.train <- data.set$labels[train.index] X.test <- data.set$features[test.index,] y.test <- data.set$labels[test.index] result.list <- NNetEarlyStoppingCV( X.mat = X.train, y.vec = y.train, max.iterations = max.iterations, step.size = step.size, n.hidden.units = n.hidden.units ) if (data.set$is.01) { # binary data NNet.predict <- ifelse(result.list$predict(X.test) > 0.5, 1,0) NNet.loss <- mean(ifelse(NNet.predict == y.test, 0, 1)) baseline.predict <- mean(y.test) } else{ # regression data NNet.predict <- result.list$predict(X.test) NNet.loss <- mean((NNet.predict - y.test) ^ 2) baseline.predict <- mean(y.test) } baseline.loss <- mean((baseline.predict - y.test) ^ 2) test.loss.mat[i.fold,] = c(NNet.loss, baseline.loss) } colnames(test.loss.mat) <- c("Nerual Network", "Baseline")
# plot result if (!data.set$is.01) { barplot( test.loss.mat, main = c("Square Regression: ", "zip.train"), xlab = "mean loss value", legend = (rownames(test.loss.mat)), beside = TRUE ) } else{ barplot( test.loss.mat, main = c("Binary Classification: ", "zip.train"), xlab = "mean loss value", legend = (rownames(test.loss.mat)), beside = TRUE ) } model.list <- NNetEarlyStoppingCV( X.mat = X.train, y.vec = y.train, max.iterations = max.iterations, step.size = step.size, n.hidden.units = n.hidden.units )
Comment on difference in accuracy: Neural network model is much better than baseline
matplot( x = seq(1, max.iterations), y = cbind(model.list$mean.validation.loss.vec, model.list$mean.train.loss.vec), xlab = "Iteration", ylab = "mean loss value", type = "l", lty = 1:2, pch = 15, col = c(17) ) dot.x <- model.list$selected.step dot.y <- model.list$mean.validation.loss.vec[dot.x] matpoints(x = dot.x, y = dot.y, col = 2, pch = 19) legend( x = 0, y = max(cbind(model.list$mean.validation.loss.vec, model.list$mean.train.loss.vec)), c("Validation loss", "Train loss"), lty = 1:2, xjust = 0, yjust = 1 )
What are the optimal regularization parameters?
Answer: iteration = r which.min(model.list$mean.validation.loss.vec)
data.name = 4 data.set <- data.list[[data.name]] test.loss.mat <- matrix(0, nrow = n.folds, ncol = 2) step.size <- data.set$step.size #Check data type here: set.seed(1) fold.vec <- sample(rep(1:n.folds, l = length(data.set$labels))) for (i.fold in (1:n.folds)) { train.index <- fold.vec != i.fold test.index <- fold.vec == i.fold X.train <- data.set$features[train.index,] y.train <- data.set$labels[train.index] X.test <- data.set$features[test.index,] y.test <- data.set$labels[test.index] result.list <- NNetEarlyStoppingCV( X.mat = X.train, y.vec = y.train, max.iterations = max.iterations, step.size = step.size, n.hidden.units = n.hidden.units ) if (data.set$is.01) { # binary data NNet.predict <- ifelse(result.list$predict(X.test) > 0.5, 1,0) NNet.loss <- mean(ifelse(NNet.predict == y.test, 0, 1)) baseline.predict <- mean(y.test) } else{ # regression data NNet.predict <- result.list$predict(X.test) NNet.loss <- mean((NNet.predict - y.test) ^ 2) baseline.predict <- mean(y.test) } baseline.loss <- mean((baseline.predict - y.test) ^ 2) test.loss.mat[i.fold,] = c(NNet.loss, baseline.loss) } colnames(test.loss.mat) <- c("Nerual Network", "Baseline")
# plot result if (!data.set$is.01) { barplot( test.loss.mat, main = c("Square Regression: ", "prostate"), xlab = "mean loss value", legend = (rownames(test.loss.mat)), beside = TRUE ) } else{ barplot( test.loss.mat, main = c("Binary Classification: ", "prostate"), xlab = "mean loss value", legend = (rownames(test.loss.mat)), beside = TRUE ) } model.list <- NNetEarlyStoppingCV( X.mat = X.train, y.vec = y.train, max.iterations = max.iterations, step.size = step.size, n.hidden.units = n.hidden.units )
Comment on difference in accuracy: Neural network model is much better than baseline
matplot( x = seq(1, max.iterations), y = cbind(model.list$mean.validation.loss.vec, model.list$mean.train.loss.vec), xlab = "Iteration", ylab = "mean loss value", type = "l", lty = 1:2, pch = 15, col = c(17) ) dot.x <- model.list$selected.step dot.y <- model.list$mean.validation.loss.vec[dot.x] matpoints(x = dot.x, y = dot.y, col = 2, pch = 19) legend( x = 0, y = max(cbind(model.list$mean.validation.loss.vec, model.list$mean.train.loss.vec)), c("Validation loss", "Train loss"), lty = 1:2, xjust = 0, yjust = 1 )
What are the optimal regularization parameters?
Answer: iteration = r which.min(model.list$mean.validation.loss.vec)
data.name = 5 data.set <- data.list[[data.name]] test.loss.mat <- matrix(0, nrow = n.folds, ncol = 2) step.size <- data.set$step.size #Check data type here: set.seed(1) fold.vec <- sample(rep(1:n.folds, l = length(data.set$labels))) for (i.fold in (1:n.folds)) { train.index <- fold.vec != i.fold test.index <- fold.vec == i.fold X.train <- data.set$features[train.index,] y.train <- data.set$labels[train.index] X.test <- data.set$features[test.index,] y.test <- data.set$labels[test.index] result.list <- NNetEarlyStoppingCV( X.mat = X.train, y.vec = y.train, max.iterations = max.iterations, step.size = step.size, n.hidden.units = n.hidden.units ) if (data.set$is.01) { # binary data NNet.predict <- ifelse(result.list$predict(X.test) > 0.5, 1,0) NNet.loss <- mean(ifelse(NNet.predict == y.test, 0, 1)) baseline.predict <- mean(y.test) } else{ # regression data NNet.predict <- result.list$predict(X.test) NNet.loss <- mean((NNet.predict - y.test) ^ 2) baseline.predict <- mean(y.test) } baseline.loss <- mean((baseline.predict - y.test) ^ 2) test.loss.mat[i.fold,] = c(NNet.loss, baseline.loss) } colnames(test.loss.mat) <- c("Nerual Network", "Baseline")
# plot result if (!data.set$is.01) { barplot( test.loss.mat, main = c("Square Regression: ", "ozone"), xlab = "mean loss value", legend = (rownames(test.loss.mat)), beside = TRUE ) } else{ barplot( test.loss.mat, main = c("Binary Classification: ", "ozone"), xlab = "mean loss value", legend = (rownames(test.loss.mat)), beside = TRUE ) } model.list <- NNetEarlyStoppingCV( X.mat = X.train, y.vec = y.train, max.iterations = max.iterations, step.size = step.size, n.hidden.units = n.hidden.units )
Comment on difference in accuracy: Neural network model is much better than baseline
matplot( x = seq(1, max.iterations), y = cbind(model.list$mean.validation.loss.vec, model.list$mean.train.loss.vec), xlab = "Iteration", ylab = "mean loss value", type = "l", lty = 1:2, pch = 15, col = c(17) ) dot.x <- model.list$selected.step dot.y <- model.list$mean.validation.loss.vec[dot.x] matpoints(x = dot.x, y = dot.y, col = 2, pch = 19) legend( x = 0, y = max(cbind(model.list$mean.validation.loss.vec, model.list$mean.train.loss.vec)), c("Validation loss", "Train loss"), lty = 1:2, xjust = 0, yjust = 1 )
What are the optimal regularization parameters?
Answer: iteration = r which.min(model.list$mean.validation.loss.vec)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.