#' General neural network for regression with backpropagation on the weights
#'
#' Trains a single-hidden-layer neural network by gradient descent
#' (real numbers for regression).
#'
#' @param X.mat (feature matrix, n_observations x n_features)
#' @param y.vec (label vector, n_observations x 1)
#' @param max.iterations (integer scalar >= 1, the number of gradient descent steps)
#' @param step.size (the step size used when descending)
#' @param n.hidden.units (number of hidden units)
#' @param is.train (logical vector of length n_observations,
#' TRUE if the observation is in the train set, FALSE for the validation set)
#'
#' @return pred.mat (n_observations x max.iterations matrix of predicted values, or n x k)
#' @return v.mat: final hidden-layer weight matrix (n_features+1 x n.hidden.units, or p+1 x u);
#' the first row holds the intercept terms.
#' @return w.vec: final output weight vector (n.hidden.units+1, or u+1);
#' the first element is the intercept term.
#' @return prediction(testX.mat):
#' a function that takes a test feature matrix and returns a vector of predictions
#' (real numbers for regression).
#'
#' @export
#'
#' @examples
#'
#' # Hard-coded example
#' hard_incidence <- as.numeric(c(84.106289446623, 55.328513822534, 38.106289446623, 45.345737285, 24.324, 30.324))
#' hard_emission <- as.numeric(c(38.288719431206, 0.13392142633291, 4.573728543534, 23.737285372, 54.345, 14.4))
#' hard_phase <- as.numeric(c(120.59515694473, 55.329415379509, 100.1339214473, 154.9728542, 55.57, 47.00))
#'
#' NNtestX.mat <- as.matrix(data.frame(hard_incidence, hard_emission, hard_phase))
#' NNtestY.vec <- as.numeric(c(0.0607816, 0.078306 , 0.098325, 0.052368, .0163620, 0.0757853))
#' max.iterations <- as.integer(10)
#' step.size <- as.numeric(0.05)
#' NNtest.is.train <- as.logical(c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE))
#'
#' # 3 hidden units
#' results <- NNetIterations(NNtestX.mat, NNtestY.vec, max.iterations, step.size, as.integer(3), NNtest.is.train)
#' results$prediction(NNtestX.mat)
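#'
#' # A minimal sketch of inspecting the result (uses the objects defined above;
#' # column i of pred.mat holds the predictions after i gradient descent steps):
#' train.error <- colMeans(abs(
#'   results$pred.mat[NNtest.is.train, ] - NNtestY.vec[NNtest.is.train]))
#' plot(train.error, type = "l", xlab = "iteration", ylab = "mean absolute error")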
#'
#' # TODO: add another example that works for modern R
NNetIterations <- function(X.mat,y.vec,max.iterations,step.size,n.hidden.units,is.train){
# stop if X.mat is not a numeric matrix
if(!all(is.matrix(X.mat),is.double(X.mat))){
stop("X.mat must be a numeric matrix!")
}
# stop if the label vector is not numeric or its length does not match the number of rows in X.mat
if (!all(is.vector(y.vec), is.double(y.vec),length(y.vec) == nrow(X.mat))) {
stop("y.vec must be a numeric vector with one element per row of X.mat!")
}
# stop if max.iterations is not an integer or is too small to be useful for learning
if(!all(max.iterations>=1, is.integer(max.iterations))){
stop("max.iterations must be an integer greater than or equal to 1!")
}
# stop if step.size is not a number in (0, 1]
if(!all(is.numeric(step.size), 0<step.size, step.size<=1)){
stop("step.size must be a number greater than 0 and at most 1!")
}
# stop if the number of hidden units is not a valid integer
if(!all(n.hidden.units>=1, is.integer(n.hidden.units))){
stop("n.hidden.units must be an integer greater than or equal to 1!")
}
# stop if is.train is not a logical vector with one element per row of X.mat
if(!all(is.logical(is.train), length(is.train)==nrow(X.mat))){
stop("is.train must be a logical vector with one element per row of X.mat!")
}
# stop if the labels are binary; this model only supports regression
if(length(unique(y.vec))==2){
stop("This model is not capable of binary classification")
}
# get the total data dimensions
n.observations <- nrow(X.mat)
n.features <- ncol(X.mat)
# find the train and validation indices using which(), which returns the vector of row indices needed
train.index = which(is.train==TRUE)
validation.index = which(is.train!=TRUE)
# split the data into the two sets using the indices
X.train = X.mat[train.index,]
y.train = y.vec[train.index]
# split the validation or test data using the indices vector
X.validation = X.mat[validation.index,]
y.validation = y.vec[validation.index]
#compute a scaled input matrix, which has mean=0 and sd=1 for each column
X.scaled.train = scale(X.train,center = TRUE,scale = TRUE)
#X.scaled.validation = scale(X.validation,center = TRUE,scale = TRUE)
X.scaled.mat = scale(X.mat,center = TRUE,scale = TRUE)
# initialize all variables needed for gradient descent
pred.mat = matrix(0,n.observations, max.iterations)
v.mat = matrix(runif((n.features+1)*n.hidden.units),n.features+1,n.hidden.units)
w.vec = runif(n.hidden.units+1)
w.gradient=rep(0,n.hidden.units+1)
v.gradient=matrix(0,n.features+1,n.hidden.units)
# loop over all gradient descent iterations (the predictions at every iteration are stored, so the best one can be chosen later)
for(iteration in 1:max.iterations){
# forward pass: multiply the scaled train matrix (with an intercept column) by the hidden-layer weights
A.mat = (cbind(1,X.scaled.train))%*%v.mat
# apply the sigmoid activation so the hidden-unit outputs lie in (0, 1)
X.zeta.mat = sigmoid(A.mat)
# output layer: combine the hidden-unit outputs (with an intercept) using the output weights
X.bias.vec = as.numeric((cbind(1,X.zeta.mat)) %*% w.vec)
# store predictions for every observation at this iteration (plain linear output, no Minnaert model here)
pred.mat[,iteration] = cbind(1,sigmoid(cbind(1,X.scaled.mat)%*%v.mat))%*%w.vec
# output-layer error: residuals of the train predictions
delta.w = X.bias.vec - y.train
# backpropagate the error through the sigmoid: elementwise derivative zeta*(1-zeta),
# scaled column-wise by the corresponding output weight
sigmoid.deriv.mat = X.zeta.mat * (1-X.zeta.mat)
delta.v = delta.w * sigmoid.deriv.mat * matrix( w.vec[-1], nrow(sigmoid.deriv.mat), ncol(sigmoid.deriv.mat), byrow = TRUE )
# gradients of the squared error for both weight sets, averaged over observations
w.gradient = (t(cbind(1,X.zeta.mat))%*%delta.w)/n.observations
v.gradient = (t(cbind(1,X.scaled.train))%*%delta.v)/ n.observations
# apply the gradient step to each
w.vec = w.vec - step.size*as.vector(w.gradient)
v.mat = v.mat - step.size*v.gradient
}
# return the list of values and a function to predict on new data
result.list = list(
pred.mat = pred.mat,
v.mat = v.mat,
w.vec = w.vec,
prediction = function(testX.mat){
# scale the test features with the training center and scale so they match the learned weights
testX.scaled = scale(testX.mat, center = attr(X.scaled.mat, "scaled:center"),
scale = attr(X.scaled.mat, "scaled:scale"))
prediction.vec = cbind(1,sigmoid(cbind(1,testX.scaled)%*%v.mat))%*%w.vec
return (prediction.vec)
}
)
return(result.list)
}
#' Neural network training with K-fold cross validation and early stopping
#'
#' Uses K-fold cross validation based on the fold IDs provided in fold.vec.
#'
#' For each validation/train split, NNetIterations is used to compute the predictions
#' for all observations.
#'
#' mean.validation.loss.vec is a vector (with max.iterations elements) of the
#' mean validation loss over all K folds.
#'
#' mean.train.loss.vec is computed analogously for the train data.
#'
#' The mean validation loss is minimized to determine selected.steps,
#' the optimal number of gradient descent steps/iterations.
#'
#' The caller can then run NNetIterations(max.iterations=selected.steps) on the whole training data set for the optimal fit.
#'
#' @param X.mat : (n x p matrix of observations)
#' @param y.vec : (vector of n labels aligned with the rows of X.mat)
#' @param fold.vec : (vector of fold IDs, one per observation, e.g.
#' fold.vec = sample(1:n.folds, length(y.vec), replace = TRUE))
#' @param max.iterations (the maximum number of gradient descent iterations for each fold)
#' @param step.size (the step size used in the gradient descent)
#' @param n.hidden.units (the number of hidden units in the network)
#' @param n.folds (default = 4; this is the number of folds used to split the data.
#' Warning: make sure it is not too high, or empty folds will cause failures)
#'
#' @return mean.validation.loss.vec (mean validation loss over folds, one element per iteration)
#' @return mean.train.loss.vec (mean train loss over folds, one element per iteration)
#' @return selected.steps (the number of iterations that minimizes the mean validation loss)
#'
#' @export
#'
#' @examples
#'
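#' # A minimal synthetic sketch (hypothetical data, chosen only to illustrate
#' # the expected argument types; 2 folds, 2 hidden units):
#' set.seed(1)
#' X.demo <- matrix(runif(40), 20, 2)
#' y.demo <- as.numeric(X.demo %*% c(1, -1) + rnorm(20, sd = 0.1))
#' fold.demo <- sample(rep(1:2, l = 20))
#' cv.result <- NNetEarlyStoppingCV(X.demo, y.demo, fold.demo,
#'   as.integer(20), 0.05, as.integer(2), n.folds = 2)
#' cv.result$selected.steps
#' # refit on the full data with the selected number of steps
#' final.fit <- NNetIterations(X.demo, y.demo, cv.result$selected.steps,
#'   0.05, as.integer(2), rep(TRUE, 20))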
#'
NNetEarlyStoppingCV <-
function(X.mat, y.vec, fold.vec, max.iterations, step.size, n.hidden.units, n.folds = 4){
# initialize the loss vectors and the logical train indicator vector
mean.train.loss.vec = rep(0,max.iterations)
mean.validation.loss.vec = rep(0,max.iterations)
is.train = rep(TRUE, length(y.vec))
for(fold.number in 1:n.folds){
# mark observations in the current fold as validation, all others as train
is.train = fold.vec != fold.number
# get the indices of the train and validation data separately
train.index = as.numeric(which(is.train==TRUE))
validation.index = as.numeric(which(is.train==FALSE))
# get the separate data matrices using the index vectors
X.train = X.mat[train.index,]
y.train = y.vec[train.index]
X.validation = X.mat[validation.index,]
y.validation = y.vec[validation.index]
# run the training function
return.list.i = NNetIterations( X.mat, y.vec, max.iterations, step.size, n.hidden.units, is.train )
# extract the train and validation predictions produced by the training function
prediction.train = return.list.i$pred.mat[train.index,]
prediction.validation = return.list.i$pred.mat[validation.index,]
# accumulate this fold's loss: take the mean error of each iteration and
# add it to the running vector of losses over iterations
# This uses mean absolute loss
mean.train.loss.vec = mean.train.loss.vec + colMeans( abs(prediction.train - y.train) )
mean.validation.loss.vec = mean.validation.loss.vec + colMeans( abs(prediction.validation - y.validation) )
# This uses root mean squared loss
#mean.train.loss.vec = mean.train.loss.vec + sqrt(colMeans( (prediction.train - y.train)^2 ) )
#mean.validation.loss.vec = mean.validation.loss.vec + sqrt( colMeans( (prediction.validation - y.validation)^2 ) )
}
# average the accumulated losses over all folds, then pick the iteration with the minimum validation loss
mean.train.loss.vec = mean.train.loss.vec / n.folds
mean.validation.loss.vec = mean.validation.loss.vec / n.folds
selected.steps = which.min(mean.validation.loss.vec)
result.list = list(
mean.train.loss.vec = mean.train.loss.vec,
mean.validation.loss.vec = mean.validation.loss.vec,
selected.steps = selected.steps
)
return(result.list)
}
## Helpers
# sigmoid function
sigmoid <- function(x){
return(1/(1+exp(-x)))
}
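# The backprop step in NNetIterations relies on the sigmoid derivative identity
# d/dx sigmoid(x) = sigmoid(x) * (1 - sigmoid(x)); a quick numerical sanity check
# (illustrative only, not used by the package):
# x <- seq(-3, 3, by = 0.5)
# all.equal(sigmoid(x) * (1 - sigmoid(x)),
#           (sigmoid(x + 1e-6) - sigmoid(x - 1e-6)) / 2e-6, tolerance = 1e-4)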