# TODO: Add regularisation.
# TODO: Add momentum.
# TODO: Make function faster (Rcpp)
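
# The helper functions used below are defined elsewhere in the package; the
# following are minimal sketches of their assumed behaviour, not the package's
# exact implementations. `logistic` is the sigmoid activation; `LabelBinarizer`
# is assumed to one-hot encode a label vector into an n x K indicator matrix
# with one column per unique label (sorted, so that column k corresponds to
# label k - 1 for digit data). StackRBM, which trains the stack of RBMs used
# for pretraining, is not sketched here.
logistic <- function(x) 1 / (1 + exp(-x))
LabelBinarizer <- function(y) {
  labels <- sort(unique(y))
  out <- matrix(0, nrow = length(y), ncol = length(labels))
  for (k in seq_along(labels)) out[y == labels[k], k] <- 1
  out
}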
DBN <- function(x, y, n.iter = 300, nodes = c(30, 40, 30),
                learning.rate = 0.5, size.minibatch = 10,
                n.iter.pre = 30, learning.rate.pre = 0.1) {
  # Initialize weights with the pretrain algorithm
  print(paste0('Starting greedy pretraining with ', n.iter.pre,
               ' epochs for each RBM layer....'))
  weights <- StackRBM(x, n.iter = n.iter.pre, layers = nodes,
                      learning.rate = learning.rate.pre,
                      size.minibatch = size.minibatch)
  # Each RBM stores bias weights in both directions; transpose each weight
  # matrix and drop the backward (visible-bias) weights, keeping only the
  # direction used by the feed-forward pass
  weights[[1]]$trained.weights <- t(weights[[1]]$trained.weights)[-1, ]
  weights[[2]]$trained.weights <- t(weights[[2]]$trained.weights)[-1, ]
  weights[[3]]$trained.weights <- t(weights[[3]]$trained.weights)[-1, ]
  # Get all the indexes for the unique labels
  labels <- unique(y)
  idx <- vector('list', length = length(labels))
  # Initialize the label weights with small random values
  y.weights <- matrix(rnorm(length(labels) * nodes[3], 0, 0.1),
                      nrow = length(labels), ncol = nodes[3])
  # Add a term for the bias
  y.weights <- cbind(rnorm(length(labels)), y.weights)
  # Save the indexes of each label so minibatches can be class-balanced
  for (i in 1:length(labels)) {
    idx[[i]] <- which(y == labels[i])
  }
  # One-hot encode the labels
  y <- LabelBinarizer(y)
  # Attach bias to data
  x <- cbind(1, x)
  # Start gradient descent
  print(paste0('Starting gradient descent with ', n.iter, ' epochs...'))
  for (j in 1:n.iter) {
    # Pick a class-balanced minibatch: cycle through the labels, drawing one
    # random example of each in turn until the minibatch is full
    samp <- rep(0, size.minibatch)
    p <- 1
    for (i in 1:size.minibatch) {
      samp[i] <- sample(idx[[p]], 1)
      p <- p + 1
      if (p == length(labels) + 1) {
        # Reset counter
        p <- 1
      }
    }
    # First perform one feed-forward pass
    # Sample vis from the data
    vis <- x[samp, , drop = FALSE]
    # Go to first hidden layer
    hid <- logistic(weights[[1]]$trained.weights %*% t(vis))
    # Fix the bias
    hid <- rbind(1, hid)
    # Go to second hidden layer, first hidden layer as input
    pen <- logistic(weights[[2]]$trained.weights %*% hid)
    # Fix the bias
    pen <- rbind(1, pen)
    # Go to third hidden layer, second hidden layer as input
    top <- logistic(weights[[3]]$trained.weights %*% pen)
    # Fix the bias
    top <- rbind(1, top)
    # Now the last hidden to label output
    lab <- logistic(y.weights %*% top)
    # Calculate the cross-entropy cost, averaged over the minibatch
    J <- (1 / size.minibatch) * sum(-t(y[samp, , drop = FALSE]) * log(lab) -
                                    (1 - t(y[samp, , drop = FALSE])) * log(1 - lab))
    # Print cost
    print(paste0('Cost at epoch ', j, ' = ', J))
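    # Backpropagation note: hid, pen, top and lab already hold post-sigmoid
    # activations a = logistic(z), so the derivative of the sigmoid with
    # respect to its pre-activation is simply a * (1 - a); each error term
    # below is the backpropagated error times this derivative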
    # Compare the output to the actual label and backpropagate
    er.grad.lab <- lab * (1 - lab) * -(t(y[samp, , drop = FALSE]) - lab)
    # Backpropagate the error to the top layer
    top.back <- t(y.weights[, -1]) %*% er.grad.lab
    # Calculate gradient at the top layer
    er.grad.top <- top[-1, ] * (1 - top[-1, ]) * top.back
    # Back to the penultimate layer
    pen.back <- t(weights[[3]]$trained.weights[, -1]) %*% er.grad.top
    # Calculate gradient at the penultimate layer
    er.grad.pen <- pen[-1, ] * (1 - pen[-1, ]) * pen.back
    # Back to the hidden layer
    hid.back <- t(weights[[2]]$trained.weights[, -1]) %*% er.grad.pen
    # Calculate gradient at the hidden layer
    er.grad.hid <- hid[-1, ] * (1 - hid[-1, ]) * hid.back
    # Adjust the weights
    # Hidden layer
    grad.hid <- (1 / size.minibatch) * (er.grad.hid %*% vis)
    # Add regularisation (see TODO above; `lambda` is not yet an argument)
    # grad.hid[, -1] <- grad.hid[, -1] + (lambda / size.minibatch) * weights[[1]]$trained.weights[, -1]
    # Adjust hidden-layer weights
    weights[[1]]$trained.weights <- weights[[1]]$trained.weights - (learning.rate * grad.hid)
    # Penultimate layer
    grad.pen <- (1 / size.minibatch) * (er.grad.pen %*% t(hid))
    # grad.pen[, -1] <- grad.pen[, -1] + (lambda / size.minibatch) * weights[[2]]$trained.weights[, -1]
    # Adjust penultimate-layer weights
    weights[[2]]$trained.weights <- weights[[2]]$trained.weights - (learning.rate * grad.pen)
    # Top layer
    grad.top <- (1 / size.minibatch) * (er.grad.top %*% t(pen))
    # grad.top[, -1] <- grad.top[, -1] + (lambda / size.minibatch) * weights[[3]]$trained.weights[, -1]
    # Adjust top-layer weights
    weights[[3]]$trained.weights <- weights[[3]]$trained.weights - (learning.rate * grad.top)
    # Label weights
    grad.lab <- (1 / size.minibatch) * (er.grad.lab %*% t(top))
    # grad.lab[, -1] <- grad.lab[, -1] + (lambda / size.minibatch) * y.weights[, -1]
    y.weights <- y.weights - (learning.rate * grad.lab)
  }
  # Add the label weights to the weights list
  weights[[4]] <- y.weights
  return(weights)
}
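
# Example usage (a sketch, not from the package docs: `train_x` and `train_y`
# are placeholder names for a numeric input matrix scaled to [0, 1] and its
# label vector; StackRBM must be available from the package):
# mod <- DBN(train_x, train_y, n.iter = 300, nodes = c(30, 40, 30),
#            learning.rate = 0.5, size.minibatch = 10,
#            n.iter.pre = 30, learning.rate.pre = 0.1)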
# Standalone prediction example: one feed-forward pass through a trained model
# (assumes `mod` is the list returned by DBN(), `test` is a matrix of test
# inputs and `test_y` the corresponding labels)
# Attach the bias to the visible (input) data
vis <- cbind(1, test)
H <- logistic(mod[[1]]$trained.weights %*% t(vis))
H <- rbind(1, H)
H <- logistic(mod[[2]]$trained.weights %*% H)
H <- rbind(1, H)
H <- logistic(mod[[3]]$trained.weights %*% H)
H <- rbind(1, H)
lab <- logistic(mod[[4]] %*% H)
# Calculate accuracy: take the most probable label per column; the `- 1`
# assumes zero-based labels (e.g. digits 0-9) in the one-hot column order
mean(apply(lab, 2, which.max) - 1 == test_y)
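
# The loose snippet above can be wrapped into a reusable helper; a
# hypothetical sketch (the name `PredictDBN` and its signature are
# illustrative here, not necessarily the package's own API):
PredictDBN <- function(weights, test) {
  # One feed-forward pass, identical to the snippet above
  H <- logistic(weights[[1]]$trained.weights %*% t(cbind(1, test)))
  H <- logistic(weights[[2]]$trained.weights %*% rbind(1, H))
  H <- logistic(weights[[3]]$trained.weights %*% rbind(1, H))
  lab <- logistic(weights[[4]] %*% rbind(1, H))
  # Return the most probable label per column, zero-based to match the
  # accuracy check above
  apply(lab, 2, which.max) - 1
}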