# See http://cs231n.github.io/neural-networks-3/#gradcheck
# Assumes the ANN2 package, which provides neuralnetwork() and the Rcpp_ANN
# object used below
library(testthat)
library(ANN2)

# Function to calculate the numerical and analytical gradient of the loss
# with respect to the input features
calcGradients <- function(X, y, regression, ...) {

  # Set epsilon (for calculating the numerical gradient), determine dimensions
  epsilon <- 1e-5
  n_rows  <- nrow(X)
  n_cols  <- ncol(X)

  # Construct NN; use a single batch containing all rows and no validation split
  NN <- neuralnetwork(X = X,
                      y = y,
                      standardize = TRUE,
                      regression = regression,
                      batch.size = n_rows,
                      val.prop = 0,
                      n.epochs = 100,
                      verbose = FALSE,
                      step.k = 30, ...)
  # Prepare targets for regression/classification
  if (regression) {
    y <- as.matrix(y)
  } else {
    # One-hot encode the class labels using the class names stored in the NN
    y <- 1 * outer(y, NN$Rcpp_ANN$getMeta()$y_names, '==')
  }
  y <- NN$Rcpp_ANN$scale_y(y, FALSE)

  # Prepare features - same for regression and classification
  X <- as.matrix(X)
  X <- NN$Rcpp_ANN$scale_X(X, FALSE)
  #### NUMERICAL GRADIENT
  # Construct feature matrices with small perturbations (positive and negative)
  # at one randomly chosen element per row. For this element the numerical and
  # analytical gradients are compared
  X_eps_neg <- X_eps_pos <- X
  idx_vec <- sample(n_cols, n_rows, replace = TRUE)
  for (i in 1:n_rows) {
    idx <- idx_vec[i]
    X_eps_pos[i, idx] <- X_eps_pos[i, idx] + epsilon
    X_eps_neg[i, idx] <- X_eps_neg[i, idx] - epsilon
  }
  # Function to evaluate loss for a feature matrix X and targets y
  eval_loss <- function(X, y) {
    y_fit <- NN$Rcpp_ANN$forwardPass(X)
    return( NN$Rcpp_ANN$evalLoss(y, y_fit) )
  }
  # Calculate numerical gradient with the centered difference
  # (L(X + eps) - L(X - eps)) / (2 * epsilon), summing the per-element loss
  # differences within each row to isolate the perturbed element's gradient
  E_num <- eval_loss(X_eps_pos, y) - eval_loss(X_eps_neg, y)
  E_num <- E_num / (2 * epsilon)
  E_num <- rowSums(E_num)
  #### ANALYTICAL GRADIENT
  y_fit <- NN$Rcpp_ANN$forwardPass(X)
  E <- NN$Rcpp_ANN$backwardPass(y, y_fit)
  # Extract, for each row, the gradient at the perturbed element
  E_ana <- sapply(1:n_rows, function(i) E[i, idx_vec[i]])

  return( list(numeric = E_num, analytic = E_ana) )
}
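
# Standalone illustration of the centered-difference rule used in
# calcGradients (an illustrative sanity check, not part of the tests;
# demo_f and demo_eps are hypothetical names): for f(x) = x^2 the
# approximation at x = 3 should match the analytical derivative f'(3) = 6
demo_f   <- function(x) x^2
demo_eps <- 1e-5
stopifnot(abs((demo_f(3 + demo_eps) - demo_f(3 - demo_eps)) / (2 * demo_eps) - 6) < 1e-6)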
# Function to check equality between observed and expected by inspecting their
# absolute difference relative to the largest absolute value of the two
relCompare <- function(observed, expected, tolerance) {
  # Elementwise maximum of absolute values
  max_elem <- pmax(abs(expected), abs(observed))
  # Absolute relative difference
  rel_diff <- abs(observed - expected) / max_elem
  # Check equality given tolerance
  check_vec <- (rel_diff < tolerance)
  # Handle edge case: both gradient elements (numerically) zero, which would
  # otherwise give a 0/0 division above
  check_vec[max_elem < 1e-8] <- TRUE
  # Pass check only if all numerical grads equal their analytical counterparts
  return( all(check_vec) )
}
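
# Example of relCompare on hypothetical gradient values: a relative difference
# of about 1e-7 passes at tolerance 1e-6, and the all-zero pair is accepted
# through the edge case above
stopifnot(relCompare(observed = c(1.0000001, 0), expected = c(1, 0), tolerance = 1e-6))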
context("Full gradient check for regression")
## LOSS FUNCTIONS
test_that("the full gradient is correct for all loss functions",
{
skip_on_cran()
# Set data and relative tolerance for equality check
data <- iris[sample(nrow(iris), size = 4), 1:4]
rel_tol_smooth <- 1e-7
rel_tol_kinks <- 1e-5
# Function to check gradient for current cases
gradCheck <- function(loss.type, rel_tol) {
grads <- calcGradients(X = data[,1:3],
y = data[,4],
hidden.layers = c(5,5),
activ.functions = "tanh",
regression = TRUE,
loss.type = loss.type)
return( relCompare(grads$analytic, grads$numeric, rel_tol) )
}
# Run tests on gradient
# The gradient checks sometimes fail by chance (especially with functions
# that contain kinks such as the ramp, step and relu) so do not run on CRAN
expect_true(gradCheck("squared", rel_tol_smooth))
expect_true(gradCheck("absolute", rel_tol_kinks))
expect_true(gradCheck("huber", rel_tol_kinks))
expect_true(gradCheck("pseudo-huber", rel_tol_kinks))
})
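
# Illustration (not part of the tests) of why kinked functions need a looser
# tolerance: when the centered difference straddles a kink, it averages the
# slopes on either side. For f(x) = |x| at x = 0.4 * eps the true slope is 1,
# but the approximation gives:
# (abs(0.4e-5 + 1e-5) - abs(0.4e-5 - 1e-5)) / (2 * 1e-5)  # = 0.4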
## ACTIVATION FUNCTIONS
test_that("the full gradient is correct for all activation functions",
{
  skip_on_cran()

  # Set data and relative tolerance for equality check
  data <- iris[sample(nrow(iris), size = 4), 1:4]
  rel_tol_smooth <- 1e-7
  rel_tol_kinks  <- 1e-5

  # Function to check gradient for current cases
  gradCheck <- function(activ.functions, rel_tol) {
    grads <- calcGradients(X = data[,1:3],
                           y = data[,4],
                           hidden.layers = c(5),
                           activ.functions = activ.functions,
                           regression = TRUE,
                           loss.type = 'squared')
    return( relCompare(grads$analytic, grads$numeric, rel_tol) )
  }

  # Run tests on gradient
  # The gradient checks sometimes fail by chance (especially with functions
  # that contain kinks such as the ramp, step and relu) so do not run on CRAN
  expect_true(gradCheck("tanh", rel_tol_smooth))
  expect_true(gradCheck("sigmoid", rel_tol_smooth))
  expect_true(gradCheck("linear", rel_tol_smooth))
  expect_true(gradCheck("ramp", rel_tol_kinks))
  expect_true(gradCheck("step", rel_tol_kinks))
  expect_true(gradCheck("relu", rel_tol_kinks))
})
context("Full gradient check for classification")
test_that("the full gradient is correct for all activation functions",
{
skip_on_cran()
# Set data and relative tolerance for equality check
data <- iris[sample(nrow(iris), size = 4),]
rel_tol_smooth <- 1e-7
rel_tol_kinks <- 1e-5
# Function to check gradient for current cases
gradCheck <- function(activ.functions, rel_tol) {
grads <- calcGradients(X = data[,1:4],
y = data[,5],
hidden.layers = c(5),
activ.functions = activ.functions,
regression = FALSE,
loss.type = 'log')
return( relCompare(grads$analytic, grads$numeric, rel_tol) )
}
# Run tests on gradients
# The gradient checks sometimes fail by chance (especially with functions
# that contain kinks such as the ramp, step and relu) so do not run on CRAN
expect_true(gradCheck("tanh", rel_tol_smooth))
expect_true(gradCheck("sigmoid", rel_tol_smooth))
expect_true(gradCheck("linear", rel_tol_smooth))
expect_true(gradCheck("ramp", rel_tol_kinks))
expect_true(gradCheck("step", rel_tol_kinks))
expect_true(gradCheck("relu", rel_tol_kinks))
})
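
# To run this file interactively (assuming it lives in tests/testthat/ of the
# package source with a filename along these lines), something like the
# following should work:
# testthat::test_file("tests/testthat/test_gradients.R")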