####################################################################
####################### RWNN neural networks #######################
####################################################################
#' @title rwnn control function
#'
#' @description A function used to create a control-object for the \link{rwnn} function.
#'
#' @param n_hidden A vector of integers designating the number of neurons in each of the hidden layers (the length of the vector is taken as the number of hidden layers).
#' @param n_features The number of randomly chosen features in the RWNN model. Note: This is meant for use in \link{bag_rwnn}, and it is not recommended outside of that function.
#' @param lnorm A string indicating the type of regularisation used when estimating the weights in the output layer, \code{"l1"} or \code{"l2"} (default).
#' @param bias_hidden A vector of TRUE/FALSE values. The vector should have length 1, or length equal to the number of hidden layers.
#' @param bias_output TRUE/FALSE: Should a bias be added to the output layer?
#' @param activation A vector of strings corresponding to activation functions (see details). The vector should have length 1, or length equal to the number of hidden layers.
#' @param combine_input TRUE/FALSE: Should the input features also be used when predicting the output?
#' @param combine_hidden TRUE/FALSE: Should the outputs of all hidden layers be combined when predicting the output?
#' @param include_data TRUE/FALSE: Should the original data be included in the returned object? Note: this should almost always be set to '\code{TRUE}', but setting it to '\code{FALSE}' is more memory efficient when used in \link{ERWNN-object}s.
#' @param include_estimate TRUE/FALSE: Should the \code{rwnn}-function estimate the output parameters? Note: this should almost always be set to '\code{TRUE}', but setting it to '\code{FALSE}' is more memory efficient when used in \link{ERWNN-object}s.
#' @param rng A string indicating the sampling distribution used for generating the weights of the hidden layer (defaults to \code{runif}).
#' @param rng_pars A list of parameters passed to the \code{rng} function (defaults to \code{list(min = -1, max = 1)}).
#'
#' @details The possible activation functions supplied to '\code{activation}' are:
#' \describe{
#' \item{\code{"identity"}}{\deqn{f(x) = x}}
#' \item{\code{"bentidentity"}}{\deqn{f(x) = \frac{\sqrt{x^2 + 1} - 1}{2} + x}}
#' \item{\code{"sigmoid"}}{\deqn{f(x) = \frac{1}{1 + \exp(-x)}}}
#' \item{\code{"tanh"}}{\deqn{f(x) = \frac{\exp(x) - \exp(-x)}{\exp(x) + \exp(-x)}}}
#' \item{\code{"relu"}}{\deqn{f(x) = \max\{0, x\}}}
#' \item{\code{"silu"} (default)}{\deqn{f(x) = \frac{x}{1 + \exp(-x)}}}
#' \item{\code{"softplus"}}{\deqn{f(x) = \ln(1 + \exp(x))}}
#' \item{\code{"softsign"}}{\deqn{f(x) = \frac{x}{1 + |x|}}}
#' \item{\code{"sqnl"}}{\deqn{f(x) = -1\text{, if }x < -2\text{, }f(x) = x + \frac{x^2}{4}\text{, if }-2 \le x < 0\text{, }f(x) = x - \frac{x^2}{4}\text{, if }0 \le x \le 2\text{, and } f(x) = 2\text{, if }x > 2}}
#' \item{\code{"gaussian"}}{\deqn{f(x) = \exp(-x^2)}}
#' \item{\code{"sqrbf"}}{\deqn{f(x) = 1 - \frac{x^2}{2}\text{, if }|x| \le 1\text{, }f(x) = \frac{(2 - |x|)^2}{2}\text{, if }1 < |x| < 2\text{, and }f(x) = 0\text{, if }|x| \ge 2}}
#' }
#'
#' The '\code{rng}' argument can also be set to \code{"orthogonal"}, \code{"torus"}, \code{"halton"}, or \code{"sobol"} for added stability. The \code{"torus"}, \code{"halton"}, and \code{"sobol"} methods rely on the \link[randtoolbox]{torus}, \link[randtoolbox]{halton}, and \link[randtoolbox]{sobol} functions. NB: this is not recommended when creating ensembles.
#'
#' @return A list of control variables.
#'
#' @references Wang W., Liu X. (2017) "The selection of input weights of extreme learning machine: A sample structure preserving point of view." \emph{Neurocomputing}, 261, 28-36.
#'
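#'
#' @examples
#' ## A small sketch of typical usage (all values are illustrative):
#' ## two hidden layers with different activation functions, "l1"
#' ## regularisation, and low-discrepancy "sobol" weight sampling.
#' ctrl <- control_rwnn(
#'     n_hidden = c(10, 10),
#'     lnorm = "l1",
#'     activation = c("relu", "sigmoid"),
#'     rng = "sobol",
#'     rng_pars = list(min = -1, max = 1)
#' )
#'
#' ## Gaussian instead of uniform weights: 'rng_pars' must name every
#' ## argument of the sampling function except 'n'.
#' ctrl <- control_rwnn(n_hidden = 10, rng = rnorm, rng_pars = list(mean = 0, sd = 1))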
#' @export
control_rwnn <- function(n_hidden = NULL, n_features = NULL, lnorm = NULL,
bias_hidden = TRUE, bias_output = TRUE, activation = NULL,
combine_input = FALSE, combine_hidden = TRUE,
include_data = TRUE, include_estimate = TRUE,
rng = runif, rng_pars = list(min = -1, max = 1)) {
#
    if (is.null(lnorm) || !is.character(lnorm)) {
lnorm <- "l2"
}
#
lnorm <- tolower(lnorm)
if (!(lnorm %in% c("l1", "l2"))) {
stop("'lnorm' has to be either 'l1' or 'l2'.")
}
#
    if (length(bias_hidden) == 1) {
        bias_hidden <- rep(bias_hidden, length(n_hidden))
    }
    else if (length(bias_hidden) != length(n_hidden)) {
        stop("The 'bias_hidden' vector specified in the control-object should have length 1, or be the same length as the vector 'n_hidden'.")
    }
#
if (!is.logical(bias_output)) {
stop("'bias_output' has to be 'TRUE'/'FALSE'.")
}
#
if (!is.logical(combine_input)) {
stop("'combine_input' has to be 'TRUE'/'FALSE'.")
}
#
if (!is.logical(combine_hidden)) {
stop("'combine_hidden' has to be 'TRUE'/'FALSE'.")
}
#
if (!is.logical(include_data)) {
stop("'include_data' has to be 'TRUE'/'FALSE'.")
}
#
if (!is.logical(include_estimate)) {
stop("'include_estimate' has to be 'TRUE'/'FALSE'.")
}
#
    if (is.null(activation) || !is.character(activation)) {
        activation <- "silu"
    }
    activation <- tolower(activation)
    if (any(!(activation %in% c("sigmoid", "tanh", "relu", "silu", "softplus", "softsign", "sqnl", "gaussian", "sqrbf", "bentidentity", "identity")))) {
        stop("Invalid activation function detected in 'activation' vector. The implemented activation functions are: 'sigmoid', 'tanh', 'relu', 'silu', 'softplus', 'softsign', 'sqnl', 'gaussian', 'sqrbf', 'bentidentity', and 'identity'.")
    }
    if (length(activation) == 1) {
        activation <- rep(activation, length(n_hidden))
    }
    else if (length(activation) != length(n_hidden)) {
        stop("The 'activation' vector specified in the control-object should have length 1, or be the same length as the vector 'n_hidden'.")
    }
    if (length(activation) < 1) {
        activation <- NULL
    }
#
    if (is.character(rng)) {
        rng <- tolower(rng)
    }
    if (is.character(rng) && (rng %in% c("o", "orto", "orthogonal", "h", "halt", "halton", "s", "sobo", "sobol", "tor", "torus"))) {
        rng_arg <- c("min", "max")
    }
    else {
        rng_arg <- setdiff(formalArgs(rng), "n")
    }
if (!all(rng_arg %in% names(rng_pars))) {
stop(paste("The following arguments were not found in 'rng_pars' list:", paste(rng_arg[!(rng_arg %in% names(rng_pars))], collapse = ", ")))
}
#
return(
list(
n_hidden = n_hidden, n_features = n_features, lnorm = lnorm,
bias_hidden = bias_hidden, bias_output = bias_output, activation = activation,
combine_input = combine_input, combine_hidden = combine_hidden,
include_data = include_data, include_estimate = include_estimate,
rng = rng, rng_pars = rng_pars
)
)
}
#' @title Random weight neural networks
#'
#' @description Set-up and estimate weights of a random weight neural network.
#'
#' @param formula A \link{formula} specifying features and targets used to estimate the parameters of the output layer.
#' @param data A data-set (either a \link{data.frame} or a \link[tibble]{tibble}) used to estimate the parameters of the output layer.
#' @param n_hidden A vector of integers designating the number of neurons in each of the hidden layers (the length of the vector is taken as the number of hidden layers).
#' @param lambda The penalisation constant used when training the output layer.
#' @param type A string indicating whether this is a regression or classification problem.
#' @param control A list of additional arguments passed to the \link{control_rwnn} function.
#'
#' @details A deep RWNN is constructed by increasing the number of elements in the vector \code{n_hidden}. Furthermore, if \code{type} is \code{NULL}, the function tries to deduce it from the class of the target.
#'
#' @return An \link{RWNN-object}.
#'
#' @references Schmidt W., Kraaijveld M., Duin R. (1992) "Feedforward neural networks with random weights." \emph{In Proceedings., 11th IAPR International Conference on Pattern Recognition. Vol.II. Conference B: Pattern Recognition Methodology and Systems}, 1–4.
#'
#' Pao Y., Park G., Sobajic D. (1994) "Learning and generalization characteristics of the random vector functional-link net." \emph{Neurocomputing}, 6, 163–180.
#'
#' Huang G.B., Zhu Q.Y., Siew C.K. (2006) "Extreme learning machine: Theory and applications." \emph{Neurocomputing}, 70(1), 489–501.
#'
#' Henríquez P.A., Ruz G.A. (2018) "Twitter Sentiment Classification Based on Deep Random Vector Functional Link." \emph{In 2018 International Joint Conference on Neural Networks (IJCNN)}, 1–6.
#'
#' @export
rwnn <- function(formula, data = NULL, n_hidden = c(), lambda = 0, type = NULL, control = list()) {
UseMethod("rwnn")
}
#
rwnn_matrix <- function(X, y, n_hidden = c(), lambda = 0, type = NULL, control = list()) {
## Creating control object
control$n_hidden <- n_hidden
control <- do.call(control_rwnn, control)
#
lnorm <- control$lnorm
bias_hidden <- control$bias_hidden
activation <- control$activation
n_features <- control$n_features
rng_function <- control$rng
rng_pars <- control$rng_pars
## Checks
dc <- data_checks(y, X)
# Regularisation
    if (is.null(lambda) || !is.numeric(lambda)) {
lambda <- 0
warning("Note: 'lambda' was not supplied, or not numeric, and is therefore set to 0.")
} else if (length(lambda) > 1) {
lambda <- lambda[1]
warning("The length of 'lambda' was larger than 1, only the first element will be used.")
}
if (lambda < 0) {
lambda <- 0
warning("'lambda' has to be a real number larger than or equal to 0.")
}
# Feature restriction
if (is.null(n_features)) {
n_features <- ncol(X)
}
if (length(n_features) > 1) {
n_features <- n_features[1]
warning("The length of 'n_features' was larger than 1, only the first element will be used.")
}
if ((n_features < 1) || (n_features > dim(X)[2])) {
stop("'n_features' has to be between 1 and the total number of features.")
}
## Creating random weights
X_dim <- dim(X)
W_hidden <- vector("list", length = length(n_hidden))
for (w in seq_along(W_hidden)) {
if (w == 1) {
nr_rows <- (X_dim[2] + as.numeric(bias_hidden[w]))
}
else {
nr_rows <- (n_hidden[w - 1] + as.numeric(bias_hidden[w]))
}
if (is.character(rng_function)) {
if (rng_function %in% c("o", "orto", "orthogonal")) {
random_weights <- (rng_pars$max - rng_pars$min) * random_orthonormal(w, nr_rows, X, W_hidden, n_hidden, activation, bias_hidden) + rng_pars$min
}
else if (rng_function %in% c("h", "halt", "halton")) {
random_weights <- (rng_pars$max - rng_pars$min) * halton(nr_rows, n_hidden[w], init = w == 1, start = 0) + rng_pars$min
}
else if (rng_function %in% c("s", "sobo", "sobol")) {
random_weights <- (rng_pars$max - rng_pars$min) * sobol(nr_rows, n_hidden[w], init = w == 1, start = 0) + rng_pars$min
}
else if (rng_function %in% c("tor", "torus")) {
random_weights <- (rng_pars$max - rng_pars$min) * torus(nr_rows, n_hidden[w], init = w == 1, start = 0) + rng_pars$min
}
else {
rng_pars$n <- n_hidden[w] * nr_rows
random_weights <- matrix(do.call(rng_function, rng_pars), ncol = n_hidden[w])
}
}
else {
rng_pars$n <- n_hidden[w] * nr_rows
random_weights <- matrix(do.call(rng_function, rng_pars), ncol = n_hidden[w])
}
W_hidden[[w]] <- random_weights
if ((w == 1) && (n_features < dim(X)[2])) {
indices_f <- sample(ncol(X), n_features, replace = FALSE) + as.numeric(bias_hidden[w])
W_hidden[[w]][-indices_f, ] <- 0
}
}
## Values of last hidden layer
if (control$include_estimate) {
H <- rwnn_forward(X, W_hidden, activation, bias_hidden)
H <- lapply(seq_along(H), function(i) matrix(H[[i]], ncol = n_hidden[i]))
        if (control$combine_hidden) {
H <- do.call("cbind", H)
}
else {
H <- H[[length(H)]]
}
O <- H
if (control$combine_input) {
O <- cbind(X, H)
}
if (control$bias_output) {
O <- cbind(1, O)
}
W_output <- estimate_output_weights(O, y, lnorm, lambda)
} else {
W_output <- list()
}
## Return object
object <- list(
formula = NULL,
        data = if (control$include_data) list(X = X, y = y, C = if (type == "regression") NA else colnames(y)) else NULL,
n_hidden = n_hidden,
activation = activation,
lnorm = lnorm,
lambda = lambda,
bias = list(W = bias_hidden, beta = control$bias_output),
weights = list(W = W_hidden, beta = W_output$beta),
sigma = W_output$sigma,
type = type,
combined = list(X = control$combine_input, W = control$combine_hidden)
)
class(object) <- "RWNN"
return(object)
}
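## A plain-R sketch of the forward pass that 'rwnn_forward' is assumed to
## perform (illustrative only, not the package's actual routine): each
## hidden layer prepends a bias column of ones when requested, multiplies
## by its random weight matrix, and applies its activation element-wise.
# rwnn_forward_sketch <- function(X, W_hidden, activation, bias_hidden) {
#     act <- list(silu    = function(x) x / (1 + exp(-x)),
#                 relu    = function(x) pmax(0, x),
#                 sigmoid = function(x) 1 / (1 + exp(-x)))
#     H <- vector("list", length(W_hidden))
#     Z <- X
#     for (w in seq_along(W_hidden)) {
#         if (bias_hidden[w]) {
#             Z <- cbind(1, Z)  # matches 'nr_rows' used when sampling the weights
#         }
#         Z <- act[[activation[w]]](Z %*% W_hidden[[w]])
#         H[[w]] <- Z
#     }
#     H  # one matrix of hidden-layer values per layer, as combined above
# }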
#' @rdname rwnn
#' @method rwnn formula
#'
#' @example inst/examples/rwnn_example.R
#'
#' @export
rwnn.formula <- function(formula, data = NULL, n_hidden = c(), lambda = 0, type = NULL, control = list()) {
# Checks for 'n_hidden'
if (length(n_hidden) < 1) {
stop("When the number of hidden layers is 0, or left 'NULL', the RWNN reduces to a linear model, see ?lm.")
}
    if (!is.numeric(n_hidden)) {
stop("Not all elements of the 'n_hidden' vector were numeric.")
}
# Checks for 'data'
keep_formula <- TRUE
if (is.null(data)) {
data <- tryCatch(
expr = {
as.data.frame(as.matrix(model.frame(formula)))
},
error = function(e) {
stop("'data' needs to be supplied when using 'formula'.")
}
)
x_name <- paste0(attr(terms(formula), "term.labels"), ".")
colnames(data) <- paste0("V", gsub(x_name, "", colnames(data)))
colnames(data)[1] <- "y"
        formula <- paste(colnames(data)[1], "~", paste(colnames(data)[-1], collapse = " + "))
formula <- as.formula(formula)
keep_formula <- FALSE
}
# Re-capture feature names when '.' is used in formula interface
formula <- terms(formula, data = data)
formula <- strip_terms(formula)
#
X <- model.matrix(formula, data)
keep <- which(colnames(X) != "(Intercept)")
if (any(colnames(X) == "(Intercept)")) {
X <- X[, keep, drop = FALSE]
}
#
y <- model.response(model.frame(formula, data))
    y <- as.matrix(y)
#
if (is.null(type)) {
if (is(y[, 1], "numeric")) {
type <- "regression"
if (all(abs(y - round(y)) < 1e-8)) {
warning("The response consists of only integers, is this a classification problem?")
}
}
else if (class(y[, 1]) %in% c("factor", "character", "logical")) {
type <- "classification"
}
}
# Change output based on 'type'
    if (is.null(type)) {
        stop("'type' could not be deduced from the target, it needs to be set to either 'regression' or 'classification'.")
    }
    if (tolower(type) %in% c("c", "class", "classification")) {
type <- "classification"
y_names <- sort(unique(y))
y <- factor(y, levels = y_names)
y <- model.matrix(~ 0 + y)
attr(y, "assign") <- NULL
attr(y, "contrasts") <- NULL
y <- 2 * y - 1
        colnames(y) <- as.character(y_names)
}
else if (tolower(type) %in% c("r", "reg", "regression")) {
type <- "regression"
}
else {
stop("'type' has not been correctly specified, it needs to be set to either 'regression' or 'classification'.")
}
#
mm <- rwnn_matrix(X, y, n_hidden = n_hidden, lambda = lambda, type = type, control = control)
mm$formula <- if (keep_formula) formula
return(mm)
}
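## A minimal end-to-end sketch of the formula interface (illustrative only;
## the simulated data and hyper-parameter values are assumptions, not
## recommendations):
# set.seed(1)
# df <- data.frame(x1 = rnorm(100), x2 = rnorm(100))
# df$y <- sin(df$x1) + 0.5 * df$x2 + rnorm(100, sd = 0.1)
# m <- rwnn(y ~ x1 + x2, data = df, n_hidden = c(20, 20),
#           lambda = 0.01, type = "regression")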