# s.TFN.R
# ::rtemis::
# 2019 Efstathios D. Gennatas egenn.github.io

#' Artificial Neural Network with \pkg{tensorflow} [C, R]
#'
#' Train an Artificial Neural Network using \pkg{keras} and \pkg{tensorflow}
#'
#' For more information on arguments and hyperparameters, see \url{https://keras.rstudio.com/} and \url{https://keras.io/}.
#' It is important to define the network structure and adjust hyperparameters based on your problem; you cannot
#' expect the defaults to work well on any given dataset.
#' @inheritParams s.GLM
#' @param n.hidden.nodes Integer vector: Length must be equal to the number of hidden layers you wish to create.
#' Can be zero, in which case you get a linear model. Default = N of features, i.e. NCOL(x)
#' @param initializer Character: Initializer to use for each layer: "glorot_uniform", "glorot_normal", "he_uniform",
#' "he_normal", "lecun_uniform", "lecun_normal", "random_uniform", "random_normal", "variance_scaling",
#' "truncated_normal", "orthogonal", "zeros", "ones", "constant".
#' Glorot is also known as Xavier initialization. Default = "glorot_uniform"
#' @param initializer.seed Integer: Seed to use for each initializer for reproducibility. Default = NULL
#' @param dropout Float, vector, (0, 1): Probability of dropping nodes. Can be a vector of length equal to N of layers,
#' otherwise will be recycled. Default = 0
#' @param activation String vector: Activation type to use: "relu", "selu", "elu", "sigmoid", "hard_sigmoid", "tanh",
#' "exponential", "linear", "softmax", "softplus", "softsign". Defaults to "relu" for Classification and
#' "tanh" for Regression
#' @param batch.normalization Logical: If TRUE, batch normalize after each hidden layer. Default = TRUE
#' @param output Character: Activation to use for output layer. Can be any as in \code{activation}.
#' Default = "linear" for Regression, "sigmoid" for binary classification, "softmax" for multiclass
#' @param loss Character: Loss to use. Default = "mean_squared_error" for regression, "binary_crossentropy" for binary
#' classification, "sparse_categorical_crossentropy" for multiclass
#' @param optimizer Character: Optimizer to use: "rmsprop", "adadelta", "adagrad", "adam", "adamax", "nadam", "sgd".
#' Default = "rmsprop"
#' @param learning.rate Float: Learning rate. Defaults depend on \code{optimizer} used and are:
#' \code{rmsprop = .01, adadelta = 1, adagrad = .01, adamax = .002, adam = .001, nadam = .002, sgd = .1}
#' @param metric Character: Metric used for evaluation during training. Default = "mean_squared_error" for regression,
#'  "accuracy" for classification.
#' @param epochs Integer: Number of epochs. Default = 100
#' @param batch.size Integer: Batch size. Default = N of cases
#' @param validation.split Float (0, 1): proportion of training data to use for validation. Default = .2
#' @param callback Function to be called by keras during fitting.
#' Default = \code{keras::callback_early_stopping(patience = 150)} for early stopping.
#' @param scale Logical: If TRUE, scale features before training. Default = TRUE.
#' Column means and standard deviations are saved in the \code{rtMod$extra} field to allow
#' scaling ahead of prediction on new data
#' @param ... Additional parameters
#' @author Efstathios D. Gennatas
#' @seealso \link{elevate} for external cross-validation
#' @family Supervised Learning
#' @family Deep Learning
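#' @examples
#' \dontrun{
#' # A minimal usage sketch, assuming a working keras + tensorflow installation;
#' # the data and settings below are illustrative only:
#' x <- matrix(rnorm(2000), nrow = 200, ncol = 10)
#' y <- x[, 3] + x[, 5]^2 + rnorm(200)
#' mod <- s.TFN(x, y, n.hidden.nodes = c(10, 10), epochs = 50)
#' # With scale = TRUE, the training-set scaling parameters are kept for new data:
#' # mod$extra$col_means_train; mod$extra$col_stddevs_train
#' }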
#' @export

s.TFN <- function(x, y = NULL,
                  x.test = NULL, y.test = NULL,
                  x.valid = NULL, y.valid = NULL,
                  class.weights = NULL,
                  ipw = TRUE,
                  ipw.type = 2,
                  upsample = FALSE,
                  downsample = FALSE,
                  resample.seed = NULL,
                  net = NULL,
                  n.hidden.nodes = NULL,
                  initializer = c("glorot_uniform", "glorot_normal",
                                  "he_uniform", "he_normal",
                                  "lecun_uniform", "lecun_normal",
                                  "random_uniform", "random_normal",
                                  "variance_scaling", "truncated_normal",
                                  "orthogonal", "zeros",
                                  "ones", "constant"),
                  initializer.seed = NULL,
                  dropout = 0,
                  activation = c("relu", "selu",
                                 "elu", "sigmoid",
                                 "hard_sigmoid", "tanh",
                                 "exponential", "linear",
                                 "softmax", "softplus",
                                 "softsign"),
                  l1 = 0,
                  l2 = 0,
                  batch.normalization = TRUE,
                  output = NULL,
                  loss = NULL,
                  optimizer = c("rmsprop", "adadelta",
                                "adagrad", "adam",
                                "adamax", "nadam",
                                "sgd"),
                  learning.rate = NULL,
                  metric = NULL,
                  epochs = 100,
                  batch.size = NULL,
                  validation.split = .2,
                  callback = keras::callback_early_stopping(patience = 150),
                  scale = TRUE,
                  x.name = NULL,
                  y.name = NULL,
                  print.plot = TRUE,
                  print.error.plot = NULL,
                  rtlayout.mat = c(2, 1),
                  plot.fitted = NULL,
                  plot.predicted = NULL,
                  plot.theme = getOption("rt.fit.theme", "lightgrid"),
                  question = NULL,
                  verbose = TRUE,
                  verbose.checkpoint = FALSE,
                  outdir = NULL,
                  save.mod = ifelse(!is.null(outdir), TRUE, FALSE), ...) {

  # [ INTRO ] ====
  if (missing(x)) {
    print(args(s.TFN))
    return(invisible(9))
  }
  if (!is.null(outdir)) outdir <- paste0(normalizePath(outdir, mustWork = FALSE), "/")
  logFile <- if (!is.null(outdir)) {
    paste0(outdir, "/", sys.calls()[[1]][[1]], ".", format(Sys.time(), "%Y%m%d.%H%M%S"), ".log")
  } else {
    NULL
  }
  start.time <- intro(verbose = verbose, logFile = logFile)
  mod.name <- "TFN"

  # [ DEPENDENCIES ] ====
  # All model calls below use the keras namespace, which loads tensorflow as its backend
  if (!depCheck("keras", verbose = FALSE)) {
    cat("\n"); stop("Please install dependencies and try again")
  }

  # [ ARGUMENTS ] ====
  if (is.null(x.name)) x.name <- getName(x, "x")
  if (is.null(y.name)) y.name <- getName(y, "y")
  if (!verbose) print.plot <- FALSE
  verbose <- verbose || !is.null(logFile)
  if (save.mod && is.null(outdir)) outdir <- paste0("./s.", mod.name)
  if (!is.null(outdir)) outdir <- paste0(normalizePath(outdir, mustWork = FALSE), "/")
  if (is.null(print.error.plot)) print.error.plot <- print.plot
  # Resolve initializer name to the corresponding keras initializer_* constructor
  initializer <- match.arg(initializer)
  initializer <- paste0("initializer_", initializer)
  initializer <- getFromNamespace(initializer, "keras")

  # Resolve optimizer name to the corresponding keras optimizer_* constructor,
  # setting a per-optimizer default learning rate if none was given
  optimizer <- match.arg(optimizer)
  if (is.null(learning.rate)) {
    learning.rate <- switch(optimizer,
                            rmsprop = .01,
                            adadelta = 1,
                            adagrad = .01,
                            adamax = .002,
                            adam = .001,
                            nadam = .002,
                            sgd = .1)
  }
  optimizer <- paste0("optimizer_", optimizer)
  optimizer <- getFromNamespace(optimizer, "keras")


  # [ DATA ] ====
  dt <- dataPrepare(x, y, x.test, y.test,
                    ipw = ipw,
                    ipw.type = ipw.type,
                    upsample = upsample,
                    downsample = downsample,
                    resample.seed = resample.seed,
                    verbose = verbose)
  x <- dt$x
  y <- dt$y
  x.test <- dt$x.test
  y.test <- dt$y.test
  x.valid <- dt$x.valid
  y.valid <- dt$y.valid
  xnames <- dt$xnames
  type <- dt$type
  checkType(type, c("Classification", "Regression"), mod.name)
  .class.weights <- if (is.null(class.weights) && ipw) dt$class.weights else class.weights
  if (verbose) dataSummary(x, y, x.test, y.test, type)
  x.dm <- data.matrix(x)
  n.features <- NCOL(x)

  # Activation
  if (length(activation) > 1) {
    activation <- ifelse(type == "Classification", "relu", "tanh")
  }

  # Outcome
  .class.weights.int <- NULL
  if (type == "Classification") {
    y0 <- y
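    # keras' integer-label losses expect 0-based class indices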
    y <- as.numeric(y) - 1
    n.classes <- length(levels(y0))
    if (!is.null(.class.weights)) {
      .class.weights.int <- as.list(.class.weights)
      names(.class.weights.int) <- seq(n.classes) - 1
    }
  }

  # Loss
  if (is.null(loss)) {
    if (type == "Classification") {
      loss <- if (n.classes == 2) "binary_crossentropy" else "sparse_categorical_crossentropy"
    } else {
      loss <- "mean_squared_error"
    }
  }
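  # categorical_crossentropy expects one-hot targets, unlike sparse_categorical_crossentropy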
  if (type == "Classification" & loss == "categorical_crossentropy") y <- keras::to_categorical(y)

  if (print.plot) {
    if (is.null(plot.fitted)) plot.fitted <- if (is.null(y.test)) TRUE else FALSE
    if (is.null(plot.predicted)) plot.predicted <- if (!is.null(y.test)) TRUE else FALSE
  } else {
    plot.fitted <- plot.predicted <- FALSE
  }

  # '- Normalize ====
  # Normalize training data
  if (scale) {
    x.dm <- scale(x.dm)
    col_means_train <- attr(x.dm, "scaled:center")
    col_stddevs_train <- attr(x.dm, "scaled:scale")
    if (!is.null(x.test)) {
      x.test <- scale(x.test, center = col_means_train, scale = col_stddevs_train)
    }
  }

  # Default n.hidden.nodes
  if (is.null(n.hidden.nodes)) n.hidden.nodes <- n.features

  # Metric
  if (is.null(metric)) {
    if (type == "Classification") {
      metric <- "accuracy"
    } else {
      metric <- "mean_squared_error"
    }
  }

  # Default batch.size
  if (is.null(batch.size)) {
    batch.size <- floor(.25 * length(y))
  }

  # [ NETWORK ] ====
  if (n.hidden.nodes[1] == 0) {
    n.hnodes <- n.hlayers <- 0
  } else {
    n.hnodes <- n.hidden.nodes
    n.hlayers <- length(n.hidden.nodes)
  }
  if (length(dropout) < n.hlayers) dropout <- rep(dropout, length.out = n.hlayers)

  ### '- INIT ====
  if (is.null(net)) {

    net <- keras::keras_model_sequential()

    ### '- HIDDEN LAYERS ====
    if (n.hlayers > 0) {
      for (i in seq(n.hlayers)) {
        net <- keras::layer_dense(net,
                                  units = n.hnodes[i],
                                  activation = activation,
                                  input_shape = if (i == 1) n.features else NULL,
                                  kernel_initializer = initializer(seed = initializer.seed),
                                  name = paste0("rt_Dense_", i))
        # Apply activity regularization if either penalty is nonzero
        if (l1 != 0 || l2 != 0) net <- keras::layer_activity_regularization(net,
                                                                            l1 = l1, l2 = l2,
                                                                            name = paste0("rt_Reg_", i))
        if (batch.normalization) net <- keras::layer_batch_normalization(net, name = paste0("rt_BN_", i))
        net <- keras::layer_dropout(net, rate = dropout[i],
                                    name = paste0("rt_Dropout_", i))
      }
    } # /if (n.hlayers > 0)

    ### '- OUTPUT ====
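    # Regression and binary crossentropy use a single output unit; otherwise, one unit per class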
    n.outputs <- if (type == "Regression") 1 else n.classes
    if (loss == "binary_crossentropy") n.outputs <- 1
    if (is.null(output)) {
      if (type == "Classification") {
        output <- if (n.outputs > 1) "softmax" else "sigmoid"
      } else {
        output <- "linear"
      }
    }

    net <- keras::layer_dense(net,
                              units = n.outputs,
                              activation = output,
                              name = "rt_Output")

    # [ TF ] ====
    if (verbose) msg0("Training Neural Network ", type, " with ",
                      n.hlayers, " hidden ", ifelse(n.hlayers == 1, "layer", "layers"),
                      "...\n", newline.pre = TRUE)

    # '- Compile ====
    net %>% keras::compile(
      loss = loss,
      optimizer = optimizer(lr = learning.rate),
      metrics = metric
    )
  } else {
    if (verbose) msg("Training pre-built Network for", type, "...")
  }

  # Parameters ====
  # Defined outside the network-building block so they are available to rtModSet
  # even when a pre-built net is supplied
  parameters <- list(n.hidden.nodes = n.hidden.nodes,
                     batch.size = batch.size,
                     batch.normalization = batch.normalization,
                     epochs = epochs,
                     optimizer = optimizer,
                     learning.rate = learning.rate,
                     metric = metric)
  if (verbose) printls(parameters, title = "ANN parameters",
                       center.title = TRUE,
                       pad = 0,
                       newline.pre = TRUE)

  # '- Fit ====
  net %>%
    keras::fit(
      x.dm, y,
      epochs = epochs,
      batch_size = batch.size,
      validation_split = validation.split,
      callbacks = callback,
      class_weight = .class.weights.int
    )

  # [ FITTED ] ====
  if (type == "Regression") {
    fitted <- c(predict(net, x.dm))
    error.train <- modError(y, fitted, type = type)
  } else {
    fitted.prob <- keras::predict_proba(net, x.dm)
    fitted <- factor(c(keras::predict_classes(net, x.dm)))
    levels(fitted) <- levels(y0) # levels are 0, 1, 2 before conversion
    error.train <- modError(y0, fitted, type = type)
  }

  if (verbose) errorSummary(error.train, mod.name)

  # [ PREDICTED ] ====
  predicted.prob <- predicted <- error.test <- NULL
  if (!is.null(x.test)) {
    if (type == "Regression") {
      predicted <- c(predict(net, data.matrix(x.test)))
    } else {
      predicted.prob <- keras::predict_proba(net, data.matrix(x.test))
      predicted <- factor(c(keras::predict_classes(net, data.matrix(x.test))))
      levels(predicted) <- levels(y0)
    }
    if (!is.null(y.test)) {
      error.test <- modError(y.test, predicted)
      if (verbose) errorSummary(error.test, mod.name)
    }
  }

  # [ OUTRO ] ====
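  # Store training-set scaling parameters so new data can be scaled identically before prediction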
  extra <- list(scale = scale,
                col_means_train = if (scale) col_means_train else NULL,
                col_stddevs_train = if (scale) col_stddevs_train else NULL)
  rt <- rtModSet(mod.name = mod.name,
                 type = type,
                 y.train = if (type == "Classification") y0 else y,
                 y.test = y.test,
                 x.name = x.name,
                 xnames = xnames,
                 mod = net,
                 fitted = fitted,
                 fitted.prob = fitted.prob,
                 se.fit = NULL,
                 error.train = error.train,
                 predicted = predicted,
                 predicted.prob = predicted.prob,
                 se.prediction = NULL,
                 parameters = parameters,
                 error.test = error.test,
                 question = question,
                 extra = extra)

  rtMod.out(rt,
            print.plot,
            plot.fitted,
            plot.predicted,
            y.test,
            mod.name,
            outdir,
            save.mod,
            verbose,
            plot.theme)

  if (save.mod) keras::save_model_hdf5(net, filepath = paste0(outdir, "rt_kerasTF"))
  outro(start.time, verbose = verbose, sinkOff = !is.null(logFile))
  rt

} # rtemis::s.TFN