R/backpropagate.R

#' @title Calculate the Error and Backpropagate Adjustments
#' @description Performs a forward pass to record every layer's weighted
#' input and activation, then backpropagates the error to compute the
#' gradient of the cost with respect to every weight and bias.
#' @param input A matrix of size (N, 1), where N is the size
#' of the layer that receives the inputs. (matrix)
#' @param output A matrix of size (M, 1) representing the
#' expected outputs given some inputs, where M is the network
#' output size. (matrix)
#' @param network A network object generated by create_network (list)
#' @author Eduardo Kapp
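#' @return A list with two entries, `nabla_w` and `nabla_b`, each a list
#' holding, for every layer, the gradient of the cost with respect to that
#' layer's weights and biases respectively. (list)
#' @examples
#' \dontrun{
#' # Hypothetical sketch: create_network()'s actual arguments, and example
#' # `input`/`output` column matrices of matching sizes, are assumed here.
#' net <- create_network(...)
#' grads <- backpropagate(input, output, net)
#' # grads$nabla_b[[l]] holds the bias gradient for layer l.
#' # A plain gradient-descent step on the weights could then look like:
#' lr <- 0.1
#' for (l in seq_len(net$n_layers)) {
#'   net$layers[[l]]$weights <- net$layers[[l]]$weights - lr * grads$nabla_w[[l]]
#' }
#' }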
backpropagate <- function(input, output, network) {
  layer_sequence <- seq_len(network$n_layers)

  # To backpropagate, first we need to do a forward pass and register every
  # activation (layer_output) and every weighted input (raw_layer_output)
  weighted_inputs <- list()
  activation_outputs <- list()
  inputs <- input
  for (layer in layer_sequence) {
    # Compute layer outputs (z)
    raw_layer_output <- compute_layer(
      inputs,
      network$layers[[layer]]
    )
    weighted_inputs[[layer]] <- raw_layer_output

    # Compute activation of the layer outputs (a)
    layer_output <- network$layers[[layer]]$activation(raw_layer_output)
    activation_outputs[[layer]] <- layer_output

    # the new inputs will be the last layer outputs
    inputs <- layer_output
  }

  # Now we have every z and every a, so we can calculate the cost at each
  # layer. We'll do this from the last layer until the first (that's why
  # it's called backpropagation).
  nabla_b <- list() # nabla_b is a list with bias adjustments for every layer
  nabla_w <- list() # nabla_w is a list with weight adjustments for every layer
  # The last layer has its cost directly associated with the expected outputs
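  # In equation form (L denotes the last layer, * the element-wise product):
  #   delta^L  = dC/da^L * sigmoid'(z^L), with dC/da^L given by mse_prime(output, a)
  #   dC/db^L  = delta^L
  #   dC/dW^L  = delta^L %*% t(a^(L-1))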
  z <- weighted_inputs[[network$n_layers]] # z is the weighted inputs
  a <- activation_outputs[[network$n_layers]] # a is the activation(z)
  a_1 <- activation_outputs[[network$n_layers - 1]] # a_1 is the prev layer a
  delta <- mse_prime(output, a)
  delta <- delta * sigmoid_prime(z)
  nabla_b[[network$n_layers]] <- delta
  nabla_w[[network$n_layers]] <- delta %*% t(a_1)

  # Now for every other layer
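  # For each hidden layer l, the error is pushed back through the next layer's
  # weights (* is the element-wise product):
  #   delta^l  = (t(W^(l+1)) %*% delta^(l+1)) * sigmoid'(z^l)
  #   dC/db^l  = delta^l
  #   dC/dW^l  = delta^l %*% t(a^(l-1)), with a^0 = input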
  layer_sequence <- rev(seq_len(network$n_layers - 1))

  for (layer in layer_sequence) {
    z <- weighted_inputs[[layer]]
    a <- activation_outputs[[layer]]
    if (layer != 1) {
      a_1 <- activation_outputs[[layer - 1]]
    } else {
      a_1 <- input
    }
    sp <- sigmoid_prime(z)
    delta <- (t(network$layers[[layer + 1]]$weights) %*% delta) * sp
    nabla_b[[layer]] <- delta
    nabla_w[[layer]] <- delta %*% t(a_1)
  }
  return(list(nabla_w = nabla_w, nabla_b = nabla_b))
}