#' One iteration of the Tractable Approximate Gaussian Inference (TAGI)
#'
#' This function goes through one learning iteration of the neural network model
#' using TAGI.
#'
#' @param NN Lists the structure of the neural network
#' @param mp Mean vector of parameters for each layer \eqn{\mu_{\theta}}
#' @param Sp Covariance matrix of parameters for each layer \eqn{\Sigma_{\theta}}
#' @param x Input data
#' @param y Response data
#' @return - Updated mean vector of parameters for each layer \eqn{\mu_{\theta}}
#' @return - Updated covariance matrix of parameters for each layer \eqn{\Sigma_{\theta}}
#' @return - Mean of predicted responses
#' @return - Variance of the predicted responses
#' @export
network <- function(NN, mp, Sp, x, y){
# Initialization
numObs = nrow(x)
numCovariates = NN$nx
zn = matrix(0, numObs, NN$ny)
Szn = matrix(0, numObs, NN$ny)
# Loop
loop = 0
for (i in seq(from = 1, to = numObs, by = NN$batchSize)){
loop = loop + 1
idxBatch = i:(i + NN$batchSize - 1)
xloop = matrix(t(x[idxBatch,]), nrow = length(idxBatch) * numCovariates, ncol = 1)
# Training
if (NN$trainMode == 1){
yloop = matrix(t(y[idxBatch,]), nrow = length(idxBatch) * NN$ny, ncol = 1)
out_feedForward = feedForward(NN, xloop, mp, Sp)
mz = out_feedForward[[1]]
Sz = out_feedForward[[2]]
Czw = out_feedForward[[3]]
Czb = out_feedForward[[4]]
Czz = out_feedForward[[5]]
out_feedBackward = feedBackward(NN, mp, Sp, mz, Sz, Czw, Czb, Czz, yloop)
mp = out_feedBackward[[1]]
Sp = out_feedBackward[[2]]
zn[idxBatch,] = t(matrix(mz[[length(mz)]], nrow = NN$ny, ncol = length(idxBatch)))
Szn[idxBatch,] = t(matrix(Sz[[length(Sz)]], nrow = NN$ny, ncol = length(idxBatch)))
}
# Testing
else {
out_feedForward = feedForward(NN, xloop, mp, Sp)
mz = out_feedForward[[1]]
Sz = out_feedForward[[2]]
zn[idxBatch,] = t(matrix(mz, nrow = NN$ny, ncol = length(idxBatch)))
Szn[idxBatch,] = t(matrix(Sz, nrow = NN$ny, ncol = length(idxBatch)))
}
}
outputs <- list(mp, Sp, zn, Szn)
return(outputs)
}
#' Forward uncertainty propagation
#'
#' This function feeds the neural network forward from input data to
#' responses.
#'
#' @param NN Lists the structure of the neural network
#' @param mp Mean vectors of parameters for each layer \eqn{\mu_{\theta}}
#' @param Sp Covariance matrices of parameters for each layer \eqn{\Sigma_{\theta}}
#' @param x Input data
#' @return - Mean vectors of units for each layer \eqn{\mu_{Z}}
#' @return - Covariance matrices of units for each layer \eqn{\Sigma_{Z}}
#' @return - Covariance matrices between units and weights for each layer \eqn{\Sigma_{ZW}}
#' @return - Covariance matrices between units and biases for each layer \eqn{\Sigma_{ZB}}
#' @return - Covariance matrices between previous and current units for each layer \eqn{\Sigma_{ZZ^{+}}}
#' @export
feedForward <- function(NN, x, mp, Sp){
# Initialization
numLayers = length(NN$nodes)
hiddenLayerActFunIdx = activationFunIndex(NN$hiddenLayerActivation)
# Activation unit
ma = matrix(list(), nrow = numLayers, ncol = 1)
ma[[1,1]] = x
Sa = matrix(list(), nrow = numLayers, ncol = 1)
# hidden states
mz = matrix(list(), nrow = numLayers, ncol = 1)
Czw = matrix(list(), nrow = numLayers, ncol = 1)
Czb = matrix(list(), nrow = numLayers, ncol = 1)
Czz = matrix(list(), nrow = numLayers, ncol = 1)
Sz = matrix(list(), nrow = numLayers, ncol = 1)
J = matrix(list(), nrow = numLayers, ncol = 1)
# hidden layers
for (j in 2:numLayers){
mz[[j,1]] = meanMz(mp[[j-1,1]], ma[[j-1,1]], NN$idxFmwa[j-1,], NN$idxFmwab[[j-1,1]])
# covariance for z^(j)
Sz[[j,1]] = covarianceSz(mp[[j-1,1]], ma[[j-1,1]], Sp[[j-1,1]], Sa[[j-1,1]],NN$idxFmwa[j-1,], NN$idxFmwab[[j-1,1]])
if (NN$trainMode == 1){
# covariance between z^(j) and w^(j-1)
out = covarianceCzp(ma[[j-1,1]], Sp[[j-1,1]], NN$idxFCwwa[j-1,], NN$idxFCb[[j-1,1]])
Czb[[j,1]] = out[[1]]
Czw[[j,1]] = out[[2]]
# covariance between z^(j+1) and z^(j)
if (!(is.null(Sz[[j-1,1]]))){
Czz[[j,1]] = covarianceCzz(mp[[j-1,1]], Sz[[j-1,1]], J[[j-1,1]], NN$idxFCzwa[j-1,])
}
}
# Activation
if (j < numLayers){
out_act = meanA(mz[[j,1]], mz[[j,1]], hiddenLayerActFunIdx)
ma[[j,1]] = out_act[[1]]
J[[j,1]] = out_act[[2]]
Sa[[j,1]] = covarianceSa(J[[j,1]], Sz[[j,1]])
}
}
# Outputs
if (!(NN$outputActivation == "linear")){
ouputLayerActFunIdx = activationFunIndex(NN$outputActivation)
out_feedForward = meanA(mz[[numLayers,1]], mz[[numLayers,1]], ouputLayerActFunIdx)
mz[[numLayers,1]] = out_feedForward[[1]]
J[[numLayers,1]] = out_feedForward[[2]]
Sz[[numLayers,1]] = covarianceSa(J[[numLayers,1]], Sz[[numLayers,1]])
}
if (NN$trainMode == 0){
mz = mz[[numLayers,1]]
Sz = Sz[[numLayers,1]]
}
outputs <- list(mz, Sz, Czw, Czb, Czz)
return(outputs)
}
#' Forward uncertainty propagation for derivative calculation
#'
#' This function feeds the neural network forward from input data to
#' responses and considers components required for derivative calculations.
#'
#' @param NN Lists the structure of the neural network
#' @param theta List of parameters
#' @param states List of states
#' @return - Updated states
#' @return - Mean vectors of activation units' first derivative
#' @return - Covariance matrices of activation units' first derivative
#' @return - Mean vectors of activation units' second derivative
#' @return - Covariance matrices of activation units' second derivative
#' @export
feedForwardPass <- function(NN, theta, states){
# Initialization
out_extractParameters <- extractParameters(theta)
mw = out_extractParameters[[1]]
Sw = out_extractParameters[[2]]
mb = out_extractParameters[[3]]
Sb = out_extractParameters[[4]]
out_extractStates <- extractStates(states)
mz = out_extractStates[[1]]
Sz = out_extractStates[[2]]
ma = out_extractStates[[3]]
Sa = out_extractStates[[4]]
J = out_extractStates[[5]]
mdxs = out_extractStates[[6]]
Sdxs = out_extractStates[[7]]
mxs = out_extractStates[[8]]
Sxs = out_extractStates[[9]]
numLayers = length(NN$nodes)
actFunIdx = NN$actFunIdx
actBound = NN$actBound
B = NN$batchSize
rB = NN$repBatchSize
nodes = NN$nodes
numParamsPerLayer_2 = NN$numParamsPerLayer_2
# derivative
mda = matrix(list(), nrow = numLayers, ncol = 1)
Sda = matrix(list(), nrow = numLayers, ncol = 1)
mdda = matrix(list(), nrow = numLayers, ncol = 1)
Sdda = matrix(list(), nrow = numLayers, ncol = 1)
mda[[1,1]] = rep(1, nrow(mz[[1,1]]))
Sda[[1,1]] = rep(0, nrow(Sz[[1,1]]))
mdda[[1,1]] = rep(0, nrow(mz[[1,1]]))
Sdda[[1,1]] = rep(0, nrow(Sz[[1,1]]))
# hidden layers
for (j in 2:numLayers){
idxw = (numParamsPerLayer_2[1, j-1]+1):numParamsPerLayer_2[1, j]
idxb = (numParamsPerLayer_2[2, j-1]+1):numParamsPerLayer_2[2, j]
# Max pooling
if (NN$layer[j] == NN$layerEncoder$fc){
if ((B == 1) & (rB == 1)){
out_fcMeanVarB1 <- fcMeanVarB1(mw[idxw], Sw[idxw], mb[idxb], Sb[idxb], ma[[j-1,1]], Sa[[j-1,1]], nodes[j-1], nodes[j])
mz[[j,1]] = out_fcMeanVarB1[[1]]
Sz[[j,1]] = out_fcMeanVarB1[[2]]
} else {
out_fcMeanVar <- fcMeanVar(mz[[j,1]], Sz[[j,1]], mw[idxw], Sw[idxw], mb[idxb], Sb[idxb], ma[[j-1,1]], Sa[[j-1,1]], nodes[j-1], nodes[j], B, rB)
mz[[j,1]] = out_fcMeanVar[[1]]
Sz[[j,1]] = out_fcMeanVar[[2]]
}
}
# Activation
if (actFunIdx[j] != 0){
out_meanVar = meanVar(mz[[j,1]], mz[[j,1]], Sz[[j,1]], actFunIdx[j])
ma[[j,1]] = out_meanVar[[1]]
Sa[[j,1]] = out_meanVar[[2]]
J[[j,1]] = out_meanVar[[3]]
} else {
ma[[j,1]] = mz[[j,1]]
Sa[[j,1]] = Sz[[j,1]]
J[[j,1]]= matrix(1, nrow(mz[[j,1]]), ncol(mz[[j,1]]))
}
# derivative for FC
if ((NN$collectDev > 0) & (actFunIdx[j] != 0)){
out_meanVarDev = meanVarDev(mz[[j,1]], Sz[[j,1]], actFunIdx[j], actBound[j])
mda[[j,1]] = out_meanVarDev[[1]]
Sda[[j,1]] = out_meanVarDev[[2]]
mdda[[j,1]] = out_meanVarDev[[3]]
Sdda[[j,1]] = out_meanVarDev[[4]]
}
}
states <- compressStates(mz, Sz, ma, Sa, J, mdxs, Sdxs, mxs, Sxs)
outputs <- list(states, mda, Sda, mdda, Sdda)
return(outputs)
}
#' Derivative calculation
#'
#' This function does derivative calculations.
#'
#' @param NN Lists the structure of the neural network
#' @param theta List of parameters
#' @param states List of states
#' @param mda Mean vectors of activation units' first derivative
#' @param Sda Covariance matrices of activation units' first derivative
#' @param mdda Mean vectors of activation units' second derivative
#' @param Sdda Covariance matrices of activation units' second derivative
#' @param dlayer layer from which derivatives will be in respect to
#' @return - Mean of first derivative of predicted responses
#' @return - Variance of first derivative of predicted responses
#' @return - Covariance between derivatives and inputs
#' @return - Mean of second derivative of predicted responses
#' @export
derivative <- function(NN, theta, states, mda, Sda, mdda, Sdda, dlayer){
# Initialization
out_extractParameters <- extractParameters(theta)
mw = out_extractParameters[[1]]
Sw = out_extractParameters[[2]]
out_extractStates <- extractStates(states)
Sz = out_extractStates[[2]]
ma = out_extractStates[[3]]
Sa = out_extractStates[[4]]
J = out_extractStates[[5]]
numLayers = length(NN$nodes)
actFunIdx = NN$actFunIdx
actBound = NN$actBound
B = NN$batchSize
rB = NN$repBatchSize
nodes = c(NN$nodes, NN$nodes[length(NN$nodes)])
layer = NN$layer
numParamsPerLayer_2 = NN$numParamsPerLayer_2
# derivative
mdg = createDevCellarray(nodes, numLayers, B, rB)
Sdg = mdg
Cdgz = mdg
mdge = mdg
mddg = mdg
Sddg = mdg
for (j in (numLayers-1):dlayer){
idxw = (numParamsPerLayer_2[1, j]+1):numParamsPerLayer_2[1, j+1]
if (NN$layer[j+1] == NN$layerEncoder$fc){
if ((NN$collectDev > 0) & (j == (numLayers-1))){
out_fcMeanVarDnode <- fcMeanVarDnode(mw[idxw], Sw[idxw], mda[[j,1]], Sda[[j,1]], nodes[j], nodes[j+1], B)
mdgk = out_fcMeanVarDnode[[1]]
Sdgk = out_fcMeanVarDnode[[2]]
out_fcCovaz <- fcCovaz(J[[j+1,1]], J[[j,1]], Sz[[j,1]], mw[idxw], nodes[j], nodes[j+1], B)
Caizi = out_fcCovaz[[1]]
Caozi = out_fcCovaz[[2]]
out_fcCovdz <- fcCovdz(ma[[j+1,1]], ma[[j,1]], Caizi, Caozi, actFunIdx[j+1], actFunIdx[j], nodes[j], nodes[j+1], B)
Cdozi = out_fcCovdz[[1]]
Cdizi = out_fcCovdz[[2]]
mdg[[j,1]] = matrix(rowSums(mdgk), nrow(mdgk), 1)
Sdg[[j,1]] = matrix(rowSums(Sdgk), nrow(Sdgk), 1)
mdge[[j,1]] = mdgk
Cdgk = covdx(0, mw[idxw], rep(0, NN$ny*B), 0, rep(1, NN$ny*B), Cdozi, Cdizi, nodes[j], nodes[j+1], 1, B)
Cdgz[[j,1]] = matrix(rowSums(Cdgk), nrow(Cdgk), 1)
# For second and higher order derivative
if (NN$collectDev > 1){
out_fcMeanVarDnode <- fcMeanVarDnode(mw[idxw], Sw[idxw], mdda[[j,1]], Sdda[[j,1]], nodes[j], nodes[j+1], B)
mddgk = out_fcMeanVarDnode[[1]]
Sddgk = out_fcMeanVarDnode[[2]]
mddg[[j,1]] = matrix(rowSums(mddgk), nrow(mddgk), 1)
Sddg[[j,1]] = matrix(rowSums(Sddgk), nrow(Sddgk), 1)
# Matrix of combination types (1 when second derivative is used, >1 thereafter, 0 when not still used)
# Read from RIGHT to LEFT (column number corresponds to layer number)
# (e.g. in second derivative, only one product wd is of second order, then the other products are first order and multiplied with wd products of their layer.
# terms before product' second derivative are of first order and NOT multiplied with wd products of their layer)
combinations_matrix = apply(upper.tri(diag(numLayers-1), diag = TRUE),1,cumsum)
mddg_combinations = matrix(list(createDevCellarray(nodes, numLayers, B, rB)), nrow = nrow(combinations_matrix), ncol = 1)
for (i in 1:nrow(combinations_matrix)){
# Cases which start with first order wd
if (combinations_matrix[i,j] == 0){
mddg_combinations[[i]][[j]] = mdg[[j,1]]
}
# Cases which start with second order wd
else if (combinations_matrix[i,j] == 1){
mddg_combinations[[i]][[j]] = mddg[[j,1]]
}
}
}
} else if ((NN$collectDev > 0) & (j < (numLayers-1))){
out_fcDerivative <- fcDerivative(mw[idxw], Sw[idxw], mw[idxwo], J[[j+1,1]], J[[j,1]],
ma[[j+1,1]], Sa[[j+1,1]], ma[[j,1]], Sa[[j,1]],
Sz[[j,1]], mda[[j,1]], Sda[[j,1]],
mdg[[j+1,1]], mdge[[j+1,1]], Sdg[[j+1,1]], mdg[[j+2,1]],
actFunIdx[j+1], actFunIdx[j], nodes[j], nodes[j+1], nodes[j+2], B)
mdgk = out_fcDerivative[[1]]
Sdgk = out_fcDerivative[[2]]
Cdgzk = out_fcDerivative[[3]]
mdg[[j,1]] = matrix(rowSums(mdgk), nrow(mdgk), 1)
Sdg[[j,1]] = matrix(rowSums(Sdgk), nrow(Sdgk), 1)
mdge[[j,1]] = mdgk
Cdgz[[j,1]] = matrix(rowSums(Cdgzk), nrow(Cdgzk), 1)
# For second and higher order derivative
if (NN$collectDev > 1){
Caow = out_fcDerivative[[4]]
Caoai = out_fcDerivative[[5]]
Cdow = out_fcDerivative[[6]]
Cdodi = out_fcDerivative[[7]]
Cdowdi = out_fcDerivative[[8]]
# First derivative of current layer (wd)
out_fcMeanVarDnode <- fcMeanVarDnode(mw[idxw], Sw[idxw], mda[[j,1]], Sda[[j,1]], nodes[j], nodes[j+1], B)
mpdi = out_fcMeanVarDnode[[1]]
# First derivative of next layer (wd)
out_fcMeanVarDnode <- fcMeanVarDnode(mw[idxwo], Sw[idxwo], mda[[j+1,1]], Sda[[j+1,1]], nodes[j+1], nodes[j+2], B)
mpdo = out_fcMeanVarDnode[[1]]
# Second derivative of current layer (wdd)
out_fcMeanVarDnode <- fcMeanVarDnode(mw[idxw], Sw[idxw], mdda[[j,1]], Sdda[[j,1]], nodes[j], nodes[j+1], B)
mpddi = out_fcMeanVarDnode[[1]]
for (i in 1:nrow(combinations_matrix)){
# Case where to multiply first order wd to previous first order wd
if (combinations_matrix[i,j] == 0){
mddg_combinations[[i]][[j]] = mdg[[j,1]]
}
# Case where to multiply second order wdd to previous first order wd
else if (combinations_matrix[i,j] == 1){
mddgk = fcDerivative2(mw[idxw], mw[idxwo], ma[[j+1,1]], ma[[j,1]], mda[[j,1]],
mdda[[j,1]], mpddi, mddg_combinations[[i]][[j+1]], mddg_combinations[[i]][[j+2]], Caoai, Cdow,
Cdodi, actFunIdx[j], nodes[j], nodes[j+1], nodes[j+2], B)
mddg_combinations[[i]][[j]] = matrix(rowSums(mddgk), nrow(mddgk), 1)
}
# Case where to multiply first order wd*wd to second order wdd
else if (combinations_matrix[i,j] == 2){
mddg_combinations[[i]][[j]] = fcDerivative3(mw[idxw], Sw[idxw], mw[idxwo], ma[[j+1,1]], ma[[j,1]], mda[[j+1,1]],
mda[[j,1]], Sda[[j,1]], mpdi, mddg_combinations[[i]][[j+1]], mddg_combinations[[i]][[j+2]], Caow, Caoai, Cdow,
Cdodi, actFunIdx[j+1], actFunIdx[j], nodes[j], nodes[j+1], nodes[j+2], B, j == dlayer)
}
# Case where to multiply first order wd*wd to previous terms' product wd*wd (first one)
else if (combinations_matrix[i,j] == 3){
mddg_combinations[[i]][[j]] = fcDerivative4(mw[idxw], Sw[idxw], mw[idxwo], ma[[j+1,1]], ma[[j,1]], mda[[j+1,1]],
mda[[j,1]], Sda[[j,1]], mpdo, mpdi, mddg_combinations[[i]][[j+1]], mddg_combinations[[i]][[j+2]], Cdowdi,
actFunIdx[j+1], actFunIdx[j], nodes[j], nodes[j+1], nodes[j+2], B, j == dlayer)
}
# Case where to multiply first order wd*wd to previous terms' product wd*wd (not first one)
else if (combinations_matrix[i,j] > 3){
mddg_combinations[[i]][[j]] = fcDerivative5(mw[idxw], Sw[idxw], mw[idxwo], ma[[j+1,1]], ma[[j,1]], mda[[j+1,1]],
mda[[j,1]], Sda[[j,1]], mpdo, mpdi, mddg_combinations[[i]][[j+1]], mddg_combinations[[i]][[j+2]], Cdowdi,
actFunIdx[j+1], actFunIdx[j], nodes[j], nodes[j+1], nodes[j+2], B, j == dlayer)
}
}
# Only sum combinations that have a second derivative so far to have g'' with respect to current layer j
mddg[[j,1]] = matrix(0, nrow(mddg_combinations[[1]][[j]]), 1)
for (i in 1:nrow(combinations_matrix)){
if (combinations_matrix[i,j] > 0){
mddg[[j,1]] = mddg[[j,1]] + matrix(rowSums(mddg_combinations[[i]][[j]]), nrow(mddg_combinations[[i]][[j]]), 1)
}
}
}
}
}
idxwo = idxw
}
outputs <- list(mdg, Sdg, Cdgz, mddg)
return(outputs)
}
#' Backpropagation
#'
#' This function feeds the neural network backward from responses to input data.
#'
#' @param NN Lists the structure of the neural network
#' @param mp Mean vectors of parameters for each layer \eqn{\mu_{\theta}}
#' @param Sp Covariance matrices of parameters for each layer \eqn{\Sigma_{\theta}}
#' @param mz Mean vectors of units for each layer \eqn{\mu_{Z}}
#' @param Sz Covariance matrices of units for each layer \eqn{\Sigma_{Z}}
#' @param Czw Covariance matrices between units and weights for each layer \eqn{\Sigma_{ZW}}
#' @param Czb Covariance matrices between units and biases for each layer \eqn{\Sigma_{ZB}}
#' @param Czz Covariance matrices between previous and current units for each layer \eqn{\Sigma_{ZZ^{+}}}
#' @seealso \code{\link{backwardHiddenStateUpdate}},
#' \code{\link{backwardParameterUpdate}}, \code{\link{forwardHiddenStateUpdate}}
#' @param y Response data
#' @return - Updated mean vectors of parameters for each layer \eqn{\mu_{\theta}}
#' @return - Updated covariance matrices of parameters for each layer \eqn{\Sigma_{\theta}}
#' @export
feedBackward <- function(NN, mp, Sp, mz, Sz, Czw, Czb, Czz, y){
# Initialization
numLayers = length(NN$nodes)
mpUd = matrix(list(), nrow = numLayers - 1, ncol = 1)
SpUd = matrix(list(), nrow = numLayers - 1, ncol = 1)
mzUd = matrix(list(), nrow = numLayers - 1, ncol = 1)
SzUd = matrix(list(), nrow = numLayers - 1, ncol = 1)
lHL = numLayers - 1
if (NN$ny == length(NN$sv)){
sv = t(NN$sv)
} else {sv = matrix(NN$sv, nrow = NN$ny, ncol = 1)}
# Update hidden states for the last hidden layer
R = matrix(sv^2, nrow = nrow(Sz[[lHL + 1]]), ncol = 1)
Szv = Sz[[lHL + 1]] + R
out_forwardHiddenStateUpdate = forwardHiddenStateUpdate(mz[[lHL+1]], Sz[[lHL+1]], mz[[lHL+1]], Szv, Sz[[lHL+1]], y)
mzUd[[lHL+1]] = out_forwardHiddenStateUpdate[[1]]
SzUd[[lHL+1]] = out_forwardHiddenStateUpdate[[2]]
for (k in (numLayers-1):1){
# Update parameters
Czp = buildCzp(Czw[[k+1]], Czb[[k+1]], NN$nodes[k+1], NN$nodes[k], NN$batchSize)
out_backwardParameterUpdate = backwardParameterUpdate(mp[[k]], Sp[[k]], mz[[k+1]], Sz[[k+1]], SzUd[[k+1]], Czp, mzUd[[k+1]], NN$idxSzpUd[[k]])
mpUd[[k]] = out_backwardParameterUpdate[[1]]
SpUd[[k]] = out_backwardParameterUpdate[[2]]
# Update hidden states
if (k > 1){
Czzloop = buildCzz(Czz[[k+1]], NN$nodes[k+1], NN$nodes[k], NN$batchSize)
out_backwardHiddenStateUpdate = backwardHiddenStateUpdate(mz[[k]], Sz[[k]], mz[[k+1]], Sz[[k+1]], SzUd[[k+1]], Czzloop, mzUd[[k+1]], NN$idxSzzUd[[k]])
mzUd[[k]] = out_backwardHiddenStateUpdate[[1]]
SzUd[[k]] = out_backwardHiddenStateUpdate[[2]]
}
}
outputs <- list(mpUd, SpUd)
return(outputs)
}
#' Backpropagation (states' deltas)
#'
#' This function calculates states' deltas.
#'
#' @param NN Lists the structure of the neural network
#' @param theta List of parameters
#' @param states List of states
#' @param y Response data
#' @param Sy Variance of responses
#' @param udIdx Specific update IDs
#' @return - Delta of mean vector of units given \eqn{y} \eqn{\mu_{Z}|y} at all layers
#' @return - Delta of covariance matrix of units given \eqn{y} \eqn{\Sigma_{Z}|y} at all layers
#' @export
<- function(NN, theta, states, y, Sy, udIdx){
# Initialization
out_extractParameters <- extractParameters(theta)
mw = out_extractParameters[[1]]
out_extractStates <- extractStates(states)
mz = out_extractStates[[1]]
Sz = out_extractStates[[2]]
ma = out_extractStates[[3]]
Sa = out_extractStates[[4]]
J = out_extractStates[[5]]
mdxs = out_extractStates[[6]]
Sdxs = out_extractStates[[7]]
Sxs = out_extractStates[[9]]
numLayers = length(NN$nodes)
B = NN$batchSize
rB = NN$repBatchSize
nodes = NN$nodes
layer = NN$layer
lHL = numLayers - 1
numParamsPerLayer_2 = NN$numParamsPerLayer_2
deltaM = matrix(list(), nrow = numLayers, ncol = 1)
deltaS = matrix(list(), nrow = numLayers, ncol = 1)
# Update hidden states for the last hidden layer
if (NN$lastLayerUpdate == 1){
if (is.null(Sy)){
R = NN$sv^2
} else {
R = NN$sv^2 + Sy
}
if (is.null(udIdx)){
Szv = Sa[[length(Sa),1]] + R
out_forwardHiddenStateUpdate = forwardHiddenStateUpdate(0, 0, ma[[lHL+1,1]], Szv, J[[lHL+1,1]]*Sz[[lHL+1,1]], y)
deltaMz = out_forwardHiddenStateUpdate[[1]]
deltaSz = out_forwardHiddenStateUpdate[[2]]
} else {
mzf = ma[[length(ma),1]][udIdx]
Szf = J[[lHL+1,1]][udIdx] * Sz[[lHL+1,1]][udIdx]
ys = y
Szv = Sa[[length(Sa),1]][udIdx] + R
deltaMz = matrix(0, nrow(mz[[lHL+1,1]]), ncol(mz[[lHL+1,1]]))
deltaSz = matrix(0, nrow(Sz[[lHL+1,1]]), ncol(Sz[[lHL+1,1]]))
out_forwardHiddenStateUpdate = forwardHiddenStateUpdate(0, 0, mzf, Szv, Szf, ys)
deltaMz[udIdx] = out_forwardHiddenStateUpdate[[1]]
deltaSz[udIdx] = out_forwardHiddenStateUpdate[[2]]
}
} else {
deltaMz = y
deltaSz = Sy
}
for (j in (numLayers-1):1){
if (is.null(mdxs[[j+1,1]])){
nSz = Sz[[j+1,1]]
} else {
nSz = Sdxs[[j+1,1]]
}
if (is.null(mdxs[[j,1]])){
cSz = Sz[[j,1]]
} else {
cSz = Sdxs[[j,1]]
}
cSxs = Sxs[[j,1]]
idxw = (numParamsPerLayer_2[1, j]+1):numParamsPerLayer_2[1, j+1]
# Innovation vector
out_innovationVector = innovationVector(nSz, deltaMz, deltaSz)
deltaM[[j+1,1]] = out_innovationVector[[1]]
deltaS[[j+1,1]] = out_innovationVector[[2]]
# Max pooling
if (layer[j+1] == NN$layerEncoder$fc){
if ((j > 1)|(NN$convariateEstm == 1)){
if ((B == 1) & (rB == 1)){
out_fcHiddenStateBackwardPassB1 <- fcHiddenStateBackwardPassB1(cSz, cSxs, J[[j,1]], mw[idxw], deltaM[[j+1,1]], deltaS[[j+1,1]], nodes[j], nodes[j+1])
deltaMz = out_fcHiddenStateBackwardPassB1[[1]]
deltaSz = out_fcHiddenStateBackwardPassB1[[2]]
} else {
out_fcHiddenStateBackwardPass <- fcHiddenStateBackwardPass(cSz, cSxs, J[[j,1]], mw[idxw], deltaM[[j+1,1]], deltaS[[j+1,1]], nodes[j], nodes[j+1], B, rB)
deltaMz = out_fcHiddenStateBackwardPass[[1]]
deltaSz = out_fcHiddenStateBackwardPass[[2]]
}
}
}
}
outputs <- list(deltaM, deltaS)
return(outputs)
}
#' Backpropagation (parameters' deltas)
#'
#' This function calculates parameter's deltas.
#'
#' @param NN Lists the structure of the neural network
#' @param theta List of parameters
#' @param states List of states
#' @param deltaM Delta of mean vector of units given \eqn{y} \eqn{\mu_{Z}|y} at all layers
#' @param deltaS Delta of covariance matrix of units given \eqn{y} \eqn{\Sigma_{Z}|y} at all layers
#' @return Parameters' deltas (mean and covariance for each)
#' @export
parameterBackwardPass <- function(NN, theta, states, deltaM, deltaS){
# Initialization
out_extractParameters <- extractParameters(theta)
mw = out_extractParameters[[1]]
Sw = out_extractParameters[[2]]
mb = out_extractParameters[[3]]
Sb = out_extractParameters[[4]]
mwx = out_extractParameters[[5]]
Swx = out_extractParameters[[6]]
mbx = out_extractParameters[[7]]
Sbx = out_extractParameters[[8]]
out_extractStates <- extractStates(states)
ma = out_extractStates[[3]]
numLayers = length(NN$nodes)
B = NN$batchSize
rB = NN$repBatchSize
nodes = NN$nodes
layer = NN$layer
numParamsPerLayer_2 = NN$numParamsPerLayer_2
deltaMw = matrix(mw, ncol = 1)
deltaSw = matrix(Sw, ncol = 1)
deltaMb = matrix(mb, ncol = 1)
deltaSb = matrix(Sb, ncol = 1)
deltaMwx = matrix(mwx, ncol = 1)
deltaSwx = matrix(Swx, ncol = 1)
deltaMbx = matrix(mbx, ncol = 1)
deltaSbx = matrix(Sbx, ncol = 1)
for (j in (numLayers-1):1){
idxw = (numParamsPerLayer_2[1, j]+1):numParamsPerLayer_2[1, j+1]
idxb = (numParamsPerLayer_2[2, j]+1):numParamsPerLayer_2[2, j+1]
# Convolutional
if (layer[j+1] == NN$layerEncoder$fc){
if ((j > 1)|(NN$convariateEstm == 1)){
if ((B == 1) & (rB == 1)){
out_fcParameterBackwardPassB1 <- fcParameterBackwardPassB1(Sw[idxw], Sb[idxb], ma[[j,1]], deltaM[[j+1,1]], deltaS[[j+1,1]], nodes[j], nodes[j+1])
deltaMw[idxw] = out_fcParameterBackwardPassB1[[1]]
deltaSw[idxw] = out_fcParameterBackwardPassB1[[2]]
deltaMb[idxb] = out_fcParameterBackwardPassB1[[3]]
deltaSb[idxb] = out_fcParameterBackwardPassB1[[4]]
} else {
out_fcParameterBackwardPass <- fcParameterBackwardPass(deltaMw[idxw], deltaSw[idxw], deltaMb[idxb], deltaSb[idxb], Sw[idxw], Sb[idxb], ma[[j,1]], deltaM[[j+1,1]], deltaS[[j+1,1]], nodes[j], nodes[j+1], B, rB)
deltaMw[idxw] = out_fcParameterBackwardPass[[1]]
deltaSw[idxw] = out_fcParameterBackwardPass[[2]]
deltaMb[idxb] = out_fcParameterBackwardPass[[3]]
deltaSb[idxb] = out_fcParameterBackwardPass[[4]]
}
}
}
}
deltaTheta = compressParameters(deltaMw, deltaSw, deltaMb, deltaSb, deltaMwx, deltaSwx, deltaMbx, deltaSbx)
return(deltaTheta)
}
#' Backpropagation (states' deltas) for fully connected layers (many observations)
#'
#' This function calculates units' deltas at a given layer when using more than one
#' observation at the time.
#'
#' @param Sz Covariance of units from current layer
#' @param Sxs Null by default (not used yet)
#' @param J Jacobian of current layer
#' @param mw Mean vector of weights for the current layer
#' @param deltaM Delta of mean vector of the next layer units given \eqn{y} \eqn{\mu_{Z}|y}
#' @param deltaS Delta of covariance matrix of the next layer units given \eqn{y} \eqn{\Sigma_{Z}|y}
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param B Batch size
#' @param rB Number of times batch size is repeated
#' @return - Delta of mean vector of current layer units given \eqn{y} \eqn{\mu_{Z}|y}
#' @return - Delta of covariance matrix of current layer units given \eqn{y} \eqn{\Sigma_{Z}|y}
#' @export
fcHiddenStateBackwardPass <- function(Sz, Sxs, J, mw, deltaM, deltaS, ni, no, B, rB){
deltaMz = Sz
deltaSz = Sz
deltaMzx = Sxs
deltaSzx = Sxs
mw = matrix(rep(t(matrix(mw, ni, no)),B), nrow = ni*B, byrow = TRUE)
if (is.null(Sxs)){
for (t in 1:rB){
Czz = J[,t]*Sz[,t]*mw
deltaMzloop = t(matrix(matrix(rep(t(matrix(deltaM[,t], no, B)), ni), ncol = B, byrow = TRUE), no, ni*B))
deltaSzloop = t(matrix(matrix(rep(t(matrix(deltaS[,t], no, B)), ni), ncol = B, byrow = TRUE), no, ni*B))
deltaMzloop = Czz*deltaMzloop
deltaSzloop = Czz*deltaSzloop*Czz
deltaMz[,t] = matrix(rowSums(deltaMzloop), ncol=1)
deltaSz[,t] = matrix(rowSums(deltaSzloop), ncol=1)
deltaMzx[,t] = NULL
deltaSzx[,t] = NULL
}
} else {
for (t in 1:rB){
Czz = J[,t]*Sz[,t]*mw
Czx = J[,t]*Sxs[,t]*mw
deltaMloop = t(matrix(matrix(rep(t(matrix(deltaM[,t], no, B)), ni), ncol = B, byrow = TRUE), no, ni*B))
deltaSloop = t(matrix(matrix(rep(t(matrix(deltaS[,t], no, B)), ni), ncol = B, byrow = TRUE), no, ni*B))
deltaMzloop = Czz*deltaMloop
deltaSzloop = Czz*deltaSloop*Czz
deltaMxsloop = Czx*deltaMloop
deltaSxsloop = Czx*deltaSloop*Czx
deltaMz[,t] = matrix(rowSums(deltaMzloop), nrow(deltaMzloop), 1)
deltaSz[,t] = matrix(rowSums(deltaSzloop), nrow(deltaSzloop), 1)
deltaMzx[,t] = matrix(rowSums(deltaMxsloop), nrow(deltaMxsloop), 1)
deltaSzx[,t] = matrix(rowSums(deltaSxsloop), nrow(deltaSxsloop), 1)
}
}
outputs <- list(deltaMz, deltaSz, deltaMzx, deltaSzx)
return(outputs)
}
#' Backpropagation (states' deltas) for fully connected layers (one observation)
#'
#' This function calculates units' deltas at a given layer when using one observation at the time.
#'
#' @param Sz Covariance of the units from current layer
#' @param Sxs Null by default (not used yet)
#' @param J Jacobian of current layer
#' @param mw Mean vector of weights for the current layer
#' @param deltaM Delta of mean vector of next layer units given \eqn{y} \eqn{\mu_{Z}|y}
#' @param deltaS Delta of covariance matrix of next layer units given \eqn{y} \eqn{\Sigma_{Z}|y}
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @return - Delta of mean vector of current layer units given \eqn{y} \eqn{\mu_{Z}|y}
#' @return - Delta of covariance matrix of current layer units given \eqn{y} \eqn{\Sigma_{Z}|y}
#' @export
fcHiddenStateBackwardPassB1 <- function(Sz, Sxs, J, mw, deltaM, deltaS, ni, no){
mw = matrix(mw, ni, no)
deltaMzx = Sxs
deltaSzx = Sxs
if (is.null(Sxs)){
deltaMloop = matrix(rep(deltaM, ni), nrow = ni, byrow = TRUE)
deltaSloop = matrix(rep(deltaS, ni), nrow = ni, byrow = TRUE)
Caz = J*Sz
Caz = matrix(Caz, nrow(mw), ncol(mw))
Cwa = mw*Caz
deltaMzloop = Cwa*deltaMloop
deltaSzloop = (Cwa^2)*deltaSloop
deltaMz = matrix(rowSums(deltaMzloop), nrow(deltaMzloop), 1)
deltaSz = matrix(rowSums(deltaSzloop), nrow(deltaSzloop), 1)
deltaMzx = NULL
deltaSzx = NULL
} else {
deltaMloop = matrix(rep(deltaM, ni), nrow = ni, byrow = TRUE)
deltaSloop = matrix(rep(deltaS, ni), nrow = ni, byrow = TRUE)
Caz = J*Sz
Caxs = J*Sxs
Caz = matrix(Caz, nrow(mw), ncol(mw))
Caxs = matrix(Caz, nrow(mw), ncol(mw))
out_vectorized4delta <- vectorized4delta(mw, Caz, Caxs, deltaMloop, deltaSloop)
deltaMzloop = out_vectorized4delta[[1]]
deltaSzloop = out_vectorized4delta[[2]]
deltaMzsloop = out_vectorized4delta[[3]]
deltaSzsloop = out_vectorized4delta[[4]]
deltaMz = matrix(rowSums(deltaMzloop), nrow(deltaMzloop), 1)
deltaSz = matrix(rowSums(deltaSzloop), nrow(deltaSzloop), 1)
deltaMzx = matrix(rowSums(deltaMzsloop), nrow(deltaMzsloop), 1)
deltaSzx = matrix(rowSums(deltaSzsloop), nrow(deltaSzsloop), 1)
}
outputs <- list(deltaMz, deltaSz, deltaMzx, deltaSzx)
return(outputs)
}
#' Derivatives for fully connected layers
#'
#' This function calculates mean and variance of derivatives and covariance of derivative and input layers.
#'
#' @param mw Mean vector of weights for the current layer
#' @param Sw Covariance of weights for the current layer
#' @param mwo Mean vector of weights for the next layer
#' @param Jo Jacobian of next layer
#' @param J Jacobian of current layer
#' @param mao Mean vector of activation units from next layer
#' @param Sao Covariance of activation units from next layer
#' @param mai Mean vector of activation units from current layer
#' @param Sai Covariance of activation units from current layer
#' @param Szi Covariance of units from current layer
#' @param mdai Mean vector of activation units' first derivative from current layer
#' @param Sdai Covariance of activation units' first derivative from current layer
#' @param mdgo Mean vector of product of derivatives in next layer
#' @param mdgoe Mean of product of derivatives at each node in next layer
#' @param Sdgo Variance of first derivatives in next layer
#' @param mdgo2 Mean vector of product of derivatives in second next layer
#' @param acto Activation function index for next layer defined by \code{\link{activationFunIndex}}
#' @param acti Activation function index for current layer defined by \code{\link{activationFunIndex}}
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param no2 Number of units in second next layer
#' @param B Batch size
#' @return Mean vector of first derivatives
#' @return Covariance matrix of first derivatives
#' @return Covariance matrix of first derivative and input layer
#' @return Covariance between activation units and weights
#' @return Covariance between activation units from current and next layers
#' @return Covariance between first derivatives and weights
#' @return Covariance between first derivatives from current and next layers
#' @return Covariance between first derivatives from next layer and weights times derivatives from current layer
#' @export
fcDerivative <- function(mw, Sw, mwo, Jo, J, mao, Sao, mai, Sai, Szi, mdai, Sdai, mdgo, mdgoe, Sdgo, mdgo2, acto, acti, ni, no, no2, B){
out_fcMeanVarDnode <- fcMeanVarDnode(mw, Sw, mdai, Sdai, ni, no, B)
mpdi = out_fcMeanVarDnode[[1]]
Spdi = out_fcMeanVarDnode[[2]]
out_fcCovawaa <- fcCovawaa(mw, Sw, Jo, mai, Sai, ni, no, B)
Caow = out_fcCovawaa[[1]]
Caoai = out_fcCovawaa[[2]]
out_fcCovdwddd <- fcCovdwddd(mao, Sao, mai, Sai, Caow, Caoai, acto, acti, ni, no, B)
Cdow = out_fcCovdwddd[[1]]
Cdodi = out_fcCovdwddd[[2]]
Cdowdi = fcCovdwd(mdai, mw, Cdow, Cdodi, ni, no, B)
Cdgodgi = fcCovDlayer(mdgo2, mwo, Cdowdi, ni, no, no2, B)
out_fcMeanVarDlayer <- fcMeanVarDlayer(mpdi, Spdi, mdgo, mdgoe, Sdgo, Cdgodgi, ni, no, no2, B)
mdgi = out_fcMeanVarDlayer[[1]]
Sdgi = out_fcMeanVarDlayer[[2]]
out_fcCovaz <- fcCovaz(Jo, J, Szi, mw, ni, no, B)
Caizi = out_fcCovaz[[1]]
Caozi = out_fcCovaz[[2]]
out_fcCovdz <- fcCovdz(mao, mai, Caizi, Caozi, acto, acti, ni, no, B)
Cdozi = out_fcCovdz[[1]]
Cdizi = out_fcCovdz[[2]]
Cdx = covdx(mwo, mw, mdgo2, mpdi, mdgoe, Cdozi, Cdizi, ni, no, no2, B)
outputs <- list(mdgi, Sdgi, Cdx, Caow, Caoai, Cdow, Cdodi, Cdowdi)
return(outputs)
}
#' Second derivatives for fully connected layers
#'
#' This function calculates mean of product of derivatives, when new product term involves second derivatives (wdd).
#'
#' @param mw Mean vector of weights for the current layer
#' @param mwo Mean vector of weights for the next layer
#' @param mao Mean vector of activation units from next layer
#' @param mai Mean vector of activation units from current layer
#' @param mdai Mean vector of activation units' first derivative from current layer
#' @param mddai Mean vector of activation units' second derivative from current layer
#' @param mpddi Mean vector of second derivative product wdd of current layer
#' @param mdgo Mean vector of product of derivatives in next layer
#' @param mdgo2 Mean vector of product of derivatives in second next layer
#' @param Caoai Covariance between activation units from current and next layers
#' @param Cdow Covariance between first derivatives and weights
#' @param Cdodi Covariance between first derivatives from current and next layers
#' @param acti Activation function index for current layer defined by \code{\link{activationFunIndex}}
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param no2 Number of units in second next layer
#' @param B Batch size
#' @return Mean of product terms for second derivative calculations
#' @export
fcDerivative2 <- function(mw, mwo, mao, mai, mdai, mddai, mpddi, mdgo, mdgo2, Caoai, Cdow, Cdodi, acti, ni, no, no2, B){
out_fcCovdaddd <- fcCovdaddd(mao, mai, mdai, Caoai, Cdodi, acti, ni, no, B)
Cdoai = out_fcCovdaddd[[1]]
Cdoddi = out_fcCovdaddd[[2]]
Cdowddi = fcCovdwd(mddai, mw, Cdow, Cdoddi, ni, no, B)
Cdgoddgi = fcCovDlayer(mdgo2, mwo, Cdowddi, ni, no, no2, B)
out_fcMeanVarDlayer <- fcMeanVarDlayer(mpddi, matrix(0, nrow(mpddi), ncol(mpddi)), mdgo, matrix(0, nrow(mpddi), ncol(mpddi)), matrix(0, nrow(mpddi), ncol(mpddi)), Cdgoddgi, ni, no, no2, B)
mddgi = out_fcMeanVarDlayer[[1]]
return(mddgi)
}
#' Products of first derivatives multiplied to second derivative for fully connected layers
#'
#' This function calculates mean of product of derivatives, when new product term involves product
#' of two first derivatives (wdwd) from the same layer multiplied to second derivatives (wdd) from next layer.
#'
#' @param mw Mean vector of weights for the current layer
#' @param Sw Covariance of weights for the current layer
#' @param mwo Mean vector of weights for the next layer
#' @param mao Mean vector of activation units from next layer
#' @param mai Mean vector of activation units from current layer
#' @param mdao Mean vector of activation units' first derivative from next layer
#' @param mdai Mean vector of activation units' first derivative from current layer
#' @param Sdai Covariance of activation units' first derivative from current layer
#' @param mpdi Mean vector of first derivative product wd of current layer
#' @param mdgo Mean vector of product of derivatives in next layer
#' @param mdgo2 Mean vector of product of derivatives in second next layer
#' @param Caow Covariance between activation units and weights
#' @param Caoai Covariance between activation units from current and next layers
#' @param Cdow Covariance between first derivatives and weights
#' @param Cdodi Covariance between first derivatives from current and next layers
#' @param acto Activation function index for next layer defined by \code{\link{activationFunIndex}}
#' @param acti Activation function index for current layer defined by \code{\link{activationFunIndex}}
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param no2 Number of units in second next layer
#' @param B Batch size
#' @param dlayer TRUE if derivatives will be in respect to current layer
#' @return Mean of product terms for second derivative calculations
#' @export
fcDerivative3 <- function(mw, Sw, mwo, mao, mai, mdao, mdai, Sdai, mpdi, mdgo, mdgo2, Caow, Caoai, Cdow, Cdodi, acto, acti, ni, no, no2, B, dlayer){
out_fcCovaddddddw <- fcCovaddddddw(mao, mai, mdao, Caoai, Cdodi, Caow, Cdow, acto, acti, ni, no, B)
Cddodi = out_fcCovaddddddw[[1]]
Cddow = out_fcCovaddddddw[[2]]
Cddowdi = fcCovdwd(mdai, mw, Cddow, Cddodi, ni, no, B)
Cddgodgik = fcCovDlayer(mdgo2, mwo, Cddowdi, ni, no, no2, B)
Cddgodgik = rowSums(matrix(Cddgodgik, B*ni*no, no2))
Cddgodgik = matrix(Cddgodgik, B*ni, no)
if (dlayer == FALSE){
# Combination of products of first derivative of current layer (wd)*(wd) (iterations on nodes from same layer (with weights pointing to same next layer node))
mpdi2n <- fcCombinaisonDnode(mpdi, mw, Sw, mdai, Sdai, ni, no, B)
Cwdowdiwdi <- fcCovwdowdiwdi(mpdi, Cddgodgik, ni, no, B)
mddgi <- fcMeanDlayer2row(mpdi, mpdi2n, mdgo, Cwdowdiwdi, ni, no, no2, B)
} else {
# (wd)^2 only
mpdi2wn <- fcCombinaisonDweightNode(mpdi, mw, Sw, mdai, Sdai, ni, no, B)
Cwdowdi2 <- fcCovwdowdi2(mpdi, Cddgodgik)
mdgo = t(matrix(matrix(rep(t(matrix(mdgo, no, B)), ni), nrow = no*ni, ncol = B, byrow = TRUE), no, ni*B))
mddgi = mdgo*mpdi2wn + Cwdowdi2
mddgi = matrix(rowSums(mddgi), nrow(mddgi), 1)
}
return(mddgi)
}
#' Products of first derivatives multiplied to products of first derivatives for fully connected layers
#'
#' This function calculates mean of product of derivatives, when new product term involves product
#' of two first derivatives (wdwd) from the same layer multiplied to product
#' of two first derivatives (wdwd) from next layer, when second next layer is second derivatives (wdd).
#'
#' @param mw Mean vector of weights for the current layer
#' @param Sw Covariance of weights for the current layer
#' @param mwo Mean vector of weights for the next layer
#' @param mao Mean vector of activation units from next layer
#' @param mai Mean vector of activation units from current layer
#' @param mdao Mean vector of activation units' first derivative from next layer
#' @param mdai Mean vector of activation units' first derivative from current layer
#' @param Sdai Covariance of activation units' first derivative from current layer
#' @param mpdo Mean vector of first derivative product wd of next layer
#' @param mpdi Mean vector of first derivative product wd of current layer
#' @param mdgo Mean vector of product of derivatives in next layer
#' @param mdgo2 Mean vector of product of derivatives in second next layer
#' @param Cdowdi Covariance between first derivatives from next layer and weights times derivatives from current layer
#' @param acto Activation function index for next layer defined by \code{\link{activationFunIndex}}
#' @param acti Activation function index for current layer defined by \code{\link{activationFunIndex}}
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param no2 Number of units in second next layer
#' @param B Batch size
#' @param dlayer TRUE if derivatives will be in respect to current layer
#' @return Mean of product terms for second derivative calculations
#' @export
fcDerivative4 <- function(mw, Sw, mwo, mao, mai, mdao, mdai, Sdai, mpdo, mpdi, mdgo, mdgo2, Cdowdi, acto, acti, ni, no, no2, B, dlayer){
# Combination of products of first derivative of current layer (wd)*(wd) (iterations on weights on the same node)
mpdi2w <- fcCombinaisonDweight(mpdi, mw, Sw, mdai, Sdai, ni, no, B)
Cdgodgi <- fcCovDlayer(mdgo2, mwo, Cdowdi, ni, no, no2, B)
if (dlayer == FALSE){
# Combination of products of first derivative of current layer (wd)*(wd) (iterations on nodes from same layer (with weights pointing to same next layer node))
mpdi2n <- fcCombinaisonDnode(mpdi, mw, Sw, mdai, Sdai, ni, no, B)
# All possible combinations
mpdi2wnAll <- fcCombinaisonDweightNodeAll(mpdi, mpdi2n, mpdi2w, ni, no, B)
Cwdowdowdiwdi <- fcCwdowdowdiwdi(mpdi, mpdo, Cdgodgi, acti, ni, no, no2, B)
mdgo = array(matrix(rep(rep(matrix(mdgo, ncol = no, byrow = TRUE), times = ni), each = ni), B*ni, no*no), c(B*ni, no, no))
mdgoA = array(0, c(B*ni, no, ni*no))
for (b in 0:(no-1)){
for (i in (b*ni+1):(b*ni+ni)){
mdgoA[,,i] = mdgo[,,b+1]
}
}
md = mpdi2wnAll * mdgoA
mddgi = md + Cwdowdowdiwdi
# Come back to Bni x ni matrix
mddgi = matrix(apply(mddgi, 3, rowSums), B*ni*ni, no)
mddgi = matrix(rowSums(mddgi), B*ni, ni)
} else {
Cwdowdowwdi2 <- fcCwdowdowwdi2(mpdi, mpdo, Cdgodgi, acti, ni, no, no2, B)
mddgi <- fcMeanDlayer2array(mpdi2w, mdgo, Cwdowdowwdi2, ni, no, B)
mddgi = matrix(rowSums(mddgi), nrow(mddgi), 1)
}
return(mddgi)
}
#' Products of first derivatives multiplied to products of first derivatives (not only last layer) for fully connected layers
#'
#' This function calculates mean of product of derivatives, when new product term involves product
#' of two first derivatives (wdwd) from the same layer multiplied to product
#' of two first derivatives (wdwd) from next and second next layers.
#'
#' @param mw Mean vector of weights for the current layer
#' @param Sw Covariance of weights for the current layer
#' @param mwo Mean vector of weights for the next layer
#' @param mao Mean vector of activation units from next layer
#' @param mai Mean vector of activation units from current layer
#' @param mdao Mean vector of activation units' first derivative from next layer
#' @param mdai Mean vector of activation units' first derivative from current layer
#' @param Sdai Covariance of activation units' first derivative from current layer
#' @param mpdo Mean vector of first derivative product wd of next layer
#' @param mpdi Mean vector of first derivative product wd of current layer
#' @param mdgo Mean vector of product of derivatives in next layer
#' @param mdgo2 Mean vector of product of derivatives in second next layer
#' @param Cdowdi Covariance between derivatives from next layer and weights times derivatives from current layer
#' @param acto Activation function index for next layer defined by \code{\link{activationFunIndex}}
#' @param acti Activation function index for current layer defined by \code{\link{activationFunIndex}}
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param no2 Number of units in second next layer
#' @param B Batch size
#' @param dlayer TRUE if derivatives will be in respect to current layer
#' @return Mean of product terms for second derivative calculations
#' @export
fcDerivative5 <- function(mw, Sw, mwo, mao, mai, mdao, mdai, Sdai, mpdo, mpdi, mdgo, mdgo2, Cdowdi, acto, acti, ni, no, no2, B, dlayer){
# Combination of products of first derivative of current layer (wd)*(wd) (iterations on weights on the same node)
mpdi2w <- fcCombinaisonDweight(mpdi, mw, Sw, mdai, Sdai, ni, no, B)
Cdgodgi <- fcCovDlayer(matrix(1, B*no2, 1), mwo, Cdowdi, ni, no, no2, B)
if (dlayer == FALSE){
# Combination of products of first derivative of current layer (wd)*(wd) (iterations on nodes from same layer (with weights pointing to same next layer node))
mpdi2n <- fcCombinaisonDnode(mpdi, mw, Sw, mdai, Sdai, ni, no, B)
# All possible combinations
mpdi2wnAll <- fcCombinaisonDweightNodeAll(mpdi, mpdi2n, mpdi2w, ni, no, B)
Cwdowdowdiwdi <- fcCwdowdowdiwdi_4hl(mpdi, mpdo, mdgo2, Cdgodgi, acti, ni, no, no2, B)
mdgo = array(matrix(rep(rep(matrix(mdgo, ncol = no, byrow = TRUE), times = ni), each = ni), B*ni, no*no), c(B*ni, no, no))
mdgoA = array(0, c(B*ni, no, ni*no))
for (b in 0:(no-1)){
for (i in (b*ni+1):(b*ni+ni)){
mdgoA[,,i] = mdgo[,,b+1]
}
}
md = mpdi2wnAll * mdgoA
mddgi = md + Cwdowdowdiwdi
# Come back to Bni x ni matrix
mddgi = matrix(apply(mddgi, 3, rowSums), B*ni*ni, no)
mddgi = matrix(rowSums(mddgi), B*ni, ni)
} else {
Cwdowdowwdi2 <- fcCwdowdowwdi2_3hl(mpdi, mpdo, mdgo2, Cdgodgi, acti, ni, no, no2, B)
mddgi <- fcMeanDlayer2array(mpdi2w, mdgo, Cwdowdowwdi2, ni, no, B)
mddgi = matrix(rowSums(mddgi), nrow(mddgi), 1)
}
return(mddgi)
}
#' Mean and covariance of derivatives
#'
#' This function calculates the mean vector and the covariance matrix for derivatives.
#'
#' @param mw Mean vector of weights for the current layer
#' @param Sw Covariance of weights for the current layer
#' @param mda Mean vector of activation units' derivative from current layer
#' @param Sda Covariance of activation units' derivative from current layer
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param B Batch size
#' @return - Mean vector of derivatives
#' @return - Covariance matrix of derivatives
#' @export
fcMeanVarDnode <- function(mw, Sw, mda, Sda, ni, no, B){
mw = matrix(rep(t(matrix(mw, ni, no)), B), nrow = ni*B, ncol = no, byrow = TRUE)
Sw = matrix(rep(t(matrix(Sw, ni, no)), B), nrow = ni*B, ncol = no, byrow = TRUE)
mda = matrix(mda, nrow(mw), ncol(mw))
Sda = matrix(Sda, nrow(Sw), ncol(Sw))
md = mw*mda
Sd = Sw*Sda + Sw*(mda^2) + Sda*(mw^2)
outputs <- list(md, Sd)
return(outputs)
}
#' Covariance between activation units and weights
#'
#' This function calculates covariance between activation units and weights and
#' covariance between activation units from consecutive layers.
#'
#' @param mw Mean vector of weights for the current layer
#' @param Sw Covariance of weights for the current layer
#' @param Jo Jacobian of next layer
#' @param mai Mean vector of activation units from current layer
#' @param Sai Covariance of activation units from current layer
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param B Batch size
#' @return - Covariance between activation units and weights
#' @return - Covariance between activation units from current and next layers
#' @export
fcCovawaa <- function(mw, Sw, Jo, mai, Sai, ni, no, B){
Joloop = t(matrix(matrix(rep(t(matrix(Jo, no, B)), ni), nrow =no*ni, ncol = B, byrow = TRUE), no, ni*B))
Sw = matrix(rep(t(matrix(Sw, ni, no)), B), nrow = ni*B, ncol = no, byrow = TRUE)
mai = matrix(mai, nrow(Sw), ncol(Sw))
Caw = Sw*mai*Joloop
mw = matrix(rep(t(matrix(mw, ni, no)), B), nrow = ni*B, ncol = no, byrow = TRUE)
Sai = matrix(Sai, nrow(mw), ncol(mw))
Caa = mw*Sai*Joloop
outputs <- list(Caw, Caa)
return(outputs)
}
#' Covariance between derivatives and weights
#'
#' This function calculates covariance between derivatives and weights and
#' covariance between derivatives from consecutive layers.
#'
#' @param mao Mean vector of activation units from next layer
#' @param Sao Covariance of activation units from next layer
#' @param mai Mean vector of activation units from current layer
#' @param Sai Covariance of activation units from current layer
#' @param Caow Covariance between activation units and weights
#' @param Caoai Covariance between activation units from current and next layers
#' @param acto Activation function index for next layer defined by \code{\link{activationFunIndex}}
#' @param acti Activation function index for current layer defined by \code{\link{activationFunIndex}}
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param B Batch size
#' @return - Covariance between derivatives and weights
#' @return - Covariance between derivatives from current and next layers
#' @export
fcCovdwddd <- function(mao, Sao, mai, Sai, Caow, Caoai, acto, acti, ni, no, B){
mao = t(matrix(matrix(rep(t(matrix(mao, no, B)), ni), nrow = no*ni, ncol = B, byrow = TRUE), no, ni*B))
Sao = t(matrix(matrix(rep(t(matrix(Sao, no, B)), ni), nrow = no*ni, ncol = B, byrow = TRUE), no, ni*B))
mai = matrix(mai, nrow(mao), ncol(mao))
Sai = matrix(Sai, nrow(Sao), ncol(Sao))
if (acti == 1){ # tanh
Cdodi = 2*Caoai^2 + 4*mao*Caoai*mai
} else if (acti == 2){ # sigmoid
Cdodi = Caoai - 2*Caoai*mai - 2*mao*Caoai + 2*Caoai^2 + 4*mao*Caoai*mai
} else if (acti == 4){ # relu
Cdodi = matrix(0, nrow(mao), ncol(mao))
} else {
Cdodi = matrix(0, nrow(mao), ncol(mao))
}
if (acto == 1){ # tanh
Cdow = -2*mao*Caow
} else if (acto == 2){ # sigmoid
Cdow = Caow*(1-2*mao)
} else if (acto == 4){ # relu
Cdow = matrix(0, nrow(mao), ncol(mao))
} else {
Cdow = matrix(0, nrow(mao), ncol(mao))
}
outputs <- list(Cdow, Cdodi)
return(outputs)
}
#' Covariance between first and second derivatives from consecutive layers
#'
#' This function calculates covariance between activation units and first derivatives and
#' covariance between first and second derivatives from consecutive layers.
#'
#' @param mao Mean vector of activation units from next layer
#' @param mai Mean vector of activation units from current layer
#' @param mdai Mean vector of activation units' first derivative from current layer
#' @param Caoai Covariance between activation units from current and next layers
#' @param Cdodi Covariance between derivatives from current and next layers
#' @param acti Activation function index for current layer defined by \code{\link{activationFunIndex}}
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param B Batch size
#' @return - Covariance between first derivatives from next layer and activation units from current layer
#' @return - Covariance between first and second derivatives from consecutive layers
#' @export
fcCovdaddd <- function(mao, mai, mdai, Caoai, Cdodi, acti, ni, no, B){
mao = t(matrix(matrix(rep(t(matrix(mao, no, B)), ni), nrow = no*ni, ncol = B, byrow = TRUE), no, ni*B))
mai = matrix(mai, nrow(mao), ncol(mao))
mdai = matrix(mdai, nrow(mao), ncol(mao))
if (acti == 1){ # tanh
Cdoai = - 2*Caoai*mao
Cdoddi = - 2*Cdodi*mai - 2*Cdoai*mdai
} else if (acti == 2){ # sigmoid
Cdoai = Caoai * (1 - 2*mao)
Cdoddi = Cdodi - 2*Cdodi*mai - 2*Cdoai*mdai
} else if (acti == 4){ # relu
Cdoai = matrix(0, nrow(mao), ncol(mao))
Cdoddi = matrix(0, nrow(mao), ncol(mao))
} else {
Cdoai = matrix(0, nrow(mao), ncol(mao))
Cdoddi = matrix(0, nrow(mao), ncol(mao))
}
outputs <- list(Cdoai, Cdoddi)
return(outputs)
}
#' Covariance between first and second derivatives from consecutive layers
#'
#' This function calculates covariance between weights and second derivatives and
#' covariance between first and second derivatives from consecutive layers.
#'
#' @param mao Mean vector of activation units from next layer
#' @param mai Mean vector of activation units from current layer
#' @param mdao Mean vector of activation units' first derivative from next layer
#' @param Caoai Covariance between activation units from current and next layers
#' @param Cdodi Covariance between first derivatives from current and next layers
#' @param Caow Covariance between activation units and weights
#' @param Cdow Covariance between derivatives and weights
#' @param acto Activation function index for next layer defined by \code{\link{activationFunIndex}}
#' @param acti Activation function index for current layer defined by \code{\link{activationFunIndex}}
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param B Batch size
#' @return - Covariance between first and second derivatives from consecutive layers
#' @return - Covariance between second derivatives from next layer and weights
#' @export
fcCovaddddddw <- function(mao, mai, mdao, Caoai, Cdodi, Caow, Cdow, acto, acti, ni, no, B){
mao = t(matrix(matrix(rep(t(matrix(mao, no, B)), ni), nrow = no*ni, ncol = B, byrow = TRUE), no, ni*B))
mdao = t(matrix(matrix(rep(t(matrix(mdao, no, B)), ni), nrow = no*ni, ncol = B, byrow = TRUE), no, ni*B))
mai = matrix(mai, nrow(mao), ncol(mao))
if (acti == 1){ # tanh
Caodi = - 2*Caoai*mai
Cddodi = - 2*Cdodi*mao - 2*Caodi*mdao
} else if (acti == 2){ # sigmoid
Caodi = Caoai * (1 - 2*mai)
Cddodi = Cdodi - 2*Cdodi*mao - 2*Caodi*mdao
} else if (acti == 4){ # relu
Caodi = matrix(0, nrow(mao), ncol(mao))
Cddodi = matrix(0, nrow(mao), ncol(mao))
} else {
Caodi = matrix(0, nrow(mao), ncol(mao))
Cddodi = matrix(0, nrow(mao), ncol(mao))
}
if (acto == 1){ # tanh
Cddow = - 2*Caow*mdao - 2*mao*Cdow
} else if (acto == 2){ # sigmoid
Cddow = Cdow - 2*Caow*mdao - 2*mao*Cdow
} else if (acto == 4){ # relu
Cddow = matrix(0, nrow(mao), ncol(mao))
} else {
Cddow = matrix(0, nrow(mao), ncol(mao))
}
outputs <- list(Cddodi, Cddow)
return(outputs)
}
#' Covariance between derivatives and weights*derivatives
#'
#' This function calculates covariance between derivatives and weights and
#' covariance between derivatives from consecutive layers.
#'
#' @param md Mean vector of derivatives
#' @param mw Mean vector of weights for the current layer
#' @param Cdow Covariance between derivatives and weights
#' @param Cdodi Covariance between derivatives from current and next layers
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param B Batch size
#' @return Covariance between derivatives and weights times derivatives
#' @export
fcCovdwd <- function(md, mw, Cdow, Cdodi, ni, no, B){
mw = matrix(rep(t(matrix(mw, ni, no)), B), nrow = ni*B, ncol = no, byrow = TRUE)
md = matrix(md, nrow(mw), ncol(mw))
Cdowdi = Cdow*md + Cdodi*mw
return(Cdowdi)
}
#' Covariance between products of derivatives and weights
#'
#' This function calculates covariance between products of derivatives and
#' weights from consecutive layers.
#'
#' @param mdgo2 Mean vector of product of derivatives in second next layer
#' @param mwo Mean vector of weights for the next layer
#' @param Cdowdi Covariance between derivatives and weights times derivatives
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param no2 Number of units in second next layer
#' @param B Batch size
#' @return Covariance between weights times derivatives from consecutive layers
#' @export
fcCovDlayer <- function(mdgo2, mwo, Cdowdi, ni, no, no2, B){
mdgo2 = matrix(matrix(rep(mdgo2, no), nrow = nrow(t(mdgo2))*no, byrow = TRUE), no*no2, B)
m = t(matrix(matrix(rep(t(mdgo2*mwo), ni), nrow = nrow(mdgo2*mwo)*ni, byrow = TRUE), no*no2, B*ni))
Cdowdi = matrix(rep(Cdowdi, no2), nrow = nrow(Cdowdi))
Cdgodgi = Cdowdi*m
return(Cdgodgi)
}
#' Mean and variance of weights times derivatives products terms
#'
#' This function calculates mean and variance of weights times derivatives products terms.
#'
#' @param mx Mean vector of inputs
#' @param Sx Variance of inputs
#' @param mye Mean derivatives at each node in next layer
#' @param my Mean vector of outputs
#' @param Sy Variance of outputs
#' @param Cxy Covariance between inputs and outputs
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param no2 Number of units in second next layer
#' @param B Batch size
#' @return - Mean of weights times derivatives products terms
#' @return - Covariance between weights times derivatives products terms
#' @export
fcMeanVarDlayer <- function(mx, Sx, my, mye, Sy, Cxy, ni, no, no2, B){
my = t(matrix(matrix(rep(t(matrix(my, no, B)), ni), nrow = no*ni, ncol = B, byrow = TRUE), no, ni*B))
Sy = t(matrix(matrix(rep(t(matrix(Sy, no, B)), ni), nrow = no*ni, ncol = B, byrow = TRUE), no, ni*B))
mye = array(aperm(array(t(mye), c(no2,no,B)), perm=c(2, 1, 3)), c(no*no2,1,B))
mye = t(matrix(mye[, rep(1:ncol(mye), each = ni),], no*no2,B*ni))
md = mx*my
Sd = Sx*Sy + Sy*(mx^2)
SxS = matrix(rep(Sx, no2), nrow = nrow(Sx), ncol = ncol(Sx)*no2)
Sd2 = rowSums(matrix(SxS*(mye^2), B*ni*no, no2))
Sd2 = matrix(Sd2, B*ni, no)
Sd = Sd + Sd2
Cxym = rowSums(matrix(Cxy, B*ni*no, no2))
Cxym = matrix(Cxym, B*ni, no)
md = md + Cxym
CxyS1 = rowSums(matrix(Cxy^2, B*ni*no, no2))
CxyS1 = matrix(CxyS1, B*ni, no)
CxyS2 = rowSums(matrix(2*Cxy*mye, B*ni*no, no2))
CxyS2 = matrix(CxyS2, B*ni, no)
CxyS2 = CxyS2*mx
Sd = Sd + CxyS1 + CxyS2
outputs <- list(md, Sd)
return(outputs)
}
#' Mean of weights times derivatives products terms squared (wdo x (wdi*wdi))
#'
#' This function calculates mean of weights times derivatives products terms when
#' adding two of those products from current layer to already calculated expectation
#' that ended with one such product of next layer (i.e. wdo x (wdiwdi)). Mean terms
#' are in array format. Once added, rows need to be
#' summed to aggregate expectations by node*node combinations of current layer.
#'
#' @param mpdi Mean vector of first derivative product wd of current layer
#' @param mpdi2 Mean array of combination of products of first derivatives
#' @param mdgo Mean vector of product of derivatives in next layer
#' @param Cwdowdiwdi Covariance cov(wdo,(wdi*wdi)) of weights times derivatives products terms when there is one product in next layer and two in current
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param no2 Number of units in second next layer
#' @param B Batch size
#' @return Mean of weights times derivatives products terms
#' @export
fcMeanDlayer2row <- function(mpdi, mpdi2, mdgo, Cwdowdiwdi, ni, no, no2, B){
mdgo = array(matrix(rep(rep(matrix(mdgo, ncol = no, byrow = TRUE), times = ni), each = ni), B*ni, no), c(B*ni, no, ni))
md = mpdi2 * mdgo
md = md + Cwdowdiwdi
# Rearrange to get (B*ni x ni) matrix (expectation of each node from current layer multiplied by each other)
# Sum each weight combination related to the same node
md2 = matrix(apply(md, 3, rowSums), B*ni, ni)
# Rearrange to have "chunk" of batches (consecutive rows for same batch' nodes)
if (B>1){
md3 = matrix(md2[1,],nrow=1)
for (b in 1:B){
for (i in 1:ni){
md3 = rbind(md3, matrix(md2[b + i*B - B,], nrow = 1))
}
}
md2 = md3[2:nrow(md3),]
}
return(md2)
}
#' Mean of weights times derivatives products terms ((wdo*wdo) x (wwdi^2))
#'
#' This function calculates mean of weights times derivatives products terms when
#' adding two of those products from current layer to already calculated expectation
#' that ended with one such product of next layer (i.e. (wdowdo) x (wwdi^2)). Mean terms
#' are in array format. Once added, rows need to be
#' summed to aggregate expectations by node*weight combinations of current layer.
#'
#' @param mpdi2w Combination of products of first derivative of current layer (wd)*(wd) (iterations on weights on the same node)
#' @param mdgo Mean of product of derivatives in next layer
#' @param Cwdowdowwdi2 Covariance cov(wdowdo,wdiwdi) of weights times derivatives products terms, where the di terms are the same
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param B Batch size
#' @return Mean of weights times derivatives products terms
#' @export
fcMeanDlayer2array <- function(mpdi2w, mdgo, Cwdowdowwdi2, ni, no, B){
mdgoA = array(0, c(B*ni, no, no))
for (k in 1:no){
for (b in 0:(B-1)){
mdgoA[(b*ni+1):(b*ni+ni), ,k] = matrix(rep(mdgo[k + no*b,], ni), nrow = ni, byrow = TRUE)
}
}
md = mpdi2w * mdgoA
md = md + Cwdowdowwdi2
# Rearrange to get usual (B*ni x no) matrix
md = matrix(apply(md, 3, rowSums), B*ni, no)
return(md)
}
#' Covariance between next layer product and current layer multiplied products
#'
#' This function calculates covariance cov(wdo,wdiwdi) of weights times derivatives
#' products terms when there are one product in next layer and two in current.
#'
#' @param mpdi Mean vector of first derivative product wd of current layer
#' @param Cdgoddgik Covariance between weights times derivatives from consecutive layers
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param B Batch size
#' @return Covariance cov(wdo,wdi*wdi) of weights times derivatives products terms when there is one product in next layer and two in current
#' @export
fcCovwdowdiwdi <- function(mpdi, Cdgoddgik, ni, no, B){
Cwdowdiwdi = array(0, c(ni*B, no, ni))
for (b in 0:(B-1)){
for (k in 1:ni){
for (i in (b*ni+1):(b*ni+ni)){
Cwdowdiwdi[i,,k] = mpdi[i,] * Cdgoddgik[k+b*ni,] + mpdi[k+b*ni,] * Cdgoddgik[i,]
}
}
}
return(Cwdowdiwdi)
}
#' Covariance between next layer product and current layer squared product
#'
#' This function calculates covariance cov(wdo,(wdi)^2) of weights times derivatives
#' products terms when there are one product in next layer and two squared in current.
#'
#' @param mpdi Mean vector of first derivative product wd of current layer
#' @param Cdgoddgik Covariance between weights times derivatives from consecutive layers
#' @return Covariance cov(wdo,(wdi)^2) of weights times derivatives products terms when there is one product in next layer and two squared in current
#' @export
fcCovwdowdi2 <- function(mpdi, Cdgoddgik){
Cwdowdi2 = 2*mpdi*Cdgoddgik
return(Cwdowdi2)
}
#' Covariance between products in (same) next and current layers
#'
#' This function calculates covariance cov(wdo^2,wdiwdi) of weights times derivatives
#' products terms when there are two products in both next and current layers.
#' The product fom next layer is the same squared.
#'
#' @param mpdo Mean vector of first derivative product wd of next layer
#' @param Cwdowdiwdi Covariance cov(wdo,wdi*wdi) of weights times derivatives products terms when there are one product in next layer and two in current
#' @return Covariance cov(wdo^2,wdi*wdi) of weights times derivatives products terms when there are two products in both next and current layers
#' @export
fcCovwdo2wdiwdi <- function(mpdo, Cwdowdiwdi){
Cwdo2wdiwdi = 2*mpdo*Cwdowdiwdi
return(Cwdo2wdiwdi)
}
#' Covariance between next layer multiplied products and current layer multiplied products (same derivative)
#'
#' This function calculates covariance cov(wdowdo,wdiwdi) where the di terms are the same, when next second layer involves only a product term (wddo2).
#'
#' @param mpdi Mean vector of first derivative product wd of current layer
#' @param mpdo Mean vector of first derivative product wd of next layer
#' @param Cdgodgi Covariance between weights times derivatives from consecutive layers
#' @param acti Activation function index for current layer defined by \code{\link{activationFunIndex}}
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param no2 Number of units in second next layer
#' @param B Batch size
#' @return Covariance cov(wdowdo,wdiwdi) where the di terms are the same
#' @export
fcCwdowdowwdi2 <- function(mpdi, mpdo, Cdgodgi, acti, ni, no, no2, B){
# mpdo = array(aperm(array(t(mpdo), c(no2,no,B)), perm=c(2, 1, 3)), c(no*no2,1,B))
# mpdo = t(matrix(mpdo[, rep(1:ncol(mpdo), each = ni),], no*no2,B*ni))
#
# Cwdowdowwdi2 = array(0, c(ni*B, no*no2, no))
# for (b in 0:(no2-1)){
# for (k in 1:no){
# for (j in (b*no+1):(b*no+no)){
# if (j == (b*no+k)){
# Cwdowdowwdi2[,j,k] = 4 * mpdo[,j] * mpdi[,k] * Cdgodgi[,j]
# } else {
# Cwdowdowwdi2[,j,k] = mpdo[,j] * mpdi[,j-b*no] * Cdgodgi[,(b*no+k)] + mpdo[,(b*no+k)] * mpdi[,k] * Cdgodgi[,j]
# }
#
# }
# }
# }
#
# # Sum covariances together to come back (B*ni x no x no) array. Iterations for sum are next layer weights that change.
# sum = array(matrix(Cwdowdowwdi2, nrow = B*ni*no), c(B*ni*no, no2, no))
# Cwdowdowwdi2 = array(apply(sum, 3, rowSums), c(B*ni, no, no))
mpdo_real = mpdo
mpdo = array(aperm(array(t(mpdo), c(no2,no,B)), perm=c(2, 1, 3)), c(no*no2,1,B))
# Replicate Cdgodgi matrix for each dimension of the array
CdgodgiA_moving = array(Cdgodgi, c(B*ni, no*no2, no))
mpdiA_moving = array(mpdi, c(B*ni, no*no2, no))
mpdoA_moving = array(t(matrix(mpdo[, rep(1:ncol(mpdo), each = ni),], no*no2,B*ni)), c(B*ni,no*no2, no))
# Prepare "fixed" elements for iterations
CdgodgiA_temp = matrix(Cdgodgi[,1], ncol = 1)
for (j in 1:no){
for (i in 1:no2){
CdgodgiA_temp = cbind(CdgodgiA_temp, matrix(Cdgodgi[,j + i*no - no], ncol = 1))
}
}
CdgodgiA_fixed = array(matrix(rep(t(CdgodgiA_temp[,-1]), each = no), nrow=B*ni, byrow = TRUE), c(B*ni,no*no2, no))
mpdiA_fixed = array(matrix(rep(t(mpdi), each = no*no2), nrow=B*ni, byrow = TRUE), c(B*ni,no*no2, no))
mpdoM = matrix(mpdo, ncol = B)
testA = matrix(mpdoM[1,], nrow = 1)
for (j in 1:no){
for (i in 1:no2){
testA = rbind(testA, matrix(mpdoM[j + i*no - no,], nrow = 1))
}
}
mpdoA = array(testA[-1,], c(no*no2,1,B))
mpdoA_temp = matrix(mpdoA[, rep(1:ncol(mpdoA), each = ni*no),], no*no2,B*no*ni)
mpdoA_fixed = array(aperm(array(matrix(t(mpdoA_temp), nrow=B), c(no,B*ni, no*no2)), c(2,1,3)), c(B*ni, no*no2, no))
# Multiplier (multiply by 2 when start at same node and arrive at same node, no matter the weights)
multiplier = array(1, c(B*ni, no*no2, no))
for (j in 1:no){
for (b in 0:(no2-1)){
multiplier[,b*no+j,j] = 2*multiplier[,b*no+j,j]
}
}
if (acti == 0){ # If current layer is not activated, only weights to consider for current layer
test = multiplier * (mpdoA_moving * mpdiA_moving * CdgodgiA_fixed + mpdoA_fixed * mpdiA_fixed * CdgodgiA_moving)
} else {
test = multiplier * (CdgodgiA_fixed * CdgodgiA_moving + mpdoA_moving * mpdiA_moving * CdgodgiA_fixed + mpdoA_fixed * mpdiA_fixed * CdgodgiA_moving)
}
# Sum covariances together to come back (B*ni x no x no) array. Iterations for sum are next layer weights that change.
sum = array(matrix(test, nrow = B*ni*no), c(B*ni*no, no2, no))
Cwdowdowwdi2 = array(apply(sum, 3, rowSums), c(B*ni, no, no))
return(Cwdowdowwdi2)
}
#' Covariance between next layer multiplied products and current layer multiplied products (same derivative, minimum 3 hidden layers)
#'
#' This function calculates covariance cov(wdowdo,wdiwdi) where the di terms are the same when next second layer involves multiplied terms (wdo2wdo2).
#' It is used when there are at least 3 hidden layers.
#'
#' @param mpdi Mean vector of first derivative product wd of current layer
#' @param mpdo Mean vector of first derivative product wd of next layer
#' @param mdgo2 Mean vector of product of derivatives in second next layer
#' @param Cdgodgi Covariance between weights times derivatives from consecutive layers
#' @param acti Activation function index for current layer defined by \code{\link{activationFunIndex}}
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param no2 Number of units in second next layer
#' @param B Batch size
#' @return Covariance cov(wdowdo,wdiwdi) where the di terms are the same
#' @export
fcCwdowdowwdi2_3hl <- function(mpdi, mpdo, mdgo2, Cdgodgi, acti, ni, no, no2, B){
mpdo = array(aperm(array(t(mpdo), c(no2,no,B)), perm=c(2, 1, 3)), c(no*no2,1,B))
# Replicate Cdgodgi matrix for each dimension of the array
CdgodgiA_moving = array(Cdgodgi, c(B*ni, no*no2, no*no2))
mpdiA_moving = array(mpdi, c(B*ni, no*no2, no*no2))
mpdoA_moving = array(t(matrix(mpdo[, rep(1:ncol(mpdo), each = ni),], no*no2,B*ni)), c(B*ni,no*no2, no*no2))
# Prepare "fixed" elements for iterations
CdgodgiA_fixed = array(matrix(rep(t(Cdgodgi), each = no*no2), nrow=B*ni, byrow = TRUE), c(B*ni,no*no2, no*no2))
mpdiA_fixed = array(matrix(rep(t(mpdi), each = no*no2), nrow=B*ni, byrow = TRUE), c(B*ni,no*no2, no*no2))
mpdoA_temp = matrix(mpdo[, rep(1:ncol(mpdo), each = ni*no*no2),], no*no2,B*no*ni*no2)
mpdoA_fixed = aperm(array(matrix(t(mpdoA_temp), nrow=B), c(no*no2,B*ni, no*no2)), c(2,1,3))
# Prepare mdgo2
mdgo2_temp1 = matrix(rep(mdgo2, each=no), ncol = no2)
mdgo2_temp2= array(array(t(mdgo2_temp1), c(no*no2,no2,B)), c(no*no2*no2,1,B))
mdgo2_temp3 = matrix(mdgo2_temp2[, rep(1:ncol(mdgo2_temp2), each = ni*no),], no*no2*no2,B*no*ni)
mdgo2_temp4=array(matrix(t(mdgo2_temp3), nrow=B), c(no,B*ni, no*no2*no2 ))
mdgo2A = array(aperm(mdgo2_temp4, c(2,1,3)), c(B*ni, no*no2, no*no2))
# Multiplier (multiply by 2 when start at same node and arrive at same node, no matter the weights)
multiplier = array(1, c(ni*B, no*no2, no))
for (j in 1:no){
for (b in 0:(no2-1)){
multiplier[,b*no+j,j] = 2*multiplier[,b*no+j,j]
}
}
multiplier = array(multiplier, c(ni*B, no*no2, no*no2))
if (acti == 0){ # If current layer is not activated, only weights to consider for current layer
Cwdowdowwdi2 = multiplier * mdgo2A * (mpdoA_moving * mpdiA_moving * CdgodgiA_fixed + mpdoA_fixed * mpdiA_fixed * CdgodgiA_moving)
} else{
Cwdowdowwdi2 = multiplier * mdgo2A * (CdgodgiA_fixed * CdgodgiA_moving + mpdoA_moving * mpdiA_moving * CdgodgiA_fixed + mpdoA_fixed * mpdiA_fixed * CdgodgiA_moving)
}
# Sum covariances together to come back (B*ni x no x no) array. Iterations for sum are next layer weights that change + across array dimensions (each (no)th array)
sum = array(matrix(Cwdowdowwdi2, nrow = B*ni*no), c(B*ni*no, no2, no*no2))
Cwdowdowwdi2 = matrix(apply(sum, 3, rowSums), nrow = B*ni*no*no)
Cwdowdowwdi2 = array(rowSums(Cwdowdowwdi2), c(B*ni, no, no))
return(Cwdowdowwdi2)
}
#' Covariance between next layer multiplied products and current layer multiplied products (minimum 3 hidden layers)
#'
#' This function calculates covariance cov(wdowdo,wdiwdi) where all terms can be different.
#' It is used when there are at least 3 hidden layers and second next layer is a product of only two terms (wdo2).
#'
#' @param mpdi Mean vector of first derivative product wd of current layer
#' @param mpdo Mean vector of first derivative product wd of next layer
#' @param Cdgodgi Covariance between weights times derivatives from consecutive layers
#' @param acti Activation function index for current layer defined by \code{\link{activationFunIndex}}
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param no2 Number of units in second next layer
#' @param B Batch size
#' @return Covariance cov(wdowdo,wdiwdi) where all terms can be different
#' @export
fcCwdowdowdiwdi <- function(mpdi, mpdo, Cdgodgi, acti, ni, no, no2, B){
# mpdo_original = mpdo
# mpdi2 = matrix(rep(mpdi, no2), nrow = B*ni)
# mpdo2 = array(aperm(array(t(mpdo), c(no2,no,B)), perm=c(2, 1, 3)), c(no*no2,1,B))
# mpdo2 = t(matrix(mpdo2[, rep(1:ncol(mpdo2), each = ni),], no*no2,B*ni))
#
# Cwdowdowdiwdi = array(0, c(ni*B, no*no2, no*ni))
# seq = c(mpdi)
# for (k in 1:(no*ni)){
# i = as.numeric(which(mpdi == seq[k], arr.ind = TRUE)[,"row"])
# j = as.numeric(which(mpdi == seq[k], arr.ind = TRUE)[,"col"])
# for (b in 0:(no2-1)){
# for (c in (b*no+1):(b*no+no)){
# if (c == (b*no+j)){
# # When same weight*node from next layer (i.e. (wdo)^2), coming from same current layer combination or not
# Cwdowdowdiwdi[,c,k] = 2*(mpdo[j,b+1] * mpdi[i,j] * Cdgodgi[,c] + mpdo2[,c] * mpdi2[,c] * Cdgodgi[i,j+b*no])
# } else {
# Cwdowdowdiwdi[,c,k] = mpdo[j,b+1] * mpdi[i,j] * Cdgodgi[,c] + mpdo2[,c] * mpdi2[,c] * Cdgodgi[i,j+b*no]
# }
# }
# }
# }
#
#
mpdo = array(aperm(array(t(mpdo), c(no2,no,B)), perm=c(2, 1, 3)), c(no*no2,1,B))
# Prepare "moving" elements for iterations
CdgodgiA_moving = array(Cdgodgi, c(B*ni, no*no2, no*ni)) # Replicate Cdgodgi matrix for each dimension of the array
mpdiA_moving = array(mpdi, c(B*ni, no*no2, no*ni))
mpdoA_moving = array(t(matrix(mpdo[, rep(1:ncol(mpdo), each = ni),], no*no2,B*ni)), c(B*ni,no*no2, no*ni))
# Prepare "fixed" elements for iterations
CdgodgiA_temp = matrix(Cdgodgi[,1], ncol = 1)
for (j in 1:no){
for (i in 1:no2){
CdgodgiA_temp = cbind(CdgodgiA_temp, matrix(Cdgodgi[,j + i*no - no], ncol = 1))
}
}
CdgodgiA_temp2 = apply(array(CdgodgiA_temp[,-1], c(B*ni, no2, no)), 2, rbind)
CdgodgiA_fixed = array(rep(t(CdgodgiA_temp2), each = ni*no), c(B*ni, no*no2, ni*no)) # If B = 1
mpdiA_fixed = array(rep(mpdi, each=ni*no*no2), c(B*ni,no*no2, no*ni)) # If B = 1
mpdoM = matrix(mpdo, ncol = B)
testA = matrix(mpdoM[1,], nrow = 1)
for (j in 1:no){
for (i in 1:no2){
testA = rbind(testA, matrix(mpdoM[j + i*no - no,], nrow = 1))
}
}
mpdoA = array(testA[-1,], c(no*no2,1,B))
mpdoA_temp = matrix(mpdoA[, rep(1:ncol(mpdoA), each = ni*no),], no*no2,B*no*ni)
mpdoA_temp2 = array(aperm(array(matrix(t(mpdoA_temp), nrow=B), c(no,B*ni, no*no2)), c(2,1,3)), c(B*ni, no*no2, no))
mpdoA_fixed = array(0, c(B*ni, no*no2, ni*no))
for (i in 1:no){
for (j in 1:ni){
mpdoA_fixed[,,j + i*ni - ni] = mpdoA_temp2[,,i]
}
}
# Multiplier (multiply by 2 when arrive at same node, no matter the weights)
multiplier = array(1, c(ni*B, no*no2, no*ni))
for (i in 1:no){
for (j in 1:ni){
for (b in 0:(no2-1)){
multiplier[,b*no+i,j + i*ni - ni] = matrix(2, ncol = 1)
}
}
}
if (acti == 0){ # If current layer is not activated, only weights to consider for current layer
Cwdowdowdiwdi = multiplier * (mpdoA_moving * mpdiA_moving * CdgodgiA_fixed + mpdoA_fixed * mpdiA_fixed * CdgodgiA_moving)
} else {
Cwdowdowdiwdi = multiplier * (CdgodgiA_fixed * CdgodgiA_moving + mpdoA_moving * mpdiA_moving * CdgodgiA_fixed + mpdoA_fixed * mpdiA_fixed * CdgodgiA_moving)
}
# Sum covariances together to come back (B*ni x no x no*ni) array. Iterations for sum are next layer weights that change.
sum = array(matrix(Cwdowdowdiwdi, nrow = B*ni*no), c(B*ni*no, no2, no*ni))
Cwdowdowdiwdi = array(apply(sum, 3, rowSums), c(B*ni, no, no*ni))
return(Cwdowdowdiwdi)
}
#' Covariance between next layer multiplied products and current layer multiplied products (minimum 4 hidden layers)
#'
#' This function calculates covariance cov(wdowdo,wdiwdi) where all terms can be different.
#' It is used when there are at least 4 hidden layers.
#'
#' @param mpdi Mean vector of first derivative product wd of current layer
#' @param mpdo Mean vector of first derivative product wd of next layer
#' @param mdgo2 Mean vector of product of derivatives in second next layer
#' @param Cdgodgi Covariance between weights times derivatives from consecutive layers
#' @param acti Activation function index for current layer defined by \code{\link{activationFunIndex}}
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param no2 Number of units in second next layer
#' @param B Batch size
#' @return Covariance cov(wdowdo,wdiwdi) where all terms can be different
#' @export
fcCwdowdowdiwdi_4hl <- function(mpdi, mpdo, mdgo2, Cdgodgi, acti, ni, no, no2, B){
mpdo_original = mpdo
mpdo = array(aperm(array(t(mpdo), c(no2,no,B)), perm=c(2, 1, 3)), c(no*no2,1,B))
# Prepare "moving" elements for iterations
CdgodgiA_moving = array(Cdgodgi, c(B*ni, no*no2, no*ni*no2)) # Replicate Cdgodgi matrix for each dimension of the array
mpdiA_moving = array(mpdi, c(B*ni, no*no2, no*ni*no2))
mpdoA_moving = array(t(matrix(mpdo[, rep(1:ncol(mpdo), each = ni),], no*no2,B*ni)), c(B*ni,no*no2, no*ni*no2))
# Prepare "fixed" elements for iterations
Cdgodgi_temp = array(aperm(array(t(Cdgodgi), c(no*no2,ni,B)), perm=c(2, 1, 3)), c(ni*no*no2,1,B))
CdgodgiA_fixed = array(rep(t(matrix(rep(matrix(Cdgodgi_temp, ncol = B), each = no*no2), ncol = B)), each = ni), c(B*ni, no*no2, ni*no*no2))
mpdi_temp = array(aperm(array(t(mpdi), c(no,ni,B)), perm=c(2, 1, 3)), c(ni*no,1,B))
mpdiA_fixed = array(rep(t(matrix(rep(matrix(mpdi_temp, ncol = B), each = no*no2), ncol = B)), each = ni), c(B*ni, no*no2, ni*no*no2))
mpdoA_fixed = array(rep(t(matrix(rep(matrix(mpdo, ncol = B), each = no*no2*ni), ncol = B)), each = ni), c(B*ni, no*no2, ni*no*no2))
# Prepare mdgo2
mdgo2_temp1 = matrix(rep(mdgo2, each=ni*no), ncol = no2)
mdgo2_temp2= array(array(t(mdgo2_temp1), c(ni*no*no2,no2,B)), c(ni*no*no2*no2,1,B))
mdgo2_temp3 = matrix(mdgo2_temp2[, rep(1:ncol(mdgo2_temp2), each = ni*no),], ni*no*no2*no2,B*no*ni)
mdgo2_temp4=array(matrix(t(mdgo2_temp3), nrow=B), c(no,B*ni, ni*no*no2*no2 ))
mdgo2A = array(aperm(mdgo2_temp4, c(2,1,3)), c(B*ni, no*no2, ni*no*no2))
# Multiplier (multiply by 2 when arrive at same node, no matter the weights)
multiplier = array(1, c(ni*B, no*no2, no*ni))
for (i in 1:no){
for (j in 1:ni){
for (b in 0:(no2-1)){
multiplier[,b*no+i,j + i*ni - ni] = matrix(2, ncol = 1)
}
}
}
multiplier = array(multiplier, c(ni*B, no*no2, no*ni*no2))
if (acti == 0){ # If current layer is not activated, only weights to consider for current layer
Cwdowdowdiwdi = multiplier * mdgo2A * (mpdoA_moving * mpdiA_moving * CdgodgiA_fixed + mpdoA_fixed * mpdiA_fixed * CdgodgiA_moving)
} else {
Cwdowdowdiwdi = multiplier * mdgo2A * (CdgodgiA_fixed * CdgodgiA_moving + mpdoA_moving * mpdiA_moving * CdgodgiA_fixed + mpdoA_fixed * mpdiA_fixed * CdgodgiA_moving)
}
# Sum covariances together to come back (B*ni x no x no) array. Iterations for sum are next layer weights that change + across array dimensions (each (ni*no)th array)
sum = array(matrix(Cwdowdowdiwdi, nrow = B*ni*no), c(B*ni*no, no2, ni*no*no2)) # Prepare to sum each (no)th columns in all matrices
sum2 = matrix(apply(sum, 3, rowSums), nrow = B*ni*no*ni*no) # Once summed, results in (B*ni*no X 1) matrices. Rearrange to sum each current layer unique product (no2 terms to sum)
Cwdowdowdiwdi = array(rowSums(sum2), c(B*ni, no, no*ni))
return(Cwdowdowdiwdi)
}
#' Covariance between activation and hidden units
#'
#' This function calculates covariance between activation and hidden units.
#'
#'
#' @param Jo Jacobian of next layer
#' @param J Jacobian of current layer
#' @param Sz Covariance of units from current layer
#' @param mw Mean vector of weights for the current layer
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param B Batch size
#' @return - Covariance between activation and hidden layers (same layer)
#' @return - Covariance between activation (next) and hidden (current) layers
#' @export
fcCovaz <- function(Jo, J, Sz, mw, ni, no, B){
Jo = t(matrix(matrix(rep(t(matrix(Jo, no, B)), ni), nrow = no*ni, ncol = B, byrow = TRUE), no, ni*B))
mw = matrix(rep(t(matrix(mw, ni, no)), B), nrow = ni*B, ncol = no, byrow = TRUE)
Caizi = J*Sz
Caozi = Jo*matrix(Caizi, nrow(mw), ncol(mw))*mw
outputs <- list(Caizi, Caozi)
return(outputs)
}
#' Covariance between derivatives and hidden Units
#'
#' This function calculates covariance between derivatives and hidden units.
#'
#'
#' @param mao Mean vector of activation units from next layer
#' @param mai Mean vector of activation units from current layer
#' @param Caizi covariance between activation and hidden layers (same layer)
#' @param Caozi covariance between activation (next layer) and hidden (current) layers
#' @param acto Activation function index for next layer defined by \code{\link{activationFunIndex}}
#' @param acti Activation function index for current layer defined by \code{\link{activationFunIndex}}
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param B Batch size
#' @return - Covariance between derivative (next) and hidden (current) layers
#' @return - Covariance between derivative and hidden layers (same layer)
#' @export
fcCovdz <- function(mao, mai, Caizi, Caozi, acto, acti, ni, no, B){
mao = t(matrix(matrix(rep(t(matrix(mao, no, B)), ni), nrow = no*ni, ncol = B, byrow = TRUE), no, ni*B))
if (acti == 1){ # tanh
Cdizi = -2*mai*Caizi
} else if (acti == 2){ # sigmoid
Cdizi = (1-2*mai)*Caizi
} else if (acti == 4){ # relu
Cdizi = matrix(0, nrow(mai), ncol(mai))
} else {
Cdizi = matrix(0, nrow(mai), ncol(mai))
}
if (acto == 1){ # tanh
Cdozi = -2*mao*Caozi
} else if (acto == 2){ # sigmoid
Cdozi = (1-2*mao)*Caozi
} else if (acto == 4){ # relu
Cdozi = matrix(0, nrow(mao), ncol(mao))
} else {
Cdozi = matrix(0, nrow(mao), ncol(mao))
}
outputs <- list(Cdozi, Cdizi)
return(outputs)
}
#' Covariance between derivatives and hidden states
#'
#' This function calculates covariance between derivatives and hidden states.
#' It is not related to the derivative calculation process.
#' It could be used infer Z (hidden states) with the constraint that the derivative of g with respect to Z equals 0.
#'
#' @param mwo Mean vector of weights for the next layer
#' @param mw Mean vector of weights for the current layer
#' @param mdgo2 Mean vector of product of derivatives in second next layer
#' @param mpdi Mean vector of first derivative product wd of current layer
#' @param mdgoe Mean of product of derivatives at each node in next layer
#' @param Cdozi Covariance between derivative (next) and hidden (current) layers
#' @param Cdizi Covariance between derivative and hidden layers (same layer)
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param no2 Number of units in second next layer
#' @param B Batch size
#' @return Covariance between derivative and hidden states
#' @export
covdx <- function(mwo, mw, mdgo2, mpdi, mdgoe, Cdozi, Cdizi, ni, no, no2, B){
mdgo2 = matrix(matrix(rep(mdgo2, no), nrow = no, byrow = TRUE), no*no2, B)
mw = matrix(rep(matrix(t(mw), ni, no), B), nrow = ni*B, ncol = no, byrow = TRUE)
mdgoe = array(aperm(array(t(mdgoe), c(no2,no,B)), perm=c(2, 1, 3)), c(no*no2,1,B))
mdgoe = t(matrix(mdgoe[, rep(1:ncol(mdgoe), each = ni),], no*no2,B*ni))
m = t(matrix(matrix(rep(t(mdgo2*mwo), ni), nrow = ni*nrow(mdgo2*mwo), byrow = TRUE), no*no2, B*ni))
Cdozi = matrix(rep(Cdozi, no2), nrow = nrow(Cdozi))
mpdi = matrix(rep(mpdi, no2), nrow(Cdozi), ncol(Cdozi))
Cdx1 = Cdozi*m*mpdi
mwdx2 = matrix(rep(mw, no2), nrow = nrow(mw))
Cdizi = matrix(Cdizi, nrow(mwdx2), ncol(mwdx2))
Cdx2 = Cdizi*mwdx2*mdgoe
Cdx = matrix(rowSums(matrix(Cdx1+Cdx2, B*ni*no, no2)), B*ni, no)
return(Cdx)
}
#' Mean vector of units
#'
#' This function calculate the mean vector of units \eqn{\mu_{Z}} for a given layer.
#'
#' @param mp Mean vector of parameters for the current layer
#' @param ma Mean vector of activation units from previous layer
#' @param idxFmwa Indices for weights and for activation
#' units for the current and previous layers respectively
#' @param idxFmwab Indices for biases of the current layer
#' @return Mean vector of units for the current layer \eqn{\mu_{Z}}
#' @export
meanMz <- function(mp, ma, idxFmwa, idxFmwab){
# mp is the mean of parameters for the current layer
# ma is the mean of activation unit (a) from previous layer
# idxFmwa{1} is the indices for weight w
# idxFmwa{2} is the indices for activation unit a
# idxFmwab is the indices for bias b
# *NOTE*: All indices have been built in the way that we bypass
# the transition step such as F*mwa + F*b
idxSum = 1 # Sum by row
mpb = matrix(mp[idxFmwab,], nrow = length(idxFmwab))
mp = matrix(mp[idxFmwa[[1]],], nrow = nrow(idxFmwa[[1]]))
ma = matrix(ma[idxFmwa[[2]],], nrow = nrow(idxFmwa[[2]]))
mWa = matrix(apply(mp * ma, idxSum, sum), nrow = nrow(mpb))
mz = mWa + mpb
return(mz)
}
#' Covariance matrix of units
#'
#' This function calculate the covariance matrix of the units \eqn{\Sigma_{Z}} for a given layer.
#'
#' @param mp Mean vector of parameters for the current layer
#' @param ma Mean vector of activation units from previous layer
#' @param Sp Covariance matrix of parameters for the current layer
#' @param Sa Covariance matrix of activation units from previous layer
#' @param idxFSwaF Indices for weights and for activation
#' units for the current and previous layers respectively
#' @param idxFSwaFb Indices for biases of the current layer
#' @return Covariance matrix of units for the current layer \eqn{\Sigma_{Z}}
#' @export
covarianceSz <- function(mp, ma, Sp, Sa, idxFSwaF, idxFSwaFb){
# mp is the mean of parameters for the current layer
# ma is the mean of activation unit (a) from previous layer
# Sp is the covariance matrix for parameters p
# Sa is the covariance matrix for a from the previous layer
# idxFSwaF{1} is the indices for weight w
# idxFSwaF{2} is the indices for activation unit a
# idxFSwaFb is the indices for bias
# *NOTE*: All indices have been built in the way that we bypass
# the transition step such as Sa = F*Cwa*F' + F*Cb*F'
idxSum = 1 # Sum by row
Spb = matrix(Sp[idxFSwaFb,], nrow = length(idxFSwaFb))
Sp = matrix(Sp[idxFSwaF[[1]],], nrow = nrow(idxFSwaF[[1]]))
ma = matrix(ma[idxFSwaF[[2]],], nrow = nrow(idxFSwaF[[2]]))
if (is.null(Sa)){
Sz = apply(Sp * ma * ma, idxSum, sum)
} else {
mp = matrix(mp[idxFSwaF[[1]],], nrow = nrow(idxFSwaF[[1]]))
Sa = matrix(Sa[idxFSwaF[[2]],], nrow = nrow(idxFSwaF[[2]]))
Sz = apply(Sp * Sa + Sp * ma * ma + Sa * mp * mp, idxSum, sum)
}
Sz = Sz + Spb
return(Sz)
}
#' Mean and covariance vectors of units (many observations)
#'
#' This function calculate the mean vector of units \eqn{\mu_{Z}} and the covariance matrix of the units \eqn{\Sigma_{Z}}for a given layer.
#'
#' @param mw Mean vector of weights for the current layer
#' @param Sw Covariance of weights for the current layer
#' @param mb Mean vector of biases for the current layer
#' @param Sb Covariance of biases for the current layer
#' @param ma Mean vector of activation units from previous layer
#' @param Sa Covariance of activation units from previous layer
#' @param ni Number of units in previous layer
#' @param no Number of units in current layer
#' @param B Batch size
#' @param rB Number of times batch size is repeated
#' @return - Mean vector of units for the current layer \eqn{\mu_{Z}}
#' @return - Covariance matrix of units for the current layer \eqn{\Sigma_{Z}}
#' @export
fcMeanVar <- function(mz, Sz, mw, Sw, mb, Sb, ma, Sa, ni, no, B, rB){
if (any(is.nan(mb))){
mb = rep(0, 1)
Sb = rep(0, 1)
} else {
mb = matrix(mb, nrow = length(mb)*B)
Sb = matrix(Sb, nrow = length(Sb)*B)
}
mw = matrix(matrix(mw, ni, no), nrow = ni, ncol = no*B)
Sw = matrix(matrix(Sw, ni, no), nrow = ni, ncol = no*B)
for (t in 1:rB){
maloop = matrix(matrix(rep(t(matrix(ma[,t], ni, B)), no), ncol = B, byrow = TRUE), ni, no*B)
Saloop = matrix(matrix(rep(t(matrix(Sa[,t], ni, B)), no), ncol = B, byrow = TRUE), ni, no*B)
out_vectorizedMeanVar <- vectorizedMeanVar(maloop, mw, Saloop, Sw)
mzloop = out_vectorizedMeanVar[[1]]
Szloop = out_vectorizedMeanVar[[2]]
mzloop = colSums(mzloop)
Szloop = colSums(Szloop)
mz[,t] = mzloop + mb
Sz[,t] = Szloop + Sb
}
outputs <- list(mz, Sz)
return(outputs)
}
#' Mean and covariance vectors of units (one observation)
#'
#' This function calculate the mean vector of units \eqn{\mu_{Z}} and the covariance matrix of the units \eqn{\Sigma_{Z}}for a given layer.
#'
#' @param mw Mean vector of weights for the current layer
#' @param Sw Covariance of weights for the current layer
#' @param mb Mean vector of biases for the current layer
#' @param Sb Covariance of biases for the current layer
#' @param ma Mean vector of activation units from previous layer
#' @param Sa Covariance of activation units from previous layer
#' @param ni Number of units in previous layer
#' @param no Number of units in current layer
#' @return - Mean vector of units for the current layer \eqn{\mu_{Z}}
#' @return - Covariance matrix of units for the current layer \eqn{\Sigma_{Z}}
#' @export
fcMeanVarB1 <- function(mw, Sw, mb, Sb, ma, Sa, ni, no){
if (any(is.nan(mb))){
mb = matrix(0, no, 1)
Sb = matrix(0, no, 1)
} else {
mb = matrix(mb, no, 1)
Sb = matrix(Sb, no, 1)
}
mw = matrix(mw, ni, no)
Sw = matrix(Sw, ni, no)
ma = matrix(ma, ni, no)
Sa = matrix(Sa, ni, no)
out_vectorizedMeanVar <- vectorizedMeanVar(ma, mw, Sa, Sw)
mzloop = out_vectorizedMeanVar[[1]]
Szloop = out_vectorizedMeanVar[[2]]
mzloop = matrix(colSums(mzloop), no, 1)
Szloop = matrix(colSums(Szloop), no, 1)
mz = mzloop + mb
Sz = Szloop + Sb
outputs <- list(mz, Sz)
return(outputs)
}
#' Backpropagation (parameters' deltas) for fully connected layers (many observations)
#'
#' This function calculates parameters' deltas at a given layer when using more than one observation at the time.
#'
#' @param deltaMw next layer delta of mean vector of weights given \eqn{y} \eqn{\mu_{\theta}|y}
#' @param deltaSw next layer delta of covariance matrix of weights given \eqn{y} \eqn{\Sigma_{\theta}|y}
#' @param deltaMb next layer delta of mean vector of biases given \eqn{y} \eqn{\mu_{\theta}|y}
#' @param deltaSb next layer delta of covariance matrix of biases given \eqn{y} \eqn{\Sigma_{\theta}|y}
#' @param Sw Covariance of weights for the current layer
#' @param Sb Covariance of biases for the current layer
#' @param ma Mean vector of activation units for the current layer
#' @param deltaMr Delta of mean vector of next layer units given \eqn{y} \eqn{\mu_{Z}|y}
#' @param deltaSr Delta of covariance matrix of next layer units given \eqn{y} \eqn{\Sigma_{Z}|y}
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param B Batch size
#' @param rB Number of times batch size is repeated
#' @return - Delta of mean vector of weights given \eqn{y} \eqn{\mu_{\theta}|y}
#' @return - Delta of covariance matrix of weights given \eqn{y} \eqn{\Sigma_{\theta}|y}
#' @return - Delta of mean vector of biases given \eqn{y} \eqn{\mu_{\theta}|y}
#' @return - Delta of covariance matrix of biases given \eqn{y} \eqn{\Sigma_{\theta}|y}
#' @export
fcParameterBackwardPass <- function(deltaMw, deltaSw, deltaMb, deltaSb, Sw, Sb, ma, deltaMr, deltaSr, ni, no, B, rB){
Cbz = matrix(rep(Sb, B), nrow = length(Sb))
deltaMw = matrix(deltaMw, ncol = rB)
deltaSw = matrix(deltaSw, ncol = rB)
deltaMb = matrix(deltaMb, ncol = rB)
deltaSb = matrix(deltaSb, ncol = rB)
for (t in 1:rB){
maloop = matrix(rep(t(matrix(ma[,t], ni, B)), no), ncol = B, byrow = TRUE)
deltaMrw = matrix(matrix(rep(deltaMr[,t], ni), nrow = ni, byrow = TRUE), ni*no, B)
deltaSrw = matrix(matrix(rep(deltaSr[,t], ni), nrow = ni, byrow = TRUE), ni*no, B)
# weights
Cwz = Sw*maloop
deltaMrw = Cwz*deltaMrw
deltaSrw = (Cwz^2)*deltaSrw
deltaMw[,t] = matrix(rowSums(deltaMrw), nrow(deltaMrw), 1)
deltaSw[,t] = matrix(rowSums(deltaSrw), nrow(deltaSrw), 1)
# Bias
if (any(!is.nan(Sb))){
deltaMrb = matrix(deltaMr[,t], no, B)
deltaSrb = matrix(deltaSr[,t], no, B)
deltaMrb = Cbz*deltaMrb
deltaSrb = (Cbz^2)*deltaSrb
deltaMb[,t] = matrix(rowSums(deltaMrb), nrow(deltaMrb), 1)
deltaSb[,t] = matrix(rowSums(deltaSrb), nrow(deltaSrb), 1)
}
}
deltaMw = matrix(rowSums(deltaMw), nrow(deltaMw), 1)
deltaSw = matrix(rowSums(deltaSw), nrow(deltaSw), 1)
deltaMb = matrix(rowSums(deltaMb), nrow(deltaMb), 1)
deltaSb = matrix(rowSums(deltaSb), nrow(deltaSb), 1)
outputs <- list(deltaMw, deltaSw, deltaMb, deltaSb)
return(outputs)
}
#' Backpropagation (parameters' deltas) for fully connected layers (one observation)
#'
#' This function calculates parameters' deltas at a given layer when using one observation at the time.
#'
#' @param Sw Covariance of weights for the current layer
#' @param Sb Covariance of biaises for the current layer
#' @param ma Mean vector of activation units for the current layer
#' @param deltaMr Delta of mean vector of next layer units given \eqn{y} \eqn{\mu_{Z}|y}
#' @param deltaSr Delta of covariance matrix of next layer units given \eqn{y} \eqn{\Sigma_{Z}|y}
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @return - Delta of mean vector of weights given \eqn{y} \eqn{\mu_{\theta}|y}
#' @return - Delta of covariance matrix of weights given \eqn{y} \eqn{\Sigma_{\theta}|y}
#' @return - Delta of mean vector of biases given \eqn{y} \eqn{\mu_{\theta}|y}
#' @return - Delta of covariance matrix of biaises given \eqn{y} \eqn{\Sigma_{\theta}|y}
#' @export
fcParameterBackwardPassB1 <- function(Sw, Sb, ma, deltaMr, deltaSr, ni, no){
Cbz = Sb
maloop = matrix(rep(t(ma), no), nrow = nrow(ma)*no, byrow = TRUE)
deltaMrw = matrix(rep(deltaMr, ni), nrow = ncol(deltaMr)*ni, byrow = TRUE)
deltaMrw = matrix(deltaMrw, ncol = 1)
deltaSrw = matrix(rep(deltaSr, ni), nrow = ncol(deltaSr)*ni, byrow = TRUE)
deltaSrw = matrix(deltaSrw, ncol = 1)
# weights
Cwz = Sw*maloop
deltaMrw = Cwz*deltaMrw
deltaSrw = (Cwz^2)*deltaSrw
deltaMw = matrix(rowSums(deltaMrw), nrow(deltaMrw), 1)
deltaSw = matrix(rowSums(deltaSrw), nrow(deltaSrw), 1)
# Bias
if (any(!is.nan(Sb))){
out_vectorizedDelta <- vectorizedDelta(Cbz, deltaMr, deltaSr)
deltaMrb = out_vectorizedDelta[[1]]
deltaSrb = out_vectorizedDelta[[2]]
deltaMb = matrix(rowSums(deltaMrb), nrow(deltaMrb), 1)
deltaSb = matrix(rowSums(deltaSrb), nrow(deltaSrb), 1)
} else {
deltaMb = Sb
deltaSb = Sb
}
outputs <- list(deltaMw, deltaSw, deltaMb, deltaSb)
return(outputs)
}
#' Covariance matrices between units and parameters
#'
#' This function calculate the covariance matrices between units and parameters
#' \eqn{\Sigma_{ZW}} and \eqn{\Sigma_{ZB}} for a given layer.
#'
#' @param ma Mean vector of activation units from previous layer \eqn{\mu_{A}}
#' @param Sp Covariance matrix of parameters for the current layer \eqn{\Sigma_{\theta}}
#' @param idxFCwwa Indices for weights and for activation
#' units for the current and previous layers respectively
#' @param idxFCb Indices for biases of the current layer
#' @return - Covariance matrix between units and biases for the current layer \eqn{\Sigma_{ZB}}
#' @return - Covariance matrix between units and weights for the current layer \eqn{\Sigma_{ZW}}
#' @export
covarianceCzp <- function(ma, Sp, idxFCwwa, idxFCb) {
# ma is the mean of activation unit (a) from previous layer
# Sp is the covariance matrix for parameters p
# idxFCwwa{1} is the indices for weight w
# idxFCwwa{2} is the indices for weight action unit a
# idxFcb is the indices for bias b
# *NOTE*: All indices have been built in the way that we bypass
# the transition step such as Cpa = F*Cpwa + F*Cb
Czb = matrix(Sp[idxFCb,], nrow = length(idxFCb))
Sp = matrix(Sp[idxFCwwa[[1]]], nrow = nrow(idxFCwwa[[1]]))
ma = matrix(ma[idxFCwwa[[2]]], nrow = nrow(idxFCwwa[[2]]))
Czw = Sp * ma
outputs <- list(Czb, Czw)
return(outputs)
}
#' Covariance matrix between units of the previous and current layers
#'
#' This function calculate the covariance matrix between units of the previous
#' and current layers \eqn{\Sigma_{ZZ^{+}}} for a given layer.
#'
#' @param mp Mean vector of parameters for the current layer \eqn{\mu_{\theta}}
#' @param Sz Covariance matrix of units for the current layer \eqn{\Sigma_{Z}}
#' @param J Jacobian matrix evaluated at \eqn{\mu_{Z}}
#' @param idxCawa Indices for weights and for activation
#' units for the current and previous layers respectively
#' @return Covariance matrix between units of previous and current layers \eqn{\Sigma_{ZZ^{+}}}
#' @export
covarianceCzz <- function(mp, Sz, J, idxCawa) {
Sz = matrix(Sz[idxCawa[[2]]], nrow = nrow(idxCawa[[2]]))
mp = matrix(mp[idxCawa[[1]]], nrow = nrow(idxCawa[[1]]))
J = matrix(J[idxCawa[[2]]], nrow = nrow(idxCawa[[2]]))
Czz = J * Sz * mp
return(Czz)
}
# Update Step
#' Backward parameters update
#'
#' This function updates parameters from responses to input data. It updates
#' \eqn{\mu_{\theta|y}} and \eqn{\Sigma_{\theta|y}} from the \eqn{\theta|y}
#' distribution for a given layer.
#'
#' @param mp Mean vector of parameters for the current layer \eqn{\mu_{\theta}}
#' @param Sp Covariance matrix of parameters for the current layer \eqn{\Sigma_{\theta}}
#' @param mzF Mean vector of units for the next layer \eqn{\mu_{Z^{+}}}
#' @param SzF Covariance matrix of units for the next layer \eqn{\Sigma_{Z^{+}}}
#' @param SzB Covariance matrix of units for the next layer given \eqn{y} \eqn{\Sigma_{Z^{+}|y}}
#' @param Czp Covariance matrix between units and parameters for the current layer \eqn{\Sigma_{\theta Z^{+}}}
#' @param mzB Mean vector of units for the next layer given \eqn{y} \eqn{\mu_{Z^{+}|y}}
#' @param idx Indices for the parameter update step of
#' the current layer
#' @details \eqn{f(\boldsymbol{\theta}|\boldsymbol{y}) = \mathcal{N}(\boldsymbol{\theta};\boldsymbol{\mu_{\theta|y}},\boldsymbol{\Sigma_{\theta|y}})} where
#' @details \eqn{\boldsymbol{\mu_{\theta|y}} =\boldsymbol{\mu_{\theta}} + \boldsymbol{J_{\theta}}(\boldsymbol{\mu_{Z^{+}|y}} - \boldsymbol{\mu_{Z^{+}}})}
#' @details \eqn{\boldsymbol{\Sigma_{\theta|y}} = \boldsymbol{\Sigma_{\theta}} + \boldsymbol{J_{\theta}}(\boldsymbol{\Sigma_{Z^{+}|y}} - \boldsymbol{\Sigma_{Z^{+}}})\boldsymbol{J_{\theta}^{T}}}
#' @details \eqn{\boldsymbol{J_{\theta}} = \boldsymbol{\Sigma_{\theta Z^{+}}}\boldsymbol{\Sigma^{-1}_{Z^{+}}}}
#' @return - Mean vector of parameters for the current layer given \eqn{y} \eqn{\mu_{\theta|y}}
#' @return - Covariance matrix of parameters for the current layer given \eqn{y} \eqn{\Sigma_{\theta|y}}
#' @export
backwardParameterUpdate <- function(mp, Sp, mzF, SzF, SzB, Czp, mzB, idx){
dz = mzB - mzF
dz = matrix(dz[idx,], nrow = length(idx))
dS = SzB - SzF
dS = matrix(dS[idx,], nrow = length(idx))
SzF = 1 / SzF
SzF = matrix(SzF[idx,], nrow = length(idx))
J = Czp * SzF
# Mean
mpUd = mp + rowSums(J * dz)
# Covariance
SpUd = Sp + rowSums(J * dS * J)
outputs <- list(mpUd, SpUd)
return(outputs)
}
#' Backward hidden states update
#'
#' This function updates hidden units from responses to input data. It updates
#' \eqn{\mu_{Z|y}} and \eqn{\Sigma_{Z|y}} from the \eqn{Z|y}
#' distribution for a given layer.
#'
#' @param mz Mean vector of units for the current layer \eqn{\mu_{Z}}
#' @param Sz Covariance matrix of units for the current layer \eqn{\Sigma_{Z}}
#' @param mzF Mean vector of units for the next layer \eqn{\mu_{Z^{+}}}
#' @param SzF Covariance matrix of units for the next layer \eqn{\Sigma_{Z^{+}}}
#' @param SzB Covariance matrix of units for the next layer given \eqn{y} \eqn{\Sigma_{Z^{+}|y}}
#' @param Czz Covariance matrix between units of previous and current layers \eqn{\Sigma_{ZZ^{+}}}
#' @param mzB Mean vector of units for the next layer given \eqn{y} \eqn{\mu_{Z^{+}|y}}
#' @param idx Indices for the hidden state update step of
#' the current layer
#' @details \eqn{f(\boldsymbol{z}|\boldsymbol{y}) = \mathcal{N}(\boldsymbol{z};\boldsymbol{\mu_{Z|y}},\boldsymbol{\Sigma_{Z|y}})} where
#' @details \eqn{\boldsymbol{\mu_{Z|y}} = \boldsymbol{\mu_{Z}} + \boldsymbol{J_{Z}}(\boldsymbol{\mu_{Z^{+}|y}} - \boldsymbol{\mu_{Z^{+}}})}
#' @details \eqn{\boldsymbol{\Sigma_{Z|y}} = \boldsymbol{\Sigma_{Z}} + \boldsymbol{J_{Z}}(\boldsymbol{\Sigma_{Z^{+}|y}} - \boldsymbol{\Sigma_{Z^{+}}})\boldsymbol{J_{Z}^{T}}}
#' @details \eqn{\boldsymbol{J_{Z}} = \boldsymbol{\Sigma_{Z Z^{+}}}\boldsymbol{\Sigma^{-1}_{Z^{+}}}}
#' @return - Mean vector of units for the current layer given \eqn{y} \eqn{\mu_{Z|y}}
#' @return - Covariance matrix of units for the current layer given \eqn{y} \eqn{\Sigma_{Z|y}}
#' @export
backwardHiddenStateUpdate <- function(mz, Sz, mzF, SzF, SzB, Czz, mzB, idx){
dz = mzB - mzF
dz = matrix(dz[idx,], nrow = nrow(idx))
dS = SzB - SzF
dS = matrix(dS[idx,], nrow = nrow(idx))
SzF = 1 / SzF
SzF = matrix(SzF[idx,], nrow = nrow(idx))
J = Czz * SzF
# Mean
mzUd = mz + rowSums(J * dz)
# covariance
SzUd = Sz + rowSums(J * dS * J)
outputs <- list(mzUd, SzUd)
return(outputs)
}
#' Last hidden layer states' deltas update
#'
#' This function updates hidden layer units' deltas using next hidden layer' deltas. It updates
#' \eqn{\mu_{Z|y}} and \eqn{\Sigma_{Z|y}} from the \eqn{Z|y}
#' distribution.
#'
#' @param SzF Covariance matrix of units for the next layer \eqn{\Sigma_{y}}
#' @param dMz Delta of mean vector of units for the next hidden layer \eqn{\mu_{Z}}
#' @param dSz Delta of covariance matrix of units for the next hidden layer \eqn{\Sigma_{Z}}
#' @details \eqn{f(\boldsymbol{z}|\boldsymbol{y}) = \mathcal{N}(\boldsymbol{z};\boldsymbol{\mu_{Z|y}},\boldsymbol{\Sigma_{Z^|y}})} where
#' @details \eqn{\boldsymbol{\mu_{{Z}|y}} = \boldsymbol{\mu_{Z}} + \boldsymbol{J_{Z}}(\boldsymbol{\mu_{Z^{+}|y}} - \boldsymbol{\mu_{Z^{+}}})}
#' @details \eqn{\boldsymbol{\Sigma_{{Z}|y}} = \boldsymbol{\Sigma_{Z}} + \boldsymbol{J_{Z}}(\boldsymbol{\Sigma_{Z^{+}|y}} - \boldsymbol{\Sigma_{Z^{+}}})\boldsymbol{J_{Z}^{T}}}
#' @details \eqn{\boldsymbol{J_{Z}} = \boldsymbol{\Sigma_{ZZ^{+}}} \boldsymbol{\Sigma_{Z^{+}}^{-1}}}
#' @return - Delta of mean vector of current hidden layer units given \eqn{y} \eqn{\mu_{Z}|y}
#' @return - Delta of covariance matrix of current hidden layer units given \eqn{y} \eqn{\Sigma_{Z}|y}
#' @export
innovationVector <- function(SzF, dMz, dSz){
iSzF = 1 / SzF
iSzF[is.infinite(iSzF)] = 0
out_vectorizedDelta <- vectorizedDelta(iSzF, dMz, dSz)
deltaM = out_vectorizedDelta[[1]]
deltaS = out_vectorizedDelta[[2]]
out <- list(deltaM, deltaS)
return(out)
}
#' Last hidden layer states update
#'
#' This function updates last hidden layer units using responses. It updates
#' \eqn{\mu_{Z^{(0)}|y}} and \eqn{\Sigma_{Z^{(0)}|y}} from the \eqn{Z^{(0)}|y}
#' distribution.
#'
#' @param mz Mean vector of units for the last hidden layer \eqn{\mu_{X^{(0)}}}
#' @param Sz Covariance matrix of units for the last hidden layer \eqn{\Sigma_{Z^{(0)}}}
#' @param mzF Mean vector of units for the output layer \eqn{\mu_{y}}
#' @param SzF Covariance matrix of tunits for the output layer \eqn{\Sigma_{y}}
#' @param Cyz Covariance matrix between last hidden layer units and responses \eqn{\Sigma_{YZ^{(0)}}}
#' @param y Response data
#' @details \eqn{f(\boldsymbol{z}^{(0)}|\boldsymbol{y}) = \mathcal{N}(\boldsymbol{z}^{(0)};\boldsymbol{\mu_{Z^{(0)}|y}},\boldsymbol{\Sigma_{Z^{(0)}|y}})} where
#' @details \eqn{\boldsymbol{\mu_{{Z}^{(0)}|y}} = \boldsymbol{\mu_{Z^{(0)}}} + \boldsymbol{\Sigma_{YZ^{(0)}}^{T}}\boldsymbol{\Sigma^{-1}_{Y}}(\boldsymbol{y} - \boldsymbol{\mu_{Y}})}
#' @details \eqn{\boldsymbol{\Sigma_{{Z}^{(0)}|y}} = \boldsymbol{\Sigma_{Z^{(0)}}} - \boldsymbol{\Sigma_{YZ^{(0)}}^{T}}\boldsymbol{\Sigma^{-1}_{Y}}\boldsymbol{\Sigma_{YZ^{(0)}}}}
#' @return - Mean vector of last hidden layer units given \eqn{y} \eqn{\mu_{Z^{(0)}|y}}
#' @return - Covariance matrix of last hidden layer units given \eqn{y} \eqn{\Sigma_{Z^{(0)}|y}}
#' @export
forwardHiddenStateUpdate <- function(mz, Sz, mzF, SzF, Cyz, y){
dz = y - mzF
SzF = 1 / SzF
SzF[is.infinite(SzF)] = 0
K = Cyz * SzF
# Mean
mzUd = mz + K * dz
# covariance
SzUd = Sz - K * Cyz
#Outputs
out <- list(mzUd, SzUd)
return(out)
}
#' Backpropagation (parameters update)
#'
#' This function updates parameters.
#'
#' @param theta List of parameters
#' @param deltaTheta Parameters' deltas (mean and covariance for each)
#' @return List of updated parameters
#' @export
globalParameterUpdate <- function(theta, deltaTheta){
# Initialization
out_extractParameters <- extractParameters(theta)
mw = out_extractParameters[[1]]
Sw = out_extractParameters[[2]]
mb = out_extractParameters[[3]]
Sb = out_extractParameters[[4]]
mwx = out_extractParameters[[5]]
Swx = out_extractParameters[[6]]
mbx = out_extractParameters[[7]]
Sbx = out_extractParameters[[8]]
out_extractParameters <- extractParameters(deltaTheta)
deltaMw = out_extractParameters[[1]]
deltaSw = out_extractParameters[[2]]
deltaMb = out_extractParameters[[3]]
deltaSb = out_extractParameters[[4]]
deltaMwx = out_extractParameters[[5]]
deltaSwx = out_extractParameters[[6]]
deltaMbx = out_extractParameters[[7]]
deltaSbx = out_extractParameters[[8]]
out_twoPlus <- twoPlus(mw, Sw, deltaMw, deltaSw)
mw = out_twoPlus[[1]]
Sw = out_twoPlus[[2]]
out_twoPlus <- twoPlus(mb, Sb, deltaMb, deltaSb)
mb = out_twoPlus[[1]]
Sb = out_twoPlus[[2]]
out_twoPlus <- twoPlus(mwx, Swx, deltaMwx, deltaSwx)
mwx = out_twoPlus[[1]]
Swx = out_twoPlus[[2]]
out_twoPlus <- twoPlus(mbx, Sbx, deltaMbx, deltaSbx)
mbx = out_twoPlus[[1]]
Sbx = out_twoPlus[[2]]
theta = compressParameters(mw, Sw, mb, Sb, mwx, Swx, mbx, Sbx)
return(theta)
}
#' Reformat covariance matrix between units and parameters
#'
#' This function properly reformats covariance matrix between units and parameters
#' \eqn{\Sigma_{Z\theta}} for the update step.
#'
#' @param Czw Covariance matrix between units and weights for the current layer
#' @param Czb Covariance matrix between units and baises for the current layer
#' @param currentHiddenUnit Number of units in the current layer
#' @param prevHiddenUnit Number of units in the previous layer
#' @param batchSize Number of observations trained at the same time
#' @return Reformatted covariance matrix between units and parameters
#' @export
buildCzp <- function(Czw, Czb, currentHiddenUnit, prevHiddenUnit, batchSize){
Czp = rbind(Czb, Czw)
Czp = t(matrix(Czp, nrow = batchSize, ncol = currentHiddenUnit*prevHiddenUnit + currentHiddenUnit))
return(Czp)
}
#' Reformat covariance matrix between units of the previous and current layers
#'
#' This function properly reformats covariance matrix between units of the
#' previous and current layers \eqn{\Sigma_{ZZ^{+}}} for the update step.
#'
#' @param Czz Covariance matrix between units of the previous and current layers
#' @param currentHiddenUnit Number of units in the current layer
#' @param prevHiddenUnit Number of units in the previous layer
#' @param batchSize Number of observations trained at the same time
#' @return Reformatted covariance matrix between of the previous and current layers
#' @export
buildCzz <- function(Czz, currentHiddenUnit, prevHiddenUnit, batchSize){
Czz = t(matrix(Czz, nrow = currentHiddenUnit, ncol = prevHiddenUnit*batchSize))
return(Czz)
}
#' Combination of products of first derivative (iterations on nodes)
#'
#' This function calculates mean of combination of products of first derivatives (wd)*(wd).
#' Each node is multiplied to another node in the same layer (including itself).
#' Their weights are both pointing to the same node in the next layer.
#'
#' @param mpdi Mean vector of first derivative product wd of current layer
#' @param mw Mean vector of weights for the current layer
#' @param Sw Covariance of weights for the current layer
#' @param mda Mean vector of activation units' first derivative from current layer
#' @param Sda Covariance of activation units' first derivative from current layer
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param B Batch size
#' @return Mean array of combination of products of first derivatives
#' @export
fcCombinaisonDnode <- function(mpdi, mw, Sw, mda, Sda, ni, no, B){
mw = matrix(rep(t(matrix(mw, ni, no)), B), nrow = ni*B, ncol = no, byrow = TRUE)
Sw = matrix(rep(t(matrix(Sw, ni, no)), B), nrow = ni*B, ncol = no, byrow = TRUE)
mda = matrix(mda, nrow(mw), ncol(mw))
Sda = matrix(Sda, nrow(Sw), ncol(Sw))
mpdi2 = array(0, c(ni*B, no, ni))
for (b in 0:(B-1)){
for (k in 1:ni){
for (j in 1:no){
for (i in (b*ni+1):(b*ni+ni)){
if (b*ni+k == i){
var = Sw[i,j]*Sda[i,j] + Sw[i,j]*mda[i,j]^2 + Sda[i,j]*mw[i,j]^2
} else{
var = 0
}
mpdi2[i,j,k] = mpdi[i,j]*mpdi[b*ni+k,j] + var
}
}
}
}
return(mpdi2)
}
#' Combination of products of first derivative (iterations on weights)
#'
#' This function calculates mean of combination of products of first derivatives (wd)*(wd).
#' Each weight is multiplied to another weight (including itself) from the same node.
#' Each node is multiplied to the same node (in the same layer).
#'
#' @param mpdi Mean vector of first derivative product wd of current layer
#' @param mw Mean vector of weights for the current layer
#' @param Sw Covariance of weights for the current layer
#' @param mda Mean vector of activation units' first derivative from current layer
#' @param Sda Covariance of the activation units' first derivative from current layer
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param B Batch size
#' @return Mean array of combination of products of first derivatives
#' @export
fcCombinaisonDweight <- function(mpdi, mw, Sw, mda, Sda, ni, no, B){
mw = matrix(rep(t(matrix(mw, ni, no)), B), nrow = ni*B, ncol = no, byrow = TRUE)
Sw = matrix(rep(t(matrix(Sw, ni, no)), B), nrow = ni*B, ncol = no, byrow = TRUE)
mda = matrix(mda, nrow(mw), ncol(mw))
Sda = matrix(Sda, nrow(Sw), ncol(Sw))
mpdi2w = array(0, c(ni*B, no, no))
for (k in 1:no){
for (j in 1:no){
if (j == k){
var = Sw[,j]*Sda[,j] + Sw[,j]*mda[,j]^2 + Sda[,j]*mw[,j]^2
} else {
var = mw[,k]*mw[,j]*Sda[,k]
}
mpdi2w[,j,k] = mpdi[,k]*mpdi[,j] + var
}
}
return(mpdi2w)
}
#' Combination of squared products of first derivative
#'
#' This function calculates mean of squared products of first derivatives (wd)^2.
#' Every products (weight times node) from current layer are considered which results in a (B*ni x no)-matrix.
#'
#' @param mpdi Mean vector of first derivative product wd of current layer
#' @param mw Mean vector of weights for the current layer
#' @param Sw Covariance of weights for the current layer
#' @param mda Mean vector of activation units' first derivative from current layer
#' @param Sda Covariance of activation units' first derivative from current layer
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param B Batch size
#' @return Mean matrix of squared products of first derivatives
#' @export
fcCombinaisonDweightNode <- function(mpdi, mw, Sw, mda, Sda, ni, no, B){
mw = matrix(rep(t(matrix(mw, ni, no)), B), nrow = ni*B, ncol = no, byrow = TRUE)
Sw = matrix(rep(t(matrix(Sw, ni, no)), B), nrow = ni*B, ncol = no, byrow = TRUE)
mda = matrix(mda, nrow(mw), ncol(mw))
Sda = matrix(Sda, nrow(Sw), ncol(Sw))
mpdi2wn = mpdi^2 + Sw*Sda + Sw*mda^2 + Sda*mw^2
return(mpdi2wn)
}
#' All possible combinations of products of first derivatives
#'
#' This function calculates mean of products of first derivatives wd*wd.
#' Since both weight and node are iterated over all products, every products
#' (weight times node) from current layer are considered which results in a
#' (Bni x no x noni)-array. I.e. each dimension of the array represents a single
#' product being multiplied to all other possible products from current layer.
#' Order is as followed: w11d1, w12d2, w13d3, ..., w1nidni, w21d1, w22d2, ..., w2nidni, ... wno1d1, ..., wnonidni
#'
#' @param mpdi Mean vector of first derivative product wd of current layer
#' @param mpdin Mean array of combination of products of first derivatives (iterations on nodes)
#' @param mpdiw Mean array of combination of products of first derivatives (iterations on weights)
#' @param ni Number of units in current layer
#' @param no Number of units in next layer
#' @param B Batch size
#' @return Mean array of combination of products of first derivatives
#' @export
fcCombinaisonDweightNodeAll <- function(mpdi, mpdin, mpdiw, ni, no, B){
mpdi2wnAll = array(0, c(ni*B, no, no*ni))
seq = c(mpdi)
for (k in 1:(no*ni)){
mpdi2wnAll[,,k] = seq[k]*mpdi
}
# Adjust expectations when there is a covariance term to consider
for (k in 1:no*ni){
i = as.numeric(which(mpdi == seq[k], arr.ind = TRUE)[,"row"])
j = as.numeric(which(mpdi == seq[k], arr.ind = TRUE)[,"col"])
mpdi2wnAll[i,,k] = mpdiw[i,,j]
mpdi2wnAll[,j,k] = mpdin[,j,i]
}
return(mpdi2wnAll)
}
#' Weights and biases initialization
#'
#' This function initializes the first weights and biases of the neural network.
#'
#' @param NN Lists the structure of the neural network
#' @return - Initial mean vectors of parameters for each layer
#' @return - Initial covariance matrices of parameters
#' for each layer
#' @export
initializeWeightBias <- function(NN){
# Initialization
NN$dropWeight = NULL
NN <- c(NN, dropWeight = 0)
nodes = NN$nodes
numLayers = length(nodes)
idxw = NN$idxw
idxb = NN$idxb
factor4Bp = NN$factor4Bp
factor4Wp = NN$factor4Wp
mp = matrix(list(), nrow = numLayers - 1, ncol = 1)
Sp = matrix(list(), nrow = numLayers - 1, ncol = 1)
for (j in 2:numLayers){
# Bias variance
Sbwloop_1 = factor4Bp[j-1] * matrix(1L, nrow = 1, ncol = length(idxb[[j-1, 1]]))
# Bias mean
bwloop_1 = stats::rnorm(ncol(Sbwloop_1)) * sqrt(Sbwloop_1)
# weight variance
if (NN$hiddenLayerActivation == "relu" || NN$hiddenLayerActivation == "softplus" || NN$hiddenLayerActivation == "sigm"){
Sbwloop_2 = factor4Wp[j-1] * (1/nodes[j-1]) * matrix(1L, nrow = 1, ncol = length(idxw[[j-1, 1]]))
} else {
Sbwloop_2 = factor4Wp[j-1] * (2/nodes[j-1] + nodes[j]) * matrix(1L, nrow = 1, ncol = length(idxw[[j-1, 1]]))
}
# weight mean
if (NN$dropWeight == 0){
bwloop_2 = stats::rnorm(ncol(Sbwloop_2)) * sqrt(Sbwloop_2)
} else {
bwloop_2 = matrix(0, nrow = 1, ncol = length(idxw[[j-1, 1]]))
}
mp[[j-1, 1]] = t(cbind(bwloop_1, bwloop_2))
Sp[[j-1, 1]] = t(cbind(Sbwloop_1, Sbwloop_2))
}
outputs <- list(mp, Sp)
return(outputs)
}
#' Weights and biases initialization for calculating derivatives
#'
#' This function initializes the first weights and biases of the neural network.
#'
#' @param NN Lists the structure of the neural network
#' @return All parameters required in the neural network to perform derivative calculations
#' @export
initializeWeightBiasD <- function(NN){
# Initialization
nodes = NN$nodes
numLayers = length(nodes)
idxw = NN$idxw
idxb = NN$idxb
biasStd = 0.01
noParam = NaN
gainM = NN$gainM
gainS = NN$gainS
mw = createInitCellwithArray(numLayers-1)
Sw = createInitCellwithArray(numLayers-1)
mb = createInitCellwithArray(numLayers-1)
Sb = createInitCellwithArray(numLayers-1)
mwx = createInitCellwithArray(numLayers-1)
Swx = createInitCellwithArray(numLayers-1)
mbx = createInitCellwithArray(numLayers-1)
Sbx = createInitCellwithArray(numLayers-1)
for (j in 2:numLayers){
if (!is.null(idxw[[j-1,1]])){
fanIn = nodes[j-1]
fanOut = nodes[j]
if (NN$initParamType == "Xavier"){
scale = 2 / (fanIn +fanOut)
Sw[[j-1,1]] = gainS[j-1] * scale * rep(1, nrow(idxw[[j-1,1]]))
}
else if (NN$initParamType == "He"){
scale = 1 / (fanIn +fanOut)
Sw[[j-1,1]] = gainS[j-1] * scale * rep(1, nrow(idxw[[j-1,1]]))
}
mw[[j-1,1]] = gainM[j-1] * stats::rnorm(length(Sw[[j-1,1]])) * sqrt(Sw[[j-1,1]])
if (!is.null(idxb[[j-1,1]])){
Sb[[j-1,1]] = gainS[j-1] * scale * rep(1, nrow(idxb[[j-1,1]]))
mb[[j-1,1]] = stats::rnorm(length(Sb[[j-1,1]])) * sqrt(Sb[[j-1,1]])
}
else {
mw[[j-1,1]] = noParam
Sw[[j-1,1]] = noParam
Sb[[j-1,1]] = noParam
mb[[j-1,1]] = noParam
}
}
}
out_catParameters = catParameters(mw, Sw, mb, Sb, mwx, Swx, mbx, Sbx)
mw = out_catParameters[[1]]
Sw = out_catParameters[[2]]
mb = out_catParameters[[3]]
Sb = out_catParameters[[4]]
mwx = out_catParameters[[5]]
Swx = out_catParameters[[6]]
mbx = out_catParameters[[7]]
Sbx = out_catParameters[[8]]
theta = compressParameters(mw, Sw, mb, Sb, mwx, Swx, mbx, Sbx)
return(theta)
}
#' States initialization
#'
#' Initiliazes neural network states.
#'
#' @param nodes Vector which contains the number of nodes at each layer
#' @param B Batch size
#' @param rB Number of times batch size is repeated
#' @return States of the neural network
#' @export
initializeStates <- function(nodes, B, rB, xsc){
# Normal network
numLayers = length(nodes)
mz <- createStateCellarray(nodes, numLayers, B, rB)
Sz = mz
ma = mz
Sa = mz
J = mz
# Residual network
idx = ifelse(xsc, no = 0)
mdxs = matrix(list(), nrow = numLayers, ncol = 1)
for (i in 1:numLayers){
if (idx[i] == 1){
mdxs[[i, 1]] = mz[[i, 1]]
}
}
Sdxs = mdxs
mxs = mdxs
Sxs = mdxs
states = compressStates(mz, Sz, ma, Sa, J, mdxs, Sdxs, mxs, Sxs)
return(states)
}
#' Input initialization
#'
#' Initializes neural network inputs.
#'
#' @param states States of the neural network
#' @param mz0 Input data
#' @param Sz0 Variance of input data
#' @param ma0 Activated input data
#' @param Sa0 Variance of activated input data
#' @param J0 Jacobian
#' @param ... Other parameters
#' @return States of the neural network
#' @export
initializeInputs <- function(states, mz0, Sz0, ma0, Sa0, J0, mdxs0, Sdxs0, mxs0, Sxs0, xsc){
out_extractStates = extractStates(states)
mz = out_extractStates[[1]]
Sz = out_extractStates[[2]]
ma = out_extractStates[[3]]
Sa = out_extractStates[[4]]
J = out_extractStates[[5]]
mdxs = out_extractStates[[6]]
Sdxs = out_extractStates[[7]]
mxs = out_extractStates[[8]]
Sxs = out_extractStates[[9]]
# Normal network
mz[[1,1]] = mz0
if (any(is.null(Sz0))){
Sz[[1,1]] = matrix(0, nrow(mz0), ncol(mz0))
} else {
Sz[[1,1]] = Sz0
}
if (any(is.null(ma0))){
ma[[1,1]] = mz0
} else {
ma[[1,1]] = ma0
}
if (any(is.null(Sa0))){
Sa[[1,1]] = Sz[[1,1]]
} else {
Sa[[1,1]] = Sa0
}
if (any(is.null(J0))){
J[[1,1]] = matrix(1, nrow(mz0), ncol(mz0))
} else {
J[[1,1]] = J0
}
# Residual network
if (any(is.null(mdxs0)) & !all(xsc == 0)){
mdxs[[1,1]] = mz0
} else {
mdxs[1,1] = list(mdxs0) # In this case, mdxs0 might be NULL (only way to code it)
}
if (any(is.null(Sdxs0)) & !all(xsc == 0)){
Sdxs[[1,1]] = matrix(0, nrow(mz0), ncol(mz0))
} else {
Sdxs[1,1] = list(Sdxs0)
}
if (any(is.null(mxs0)) & !all(xsc == 0)){
mxs[[1,1]] = mz0
} else {
mxs[1,1] = list(mxs0)
}
if (any(is.null(Sxs0)) & !all(xsc == 0)){
Sxs[[1,1]] = matrix(0, nrow(mz0), ncol(mz0))
} else {
Sxs[1,1] = list(Sxs0)
}
states = compressStates(mz, Sz, ma, Sa, J, mdxs, Sdxs, mxs, Sxs)
return(states)
}
#' Initialization (matrix of lists)
#'
#' Initializes a matrix containing lists.
#'
#' @param numLayers Number of layers in the neural network
#' @return Matrix containing empty lists
#' @export
createInitCellwithArray <- function(numLayers){
x = matrix(list(), nrow = numLayers, ncol = 1)
for (j in 1:numLayers){
x[[j, 1]] = NaN
}
return(x)
}
#' States initialization (zero-matrices)
#'
#' Initiliazes neural network states at 0.
#'
#' @param nodes Vector which contains the number of nodes at each layer
#' @param numLayers Number of layers in the neural network
#' @param B Batch size
#' @param rB Number of times batch size is repeated
#' @return Zero-matrices for each layer
#' @export
createStateCellarray <- function(nodes, numLayers, B, rB){
z = matrix(list(), nrow = numLayers, ncol = 1)
for (j in 2:numLayers){
z[[j, 1]] = matrix(0, nrow = nodes[j]*B, ncol = rB)
}
return(z)
}
#' States initialization (unit matrices)
#'
#' Initiliazes neural network derivative states at 1.
#'
#' @param nodes Vector which contains the number of nodes at each layer
#' @param numLayers Number of layers in the neural network
#' @param B Batch size
#' @param rB Number of times batch size is repeated
#' @return Unit matrices for each layer
#' @export
createDevCellarray <- function(nodes, numLayers, B, rB){
d = matrix(list(), nrow = numLayers, ncol = 1)
for (j in 1:numLayers){
d[[j, 1]] = matrix(1, nrow = nodes[j]*B, ncol = rB)
}
return(d)
}
#' Concatenate parameters
#'
#' Combines in a single column vector each parameter for all layers.
#'
#' @param mw Mean of weights for the current layer
#' @param Sw Covariance of weights for the current layer
#' @param mb Mean of biases for the current layer
#' @param Sb Covariance of biases for the current layer
#' @param ... Other parameters
#' @return - Mean vector of weights for the current layer
#' @return - Covariance vector of weights for the current layer
#' @return - Mean vector of biases for the current layer
#' @return - Covariance vector of biases for the current layer
#' @export
catParameters <- function(mw, Sw, mb, Sb, mwx, Swx, mbx, Sbx){
var <- list(mw, Sw, mb, Sb, mwx, Swx, mbx, Sbx)
for (n in 1:length(var)){
start = var[[n]][[1]]
for (i in 1:(length(var[[n]])-1)){
start = c(start, var[[n]][[i+1]])
}
var[[n]] = start
}
mw = var[[1]]
Sw = var[[2]]
mb = var[[3]]
Sb = var[[4]]
mwx = var[[5]]
Swx = var[[6]]
mbx = var[[7]]
Sbx = var[[8]]
outputs <- list(mw, Sw, mb, Sb, mwx, Swx, mbx, Sbx)
return(outputs)
}
#' Compress states
#'
#' Put together states into a list of states.
#'
#' @param mz Mean of units for each layer
#' @param Sz Covariance of units for each layer
#' @param ma Mean of activated units for each layer
#' @param Sa Covariance of activated units for each layer
#' @param J Jacobian
#' @param ... Other parameters
#' @return List of states
#' @export
compressStates <- function(mz, Sz, ma, Sa, J, mdxs, Sdxs, mxs, Sxs){
states = matrix(list(), nrow = 9, ncol = 1)
states[[1, 1]] = mz
states[[2, 1]] = Sz
states[[3, 1]] = ma
states[[4, 1]] = Sa
states[[5, 1]] = J
states[[6, 1]] = mdxs
states[[7, 1]] = Sdxs
states[[8, 1]] = mxs
states[[9, 1]] = Sxs
return(states)
}
#' Extract states
#'
#' Extract states from list of states.
#'
#' @param states List of states
#' @return - Mean of units for each layer
#' @return - Covariance of units for each layer
#' @return - Mean of activated units for each layer
#' @return - Covariance of activated units for each layer
#' @return - Jacobian
#' @export
extractStates <- function(states){
mz = states[[1, 1]]
Sz = states[[2, 1]]
ma = states[[3, 1]]
Sa = states[[4, 1]]
J = states[[5, 1]]
mdxs = states[[6, 1]]
Sdxs = states[[7, 1]]
mxs = states[[8, 1]]
Sxs = states[[9, 1]]
outputs <- list(mz, Sz, ma, Sa, J, mdxs, Sdxs, mxs, Sxs)
return(outputs)
}
#' Compress parameters
#'
#' Put together parameters into a list of parameters.
#'
#' @param mw Mean vector of weights for the current layer
#' @param Sw Covariance vector of weights for the current layer
#' @param mb Mean vector of biases for the current layer
#' @param Sb Covariance vector of biases for the current layer
#' @param ... Other parameters
#' @return List of parameters
#' @export
compressParameters <- function(mw, Sw, mb, Sb, mwx, Swx, mbx, Sbx){
theta = matrix(list(), nrow = 8, ncol = 1)
theta[[1, 1]] = mw
theta[[2, 1]] = Sw
theta[[3, 1]] = mb
theta[[4, 1]] = Sb
theta[[5, 1]] = mwx
theta[[6, 1]] = Swx
theta[[7, 1]] = mbx
theta[[8, 1]] = Sbx
return(theta)
}
#' Extract parameters
#'
#' Extract parameters from list of parameters.
#'
#' @param theta List of parameters
#' @return - Mean vector of weights for the current layer
#' @return - Covariance vector of weights for the current layer
#' @return - Mean vector of biases for the current layer
#' @return - Covariance vector of biases for the current layer
#' @export
extractParameters <- function(theta){
mw = theta[[1, 1]]
Sw = theta[[2, 1]]
mb = theta[[3, 1]]
Sb = theta[[4, 1]]
mwx = theta[[5, 1]]
Swx = theta[[6, 1]]
mbx = theta[[7, 1]]
Sbx = theta[[8, 1]]
outputs <- list(mw, Sw, mb, Sb, mwx, Swx, mbx, Sbx)
return(outputs)
}
# Compress normalized statistics
#
# Put together normalized statistics into a list.
#
# @param mra Mean vector of normalized units for the current layer
# @param Sra TBD
# @return normStat: TBD
# @export
compressNormStat <- function(mra, Sra){
normStat = matrix(list(), nrow = 2, ncol = 1)
normStat[[1, 1]] = mra
normStat[[2, 1]] = Sra
return(normStat)
}
# Initialization of normalized statistics
#
# Initializes a matrix containing lists of 0 for TBD.
#
# @param NN Lists the structure of the neural network
# @return normStat: TBD
# @export
createInitNormStat <- function(NN){
mra = matrix(list(), nrow = length(NN$nodes) - 1, ncol = 1)
Sra = matrix(list(), nrow = length(NN$nodes) - 1, ncol = 1)
for (i in 1:(length(NN$nodes) - 1)){
mra[[i,1]] = 0
Sra[[i,1]] = 0
}
normStat = compressNormStat(mra, Sra)
return(normStat)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.