# R/nn-init.R in torch: Tensors and Neural Networks with 'GPU' Acceleration

#### Documented in nn_init_calculate_gain, nn_init_constant_, nn_init_dirac_, nn_init_eye_, nn_init_kaiming_normal_, nn_init_kaiming_uniform_, nn_init_normal_, nn_init_ones_, nn_init_orthogonal_, nn_init_sparse_, nn_init_trunc_normal_, nn_init_uniform_, nn_init_xavier_normal_, nn_init_xavier_uniform_, nn_init_zeros_

```r
nn_init_no_grad_uniform <- function(tensor, a, b) {
  with_no_grad({
    out <- tensor$uniform_(a, b)
  })
  out
}

nn_init_no_grad_normal <- function(tensor, mean, std) {
  with_no_grad({
    out <- tensor$normal_(mean, std)
  })
  out
}

nn_init_no_grad_trunc_normal <- function(tensor, mean, std, a, b) {
  if (mean < (a - 2 * std) || mean > (b + 2 * std)) {
    warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. ",
         "The distribution of values may be incorrect.")
  }

  with_no_grad({
    # Get the lower and upper cdf values
    l <- stats::pnorm((a - mean) / std)
    u <- stats::pnorm((b - mean) / std)

    # Uniformly fill tensor with values from [l, u], then translate to
    # [2l-1, 2u-1].
    tensor$uniform_(2 * l - 1, 2 * u - 1)

    # Use inverse cdf transform for normal distribution to get truncated
    # standard normal
    tensor$erfinv_()

    # Transform to proper mean, std
    tensor$mul_(std * sqrt(2))
    tensor$add_(mean)

    # Clamp to ensure it's in the proper range
    tensor$clamp_(min = a, max = b)
  })

  tensor
}
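# Illustrative check (an added sketch, not part of the torch sources): the
# inverse-cdf construction above keeps every draw inside [a, b] while
# approximating N(mean, std^2) restricted to that interval.
x <- nn_init_no_grad_trunc_normal(torch_empty(1000), mean = 0, std = 1, a = -2, b = 2)
stopifnot(as.numeric(x$min()) >= -2, as.numeric(x$max()) <= 2)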

nn_init_no_grad_fill <- function(tensor, val) {
  with_no_grad({
    tensor$fill_(val)
  })
  tensor
}

nn_init_no_grad_zero <- function(tensor) {
  with_no_grad({
    tensor$zero_()
  })
  tensor
}

#' Calculate gain
#'
#' Return the recommended gain value for the given nonlinearity function.
#'
#' @param nonlinearity the non-linear function
#' @param param optional parameter for the non-linear function
#'
#' @export
nn_init_calculate_gain <- function(nonlinearity, param = NULL) {
  linear_fns <- c("linear", "conv1d", "conv2d", "conv3d", "conv_transpose1d",
                  "conv_transpose2d", "conv_transpose3d")

  if (nonlinearity %in% linear_fns || nonlinearity == "sigmoid") {
    return(1)
  } else if (nonlinearity == "tanh") {
    return(5 / 3)
  } else if (nonlinearity == "relu") {
    return(sqrt(2))
  } else if (nonlinearity == "leaky_relu") {
    if (is.null(param)) {
      negative_slope <- 0.01
    } else {
      negative_slope <- param
    }
    return(sqrt(2 / (1 + negative_slope^2)))
  } else {
    not_implemented_error("Unsupported nonlinearity: {nonlinearity}")
  }
}
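# Illustrative values (an added sketch, not part of the torch sources): these
# gains feed the std computations in the Kaiming initializers further below.
nn_init_calculate_gain("tanh")                     # 5/3
nn_init_calculate_gain("relu")                     # sqrt(2), ~1.4142
nn_init_calculate_gain("leaky_relu", param = 0.2)  # sqrt(2 / (1 + 0.2^2))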

#' Uniform initialization
#'
#' Fills the input Tensor with values drawn from the uniform distribution
#'
#' @param tensor an n-dimensional Tensor
#' @param a the lower bound of the uniform distribution
#' @param b the upper bound of the uniform distribution
#'
#' @examples
#' w <- torch_empty(3, 5)
#' nn_init_uniform_(w)
#'
#' @export
nn_init_uniform_ <- function(tensor, a = 0, b = 1) {
  nn_init_no_grad_uniform(tensor, a, b)
}

#' Normal initialization
#'
#' Fills the input Tensor with values drawn from the normal distribution
#'
#' @inheritParams nn_init_uniform_
#' @param mean the mean of the normal distribution
#' @param std the standard deviation of the normal distribution
#'
#' @examples
#' w <- torch_empty(3, 5)
#' nn_init_normal_(w)
#'
#' @export
nn_init_normal_ <- function(tensor, mean = 0, std = 1) {
  nn_init_no_grad_normal(tensor, mean, std)
}

#' Truncated normal initialization
#'
#' Fills the input Tensor with values drawn from a truncated
#' normal distribution.
#'
#' @inheritParams nn_init_normal_
#' @param a the minimum cutoff value
#' @param b the maximum cutoff value
#'
#' @examples
#' w <- torch_empty(3, 5)
#' nn_init_trunc_normal_(w)
#'
#' @export
nn_init_trunc_normal_ <- function(tensor, mean = 0, std = 1, a = -2, b = 2) {
  nn_init_no_grad_trunc_normal(tensor, mean, std, a, b)
}

#' Constant initialization
#'
#' Fills the input Tensor with the value `val`.
#'
#' @param tensor an n-dimensional `Tensor`
#' @param val the value to fill the tensor with
#'
#' @examples
#' w <- torch_empty(3, 5)
#' nn_init_constant_(w, 0.3)
#'
#' @export
nn_init_constant_ <- function(tensor, val) {
  nn_init_no_grad_fill(tensor, val)
}

#' Ones initialization
#'
#' Fills the input Tensor with the scalar value `1`
#'
#' @param tensor an n-dimensional `Tensor`
#'
#' @examples
#' w <- torch_empty(3, 5)
#' nn_init_ones_(w)
#'
#' @export
nn_init_ones_ <- function(tensor) {
  nn_init_no_grad_fill(tensor, 1)
}

#' Zeros initialization
#'
#' Fills the input Tensor with the scalar value `0`
#'
#' @param tensor an n-dimensional tensor
#'
#' @examples
#' w <- torch_empty(3, 5)
#' nn_init_zeros_(w)
#'
#' @export
nn_init_zeros_ <- function(tensor) {
  nn_init_no_grad_zero(tensor)
}

#' Eye initialization
#'
#' Fills the 2-dimensional input `Tensor` with the identity matrix.
#' Preserves the identity of the inputs in `Linear` layers, where as
#' many inputs are preserved as possible.
#'
#' @param tensor a 2-dimensional torch tensor.
#'
#' @examples
#' w <- torch_empty(3, 5)
#' nn_init_eye_(w)
#'
#' @export
nn_init_eye_ <- function(tensor) {
  with_no_grad({
    size <- tensor$size()
    torch_eye_out(tensor, size[1], size[2])
  })
  tensor
}

#' Dirac initialization
#'
#' Fills the {3, 4, 5}-dimensional input `Tensor` with the Dirac
#' delta function. Preserves the identity of the inputs in `Convolutional`
#' layers, where as many input channels are preserved as possible. In case
#' of groups>1, each group of channels preserves identity.
#'
#' @param tensor a {3, 4, 5}-dimensional `torch.Tensor`
#' @param groups (optional) number of groups in the conv layer (default: 1)
#'
#' @examples
#' \dontrun{
#' w <- torch_empty(3, 16, 5, 5)
#' nn_init_dirac_(w)
#' }
#'
#' @export
nn_init_dirac_ <- function(tensor, groups = 1) {
  sizes <- tensor$size()
  dimensions <- length(sizes)

  out_chans_per_grp <- floor(sizes[1] / groups)
  min_dim <- min(out_chans_per_grp, sizes[2])

  stop("not implemented")
}

nn_init_calculate_fan_in_and_fan_out <- function(tensor) {
  dimensions <- tensor$dim()
  num_input_fmaps <- tensor$size(2)
  num_output_fmaps <- tensor$size(1)
  receptive_field_size <- 1

  if (dimensions > 2) {
    receptive_field_size <- tensor[1, 1, ..]$numel()
  }

  fan_in <- num_input_fmaps * receptive_field_size
  fan_out <- num_output_fmaps * receptive_field_size

  list(fan_in, fan_out)
}
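# Illustrative example (an added sketch, not part of the torch sources): for a
# conv2d weight of shape (16, 8, 3, 3) the receptive field has 3 * 3 = 9
# elements, so fan_in = 8 * 9 = 72 and fan_out = 16 * 9 = 144.
fans <- nn_init_calculate_fan_in_and_fan_out(torch_empty(16, 8, 3, 3))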

#' Xavier uniform initialization
#'
#' Fills the input `Tensor` with values according to the method
#' described in `Understanding the difficulty of training deep feedforward
#' neural networks` - Glorot, X. & Bengio, Y. (2010), using a uniform
#' distribution.
#'
#' @param tensor an n-dimensional `Tensor`
#' @param gain an optional scaling factor
#'
#' @examples
#' w <- torch_empty(3, 5)
#' nn_init_xavier_uniform_(w)
#'
#' @export
nn_init_xavier_uniform_ <- function(tensor, gain = 1) {
  fans <- nn_init_calculate_fan_in_and_fan_out(tensor)
  fan_in <- fans[[1]]
  fan_out <- fans[[2]]
  std <- gain * sqrt(2.0 / (fan_in + fan_out))
  a <- sqrt(3.0) * std # Calculate uniform bounds from standard deviation
  nn_init_no_grad_uniform(tensor, -a, a)
}
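# Illustrative check (an added sketch, not part of the torch sources): a
# uniform distribution on [-a, a] has standard deviation a / sqrt(3), so the
# bound above realizes std = gain * sqrt(2 / (fan_in + fan_out)); for a
# 100 x 200 weight that is sqrt(2 / 300), ~0.0816.
w <- nn_init_xavier_uniform_(torch_empty(100, 200))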

#' Xavier normal initialization
#'
#' Fills the input `Tensor` with values according to the method
#' described in `Understanding the difficulty of training deep feedforward
#' neural networks` - Glorot, X. & Bengio, Y. (2010), using a normal
#' distribution.
#'
#' @param tensor an n-dimensional `Tensor`
#' @param gain an optional scaling factor
#'
#' @examples
#' w <- torch_empty(3, 5)
#' nn_init_xavier_normal_(w)
#'
#' @export
nn_init_xavier_normal_ <- function(tensor, gain = 1) {
  fans <- nn_init_calculate_fan_in_and_fan_out(tensor)
  fan_in <- fans[[1]]
  fan_out <- fans[[2]]
  std <- gain * sqrt(2.0 / (fan_in + fan_out))
  nn_init_no_grad_normal(tensor, 0, std)
}

nn_init_calculate_correct_fan <- function(tensor, mode) {
  mode <- tolower(mode)

  fans <- nn_init_calculate_fan_in_and_fan_out(tensor)
  fan_in <- fans[[1]]
  fan_out <- fans[[2]]

  if (mode == "fan_in") {
    fan_in
  } else {
    fan_out
  }
}
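# Illustrative example (an added sketch, not part of the torch sources):
# "fan_in" selects the input-side count, "fan_out" the output side.
nn_init_calculate_correct_fan(torch_empty(16, 8, 3, 3), "fan_in")  # 72
nn_init_calculate_correct_fan(torch_empty(16, 8, 3, 3), "fan_out") # 144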

#' Kaiming uniform initialization
#'
#' Fills the input `Tensor` with values according to the method
#' described in `Delving deep into rectifiers: Surpassing human-level
#' performance on ImageNet classification` - He, K. et al. (2015), using a
#' uniform distribution.
#'
#' @param tensor an n-dimensional `torch.Tensor`
#' @param a the negative slope of the rectifier used after this layer (only used
#'  with `'leaky_relu'`)
#' @param mode either 'fan_in' (default) or 'fan_out'. Choosing 'fan_in' preserves
#'   the magnitude of the variance of the weights in the forward pass. Choosing
#'   'fan_out' preserves the magnitudes in the backwards pass.
#' @param nonlinearity the non-linear function. Recommended to use only with 'relu'
#'   or 'leaky_relu' (default).
#'
#' @examples
#' w <- torch_empty(3, 5)
#' nn_init_kaiming_uniform_(w, mode = "fan_in", nonlinearity = "leaky_relu")
#'
#' @export
nn_init_kaiming_uniform_ <- function(tensor, a = 0, mode = "fan_in", nonlinearity = "leaky_relu") {
  fan <- nn_init_calculate_correct_fan(tensor, mode)
  gain <- nn_init_calculate_gain(nonlinearity, a)
  std <- gain / sqrt(fan)
  bound <- sqrt(3) * std # Calculate uniform bounds from standard deviation
  with_no_grad({
    tensor$uniform_(-bound, bound)
  })
  tensor
}
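# Illustrative check (an added sketch, not part of the torch sources): with the
# default nonlinearity = "leaky_relu" and a = 0, gain = sqrt(2), so a weight
# with fan_in = 200 gets bound = sqrt(3) * sqrt(2 / 200), ~0.1732.
w <- nn_init_kaiming_uniform_(torch_empty(100, 200))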

#' Kaiming normal initialization
#'
#' Fills the input `Tensor` with values according to the method
#' described in `Delving deep into rectifiers: Surpassing human-level
#' performance on ImageNet classification` - He, K. et al. (2015), using a
#' normal distribution.
#'
#' @inheritParams nn_init_kaiming_uniform_
#'
#' @examples
#' w <- torch_empty(3, 5)
#' nn_init_kaiming_normal_(w, mode = "fan_in", nonlinearity = "leaky_relu")
#'
#' @export
nn_init_kaiming_normal_ <- function(tensor, a = 0, mode = "fan_in", nonlinearity = "leaky_relu") {
  fan <- nn_init_calculate_correct_fan(tensor, mode)
  gain <- nn_init_calculate_gain(nonlinearity, a)
  std <- gain / sqrt(fan)
  with_no_grad({
    tensor$normal_(0, std)
  })
  tensor
}

#' Orthogonal initialization
#'
#' Fills the input `Tensor` with a (semi) orthogonal matrix, as
#' described in `Exact solutions to the nonlinear dynamics of learning in deep
#' linear neural networks` - Saxe, A. et al. (2013). The input tensor must have
#' at least 2 dimensions, and for tensors with more than 2 dimensions the
#' trailing dimensions are flattened.
#'
#' @param tensor an n-dimensional `Tensor`
#' @param gain optional scaling factor
#'
#' @examples
#' w <- torch_empty(3, 5)
#' nn_init_orthogonal_(w)
#'
#' @export
nn_init_orthogonal_ <- function(tensor, gain = 1) {
  rows <- tensor$size(1)
  cols <- floor(tensor$numel() / rows)
  flattened <- torch_randn(rows, cols)

  if (rows < cols) {
    flattened$t_()
  }

  # Compute the qr factorization
  qr <- torch_qr(flattened)
  q <- qr[[1]]
  r <- qr[[2]]

  # Make Q uniform according to https://arxiv.org/pdf/math-ph/0609050.pdf
  d <- torch_diag(r, 0)
  ph <- d$sign()
  q <- q * ph

  if (rows < cols) {
    q$t_()
  }

  with_no_grad({
    tensor$view_as(q)$copy_(q)
    tensor$mul_(gain)
  })

  tensor
}
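# Illustrative check (an added sketch, not part of the torch sources): with
# rows <= cols the rows of the result are orthonormal up to `gain`, so
# torch_mm(w, w$t()) is approximately gain^2 times the identity.
w <- nn_init_orthogonal_(torch_empty(3, 5))
torch_mm(w, w$t()) # close to the 3 x 3 identity matrix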

#' Sparse initialization
#'
#' Fills the 2D input `Tensor` as a sparse matrix, where the
#' non-zero elements will be drawn from the normal distribution
#' as described in `Deep learning via
#' Hessian-free optimization` - Martens, J. (2010).
#'
#' @param tensor an n-dimensional `Tensor`
#' @param sparsity The fraction of elements in each column to be set to zero
#' @param std the standard deviation of the normal distribution used to generate
#'   the non-zero values
#'
#' @examples
#' \dontrun{
#' w <- torch_empty(3, 5)
#' nn_init_sparse_(w, sparsity = 0.1)
#' }
#' @export
nn_init_sparse_ <- function(tensor, sparsity, std = 0.01) {
  stop("not implemented")
}
```
