# Names of the Keras layer classes that `convert_keras_model()` accepts.
# A layer whose class name is not listed here fails the `checkChoice()`
# validation performed during conversion.
implemented_layers_keras <- c(
  "Dense", "Dropout", "InputLayer", "Conv1D", "Conv2D", "Flatten",
  "MaxPooling1D", "MaxPooling2D", "AveragePooling1D", "AveragePooling2D",
  "Concatenate", "Add", "Activation", "ZeroPadding1D", "ZeroPadding2D",
  "BatchNormalization", "GlobalAveragePooling1D", "GlobalAveragePooling2D",
  "GlobalMaxPooling1D", "GlobalMaxPooling2D"
)
# Convert Keras Model
# Convert a Keras model into the list representation used by this package.
#
# Arguments:
#   model  A Keras model object. It is accessed through `$layers`,
#          `$get_config()`, `$input_names`, `$output_names`, `$input_shape`
#          and `$output_shape`.
#
# Returns a list with the entries
#   input_dim     list of input shapes (converted to channels first if a
#                 'channels_last' layer was encountered)
#   input_nodes   indices of the layers that receive the model input
#   output_dim    list of output shapes (same conversion as `input_dim`)
#   output_nodes  indices of the layers that produce the model output
#   layers        list of the converted layers
convert_keras_model <- function(model) {
  # Data format shared by all format-aware layers (unset until the first
  # such layer is seen) and the running layer index
  data_format <- NULL
  n <- 1

  # Get layer names and reconstruct graph
  if (inherits(model, "keras.engine.sequential.Sequential")) {
    # If the model is a sequential model, the first layer is the only input
    # layer and the last layer is the only output layer
    graph <- lapply(seq_along(model$layers), function(i) {
      list(input_layers = i - 1, output_layers = i + 1)
    })
    graph[[length(graph)]]$output_layers <- -1
    names <- unlist(lapply(model$layers, FUN = function(x) x$name))
    input_names <- names[1]
    layers <- model$layers
  } else {
    # Otherwise, we have to reconstruct the computational graph from the
    # model config
    res <- keras_reconstruct_graph(model$layers, model$get_config())
    graph <- res$graph
    input_names <- model$input_names
    layers <- res$layers
    names <- names(layers)
  }

  # Declare list for the list-converted layers
  model_as_list <- vector("list", length = length(names))

  for (layer in layers) {
    # Get the layer type and check whether it is implemented
    type <- layer$`__class__`$`__name__`
    cli_check(checkChoice(type, implemented_layers_keras), "type")

    # Convert the layer to a list based on its type
    # Note: It is assumed that the same data format was used for all
    # convolutional layers!
    # Empty alternatives fall through to the next non-empty one, so layer
    # types that are handled identically share a single branch.
    layer_list <- switch(type,
      InputLayer = ,
      Dropout = convert_keras_skipping(type),
      Dense = convert_keras_dense(layer),
      Conv1D = ,
      Conv2D = {
        # check for consistent data format
        data_format <- check_consistent_data_format(
          data_format, layer$data_format
        )
        convert_keras_convolution(layer, type)
      },
      Flatten = convert_keras_flatten(layer),
      MaxPooling1D = ,
      MaxPooling2D = ,
      AveragePooling1D = ,
      AveragePooling2D = {
        # check for consistent data format
        data_format <- check_consistent_data_format(
          data_format, layer$data_format
        )
        convert_keras_pooling(layer, type)
      },
      Concatenate = convert_keras_concatenate(layer),
      Add = convert_keras_add(layer),
      Activation = convert_keras_activation(layer$get_config()$activation),
      ZeroPadding1D = ,
      ZeroPadding2D = convert_keras_zeropadding(layer, type),
      BatchNormalization = convert_keras_batchnorm(layer),
      GlobalAveragePooling1D = ,
      GlobalAveragePooling2D = ,
      GlobalMaxPooling1D = ,
      GlobalMaxPooling2D = convert_keras_globalpooling(layer, type)
    )

    # Define the incoming and outgoing layers of this layer.
    # Here '0' denotes the model's input node and '-1' its output node.
    layer_list$input_layers <- graph[[n]]$input_layers
    layer_list$output_layers <- graph[[n]]$output_layers

    # Save the converted layer
    model_as_list[[n]] <- layer_list
    n <- n + 1
  }

  # Combine activation functions with convolution or dense layers
  model_as_list <- combine_activations(model_as_list)

  # Get in- and output shape of the model
  input_dim <- model$input_shape
  output_dim <- model$output_shape
  if (length(model$input_names) == 1) {
    input_dim <- list(unlist(input_dim))
  } else {
    input_dim <- lapply(input_dim, unlist)
  }
  if (length(model$output_names) == 1) {
    output_dim <- list(unlist(output_dim))
  } else {
    output_dim <- lapply(output_dim, unlist)
  }

  # In this package only 'channels_first' is allowed, i.e. convert the format
  # to 'channels_first' if necessary
  for (i in seq_along(input_dim)) {
    in_dim <- input_dim[[i]]
    if (length(in_dim) > 1 && is.character(data_format) &&
      data_format == "channels_last") {
      input_dim[[i]] <- c(rev(in_dim)[1], in_dim[-length(in_dim)])
    }
  }
  for (i in seq_along(output_dim)) {
    out_dim <- output_dim[[i]]
    if (length(out_dim) > 1 && is.character(data_format) &&
      data_format == "channels_last") {
      output_dim[[i]] <- c(rev(out_dim)[1], out_dim[-length(out_dim)])
    }
  }

  # Get input and output nodes
  if (any(grepl("_input", input_names))) {
    # Also try the names with the '_input' part removed
    input_names <- c(input_names, gsub("_input", "", input_names))
  }
  input_nodes <- match(input_names, names)
  # NOTE(review): the subscript was truncated to `[!]` in the reviewed text;
  # dropping the unmatched (NA) entries is the reconstructed intent.
  input_nodes <- input_nodes[!is.na(input_nodes)]
  output_nodes <- match(model$output_names, names)
  # `layer_list` still holds the last converted layer and `n - 1` its index.
  # If that layer is an activation layer registered as an output node, the
  # output node is redirected to the layer feeding it.
  if (layer_list$type == "Activation") {
    if ((n - 1) %in% output_nodes) {
      idx <- layer_list$input_layers
      output_nodes[output_nodes == n - 1] <- idx
    }
  }

  # Return the list-converted model with in- and output shapes and nodes
  list(
    input_dim = input_dim,
    input_nodes = input_nodes,
    output_dim = output_dim,
    output_nodes = output_nodes,
    layers = model_as_list
  )
}
# Convert Keras Layers
# Dense Layer -----------------------------------------------------------------
# Convert a Keras dense layer into a list.
#
# Arguments:
#   layer  The Keras layer; `$activation`, `$get_weights()` and `$use_bias`
#          are read.
#
# Returns a list with the entries `type` ("Dense"), `weight`, `bias`,
# `activation_name`, `dim_in` and `dim_out`.
convert_keras_dense <- function(layer) {
  act_name <- layer$activation$`__name__`
  weights <- layer$get_weights()

  # The weight matrix is transposed, so that afterwards its first dimension
  # is reported as `dim_out` and its second one as `dim_in`
  w <- torch_tensor(as.array(weights[[1]]))$transpose(1, 2)

  if (layer$use_bias) {
    bias <- torch_tensor(as.vector(weights[[2]]))
  } else {
    # Without a bias, use zeros with one entry per output unit
    bias <- torch_zeros(dim(w)[1])
  }

  list(
    type = "Dense",
    weight = w,
    bias = bias,
    activation_name = act_name,
    dim_in = dim(w)[2],
    dim_out = dim(w)[1]
  )
}
# Convolution Layer -----------------------------------------------------------
# Convert a Keras convolution layer (Conv1D or Conv2D) into a list.
#
# Arguments:
#   layer  The Keras layer; its config, `$input_shape`, `$output_shape`,
#          `$data_format`, `$use_bias` and `$get_weights()` are read.
#   type   The layer's class name; stored unchanged in the result.
#
# Returns a list with the entries `type`, `weight`, `bias`,
# `activation_name`, `dim_in`, `dim_out`, `stride`, `padding`, `dilation`.
convert_keras_convolution <- function(layer, type) {
  config <- layer$get_config()
  act_name <- config$activation
  kernel_size <- as.integer(unlist(config$kernel_size))
  stride <- as.integer(unlist(config$strides))
  padding <- config$padding
  dilation <- as.integer(unlist(config$dilation_rate))

  # input_shape:
  #     channels_first:  [batch_size, in_channels, in_length]
  #     channels_last:   [batch_size, in_length, in_channels]
  input_dim <- as.integer(unlist(layer$input_shape))
  output_dim <- as.integer(unlist(layer$output_shape))

  # in this package only 'channels_first'
  if (layer$data_format == "channels_last") {
    input_dim <- move_channels_first(input_dim)
    output_dim <- move_channels_first(output_dim)
  }

  # padding differs in keras and torch
  cli_check(checkChoice(padding, c("valid", "same")), "padding")
  if (padding == "valid") {
    # Two zero entries per spatial dimension
    padding <- rep(c(0L, 0L), length(kernel_size))
  } else if (padding == "same") {
    padding <- get_same_padding(input_dim, kernel_size, dilation, stride)
  }

  weights <- layer$get_weights()
  weight <- torch_tensor(as.array(weights[[1]]))

  if (layer$use_bias) {
    bias <- torch_tensor(as.vector(weights[[2]]))
  } else {
    # One zero per entry of the last weight dimension (out_channels in the
    # keras weight format described below)
    bias <- torch_zeros(dim(weight)[length(dim(weight))])
  }

  # Conv1D
  # keras weight format: [kernel_length, in_channels, out_channels]
  # torch weight format: [out_channels, in_channels, kernel_length]
  # Conv2D
  # keras weight format:
  #   [kernel_height, kernel_width, in_channels, out_channels]
  # torch weight format:
  #   [out_channels, in_channels, kernel_height, kernel_width]
  if (length(dim(weight)) == 3) {
    weight <- weight$movedim(c(2, 3), c(2, 1))
  } else {
    weight <- weight$movedim(c(3, 4), c(2, 1))
  }

  list(
    type = type,
    weight = weight,
    bias = bias,
    activation_name = act_name,
    dim_in = input_dim,
    dim_out = output_dim,
    stride = stride,
    padding = padding,
    dilation = dilation
  )
}
# Pooling Layer ---------------------------------------------------------------
# Convert a Keras pooling layer (max or average, 1D or 2D) into a list.
#
# Arguments:
#   layer  The Keras layer; `$input_shape`, `$output_shape`, `$pool_size`,
#          `$strides`, `$padding` and `$data_format` are read.
#   type   The layer's class name; stored unchanged in the result.
#
# Returns a list with the entries `type`, `dim_in`, `dim_out`,
# `kernel_size` and `strides`. Any padding mode other than "valid" raises
# an error.
convert_keras_pooling <- function(layer, type) {
  input_dim <- unlist(layer$input_shape)
  output_dim <- unlist(layer$output_shape)
  kernel_size <- unlist(layer$pool_size)
  strides <- unlist(layer$strides)

  if (layer$padding != "valid") {
    stopf("Padding mode '", layer$padding, "' is not implemented yet!")
  }

  # in this package only 'channels_first'
  if (layer$data_format == "channels_last") {
    input_dim <- move_channels_first(input_dim)
    output_dim <- move_channels_first(output_dim)
  }

  list(
    type = type,
    dim_in = input_dim,
    dim_out = output_dim,
    kernel_size = kernel_size,
    strides = strides
  )
}
# GlobalPooling Layer --------------------------------------------------
# Convert a Keras global pooling layer into a list.
#
# Arguments:
#   layer  The Keras layer; `$input_shape`, `$output_shape` and
#          `$data_format` are read.
#   type   The layer's class name. Names starting with "GlobalAverage" are
#          mapped to the method "average", all others to "max".
#
# Returns a list with the entries `type` ("GlobalPooling"), `dim_in`,
# `dim_out` and `method`.
convert_keras_globalpooling <- function(layer, type) {
  if (startsWith(type, "GlobalAverage")) {
    method <- "average"
  } else {
    method <- "max"
  }

  dim_in <- unlist(layer$input_shape)
  dim_out <- unlist(layer$output_shape)
  data_format <- layer$data_format

  # in this package only 'channels_first'
  if (data_format == "channels_last") {
    dim_in <- move_channels_first(dim_in)
    dim_out <- move_channels_first(dim_out)
  }

  list(
    type = "GlobalPooling",
    dim_in = dim_in,
    dim_out = dim_out,
    method = method
  )
}
# ZeroPadding layer -----------------------------------------------------------
# Convert a Keras zero-padding layer into a list.
#
# Arguments:
#   layer  The Keras layer; `$padding`, `$input_shape`, `$output_shape` and,
#          for four padding values, `$data_format` are read.
#   type   The layer's class name (not used in the body).
#
# Returns a list with the entries `type` ("Padding"), `dim_in`, `dim_out`,
# `padding`, `mode` ("constant") and `value` (0).
convert_keras_zeropadding <- function(layer, type) {
  # padding size: either [left, right] or [top, bottom, left, right]
  padding <- unlist(layer$padding)

  if (length(padding) == 4) {
    padding <- c(padding[3:4], padding[1:2]) # in torch [left,right,top,bottom]
    data_format <- layer$data_format
  } else {
    # In this case the layer's data format is not queried and
    # 'channels_last' is used
    data_format <- "channels_last"
  }

  dim_in <- unlist(layer$input_shape)
  dim_out <- unlist(layer$output_shape)

  # in this package only 'channels_first'
  if (data_format == "channels_last") {
    dim_in <- move_channels_first(dim_in)
    dim_out <- move_channels_first(dim_out)
  }

  list(
    type = "Padding",
    dim_in = dim_in,
    dim_out = dim_out,
    padding = padding,
    mode = "constant",
    value = 0
  )
}
# BatchNormalization Layer ----------------------------------------------------
# Convert a Keras batch normalization layer into a list.
#
# Arguments:
#   layer  The Keras layer; `$input_shape`, `$output_shape`, `$axis`,
#          `$gamma`, `$epsilon`, `$center`, `$beta`, `$moving_mean` and
#          `$moving_variance` are read.
#
# Returns a list with the entries `type` ("BatchNorm"), `dim_in`, `dim_out`,
# `num_features`, `gamma`, `eps`, `beta` (NULL if the layer is not
# centered), `run_mean` and `run_var`. An error is raised if the
# normalization axis is neither the first nor the last axis.
convert_keras_batchnorm <- function(layer) {
  input_dim <- unlist(layer$input_shape)
  output_dim <- unlist(layer$output_shape)

  # The axis can arrive in different representations
  if (is.numeric(layer$axis)) {
    axis <- layer$axis
  } else if (is.list(layer$axis)) {
    axis <- as.numeric(layer$axis)
  } else {
    # NOTE(review): index 0 presumably addresses the first element of a
    # Python-side sequence - confirm
    axis <- as.numeric(layer$axis[[0]])
  }

  gamma <- as.numeric(layer$gamma$value())
  eps <- as.numeric(layer$epsilon)
  if (layer$center) {
    beta <- as.numeric(layer$beta)
  } else {
    beta <- NULL
  }
  run_mean <- as.numeric(layer$moving_mean)
  run_var <- as.numeric(layer$moving_variance)

  if (axis == length(input_dim)) { # i.e. channels last
    input_dim <- move_channels_first(input_dim)
    output_dim <- move_channels_first(output_dim)
  } else if (axis != 1L) { # i.e. neither first nor last axis
    stopf(
      "Only batchnormalzation on axis '1' or '-1' are accepted! ",
      "Your axis: '", axis, "'")
  }

  list(
    type = "BatchNorm",
    dim_in = input_dim,
    dim_out = output_dim,
    num_features = input_dim[1],
    gamma = gamma,
    eps = eps,
    beta = beta,
    run_mean = run_mean,
    run_var = run_var
  )
}
# Flatten Layer ---------------------------------------------------------------
# Convert a Keras flatten layer into a list.
#
# Arguments:
#   layer  The Keras layer; `$input_shape`, `$output_shape` and
#          `$data_format` are read.
#
# Returns a list with the entries `type` ("Flatten"), `start_dim` (2),
# `end_dim` (-1), `dim_in` and `dim_out`.
convert_keras_flatten <- function(layer) {
  input_dim <- unlist(layer$input_shape)
  output_dim <- unlist(layer$output_shape)

  # in this package only 'channels_first'
  # (only the input shape is rearranged here)
  if (layer$data_format == "channels_last") {
    input_dim <- move_channels_first(input_dim)
  }

  list(
    type = "Flatten",
    start_dim = 2,
    end_dim = -1,
    dim_in = input_dim,
    dim_out = output_dim
  )
}
# Concatenate Layer -----------------------------------------------------------
# Convert a Keras concatenate layer into a list.
#
# Arguments:
#   layer  The Keras layer; `$input_shape`, `$output_shape` and `$axis` are
#          read.
#
# Returns a list with the entries `type` ("Concatenate"), `axis`, `dim_in`
# (one shape per input) and `dim_out`.
convert_keras_concatenate <- function(layer) {
  # Number of non-batch dimensions of every input
  num_input_dims <- lapply(layer$input_shape, function(x) length(unlist(x)))
  if (any(unlist(num_input_dims) > 1)) {
    # NOTE(review): the call head was missing in the reviewed text;
    # `warningf` is assumed as the counterpart of the `stopf`/`messagef`
    # helpers used elsewhere in this file - confirm.
    warningf(
      "I assume that the concatenations axis points to the channel axis.",
      " Otherwise, an error can be thrown in the further process."
    )
  }

  list(
    type = "Concatenate",
    axis = layer$axis,
    dim_in = lapply(layer$input_shape, unlist),
    dim_out = unlist(layer$output_shape)
  )
}
# Add Layer -------------------------------------------------------------------
# Convert a Keras add layer into a list.
#
# Arguments:
#   layer  The Keras layer (not read by the body).
#
# Returns a list with the entries `type` ("Add"), `dim_in` and `dim_out`;
# both shapes are left as NULL.
convert_keras_add <- function(layer) {
  list(
    type = "Add",
    dim_in = NULL,
    dim_out = NULL
  )
}
# Activation Layer ------------------------------------------------------------
# Create the list representation of an activation layer.
#
# Arguments:
#   name  The name of the activation function.
#
# Returns a list with the entries `type` ("Activation") and `act_name`.
convert_keras_activation <- function(name) {
  list(type = "Activation", act_name = name)
}
# Skipping Layers -------------------------------------------------------------
# Create the list representation of a layer that is skipped.
#
# Arguments:
#   type  The class name of the skipped layer; only used for the message.
#
# Emits a message naming the skipped layer type and returns a list with the
# single entry `type` ("Skipping").
convert_keras_skipping <- function(type) {
  messagef("Skipping ", type, " ...")

  list(type = "Skipping")
}
# Utility methods: Graph reconstruction
# Reconstruct the computational graph of a (non-sequential) Keras model.
#
# Arguments:
#   layers  The layers of the model.
#   config  The model config; `$input_layers` and `$output_layers` are read
#           here, the rest is handled by `get_layers_graph()`.
#
# Returns a list with the entries
#   graph   named list with one entry per layer holding `input_layers` and
#           `output_layers` as layer indices ('0' marks a model input and
#           '-1' a model output)
#   layers  named list of the layers
keras_reconstruct_graph <- function(layers, config) {
  res <- get_layers_graph(layers, config)
  graph <- res$graph
  layer_list <- res$layers
  name_changes <- res$name_changes

  # Rename sequential layers
  for (name in name_changes) {
    for (i in seq_along(graph)) {
      if (!is.null(graph[[i]]$input_layers)) {
        is_old <- graph[[i]]$input_layers == name$old
        graph[[i]]$input_layers[is_old] <- name$new
      }
    }
  }

  # Transform input layers to the layer indices and register output nodes
  # for each layer
  names <- names(layer_list)
  for (i in seq_along(graph)) {
    if (!is.null(graph[[i]]$input_layers)) {
      input_layers <- match(graph[[i]]$input_layers, names)
      graph[[i]]$input_layers <- input_layers
      # Register output layers
      for (node_idx in input_layers) {
        graph[[node_idx]]$output_layers <-
          c(graph[[node_idx]]$output_layers, i)
      }
    }
  }

  # Register model input and output nodes
  input_nodes <- unlist(lapply(config$input_layers, function(x) x[[1]]))
  for (node in input_nodes) {
    idx <- which(node == names)
    graph[[idx]]$input_layers <- 0L
  }
  output_nodes <- unlist(lapply(config$output_layers, function(x) x[[1]]))
  for (node in output_nodes) {
    idx <- which(node == names)
    graph[[idx]]$output_layers <- -1L
  }

  list(graph = graph, layers = layer_list)
}
# Collect all layers of a model together with the names of their input
# layers. Nested 'Sequential' and 'Functional' models are unrolled.
#
# Arguments:
#   layers  The layers of the model.
#   config  The model config; `$layers` holds one config entry per layer.
#
# Returns a list with the entries
#   graph         named list with one entry per layer holding `input_layers`
#                 (names of the incoming layers or NULL) and `output_layers`
#                 (NULL at this point)
#   layers        named list of the layers
#   name_changes  list of `list(old = , new = )` entries mapping the name of
#                 a nested model to the name of its last layer
get_layers_graph <- function(layers, config) {
  config_names <- unlist(lapply(config$layers, function(x) x$name))
  graph <- list()
  layer_list <- list()
  name_changes <- list()

  for (layer in layers) {
    # Get layer index in the config
    config_idx <- which(layer$name == config_names)
    layer_config <- config$layers[[config_idx]]

    # Layers in a 'Sequential' model don't contain the config key
    # 'inbound_nodes', hence they need a separate treatment
    if (layer_config$class_name == "Sequential") {
      is_first <- TRUE
      l_names <- unlist(lapply(layer$layers, function(x) x$name))
      for (i in seq_along(layer$layers)) {
        if (is_first) {
          # The first layer gets the inputs of the whole sequential model
          in_names <- unlist(
            lapply(layer_config$inbound_nodes[[1]], function(x) x[[1]])
          )
          is_first <- FALSE
        } else {
          in_names <- l_names[i - 1]
        }
        graph[[l_names[i]]] <-
          list(input_layers = in_names, output_layers = NULL)
        layer_list[[l_names[i]]] <- layer$layers[[i]]
      }
      # A sequential model is saved as a single layer with a default name
      # 'sequential_*'. Therefore, the in- or output layer of the sequential
      # model refers to this name. The list 'name_changes' stores all the
      # relevant name changes.
      # (`i` still holds the index of the last layer of the sequential model)
      name_changes <- append(
        name_changes,
        list(list(old = layer$name, new = l_names[i]))
      )
    } else if (layer_config$class_name == "Functional") {
      res <- get_layers_graph(layer$layers, layer_config$config)
      # Register input layers
      layer_graph <- res$graph
      # NOTE(review): the right-hand side of this assignment was missing in
      # the reviewed text; the name of the layer feeding the nested model is
      # the reconstructed value - confirm against the upstream source.
      layer_graph[[layer_config$config$input_layers[[1]][[1]]]]$input_layers <-
        layer_config$inbound_nodes[[1]][[1]][[1]]
      graph <- append(graph, layer_graph)
      layer_list <- append(layer_list, res$layers)
      name_changes <- append(name_changes, res$name_changes)
      name_changes <- append(
        name_changes,
        list(list(
          old = layer$name,
          new = layer_config$config$output_layers[[1]][[1]]
        ))
      )
    } else {
      if (length(layer_config$inbound_nodes) == 1) { # non InputLayer
        in_names <- unlist(
          lapply(layer_config$inbound_nodes[[1]], function(x) x[[1]])
        )
      } else if (length(layer_config$inbound_nodes) == 0) { # InputLayer
        in_names <- NULL
      } else { # Weight-Sharing is not supported
        stopf("Models that share weights are not supported yet!")
      }
      graph[[layer$name]] <-
        list(input_layers = in_names, output_layers = NULL)
      layer_list[[layer$name]] <- layer
    }
  }

  list(graph = graph, layers = layer_list, name_changes = name_changes)
}
# Other utility methods
# Calculate the explicit padding for the Keras padding mode 'same'.
#
# Arguments:
#   input_dim    Input shape without batch axis; the channel axis comes
#                first, i.e. entries 2 (and 3) are the spatial sizes.
#   kernel_size  Kernel size, one value (1D) or two values (2D).
#   dilation     Dilation rate per spatial dimension.
#   stride       Stride per spatial dimension.
#
# Returns an integer vector: `c(left, right)` in the 1D case and
# `c(left, right, top, bottom)` in the 2D case. If the total padding is odd,
# the extra element is put on the right/bottom side.
get_same_padding <- function(input_dim, kernel_size, dilation, stride) {
  if (length(kernel_size) == 1) {
    in_length <- input_dim[2]
    # Effective filter size including dilation
    filter_length <- (kernel_size - 1) * dilation + 1

    if ((in_length %% stride[1]) == 0) {
      pad <- max(filter_length - stride[1], 0)
    } else {
      pad <- max(filter_length - (in_length %% stride[1]), 0)
    }

    pad_left <- pad %/% 2
    pad_right <- pad - pad_left

    padding <- as.integer(c(pad_left, pad_right))
  } else if (length(kernel_size) == 2) {
    in_height <- input_dim[2]
    in_width <- input_dim[3]
    # Effective filter sizes including dilation
    filter_height <- (kernel_size[1] - 1) * dilation[1] + 1
    filter_width <- (kernel_size[2] - 1) * dilation[2] + 1

    if ((in_height %% stride[1]) == 0) {
      pad_along_height <- max(filter_height - stride[1], 0)
    } else {
      pad_along_height <- max(filter_height - (in_height %% stride[1]), 0)
    }
    if ((in_width %% stride[2]) == 0) {
      pad_along_width <- max(filter_width - stride[2], 0)
    } else {
      pad_along_width <- max(filter_width - (in_width %% stride[2]), 0)
    }

    pad_top <- pad_along_height %/% 2
    pad_bottom <- pad_along_height - pad_top
    pad_left <- pad_along_width %/% 2
    pad_right <- pad_along_width - pad_left

    padding <- as.integer(c(pad_left, pad_right, pad_top, pad_bottom))
  }

  padding
}
# Check that a layer's data format agrees with the one seen so far.
#
# Arguments:
#   current_format  The data format of the previous layers, or NULL if no
#                   format has been registered yet.
#   given_format    The data format of the current layer.
#
# Returns the data format to carry on with. Raises an error if both formats
# are set and differ.
check_consistent_data_format <- function(current_format, given_format) {
  # Everything is fine if the data format is unset
  if (is.null(current_format)) {
    data_format <- given_format
  } else if (current_format == given_format) {
    # or if the data format doesn't change
    data_format <- current_format
  } else {
    # The package can not handle different data formats
    stopf(
      "The package {.pkg innsight} can not handle unconsistent data formats. ",
      "I found the format '", given_format, "', but the data format ",
      "of a previous layer was '", current_format, "'! ",
      "Choose either only the format 'channels_first' or only ",
      "'channels_last' for all layers.")
  }

  data_format
}
# Merge standalone activation layers into their preceding layers.
#
# Arguments:
#   model_as_list  List of converted layers. Every entry has a `type` and the
#                  entries `input_layers`/`output_layers` (layer indices,
#                  '-1' marks the model output).
#
# For every layer of type "Activation", the activation name is written to
# each input layer whose `activation_name` is "linear". An input layer that
# already has another activation raises an error. An input layer without an
# `activation_name` keeps the activation layer as it is, unless it is removed
# as described next. If the activation layer is the last layer, feeds only
# the model output and has exactly one input layer, it is removed and its
# input layer becomes the output layer. Otherwise, a merged activation layer
# is turned into a "Skipping" layer.
#
# Returns the modified list of layers.
combine_activations <- function(model_as_list) {
  for (i in seq_along(model_as_list)) {
    if (model_as_list[[i]]$type == "Activation") {
      # Becomes TRUE if an input layer cannot take over the activation
      keep <- FALSE
      for (in_layer in model_as_list[[i]]$input_layers) {
        act_name <- model_as_list[[in_layer]]$activation_name
        if (identical(act_name, "linear")) {
          # NOTE(review): the right-hand side was missing in the reviewed
          # text; handing over this layer's activation name is the
          # reconstructed intent.
          model_as_list[[in_layer]]$activation_name <-
            model_as_list[[i]]$act_name
        } else if (is.character(act_name)) {
          stopf("It is not allowed to use several activation functions in ",
                "consecutive order! You used a '", model_as_list[[i]]$act_name,
                "' activation directly after a '",
                model_as_list[[in_layer]]$activation_name, "' activation.")
        } else {
          keep <- TRUE
        }
      }

      # The number of input layers has to be compared with 1; taking the
      # length of the comparison result would always be truthy.
      if (i == length(model_as_list) &&
        all(model_as_list[[i]]$output_layers == -1) &&
        length(model_as_list[[i]]$input_layers) == 1) {
        # Remove the trailing activation layer and let its input layer point
        # to the model output instead
        in_layer <- model_as_list[[i]]$input_layers
        out_layers <- model_as_list[[in_layer]]$output_layers
        model_as_list[[in_layer]]$output_layers <-
          ifelse(out_layers == i, -1, out_layers)
        model_as_list[[i]] <- NULL
      } else if (!keep) {
        # Convert to 'Skipping Layer'
        model_as_list[[i]]$type <- "Skipping"
        model_as_list[[i]]$act_name <- NULL
      }
    }
  }

  model_as_list
}
# Move the last entry of a shape to the front.
#
# Arguments:
#   shape  A shape vector whose last entry is the channel axis.
#
# Returns the rearranged shape as an integer vector.
move_channels_first <- function(shape) {
  as.integer(c(rev(shape)[1], shape[-length(shape)]))
}
