#' Neural style transfer with Keras.
#'
#' It is preferable to run this script on a GPU, for speed.
#'
#' Example result: https://twitter.com/fchollet/status/686631033085677568
#'
#' Style transfer consists in generating an image
#' with the same "content" as a base image, but with the
#' "style" of a different picture (typically artistic).
#'
#' This is achieved through the optimization of a loss function
#' that has 3 components: "style loss", "content loss",
#' and "total variation loss":
#'
#' - The total variation loss imposes local spatial continuity between
#' the pixels of the combination image, giving it visual coherence.
#'
#' - The style loss is where the deep learning kicks in: it is defined
#' using a deep convolutional neural network. Precisely, it consists of a sum of
#' L2 distances between the Gram matrices of the representations of
#' the style reference image and the generated image, extracted from
#' different layers of a convnet (trained on ImageNet). The general idea
#' is to capture color/texture information at different spatial
#' scales (fairly large scales, determined by the depth of the layer considered).
#'
#' - The content loss is an L2 distance between the features of the base
#' image (extracted from a deep layer) and the features of the combination image,
#' keeping the generated image close enough to the original one (see the
#' formula summary below).
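#'
#' In formulas (an added summary, mirroring the functions defined below):
#' writing F_l(x) for the flattened feature maps of layer l and
#' G(F) = F %*% t(F) for their Gram matrix, the per-layer style loss is
#'   sum((G(F_l(style)) - G(F_l(combination)))^2) / (4 * channels^2 * size^2),
#' the content loss is
#'   sum((F_l(combination) - F_l(base))^2),
#' and the total objective minimized below is
#'   content_weight * content_loss + style_weight * style_loss +
#'     total_variation_weight * total_variation_loss.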
#'
library(keras)
library(purrr)
library(R6)
# Parameters --------------------------------------------------------------
base_image_path <- "neural-style-base-img.png"
style_reference_image_path <- "neural-style-style.jpg"
iterations <- 10
# these are the weights of the different loss components
total_variation_weight <- 1
style_weight <- 1
content_weight <- 0.025
# dimensions of the generated picture.
img <- image_load(base_image_path)
width <- img$size[[1]]
height <- img$size[[2]]
img_nrows <- 400
img_ncols <- as.integer(width * img_nrows / height)
# Functions ---------------------------------------------------------------
# util function to open, resize and format pictures into appropriate tensors
preprocess_image <- function(path){
  img <- image_load(path, target_size = c(img_nrows, img_ncols)) %>%
    image_to_array() %>%
    array_reshape(c(1, dim(.)))
  imagenet_preprocess_input(img)
}
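# Quick check (illustrative, not part of the original recipe): the
# preprocessed tensor has shape 1 x img_nrows x img_ncols x 3
# (batch, rows, cols, mean-centered BGR channels).
print(dim(preprocess_image(base_image_path)))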
# util function to convert a tensor into a valid image
# also turn BGR into RGB.
deprocess_image <- function(x){
  x <- x[1,,,]
  # Remove zero-center by mean pixel
  x[,,1] <- x[,,1] + 103.939
  x[,,2] <- x[,,2] + 116.779
  x[,,3] <- x[,,3] + 123.68
  # BGR -> RGB
  x <- x[,,c(3,2,1)]
  # clip to interval 0, 255
  x[x > 255] <- 255
  x[x < 0] <- 0
  # rescale to [0, 1] so the result can be plotted with as.raster()
  x[] <- as.integer(x)/255
  x
}
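# Round-trip check (illustrative, not part of the original recipe):
# preprocessing and then deprocessing the base image should recover
# something visually close to the (resized) original photo.
plot(as.raster(deprocess_image(preprocess_image(base_image_path))))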
# Defining the model ------------------------------------------------------
# get tensor representations of our images
base_image <- k_variable(preprocess_image(base_image_path))
style_reference_image <- k_variable(preprocess_image(style_reference_image_path))
# this will contain our generated image
combination_image <- k_placeholder(c(1, img_nrows, img_ncols, 3))
# combine the 3 images into a single Keras tensor
input_tensor <- k_concatenate(list(base_image, style_reference_image,
combination_image), axis = 1)
# build the VGG16 network with our 3 images as input
# the model will be loaded with pre-trained ImageNet weights
model <- application_vgg16(input_tensor = input_tensor, weights = "imagenet",
include_top = FALSE)
print("Model loaded.")
nms <- map_chr(model$layers, ~.x$name)
output_dict <- map(model$layers, ~.x$output) %>% set_names(nms)
# compute the neural style loss
# first we need to define 4 util functions
# the gram matrix of an image tensor (feature-wise outer product)
gram_matrix <- function(x){
  features <- x %>%
    k_permute_dimensions(pattern = c(3, 1, 2)) %>%
    k_batch_flatten()
  k_dot(features, k_transpose(features))
}
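# Illustration (not part of the original recipe): for a feature map with C
# channels, the Gram matrix is a C x C matrix of channel-wise inner products.
# This quick check assumes the backend can evaluate constants with k_eval().
toy_features <- k_constant(array(runif(4 * 5 * 3), dim = c(4, 5, 3)))
print(dim(k_eval(gram_matrix(toy_features))))  # 3 x 3, one entry per channel pair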
# the "style loss" is designed to maintain
# the style of the reference image in the generated image.
# It is based on the gram matrices (which capture style) of
# feature maps from the style reference image
# and from the generated image
style_loss <- function(style, combination){
  S <- gram_matrix(style)
  C <- gram_matrix(combination)
  channels <- 3
  size <- img_nrows * img_ncols
  k_sum(k_square(S - C)) / (4 * channels^2 * size^2)
}
# an auxiliary loss function
# designed to maintain the "content" of the
# base image in the generated image
content_loss <- function(base, combination){
  k_sum(k_square(combination - base))
}
# the 3rd loss function, total variation loss,
# designed to keep the generated image locally coherent
total_variation_loss <- function(x){
  y_ij  <- x[, 1:(img_nrows - 1L), 1:(img_ncols - 1L), ]
  y_i1j <- x[, 2:(img_nrows), 1:(img_ncols - 1L), ]
  y_ij1 <- x[, 1:(img_nrows - 1L), 2:(img_ncols), ]
  a <- k_square(y_ij - y_i1j)
  b <- k_square(y_ij - y_ij1)
  k_sum(k_pow(a + b, 1.25))
}
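# Sanity check (illustrative, not part of the original recipe): a constant
# image has zero total variation, so this should print (approximately) 0.
# It assumes k_constant()/k_eval() can be used with the active backend.
flat_img <- k_constant(array(0.5, dim = c(1, img_nrows, img_ncols, 3)))
print(k_eval(total_variation_loss(flat_img)))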
# combine these loss functions into a single scalar
loss <- k_variable(0.0)
layer_features <- output_dict$block4_conv2
base_image_features <- layer_features[1,,,]
combination_features <- layer_features[3,,,]
loss <- loss + content_weight*content_loss(base_image_features,
combination_features)
feature_layers <- c('block1_conv1', 'block2_conv1',
                    'block3_conv1', 'block4_conv1',
                    'block5_conv1')
for(layer_name in feature_layers){
  layer_features <- output_dict[[layer_name]]
  style_reference_features <- layer_features[2,,,]
  combination_features <- layer_features[3,,,]
  sl <- style_loss(style_reference_features, combination_features)
  loss <- loss + ((style_weight / length(feature_layers)) * sl)
}
loss <- loss + (total_variation_weight * total_variation_loss(combination_image))
# get the gradients of the generated image wrt the loss
grads <- k_gradients(loss, combination_image)[[1]]
f_outputs <- k_function(list(combination_image), list(loss, grads))
eval_loss_and_grads <- function(image){
  image <- array_reshape(image, c(1, img_nrows, img_ncols, 3))
  outs <- f_outputs(list(image))
  list(
    loss_value = outs[[1]],
    grad_values = array_reshape(outs[[2]], dim = length(outs[[2]]))
  )
}
# Loss and gradients evaluator.
#
# This Evaluator class makes it possible
# to compute loss and gradients in one pass
# while retrieving them via two separate functions,
# "loss" and "grads". This is done because scipy.optimize
# requires separate functions for loss and gradients,
# but computing them separately would be inefficient.
Evaluator <- R6Class(
  "Evaluator",
  public = list(
    loss_value = NULL,
    grad_values = NULL,
    initialize = function() {
      self$loss_value <- NULL
      self$grad_values <- NULL
    },
    loss = function(x){
      loss_and_grad <- eval_loss_and_grads(x)
      self$loss_value <- loss_and_grad$loss_value
      self$grad_values <- loss_and_grad$grad_values
      self$loss_value
    },
    grads = function(x){
      grad_values <- self$grad_values
      self$loss_value <- NULL
      self$grad_values <- NULL
      grad_values
    }
  )
)
evaluator <- Evaluator$new()
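# Illustration (not part of the original recipe) of the call pattern optim()
# uses: fn() is called first and caches the gradient, then gr() returns the
# cached value without re-evaluating the backend. Uncomment to try by hand
# (it costs one extra forward/backward pass):
# x0 <- runif(img_nrows * img_ncols * 3) - 0.5
# l0 <- evaluator$loss(x0)   # evaluates the graph once, caches gradients
# g0 <- evaluator$grads(x0)  # returns the cached gradients and clears them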
# Run optimization (L-BFGS-B, via optim()) over the pixels of the generated
# image so as to minimize the neural style loss.
dms <- c(1, img_nrows, img_ncols, 3)
# start from random noise
x <- array(data = runif(prod(dms), min = 0, max = 255) - 128, dim = dms)
for(i in 1:iterations){
  # Run L-BFGS-B
  opt <- optim(
    array_reshape(x, dim = length(x)), fn = evaluator$loss, gr = evaluator$grads,
    method = "L-BFGS-B",
    control = list(maxit = 15)
  )
  # Print loss value
  print(opt$value)
  # decode the image
  image <- x <- opt$par
  image <- array_reshape(image, dms)
  # plot the intermediate result
  im <- deprocess_image(image)
  plot(as.raster(im))
}
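# Optional (not part of the original recipe): save the final decoded image.
# Assumes the png package is available; `im` holds values in [0, 1], which
# png::writePNG() accepts directly.
if (requireNamespace("png", quietly = TRUE))
  png::writePNG(im, target = "neural-style-result.png")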