context("tensorflow probability keras layers")
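
# Tests for the TFP Keras layer wrappers: layer_variable, layer_distribution_lambda,
# layer_dense_variational, layer_dense_reparameterization, layer_dense_flipout,
# layer_dense_local_reparameterization, and the 1d/2d/3d convolutional
# reparameterization and flipout layers. The commented-out layer_autoregressive
# examples below are kept for reference.
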
# test_succeeds("use layer_autoregressive with 1d autoregressivity", {
#
# library(keras)
# n <- 2000
# x2 <- rnorm(n) %>% tf$cast(tf$float32) * 2
# x1 <- rnorm(n) %>% tf$cast(tf$float32) + (x2 * x2 / 4)
# data <- tf$stack(list(x1, x2), axis = -1L)
#
# made <- layer_autoregressive(params = 2, hidden_units = list(10, 10)) # output will be (n, 2, 2)
# distribution <- tfd_transformed_distribution(
# distribution = tfd_normal(loc = 0, scale = 1),
# bijector = tfb_masked_autoregressive_flow(
# function(x) tf$unstack(made(x), num = 2L, axis = -1L)), # output is list of (2000, 2) of length 2
# event_shape = list(2)) # distribution has shapes () and (2,)
#
# x_ <- layer_input(shape = c(2), dtype = "float32")
# log_prob_ <- distribution$log_prob(x_)
# model <- keras_model(x_, log_prob_)
# loss <- function(x, log_prob) -log_prob
# model %>% compile(optimizer = "adam", loss = loss)
#
# model %>% fit(x = data,
# y = rep(0, n),
# batch_size = 25,
# epochs = 1,
# steps_per_epoch = 1,
# verbose = 0)
#
# expect_equal((distribution %>% tfd_sample())$get_shape()$as_list(), 2)
# expect_equal((distribution %>% tfd_log_prob(matrix(rep(1, 3*2), ncol = 2)))$get_shape()$as_list(), c(3))
# })
#
#
# # `AutoregressiveLayer` can be used as a building block to achieve different
# # autoregressive structures over rank-2+ tensors. For example, suppose we want
# # to build an autoregressive distribution over images with dimension
# # `[width, height, channels]` with `channels = 3`:
# # We can parameterize a "fully autoregressive" distribution, with
# # cross-channel and within-pixel autoregressivity:
# # ```
# #   r0    g0   b0     r0   g0   b0       r0   g0   b0
# #   ^    ^      ^     ^    ^    ^        ^    ^    ^
# #   |   /  ____/     \   |  /   \____     \   |
# #   |  /__/            \  | /       \__\    |
# #   r1    g1   b1     r1 <- g1   b1       r1   g1 <- b1
# #                                  ^           |
# #                                   \_________/
# # ```
#
# test_succeeds("use layer_autoregressive to model rank-3 tensors with full autoregressivity", {
#
# library(keras)
#
# n <- 1000L
# width <- 8L
# height <- 8L
# channels <- 3L
# images <-
# runif(n * height * width * channels) %>% array(dim = c(n, height, width, channels)) %>%
# tf$cast(tf$float32)
#
# # Reshape images to achieve desired autoregressivity.
# event_shape <- height * width * channels
# reshaped_images <- tf$reshape(images, c(n, event_shape))
#
# # yields (n, 192, 2)
# made <-
# layer_autoregressive(
# params = 2,
# event_shape = event_shape,
# hidden_units = list(20, 20),
# activation = "relu"
# )
#
# distribution <- tfd_transformed_distribution(
# distribution = tfd_normal(loc = 0, scale = 1),
# bijector = tfb_masked_autoregressive_flow(function (x)
# tf$unstack(
# made(x), num = 2, axis = -1L # yields list (1000, 192) of length 2
# )),
# event_shape = event_shape
# )
#
# x_ <- layer_input(shape = event_shape, dtype = "float32")
# log_prob_ <- distribution %>% tfd_log_prob(x_)
#
# model <- keras_model(x_, log_prob_)
# loss <- function(x, log_prob)
# - log_prob
# model %>% compile(optimizer = "adam", loss = loss)
#
# model %>% fit(
# x = reshaped_images,
# y = rep(0, n),
# batch_size = 10,
# epochs = 1,
# steps_per_epoch = 1,
# verbose = 0
# )
#
# expect_equal((distribution %>% tfd_sample(c(3, 1)))$get_shape()$as_list(),
# c(3, 1, event_shape))
# })
#
# test_succeeds("use layer_autoregressive to model rank-3 tensors without autoregressivity over channels", {
#
# library(keras)
#
# n <- 1000L
# width <- 8L
# height <- 8L
# channels <- 3L
# images <-
# sample(0:1, n * height * width * channels, replace = TRUE) %>% array(dim = c(n, height, width, channels)) %>%
# tf$cast(tf$float32)
#
# # Reshape images to achieve desired autoregressivity.
# event_shape <- height * width
# # (n, 3, 64)
# reshaped_images <- tf$reshape(images, c(n, event_shape, channels)) %>% tf$transpose(perm = c(0L, 2L, 1L))
#
# # yields (n, 3, 64, 1)
# made <-
# layer_autoregressive(
# params = 1,
# event_shape = event_shape,
# hidden_units = list(20, 20),
# activation = "relu"
# )
# # batch_shape=(), event_shape=(3, 64)
# distribution = tfd_autoregressive(
# # batch_shape=(1000,), event_shape=(3, 64)
# function(x) tfd_independent(
# # batch_shape=(1000, 3, 64), event_shape=()
# tfd_bernoulli(logits = tf$unstack(made(x), axis = -1L)[[1]], # (1000, 3, 64)
# dtype = tf$float32),
# reinterpreted_batch_ndims = 2),
# sample0 = tf$zeros(list(channels, width * height), dtype = tf$float32))
#
#
# x_ <- layer_input(shape = c(channels, event_shape), dtype = "float32")
# log_prob_ <- distribution %>% tfd_log_prob(x_)
#
# model <- keras_model(x_, log_prob_)
# loss <- function(x, log_prob)
# - log_prob
# model %>% compile(optimizer = "adam", loss = loss)
#
# model %>% fit(
# x = reshaped_images,
# y = rep(0, n),
# batch_size = 10,
# epochs = 1,
# steps_per_epoch = 1,
# verbose = 0
# )
#
# expect_equal((distribution %>% tfd_sample(c(7)))$get_shape()$as_list(),
# c(7, channels, event_shape))
# })
#
# test_succeeds("layer_autoregressive_transform works", {
#
# skip_if_tf_below("2.0.0")
# skip_if_tfp_below("0.8.0")
#
# n <- 2000
# x2 <- rnorm(n) * 2
# x1 <- rnorm(n) + (x2 * x2 / 4)
#
# model <- keras::keras_model_sequential() %>%
# layer_distribution_lambda(
# make_distribution_fn = function(t) {
# tfd_multivariate_normal_diag(
# loc = tf$constant(matrix(0, nrow = 25, ncol = 2), dtype = "float32"),
# scale_diag = tf$constant(c(1,1), dtype = "float32")
# )
# },
# dtype = "float32"
# ) %>%
# layer_autoregressive_transform(made = layer_autoregressive(
# params = list(2L),
# hidden_units = list(10L),
# activation = "relu",
# dtype = "float32"
# ),
# dtype = "float32"
# )
#
# loss_fun <-function(y, rv_y) -rv_y$log_prob(y)
#
# model %>%
# keras::compile(
# optimizer = "adam",
# loss = loss_fun
# )
#
# model %>%
# keras::fit(
# x = matrix(rep(0.0, n)),
# y = cbind(x1, x2),
# batch_size = 25,
# epochs = 1
# )
#
# })
test_succeeds("layer_variable works", {

  library(keras)

  x = tf$ones(shape = c(3L, 4L))
  y = tf$ones(3L)

  trainable_normal <- keras_model_sequential(list(
    layer_variable(shape = 2),
    layer_distribution_lambda(
      make_distribution_fn = function (t)
        tfd_independent(
          tfd_normal(loc = t[1], scale = tf$math$softplus(t[2])),
          reinterpreted_batch_ndims = 0
        )
    )
  ))

  negloglik <- function(x, rv_x) -(rv_x %>% tfd_log_prob(x))
  trainable_normal %>% compile(optimizer = 'adam', loss = negloglik)
  trainable_normal %>% fit(x, y, steps_per_epoch = 1, verbose = 0L)
})

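# A minimal follow-up sketch, not asserted by the test: because the final layer is a
# layer_distribution_lambda (and layer_variable ignores its input), calling the fitted
# model returns the tfd_independent distribution itself, so its learned parameters
# could be inspected along these lines:
# d <- trainable_normal(x)
# d %>% tfd_mean()
# d %>% tfd_stddev()
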
test_succeeds("layer_dense_variational works", {

  library(keras)

  x = tf$ones(shape = c(150L, 1L))
  y = tf$ones(150L)

  posterior_mean_field <- function(kernel_size, bias_size = 0, dtype = NULL) {
    n <- kernel_size + bias_size
    c <- log(expm1(1))
    keras_model_sequential(list(
      layer_variable(shape = 2 * n, dtype = dtype),
      layer_distribution_lambda(make_distribution_fn = function(t) {
        tfd_independent(
          tfd_normal(loc = t[1:n], scale = 1e-5 + tf$nn$softplus(c + t[(n+1):(2*n)])),
          reinterpreted_batch_ndims = 1
        )
      })
    ))
  }

  prior_trainable <- function(kernel_size, bias_size = 0, dtype = NULL) {
    n <- kernel_size + bias_size
    keras_model_sequential() %>%
      layer_variable(n, dtype = dtype) %>%
      layer_distribution_lambda(function(t) {
        tfd_independent(
          tfd_normal(loc = t, scale = 1),
          reinterpreted_batch_ndims = 1
        )
      })
  }

  model <- keras_model_sequential(list(
    layer_dense_variational(
      units = 1,
      make_posterior_fn = posterior_mean_field,
      make_prior_fn = prior_trainable
    ),
    layer_distribution_lambda(
      make_distribution_fn = function(x)
        tfd_normal(loc = x, scale = 1)
    )
  ))

  negloglik <- function(x, rv_x) -(rv_x %>% tfd_log_prob(x))
  model %>% compile(optimizer = 'adam', loss = negloglik)
  model %>% fit(x, y, steps_per_epoch = 1, verbose = 0L)

  yhat <- model(x)
  expect_equal((yhat %>% tfd_sample())$get_shape()$as_list(), c(150, 1))
})

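# Sketch, not asserted above: because the model's output is itself a distribution,
# predictive summaries are available directly, e.g.
# yhat %>% tfd_mean()    # per-observation predictive mean, shape (150, 1)
# yhat %>% tfd_stddev()  # equals the fixed scale (1) of the output normal
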
test_succeeds("layer_dense_reparameterization works", {

  library(keras)

  x = tf$ones(shape = c(150L, 1L))
  y = tf$ones(150L)
  n <- dim(y)[1] %>% tf$cast(tf$float32)

  kl_div <- function(q, p, unused)
    tfd_kl_divergence(q, p) / n

  model <- keras_model_sequential(list(
    layer_dense_reparameterization(
      units = 512,
      activation = "relu",
      kernel_divergence_fn = kl_div
    ),
    layer_dense_reparameterization(units = 1,
                                   kernel_divergence_fn = kl_div)
  ))

  bayesian_loss <- function() {
    function(y_true, y_pred) {
      nll <- tf$keras$losses$mean_squared_error(y_true, y_pred)
      kl <- tf$reduce_sum(model$losses)
      nll + kl
    }
  }

  if (tfp_version() < "0.10") {
    model %>% compile(
      optimizer = 'adam',
      loss = bayesian_loss(),
      experimental_run_tf_function = FALSE
    )
  } else {
    # KL divergence is now added automatically by keras
    # see https://github.com/tensorflow/probability/blob/master/tensorflow_probability/examples/bayesian_neural_network.py
    model %>% compile(
      optimizer = 'adam',
      loss = "mse"
    )
  }

  model %>% fit(x, y, steps_per_epoch = 1, verbose = 0L)

  yhat <- model(x)
  expect_equal(yhat$get_shape()$as_list(), c(150, 1))
  expect_equal(length(model$losses), 2)
})

test_succeeds("layer_dense_flipout works", {

  library(keras)

  x = tf$ones(shape = c(150L, 1L))
  y = tf$ones(150L)
  n <- dim(y)[1] %>% tf$cast(tf$float32)

  kl_div <- function(q, p, unused)
    tfd_kl_divergence(q, p) / n

  model <- keras_model_sequential(list(
    layer_dense_flipout(
      units = 512,
      activation = "relu",
      kernel_divergence_fn = kl_div
    ),
    layer_dense_flipout(units = 1,
                        kernel_divergence_fn = kl_div)
  ))

  bayesian_loss <- function() {
    function(y_true, y_pred) {
      nll <- tf$keras$losses$mean_squared_error(y_true, y_pred)
      kl <- tf$reduce_sum(model$losses)
      nll + kl
    }
  }

  if (tfp_version() < "0.10") {
    model %>% compile(
      optimizer = 'adam',
      loss = bayesian_loss(),
      experimental_run_tf_function = FALSE
    )
  } else {
    # KL divergence is now added automatically by keras
    # see https://github.com/tensorflow/probability/blob/master/tensorflow_probability/examples/bayesian_neural_network.py
    model %>% compile(
      optimizer = 'adam',
      loss = "mse"
    )
  }

  model %>% fit(x, y, steps_per_epoch = 1, verbose = 0L)

  yhat <- model(x)
  expect_equal(yhat$get_shape()$as_list(), c(150, 1))
  expect_equal(length(model$losses), 2)
})

test_succeeds("layer_dense_local_reparameterization works", {

  library(keras)

  x = tf$ones(shape = c(150L, 1L))
  y = tf$ones(150L)
  n <- dim(y)[1] %>% tf$cast(tf$float32)

  kl_div <- function(q, p, unused)
    tfd_kl_divergence(q, p) / n

  model <- keras_model_sequential(
    list(
      layer_dense_local_reparameterization(
        units = 512,
        activation = "relu",
        kernel_divergence_fn = kl_div
      ),
      layer_dense_local_reparameterization(units = 1,
                                           kernel_divergence_fn = kl_div)
    )
  )

  bayesian_loss <- function() {
    function(y_true, y_pred) {
      nll <- tf$keras$losses$mean_squared_error(y_true, y_pred)
      kl <- tf$reduce_sum(model$losses)
      nll + kl
    }
  }

  if (tfp_version() < "0.10") {
    model %>% compile(
      optimizer = 'adam',
      loss = bayesian_loss(),
      experimental_run_tf_function = FALSE
    )
  } else {
    # KL divergence is now added automatically by keras
    # see https://github.com/tensorflow/probability/blob/master/tensorflow_probability/examples/bayesian_neural_network.py
    model %>% compile(
      optimizer = 'adam',
      loss = "mse"
    )
  }

  model %>% fit(x, y, steps_per_epoch = 1, verbose = 0L)

  yhat <- model(x)
  expect_equal(yhat$get_shape()$as_list(), c(150, 1))
  expect_equal(length(model$losses), 2)
})

test_succeeds("layer_conv_1d_reparameterization works", {

  skip_if_tf_below("2.0")

  library(keras)

  x = tf$ones(shape = c(150L, 1L, 1L))
  y = tf$ones(shape = c(150L, 10L))
  n <- dim(y)[1] %>% tf$cast(tf$float32)

  kl_div <- function(q, p, unused)
    tfd_kl_divergence(q, p) / n

  model <- keras_model_sequential(
    list(
      layer_conv_1d_reparameterization(
        filters = 64,
        kernel_size = 5,
        padding = "same",
        activation = "relu",
        kernel_divergence_fn = kl_div
      ),
      layer_flatten(),
      layer_dense_reparameterization(units = 10, kernel_divergence_fn = kl_div)
    )
  )

  bayesian_loss <- function() {
    function(y_true, y_pred) {
      nll <- tf$nn$softmax_cross_entropy_with_logits(labels = y_true, logits = y_pred)
      kl <- tf$reduce_sum(model$losses)
      nll + kl
    }
  }

  if (tfp_version() < "0.10") {
    model %>% compile(
      optimizer = 'adam',
      loss = bayesian_loss(),
      experimental_run_tf_function = FALSE
    )
  } else {
    # KL divergence is now added automatically by keras
    # see https://github.com/tensorflow/probability/blob/master/tensorflow_probability/examples/bayesian_neural_network.py
    model %>% compile(
      optimizer = 'adam',
      loss = "mse"
    )
  }

  model %>% fit(x, y, steps_per_epoch = 1, verbose = 0L)

  yhat <- model(x)
  expect_equal(yhat$get_shape()$as_list(), c(150, 10))
  expect_equal(length(model$losses), 2)
})

test_succeeds("layer_conv_1d_flipout works", {

  skip_if_tf_below("2.0")

  library(keras)

  x = tf$ones(shape = c(150L, 1L, 1L))
  y = tf$ones(shape = c(150L, 10L))
  n <- dim(y)[1] %>% tf$cast(tf$float32)

  kl_div <- function(q, p, unused)
    tfd_kl_divergence(q, p) / n

  model <- keras_model_sequential(list(
    layer_conv_1d_flipout(
      filters = 64,
      kernel_size = 5,
      padding = "same",
      activation = "relu",
      kernel_divergence_fn = kl_div
    ),
    layer_flatten(),
    layer_dense_flipout(units = 10, kernel_divergence_fn = kl_div)
  ))

  bayesian_loss <- function() {
    function(y_true, y_pred) {
      nll <- tf$nn$softmax_cross_entropy_with_logits(labels = y_true, logits = y_pred)
      kl <- tf$reduce_sum(model$losses)
      nll + kl
    }
  }

  if (tfp_version() < "0.10") {
    model %>% compile(
      optimizer = 'adam',
      loss = bayesian_loss(),
      experimental_run_tf_function = FALSE
    )
  } else {
    # KL divergence is now added automatically by keras
    # see https://github.com/tensorflow/probability/blob/master/tensorflow_probability/examples/bayesian_neural_network.py
    model %>% compile(
      optimizer = 'adam',
      loss = "mse"
    )
  }

  model %>% fit(x, y, steps_per_epoch = 1, verbose = 0L)

  yhat <- model(x)
  expect_equal(yhat$get_shape()$as_list(), c(150, 10))
  expect_equal(length(model$losses), 2)
})

test_succeeds("layer_conv_2d_reparameterization works", {

  skip_if_tf_below("2.0")

  library(keras)

  x = tf$ones(shape = c(7L, 32L, 32L, 3L))
  y = tf$ones(shape = c(7L, 10L))
  n <- dim(y)[1] %>% tf$cast(tf$float32)

  kl_div <- function(q, p, unused)
    tfd_kl_divergence(q, p) / n

  model <- keras_model_sequential(
    list(
      layer_conv_2d_reparameterization(
        filters = 64,
        kernel_size = 5,
        padding = "same",
        activation = "relu",
        kernel_divergence_fn = kl_div
      ),
      layer_max_pooling_2d(),
      layer_flatten(),
      layer_dense_reparameterization(units = 10, kernel_divergence_fn = kl_div)
    )
  )

  bayesian_loss <- function() {
    function(y_true, y_pred) {
      nll <- tf$nn$softmax_cross_entropy_with_logits(labels = y_true, logits = y_pred)
      kl <- tf$reduce_sum(model$losses)
      nll + kl
    }
  }

  if (tfp_version() < "0.10") {
    model %>% compile(
      optimizer = 'adam',
      loss = bayesian_loss(),
      experimental_run_tf_function = FALSE
    )
  } else {
    # KL divergence is now added automatically by keras
    # see https://github.com/tensorflow/probability/blob/master/tensorflow_probability/examples/bayesian_neural_network.py
    model %>% compile(
      optimizer = 'adam',
      loss = "mse"
    )
  }

  model %>% fit(x, y, steps_per_epoch = 1, verbose = 0L)

  yhat <- model(x)
  expect_equal(yhat$get_shape()$as_list(), c(7, 10))
  expect_equal(length(model$losses), 2)
})

test_succeeds("layer_conv_2d_flipout works", {

  skip_if_tf_below("2.0")

  library(keras)

  x = tf$ones(shape = c(7L, 32L, 32L, 3L))
  y = tf$ones(shape = c(7L, 10L))
  n <- dim(y)[1] %>% tf$cast(tf$float32)

  kl_div <- function(q, p, unused)
    tfd_kl_divergence(q, p) / n

  model <- keras_model_sequential(
    list(
      layer_conv_2d_flipout(
        filters = 64,
        kernel_size = 5,
        padding = "same",
        activation = "relu",
        kernel_divergence_fn = kl_div
      ),
      layer_max_pooling_2d(),
      layer_flatten(),
      layer_dense_flipout(units = 10, kernel_divergence_fn = kl_div)
    )
  )

  bayesian_loss <- function() {
    function(y_true, y_pred) {
      nll <- tf$nn$softmax_cross_entropy_with_logits(labels = y_true, logits = y_pred)
      kl <- tf$reduce_sum(model$losses)
      nll + kl
    }
  }

  if (tfp_version() < "0.10") {
    model %>% compile(
      optimizer = 'adam',
      loss = bayesian_loss(),
      experimental_run_tf_function = FALSE
    )
  } else {
    # KL divergence is now added automatically by keras
    # see https://github.com/tensorflow/probability/blob/master/tensorflow_probability/examples/bayesian_neural_network.py
    model %>% compile(
      optimizer = 'adam',
      loss = "mse"
    )
  }

  model %>% fit(x, y, steps_per_epoch = 1, verbose = 0L)

  yhat <- model(x)
  expect_equal(yhat$get_shape()$as_list(), c(7, 10))
  expect_equal(length(model$losses), 2)
})

test_succeeds("layer_conv_3d_reparameterization works", {

  skip_if_tf_below("2.0")

  library(keras)

  x = tf$ones(shape = c(7L, 16L, 4L, 4L, 3L))
  y = tf$ones(shape = c(7L, 10L))
  n <- dim(y)[1] %>% tf$cast(tf$float32)

  kl_div <- function(q, p, unused)
    tfd_kl_divergence(q, p) / n

  model <- keras_model_sequential(
    list(
      layer_conv_3d_reparameterization(
        filters = 64,
        kernel_size = 5,
        padding = "same",
        activation = "relu",
        kernel_divergence_fn = kl_div
      ),
      layer_max_pooling_3d(),
      layer_flatten(),
      layer_dense_reparameterization(units = 10, kernel_divergence_fn = kl_div)
    )
  )

  bayesian_loss <- function() {
    function(y_true, y_pred) {
      nll <- tf$nn$softmax_cross_entropy_with_logits(labels = y_true, logits = y_pred)
      kl <- tf$reduce_sum(model$losses)
      nll + kl
    }
  }

  if (tfp_version() < "0.10") {
    model %>% compile(
      optimizer = 'adam',
      loss = bayesian_loss(),
      experimental_run_tf_function = FALSE
    )
  } else {
    # KL divergence is now added automatically by keras
    # see https://github.com/tensorflow/probability/blob/master/tensorflow_probability/examples/bayesian_neural_network.py
    model %>% compile(
      optimizer = 'adam',
      loss = "mse"
    )
  }

  model %>% fit(x, y, steps_per_epoch = 1, verbose = 0L)

  yhat <- model(x)
  expect_equal(yhat$get_shape()$as_list(), c(7, 10))
  expect_equal(length(model$losses), 2)
})

test_succeeds("layer_conv_3d_flipout works", {

  skip_if_tf_below("2.0")

  library(keras)

  x = tf$ones(shape = c(7L, 16L, 4L, 4L, 3L))
  y = tf$ones(shape = c(7L, 10L))
  n <- dim(y)[1] %>% tf$cast(tf$float32)

  kl_div <- function(q, p, unused)
    tfd_kl_divergence(q, p) / n

  model <- keras_model_sequential(
    list(
      layer_conv_3d_flipout(
        filters = 64,
        kernel_size = 5,
        padding = "same",
        activation = "relu",
        kernel_divergence_fn = kl_div
      ),
      layer_max_pooling_3d(),
      layer_flatten(),
      layer_dense_flipout(units = 10, kernel_divergence_fn = kl_div)
    )
  )

  bayesian_loss <- function() {
    function(y_true, y_pred) {
      nll <- tf$nn$softmax_cross_entropy_with_logits(labels = y_true, logits = y_pred)
      kl <- tf$reduce_sum(model$losses)
      nll + kl
    }
  }

  if (tfp_version() < "0.10") {
    model %>% compile(
      optimizer = 'adam',
      loss = bayesian_loss(),
      experimental_run_tf_function = FALSE
    )
  } else {
    # KL divergence is now added automatically by keras
    # see https://github.com/tensorflow/probability/blob/master/tensorflow_probability/examples/bayesian_neural_network.py
    model %>% compile(
      optimizer = 'adam',
      loss = "mse"
    )
  }

  model %>% fit(x, y, steps_per_epoch = 1, verbose = 0L)

  yhat <- model(x)
  expect_equal(yhat$get_shape()$as_list(), c(7, 10))
  expect_equal(length(model$losses), 2)
})