Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
eval = reticulate::py_module_available("keras")
)
# Suppress verbose Keras output for the vignette
options(keras.fit_verbose = 0)
set.seed(123)
## ----eval=FALSE---------------------------------------------------------------
# install.packages("pak")
# pak::pak("davidrsch/kerasnip")
# pak::pak("rstudio/keras3")
#
# # Install the backend
# keras3::install_keras()
## ----load-kerasnip------------------------------------------------------------
library(kerasnip)
library(tidymodels)
library(keras3)
## ----prepare-data-------------------------------------------------------------
mnist <- dataset_mnist()
x_train <- mnist$train$x
y_train <- mnist$train$y
x_test <- mnist$test$x
y_test <- mnist$test$y
# Reshape
x_train <- array_reshape(x_train, c(nrow(x_train), 784))
x_test <- array_reshape(x_test, c(nrow(x_test), 784))
# Rescale
x_train <- x_train / 255
x_test <- x_test / 255
# Convert outcomes to factors for tidymodels
# kerasnip will handle y convertion internally using keras3::to_categorical()
y_train_factor <- factor(y_train)
y_test_factor <- factor(y_test)
# For tidymodels, it's best to work with data frames
# Use I() to keep the matrix structure of x within the data frame
train_df <- data.frame(x = I(x_train), y = y_train_factor)
test_df <- data.frame(x = I(x_test), y = y_test_factor)
## ----keras-standard, eval=FALSE, echo=TRUE, results='hide'--------------------
# # The standard Keras3 approach
# model <- keras_model_sequential(input_shape = 784) |>
# layer_dense(units = 256, activation = "relu") |>
# layer_dropout(rate = 0.4) |>
# layer_dense(units = 128, activation = "relu") |>
# layer_dropout(rate = 0.3) |>
# layer_dense(units = 10, activation = "softmax")
#
# summary(model)
#
# model |>
# compile(
# loss = "categorical_crossentropy",
# optimizer = optimizer_rmsprop(),
# metrics = "accuracy"
# )
#
# # The model would then be trained with model |> fit(...)
## ----define-blocks------------------------------------------------------------
# An input block to initialize the model.
# The 'model' argument is supplied implicitly by the kerasnip backend.
mlp_input_block <- function(model, input_shape) {
keras_model_sequential(input_shape = input_shape)
}
# A reusable "module" that combines a dense layer and a dropout layer.
# All arguments that should be tunable need a default value.
dense_dropout_block <- function(model, units = 128, rate = 0.1) {
model |>
layer_dense(units = units, activation = "relu") |>
layer_dropout(rate = rate)
}
# The output block for classification.
mlp_output_block <- function(model, num_classes) {
model |> layer_dense(units = num_classes, activation = "softmax")
}
## ----create-spec--------------------------------------------------------------
create_keras_sequential_spec(
model_name = "mnist_mlp",
layer_blocks = list(
input = mlp_input_block,
hidden_1 = dense_dropout_block,
hidden_2 = dense_dropout_block,
output = mlp_output_block
),
mode = "classification"
)
## ----use-spec-----------------------------------------------------------------
mlp_spec <- mnist_mlp(
hidden_1_units = 256,
hidden_1_rate = 0.4,
hidden_2_rate = 0.3,
hidden_2_units = 128,
compile_loss = "categorical_crossentropy",
compile_optimizer = optimizer_rmsprop(),
compile_metrics = c("accuracy"),
fit_epochs = 30,
fit_batch_size = 128,
fit_validation_split = 0.2
) |>
set_engine("keras")
# Fit the model
mlp_fit <- fit(mlp_spec, y ~ x, data = train_df)
## ----model-summarize----------------------------------------------------------
mlp_fit |>
extract_keras_model() |>
summary()
## ----model-plot, eval=FALSE---------------------------------------------------
# mlp_fit |>
# extract_keras_model() |>
# plot(show_shapes = TRUE)
## ----model-fit-history--------------------------------------------------------
mlp_fit |>
extract_keras_history() |>
plot()
## ----model-evaluate-----------------------------------------------------------
mlp_fit |> keras_evaluate(x_test, y_test)
## ----model-predict-class------------------------------------------------------
# Predict the class for the first 5 images in the test set
class_preds <- mlp_fit |>
predict(new_data = head(select(test_df, x)))
class_preds
## ----model-predict-prob-------------------------------------------------------
# Predict probabilities for the first 5 images
prob_preds <- mlp_fit |>
predict(new_data = head(select(test_df, x)), type = "prob")
prob_preds
## ----model-predict-compare----------------------------------------------------
# Combine predictions with actuals for comparison
comparison <- bind_cols(
class_preds,
prob_preds
) |>
bind_cols(
head(test_df[, "y", drop = FALSE])
)
comparison
## ----tune-spec-mnist----------------------------------------------------------
# Define a tunable specification
# We set num_hidden_2 = 0 to disable the second hidden block
# for this tuning example
tune_spec <- mnist_mlp(
num_hidden_1 = tune(),
hidden_1_units = tune(),
hidden_1_rate = tune(),
num_hidden_2 = 0,
compile_loss = "categorical_crossentropy",
compile_optimizer = optimizer_rmsprop(),
compile_metrics = c("accuracy"),
fit_epochs = 30,
fit_batch_size = 128,
fit_validation_split = 0.2
) |>
set_engine("keras")
# Create a workflow
tune_wf <- workflow(y ~ x, tune_spec)
## ----create-grid-mnist--------------------------------------------------------
# Define the tuning grid
params <- extract_parameter_set_dials(tune_wf) |>
update(
num_hidden_1 = dials::num_terms(c(1, 3)),
hidden_1_units = dials::hidden_units(c(64, 256)),
hidden_1_rate = dials::dropout(c(0.2, 0.4))
)
grid <- grid_regular(params, levels = 3)
grid
## ----run-tuning, cache=TRUE---------------------------------------------------
# Using only the first 100 rows for speed. The real call
# should be: folds <- vfold_cv(train_df, v = 3)
folds <- vfold_cv(train_df[1:100, ], v = 3)
tune_res <- tune_grid(
tune_wf,
resamples = folds,
grid = grid,
metrics = metric_set(accuracy),
control = control_grid(save_pred = FALSE, save_workflow = TRUE)
)
## ----show-best-mnist----------------------------------------------------------
# Show the summary table of the best models
show_best(tune_res, metric = "accuracy")
## ----finalize-best-model------------------------------------------------------
# Select the best hyperparameters
best_hps <- select_best(tune_res, metric = "accuracy")
# Finalize the workflow with the best hyperparameters
final_wf <- finalize_workflow(tune_wf, best_hps)
# Fit the final model on the full training data
final_fit <- fit(final_wf, data = train_df)
## ----inspect-final-model------------------------------------------------------
# Print the model summary
final_fit |>
extract_fit_parsnip() |>
extract_keras_model() |>
summary()
# Plot the training history
final_fit |>
extract_fit_parsnip() |>
extract_keras_history() |>
plot()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.