# (stray extraction artifact — the word "Nothing" from the HTML page source; not part of the script)
## ---- include = FALSE-------------------------------------------------------------------
# Global knitr options for this vignette. All chunks are evaluated only when
# the suggested packages and a working torch installation are available.
knitr::opts_chunk$set(
  # Render high-dpi figures only when explicitly requested via the
  # RENDER_PLOTLY environment variable
  fig.dpi = ifelse(Sys.getenv("RENDER_PLOTLY", unset = 0) == 1, 400, 50),
  collapse = TRUE,
  comment = "#>",
  fig.align = "center",
  out.width = "90%",
  # Scalar guard: use short-circuiting `&&` (not elementwise `&`) and
  # `quietly = TRUE` so missing packages do not emit messages. GGally is
  # included because the pairs plot below requires it.
  eval = requireNamespace("palmerpenguins", quietly = TRUE) &&
    requireNamespace("luz", quietly = TRUE) &&
    requireNamespace("GGally", quietly = TRUE) &&
    torch::torch_is_installed()
)
## ---- echo = FALSE----------------------------------------------------------------------
# Force English messages and fix both the R and the torch RNG so the
# vignette output is reproducible
Sys.setenv(LANG = "en_US.UTF-8")
seed <- 111
set.seed(seed)
torch::torch_manual_seed(seed)
## ---- warning=FALSE, fig.width=12, fig.height=8, message=FALSE--------------------------
library(palmerpenguins)

# Drop incomplete observations before plotting
penguin_data <- na.omit(penguins)

# Pairwise plot of the measurement columns (3:7), colored by species
pair_plot <- GGally::ggpairs(
  penguin_data,
  ggplot2::aes(color = species, alpha = 0.75),
  columns = 3:7,
  progress = FALSE
)
pair_plot
## ----setup, fig.width=16----------------------------------------------------------------
# Load packages
library(torch)
library(luz)

# A torch dataset wrapping the penguin data: numeric features are kept as-is,
# categorical features (island, sex) are one-hot encoded, and the species is
# stored as an integer target.
penguin_dataset <- dataset(
  name = "penguin_dataset",
  initialize = function(df) {
    # Remove incomplete rows first
    complete <- na.omit(df)

    # Numeric features -> a float tensor (one column per feature)
    numeric_cols <- c(
      "bill_length_mm", "bill_depth_mm", "flipper_length_mm",
      "body_mass_g", "year"
    )
    numeric_part <- torch_tensor(as.matrix(complete[, numeric_cols]))

    # Categorical features: factor levels -> integer codes -> one-hot blocks
    cat_codes <- torch_tensor(
      sapply(complete[, c("island", "sex")], as.integer),
      dtype = torch_int64()
    )
    onehot_part <- torch_hstack(list(
      nnf_one_hot(cat_codes[, 1]), # island
      nnf_one_hot(cat_codes[, 2])  # sex
    ))

    # Field 'x': all features side by side; field 'y': integer species labels
    self$x <- torch_hstack(list(numeric_part, onehot_part))
    self$y <- torch_tensor(as.integer(complete$species))
  },
  .getitem = function(i) {
    # Return the i-th feature row together with its label
    list(x = self$x[i, ], y = self$y[i])
  },
  .length = function() {
    # Number of observations = length of the target tensor
    self$y$size()[[1]]
  }
)
## ---------------------------------------------------------------------------------------
# Normalize the continuous inputs (columns 3:6 and 8)
scaled_penguin_ds <- penguin_data
scaled_penguin_ds[, c(3:6, 8)] <- scale(penguin_data[, c(3:6, 8)])

# Create train and validation split: 250 training rows, the rest validation.
# Derive the total number of rows from the data instead of hard-coding 333,
# so the split stays correct if the data changes.
n_obs <- nrow(penguin_data)
n_train <- 250
idx <- sample(seq_len(n_obs))
train_idx <- idx[seq_len(n_train)]
valid_idx <- idx[(n_train + 1):n_obs]

# Create train and validation datasets
train_ds <- penguin_dataset(scaled_penguin_ds[train_idx, ])
valid_ds <- penguin_dataset(scaled_penguin_ds[valid_idx, ])

# Create dataloaders (shuffle only the training data)
train_dl <- dataloader(train_ds, batch_size = 32, shuffle = TRUE)
valid_dl <- dataloader(valid_ds, batch_size = 32, shuffle = FALSE)

# We use the whole dataset as the test data
test_ds <- penguin_dataset(scaled_penguin_ds)
## ---------------------------------------------------------------------------------------
# Model definition: a feed-forward classifier with three hidden layers,
# ReLU activations, dropout regularization and a softmax output over the
# three species.
net <- nn_module(
  initialize = function(dim_in) {
    drop_rate <- 0.4
    # The whole model lives in `self$seq_model` so it can be extracted
    # after training
    self$seq_model <- nn_sequential(
      nn_linear(dim_in, 256),
      nn_relu(),
      nn_dropout(p = drop_rate),
      nn_linear(256, 256),
      nn_relu(),
      nn_dropout(p = drop_rate),
      nn_linear(256, 64),
      nn_relu(),
      nn_dropout(p = drop_rate),
      nn_linear(64, 3),
      # Probabilities over the class dimension (dim 2 of [batch, class])
      nn_softmax(dim = 2)
    )
  },
  # The forward pass only delegates to the sequential model
  forward = function(x) {
    self$seq_model(x)
  }
)
## ---------------------------------------------------------------------------------------
# The classes are imbalanced, so weight each class inversely proportional
# to its frequency in the training data
class_counts <- torch_stack(lapply(1:3, function(cl) sum(train_ds$y == cl)))
weight <- length(train_ds$y) / (3 * class_counts)

# Fit the model with a class-weighted negative log-likelihood loss
# (the model outputs probabilities, hence the log() before nnf_nll_loss)
fitted <- net %>%
  setup(
    loss = function(input, target) {
      nnf_nll_loss(log(input), target, weight = weight)
    },
    optimizer = optim_adam,
    metrics = list(luz_metric_accuracy())
  ) %>%
  set_hparams(dim_in = 10) %>%
  fit(train_dl, epochs = 50, valid_data = valid_dl)

# Extract the sequential model for the interpretability analysis
model <- fitted$model$seq_model

# Show the training/validation metrics of the final epoch
get_metrics(fitted)[c(99, 100, 199, 200), ]
## ---------------------------------------------------------------------------------------
library(innsight)

# Human-readable names for the 10 input features (5 numeric + 5 one-hot
# columns) and the 3 output classes
input_names <- c(
  "bill_length", "bill_depth", "flipper_length", "body_mass", "year",
  "island_Biscoe", "island_Dream", "island_Torgersen",
  "sex_female", "sex_male"
)
output_names <- c("Adelie", "Chinstrap", "Gentoo")

# Create the `Converter` object
converter_1 <- convert(
  model,
  input_dim = 10,
  input_names = input_names,
  output_names = output_names
)
## ---------------------------------------------------------------------------------------
# A second `Converter` object; the input names are left at their defaults
# because the categorical features get combined after applying the method
converter_2 <- convert(
  model,
  input_dim = 10,
  output_names = output_names
)
## ---------------------------------------------------------------------------------------
# Data to be analyzed (in this case, we use the whole dataset)
data <- test_ds$x
# Apply method 'LRP' with rule alpha-beta
lrp_ab_1 <- run_lrp(converter_1, data, rule_name = "alpha_beta", rule_param = 2)
# the result for 333 instances, 10 inputs and all 3 outputs
dim(get_result(lrp_ab_1))
## ---- results='hide', message=FALSE-----------------------------------------------------
# Apply method as in the other case
lrp_ab_2 <- run_lrp(converter_2, data, rule_name = "alpha_beta", rule_param = 2)
# Adjust input dimension and input names in the method converter object
# NOTE(review): this mutates innsight-internal fields so that the plotting
# methods treat the data as 7 features (5 numeric + 'island' + 'sex')
lrp_ab_2$converter$input_dim[[1]] <- 7
lrp_ab_2$converter$input_names[[1]][[1]] <-
c(
"bill_length", "bill_depth", "flipper_length", "body_mass", "year",
"island", "sex"
)
# Combine (sum) the results for feature 'island' and 'sex'
# The result tensor has shape [instances, features, outputs]; summing over
# dim 2 (keepdim = TRUE) collapses each one-hot group into a single column,
# and torch_cat(dim = 2) re-assembles the feature axis.
lrp_ab_2$result[[1]][[1]] <- torch_cat(list(
lrp_ab_2$result[[1]][[1]][, 1:5, ], # results for all numeric features
lrp_ab_2$result[[1]][[1]][, 6:8, ]$sum(2, keepdim = TRUE), # results for feature island
lrp_ab_2$result[[1]][[1]][, 9:10, ]$sum(2, keepdim = TRUE) # results for feature sex
),
dim = 2
)
# Now we have the desired output shape with combined categorical features
dim(get_result(lrp_ab_2))
#> [1] 333 7 3
## ---- fig.width=8-----------------------------------------------------------------------
library(ggplot2)

# Relevances with the one-hot encoded (separated) categorical features
plot_separated <- plot(lrp_ab_1, output_idx = c(1, 3)) + coord_flip()
plot_separated

# Relevances with the combined categorical features
plot_combined <- plot(lrp_ab_2, output_idx = c(1, 3)) + coord_flip()
plot_combined
## ---- echo = FALSE----------------------------------------------------------------------
# Show the first observation of the data as a table
knitr::kable(head(penguin_data, 1))
## ---- fig.width=8-----------------------------------------------------------------------
library(ggplot2)

# Boxplot of the raw relevances (preprocess_FUN = identity disables the
# default preprocessing); the observation with index 1 is highlighted as
# a reference data point
relevance_boxplot <- boxplot(
  lrp_ab_2,
  output_idx = c(1, 3),
  preprocess_FUN = identity,
  ref_data_idx = 1
)
relevance_boxplot +
  coord_flip() +
  facet_grid(rows = vars(output_node))
# (rdrr.io footer boilerplate from the HTML page source — commented out so the script parses)
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.