# Nothing  -- NOTE(review): stray non-R text, commented out so the file parses; likely safe to remove
## ---- include = FALSE---------------------------------------------------------
# Chunk defaults applied to the rest of the document: chunks are not
# evaluated or cached unless a chunk overrides this, and printed output is
# collapsed into the source block with a "#>" prefix.
knitr::opts_chunk$set(
  eval = FALSE,
  cache = FALSE,
  collapse = TRUE,
  comment = "#>"
)
## ----setup, message = F, eval = T---------------------------------------------
library(processpredictR)
library(bupaR)
library(ggplot2)
library(dplyr)
library(keras)
library(purrr)
## ----echo = F, eval = T, out.width = "60%", fig.align = "center"--------------
# Embed the image file framework.PNG (resolved relative to the document).
knitr::include_graphics(path = "framework.PNG")
## ---- eval = T----------------------------------------------------------------
# Build the example data set from the traffic_fines log for the "outcome" task
# and print it.
df <- prepare_examples(traffic_fines, task = "outcome")
df
## ---- eval = T----------------------------------------------------------------
# Seed the RNG, then split the examples with split = 0.8 and preview the
# first five rows of each part.
set.seed(123)
split <- split_train_test(df, split = 0.8)
head(split$train_df, 5)
head(split$test_df, 5)
## ---- eval = T----------------------------------------------------------------
# Fraction of rows, and fraction of distinct case ids, that ended up in the
# training part.
nrow(split$train_df) / nrow(df)
n_distinct(split$train_df$case_id) / n_distinct(df$case_id)
## -----------------------------------------------------------------------------
# model <- split$train_df %>% create_model(name = "my_model")
# # pass arguments as ... that are applicable to keras::keras_model()
#
# model # is a list
## -----------------------------------------------------------------------------
# model %>% names() # objects from a returned list
## -----------------------------------------------------------------------------
# model$model$name # get the name of a model
## -----------------------------------------------------------------------------
# model$model$non_trainable_variables # list of non-trainable parameters of a model
## -----------------------------------------------------------------------------
# model %>% compile() # model compilation
## -----------------------------------------------------------------------------
# hist <- fit(object = model, train_data = split$train_df, epochs = 5)
## -----------------------------------------------------------------------------
# hist$params
## -----------------------------------------------------------------------------
# hist$metrics
## -----------------------------------------------------------------------------
# predictions <- model %>% predict(test_data = split$test_df,
# output = "append") # default
# predictions %>% head(5)
## -----------------------------------------------------------------------------
# predictions %>% class
## -----------------------------------------------------------------------------
# confusion_matrix(predictions)
## ---- out.width="100%", fig.width = 7-----------------------------------------
# plot(predictions) +
# theme(axis.text.x = element_text(angle = 90))
## ---- out.width="100%", fig.width = 7-----------------------------------------
# knitr::include_graphics("confusion_matrix.PNG")
## -----------------------------------------------------------------------------
# model %>% evaluate(split$test_df)
## -----------------------------------------------------------------------------
# # preprocessed dataset with one-hot encoded categorical features
# df_next_time <- traffic_fines %>%
# group_by_case() %>%
# mutate(month = lubridate::month(min(timestamp), label = TRUE)) %>%
# ungroup_eventlog() %>%
# prepare_examples(task = "next_time", features = "month") %>% split_train_test()
#
#
## -----------------------------------------------------------------------------
# # the attributes of df are added or changed accordingly
#
# df_next_time$train_df %>% attr("features")
## -----------------------------------------------------------------------------
# df_next_time$train_df %>% attr("hot_encoded_categorical_features")
## -----------------------------------------------------------------------------
# df <- prepare_examples(traffic_fines, task = "next_activity") %>% split_train_test()
# custom_model <- df$train_df %>% create_model(custom = TRUE, name = "my_custom_model")
# custom_model
## -----------------------------------------------------------------------------
# custom_model <- custom_model %>%
# stack_layers(layer_dropout(rate = 0.1)) %>%
# stack_layers(layer_dense(units = 64, activation = 'relu'))
# custom_model
## -----------------------------------------------------------------------------
# # this works too
# custom_model %>%
# stack_layers(layer_dropout(rate = 0.1), layer_dense(units = 64, activation = 'relu'))
## -----------------------------------------------------------------------------
# new_outputs <- custom_model$model$output %>% # custom_model$model to access a model and $output to access the outputs of that model
# keras::layer_dropout(rate = 0.1) %>%
# keras::layer_dense(units = custom_model$num_outputs, activation = 'softmax')
#
# custom_model <- keras::keras_model(inputs = custom_model$model$input, outputs = new_outputs, name = "new_custom_model")
# custom_model
#
## -----------------------------------------------------------------------------
# # class of the model
# custom_model %>% class
## -----------------------------------------------------------------------------
# # compile
# compile(object=custom_model, optimizer = "adam",
# loss = loss_sparse_categorical_crossentropy(),
# metrics = metric_sparse_categorical_crossentropy())
## -----------------------------------------------------------------------------
# # the trace of activities must be tokenized
# tokens_train <- df$train_df %>% tokenize()
# map(tokens_train, head) # the output of tokens is a list
#
#
## -----------------------------------------------------------------------------
# # make sequences of equal length
# x <- tokens_train$token_x %>% pad_sequences(maxlen = max_case_length(df$train_df), value = 0)
# y <- tokens_train$token_y
## ---- eval=F------------------------------------------------------------------
# # train
# fit(object = custom_model, x, y, epochs = 10, batch_size = 10) # see also ?keras::fit.keras.engine.training.Model
#
# # predict
# tokens_test <- df$test_df %>% tokenize()
# x <- tokens_test$token_x %>% pad_sequences(maxlen = max_case_length(df$train_df), value = 0)
# predict(custom_model, x)
#
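# # evaluate
# tokens_test <- df$test_df %>% tokenize()
# # pad to the same length used for training and prediction so the input shape matches the model
# x <- tokens_test$token_x %>% pad_sequences(maxlen = max_case_length(df$train_df), value = 0)
# # normalize by dividing y_test over the standard deviation of y_train
# # NOTE(review): this scaling only makes sense for numeric targets (e.g. task = "next_time");
# # df above was prepared with task = "next_activity" -- confirm before use
# y <- tokens_test$token_y / sd(tokens_train$token_y)
# evaluate(custom_model, x, y)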
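# NOTE(review): the lines below are website boilerplate, not R code; commented out so the file parses.
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.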