#' Find best model per id and create forecast out of sample
#'
#' Scores every model's out-of-sample forecast per `id`, ranks the models by
#' a majority vote across accuracy metrics (sum of per-metric ranks, lowest
#' wins), and returns both the accuracy table of the winners and a lookup of
#' the best model per `id`. Ids with a single observation cannot be scored
#' and default to `"CATBOOST"`.
#'
#' @param forecast_tbl Data frame with all the forecasts by all the models.
#'   Must contain columns `.model_desc`, `.index`, `.value` and `id`.
#' @param one_obs_id Vector of id's with one observation.
#' @param first_date_in_test_set The first date in the test set.
#' @param transformation_used What transformation, if any, was used. Possible
#'   values are 'none', 'log' or 'log1p'.
#' @param model_used Character vector of models used.
#' @param unnest Should the list-columns produced by `pivot_wider()` be
#'   unnested? Defaults to `FALSE`.
#' @return A list with two elements: `accuracy_tbl` (accuracy metrics of the
#'   winning model per id) and `best_model_per_id` (tibble with columns
#'   `id` and `best_model`).
accuracy_by_id <- function(forecast_tbl, one_obs_id, first_date_in_test_set, transformation_used, model_used, unnest = FALSE) {
  return_list <- list() # list with values to return

  # Accuracy table: drop single-observation ids (no test window to score)
  accuracy_tbl <- forecast_tbl %>%
    filter(!id %in% one_obs_id) %>%
    select(.model_desc, .index, .value, id)

  # Convert forecasts back to the original level. "none" (or any other
  # value) needs no back-transformation.
  if (transformation_used == "log") {
    accuracy_tbl <- accuracy_tbl %>%
      mutate(.value = exp(.value))
  } else if (transformation_used == "log1p") {
    accuracy_tbl <- accuracy_tbl %>%
      mutate(.value = expm1(.value))
  }

  # Column names after pivoting: actuals, each model (upper-cased), ensemble
  model_used <- c("ACTUAL", stringr::str_to_upper(model_used), "ENSEMBLE")

  accuracy_tbl <- accuracy_tbl %>%
    pivot_wider(names_from = .model_desc, values_from = .value)
  if (unnest) {
    # all_of() avoids the tidyselect ambiguity of a bare external vector
    accuracy_tbl <- accuracy_tbl %>%
      unnest(all_of(model_used))
  }

  accuracy_tbl <- accuracy_tbl %>%
    filter(.index >= first_date_in_test_set) %>%
    pivot_longer(cols = -c(.index, id, ACTUAL)) %>%
    # necessary if some .value is empty
    drop_na(value) %>%
    group_by(id, name) %>%
    summarize_accuracy_metrics(
      truth = ACTUAL,
      estimate = value,
      metric_set = default_forecast_accuracy_metric_set()
    ) %>%
    # Find best model per id by counting number of times it has the lowest error
    pivot_longer(cols = mae:rsq,
                 names_to = "metric_name",
                 values_to = "metric_value") %>%
    # Skip rsq: higher is better, so it cannot join a lowest-error vote
    filter(metric_name != "rsq") %>%
    # Rank models within each (metric, id): 1 = lowest error.
    # .by_group = TRUE makes the within-group sort explicit (arrange()
    # otherwise ignores groups).
    group_by(metric_name, id) %>%
    arrange(metric_value, .by_group = TRUE) %>%
    mutate(index = row_number()) %>%
    # Sum the ranks per model; the lowest sum wins the majority vote
    group_by(name, id) %>%
    mutate(sum = sum(index)) %>%
    group_by(id) %>%
    # Best model based on majority vote
    select(id, name, sum, metric_name, metric_value) %>%
    distinct() %>%
    filter(sum == min(sum)) %>%
    # If it's a tie, filter by rmse, then mase, then mae. If still tied,
    # pick the first.
    pivot_wider(names_from = metric_name, values_from = metric_value) %>%
    filter(rmse == min(rmse)) %>%
    filter(mase == min(mase)) %>%
    filter(mae == min(mae)) %>%
    slice_head(n = 1)

  # Best model per id; if an id somehow keeps several candidates, prefer
  # CATBOOST, otherwise keep whatever single winner is present.
  best_metric_by_id <- accuracy_tbl %>%
    select(id, name) %>%
    set_names("id", "best_model") %>%
    group_by(id) %>%
    mutate(n_models = n(),
           choose = case_when(
             n_models > 1 & str_detect(best_model, "CATBOOST") ~ "choose",
             n_models > 1 & !str_detect(best_model, "CATBOOST") ~ "not_choose",
             TRUE ~ "choose"
           )
    ) %>%
    filter(choose == "choose") %>%
    select(-c(n_models, choose)) %>%
    ungroup()

  # Ids with one observation cannot be cross-validated: default to CATBOOST
  one_sample_id_tbl <- tibble(
    id = one_obs_id,
    best_model = "CATBOOST"
  )

  # Final best by id
  best_metric_by_id <- best_metric_by_id %>%
    bind_rows(one_sample_id_tbl)

  # Return
  return_list$accuracy_tbl <- accuracy_tbl
  return_list$best_model_per_id <- best_metric_by_id
  return(return_list)
}
# NOTE(review): the two lines below are a website-scraping artifact
# (embed-snippet instructions), not R code; commented out so the file parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.