library(tsibble)
library(tidyverse)
library(parallel)
library(doSNOW)
library(doParallel)
library(furrr)
library(profvis)
library(imputeTS)
library(sparklyr)
library(mgcv)
library(glmnet)
library(forecast)
library(plotly)
library(shinydashboard)
library(shinycssloaders)
library(shinyjs)
library(shinyWidgets)
library(formattable)
sales_tmp %>%
mutate(reg_value = NA_real_)  # assumed placeholder: the regressor value expression was left blank
# Initial -----------------------------------------------------------------
new_data <- sales %>%
left_join(cf, by = c("forecast_item", "date"))
source("R/data_cleansing.R")
source("R/data_preparation.R")
source("R/feature_engineering.R")
source("R/data_validation.R")
source("R/auxiliar.R")
source("R/fit_gam.R")
source("R/fit_glmnet.R")
source("R/fit_glm.R")
source("R/fit_arima.R")
source("R/fit_croston.R")
source("R/fit_ets.R")
source("R/model_optimization.R")
source("R/predict_ts.R")
source("R/data_import.R")
source("R/stat_analysis.R")
presc_data <- prescribe_ts(.data_init = oc_data$sales
, key = "forecast_item"
, y_var = "sales"
, date_var = "date"
, reg_name = "reg_name"
, reg_value = "reg_value"
, freq = 12
, date_format = "ymd")
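# Assumption: `parameter` is referenced by the calls below but never assigned in
# this script; a minimal setup using the default hyperparameters exported by the
# sourced helpers:
parameter <- get_default_hyperpar()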
.data <- presc_data$data[[1]]

microbenchmark::microbenchmark(
results <- pd1$data[[2]] %>%
validate_ts() %>%
feature_engineering_ts() %>%
clean_ts() %>%
optim_ts(.data = .
, ts_model = c("arima", "gam", "glm", "ets", "glmnet")
, optim_conf = get_default_optim_conf()
, parameter = parameter
, export_fit = F)
)
# Quick look at rows with history == 0
presc_data$data[[1]] %>%
filter(history == 0)

presc_data$data[[1]] %>%
validate_ts() %>%
feature_engineering_ts() %>%
filter(history == 1) %>%
dplyr::select(-history) %>%
clean_ts(winsorize_config = list(apply_winsorize = TRUE)
, imputation_config = list(impute_method = "none"
, na_regressor = FALSE
, na_missing_dates = FALSE)) %>%
optim_ts(ts_model = "glmnet", optim_conf = get_default_optim_conf()
, parameter = get_default_hyperpar()
, export_fit = T)
# Parameter ---------------------------------------------------------------
optim_conf <- list(test_size = 6, lag = 1)  # holdout size and lag passed to optim_ts() in the loops below
tdata <- pd1$data[[1]] %>%
validate_ts() %>%
feature_engineering_ts() %>%
clean_ts(winsorize_config = list(apply_winsorize = TRUE)
, imputation_config = list(impute_method = "none"
, na_regressor = FALSE
, na_missing_dates = FALSE))
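# Subset of keys selected by country prefix (IN, DK, SE, NO, FI, NL, BE, EE, LT, LV),
# used to drive the per-model test loops below.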
my_mkt <- presc_data %>%
filter(str_detect(key, "IN|DK|SE|NO|FI|NL|BE|EE|LT|LV")) %>%
pull(key)
# TESTING
d1 <- optim_ts(.data = tdata
, ts_model = c("arima", "glmnet", "gam", "glm", "ets")
, optim_conf = get_default_optim_conf()
, parameter = get_default_hyperpar()
, export_fit = F)
profvis(
optim_ts(.data = .data
, ts_model = c("glmnet")
, optim_conf = get_default_optim_conf()
, parameter = get_default_hyperpar()
, export_fit = F)
)
# -------
.data %>%
mutate(distance = 0.3*mape + 0.7*spa_d, .after = 1) %>%
arrange(distance)
p <- results %>%
mutate(spa_d = 1 - spa
, score = 0.5*mape + 0.5*abs(spa_d), .after = 1) %>%
# mutate(spa_d_log = log(spa_d + 1)
# , mape_log = log(mape + 1)) %>%
ggplot()+
geom_density2d_filled(aes(mape, spa_d), alpha = .9)+
geom_hline(yintercept = 0, col = "white")+
geom_point(aes(mape, spa_d
, shape = model
, col = model
, size = 100*(score^-1)), fill = NA
#, col = "white"
#, shape = 21
)+
scale_x_log10()+
#scale_y_log10()+
theme_minimal()
plotly::ggplotly(p)
count <- 0
glmnet <- map(.x = my_mkt[1:10], .f = function(x){
count <<- count + length(x)
d1 %>%
filter(key == x) %>%
pull(data) %>%
.[[1]] %>%
validate_ts() %>%
feature_engineering_ts() %>%
clean_ts(winsorize_config = list(apply_winsorize = T)
, imputation_config = list(impute_method = "none"
, na_regressor = TRUE
, na_missing_dates = TRUE)) %>%
optim_ts(.data = ., ts_model = "glmnet", optim_conf = optim_conf
, parameter = parameter, export_fit = F)
})
count <- 0
gam <- map(.x = my_mkt, .f = function(x){
count <<- count + length(x)
d1 %>%
filter(key == x) %>%
pull(data) %>%
.[[1]] %>%
validate_ts() %>%
feature_engineering_ts() %>%
clean_ts(winsorize_config = list(apply_winsorize = F)
, imputation_config = list(impute_method = "none"
, na_regressor = TRUE
, na_missing_dates = TRUE)) %>%
optim_ts(.data = ., ts_model = "gam", optim_conf = optim_conf
, parameter = parameter, export_fit = F)
})
count <- 0
glm <- map(.x = my_mkt, .f = function(x){
count <<- count + length(x)
d1 %>%
filter(key == x) %>%
pull(data) %>%
.[[1]] %>%
validate_ts() %>%
feature_engineering_ts() %>%
clean_ts(winsorize_config = list(apply_winsorize = F)
, imputation_config = list(impute_method = "none"
, na_regressor = TRUE
, na_missing_dates = TRUE)) %>%
optim_ts(.data = ., ts_model = "glm", optim_conf = optim_conf
, parameter = parameter, export_fit = F)
})
#count_i <- 0
arima <- map(.x = my_mkt, ~{
#count_i <<- count_i + length(x)
d1 %>%
filter(key == .x) %>%
pull(data) %>%
.[[1]] %>%
validate_ts() %>%
feature_engineering_ts() %>%
clean_ts(winsorize_config = list(apply_winsorize = F)
, imputation_config = list(impute_method = "none"
, na_regressor = TRUE
, na_missing_dates = TRUE)) %>%
optim_ts(.data = ., ts_model = "arima", optim_conf = optim_conf
, parameter = parameter, export_fit = F)
})
count_i <- 0
ets <- map(.x = my_mkt[6], ~{
count_i <<- count_i + length(.x)
d1 %>%
filter(key == .x) %>%
pull(data) %>%
.[[1]] %>%
validate_ts() %>%
feature_engineering_ts() %>%
clean_ts(winsorize_config = list(apply_winsorize = F)
, imputation_config = list(impute_method = "none"
, na_regressor = TRUE
, na_missing_dates = TRUE)) %>%
optim_ts(.data = ., ts_model = "ets", optim_conf = optim_conf
, parameter = parameter, export_fit = F)
})
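# Sketch (assumption, not part of the original run): the per-model loops above
# could be parallelised with furrr, which is already attached. The worker count
# and the object name `glmnet_future` are illustrative.
future::plan(future::multisession, workers = 4)
glmnet_future <- future_map(my_mkt[1:10], function(x){
d1 %>%
filter(key == x) %>%
pull(data) %>%
.[[1]] %>%
validate_ts() %>%
feature_engineering_ts() %>%
clean_ts(winsorize_config = list(apply_winsorize = TRUE)
, imputation_config = list(impute_method = "none"
, na_regressor = TRUE
, na_missing_dates = TRUE)) %>%
optim_ts(.data = ., ts_model = "glmnet", optim_conf = optim_conf
, parameter = parameter, export_fit = FALSE)
})
future::plan(future::sequential)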
opt_sum <- function(.key){
d1 %>%
filter(key == .key) %>%
pull(data) %>%
.[[1]] %>%
validate_ts() %>%
feature_engineering_ts() %>%
clean_ts(winsorize_config = list(apply_winsorize = T)
, imputation_config = list(impute_method = "none"
, na_regressor = TRUE
, na_missing_dates = TRUE)) %>%
optim_ts(.data = ., ts_model = "glmnet"
, optim_conf = optim_conf
, parameter = parameter, export_fit = F) %>%
summary_ts()
}
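# Example usage on one of the keys exercised below:
# opt_sum("AE: 424283")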
.data <- d1 %>%
filter(key == "AE: 424283") %>%
pull(data) %>%
.[[1]] %>%
validate_ts() %>%
feature_engineering_ts() %>%
clean_ts(winsorize_config = list(apply_winsorize = F)
, imputation_config = list(impute_method = "none"
, na_regressor = TRUE
, na_missing_dates = TRUE))
.data <- d1 %>%
filter(key == "AE: 424283") %>%
pull(data) %>%
.[[1]] %>%
validate_ts() %>%
feature_engineering_ts() %>%
clean_ts(winsorize_config = list(apply_winsorize = FALSE)
, imputation_config = list(impute_method = "none"
, na_regressor = TRUE
, na_missing_dates = TRUE)) %>%
optim_ts(.data = ., ts_model = "glm", optim_conf = optim_conf
, parameter = parameter, export_fit = T) %>%
summary_ts()
f <- d1 %>%
filter(key == "AE: 424283") %>%
pull(data) %>%
.[[1]] %>%
validate_ts() %>%
feature_engineering_ts(hierarchy_seas = T, numeric_seas = T) %>%
clean_ts(winsorize_config = list(apply_winsorize = T
, add_transformations = T)
, imputation_config = list(impute_method = "kalman"
, na_regressor = TRUE
, na_missing_dates = TRUE
, add_transformations = T
, na_value = 0)) %>%
pivot_longer(cols = matches("y_var")) %>%
ggplot()+
geom_line(aes(date_var, value, col = name))
s1 <- d1 %>%
filter(key == "DK: 421908") %>%
pull(data) %>%
.[[1]] %>%
validate_ts() %>%
feature_engineering_ts(hierarchy_seas = T) %>%
clean_ts(winsorize_config = list(apply_winsorize = T
, add_transformations = T)
, imputation_config = list(impute_method = "none"
, na_regressor = TRUE
, na_missing_dates = TRUE
, add_transformations = T
, na_value = NULL)) %>%
mutate(y_var_d1 = tsibble::difference(y_var)) %>%
slice(1:(n())) %>%
na.omit()
fit <- glm(y_var_d1 ~ month_seas, data = s1)
ggeffects::ggemmeans(fit, terms = "month_seas", ci.lvl = .8) %>%
plot()
# Assumption: feed the cleaned `.data` built above into fit_ts()
.data %>%
fit_ts(ts_model = "gam", parameter = parameter, optim = optim) %>%
summary_ts()
optim_ts(.data = .data, test_size = 6, lag_ref = 3, ts_model = "gam"
, parameter = parameter, export_fit = FALSE)
d2 <- d1 %>%
mutate(n = map_int(data, ~nrow(.x))) %>%
filter(str_detect(key, "^DK|^SE"))
registerDoParallel(cores = 5)
l <- foreach(i = d2$key[1:5], .packages = c("tidyverse", "mgcv", "glmnet", "stlplus")) %dopar% {
d1 %>%
filter(key == i) %>%
pull(data) %>%
.[[1]] %>%
validate_ts() %>%
feature_engineering_ts() %>%
clean_ts(winsorize_config = list(apply_winsorize = FALSE)
, imputation_config = list(impute_method = "none"
, na_regressor = TRUE
, na_missing_dates = TRUE)) %>%
optim_ts(.data = ., ts_model = "gam", optim_conf = optim_conf
, parameter = parameter, export_fit = FALSE) %>%
summary_ts()
}
names(l) <- d2$key[1:5]
l2 <- l %>%
enframe() %>%
mutate(mape = map_dbl(value, ~.x[["mape"]]))
l <- .data$.data
.data$.data %>%
ggplot()+
geom_line(aes(date_var, y_var))+
geom_line(aes(date_var, y_var_clean), col ="red")+
geom_line(aes(date_var, y_var_imp), col = "purple")+
geom_line(aes(date_var, y_var_winso_imp), col ="blue")
profvis::profvis(
.data %>%
validate_ts()
)
cl <- makeCluster(3, type = "SOCK")
doSNOW::registerDoSNOW(cl = cl)
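# Assumption: furrr needs an explicit future plan, otherwise future_map() in the
# benchmark below runs sequentially.
future::plan(future::multisession, workers = 3)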
mb <- microbenchmark::microbenchmark(
seq = lapply(d1$data[1:100], FUN = function(x) validate_ts(x))
, multi = foreach(i = seq_along(d1$key[1:100]), .packages = "tidyverse") %dopar% {
validate_ts(d1$data[[i]])
}
, future = future_map(d1$data[1:100], ~validate_ts(.x))
, mc = mclapply(d1$data[1:100], FUN = function(x) validate_ts(x)
, mc.cores = 3)
, times = 20)
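# Note: mclapply() relies on forking and is not available on Windows with mc.cores > 1.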
profvis::profvis(
k <- mclapply(d1$data[1:100], FUN = function(x) validate_ts(x)
, mc.cores = 3)
)
job_1 <- function(.data){
# Validate one series and return it together with the run log
# (`logger` is assumed to be provided by the sourced helpers)
.d1 <- validate_ts(.data)
new_log <- logger
list(.d1, new_log)
}

k <- foreach(i = seq_along(d1$key[1:100])
, .packages = c("tidyverse")) %dopar% {
job_1(d1$data[[i]])
}

logs <- bind_rows(af_log, map_df(k, ~.x[[2]]))
l1 <- logs %>%
unnest(log) %>%
filter(has_reg == TRUE)
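# Cleanup (assumes no further %dopar% calls): release the SOCK cluster workers.
stopCluster(cl)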