knitr::opts_chunk$set(echo = TRUE, message = F, warning = F, fig.width = 13, fig.height = 6, comment = "")
library(tidyverse) library(lubridate) library(e1071) library(gridExtra) library(janitor) library(data.table) library(funModeling) library(kableExtra) library(htmltools) library(ISOweek) library(Metrics) library(tidymodels) library(timetk)
minmax_scaler <- function(x) { return( ( x - min( x ) ) / ( max(x) - min(x) ) ) } robust_scaler <- function(x){ return( ( x - quantile( x , 0.5) ) / ( quantile(x ,0.75) - quantile(x, 0.25) ) ) } ml_error <- function(model_name = "Linear Regression Model",model_predictions){ MAE <- model_predictions %>% yardstick::mae( actual,predictions) MAPE <- model_predictions %>% yardstick::mape( actual,predictions) RMSE <- model_predictions %>% yardstick::rmse( actual, predictions) data.frame(Model_Name = model_name, MAE= round(MAE$.estimate,3), MAPE = round(MAPE$.estimate,3), RMSE = round(RMSE$.estimate, 3)) } # TimeSeries cross validation function cross_validation <- function(data_training, kfold , Model_name, model){ mae_list <- c() mape_list <- c() rmse_list <- c() for (k in seq(kfold ,1)){ print(paste("kfolds number", k)) validation_start_date <- max(data_train$date) - as.difftime(k*6*7, units = "days") validation_end_date <- max(data_train$date) - as.difftime((k-1)*6*7, units = "days") data_training <- data_train %>% filter(date < validation_start_date) data_validation <- data_train %>% filter(date >= validation_start_date & date <= validation_end_date) lm_fit_cv <- model %>% fit(sales ~ . , data = data_training) lm_pred_cv <- lm_fit_cv %>% predict(data_validation) %>% bind_cols(data_validation$sales) %>% rename(predictions = ".pred", actual = "...2") lm_result_cv <- ml_error("Linear Regression Model",lm_pred_cv) # store performance of each kfold iteration mae_list [k] <- unlist( lm_result_cv['MAE'] , use.names=FALSE) mape_list[k] <- unlist( lm_result_cv['MAPE'] , use.names=FALSE ) rmse_list[k] <- unlist( lm_result_cv['RMSE'] , use.names=FALSE) } return( tibble( Model_name = Model_name, MAE = paste( round(mean(mae_list),2)," +/- ", round(sd(mae_list), 2)), MAPE = paste( round(mean(mape_list),2)," +/- ", round(sd(mape_list), 2)), RMSE = paste( round(mean(rmse_list),2)," +/- ", round(sd(rmse_list), 2))) ) }
# Dataset of Sales df5 <- read_csv("/home/renato/repos/Rossmann/inst/Data/data_modelling.csv")
df6 <- as_tibble(df5) %>% select(-X1)
# Boruta selected features cols_selected_boruta_full <- c("store","store_type","promo", "assortment", "competition_distance", "competition_open_since_month", "competition_open_since_year","promo2","promo2since_week","promo2since_year", "competition_time_month", "promo_time_week", "day_of_week_sin", "day_of_week_cos", "month_sin","month_cos", "day_sin", "day_cos", "week_of_year_sin", "week_of_year_cos", "sales", "date")
df6 <- df6 %>% select(cols_selected_boruta_full) %>% mutate(sales = expm1(sales)) # Splitthe set in training and testing data_split <- df6 %>% time_series_split(assess = "6 weeks", cumulative = T) # Dataset Train data_train <- training(data_split) # Dataset Test data_test <- testing(data_split) rec <- recipe(sales~.,data_train) # Selected metrics mt <- metric_set(yardstick::rmse, yardstick::mae, yardstick::mape)
aux1 <- data_test %>% select(store, sales) aux2 <- data_test %>% group_by(store) %>% summarise(predictions = mean(sales)) avg_model <- aux1 %>% left_join(aux2, by= "store") %>% select(sales, predictions) %>% rename(actual = "sales") baseline_result <- ml_error("Average Model",avg_model) baseline_result
# Create Linear Regresion Model #lm <- # linear_reg() %>% # set_engine("lm") %>% # set_mode("regression") # Training Model #lm_fit <- lm %>% # fit(sales ~ . , data = data_train) # Preditions #lm_pred <- lm_fit %>% # predict(data_test) %>% # bind_cols(data_test$sales) %>% # rename(predictions = ".pred", actual = "...2") # Evaluate #lm_result <- ml_error("Linear Regression Model",lm_pred) # Save pickle of results #saveRDS(lm_result,"Resultados_Modelos/lm_result.rds") lm_result <- readRDS("/home/renato/repos/Rossmann/inst/Resultados_Modelos/lm_result.rds") lm_result %>% kable() %>% kable_styling(full_width = F, bootstrap_options = c("striped", "hover", "condesend", "responsive"), html_font = "Cambria")
#lm_result_cv <- cross_validation(data_training , 5, "Linear Regression Model Cross Validation", lm) # Save pickle of results #saveRDS(lm_result_cv,"Resultados_Modelos/lm_result_cv.rds") lm_result_cv <- readRDS("/home/renato/repos/Rossmann/inst/Resultados_Modelos/lm_result_cv.rds") lm_result_cv
# Create Non-linear Model RandomForest #rf <- # rand_forest(trees = 100) %>% # set_engine("ranger") %>% # set_mode("regression") # Training Model #rf_fit <- rf %>% # fit(sales ~ ., data = data_train) # Preditions #rf_pred <- rf_fit %>% # predict(data_test) %>% # bind_cols(data_test$sales) %>% # rename(predictions = ".pred", actual = "...2") #rf_result <- ml_error("Random Forest Model",rf_pred) # Save pickle of results #saveRDS(rf_result,"Resultados_Modelos/rf_result.rds") rf_result <- readRDS("/home/renato/repos/Rossmann/inst/Resultados_Modelos/rf_result.rds") rf_result %>% kable() %>% kable_styling(full_width = F, bootstrap_options = c("striped", "hover", "condesend", "responsive"), html_font = "Cambria")
#rf_result_cv <- cross_validation(data_training , 5, "Random Forest Model Cross Validation", rf) # Save pickle of results #saveRDS(rf_result_cv,"Resultados_Modelos/rf_result_cv.rds") rf_result_cv <- readRDS("/home/renato/repos/Rossmann/inst/Resultados_Modelos/rf_result_cv.rds") rf_result_cv
# Create Model #xg <- # boost_tree(trees = 100) %>% # set_engine("xgboost") %>% # set_mode("regression") # Training Model #xg_fit <- xg %>% # fit(sales ~ ., data = data_train) # Preditions #xg_pred <- xg_fit %>% # predict(data_test) %>% # bind_cols(data_test$sales) %>% # rename(predictions = ".pred", actual = "...2") xg_result <- readRDS("/home/renato/repos/Rossmann/inst/Resultados_Modelos/xg_result.rds") # Evaluate #xg_result <- ml_error("Xgboosting Model",xg_pred) # Save pickle of results #saveRDS(xg_result,"Resultados_Modelos/xg_result.rds") xg_result %>% kable() %>% kable_styling(full_width = F, bootstrap_options = c("striped", "hover", "condesend", "responsive"), html_font = "Cambria")
result_xg_cv <- readRDS("/home/renato/repos/Rossmann/inst/Resultados_Modelos/result_xg_cv.rds") #result_xg_cv <- cross_validation(data_training , 5, "Xgboosting Model Cross Validation", xg) # Save pickle of results #saveRDS(result_xg_cv,"Resultados_Modelos/result_xg_cv.rds") result_xg_cv
bind_rows(baseline_result, lm_result, rf_result, xg_result) %>% arrange(RMSE) %>% kable() %>% kable_styling(full_width = F, bootstrap_options = c("striped", "hover", "condesend", "responsive"), html_font = "Cambria")
bind_rows( lm_result_cv, rf_result_cv, result_xg_cv) %>% arrange(RMSE) %>% kable() %>% kable_styling(full_width = F, bootstrap_options = c("striped", "hover", "condesend", "responsive"), html_font = "Cambria")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.