Nothing
Run multiple stations and models for the 9-Euro-Ticket analysis
# Configuration ----

# Directory where the measurement data is stored
data_dir <- "../../Daten/user_sample_data/"
sample_name <- "NeunEuroTicket"

# Target pollutant and the monitoring stations to analyse
target <- "NO2"
stations <- list(
  Luenen = "DENW006",
  AachenBurtscheid = "DENW094"
)

# Meteorological covariates used as model features
meteo_variables <- c("TMP", "RFE", "WIG", "WIR", "LDR")

# Analysis time windows: the reference period ends at application_start;
# the effect is evaluated from date_effect_start until application_end
application_start <- lubridate::ymd("20220301") # = start reference time
date_effect_start <- lubridate::ymd_hm("20220601 00:00")
application_end <- lubridate::ymd("20220831") # = end effect time
buffer <- 0 # number of data points to be ignored before effect
trend <- "linear"

# Hyperparameters can be set in params/params.yaml
model_types <- c("lightgbm", "rf", "dynamic_regression", "fnn")
window_size <- 14 # days of data to calculate the mean in prediction results
library(ubair)

# Load the raw UBA measurement data.
# This might take a few seconds for large files.
data <- load_uba_data_from_dir(data_dir = data_dir)

# Start from the package defaults, then override with the settings above
params <- load_params()
params$target <- target
params$meteo_variables <- meteo_variables
# Main loop ----
# For every station, fit each model type on the reference period,
# predict the business-as-usual counterfactual for the application
# period, and collect hourly predictions plus summary metrics.
for (station_name in names(stations)) {
  station <- stations[[station_name]]
  predictions_all <- data.table::data.table()
  metrics_all <- data.table::data.table()

  # Clean the station data, build model features, drop incomplete rows,
  # and split into reference (training) vs. application (effect) sets
  env_data <- clean_data(data, station = station)
  dt_prepared <- prepare_data_for_modelling(env_data, params)
  dt_prepared <- dt_prepared[complete.cases(dt_prepared)]
  split_data <- split_data_counterfactual(
    dt_prepared,
    application_start = application_start,
    application_end = application_end
  )

  for (model_type in model_types) {
    message(paste("start training:", station_name, station, model_type))
    res <- run_counterfactual(split_data, params,
      detrending_function = trend,
      model_type = model_type,
      alpha = 0.9,
      log_transform = FALSE
    )
    # copy() so the := assignments below don't modify res$prediction
    predictions <- data.table::copy(res$prediction)

    # plot
    bau_plot <- plot_counterfactual(predictions, params,
      window_size = window_size,
      date_effect_start,
      buffer = buffer
    )

    # evaluation
    metrics <- round(calc_performance_metrics(predictions, date_effect_start,
      buffer = buffer
    ), 2)
    effect <- estimate_effect_size(predictions, date_effect_start,
      buffer = buffer, verbose = FALSE
    )
    metrics["effect_size"] <- effect["absolute_effect"]
    metrics["relative_effect"] <- effect["relative_effect"]

    # add information for export
    # NOTE(review): assigning character values coerces the whole named
    # vector to character; it is transposed into a one-row data.table below
    metrics["model"] <- model_type
    metrics["trend"] <- trend
    metrics["station_name"] <- station_name
    metrics["station"] <- station
    metrics["buffer_start"] <- format(
      date_effect_start - as.difftime(buffer, units = "hours"),
      "%Y-%m-%d"
    )
    metrics["effect_start"] <- format(date_effect_start, "%Y-%m-%d")
    metrics_dt <- data.table::as.data.table(t(metrics))
    metrics_all <- rbind(metrics_all, metrics_dt)

    # tag the hourly predictions with their provenance and accumulate
    predictions[, station := station]
    predictions[, model := model_type]
    predictions[, trend := trend]
    predictions_all <- rbind(predictions_all, predictions)
  }

  # save predictions (hourly data) and metrics
  predictions_save <- dplyr::select(
    predictions_all,
    c(
      date, value, prediction, prediction_lower, prediction_upper,
      station, model, trend
    )
  )
  predictions_save$date <-
    format(predictions_save$date, "%Y-%m-%d %H:%M")
}
#> start training: Luenen DENW006 lightgbm
#> [LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000319 seconds.
#> You can set `force_row_wise=true` to remove the overhead.
#> And if memory is not enough, you can set `force_col_wise=true`.
#> [LightGBM] [Info] Total Bins 1549
#> [LightGBM] [Info] Number of data points in the train set: 60472, number of used features: 8
#> [LightGBM] [Info] Start training from score 0.000000
#> start training: Luenen DENW006 rf
#> start training: Luenen DENW006 dynamic_regression
#> Using data for dynamic regression training from 2021-01-22 01:00:00 to 2022-02-28 23:00:00. Too long training series can lead to worse performance. Adjust this via the dynamic_regression$ntrain hyperparameter.
#> start training: Luenen DENW006 fnn
#> start training: AachenBurtscheid DENW094 lightgbm
#> [LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.031247 seconds.
#> You can set `force_col_wise=true` to remove the overhead.
#> [LightGBM] [Info] Total Bins 1550
#> [LightGBM] [Info] Number of data points in the train set: 60039, number of used features: 8
#> [LightGBM] [Info] Start training from score -0.000000
#> start training: AachenBurtscheid DENW094 rf
#> start training: AachenBurtscheid DENW094 dynamic_regression
#> Using data for dynamic regression training from 2021-01-10 04:00:00 to 2022-02-28 23:00:00. Too long training series can lead to worse performance. Adjust this via the dynamic_regression$ntrain hyperparameter.
#> start training: AachenBurtscheid DENW094 fnn
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.