user_sample_1
In ubair: Effects of External Conditions on Air Quality

library(ubair)

Load data for one station and run counterfactual

Investigate effect of 9 Euro ticket for station Köln.

Prerequisites

Installation for user as described in README

Steps

adapt the data_dir (in windows e.g. //de-inf-008/\$/Eigene Dateien/ubair-master/data/)

# set data_dir where the data is stored
data_dir <- "../../Daten/user_sample_data/"
# This might take a few seconds for large files
data <- load_uba_data_from_dir(data_dir = data_dir)

Set sample variables.

target <- "NO2"
station <- "DENW006"
station_name <- "Köln"
meteo_variables <- c("TMP", "RFE", "WIG", "WIR", "LDR")

# dates for 9 Euro effect
application_start <- lubridate::ymd("20220301") # = start reference time
date_effect_start <- lubridate::ymd_hm("20220601 00:00")
application_end <- lubridate::ymd("20220831") # = end effect time

buffer <- 0 # number of data points to be ignored before effect

trend <- "linear"
model_type <- "rf"

window_size <- 14 # days of data to calculate the mean in prediction results

create a params.yaml
either
by copying the default to your working dir, update the new params.yaml file and load it or
load the default and adapt the params programmatically

params <- load_params()
# adapt params programatically
params$target <- target
params$meteo_variables <- meteo_variables

Prepare data of station for training.

See function documentation for further details

env_data <- clean_data(data, station = station)
dt_prepared <- prepare_data_for_modelling(env_data, params)
dt_prepared <- dt_prepared[complete.cases(dt_prepared)]
split_data <- split_data_counterfactual(
  dt_prepared,
  application_start = application_start,
  application_end = application_end
)

Run counterfactual scenario (training and prediction)

res <- run_counterfactual(split_data,
  params,
  detrending_function = trend,
  model_type = model_type,
  alpha = 0.9,
  log_transform = FALSE
)
predictions <- res$prediction

Plot counterfactual run and optionally save to data_dir

counterfactual_plot <- plot_counterfactual(predictions, params,
  window_size = window_size,
  date_effect_start,
  buffer = buffer
)
counterfactual_plot

plot of chunk plot_counter_1

Evaluate model and effect

round(calc_performance_metrics(predictions,
  date_effect_start,
  buffer = buffer
), 2)
#>           RMSE            MSE            MAE           MAPE           Bias             R2 
#>           8.69          75.45           5.83           0.36          -2.16           0.67 
#> Coverage lower Coverage upper       Coverage    Correlation            MFB            FGE 
#>           1.00           0.98           0.98           0.84          -0.10           0.41
round(calc_summary_statistics(predictions,
  date_effect_start,
  buffer = buffer
), 2)
#>                        true prediction
#> min                    0.63      -1.02
#> max                   92.58      57.05
#> var                  228.50     111.89
#> mean                  17.76      15.61
#> 5-percentile           3.24       2.31
#> 25-percentile          6.94       7.16
#> median/50-percentile  11.91      13.02
#> 75-percentile         24.65      22.92
#> 95-percentile         48.59      34.90
paste("effect size:", estimate_effect_size(predictions,
  date_effect_start,
  buffer = buffer,
  verbose = FALSE
))
#> [1] "effect size: -0.393826299501436" "effect size: -0.0288"