library(reservoirnet) library(dplyr) library(ggplot2)
This vignette aims to find the best hyperparameters using randomsearch strategy. We will use the same data as in the vignette 01 basic usage and learn how to find the best set of hyperparameters.
We first load the data :
data("dfCovid") dist_forecast = 14 traintest_date = as.Date("2022-01-01")
Then we smooth the data to avoid huge variability of RT-PCR :
dfOutcome <- dfCovid %>% # outcome at 14 days mutate(outcome = lead(x = hosp, n = dist_forecast), outcomeDate = date + dist_forecast) %>% # rolling average for iptcc and positive_pcr mutate_at(.vars = c("Positive", "Tested"), .funs = function(x) slider::slide_dbl(.x = x, .before = 6, .f = mean))
Now that we have our data ready, we can plot those :
dfOutcome %>% tidyr::pivot_longer(cols = c("hosp", "Positive", "Tested")) %>% ggplot2::ggplot(mapping = aes(x = date, y = value)) + geom_line() + facet_grid(name ~ ., scales = "free_y") + theme_bw() + geom_vline(mapping = aes(color = "train-test sets", xintercept = traintest_date)) + labs(color = "") + theme(legend.position = "bottom")
In order to chose the best set of hyperparameters (leaking rate, input scaling, spectral radius and ridge penalty) on the train set we are going to use an accumulate forward procedure. Basically, during the year 2021, we are going to train the model each 6 months and evaluate the forecast on the next 3 months. This procedure will be repeated several times with different sets of hyperparameters each time.
First we set the periods of training and evaluation :
# dates of training vec_train_sets <- as.Date(c("2021-01-01", "2021-06-01")) # dates of evaluation vec_test_sets_start <- vec_train_sets+dist_forecast vec_test_sets_end <- c(vec_test_sets_start[2:length(vec_test_sets_start)], traintest_date-1) # get everything in a table dfaccumulateForward = data.frame(train_date = vec_train_sets, test_start = vec_test_sets_start, test_end = vec_test_sets_end)
Then we set the objective functions computing the mse for the chosen of hyperparameters :
fct_objective <- function(ridge, leaking_rate, input_scaling, spectral_radius, dfaccumulateForward){ ##### reservoir architecture # set reservoir reservoir <- reservoirnet::createNode(nodeType = "Reservoir", units = 500, lr = 0.7, sr = 1, input_scaling = 1) # set readout readout <- reservoirnet::createNode(nodeType = "Ridge", ridge = 0.1) # connect them model <- reservoirnet::link(reservoir, readout) ##### evaluate model dfPredictions <- apply(dfaccumulateForward, MARGIN = 1, FUN = function(row_accumulate_forward){ fct_performance_period(model = model, dfOutcome = dfOutcome, train_date = as.Date(row_accumulate_forward["train_date"]), test_start = as.Date(row_accumulate_forward["test_start"]), test_end = as.Date(row_accumulate_forward["test_end"])) }) %>% bind_rows() ##### get performance mse = dfPredictions %>% mutate(squared_error = (pred-outcome)^2) %>% pull(squared_error) %>% mean(.) return(mse) }
fct_performance_period <- function(model, train_date, test_start, test_end, dfOutcome){ ##### train and test set # train set yTrain <- dfOutcome %>% filter(outcomeDate <= train_date) %>% select(outcome) xTrain <- dfOutcome %>% filter(outcomeDate <= train_date) %>% select(hosp, Positive, Tested) # test set xTest <- dfOutcome %>% filter(outcomeDate <= test_end) %>% select(hosp, Positive, Tested) yTest <- dfOutcome %>% filter(outcomeDate <= test_end) %>% select(outcomeDate, outcome) %>% mutate(eval_period = outcomeDate >= test_start) ##### preprocessing of the data # standardise based on training set values ls_fct_stand <- apply(xTrain, MARGIN = 2, FUN = function(x) function(feature) return(feature/(max(x)))) xTrainstand <- xTrain xTeststand <- xTest lapply(X = names(ls_fct_stand), FUN = function(x){ xTrainstand[,x] <<- ls_fct_stand[[x]](feature = xTrain[,x]) xTeststand[,x] <<- ls_fct_stand[[x]](feature = xTest[,x]) return() }) # convert to array lsdf <- lapply(list(yTrain = yTrain, xTrain = xTrainstand, xTest = xTeststand), function(x) as.array(as.matrix(x))) ##### fit reservoir on train set fit <- reservoirnet::reservoirR_fit(node = model, X = lsdf$xTrain, Y = lsdf$yTrain, warmup = 30, reset = TRUE) ##### predict with the reservoir vec_pred <- reservoirnet::predict_seq(node = fit$fit, X = lsdf$xTest, reset = TRUE) dfPredictions <- yTest %>% mutate(pred = vec_pred) %>% filter(eval_period) %>% select(outcomeDate, outcome, pred) return(dfPredictions) }
To generate the hyperparameters we use log-uniform generations. This is easily done using the rloguniform and random_search_hyperparam functions. For instance we have :
random_search_hyperparam( n = 50, ls_fct = list( ridge = function(n) rloguniform(n = n, min = 1e-10, max = 1e-1), input_scaling = function(n) 1, spectral_radius = function(n) rloguniform(n = n, min = 1e-10, max = 1e5), leaking_rate = function(n) 0.9 ) ) %>% head()
## # A tibble: 6 × 5 ## search_id ridge input_scaling spectral_radius leaking_rate ## <int> <dbl> <dbl> <dbl> <dbl> ## 1 1 0.0000000245 1 1.46e-3 0.9 ## 2 2 0.000000223 1 8.28e+2 0.9 ## 3 3 0.0000143 1 3.73e-4 0.9 ## 4 4 0.0149 1 4.70e-7 0.9 ## 5 5 0.00000000653 1 1.15e-9 0.9 ## 6 6 0.0122 1 3.10e-9 0.9
To find the right set of hyperparameters, we will keep the ridge hyperparameter random between 1e-10 and 1e-1 and vary other hyperparameters 2 by 2. For each set of hyperparameters, we will run 2 experiment and take the mean mse of the 2. At each step we will run 30 experiments to reduce computation time.
For a real case, it would be better to increase the number of experiment and of repetition to better explore the hyperparameter space.
dfHyperparam <- random_search_hyperparam(n = 30, ls_fct = list(ridge = function(n) rloguniform(n = n, min = 1e-10, max = 1e-1), input_scaling = function(n) rloguniform(n = n, min = 1e-5, max = 1e2), spectral_radius = function(n) rloguniform(n = n, min = 1e-5, max = 1e2), leaking_rate = function(n) 0.7)) %>% # replicate 2 times replicate(n = 2, simplify = FALSE) %>% bind_rows() %>% tibble::rowid_to_column(var = "search_id_master") vecMSE <- apply(X = dfHyperparam, MARGIN = 1, FUN = function(row_hp){ fct_objective(ridge = row_hp["ridge"], leaking_rate = row_hp["leaking_rate"], input_scaling = row_hp["input_scaling"], spectral_radius = row_hp["spectral_radius"], dfaccumulateForward = dfaccumulateForward) }) dfPerf = dfHyperparam %>% select(search_id, search_id_master) %>% mutate(mse = vecMSE) %>% group_by(search_id) %>% summarise(mse = mean(mse)) %>% left_join(dfHyperparam %>% select(-search_id_master) %>% distinct(), by = "search_id")
We can now plot the performance using the adequate functions :
plot_2x2_perf(dfPerf %>% select(perf = mse, spectral_radius, input_scaling), perf_lab = "MSE", trans = "identity")
plot_marginal_perf(dfPerf %>% select(perf = mse, spectral_radius, input_scaling), perf_lab = "MSE")
There is no clear area of better performance and for a real world application, we should probably increase the number of explored hyperparameter set. Nevertheless, the area where spectral radius and input scaling are equal to 1 seems to have better performance.
We can now move on to tune the leaking rate and the ridge hyperparameters.
dfHyperparam <- random_search_hyperparam(n = 30, ls_fct = list(ridge = function(n) rloguniform(n = n, min = 1e-10, max = 1e-1), input_scaling = function(n) 1, spectral_radius = function(n) 1, leaking_rate = function(n) rloguniform(n = n, min = 1e-3, max = 1))) %>% # replicate 2 times replicate(n = 2, simplify = FALSE) %>% bind_rows() %>% tibble::rowid_to_column(var = "search_id_master") vecMSE <- apply(X = dfHyperparam, MARGIN = 1, FUN = function(row_hp){ fct_objective(ridge = row_hp["ridge"], leaking_rate = row_hp["leaking_rate"], input_scaling = row_hp["input_scaling"], spectral_radius = row_hp["spectral_radius"], dfaccumulateForward = dfaccumulateForward) }) dfPerf = dfHyperparam %>% select(search_id, search_id_master) %>% mutate(mse = vecMSE) %>% group_by(search_id) %>% summarise(mse = mean(mse)) %>% left_join(dfHyperparam %>% select(-search_id_master) %>% distinct(), by = "search_id")
We can now plot the performance using the adequate functions :
plot_2x2_perf(dfPerf %>% select(perf = mse, leaking_rate, ridge), perf_lab = "MSE", trans = "identity")
plot_marginal_perf(dfPerf %>% select(perf = mse, leaking_rate, ridge), perf_lab = "MSE")
Again, for a real world application, we should probably explore a larger set of hyperparameter. Here, we can consider that a leaking rate of 0.5 and a ridge penalty of 1e-3 seem to provide overall best performance. A finer tuning could be done but we stop here for this vignette using random search to find the best set of hyperparameters.
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.