knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.align = "center", fig.path = "man/figures/README-", echo = TRUE, fig.width = 8, fig.height = 6 )
trendeval aims to provide a coherent interface for evaluating models fit with the trending package. Whilst it is useful in an interactive context, it's main focus is to provide an intuitive interface on which other packages can be developed (e.g. trendbreaker).
You can install the stable version of this package from CRAN with:
install.packages("trendeval")
The development version can be installed from GitHub with:
if (!require(remotes)) { install.packages("remotes") } remotes::install_github("reconverse/trendeval", build_vignettes = TRUE)
library(dplyr) # for data manipulation library(outbreaks) # for data library(trending) # for trend fitting library(trendeval) # for model selection # load data data(covid19_england_nhscalls_2020) # define a selection of model in a named list models <- list( simple = lm_model(count ~ day), glm_poisson = glm_model(count ~ day, family = "poisson"), glm_poisson_weekday = glm_model(count ~ day + weekday, family = "quasipoisson"), glm_quasipoisson = glm_model(count ~ day, family = "poisson"), glm_quasipoisson_weekday = glm_model(count ~ day + weekday, family = "quasipoisson"), glm_negbin = glm_nb_model(count ~ day), glm_negbin_weekday = glm_nb_model(count ~ day + weekday), will_error = glm_nb_model(count ~ day + nonexistant) ) # select 8 weeks of data (from a period when the prevalence was decreasing) last_date <- as.Date("2020-05-28") first_date <- last_date - 8*7 pathways_recent <- covid19_england_nhscalls_2020 %>% filter(date >= first_date, date <= last_date) %>% group_by(date, day, weekday) %>% summarise(count = sum(count), .groups = "drop") # split data for fitting and prediction dat <- pathways_recent %>% group_by(date <= first_date + 6*7) %>% group_split() fitting_data <- dat[[2]] pred_data <- select(dat[[1]], date, day, weekday) # assess the models using the evaluate_resampling results <- models %>% evaluate_resampling(fitting_data, metric = "rmse") %>% summary results
library(tidyr) # for data manipulation library(purrr) # for data manipulation library(ggplot2) # for plotting # Pull out the model with the lowest RMSE best_by_rmse <- results %>% slice_min(value) %>% select(model_name) %>% pluck(1,1) %>% pluck(models, .) # Now let's look at the following 14 days as well new_dat <- covid19_england_nhscalls_2020 %>% filter(date > "2020-05-28", date <= "2020-06-11") %>% group_by(date, day, weekday) %>% summarise(count = sum(count), .groups = "drop") all_dat <- bind_rows(pathways_recent, new_dat) out <- best_by_rmse %>% fit(pathways_recent) %>% predict(all_dat) %>% pluck(1) %>% .subset2(1L) out # plot output ggplot(out, aes(x = date, y = count)) + geom_line() + geom_ribbon(mapping = aes(x = date, ymin = lower_ci, ymax = upper_ci), data = out, alpha = 0.5, fill = "#BBB67E") + geom_ribbon(mapping = aes(x = date, ymin = lower_pi, ymax = upper_pi), data = out, alpha = 0.5, fill = "#BBB67E") + geom_vline(xintercept = as.Date("2020-05-28") + 0.5) + theme_bw()
Bug reports and feature requests should be posted on github using the issue system. All other questions should be posted on the RECON slack channel see https://www.repidemicsconsortium.org/forum/ for details on how to join.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.