knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
#library(canadacovidshiny) library(canadacovid) library(dplyr) library(tidyr) library(ggplot2) library(purrr) library(stringr) theme_set(theme_minimal())
This vignette will serve as a sandbox/notebook for working on the back-end implementation of the dashboard -- the "RMarkdown first" approach as suggested by the Engineering Shiny book.
The first and simplest data I want to include is the latest daily counts, both overall and by province, which I retrieve with get_summary()
:
summary_overall <- get_summary() summary_province <- get_summary(split = "province") glimpse(summary_province)
I want to keep the API requests down to a minimum to avoid 429 errors (too many requests in a short time).
The provinces
table lets me know if and when data has been reported:
provinces <- canadacovid::get_provinces() glimpse(provinces)
It will be helpful to put this into an easily-accessible list like so:
province_status <- provinces %>% select(code, name, data_status, updated_at) %>% split(.$code) str(province_status[1])
Then I can check the status of each province:
province_status$NS$data_status
If the data has been reported for the day, then the data can be updated by calling the API.
A package I've been meaning to try is pins
.
It allows me to store R objects remotely (on boards), and retrieve and update that data when necessary.
Create a temporary board (that will be deleted once the R session ends):
library(pins) board <- board_temp() board
Then save the data to the board:
board %>% pin_write(summary_overall, "summary_overall", type = "rds")
Then retrieve it:
board %>% pin_read("summary_overall")
I can also get some metadata about the data, like when it was created
:
board %>% pin_meta("summary_overall")
pins
has numerous options for storing boards, including RStudio connect, Amazon S3, and Google Cloud Platform.
For now, I'll register a board on my GitHub, in this repository (canadacovidshiny
).
^[Note that I don't need to provide my personal access token
argument to register the board, because it is retrieved for me from gitcreds
.]
Unfortunately, I have to use the legacy pins
API for this task, because GitHub boards haven't been implemented in the modern API as of me writing this:
board <- board_register_github( name = "github", repo = "taylordunn/canadacovidshiny", path = "data/pins" )
Pin summary_overall
:
pin(summary_overall, name = "summary_overall", board = "github")
And get it:
pin_get("summary_overall", board = "github")
# I have a Google Cloud Platform account (free trial for now). # I'll make a `canadacovidshiny_bucket` just to try it out. gcp_board <- board_register_gcloud( name = "gcloud", bucket = "canadacovidshiny_bucket", # TCP token stored locally in my interactive session token = Sys.getenv("GCLOUD_ACCESS_TOKEN") ) pin(summary_overall, name = "summary_overall", board = "gcloud")
Also store the province-level summaries:
summary_province <- get_summary(split = "province") pin(summary_province, name = "summary_province", board = "github")
pin_get("summary_province", board = "github")
The reports are larger data frames, which I will want overall
and by province
:
report_overall <- get_reports() # Use the `province` = "AB", "BC", etc. argument instead of `split` = "province" # The former lets me request one province at a time, and only when data has # been updated. The latter runs 13 requests at once, every time. province_codes <- provinces$code reports_province <- map(province_codes, ~ get_reports(province = .x)) %>% setNames(province_codes) reports_province$NB
Store each set of province reports into separate pins:
iwalk( reports_province, function(reports, province_code) { pin_name <- paste0("reports_", tolower(province_code)) pin(reports, name = pin_name, board = "github") } )
I will frequently want the rolling averages of various counts.
The RcppRoll
package can do this:
library(RcppRoll) reports_ns <- pin_get("reports_ns", board = "github") reports_ns <- reports_ns %>% mutate( across(starts_with("change_"), ~ roll_mean(.x, n = 7, align = "right", fill = NA), .names = "{.col}_roll_7") )
Then I can plot all of the rolling averages:
reports_ns %>% select(date, matches("roll")) %>% pivot_longer(cols = -date) %>% filter(!is.na(value)) %>% ggplot(aes(x = date, y = value)) + geom_line() + facet_wrap( ~ name, # This will remove the prefix ("change_") and suffix ("_roll_7") labeller = labeller(name = ~ str_remove_all(.x, "change_|_roll_7")), scales = "free_y", ncol = 2 )
Here's the ggplot2 theme I'll use:
library(showtext) sysfonts::font_add_google("Roboto Condensed", "roboto") showtext_auto() theme_canadacovid <- function(base_size = 16, base_family = "roboto", base_grey = "grey85") { theme_minimal(base_size = base_size, base_family = base_family) + theme( panel.grid.minor = element_blank(), plot.title = element_text(face = "bold"), axis.title = element_text(face = "bold"), strip.text = element_text(face = "bold", size = rel(0.8), hjust = 0), strip.background = element_rect(fill = base_grey, color = NA), legend.title = element_text(face = "bold") ) }
var_colors <- list( #"cases" = "#e9c46a", #"cases" = "#f4a261", "cases" = "#f8961e", "hospitalizations" = "#f3722c", "criticals" = "#f3722c", "fatalities" = "#f94144", "recoveries" = "#90be6d", "vaccinations" = "#43aa8b", "boosters_1" = "#577590" )
I think the main plot I want to display is cases:
library(ggtext) plot_change <- function(reports_province, var = "cases", rolling_window = 7) { var_color <- var_colors[[var]] change_var <- paste0("change_", var) change_var_rolling_avg <- paste0(change_var, "_rolling_avg") reports_province <- reports_province %>% mutate( across(change_var, ~ roll_mean(.x, n = rolling_window, align = "right", fill = NA), .names = "{.col}_rolling_avg") ) %>% filter(across(change_var_rolling_avg, ~ !is.na(.))) latest_val <- reports_province %>% filter(date == max(date)) %>% pull(change_var_rolling_avg) %>% round(1) latest_val_label <- glue::glue( "<b style='color:{var_color}'>{latest_val}</b>" ) reports_province %>% ggplot(aes(x = date, y = !!sym(change_var_rolling_avg))) + geom_line(size = 1, color = var_color) + geom_point(data = . %>% filter(date == max(date)), size = 2, color = var_color) + labs( title = paste0(stringr::str_to_sentence(var), " (", rolling_window, "-day rolling average)"), y = NULL, x = NULL ) + scale_y_continuous( sec.axis = sec_axis(~ ., breaks = latest_val, labels = latest_val_label) ) + scale_x_date(expand = expansion(mult = c(0, 0.01))) + theme_canadacovid() + theme(axis.text.y.right = element_markdown()) }
plot_change(reports_ns, var = "cases", rolling_window = 7) plot_change(reports_ns, var = "fatalities", rolling_window = 7) plot_change(reports_ns, var = "hospitalizations", rolling_window = 30) plot_change(reports_ns, var = "criticals", rolling_window = 30) plot_change(reports_ns, var = "vaccinations", rolling_window = 30) plot_change(reports_ns, var = "boosters_1", rolling_window = 7)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.