knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
#library(canadacovidshiny)
library(canadacovid)
library(dplyr)
library(tidyr)
library(ggplot2)
library(purrr)
library(stringr)

theme_set(theme_minimal())

This vignette will serve as a sandbox/notebook for working on the back-end implementation of the dashboard -- the "RMarkdown first" approach as suggested by the Engineering Shiny book.

Summaries

The first and simplest data I want to include is the latest daily counts, both overall and by province, which I retrieve with get_summary():

summary_overall <- get_summary()
summary_province <- get_summary(split = "province")
glimpse(summary_province)

Checking for new reports

I want to keep the API requests down to a minimum to avoid 429 errors (too many requests in a short time).

The provinces table lets me know if and when data has been reported:

provinces <- canadacovid::get_provinces()
glimpse(provinces)

It will be helpful to put this into an easily-accessible list like so:

province_status <- provinces %>%
  select(code, name, data_status, updated_at) %>%
  split(.$code)
str(province_status[1])

Then I can check the status of each province:

province_status$NS$data_status

If the data has been reported for the day, then the data can be updated by calling the API.

Storing the data

A package I've been meaning to try is pins. It allows me to store R objects remotely (on boards), and retrieve and update that data when necessary. Create a temporary board (that will be deleted once the R session ends):

library(pins)

board <- board_temp()
board

Then save the data to the board:

board %>% pin_write(summary_overall, "summary_overall", type = "rds")

Then retrieve it:

board %>% pin_read("summary_overall")

I can also get some metadata about the data, like when it was created:

board %>% pin_meta("summary_overall")

pins has numerous options for storing boards, including RStudio connect, Amazon S3, and Google Cloud Platform. For now, I'll register a board on my GitHub, in this repository (canadacovidshiny). ^[Note that I don't need to provide my personal access token argument to register the board, because it is retrieved for me from gitcreds.] Unfortunately, I have to use the legacy pins API for this task, because GitHub boards haven't been implemented in the modern API as of me writing this:

board <- board_register_github(
  name = "github", repo = "taylordunn/canadacovidshiny", path = "data/pins"
)

Pin summary_overall:

pin(summary_overall, name = "summary_overall", board = "github")

And get it:

pin_get("summary_overall", board = "github")
# I have a Google Cloud Platform account (free trial for now).
# I'll make a `canadacovidshiny_bucket` just to try it out.
gcp_board <- board_register_gcloud(
  name = "gcloud", bucket = "canadacovidshiny_bucket",
  # TCP token stored locally in my interactive session
  token = Sys.getenv("GCLOUD_ACCESS_TOKEN")
)
pin(summary_overall, name = "summary_overall", board = "gcloud")

Also store the province-level summaries:

summary_province <- get_summary(split = "province")
pin(summary_province, name = "summary_province", board = "github")
pin_get("summary_province", board = "github")

Reports

The reports are larger data frames, which I will want overall and by province:

report_overall <- get_reports()

# Use the `province` = "AB", "BC", etc. argument instead of `split` = "province"
# The former lets me request one province at a time, and only when data has
#  been updated. The latter runs 13 requests at once, every time.
province_codes <- provinces$code

reports_province <- map(province_codes, ~ get_reports(province = .x)) %>%
  setNames(province_codes)

reports_province$NB

Store each set of province reports into separate pins:

iwalk(
  reports_province,
  function(reports, province_code) {
    pin_name <- paste0("reports_", tolower(province_code))
    pin(reports, name = pin_name, board = "github")
  }
)

I will frequently want the rolling averages of various counts. The RcppRoll package can do this:

library(RcppRoll)

reports_ns <- pin_get("reports_ns", board = "github")
reports_ns <- reports_ns %>%
  mutate(
    across(starts_with("change_"),
           ~ roll_mean(.x, n = 7, align = "right", fill = NA),
           .names = "{.col}_roll_7")
  )

Then I can plot all of the rolling averages:

reports_ns %>%
  select(date, matches("roll")) %>%
  pivot_longer(cols = -date) %>%
  filter(!is.na(value)) %>%
  ggplot(aes(x = date, y = value)) +
  geom_line() +
  facet_wrap(
    ~ name,
    # This will remove the prefix ("change_") and suffix ("_roll_7")
    labeller = labeller(name = ~ str_remove_all(.x, "change_|_roll_7")),
    scales = "free_y", ncol = 2
  )

Plots

Here's the ggplot2 theme I'll use:

library(showtext)
sysfonts::font_add_google("Roboto Condensed", "roboto")
showtext_auto()
theme_canadacovid <- function(base_size = 16, base_family = "roboto",
                              base_grey = "grey85") {
  theme_minimal(base_size = base_size, base_family = base_family) +
    theme(
      panel.grid.minor = element_blank(),
      plot.title = element_text(face = "bold"),
      axis.title = element_text(face = "bold"),
      strip.text = element_text(face = "bold", size = rel(0.8), hjust = 0),
      strip.background = element_rect(fill = base_grey, color = NA),
      legend.title = element_text(face = "bold")
    )
}
var_colors <-
  list(
    #"cases" = "#e9c46a",
    #"cases" = "#f4a261",
    "cases" = "#f8961e",
    "hospitalizations" = "#f3722c",
    "criticals" = "#f3722c",
    "fatalities" = "#f94144",
    "recoveries" = "#90be6d",
    "vaccinations" = "#43aa8b",
    "boosters_1" = "#577590"
  )

I think the main plot I want to display is cases:

library(ggtext)

plot_change <- function(reports_province, var = "cases", rolling_window = 7) {
  var_color <- var_colors[[var]]
  change_var <- paste0("change_", var)
  change_var_rolling_avg <- paste0(change_var, "_rolling_avg")
  reports_province <- reports_province %>%
    mutate(
      across(change_var,
             ~ roll_mean(.x, n = rolling_window, align = "right", fill = NA),
             .names = "{.col}_rolling_avg")
    ) %>%
    filter(across(change_var_rolling_avg, ~ !is.na(.)))

  latest_val <- reports_province %>%
    filter(date == max(date)) %>%
    pull(change_var_rolling_avg) %>%
    round(1)
  latest_val_label <- glue::glue(
    "<b style='color:{var_color}'>{latest_val}</b>"
  )

  reports_province %>%
    ggplot(aes(x = date, y = !!sym(change_var_rolling_avg))) +
    geom_line(size = 1, color = var_color) +
    geom_point(data = . %>% filter(date == max(date)),
               size = 2, color = var_color) +
    labs(
      title = paste0(stringr::str_to_sentence(var),
                     " (", rolling_window, "-day rolling average)"),
      y = NULL, x = NULL
    ) +
    scale_y_continuous(
      sec.axis = sec_axis(~ ., breaks = latest_val, labels = latest_val_label)
    ) +
    scale_x_date(expand = expansion(mult = c(0, 0.01))) +
    theme_canadacovid() +
    theme(axis.text.y.right = element_markdown())
}
plot_change(reports_ns, var = "cases", rolling_window = 7)
plot_change(reports_ns, var = "fatalities", rolling_window = 7)
plot_change(reports_ns, var = "hospitalizations", rolling_window = 30)
plot_change(reports_ns, var = "criticals", rolling_window = 30)
plot_change(reports_ns, var = "vaccinations", rolling_window = 30)
plot_change(reports_ns, var = "boosters_1", rolling_window = 7)


taylordunn/canadacovidshiny documentation built on July 3, 2023, 8:49 a.m.