knitr::opts_chunk$set(
  collapse = TRUE,
  warning = FALSE,
  message = FALSE,
  comment = "#>",
  dev = "png",
  dev.opts = list(type = "cairo-png")
)

library(SaviR)
library(dplyr)
library(tidyr)
library(ggplot2)
library(passport)
library(gt)

After installing SaviR, you are ready to use the following functions to read in the data and create visualizations. There are functions to create plots, maps, or tables by World Health Organization (WHO) Region, Department of State (DoS) Region, or globally.

Data: The Frankenstein Dataset

onetable - stored metadata with iso2 and iso3 codes, country names, WHO/DoS/World Bank regions, and UN population estimates
get_covid_df(sources = c("all", "WHO", "WHO+JHU", "WHO+Primary")) - COVID cases/deaths dataframe from WHO and other sources. calc_add_risk() - Add risk matrix calculations to dataframe (df)
get_vax() - Get vaccination data from Our World In Data (OWID)
get_combined_table(type = c("WHO", "Both", "legacy"), geometry=FALSE) - Combine all of the above into one large df, optionally add geometry

This snippet produces the master df with all the variables required for all of the following graphics.

# Load in data to create visuals
df_who <- get_combined_table("WHO") # China includes Taiwan, Hong Kong, and Macau data

# Data for visuals are typically as of Sunday of the current week
# but this can be determined dynamically using WHO data.
sunday_date <- df_who |>
  distinct(date, dow = weekdays(date)) |>
  arrange(desc(date)) |>
  filter(dow == "Sunday") |>
  slice(1) |>
  pull(date)

Plots {.tabset}

Epicurves

plot_epicurve(df, transparent = T)

When creating epi curves for the WHO regions, the "plot_epicurve" function should take in the df with only "WHO" observations in order to match the same curve on the WHO Coronavirus Dashboard. The individual epi curves for each region can be run simply by filtering for one WHO region in the df. The function will detect that the region column has only one region and will produce the epi curve accordingly.

When creating epi curves for the DoS regions, the plot_epicurve function should take in the df with "Both" observations. Similarly to the WHO epi curves, individual epi curves can also be produce by filtering to one DoS region.

# Epi Curves for WHO -- Global
epi_curve_global <- plot_epicurve(df_who, transparent = T)


epi_curve_global

# Epi Curves for DoS -- Global
state_epi_curve <- plot_epicurve(filter(df_who, date <= sunday_date),
                                 type = "cases", 
                                 by_cat = "State Region")
state_epi_curve

Inset plots

inset_epi_curve <- df_who |>
  filter(date <= sunday_date) |>
  plot_epicurve(type = "cases", by_cat = "State Region", transparent = FALSE, inset = TRUE)

inset_epi_curve
# Regions
for (r in unique(na.omit(df_who$who_region))) {
  epi_curve_regional <- plot_epicurve(filter(df_who, who_region == r), transparent = F)


  print(epi_curve_regional)
}

Risk Matrix (Global Only)

plot_riskmatrix(df)

Note for labeling purposes, the labels variable should be added to the dataframe specifying the top 10 countries for weekly average incidence, week case percent change, and weekly total cases. Exclusion criteria: Countries with population less than 10 million and countries with weekly cases totals less than 100.

global_risk_matrix <- df_who %>%
  filter(date == sunday_date) %>%
  filter(population >= 10000000) %>% # Exclude population less than 10M
  filter(week_case >= 100) %>%
  mutate(
    rank_inc = dense_rank(desc(week_case_incidence)),
    rank_per = dense_rank(desc(percent_change_case)),
    rank_cas = dense_rank(desc(week_case))
  ) %>%
  mutate(labels = case_when(
    rank_inc %in% 1:10 ~ country,
    rank_per %in% 1:10 ~ country,
    rank_cas %in% 1:10 ~ country
  )) %>%
  plot_riskmatrix()


global_risk_matrix

Vaccination Coverage (Global Only)

global_vax_coverage <- df_who %>%
  filter(date <= sunday_date) %>%
  filter(date == max(date)) %>%
  filter(!is.na(state_region)) %>%
  plot_vaxcoverage(by_cat = "State Region")

global_vax_coverage

Maps {.tabset}

Notes on mapping

By default, get_combined_table() does not return geometry. This is preferable, since the size of the table is quite large and the geometry list column takes up a lot of space in memory. However, if we want to map these data, we can either call get_combined_table("Both", geometry = TRUE), or join with country_coords after filtering, like so:

# Filter to only observations with date as of Sunday
# and join in geometry
df_who_mapping <- df_who %>%
  filter(date == sunday_date) %>%
  left_join(select(country_coords, id, geometry), by = "id")

Burden Map

map_burden(df, region, time_step)

The "map_burden" function takes in a pre-filtered data.frame and computes and displays average incidence for each country over the time period selected.
The region argument is optional, but it is used to specify a specific DoS/WHO region to zoom to, if desired.
time_step is used to specify the days used to average incidence over. This is always relative to the latest date in the data.frame.
bin_breaks and bin_colors can be used to over-ride sensible defaults for incidence cutpoints and color scheme, if desired.

burden_map_global <- df_who |>
  filter(date <= sunday_date) |>
  map_burden()


burden_map_global

for (r in unique(na.omit(df_who$who_region))) {
  burden_map_regional <- df_who |>
    filter(who_region == r, date <= sunday_date) |>
    map_burden(region = r)


  print(burden_map_regional)
}

and an example of how to produce a 14d map:

burden_map_global_14 <- df_who |>
  filter(date <= sunday_date) |>
  map_burden(time_step = 14)


burden_map_global_14

We can also over-ride the incidence cut-points, like so:

# same map with 0-1, 1-5, 5-10, 10+ breaks
burden_map_global_14_2 <- df_who |>
  filter(date <= sunday_date) |>
  map_burden(
    time_step = 14,
    bin_breaks = c(0, 1, 5, 10, Inf),
    bin_colors = c("0- <1" = "#f1e5a1", "1- <5" = "#e7b351", "5- <10" = "#d26230", "10+" = "#aa001e")
  )

burden_map_global_14_2

Trend Map

map_trend(df, region, time_step = 7)

The "map_trend" function requires a result column where weekly percent change is converted into factors using "cut".

The default behavior is to visualize a global trend map, but passing a data frame filtered to a specific region and providing the region name as a region argument will change the bounding box accordingly.

A different time_step can be provided. The default assumes you are comparing two 7d periods, but this can be increased or decreased accordingly.

trend_map_global <- df_who |>
  filter(date <= sunday_date) |>
  map_trend()


trend_map_global
for (r in unique(na.omit(df_who$who_region))) {
  trend_map_reg <- df_who |>
  filter(date <= sunday_date) |>
    map_trend(region = r)

  print(trend_map_reg)
}

and an example producing the same with different time_step:

df_who |>
  filter(date <= sunday_date) |>
  map_trend(time_step = 14)

Vaccine Map (Global Only)

map_vaccinations(df, vac_type = c("People", "Fully"))

global_vax_map <- df_who_mapping %>%
  mutate(result = cut(people_vaccinated_per_hundred, breaks = c(0, 1, 3, 10, 30, Inf))) %>%
  group_by(country) %>%
  filter(!is.na(result)) %>%
  filter(date == max(date)) %>%
  ungroup() %>%
  map_vaccinations(., vac_type = "People")

global_vax_map

Tables {.tabset}

Top 10 Most Cases Table

table_10mostcases(df, time_step = 7, region = NULL, data_as_of = NULL)

This function presents a top-10 table of countries based on cases reported in the past time_step.
The default is a 7 day period, but this can be adjusted. Additionally, you can tweak the title and data-as-of footnote using those variables respectively, or they will be inferred internally.

7-day Table

df_who |>
  filter(date <= sunday_date) |>
  table_10mostcases(data_as_of = format(sunday_date, "%B %d, %Y"))

14-day Table

df_who |>
  filter(date <= sunday_date) |>
  table_10mostcases(time_step = 14)

By region

for (r in unique(na.omit(df_who$who_region))) {
  tab_out <- df_who |>
    filter(date <= sunday_date, who_region == r) |>
    table_10mostcases(region = r, data_as_of = format(Sys.Date(), "%B %d, %Y"))

  print(htmltools::tagList(tab_out))
}

Top 10 Incidence Table

table_10incidence(df, time_step = 7, region = NULL, data_as_of = NULL)

Similar to the above, but computes top-10 countries baseed on incidence over past time_step.

7-day table

df_who |>
  filter(date <= sunday_date) |>
  table_10incidence(data_as_of = format(sunday_date, "%B %d, %Y"))

14-day table

df_who |>
  filter(date <= sunday_date) |>
  table_10incidence(time_step = 14)

By Region

for (r in unique(na.omit(df_who$who_region))) {
  tab_out <- df_who %>%
    filter(date <= sunday_date, who_region == r) |>
    table_10incidence(region = r, data_as_of = format(Sys.Date(), "%B %d, %Y"))

  print(htmltools::tagList(tab_out))
}

Top 10 Percent Change Table

table_10percentchange(df, time_step = 7, second_time_step = 28, region = NULL, data_as_of = NULL)

Works similarly to the above, but selects top 10 countries based on percent change over time_step.
There's an additional argument second_time_step that specifies a second (normally longer) period to compute a percent change over.
The default visualization is using weekly change and 4 week change.

This table is usually filtered to only countries over a certain population size, but this should be handled
externally.

Global table (7d and 28d pct change)

df_who %>%
  filter(date <= sunday_date, population >= 10000000) |>
  table_10percentchange(data_as_of = format(sunday_date, "%B %d, %Y"))

Global table (14d and 28d pct change)

df_who %>%
  filter(date <= sunday_date, population >= 10000000) |>
  table_10percentchange(time_step = 14)

Regional Tables

for (r in unique(na.omit(df_who$who_region))) {
  tab_out <- df_who %>%
    filter(date <= sunday_date, population >= 100000, who_region == r) |> # Exclude population less than 100,000
    table_10percentchange(region = r, data_as_of = format(sunday_date, "%B %d, %Y")) 

  print(htmltools::tagList(tab_out))
}

Top 10 Vaccinations Table

table_10vaccinations(df, type = c("Global", "Region"), run_date)

The "table_10vaccinations" function takes looks for a country, value1 (people vaccinated per hundred), and a value2 (daily vaccinations per hundred) column. Note as vaccination reporting has gaps, the df must be sliced by country and the most recent date with people_vaccinated_per_hundred value (if there is one).

df_who %>%
  filter(date <= sunday_date) %>%
  filter(population > 1000000) %>%
  group_by(country) %>%
  filter(!is.na(people_vaccinated_per_hundred)) %>%
  filter(date == max(date)) %>%
  ungroup() %>%
  select(country = who_country, value1 = people_vaccinated_per_hundred, value2 = daily_vaccinations_per_hundred) %>%
  arrange(desc(value1)) %>%
  head(10) %>%
  table_10vaccinations(., run_date = format(sunday_date, "%B %d, %Y"))
for (r in unique(na.omit(df_who$who_region))) {
  tab_out <- df_who %>%
    filter(date <= sunday_date) %>%
    filter(population > 100000) %>% # Exclude population less than 100,000
    filter(who_region == r) %>%
    group_by(country) %>%
    filter(!is.na(people_vaccinated_per_hundred)) %>%
    arrange(date) %>%
    top_n(1, date) %>%
    distinct(id, .keep_all = T) %>%
    select(country = who_country, value1 = people_vaccinated_per_hundred, value2 = daily_vaccinations_per_hundred) %>%
    arrange(desc(value1)) %>%
    head(10) %>%
    table_10vaccinations(., type = r, run_date = format(sunday_date, "%B %d, %Y"))

  print(htmltools::tagList(tab_out))
}

Top 10 Fully Vaccinated Table

table_10vaccinations(df, vac_type = c("Partial", "Fully"), type = "Global", run_date = "Enter a date")

df_who %>%
  filter(date <= sunday_date) %>%
  filter(population > 1000000) %>%
  group_by(country) %>%
  filter(!is.na(people_fully_vaccinated_per_hundred)) %>%
  filter(date == max(date)) %>%
  ungroup() %>%
  select(country = who_country, value1 = people_fully_vaccinated_per_hundred, value2 = daily_vaccinations_per_hundred) %>%
  arrange(desc(value1)) %>%
  head(10) %>%
  table_10vaccinations(., run_date = format(sunday_date, "%B %d, %Y"))

Countries of Concern Table

table_countriesofconcern(df, df_vaccinations_manufacturers, country_list)

c_list <- c("United Kingdom","Denmark","Portugal","South Africa","Kenya","Zambia","United States of America")
c_list_iso <- parse_country(c_list, to = "iso3c")

df_who_latest <- df_who %>%
  group_by(id) %>%
  filter(date == max(date)) %>%
  ungroup()

vax_man <- get_vax_manufacturers()

table_countriesofconcern(df_who_latest, vax_man, c_list_iso)


CDCgov/SaviR documentation built on April 14, 2025, 7:46 a.m.