knitr::opts_chunk$set( collapse = TRUE, warning = FALSE, message = FALSE, comment = "#>", dev = "png", dev.opts = list(type = "cairo-png") ) library(SaviR) library(dplyr) library(tidyr) library(ggplot2) library(passport) library(gt)
After installing SaviR, you are ready to use the following functions to read in the data and create visualizations. There are functions to create plots, maps, or tables by World Health Organization (WHO) Region, Department of State (DoS) Region, or globally.
onetable
- stored metadata with iso2 and iso3 codes, country names, WHO/DoS/World Bank regions, and UN population estimates
get_covid_df(sources = c("all", "WHO", "WHO+JHU", "WHO+Primary"))
- COVID cases/deaths dataframe from WHO and other sources.
calc_add_risk()
- Add risk matrix calculations to dataframe (df)
get_vax()
- Get vaccination data from Our World In Data (OWID)
get_combined_table(type = c("WHO", "Both", "legacy"), geometry=FALSE)
- Combine all of the above into one large df, optionally add geometry
This snippet produces the master df with all the variables required for all of the following graphics.
# Load in data to create visuals df_who <- get_combined_table("WHO") # China includes Taiwan, Hong Kong, and Macau data # Data for visuals are typically as of Sunday of the current week # but this can be determined dynamically using WHO data. sunday_date <- df_who |> distinct(date, dow = weekdays(date)) |> arrange(desc(date)) |> filter(dow == "Sunday") |> slice(1) |> pull(date)
plot_epicurve(df, transparent = T)
When creating epi curves for the WHO regions, the "plot_epicurve" function should take in the df with only "WHO" observations in order to match the same curve on the WHO Coronavirus Dashboard. The individual epi curves for each region can be run simply by filtering for one WHO region in the df. The function will detect that the region column has only one region and will produce the epi curve accordingly.
When creating epi curves for the DoS regions, the plot_epicurve function should take in the df with "Both" observations. Similarly to the WHO epi curves, individual epi curves can also be produce by filtering to one DoS region.
# Epi Curves for WHO -- Global epi_curve_global <- plot_epicurve(df_who, transparent = T) epi_curve_global # Epi Curves for DoS -- Global state_epi_curve <- plot_epicurve(filter(df_who, date <= sunday_date), type = "cases", by_cat = "State Region") state_epi_curve
inset = TRUE
to plot an inset plot for the most recent weeksmin(date) + weeks(1)
, whichever is laterinset_epi_curve <- df_who |> filter(date <= sunday_date) |> plot_epicurve(type = "cases", by_cat = "State Region", transparent = FALSE, inset = TRUE) inset_epi_curve
# Regions for (r in unique(na.omit(df_who$who_region))) { epi_curve_regional <- plot_epicurve(filter(df_who, who_region == r), transparent = F) print(epi_curve_regional) }
plot_riskmatrix(df)
Note for labeling purposes, the labels variable should be added to the dataframe specifying the top 10 countries for weekly average incidence, week case percent change, and weekly total cases. Exclusion criteria: Countries with population less than 10 million and countries with weekly cases totals less than 100.
global_risk_matrix <- df_who %>% filter(date == sunday_date) %>% filter(population >= 10000000) %>% # Exclude population less than 10M filter(week_case >= 100) %>% mutate( rank_inc = dense_rank(desc(week_case_incidence)), rank_per = dense_rank(desc(percent_change_case)), rank_cas = dense_rank(desc(week_case)) ) %>% mutate(labels = case_when( rank_inc %in% 1:10 ~ country, rank_per %in% 1:10 ~ country, rank_cas %in% 1:10 ~ country )) %>% plot_riskmatrix() global_risk_matrix
global_vax_coverage <- df_who %>% filter(date <= sunday_date) %>% filter(date == max(date)) %>% filter(!is.na(state_region)) %>% plot_vaxcoverage(by_cat = "State Region") global_vax_coverage
By default, get_combined_table()
does not return geometry. This is preferable, since the size of the table is quite large and the geometry list column takes up a lot of space in memory.
However, if we want to map these data, we can either call get_combined_table("Both", geometry = TRUE)
, or join with country_coords
after filtering, like so:
# Filter to only observations with date as of Sunday # and join in geometry df_who_mapping <- df_who %>% filter(date == sunday_date) %>% left_join(select(country_coords, id, geometry), by = "id")
map_burden(df, region, time_step)
The "map_burden" function takes in a pre-filtered data.frame and computes and displays average incidence for each country over the time period selected.
The region
argument is optional, but it is used to specify a specific DoS/WHO region to zoom to, if desired.
time_step
is used to specify the days used to average incidence over. This is always relative to the latest date in the data.frame.
bin_breaks
and bin_colors
can be used to over-ride sensible defaults for incidence cutpoints and color scheme, if desired.
burden_map_global <- df_who |> filter(date <= sunday_date) |> map_burden() burden_map_global for (r in unique(na.omit(df_who$who_region))) { burden_map_regional <- df_who |> filter(who_region == r, date <= sunday_date) |> map_burden(region = r) print(burden_map_regional) }
and an example of how to produce a 14d map:
burden_map_global_14 <- df_who |> filter(date <= sunday_date) |> map_burden(time_step = 14) burden_map_global_14
We can also over-ride the incidence cut-points, like so:
# same map with 0-1, 1-5, 5-10, 10+ breaks burden_map_global_14_2 <- df_who |> filter(date <= sunday_date) |> map_burden( time_step = 14, bin_breaks = c(0, 1, 5, 10, Inf), bin_colors = c("0- <1" = "#f1e5a1", "1- <5" = "#e7b351", "5- <10" = "#d26230", "10+" = "#aa001e") ) burden_map_global_14_2
map_trend(df, region, time_step = 7)
The "map_trend" function requires a result
column where weekly percent change is converted into factors using "cut".
The default behavior is to visualize a global trend map, but passing a data frame filtered to a specific region
and providing the region name as a region
argument will change the bounding box accordingly.
A different time_step
can be provided. The default assumes you are comparing two 7d periods,
but this can be increased or decreased accordingly.
trend_map_global <- df_who |> filter(date <= sunday_date) |> map_trend() trend_map_global
for (r in unique(na.omit(df_who$who_region))) { trend_map_reg <- df_who |> filter(date <= sunday_date) |> map_trend(region = r) print(trend_map_reg) }
and an example producing the same with different time_step:
df_who |> filter(date <= sunday_date) |> map_trend(time_step = 14)
map_vaccinations(df, vac_type = c("People", "Fully"))
global_vax_map <- df_who_mapping %>% mutate(result = cut(people_vaccinated_per_hundred, breaks = c(0, 1, 3, 10, 30, Inf))) %>% group_by(country) %>% filter(!is.na(result)) %>% filter(date == max(date)) %>% ungroup() %>% map_vaccinations(., vac_type = "People") global_vax_map
table_10mostcases(df, time_step = 7, region = NULL, data_as_of = NULL)
This function presents a top-10 table of countries based on cases reported in the past time_step
.
The default is a 7 day period, but this can be adjusted. Additionally, you can tweak the title and
data-as-of footnote using those variables respectively, or they will be inferred internally.
df_who |> filter(date <= sunday_date) |> table_10mostcases(data_as_of = format(sunday_date, "%B %d, %Y"))
df_who |> filter(date <= sunday_date) |> table_10mostcases(time_step = 14)
for (r in unique(na.omit(df_who$who_region))) { tab_out <- df_who |> filter(date <= sunday_date, who_region == r) |> table_10mostcases(region = r, data_as_of = format(Sys.Date(), "%B %d, %Y")) print(htmltools::tagList(tab_out)) }
table_10incidence(df, time_step = 7, region = NULL, data_as_of = NULL)
Similar to the above, but computes top-10 countries baseed on incidence over past time_step
.
df_who |> filter(date <= sunday_date) |> table_10incidence(data_as_of = format(sunday_date, "%B %d, %Y"))
df_who |> filter(date <= sunday_date) |> table_10incidence(time_step = 14)
for (r in unique(na.omit(df_who$who_region))) { tab_out <- df_who %>% filter(date <= sunday_date, who_region == r) |> table_10incidence(region = r, data_as_of = format(Sys.Date(), "%B %d, %Y")) print(htmltools::tagList(tab_out)) }
table_10percentchange(df, time_step = 7, second_time_step = 28, region = NULL, data_as_of = NULL)
Works similarly to the above, but selects top 10 countries based on percent change over time_step
.
There's an additional argument second_time_step
that specifies a second (normally longer) period to compute a percent change over.
The default visualization is using weekly change and 4 week change.
This table is usually filtered to only countries over a certain population size, but this should be handled
externally.
df_who %>% filter(date <= sunday_date, population >= 10000000) |> table_10percentchange(data_as_of = format(sunday_date, "%B %d, %Y"))
df_who %>% filter(date <= sunday_date, population >= 10000000) |> table_10percentchange(time_step = 14)
for (r in unique(na.omit(df_who$who_region))) { tab_out <- df_who %>% filter(date <= sunday_date, population >= 100000, who_region == r) |> # Exclude population less than 100,000 table_10percentchange(region = r, data_as_of = format(sunday_date, "%B %d, %Y")) print(htmltools::tagList(tab_out)) }
table_10vaccinations(df, type = c("Global", "Region"), run_date)
The "table_10vaccinations" function takes looks for a country, value1 (people vaccinated per hundred), and a value2 (daily vaccinations per hundred) column. Note as vaccination reporting has gaps, the df must be sliced by country and the most recent date with people_vaccinated_per_hundred value (if there is one).
df_who %>% filter(date <= sunday_date) %>% filter(population > 1000000) %>% group_by(country) %>% filter(!is.na(people_vaccinated_per_hundred)) %>% filter(date == max(date)) %>% ungroup() %>% select(country = who_country, value1 = people_vaccinated_per_hundred, value2 = daily_vaccinations_per_hundred) %>% arrange(desc(value1)) %>% head(10) %>% table_10vaccinations(., run_date = format(sunday_date, "%B %d, %Y"))
for (r in unique(na.omit(df_who$who_region))) { tab_out <- df_who %>% filter(date <= sunday_date) %>% filter(population > 100000) %>% # Exclude population less than 100,000 filter(who_region == r) %>% group_by(country) %>% filter(!is.na(people_vaccinated_per_hundred)) %>% arrange(date) %>% top_n(1, date) %>% distinct(id, .keep_all = T) %>% select(country = who_country, value1 = people_vaccinated_per_hundred, value2 = daily_vaccinations_per_hundred) %>% arrange(desc(value1)) %>% head(10) %>% table_10vaccinations(., type = r, run_date = format(sunday_date, "%B %d, %Y")) print(htmltools::tagList(tab_out)) }
table_10vaccinations(df, vac_type = c("Partial", "Fully"), type = "Global", run_date = "Enter a date")
df_who %>% filter(date <= sunday_date) %>% filter(population > 1000000) %>% group_by(country) %>% filter(!is.na(people_fully_vaccinated_per_hundred)) %>% filter(date == max(date)) %>% ungroup() %>% select(country = who_country, value1 = people_fully_vaccinated_per_hundred, value2 = daily_vaccinations_per_hundred) %>% arrange(desc(value1)) %>% head(10) %>% table_10vaccinations(., run_date = format(sunday_date, "%B %d, %Y"))
table_countriesofconcern(df, df_vaccinations_manufacturers, country_list)
c_list <- c("United Kingdom","Denmark","Portugal","South Africa","Kenya","Zambia","United States of America") c_list_iso <- parse_country(c_list, to = "iso3c") df_who_latest <- df_who %>% group_by(id) %>% filter(date == max(date)) %>% ungroup() vax_man <- get_vax_manufacturers() table_countriesofconcern(df_who_latest, vax_man, c_list_iso)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.