library(covidregionaldata) library(ggplot2) library(ggridges) library(roll) library(scales) library(forcats) library(dplyr) library(tidyr) if (!exists("country")) { country <- params$country } # Set-up output ---- # Figure path on disk = base.dir + fig.path # Figure URL online = base.url + fig.path knitr::opts_knit$set(base.dir = stringr::str_c(here::here(), "/docs/"), base.url = "/covidregionaldatagraphs/") # project root folder knitr::opts_chunk$set(fig.path = stringr::str_c(paste0(paste("images", country, sep="/")), "-")) # Load data ---- dataset_details <- get_available_datasets("regional") %>% filter(grepl(country, origin)) level_1_data <- get_regional_data( country = country, totals = FALSE, level = 1, localise = FALSE ) # Specific code to remove double-counting of England in UK data if (country == "United Kingdom") { level_1_data <- level_1_data %>% filter(level_1_region != "England") } if (!is.na(dataset_details$level_2_region)) { level_2_data <- get_regional_data( country = country, totals = FALSE, level = 2, localise = FALSE ) } national_data <- level_1_data %>% group_by(date) %>% summarise(across(where(is.double), sum), .groups = "drop_last") last_date <- format(max(level_1_data$date), "%B %d, %Y") caption_text <- paste0( "Data: ", dataset_details$source_text, " (sourced through covidregionaldata), ", last_date, "\nPrepared by: ", params$prepared_by ) # Set graphing defaults ---- theme_set( theme_minimal() + theme(plot.caption = element_text(size = 6)) )
cat("These plots are prepared using\n", "[covidregionaldata](https://epiforecasts.io/covidregionaldata) to\n", "download data published by\n[", dataset_details$source_text, "](", dataset_details$source_url, ").\n\n")
Ridgeline graphs allow for comparison of the incidence in different regions side by side. These are not per capita calculations but just the daily incidence. There is no smoothing, so weekly variations and gaps in testing or reporting due to weekends or holidays are visible.
intercity_gap <- max(level_1_data$cases_new, na.rm = TRUE) / 2 intercity_gap <- round(intercity_gap, -floor(log10(intercity_gap))) ridgeline_labels <- level_1_data %>% filter(level_1_region != "Unknown") %>% mutate(y = -as.numeric(factor(level_1_region)) * intercity_gap, region = level_1_region) %>% select(region, y) %>% unique() level_1_data %>% filter(level_1_region != "Unknown") %>% ggplot(aes( x = date, y = -as.numeric(factor(level_1_region)) * intercity_gap, height = cases_new, group = level_1_region )) + # y=as.numeric(level_2_region)*250 geom_ridgeline(alpha = 0.5, aes(fill = level_1_region), size = 0.25) + scale_y_continuous( breaks = ridgeline_labels$y, labels = ridgeline_labels$region, sec.axis = sec_axis(~ . + 10, name = "Daily incidence (confirmed cases)", labels = rep(c(intercity_gap / 2, "0"), nrow(ridgeline_labels)), breaks = seq( from = -intercity_gap / 2, to = -intercity_gap * nrow(ridgeline_labels), by = -intercity_gap / 2 ) ), name = "Region" ) + scale_x_date(date_breaks = "3 months", date_minor_breaks = "1 month", date_labels = "%b %y") + theme_ridges() + theme(plot.caption = element_text(size = 6)) + theme(legend.position = "none") + labs( x = "Date", y = "Region / Incidence", title = paste0("Regional COVID-19 incidence in ", country), subtitle = "All level 1 regions", caption = caption_text ) + theme( axis.text.y = element_text(size = 8), axis.title.y.left = element_blank() )
if (length(unique(level_1_data$level_1_region))>11 ) { cat( "# Plot ridgeline incidence for top 10 level 1 regions\n\n", "Where there are many level 1 regions, the top 10 regions are displayed.\n" ) }
if (length(unique(level_1_data$level_1_region))>11 ) { # Make summary table for level 1 ---- region_summaries <- level_1_data %>% group_by(level_1_region) %>% summarise( min_i = min(cases_new, na.rm = TRUE), max_i = max(cases_new, na.rm = TRUE), median_i = median(cases_new, na.rm = TRUE), mean_i = mean(cases_new, na.rm = TRUE), .groups = "drop_last" ) %>% rename(region = level_1_region) narrowed_regions <- pull(region_summaries %>% slice_max(max_i, n = 10) %>% select(region)) narrowed_regional_incidence <- level_1_data %>% # filter(date < as_date("2021-01-04")) %>% filter(level_1_region %in% narrowed_regions) # Calculate intercity_gap for level 1 ---- intercity_gap <- max(level_1_data$cases_new, na.rm = TRUE) / 2 intercity_gap <- round(intercity_gap, -floor(log10(intercity_gap))) ridgeline_labels <- narrowed_regional_incidence %>% mutate(y = -as.numeric(factor(level_1_region)) * intercity_gap, region = level_1_region) %>% select(region, y) %>% unique() narrowed_regional_incidence %>% ggplot(aes( x = date, y = -as.numeric(factor(level_1_region)) * intercity_gap, height = cases_new, group = level_1_region )) + # y=as.numeric(level_2_region)*250 geom_ridgeline(alpha = 0.5, aes(fill = level_1_region), size = 0.25) + scale_y_continuous( breaks = ridgeline_labels$y, labels = ridgeline_labels$region, sec.axis = sec_axis(~ . + 10, name = "Daily incidence (confirmed cases)", labels = rep(c(intercity_gap / 2, "0"), 10), breaks = seq( from = -intercity_gap / 2, to = -intercity_gap * 10, by = -intercity_gap / 2 ) ), name = "Region" ) + scale_x_date(date_breaks = "3 months", date_minor_breaks = "1 month", date_labels = "%b %y") + theme_ridges() + theme(plot.caption = element_text(size = 8)) + theme(legend.position = "none") + labs( x = "Date", y = "Region / Incidence", title = paste0("Regional COVID-19 incidence in ", country), subtitle = "Top ten level 1 regions by maximum daily incidence", caption = caption_text ) + theme( axis.text.y = element_text(size = 8), axis.title.y.left = element_blank() ) }
if (!is.na(dataset_details$level_2_region)) { cat("# Plot ridgeline incidence for top 10 level 2 regions\n\n", "The top 10 level 2 regions are shown.\n") }
if (!is.na(dataset_details$level_2_region)) { # Make summary table for level 2 ---- region_summaries <- level_2_data %>% group_by(level_2_region) %>% summarise( min_i = min(cases_new, na.rm = TRUE), max_i = max(cases_new, na.rm = TRUE), median_i = median(cases_new, na.rm = TRUE), mean_i = mean(cases_new, na.rm = TRUE), .groups = "drop_last" ) %>% rename(region = level_2_region) narrowed_regions <- pull(region_summaries %>% slice_max(max_i, n = 10) %>% select(region)) narrowed_regional_incidence <- level_2_data %>% # filter(date < as_date("2021-01-04")) %>% filter(level_2_region %in% narrowed_regions) # Calculate intercity gap for level 2 ---- intercity_gap <- max(level_2_data$cases_new, na.rm = TRUE) / 2 intercity_gap <- round(intercity_gap, -floor(log10(intercity_gap))) ridgeline_labels <- narrowed_regional_incidence %>% mutate(y = -as.numeric(factor(level_2_region)) * intercity_gap, region = level_2_region) %>% select(region, y) %>% unique() narrowed_regional_incidence %>% ggplot(aes( x = date, y = -as.numeric(factor(level_2_region)) * intercity_gap, height = cases_new, group = level_2_region )) + # y=as.numeric(level_2_region)*250 geom_ridgeline(alpha = 0.5, aes(fill = level_2_region), size = 0.25) + scale_y_continuous( breaks = ridgeline_labels$y, labels = ridgeline_labels$region, sec.axis = sec_axis(~ . + 10, name = "Daily incidence (confirmed cases)", labels = rep(c(intercity_gap / 2, "0"), 10), breaks = seq( from = -intercity_gap / 2, to = -intercity_gap * 10, by = -intercity_gap / 2 ) ), name = "Region" ) + scale_x_date(date_breaks = "3 months", date_minor_breaks = "1 month", date_labels = "%b %y") + theme_ridges() + theme(plot.caption = element_text(size = 8)) + theme(legend.position = "none") + labs( x = "Date", y = "Region / Incidence", title = paste0("Regional COVID-19 incidence in ", country), subtitle = "Top ten level 2 regions by maximum daily incidence", caption = caption_text ) + theme( axis.text.y = element_text(size = 8), axis.title.y.left = element_blank() ) }
The following charts are a form of aggregated heatmap. They are a stacked column display of the number of regions for each country with average weekly incidence falling into certain ranges. This gives an overview of how concentrated a shift in the data may be, but masks variation as to which regions are being more or less impacted from week to week.
# Attempt to calculate a "natural" bucket scale for the waterfalls bucket_scale <- median(level_1_data$cases_new, na.rm = TRUE) * 2 if (bucket_scale < 20) { bucket_scale <- 20 } bucket_scale <- floor(round(bucket_scale, -ceiling(log10(bucket_scale)) + 1)/20)*20 bucket_breaks <- c(-1, seq(from = 0, to = bucket_scale, length.out = 5), Inf) bucket_labels <- c( 0, paste(bucket_breaks[c(-1, -6, -7)], bucket_breaks[c(-1, -2, -7)], sep = "-" ), paste0(bucket_scale, "+") ) level_1_counts <- level_1_data %>% select(date, cases_new, level_1_region) %>% mutate(cases_new = if_else(is.na(cases_new), 0, cases_new)) %>% group_by(level_1_region) %>% arrange(date, .by_group = TRUE) %>% mutate(weekly_mean_cases = roll_mean(cases_new, 7, complete_obs = TRUE)) %>% filter(date > "2020-10-01") %>% group_by(date, group = fct_rev(cut(weekly_mean_cases, breaks = bucket_breaks, labels = bucket_labels, include.lowest = TRUE )) ) %>% summarise(count = n(), .groups = "drop_last") level_1_counts %>% ggplot() + geom_col(mapping = aes(x = date, y = count, fill = group), width = 1) + scale_x_date(date_breaks = "2 months", date_minor_breaks = "1 month", date_labels = "%b %y") + labs( x = "Date", y = paste0("Number of level 1 regions - ", dataset_details$level_1_region), fill = "Case count", title = paste0( tools::toTitleCase(dataset_details$level_1_region), " case counts in ", country ), subtitle = "7 day average counts of new cases", caption = caption_text ) + scale_fill_brewer(palette = "Blues", direction = 1)
if (!is.na(dataset_details$level_2_region)) { cat( "# Waterfall chart case counts - level 2\n\n", "Plotting these charts for level 2 regions typically shows smoother curves.\n" ) }
if (!is.na(dataset_details$level_2_region)) { # Attempt to calculate a "natural" bucket scale for the waterfalls bucket_scale <- median(level_2_data$cases_new, na.rm = TRUE) * 2 if (bucket_scale < 20) { bucket_scale <- 20 } bucket_scale <- floor(round(bucket_scale, -ceiling(log10(bucket_scale)) + 1)/20)*20 bucket_breaks <- c(-1, seq(from = 0, to = bucket_scale, length.out = 5), Inf) bucket_labels <- c( 0, paste(bucket_breaks[c(-1, -6, -7)], bucket_breaks[c(-1, -2, -7)], sep = "-" ), paste0(bucket_scale, "+") ) level_2_counts <- level_2_data %>% select(date, cases_new, level_2_region) %>% mutate(cases_new = if_else(is.na(cases_new), 0, cases_new)) %>% group_by(level_2_region) %>% arrange(date, .by_group = TRUE) %>% mutate(weekly_mean_cases = roll_mean(cases_new, 7, complete_obs = TRUE)) %>% filter(date > "2020-10-01") %>% group_by(date, group = fct_rev(cut(weekly_mean_cases, breaks = bucket_breaks, labels = bucket_labels, include.lowest = TRUE )) ) %>% summarise(count = n(), .groups = "drop_last") level_2_counts %>% ggplot() + geom_col(mapping = aes(x = date, y = count, fill = group), width = 1) + scale_x_date(date_breaks = "2 months", date_minor_breaks = "1 month", date_labels = "%b %y") + labs( x = "Date", y = paste0("Number of level 2 regions - ", dataset_details$level_2_region), fill = "Case count", title = paste0( tools::toTitleCase(dataset_details$level_2_region), " case counts in ", country ), subtitle = "7 day average counts of new cases", caption = caption_text ) + scale_fill_brewer(palette = "Blues", direction = 1) }
if (length(unique(level_1_data$tested_new))>1 && !is.na(unique(level_1_data$tested_new)[1])) { cat("# Waterfall chart level 1 region test positivity\n\n", "This proxy for test positivity is calculated by comparing the number of new cases each day with the number of tests taken each day.") }
if (length(unique(level_1_data$tested_new))>1 && !is.na(unique(level_1_data$tested_new)[1])) { level_1_positivity <- level_1_data %>% filter(date > "2020-10-01") %>% # Calculate a proxy for test positivity: cases / tests mutate(dgn_prc_day = cases_new / tested_new * 100) %>% select(date, dgn_prc_day, level_1_region) %>% group_by(level_1_region) %>% arrange(date, .by_group = TRUE) %>% mutate(weekly_mean_positivity = roll_mean(dgn_prc_day, 7)) %>% group_by(date, group = fct_rev(cut( dgn_prc_day, breaks = c(-1, 0, 5, 10, 15, 20, Inf), labels = c("0%", "0-5%", "5-10%", "10-15%", "15-20%", "20%+"), include.lowest = TRUE )) ) %>% summarise(count = n(), .groups = "drop_last") level_1_positivity %>% ggplot() + geom_col(mapping = aes(x = date, y = count, fill = group), width = 1) + scale_x_date(date_breaks = "2 months", date_minor_breaks = "1 month", date_labels = "%b %y") + labs( x = "Date", y = paste0("Number of level 1 regions - ", dataset_details$level_1_region), fill = "Test positivity", title = paste0("Level 1 region test positivity in ", country), subtitle = "7 day average test positivity", caption = caption_text ) + scale_fill_brewer(palette = "Oranges", direction = 1) }
if (!is.na(dataset_details$level_2_region) && length(unique(level_2_data$tested_new))>1 && !is.na(unique(level_2_data$tested_new)[1])) { cat("# Waterfall chart level 2 region test positivity\n\n", "This proxy for test positivity is calculated by comparing the number of new cases each day with the number of tests taken each day.") }
if (!is.na(dataset_details$level_2_region) && length(unique(level_2_data$tested_new))>1 && !is.na(unique(level_2_data$tested_new)[1]) ) { level_2_positivity <- level_2_data %>% filter(date > "2020-10-01") %>% # Calculate a proxy for test positivity: cases / tests mutate(dgn_prc_day = cases_new / tested_new * 100) %>% select(date, dgn_prc_day, level_2_region) %>% arrange(level_2_region, date) %>% mutate(weekly_mean_positivity = roll_mean(dgn_prc_day, 7)) %>% group_by(date, group = fct_rev(cut( dgn_prc_day, breaks = c(-1, 0, 5, 10, 15, 20, Inf), labels = c("0%", "0-5%", "5-10%", "10-15%", "15-20%", "20%+"), include.lowest = TRUE )) ) %>% summarise(count = n(), .groups = "drop_last") level_2_positivity %>% ggplot() + geom_col(mapping = aes(x = date, y = count, fill = group), width = 1) + scale_x_date(date_breaks = "2 months", date_minor_breaks = "1 month", date_labels = "%b %y") + labs( x = "Date", y = paste0("Number of level 2 regions - ", dataset_details$level_2_region), fill = "Test positivity", title = paste0("Level 2 region test positivity in ", country), subtitle = "7 day average test positivity", caption = caption_text ) + scale_fill_brewer(palette = "Oranges", direction = 1) }
if (length(unique(national_data$tested_new))>1 && !is.na(unique(national_data$tested_new)[1])) { cat("# Acceleration calculations - national\n\n", "This acceleration calculation is made based on a proxy for test positivity calculated by comparing the number of new cases each day with the number of tests taken each day.\n\n" ) }
if (length(unique(national_data$tested_new))>1 && !is.na(unique(national_data$tested_new)[1])) { national_data %>% arrange(date) %>% # Calculate a proxy for test positivity: cases / tests mutate(dgn_prc_day = cases_new / tested_new * 100) %>% # put rolling 7 day average in here mutate( weekly_mean_cases = roll_mean(cases_new, 7), weekly_mean_positivity = roll_mean(dgn_prc_day, 7) ) %>% mutate( cases_accel = ((weekly_mean_cases - lag(weekly_mean_cases)) / abs(lag(weekly_mean_cases))), test_accel = ((weekly_mean_positivity - lag(weekly_mean_positivity)) / abs(lag(weekly_mean_positivity))) ) %>% filter(date > "2020-09-01") %>% select(date, cases_accel, test_accel) %>% pivot_longer( cols = ends_with("_accel"), values_to = "accel", names_to = "type", names_pattern = "(.*)_accel" ) %>% mutate(type = if_else(type == "test", "test positivity", type)) %>% ggplot(aes(x = date, y = accel, colour = type)) + geom_line() + scale_x_date(date_breaks = "2 months", date_minor_breaks = "1 month", date_labels = "%b %y") + # scale_y_continuous(trans = modulus_trans(-0.5), labels=label_percent()) + scale_y_continuous(labels = label_percent()) + geom_hline(yintercept = 0, size = 0.2) + labs( x = "Date", y = "Acceleration", title = paste0("Acceleration of the COVID-19 pandemic in ", country), subtitle = "% change in 7-day average of incidence or test positivity", caption = caption_text ) }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.