knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(tidyverse) library(covdata) library(ggrepel)
Data for the United States come from a variety of sources:
covus
## Which n states are leading the count of positive cases or deaths? top_n_states <- function(data, n = 5, measure = c("positive", "death")) { meas <- match.arg(measure) data %>% group_by(state) %>% filter(measure == meas, date == max(date)) %>% drop_na(count) %>% ungroup() %>% top_n(n, wt = count) %>% pull(state) } state_cols <- c("gray70", "#195F90FF", "#D76500FF", "#238023FF", "#AB1F20FF", "#7747A3FF", "#70453CFF", "#D73EA8FF", "#666666FF", "#96971BFF", "#1298A6FF", "#6F9BD6FF", "#FF952DFF", "#66CF51FF", "#FF4945FF", "#A07DBAFF", "#AC7368FF", "#EF69A2FF", "#9F9F9FFF", "#CACA56FF", "#61C3D5FF") covus %>% group_by(state) %>% mutate(core = case_when(state %nin% top_n_states(covus) ~ "", TRUE ~ state), end_label = ifelse(date == max(date), core, NA)) %>% arrange(date) %>% filter(measure == "positive", date > "2020-03-09") %>% ggplot(aes(x = date, y = count, group = state, color = core, label = end_label)) + geom_line(size = 0.5) + geom_text_repel(segment.color = NA, nudge_x = 0.2, nudge_y = 0.1) + scale_color_manual(values = state_cols) + scale_x_date(date_breaks = "1 month", date_labels = "%b" ) + scale_y_continuous(trans = "log2", labels = scales::comma_format(accuracy = 1), breaks = 2^c(seq(1, 22, 1))) + guides(color = "none") + labs(title = "COVID-19 Cumulative Recorded Cases by US State", subtitle = paste("Data as of", format(max(covus$date), "%A, %B %e, %Y")), x = "Date", y = "Count of Cases (log 2 scale)", caption = "Data: COVID Tracking Project, http://covidtracking.com | Graph: @kjhealy") + theme_minimal()
The COVID Tracking Project reports cumulative counts for key measures such as positive tests and deaths. For example, for New York State:
measures <- c("positive", "death") covus %>% filter(measure %in% measures, state == "NY") %>% select(date, state, measure, count) %>% pivot_wider(names_from = measure, values_from = count)
To calculate daily counts from these cumulative measures, use lag()
.
covus %>% filter(measure %in% measures, state == "NY") %>% select(date, state, measure, count) %>% pivot_wider(names_from = measure, values_from = count) %>% mutate(across(positive:death, ~.x - lag(.x, order_by = date), .names = "daily_{col}"))
state_pops <- uspop %>% filter(sex_id == "totsex", hisp_id == "tothisp") %>% select(state_abbr, statefips, pop, state) %>% rename(name = state, state = state_abbr, fips = statefips) %>% mutate(state = replace(state, fips == "11", "DC")) ## Using a convenience function to do something similar ## to the lambda version above get_daily_count <- function(count, date){ count - lag(count, order_by = date) } covus %>% filter(measure == "death", state %in% unique(state_pops$state)) %>% group_by(state) %>% mutate( deaths_daily = get_daily_count(count, date), deaths7 = slider::slide_dbl(deaths_daily, mean, .before = 7, .after = 0, na.rm = TRUE)) %>% left_join(state_pops) %>% filter(date > lubridate::ymd("2020-03-15")) %>% ggplot(mapping = aes(x = date, y = (deaths7/pop)*1e5)) + geom_line(size = 0.5) + scale_y_continuous(labels = scales::comma_format(accuracy = 1)) + facet_wrap(~ name, ncol = 4) + labs(x = "Date", y = "Deaths per 100,000 Population (Seven Day Rolling Average)", title = "Average Death Rate from COVID-19: US States and Washington, DC", subtitle = paste("COVID Tracking Project data as of", format(max(covus$date), "%A, %B %e, %Y")), caption = "Kieran Healy @kjhealy / Data: https://www.covidtracking.com/") + theme_minimal()
covus %>% filter(measure == "death", state %in% unique(state_pops$state)) %>% group_by(state) %>% mutate( deaths_daily = get_daily_count(count, date), deaths7 = slider::slide_dbl(deaths_daily, mean, .before = 7, .after = 0, na.rm = TRUE)) %>% left_join(state_pops) %>% filter(date > lubridate::ymd("2020-03-15")) %>% ggplot(mapping = aes(x = date, y = (deaths7/pop)*1e5)) + geom_line(size = 0.5) + facet_wrap(~ name, ncol = 4, scales = "free_y") + labs(x = "Date", y = "Deaths per 100,000 Population (Seven Day Rolling Average)", title = "Average Death Rate from COVID-19: US States and Washington, DC. Free scales.", subtitle = paste("COVID Tracking Project data as of", format(max(covus$date), "%A, %B %e, %Y")), caption = "Kieran Healy @kjhealy / Data: https://www.covidtracking.com/") + theme_minimal()
covus %>% filter(measure == "death") %>% group_by(date) %>% summarize(count = sum(count, na.rm = TRUE)) %>% ggplot(aes(x = date, y = count)) + geom_line(size = 0.75) + scale_x_date(date_breaks = "3 months", date_labels = "%b %Y" ) + scale_y_continuous(labels = scales::comma, breaks = seq(0, 500000, 100000)) + labs(title = "COVID-19 Cumulative Recorded Deaths in the United States", subtitle = paste("Data as of", format(max(covus$date), "%A, %B %e, %Y"), "Recorded counts underestimate total mortality."), x = "Date", y = "Count of Deaths", caption = "Data: COVID Tracking Project, http://covidtracking.com | Graph: @kjhealy") + theme_minimal()
nytcovstate
nytcovcounty
We can see that data for some counties is either not available or hasn't been correctly coded.
nytcovcounty %>% filter(state %in% c("New York", "North Carolina", "Texas", "Florida", "California", "Illinois")) %>% mutate(uniq_name = paste(county, state)) %>% # Can't use FIPS because of how the NYT bundled cities group_by(uniq_name) %>% mutate(days_elapsed = date - min(date)) %>% ggplot(aes(x = days_elapsed, y = cases + 1, group = uniq_name)) + geom_line(size = 0.25, color = "gray20") + scale_x_continuous() + scale_y_log10(labels = scales::label_number_si()) + guides(color = "none") + facet_wrap(~ state, ncol = 2) + labs(title = "COVID-19 Cumulative Recorded Cases by US County", subtitle = paste("New York is bundled into a single area in this data.\nData as of", format(max(nytcovcounty$date), "%A, %B %e, %Y")), x = "Days since first case", y = "Count of Cases (log 10 scale)", caption = "Data: The New York Times | Graph: @kjhealy") + theme_minimal()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.