# Chunk defaults for this document: merge each chunk's source and its output
# into one block, and prefix printed output lines with "#>".
knitr::opts_chunk$set(comment = "#>", collapse = TRUE)

Source code.

Read data

Reads a NY Times dataset of COVID-19 cases and deaths by state.

library(covid19)
library(dplyr)
library(lubridate)
library(glue)
library(ggplot2)

# Population lookup keyed by state name. The two columns are renamed so that
# `state` lines up with the join key used for the case data below.
state_populations <- dplyr::select(
  read_state_populations(),
  state = state_name,
  population = population_estimate
)

# Case/death rows with a `population` column attached. A left join keeps
# every case row; rows with no matching state get an NA population.
cases <- dplyr::left_join(
  read_state_cases_nytimes(),
  state_populations,
  by = "state"
)

# States shown in the per-state comparison plots.
states_to_compare <- c(
  "Arizona", "California", "Colorado", "Idaho",
  "Nevada", "Oregon", "Washington"
)

Cases by state

Cumulative number of cases

Plot of the cumulative number of confirmed cases over time for a few selected states in the United States.

# Cumulative confirmed cases over time, one colored series per selected
# state. Only rows dated after 2020-02-20 are plotted.
cases %>%
  dplyr::filter(
    date > lubridate::ymd("2020-02-20") & state %in% states_to_compare
  ) %>%
  ggplot(mapping = aes(x = date, y = cases, color = state)) +
  # Both layers inherit x, y and color from the plot-level mapping.
  geom_point(na.rm = TRUE) +
  geom_line(na.rm = TRUE) +
  labs(
    x = "Date",
    y = "Cases",
    color = "State",
    title = "Confirmed cases of COVID-19",
    subtitle = "for selected western states in the United States"
  ) +
  theme(
    panel.background = element_rect(fill = "snow2"),
    panel.grid.major = element_line(color = "grey40", size = 0.5),
    panel.grid.minor = element_line(color = "grey60", size = 0.5)
  )

Cumulative cases per comparable unit of population

# Cumulative cases scaled to a common population size, so that states with
# very different populations can be compared on one axis.
# `population_unit` is the number of residents the rate is expressed per.
population_unit <- 100000
cases %>%
  dplyr::filter(
    state %in% states_to_compare,
    date > lubridate::ymd("2020-02-20")
  ) %>%
  # Rows whose population is NA yield an NA rate; the layers below drop
  # those silently because of na.rm = TRUE.
  dplyr::mutate(cases_per_unit = (cases / population) * population_unit) %>%
  ggplot(aes(x = date, y = cases_per_unit, color = state)) +
  # Both layers inherit x, y and color from the plot-level mapping.
  geom_point(na.rm = TRUE) +
  geom_line(na.rm = TRUE) +
  theme(
    panel.grid.minor = element_line(color = "grey60", size = 0.5),
    panel.grid.major = element_line(color = "grey40", size = 0.5),
    panel.background = element_rect(fill = "snow2")
  ) +
  labs(
    # format() renders the unit without scientific notation and with
    # thousands separators before it is spliced into the labels.
    title = glue(
      "Cumulative cases per ",
      format(population_unit, scientific = FALSE, big.mark = ","),
      " population from COVID-19"
    ),
    subtitle = "for selected western states in the United States",
    x = "Date",
    y = glue(
      "Cumulative Cases per ",
      format(population_unit, scientific = FALSE, big.mark = ",")
    ),
    color = "State"
  )

Deaths by state

Cumulative deaths by state

Plot of the cumulative number of deaths over time for a few selected states in the United States.

# Cumulative deaths over time, one colored series per selected state.
# Only rows dated after 2020-02-20 are plotted.
cases %>%
  dplyr::filter(
    date > lubridate::ymd("2020-02-20") & state %in% states_to_compare
  ) %>%
  ggplot(mapping = aes(x = date, y = deaths, color = state)) +
  # Both layers inherit x, y and color from the plot-level mapping.
  geom_point(na.rm = TRUE) +
  geom_line(na.rm = TRUE) +
  labs(
    x = "Date",
    y = "Cumulative Deaths",
    color = "State",
    title = "Cumulative deaths from COVID-19",
    subtitle = "for selected western states in the United States"
  ) +
  theme(
    panel.background = element_rect(fill = "snow2"),
    panel.grid.major = element_line(color = "grey40", size = 0.5),
    panel.grid.minor = element_line(color = "grey60", size = 0.5)
  )

Cumulative deaths per comparable unit of population

# Cumulative deaths scaled to a common population size, so that states with
# very different populations can be compared on one axis.
# `population_unit` is the number of residents the rate is expressed per.
population_unit <- 100000
cases %>%
  dplyr::filter(
    state %in% states_to_compare,
    date > lubridate::ymd("2020-02-20")
  ) %>%
  # Rows whose population is NA yield an NA rate; the layers below drop
  # those silently because of na.rm = TRUE.
  dplyr::mutate(deaths_per_unit = (deaths / population) * population_unit) %>%
  ggplot(aes(x = date, y = deaths_per_unit, color = state)) +
  # Both layers inherit x, y and color from the plot-level mapping.
  geom_point(na.rm = TRUE) +
  geom_line(na.rm = TRUE) +
  theme(
    panel.grid.minor = element_line(color = "grey60", size = 0.5),
    panel.grid.major = element_line(color = "grey40", size = 0.5),
    panel.background = element_rect(fill = "snow2")
  ) +
  labs(
    # format() renders the unit without scientific notation and with
    # thousands separators before it is spliced into the labels.
    title = glue(
      "Cumulative deaths per ",
      format(population_unit, scientific = FALSE, big.mark = ","),
      " population from COVID-19"
    ),
    subtitle = "for selected western states in the United States",
    x = "Date",
    y = glue(
      "Cumulative Deaths per ",
      format(population_unit, scientific = FALSE, big.mark = ",")
    ),
    color = "State"
  )

Daily number of deaths

Plot of the number of deaths reported per day for a few selected states in the United States.

# Deaths per day for the selected states, derived from the cumulative
# `deaths` column as the difference between consecutive rows of a state.
cases %>%
  dplyr::filter(
    state %in% states_to_compare,
    date > lubridate::ymd("2020-02-20")
  ) %>%
  # lag() works on row order, so rows must be in date order within a state.
  dplyr::arrange(date) %>%
  dplyr::group_by(state) %>%
  dplyr::mutate(daily_deaths = deaths - dplyr::lag(deaths)) %>%
  # Release the grouping once the per-state difference has been computed.
  dplyr::ungroup() %>%
  # Drops each state's first row (no previous row, so the difference is NA)
  # and any negative difference, i.e. days on which the cumulative count
  # went down (presumably data revisions -- confirm against the source).
  dplyr::filter(daily_deaths >= 0) %>%
  ggplot(aes(x = date, y = daily_deaths, color = state)) +
  # Both layers inherit x, y and color from the plot-level mapping.
  geom_point(na.rm = TRUE) +
  geom_line(na.rm = TRUE) +
  theme(
    panel.grid.minor = element_line(color = "grey60", size = 0.5),
    panel.grid.major = element_line(color = "grey40", size = 0.5),
    panel.background = element_rect(fill = "snow2")
  ) +
  labs(
    title = "Daily deaths from COVID-19",
    subtitle = "for selected western states in the United States",
    x = "Date",
    y = "Deaths",
    color = "State"
  )

Mortality by state

Plot of deaths as a percentage of confirmed cases for each state, calculated as (deaths / cases) * 100 using the most recent date in the dataset.

# Deaths as a percentage of confirmed cases for every state, using only the
# rows for the most recent date present in the data.
latest_date <- max(cases$date)
cases %>%
  dplyr::filter(date == latest_date) %>%
  dplyr::mutate(pct = (deaths / cases) * 100.0) %>%
  # A discrete x axis is drawn in factor-level order (alphabetical for a
  # character column), not in row order, so sorting the rows would not sort
  # the bars. reorder() sets the level order to descending pct instead.
  ggplot(mapping = aes(x = reorder(state, -pct), y = pct)) +
  geom_col() +
  # Rotate the state names so they do not overlap.
  theme(axis.text.x = element_text(angle = 90)) +
  labs(x = "State", y = "Percentage of deaths for confirmed cases")

Distribution of mortality rates

Plot of the distribution of mortality rates across states in the United States, where the mortality rate is deaths as a percentage of confirmed cases.

# How many states fall into each whole-percent band of deaths per confirmed
# case, using only the rows for the most recent date present in the data.
latest_date <- max(cases$date)
cases %>%
  dplyr::filter(date == latest_date) %>%
  dplyr::mutate(
    pct = (deaths / cases) * 100.0,
    # Round down so that e.g. 2.0 up to (but not including) 3.0 share bin 2.
    pct_bin = floor(pct)
  ) %>%
  # geom_bar() counts the rows for each x value by default, so no explicit
  # y mapping is needed; this avoids the deprecated stat() helper.
  ggplot(mapping = aes(x = pct_bin)) +
  geom_bar() +
  labs(
    title = "Distribution of mortality rates",
    subtitle = "in states in the United States",
    x = "Percentage of deaths for confirmed cases",
    y = "Number of states"
  )


jimtyhurst/covid19 documentation built on Aug. 30, 2020, 3:39 p.m.