covid-19

This is another analysis of the outbreak of Coronavirus / Covid-19 disease.

Many others have done excellent analyses; this one specifically tries to add flavour to the statistics for South Africa.

# Report-wide chunk defaults.
knitr::opts_chunk$set(
  # Do not show the R source or any warnings in the rendered report.
  echo = FALSE,
  warning = FALSE,
  # Where a chunk's output is shown, merge it into a single block and prefix
  # each output line with "#>".
  collapse = TRUE,
  comment = "#>"
)
suppressPackageStartupMessages({
  library(tidyverse)
  library(ggplot2)
  library(ggrepel)
  library(DT)
  library(maps)
  library(pins)
  library(mapproj)
})


# Countries of interest. This vector is passed as `highlighted_countries` to
# the time-series plots and used to filter the faceted "new cases" and
# "new deaths" plots, so every entry has to match the spelling of the
# `country` column in the data.
to_include <- c(
  "South Africa", "Italy", "India", "Brazil",
  "Russia", "China", "US", "Singapore",
  "United Kingdom", "Spain", "Germany", "Sweden"
)

source("R/get_data.R")
source("R/prep_data.R")
source("R/plot_covid.R")

Data source

The data source is a repository maintained by Johns Hopkins University. The data is updated once per day.

Last updated at `r Sys.time()`

Total cases

# This part of the report looks at the total number of cases.
type <- "cases"

# Threshold handed to prep_data_timeseries() further down: 30 when analysing
# cases, 10 for any other type.
imported_threshold <- switch(type, cases = 30, 10)

# Load the data for the selected type through the project's helper.
dat <- get_covid19_data(type)

if (type == "cases") {
  # Append three early observations for China (16, 17 and 20 January 2020)
  # to the loaded data, then reduce everything to one row per country and
  # date: coordinates are averaged and the largest count is kept.
  dat <- bind_rows(
    dat,
    tribble(
      ~country, ~date,                 ~long, ~lat, ~n,
      "China",  as.Date("2020-01-16"), 112,   32.8, 28,
      "China",  as.Date("2020-01-17"), 112,   32.8, 41,
      "China",  as.Date("2020-01-20"), 112,   32.8, 260
      # "South Africa", as.Date("2020-03-30"), 22.9, -30.6, 1326
    )
  ) %>%
    group_by(country, date) %>%
    summarise(
      long = mean(long),
      lat = mean(lat),
      n = max(n),
      # "drop_last" removes only the `date` grouping, so the result stays
      # grouped by country.
      .groups = "drop_last"
    )

  # Tag the rebuilt data with its type.
  # NOTE(review): presumably the attribute is lost in the steps above and is
  # read by the prep/plot helpers - confirm.
  attr(dat, "type") <- "cases"
}
# Ten most recent non-zero case counts for South Africa, in date order.
dat %>%
  filter(country == "South Africa", n > 0) %>%
  select(country, date, lat, long, cases = n) %>%
  arrange(country, date) %>%
  tail(10)

Linear scale

On a linear scale it is easiest to compare the relative magnitude of the outbreak in the worst-affected countries, but it is hard to distinguish the countries that were affected later.

# Time series of total cases on a linear y-axis, with `to_include` passed as
# the highlighted countries.
plot(
  prep_data_timeseries(
    dat,
    threshold = imported_threshold,
    highlighted_countries = to_include
  ),
  scale = "linear"
)

Logarithmic scale

Using a logarithmic scale it's easier to discern whether the pandemic is still in the exponential growth phase. On this scale, a straight line indicates exponential growth.

# Same time series of total cases, drawn on a log10 y-axis.
plot(
  prep_data_timeseries(
    dat,
    threshold = imported_threshold,
    highlighted_countries = to_include
  ),
  scale = "log10"
)

New cases

# Daily new counts per country. `n` is treated as a running total, so the
# daily figure is the difference from the previous date within each country.
# The data is fetched and differenced once and shared by both plots below.
new_counts <- get_covid19_data(type, aggregate = TRUE) %>%
  arrange(country, date) %>%
  group_by(country) %>%
  mutate(new = n - lag(n)) %>%
  ungroup()

# Plot daily new counts with a smoothed trend, one free-scaled facet per
# country.
#
# data         Data frame with columns `country`, `date`, `n` and `new`.
# countries    Character vector of countries to keep.
# min_total    Keep only rows whose total `n` is greater than this value.
# date_breaks  Spacing of the x-axis breaks, e.g. "1 month".
# title        Plot title.
#
# Returns a ggplot object.
plot_new_counts <- function(data, countries, min_total, date_breaks, title) {
  data %>%
    filter(
      n > min_total,
      country %in% countries,
      # Drops negative differences as well as the NA produced by lag() on
      # each country's first date.
      new >= 0
    ) %>%
    ggplot(aes(x = date, y = new, group = country)) +
    geom_line() +
    # Poisson GAM smoother with a shrinkage thin-plate spline basis.
    geom_smooth(
      method = "gam",
      formula = y ~ s(x, bs = "ts"),
      method.args = list(family = poisson, gamma = 4)
    ) +
    scale_x_date(date_breaks = date_breaks, guide = guide_axis(n.dodge = 2)) +
    ggtitle(title) +
    xlab(NULL) +
    ylab(NULL) +
    facet_wrap(facets = "country", scales = "free")
}

# South Africa only, from the first reported case, with monthly breaks.
plot_new_counts(
  new_counts,
  countries = "South Africa",
  min_total = 0,
  date_breaks = "1 month",
  title = "New cases reported"
)

# All countries of interest, once each has passed 100 total cases.
plot_new_counts(
  new_counts,
  countries = to_include,
  min_total = 100,
  date_breaks = "3 month",
  title = "New cases reported"
)

Regional distribution of Covid-19 cases

# Build the map once. The regional views below differ only in their
# coordinate system, so they reuse this object instead of re-running
# prep_data_map() and plot() for every view.
world_map <- dat %>%
  prep_data_map() %>%
  plot()

# Whole world.
world_map

# Roughly Europe: conic projection with lat0 = 50, longitude -15 to 35,
# latitude 35 to 65.
world_map +
  coord_map(
    projection = "conic",
    lat0 = 50,
    xlim = c(-15, 35),
    ylim = c(35, 65),
    clip = "on"
  )

# Roughly Africa and the Middle East: default projection, longitude -30 to
# 60, latitude -35 to 35.
world_map +
  coord_map(xlim = c(-30, 60), ylim = c(-35, 35))

Total number of deaths

# From here on the report looks at the total number of deaths.
type <- "deaths"

# Threshold handed to prep_data_timeseries() further down: 30 when analysing
# cases, 10 for any other type (so 10 here).
imported_threshold <- switch(type, cases = 30, 10)

# Load the data for the selected type through the project's helper.
dat <- get_covid19_data(type)
# Ten most recent non-zero death counts for South Africa, in date order.
dat %>%
  filter(country == "South Africa", n > 0) %>%
  select(country, date, deaths = n) %>%
  arrange(country, date) %>%
  tail(10)

Linear scale

# Time series of total deaths on a linear y-axis, with `to_include` passed
# as the highlighted countries.
plot(
  prep_data_timeseries(
    dat,
    threshold = imported_threshold,
    highlighted_countries = to_include
  ),
  scale = "linear"
)

Logarithmic scale

# Same time series of total deaths, drawn on a log10 y-axis.
plot(
  prep_data_timeseries(
    dat,
    threshold = imported_threshold,
    highlighted_countries = to_include
  ),
  scale = "log10"
)

New deaths

# Daily new deaths for the countries of interest, one free-scaled facet per
# country, each with a smoothed trend line.
get_covid19_data(type, aggregate = TRUE) %>%
  arrange(country, date) %>%
  group_by(country) %>%
  # `n` is treated as a running total; the daily figure is the change from
  # the previous date within the same country.
  mutate(new = n - lag(n)) %>%
  # Keep rows after a country has passed 10 total deaths, and drop negative
  # differences (plus the NA on each country's first date).
  filter(n > 10, country %in% to_include, new >= 0) %>%
  # To inspect a single country, add e.g.:
  #   filter(country == "South Africa") %>%
  #   filter(country == "Italy") %>%
  ggplot(aes(date, new, group = country)) +
  geom_line() +
  # Poisson GAM smoother with a shrinkage thin-plate spline basis.
  geom_smooth(
    method = "gam",
    formula = y ~ s(x, bs = "ts"),
    method.args = list(family = poisson, gamma = 4)
  ) +
  scale_x_date(date_breaks = "1 month", guide = guide_axis(n.dodge = 2)) +
  labs(title = "New deaths reported", x = NULL, y = NULL) +
  facet_wrap(vars(country), scales = "free")

Regional distribution of Covid-19 deaths

# Build the map once. The regional views below differ only in their
# coordinate system, so they reuse this object instead of re-running
# prep_data_map() and plot() for every view.
world_map <- dat %>%
  prep_data_map() %>%
  plot()

# Whole world.
world_map

# Roughly Europe: conic projection with lat0 = 50, longitude -15 to 35,
# latitude 35 to 65.
world_map +
  coord_map(
    projection = "conic",
    lat0 = 50,
    xlim = c(-15, 35),
    ylim = c(35, 65),
    clip = "on"
  )

# Roughly Africa and the Middle East: default projection, longitude -30 to
# 60, latitude -35 to 35.
world_map +
  coord_map(xlim = c(-30, 60), ylim = c(-35, 35))


andrie/covid-19 documentation built on March 10, 2021, 3:13 p.m.