knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

For additional data and details please see this R package's GitHub repository.

library(data2019nCoV)

library(data.table)
library(tidyr)
library(dplyr)
library(ggplot2)
library(scales)

WHO_SR$Date[length(WHO_SR$Date)]
sarscov2_ecdc_2019$date[length(sarscov2_ecdc_2019$date)]

This vignette visualizes data in the R package data2019nCoV, available on GitHub. Be sure to reinstall the package to use the latest data, via devtools::install_github("eebrown/data2019nCoV").

The global data comes from 2 sources, the World Health Organization and the European CDC.

The data were manually extracted from the WHO situation reports and stored in raw-data/WHO_SR.csv, which is loaded as the data frame WHO_SR in this package. This package uses freely available data, and itself is freely available with the CC0 licence. Information from WHO may be subject to WHO copyright and terms. The data source and package should be attributed. No warranty is made as to the accuracy of the data or transcription: use at your own risk. Please file an issue if you find any errors.

European CDC data was extracted by Our World in Data.

Global Cases

# Global Cases
options(scipen=5)
#plot(WHO_SR$Date, WHO_SR$Global.confirmed, 
#     main = "Global COVID-19 Cases",
#     xlab = "Date",
#     ylab = "Confirmed Cases",
#     type = "b")

plot(sarscov2_ecdc_2019$date, sarscov2_ecdc_2019$cases_global, 
     main = "Global COVID-19 Cases",
     xlab = "Date",
     ylab = "Confirmed Cases",
     type = "b")

New Daily Cases

# A function to calculate the daily change
daily_change <- function(series) {
  change <- c(series, NA) - c(NA, series)
  change <- change[-1]
  change <- change[-length(change)]
  return(change)
}

# Remove the change introduced with SR 27, when the definition was expanded.
change_cases <- daily_change(WHO_SR$Global.confirmed)
change_cases_ecdc <- daily_change(sarscov2_ecdc_2019$cases_global)

#plot(WHO_SR$Date[-1], change_cases,
#     main = "Change in Global Cases by Date",
#     ylab = "New Confirmed Cases",
#     xlab = "Date",
#     type = "b")

plot(sarscov2_ecdc_2019$date[-1], change_cases_ecdc,
     main = "Change in Global Cases by Date",
     ylab = "New Confirmed Cases",
     xlab = "Date",
     type = "b")

Global Cases

# Every country
gather(sarscov2_ecdc_2019, key, value, 

       cases_afg, cases_alb, cases_dza, cases_and, cases_ago, cases_aia, cases_atg, cases_arg, cases_arm, cases_abw, cases_aus, cases_aut, cases_aze, cases_bhs, cases_bhr, cases_bgd, cases_brb, cases_blr, cases_bel, cases_blz, cases_ben, cases_bmu, cases_btn, cases_bol, cases_bes, cases_bih, cases_bwa, cases_bra, cases_vgb, cases_brn, cases_bgr, cases_bfa, cases_bdi, cases_khm, cases_cmr, cases_can, cases_cpv, cases_cym, cases_caf, cases_tcd, cases_chl, cases_chn, cases_col, cases_cog, cases_cri, cases_civ, cases_hrv, cases_cub, cases_cuw, cases_cyp, cases_cze, cases_cod, cases_dnk, cases_dji, cases_dma, cases_dom, cases_ecu, cases_egy, cases_slv, cases_gnq, cases_eri, cases_est, cases_eth, cases_fro, cases_flk, cases_fji, cases_fin, cases_fra, cases_pyf, cases_gab, cases_gmb, cases_geo, cases_deu, cases_gha, cases_gib, cases_grc, cases_grl, cases_grd, cases_gum, cases_gtm, cases_ggy, cases_gin, cases_gnb, cases_guy, cases_hti, cases_hnd, cases_hun, cases_isl, cases_ind, cases_idn, cases_irn, cases_irq, cases_irl, cases_imn, cases_isr, cases_ita, cases_jam, cases_jpn, cases_jey, cases_jor, cases_kaz, cases_ken, cases_kosovo, cases_kwt, cases_kgz, cases_lao, cases_lva, cases_lbn, cases_lbr, cases_lby, cases_lie, cases_ltu, cases_lux, cases_mkd, cases_mdg, cases_mwi, cases_mys, cases_mdv, cases_mli, cases_mlt, cases_mrt, cases_mus, cases_mex, cases_mda, cases_mco, cases_mng, cases_mne, cases_msr, cases_mar, cases_moz, cases_mmr, cases_nam, cases_npl, cases_nld, cases_ncl, cases_nzl, cases_nic, cases_ner, cases_nga, cases_mnp, cases_nor, cases_omn, cases_pak, cases_xwb, cases_pan, cases_png, cases_pry, cases_per, cases_phl, cases_pol, cases_prt, cases_pri, cases_qat, cases_rou, cases_rus, cases_rwa, cases_kna, cases_lca, cases_vct, cases_smr, cases_stp, cases_sau, cases_sen, cases_srb, cases_syc, cases_sle, cases_sgp, cases_svk, cases_svn, cases_som, cases_zaf, cases_kor, cases_ssd, cases_esp, cases_lka, cases_sdn, cases_sur, cases_swz, cases_swe, cases_che, cases_syr, cases_twn, cases_tza, cases_tha, cases_tls, cases_tgo, cases_tto, cases_tun, cases_tur, cases_uga, cases_ukr, cases_are, cases_gbr, cases_usa, cases_vir, cases_ury, cases_uzb, cases_ven, cases_vnm, cases_yem, cases_zmb, cases_zwe) %>%
  ggplot(aes(x=date, y=value, col=key)) +
  geom_line() +
  theme(legend.position="none") +
  labs(title = "Confirmed Cases by Region",
       x = "Date", 
       y = "Confirmed Cases") +
  theme(legend.title = element_blank())

Chinese Provinces

gather(WHO_SR, key, value, China.Hubei,
       China.Guangdong, China.Beijing, China.Shanghai, China.Chongqing,        
       China.Zhejiang, China.Jiangxi, China.Sichuan, China.Tianjin, China.Henan,
       China.Hunan, China.Shandong, China.Yunnan, China.Taiwan, China.Taipei, 
       China.HongKongSAR, China.Macao, China.Unspecified, China.Anhui, China.Jiangsu,   
       China.Fujian, China.Shaanxi, China.Guangxi, China.Hebei, China.Heilongjiang,
       China.Liaoning, China.Hainan, China.Shanxi, China.Gansu, China.Guizhou, China.Ningxia,
       China.InnerMongolia, China.Xinjiang, China.Jilin, China.Qinghai, China.Xizang) %>%
  ggplot(aes(x=Date, y=value, col=key)) +
  geom_line() +
  scale_y_continuous(trans = 'log10', labels = comma) +
  labs(title = "Cases by Chinese Province (Semilogarithmic)",
       x = "Date", 
       y = "Confirmed Cases") +
  theme(legend.title = element_blank())

Major Outbreaks

# Major Outbreaks

# gather(sarscov2_ecdc_2019, key, value, 
#        cases_chn, 
#        cases_ita,
#        cases_irn,
#        cases_kor,
#        cases_esp, 
#        cases_deu, 
#        cases_fra,  
#        cases_usa,
#        cases_che, #Switzerland, 
#        cases_nld, #Netherlands, 
#        cases_gbr, #UnitedKingdom, 
#        cases_aut, #Austria
#        cases_tur,
#        ) %>%
#   ggplot(aes(x=date, y=value, col=key)) +
#   geom_line() +
#   theme(legend.position="right", legend.title = element_blank()) +
#   scale_y_continuous(trans = 'log10', labels = comma) +
#   labs(title = "Major COVID-19 Outbreaks (Semilogarithmic)",
#        x = "Date", 
#        y = "Confirmed Cases") 
# 
 overx <- function(country, x) {
   over <- country[country > x] 
   over <- c(over, rep(NA, length(WHO_SR$Date)))
   return(over)
 }

 start_no <- 100


# library(data.table)
#  
# change_ita <- frollmean(daily_change(overx(sarscov2_ecdc_2019$cases_ita, start_no)) / 60461826, 7) *100000
# change_usa <- frollmean(daily_change(overx(sarscov2_ecdc_2019$cases_usa, start_no)) / 331002651, 7)*100000
# change_deu <- frollmean(daily_change(overx(sarscov2_ecdc_2019$cases_deu, start_no)) / 83783942, 7)*100000
# change_can <- frollmean(daily_change(overx(sarscov2_ecdc_2019$cases_can, start_no)) / 37742154, 7)*100000
#  
# matplot(cbind(change_ita, change_usa, change_deu, change_can), 
#         col=c("blue", "orange", "red", "green"), type="l",
#       xlab=paste0("Days since each region's 100th case"),
#       ylab= "Cases",
#       #main=paste0("Outbreak Progression from ", start_no, " Cases"),
#       xlim=c(0,80))
#      ylim=c(1000,700000))
# lines(change_usa, col="orange")
# lines(change_deu, col="red")
# lines(change_can, col="green")
# lines(overx(WHO_SR$Spain, start_no), col="brown")
# lines(overx(WHO_SR$Germany, start_no), col="coral")
# lines(overx(WHO_SR$France,  start_no), col="purple")
# lines(overx(WHO_SR$UnitedStatesofAmerica, start_no), col="blue")
# lines(overx(WHO_SR$Switzerland,  start_no), col="black")
# lines(overx(WHO_SR$Austria,  start_no), col="black")
# lines(overx(WHO_SR$UnitedKingdom,  start_no), col="cornflowerblue")
# lines(overx(WHO_SR$Netherlands, start_no), col="black")
# lines(overx(WHO_SR$Norway, start_no), col="black")
# lines(overx(WHO_SR$Sweden, start_no), col="black")
# lines(overx(WHO_SR$Denmark, start_no), col="black")
# lines(overx(WHO_SR$Belgium, start_no), col="black")
# lines(overx(WHO_SR$Canada, start_no), col="red")
# 
# legend(x = "right", legend = c("USA", "Spain", "Germany", 
#                                "Italy", "China", "UK", 
#                                "France", "Iran", "Korea", "Canada"),
#        col =    c("blue", "brown",   "coral",  
#                   "green", "gray", "cornflowerblue", 
#                   "purple", "orange", "magenta", "red"), 
#        pch=18)

Outbreaks - Daily Cases

outbreaks <- list(China = sarscov2_ecdc_2019$cases_chn, 
                  Italy = sarscov2_ecdc_2019$cases_ita,
                  Canada = sarscov2_ecdc_2019$cases_can,
                  Spain = sarscov2_ecdc_2019$cases_esp,
                  Germany = sarscov2_ecdc_2019$cases_deu,
                  Iran = sarscov2_ecdc_2019$cases_irn,
                  France = sarscov2_ecdc_2019$cases_fra,
                  Korea = sarscov2_ecdc_2019$cases_kor)

par(mfrow=c(4,2))

for (i in 1:length(outbreaks)) {

 plot(sarscov2_ecdc_2019$date[-1], frollmean(daily_change(outbreaks[[i]]), 7),
      main = names(outbreaks)[[i]],
      ylab = "Daily Cases",
      xlab = "Date",
      type = "l",
      ylim = c(0,60000))
}

 par(mfrow=c(4,2))

  plot(sarscov2_ecdc_2019$date[-1], frollmean(daily_change(sarscov2_ecdc_2019$cases_usa), 7),
       main = "United States - Cases",
       ylab = "Daily Cases",
       xlab = "Date",
       type = "l",
       ylim = c(0,300000))
 # lines(sarscov2_ecdc_2019$date[-1], frollmean(sarscov2_ecdc_2019$cases_usa, 7)[-1], col="green")

  plot(sarscov2_ecdc_2019$date[-1], frollmean(daily_change(sarscov2_ecdc_2019$deaths_usa), 7),
      main = "United States - Deaths",
      ylab = "Daily Cases",
      xlab = "Date",
      type = "l",
      ylim = c(0,5000))

   plot(sarscov2_ecdc_2019$date[-1], frollmean(daily_change(sarscov2_ecdc_2019$cases_ita), 7),
      main = "Italy - Cases",
      ylab = "Daily Cases",
      xlab = "Date",
      type = "l",
      ylim = c(0,100000))

    plot(sarscov2_ecdc_2019$date[-1], frollmean(daily_change(sarscov2_ecdc_2019$deaths_ita), 7),
      main = "Italy - Deaths",
      ylab = "Daily Cases",
      xlab = "Date",
      type = "l",
      ylim = c(0,3000))

     plot(sarscov2_ecdc_2019$date[-1], frollmean(daily_change(sarscov2_ecdc_2019$cases_esp), 7),
      main = "Spain - Cases",
      ylab = "Daily Cases",
      xlab = "Date",
      type = "l",
      ylim = c(0,100000))

      plot(sarscov2_ecdc_2019$date[-1], frollmean(daily_change(sarscov2_ecdc_2019$deaths_esp), 7),
      main = "Spain - Deaths",
      ylab = "Daily Cases",
      xlab = "Date",
      type = "l",
      ylim = c(0,3000))

      plot(sarscov2_ecdc_2019$date[-1], frollmean(daily_change(sarscov2_ecdc_2019$cases_fra), 7),
      main = "France - Cases",
      ylab = "Daily Cases",
      xlab = "Date",
      type = "l",
      ylim = c(0,100000))

      plot(sarscov2_ecdc_2019$date[-1], frollmean(daily_change(sarscov2_ecdc_2019$deaths_fra), 7),
      main = "France - Deaths",
      ylab = "Daily Cases",
      xlab = "Date",
      type = "l",
      ylim = c(0,3000))

      plot(sarscov2_ecdc_2019$date[-1], frollmean(daily_change(sarscov2_ecdc_2019$cases_rus), 7),
      main = "Russia - Cases",
      ylab = "Daily Cases",
      xlab = "Date",
      type = "l",
      ylim = c(0,100000))

      plot(sarscov2_ecdc_2019$date[-1], frollmean(daily_change(sarscov2_ecdc_2019$deaths_rus), 7),
      main = "Russia - Deaths",
      ylab = "Daily Cases",
      xlab = "Date",
      type = "l",
      ylim = c(0,3000))

            plot(sarscov2_ecdc_2019$date[-1], frollmean(daily_change(sarscov2_ecdc_2019$cases_bra), 7),
      main = "Brazil - Cases",
      ylab = "Daily Cases",
      xlab = "Date",
      type = "l",
      ylim = c(0,100000))

      plot(sarscov2_ecdc_2019$date[-1], frollmean(daily_change(sarscov2_ecdc_2019$deaths_bra), 7),
      main = "Brazil - Deaths",
      ylab = "Daily Cases",
      xlab = "Date",
      type = "l",
      ylim = c(0,3000))

      plot(sarscov2_ecdc_2019$date[-1], frollmean(daily_change(sarscov2_ecdc_2019$cases_ind), 7),
      main = "India - Cases",
      ylab = "Daily Cases",
      xlab = "Date",
      type = "l",
      ylim = c(0,100000))

      plot(sarscov2_ecdc_2019$date[-1], frollmean(daily_change(sarscov2_ecdc_2019$deaths_ind), 7),
      main = "India - Deaths",
      ylab = "Daily Cases",
      xlab = "Date",
      type = "l",
      ylim = c(0,3000))

Deaths

# Change in Cases Between Reports
plot(sarscov2_ecdc_2019$date[-1], daily_change(sarscov2_ecdc_2019$deaths_global),
     main = "Change in Deaths by Date",
     ylab = "New Deaths",
     xlab = "Date",
     type = "b")
lines(sarscov2_ecdc_2019$date[-1],
frollmean(daily_change(sarscov2_ecdc_2019$deaths_global), 7), col="red")

plot(sarscov2_ecdc_2019$date[-1], daily_change(sarscov2_ecdc_2019$deaths_global),
     main = "Change in Deaths by Date (Semilog.)",
     ylab = "New Deaths",
     xlab = "Date",
     type = "b",
     log = "y")

Case Fatality Rate

matplot(as.Date(sarscov2_ecdc_2019$date), cbind( 
                      (sarscov2_ecdc_2019$deaths_chn / sarscov2_ecdc_2019$cases_chn)*100,
                      (sarscov2_ecdc_2019$deaths_kor / sarscov2_ecdc_2019$cases_kor)*100,
                      (sarscov2_ecdc_2019$deaths_ita / sarscov2_ecdc_2019$cases_ita)*100,
                      (sarscov2_ecdc_2019$deaths_usa / sarscov2_ecdc_2019$cases_usa)*100,
                      (sarscov2_ecdc_2019$deaths_can / sarscov2_ecdc_2019$cases_can)*100,
                      (sarscov2_ecdc_2019$deaths_fra / sarscov2_ecdc_2019$cases_fra)*100,
                      (sarscov2_ecdc_2019$deaths_deu / sarscov2_ecdc_2019$cases_deu)*100),
     main = "Case Fatality Rate",
     xlab = "Date",
     ylab = "Deaths / Confirmed Cases (%)",
     type = "l",
     lty = "solid",
     col = c("black",   "blue",  "green", "magenta", "red", "brown", "orange"),
     ylim = c(0,20),
     xaxt="n")
dates<-format(sarscov2_ecdc_2019$date,"%b-%d")
axis(1, at=sarscov2_ecdc_2019$date, labels=dates)
legend(x="left", 
       legend = c("Italy", "France", "USA", "China", "Canada", "Korea", "Germany"), 
       col =    c("green", "brown", "magenta", "black", "red", "blue", "orange"), 
       pch=18)
population_US <- 329968629
population_CAN <- 37894799

range <- 80:length(sarscov2_ecdc_2019$cases_can)

matplot(as.Date(sarscov2_ecdc_2019$date[range]), cbind( 
                           ( (sarscov2_ecdc_2019$cases_can / population_CAN)[range] * 100 ),
                           ( (sarscov2_ecdc_2019$deaths_can / population_CAN)[range] * 1000 ),
                           ( (sarscov2_ecdc_2019$cases_usa / population_US)[range] * 100 ),
                           ( (sarscov2_ecdc_2019$deaths_usa / population_US)[range] * 1000 )
                           ),

     main = "Cases and Deaths Per Capita",
     xlab = "Date (2020)",
     ylab = "Cases (% population); Deaths (per 1000 population)",
     type = "l",
     col = c("red",   "red",  "blue", "blue"),
     lty = c("solid", "dotted", "solid", "dotted"),
     ylim = c(0,4),
     ylog = TRUE,
     xaxt="n")
dates<-format(sarscov2_ecdc_2019$date,"%b %d")
axis(1, at=sarscov2_ecdc_2019$date, labels=dates)
legend(x="top", 
       legend = c("Canada Cases (%)", "Canada Deaths (per 1000)", "USA Cases (%)", "USA Deaths (per 1000)", "Estimates of Annual Influenza Deaths per 1000"), 
       col =    c("red",   "red",  "blue", "blue", "grey"),
       lty = c("solid", "dotted", "solid", "dotted", "dotted"),
       pch=18)
#annual mortality from influenza per thousand (https://ipac-canada.org/influenza-resources.php)
abline(h = .020, col="grey", lty="dotted") 
#lower and upper annual mortality from influenza per thousand (CDC)
abline(h = .036, col="grey", lty="dotted") 
abline(h = .184, col="grey", lty="dotted") 
# (https://www.ontario.ca/page/flu-facts)
abline(h = .092, col="grey", lty="dotted") 


eebrown/data2019nCoV documentation built on Dec. 10, 2020, 2:17 p.m.