# TODO:
#   * case input: limit values to 1, 5, 10, 50, & 100
#   * make DATA reactive
#   * add deaths
#   * link country to continent
#   * link country to wb-statistics
# some controls ----------------------------------------------------------------
from.web <- TRUE # only use true
from.eu <- TRUE  # only use true
countries.selected <- 
  c("Iceland", "Faroe Islands", "Greenland", "Norway", "Denmark",
    "Sweden", "Finland", "Estonia", "Latvia", "Lithuania",
    "Poland", "Germany", "Netherlands", "Belgium", "France",
    "Austria", "Switzerland", "Italy", "Spain", "Portugal",
    "United Kingdom", "United States")

# libraries (and then some) ----------------------------------------------------
library(lubridate)
library(highcharter)
library(tidyverse)
library(DT)
library(shiny)

# functions --------------------------------------------------------------------
read_covid2 <- function() {

  # this libraries need to be installed
  # library(httr)

  #download the dataset from the ECDC website to a local temporary file
  httr::GET("https://opendata.ecdc.europa.eu/covid19/casedistribution/csv",
            httr::authenticate(":", ":", type="ntlm"),
            httr::write_disk(tf <- tempfile(fileext = ".csv")))

  #read the Dataset sheet into “R”. The dataset will be called "data".
  readr::read_csv(tf) %>%
    mutate(date = dmy(dateRep)) %>%
    select(date,
           confirmed = cases,
           deaths,
           country = countriesAndTerritories,
           iso3c = countryterritoryCode,
           pop = popData2018)

}
covid_doubling_time <- function(d, DAY = 0L, variable = confirmed) {
  d %>%
    arrange(country, desc(date)) %>%
    group_by(country) %>%
    mutate(DAYS = (1:n()) - 1) %>%
    filter(DAYS >= DAY) %>%
    select(country, date, n = {{ variable }} ) %>%
    mutate(days = (1:n()) - 1,
           r = n / max(n),
           r.unique = length(unique(r))) %>%
    filter(r.unique > 1) %>%
    summarise(z = approx(r, days, xout = 0.5)$y) %>%
    ungroup()
}

# getting data -----------------------------------------------------------------
if(from.web) {

  # reading in from web
  if(from.eu) {
    daily <- 
      read_covid2() %>% 
      # needs fixing
      drop_na() %>% 
      filter(iso3c != "N/A") %>% 
      #filter(!is.na(country)  !is.na(iso3c), !is.na(date)) %>% 
      rename(dc = confirmed,
             dd = deaths)
    # easiest would be to use complete
    DATA <- 
      daily %>% 
      select(iso3c, date) %>% 
      expand(iso3c, date) %>% 
      left_join(daily %>% 
                  select(iso3c, date, dc, dd)) %>% 
      mutate(dc = ifelse(is.na(dc), 0, dc),
             dd = ifelse(is.na(dd), 0, dd)) %>% 
      left_join(daily %>% 
                  select(iso3c, iso3c, pop) %>% 
                  distinct()) %>% 
      arrange(iso3c, date) %>% 
      group_by(iso3c) %>% 
      mutate(confirmed = cumsum(dc),
             deaths = cumsum(dd),
             country = countrycode::countrycode(iso3c,
                                                origin = "iso3c",
                                                destination = "country.name")) %>% 
      ungroup() %>% 
      select(country, iso3c, date, confirmed, deaths, dc, dd, pop) %>% 
      mutate(country = ifelse(iso3c == "XKX", "Kosovo", country))
    #write_rds(DATA, "data/covid_eu.rds")
  } else {
    DATA <- 
      read_covid(variable = "confirmed") %>% 
      covid_tidy() %>%
      group_by(country, iso3c, date) %>% 
      summarise(n = sum(n)) %>% 
      ungroup() %>% 
      rename(confirmed = n) %>% 
      left_join(read_covid(variable = "deaths") %>% 
                  covid_tidy() %>% 
                  group_by(country, iso3c, date) %>%
                  summarise(n = sum(n)) %>% 
                  ungroup() %>% 
                  rename(deaths = n)) %>% 
      left_join(read_covid(variable = "recovered") %>% 
                  covid_tidy() %>% 
                  group_by(country, iso3c, date) %>%
                  summarise(n = sum(n)) %>% 
                  ungroup() %>% 
                  rename(recovered = n)) %>% 
      mutate(deaths = case_when(country == "Iceland" &
                                  date == ymd("2020-03-15") ~ 0,
                                TRUE ~ deaths)) %>% 
      arrange(country, date) %>% 
      group_by(country) %>% 
      mutate(dc = replace_na(confirmed - lag(confirmed), 0),
             dd = replace_na(deaths - lag(deaths), 0),
             dr = replace_na(recovered - lag(recovered), 0)) %>% 
      ungroup() %>% 
      left_join(read_rds("data/wbstats01.rds") %>% 
                  select(iso3c, pop = population)) %>% 
      ungroup()
    #write_rds(DATA, "data/covid_jhu.rds")
  }

} else {
  if(from.eu) {
    #DATA <- read_rds("data/covid_eu.rds")
  } else {
    #DATA <- read_rds("data/covid_jhu.rds")
  }
}


countries <-
  c(countries.selected,   
    DATA %>% pull(country) %>% unique() %>% sort()) %>% 
  unique()

res <- list()
for(i in 0L:20L) {
  res[[i + 1]] <-
    DATA %>%
    covid_doubling_time(i) %>%
    mutate(day = i)
}

confirmed.double <-
  bind_rows(res)

last.date <- DATA %>% pull(date) %>% max()
# NOT USED ANYMORE

DATAr <- 
  DATA %>%
  mutate(confirmed = confirmed / pop * 1e6,
         deaths = deaths / pop * 1e6) %>% 
  select(country, iso3c, date, confirmed, deaths) %>% 
  arrange(country, date) %>% 
  group_by(country, iso3c) %>% 
  mutate(dc = confirmed - lag(confirmed),
         #dc = replace_na(dc, 0),
         dd = deaths - lag(deaths),
         #dc = replace_na(dd, 0),
         day0c = date - min(date[confirmed >= 1]),
         day0d = date - min(date[deaths >= 1])) %>% 
  filter(day0c >= 0)




# creates an error
#res <- list()
#for(i in 0L:20L) {
#  print(i)
#  res[[i + 1]] <-
#    DATA %>%
#    covid_doubling_time(i, variable = deaths) %>%
#    mutate(day = i)
#}

deaths.double <-
  bind_rows(res)
# ------------------------------------------------------------------------------
# Messy percentage
# munge data
d <- 
  DATA %>% 
  filter(confirmed > 0) %>% 
  arrange(country, desc(date)) %>% 
  group_by(country) %>% 
  mutate(n.days = n()) %>%
  ungroup() %>% 
  filter(n.days >= 25) %>% 
  group_by(country) %>% 
  mutate(d0 = -((1:n()) - 1)) %>% 
  ungroup()

my.model <- function(df, time = 0, days = 5) {
  df <-
    df %>% 
    filter(between(d0, time - (days - 1), time))
  pct <- (lsfit(df$d0, log(df$confirmed))$coeff[["X"]] %>% exp() - 1) * 100
  df %>% 
    slice(1) %>% 
    mutate(pct = pct)
}

res.country <- list()
res.time <- list()
# Do for all countries that have more than 100 cases
countries <- 
  d %>% 
  group_by(country) %>% 
  mutate(max.cases = max(confirmed)) %>% 
  filter(max.cases >= 100) %>% 
  ungroup() %>% 
  pull(country) %>% 
  unique() %>% 
  sort()

for(cntr in 1:length(countries)) {

  print(countries[cntr])

  IS <- 
    d %>% 
    filter(country == countries[cntr])

  for(t in -20:0) {
    #print(t)
    res.time[[t + 21]] <- 
      IS %>% 
      my.model(time = t, days = 10)
  }
  res.country[[cntr]] <-
    bind_rows(res.time)
}

confirmed.percentage <-
  bind_rows(res.country) %>% 
  group_by(country) 

Sidebar {.sidebar data-width=175}

Most recent date: r as.character(last.date)

radioButtons("scale", "Statistic:", c("relative", "absolute"))
radioButtons("log", "Y-scale:", c("log", "ordinary"))
numericInput("case", "Minimum cases:", value = 10, min = 1, max = 1000)
selectInput("country", "Select country:",
            choices = countries,
            selected = countries.selected,
            multiple = TRUE)

Main

Column {.tabset}

Cumulative

renderPlot({ 
  d100 <-
    DATA %>% 
    filter(country %in% input$country)
  if(input$scale == "relative") {
    d100 <- 
      d100 %>% 
      mutate(confirmed = confirmed / pop * 1e6,
             deaths = deaths / pop * 1e6)
  }
  d100 <- 
    d100 %>% 
    mutate(case.min = input$case) %>%
    arrange(country, date) %>%
    group_by(country) %>%
    mutate(day = as.integer(date - min(date[confirmed >= case.min]))) %>%
    filter(day >= 0) %>% 
    ungroup()

  x.max <- d100  %>% pull(day) %>% max()
  y.max <- max(d100$confirmed)
  d.highlight <-
    d100 %>%
    filter(country %in% countries) %>%
    rename(fcountry = country)

  d.median <-
    d100 %>%
    filter(country %in% countries) %>%
    group_by(day) %>%
    summarise(confirmed = median(confirmed)) %>%
    ungroup()

  CASE.MIN <-
    dplyr::case_when(input$case  ==  1 ~ paste0(input$case, "st"),
                     input$case  ==  2 ~ paste0(input$case, "nd"),
                     input$case  ==  3 ~ paste0(input$case, "rd"),
                     TRUE ~ paste0(input$case, "th"))

  my.subtitle <- 
    paste0("Cumulative number of confirmed cases",
           ifelse(input$scale == "relative", " per million ", " "),
           "by number of days since ", CASE.MIN, " case",
           ifelse(input$scale == "relative", " per million ", ""))

  p <- 
    ggplot() +
    theme_bw(base_size = 12) +
    theme(panel.grid.minor = element_line(colour = "white"),
          strip.text = element_text(hjust = 0),
          axis.ticks.length.y = unit(.0001, "cm"),
          axis.ticks.length.x = unit(.0001, "cm"),
          panel.spacing = unit(0.3, "lines"),
          strip.background = element_blank(),
          strip.text.x = element_text(size = 11, hjust = 0, colour = "red4",
                                      margin = margin(0,0,0,0, "cm"))) +
    geom_line(data = d100 %>% filter(country %in% countries),
              aes(day, confirmed, group = country),
              colour = "grey")
  if(input$scale == "relative") {
    p <- 
      p +
      geom_smooth(data = d.median,
                  aes(day, confirmed),
                  colour = "yellow",
                  se = FALSE,
                  lwd = 2)
  }
  p <- 
    p +
    geom_line(data = d.highlight,
              aes(day, confirmed),
              colour = "red4") +
    geom_line(data = d.highlight,
              aes(day, deaths),
              colour = "red") +
    scale_x_continuous(limits = c(0, x.max),
                       expand = c(0, 0)) +
    facet_wrap(~ fcountry) +
    labs(x = NULL, y = NULL,
         subtitle = my.subtitle)

  if(input$log == "log") {
    p + scale_y_log10(breaks = c(0.1, 1, 10, 100, 1000, 10000, 100000),
                      labels = c("0.1", "1", "10", "100", "1K", "10K", "100K"),
                      limits = c(input$case, NA),
                      expand = c(0, 0))
  } else {
    p
  }

})

Cumulative2

renderHighchart({

  d100 <-
    DATA %>% 
    filter(country %in% input$country)
  if(input$scale == "relative") {
    d100 <- 
      d100 %>% 
      mutate(confirmed = confirmed / pop * 1e6,
             deaths = deaths / pop * 1e6)
  }
  d100 <- 
    d100 %>% 
    mutate(case.min = input$case) %>%
    arrange(country, date) %>%
    group_by(country) %>%
    mutate(day = as.integer(date - min(date[confirmed >= case.min]))) %>%
    filter(day >= 0) %>% 
    ungroup()

  h <-
    hchart(d100 %>% 
             mutate(confirmed = round(confirmed, 1)),
           "line", hcaes(x = day, y = confirmed, group = country)) %>%
    hc_add_theme(
      hc_theme_flatdark(
        chart = list(
          backgroundColor = "transparent",
          divBackgroundImage = "http://www.wired.com/images_blogs/underwire/2013/02/xwing-bg.gif"
        )
      )
    )

  if(input$log == "log") {
    h %>%  hc_yAxis(type = "logarithmic", 
                    min = input$case)
  } else {
    h
  }



})

Daily

renderPlot({ 
  d100 <-
    DATA %>% 
    filter(country %in% input$country)

  if(input$scale == "relative") {
    d100 <- 
      d100 %>% 
      mutate(confirmed = confirmed / pop * 1e6,
             dc = dc / pop * 1e6,
             dd = dd / pop * 1e6)
  }

  d100 <- 
    d100 %>% 
    mutate(case.min = input$case) %>%
    arrange(country, date) %>%
    group_by(country) %>%
    mutate(day = as.integer(date - min(date[confirmed >= case.min], na.rm = TRUE))) %>%
    filter(day >= 0) %>% 
    ungroup()

  # FALSIFY
  if(input$log == "log") {
    d100 <-
      d100 %>% 
      mutate(dc = if_else(dc < 1, 1, dc, NA_real_),
             dd = if_else(dd < 1, 1, dd, NA_real_))
  }


  x.max <- d100  %>% pull(day) %>% max()
  y.max <- max(d100$dc)
  #d.highlight <-
  #  d100 %>%
  #  filter(country %in% countries) %>%
  #  rename(fcountry = country)

  #d.median <-
  #  d100 %>%
  #  filter(country %in% countries) %>%
  #  group_by(day) %>%
  #  summarise(confirmed = median(confirmed)) %>%
  #  ungroup()

  CASE.MIN <-
    dplyr::case_when(input$case  ==  1 ~ paste0(input$case, "st"),
                     input$case  ==  2 ~ paste0(input$case, "nd"),
                     input$case  ==  3 ~ paste0(input$case, "rd"),
                     TRUE ~ paste0(input$case, "th"))

  my.subtitle <- 
    paste0("Number of confirmed cases",
           ifelse(input$scale == "relative", " per million ", " "),
           "by number of days since ", CASE.MIN, " case",
           ifelse(input$scale == "relative", " per million ", ""))
  p <- 
    d100 %>% 
    mutate(dc = ifelse(dc == 0, NA, dc),
           dd = ifelse(dd == 0, NA, dd)) %>% 
    ggplot() +
    theme_bw(base_size = 12) +
    theme(panel.grid.minor = element_line(colour = "white"),
          strip.text = element_text(hjust = 0),
          axis.ticks.length.y = unit(.0001, "cm"),
          axis.ticks.length.x = unit(.0001, "cm"),
          panel.spacing = unit(0.3, "lines"),
          strip.background = element_blank(),
          strip.text.x = element_text(size = 11, hjust = 0, colour = "red4",
                                      margin = margin(0,0,0,0, "cm"))) +
    geom_col(aes(day, dc), fill = "red4") +
    geom_col(aes(day, dd), fill = "red") +
    scale_x_continuous(limits = c(0, x.max),
                       expand = c(0, 0)) +
    facet_wrap(~ country) +
    labs(x = NULL, y = NULL,
         subtitle = my.subtitle)

  if(input$log == "log") {
    p + scale_y_log10(#limits = c(input$case, NA),
      #expand = c(0, 0),
      breaks = c( 0.1, 1, 3, 10, 35, 100, 350, 1000, 3500, 10000, 100000),
      labels = c("0.1", "1", "3", "10", "35", "100", "350", "1K", "3.5K", "10K", "100K"))
  } else {
    p
  }

})

Daily global

renderPlot({

  if(input$scale == "relative") {
    tile <- 
      DATA %>% 
      filter(pop >= 40000) %>% 
      mutate(dc = dc / pop * 1e6)
  } else {
    tile <- 
      DATA
  }

  tile <- 
    tile %>% 
    group_by(country) %>% 
    mutate(dc.lag1 = lag(dc, 1),
           dc3 = (dc + dc.lag1) / 2) %>% 
    #filter(date >= ymd("2020-02-25")) %>% 
    mutate(dc = ifelse(dc3 == 0, NA, dc3)) %>% 
    group_by(country) %>% 
    mutate(cur.case = dc[date == max(date)],
           max.case = max(dc, na.rm = TRUE),
           rel = dc / max(dc, na.rm = TRUE))

  tile.last <- 
    tile %>% 
    filter(date == max(date)) %>% 
    mutate(date = date,
           dc = round(dc),
           dcl = ifelse(dc >= 10000, 
                        paste0(as.character(round(dc/1000)), "k"), 
                        as.character(dc))) %>% 
    ungroup() %>% 
    arrange(desc(dc)) %>% 
    slice(1:35)

  tile %>% 
    filter(country %in% tile.last$country) %>% 
    filter(date >= ymd("2020-02-25")) %>% 
    filter(!is.na(dc)) %>% 
    mutate(dc.cut = cut(dc, breaks = c(0, 10, 50, 100, 250, 500, 1000, 5000, 25000))) %>% 
    ggplot(aes(reorder(country, cur.case), date)) +
    theme_bw(base_size = 12) +
    theme(panel.grid = element_line(colour = "white"),
          plot.subtitle = element_text(hjust = 1)) +
    geom_tile(aes(fill =  dc.cut), colour = "white") +
    geom_text(data = tile.last, aes(label = dcl), colour = "white" ) +
    scale_y_date(expand = c(0, 0)) +
    scale_x_discrete(position = "top") +
    scale_fill_viridis_d(option = "D", direction = -1,
                         guide = guide_legend(reverse = TRUE)) +
    coord_flip() +
    labs(x = NULL, y = NULL,
         fill = paste0("Cases", ifelse(input$scale == "relative", " \nper million", "")),
         title = "Confirmed cases, top 35 nations: two-day rolling average",
         subtitle = "Confirmed")
})

Doubling time

renderPlot({
  d200 <- 
    confirmed.double %>% 
    filter(country %in% input$country) %>% 
    filter(day %in% c(0)) %>%
    left_join(DATA %>% 
                filter(date == max(date)) %>%
                select(country, confirmed)) %>% 
    mutate(label = paste(paste(country, round(z, 1))))
  d.confirmed <- 
    d200 %>% 
    mutate(z.max = max(z) * 1.1,
           label = paste(confirmed))

  d200 %>% 
    ggplot() +
    theme_bw(base_size = 14) +
    theme(panel.grid.major.y = element_line(colour = "white"),
          panel.grid.major.x = element_line(colour = "white"),
          plot.subtitle = element_text(hjust = 1)) +
    geom_point(aes(reorder(country, desc(z)), z)) +
    geom_text(aes(reorder(country, desc(z)),
                  z,
                  label = label),
              nudge_y = 0.05, hjust = "left", size = 4,
              colour = "red3") +
    geom_text(data = d.confirmed,
              aes(reorder(country, desc(z)),
                  z.max,
                  label = label),
              nudge_y = 0.05, hjust = "right", size = 4) +
    scale_y_continuous(breaks = seq(1, 15, by = 0.5),
                       expand = c(0, 0.05)) +
    scale_x_discrete(position = "top") +
    coord_flip() +
    theme(legend.position = "none") +
    labs(x = NULL,
         y = NULL,
         title = "Current doubling time [days] of selected countries",
         subtitle = "Confirmed")
})

Doubling time - global

renderPlot({
  d200 <- 
    DATA %>% 
    filter(date == max(date),
           confirmed >= 100) %>% 
    select(country, confirmed) %>% 
    left_join(confirmed.double %>% 
                filter(day %in% c(0))) %>% 
    mutate(label = paste(paste(country, round(z, 1)))) %>% 
    arrange(z) %>% 
    slice(1:35)
  d.confirmed <- 
    d200 %>% 
    mutate(z.max = max(z) * 1.1,
           label = paste(confirmed))

  d200 %>% 
    ggplot() +
    theme_bw(base_size = 14) +
    theme(panel.grid.major.y = element_line(colour = "white"),
          panel.grid.major.x = element_line(colour = "white"),
          plot.subtitle = element_text(hjust = 1)) +
    geom_point(aes(reorder(country, desc(z)), z)) +
    geom_text(aes(reorder(country, desc(z)),
                  z,
                  label = label),
              nudge_y = 0.05, hjust = "left", size = 4,
              colour = "red3") +
    geom_text(data = d.confirmed,
              aes(reorder(country, desc(z)),
                  z.max,
                  label = label),
              nudge_y = 0.05, hjust = "right", size = 4) +
    scale_y_continuous(breaks = seq(1, 15, by = 0.25),
                       expand = c(0, 0.05)) +
    scale_x_discrete(position = "top") +
    coord_flip() +
    theme(legend.position = "none") +
    labs(x = NULL,
         y = NULL,
         title = "Global: Current top 35 doubling time [days]",
         subtitle = "Confirmed")
})

Tests

Column {.tabset}

Tests per million

renderPlot({
  con <- httr::GET("https://en.wikipedia.org/wiki/COVID-19_testing")

  tmp <-
    con %>%
    xml2::read_html() %>%
    rvest::html_nodes("table")
  tests <-
    rvest::html_table(tmp[4]) %>% as.data.frame() %>%
    janitor::clean_names() %>%
    tibble::as_tibble() %>%
    dplyr::select(-ref) %>%
    dplyr::rename(confirmed = positive,
                  date = as_of,
                  tests.pm = tests_millionpeople,
                  positive.pt = positive_thousandtests) %>%
    dplyr::mutate(date = lubridate::dmy(paste0(date, " 2020")),
                  tests = stringr::str_replace_all(tests, "\\*", ""),
                  tests = as.integer(stringr::str_replace_all(tests, ",", "")),
                  confirmed = as.integer(stringr::str_replace_all(confirmed, ",", "")),
                  tests.pm = as.integer(stringr::str_replace_all(tests.pm, ",", "")),
                  positive.pt = as.integer(stringr::str_replace_all(positive.pt, ",", "")))

  tests %>%
    filter(!(str_starts(country_or_region, c("Canada:")))) %>%
    filter(!str_starts(country_or_region, c("United States:"))) %>%
    filter(!str_starts(country_or_region, c("Italy:"))) %>%
    arrange(desc(tests.pm)) %>%
    slice(1:50) %>%
    mutate(country2 = paste(country_or_region, tests.pm)) %>%
    ggplot(aes(reorder(country_or_region, tests.pm), tests.pm)) +
    theme_minimal(base_size = 12) +
    theme(panel.grid.major.y = element_line(colour = "white")) +
    geom_point() +
    geom_text(aes(label = country2),
              nudge_y = 500, hjust = "left", size = 3,
              colour = "red3") +
    scale_y_continuous(position = "right",
                       breaks = seq(0, 70000, by = 5000),
                       name = "Number of tests per million",
                       lim = c(0, 65000),
                       sec.axis = sec_axis(~ .,
                                           breaks = seq(0, 70000, by = 5000))) +
    scale_x_discrete(NULL, NULL) +
    coord_flip()
})


einarhjorleifsson/kovitinn documentation built on April 5, 2020, 12:53 a.m.