add_code/update_data.R

# --- Code to generate the cached data -----------------------------------------

# remotes::install_github("joachim-gassen/tidycovid19")
library(dplyr)
library(lubridate)
library(tidycovid19)
library(stringr)

# Since March 10, 2023 there are no John Hopkins updates anymore 
# (last day of data 2023-03-09)

ecdc <- download_ecdc_covid19_data(silent = TRUE)
saveRDS(ecdc, "cached_data/ecdc_covid19.RDS", version = 2)

owid_data <- download_owid_data(silent = TRUE)
saveRDS(owid_data, "cached_data/owid_data.RDS", version = 2)

# Since December 10, 2020 ACAPS is no longer updating NPI data

wblist <- download_wbank_data(var_def = TRUE, silent = TRUE)
saveRDS(wblist, "cached_data/wbank.RDS", version = 2)

# Since Apr 14, 2022 Apple is no longer providing mobility data
# Since October 15, 2022 Google is no longer providing mobility data

gtlist <- download_google_trends_data(
  type = c('country', 'country_day', 'region', 'city'), silent = TRUE
)
saveRDS(gtlist, "cached_data/google_trends.RDS", version = 2)

# The data is no longer updated after December 31, 2022 
# while data review processes continue.
oxlist <- download_oxford_npi_data(type = c("measures", "index"), silent = TRUE)
saveRDS(oxlist, "cached_data/oxford_npi.RDS", version = 2)


# Code from download_merged_data() to avoid reloading the data

jhu_list <- readRDS("cached_data/jhu_csse_covid19.RDS")

ecdc <- readRDS("cached_data/ecdc_covid19.RDS") %>%
  select(-timestamp, -country_territory) %>%
  dplyr::filter(!is.na(.data$iso3c) &
                  !.data$iso3c %in% c("XKX", "N/A", "MSF", "CNG1925", "RKS"))

ecdc_acc <- expand.grid(
  date = lubridate::as_date(min(ecdc$date):max(ecdc$date)),
  iso3c = unique(ecdc$iso3c),
  stringsAsFactors = FALSE
) %>% select(iso3c, date) %>%
  left_join(ecdc, by = c("iso3c", "date")) %>%
  mutate(
    cases = ifelse(is.na(cases), 0, cases),
    deaths = ifelse(is.na(deaths), 0, deaths)
  ) %>%
  group_by(iso3c) %>%
  mutate(
    ecdc_cases = cumsum(cases),
    ecdc_deaths = cumsum(deaths)
  ) %>%
  filter(ecdc_cases > 0 | ecdc_deaths > 0) %>%
  select(iso3c, date, ecdc_cases, ecdc_deaths) %>%
  ungroup()

owid_data <- readRDS("cached_data/owid_data.RDS") %>%
  select(-timestamp)

jhu_cases <- jhu_list[[1]] %>%
  select(-country, -timestamp)

npis <- readRDS("cached_data/acaps_npi.RDS") %>%
  mutate(npi_date = ymd(date_implemented)) %>%
  rename(npi_type = category) %>%
  select(iso3c, npi_date, log_type, npi_type)

ox_npis <- readRDS("cached_data/oxford_npi.RDS")[[2]] %>%
  select(-timestamp, -country) %>%
  rename_at(vars(-iso3c, -date), ~ paste0("oxcgrt_", .))

amtr_list <- readRDS("cached_data/apple_mtr.RDS")

amtr <- amtr_list[[1]] %>%
  select(-timestamp) %>%
  rename_at(vars(-iso3c, -date), ~ paste0("apple_mtr_", .))

gcmr_list <- readRDS("cached_data/google_cmr.RDS")

gcmr <- gcmr_list[[1]] %>%
  select(-timestamp, -place_id) %>%
  rename_at(vars(-iso3c, -date), ~ paste0("gcmr_", .))

gtrends_list <- readRDS("cached_data/google_trends.RDS")

gtrends_cd <- gtrends_list[[2]] %>%
  select(-timestamp)

gtrends_c <- gtrends_list[[1]] %>%
  rename(gtrends_country_score = gtrends_score) %>%
  select(-timestamp)

wb_list <- readRDS("cached_data/wbank.RDS")
wbank <-  wb_list[[1]] %>%
  select(-country, -timestamp)

calc_npi_measure <-function(type, var_name) {
  my_npi <- npis %>% filter(npi_type == type)
  first_date <- min(my_npi$npi_date)
  last_date <- max(my_npi$npi_date)
  merged_base %>%
    left_join(
      my_npi %>%
        rename(date = npi_date) %>%
        mutate(npi = ifelse(log_type == "Phase-out measure", -1, 1)) %>%
        select(iso3c, date, npi) %>%
        group_by(iso3c, date) %>%
        summarise(npi = sum(npi), .groups = "drop"),
      by = c("iso3c", "date")
    ) %>%
    group_by(iso3c) %>%
    mutate(
      npi = ifelse(is.na(npi), 0, npi),
      sum_npi = cumsum(npi)
    ) %>%
    ungroup() %>%
    filter(date >= first_date, date <= last_date) %>%
    select(iso3c, date, sum_npi) -> df

  names(df)[3] <- var_name
  df
}

# 2020-04-01: There is a new populated category in the ACAPS NPI data
#             "Humanitarian exemption". I do not code it for the time
#             being as it contains only two Irish cases (parking for
#             essential workers and leeway for pharamacisist)

# 2020-04-16: The category "Social and economic measures" has been renamed
#             to "Governance and socio-economic measures" in the ACAPS data.
#             I reflect this name change by renaming the variable 'soc_econ'
#             'gov_soc_econ'.

# 2023-06-11: New merging strategy to reflect that only OWID and ECDC data
#             still receive updates.

# 2023-06-11: Removed stale ACAPS scores post 2020-12-10

# 2023-06-11: Included Oxford NPI data into the merged dataset 

merged_base <- jhu_cases %>%
  full_join(ecdc_acc, by = c("iso3c", "date")) %>%
  full_join(owid_data, by = c("iso3c", "date")) %>%
  arrange(iso3c, date) %>%
  mutate(country = suppressWarnings({
    countrycode::countrycode(iso3c, "iso3c", "country.name")
  })) %>%
  filter(!is.na(country)) %>%
  select(iso3c, country, everything())

merged <- merged_base %>%
  left_join(
    calc_npi_measure("Social distancing", "soc_dist"),
    by = c("iso3c", "date")
  ) %>%
  left_join(
    calc_npi_measure("Movement restrictions", "mov_rest"),
    by = c("iso3c", "date")
  ) %>%
  left_join(
    calc_npi_measure("Public health measures", "pub_health"),
    by = c("iso3c", "date")
  ) %>%
  left_join(
    calc_npi_measure("Governance and socio-economic measures", "gov_soc_econ"),
    by = c("iso3c", "date")
  ) %>%
  left_join(
    calc_npi_measure("Lockdown", "lockdown"),
    by = c("iso3c", "date")
  ) %>%
  left_join(ox_npis, by = c("iso3c", "date")) %>%
  left_join(amtr, by = c("iso3c", "date")) %>%
  left_join(gcmr, by = c("iso3c", "date")) %>%
  left_join(gtrends_cd, by = c("iso3c", "date")) %>%
  left_join(gtrends_c, by = "iso3c") %>%
  left_join(wbank, by = "iso3c") %>%
  group_by(iso3c) %>%
  mutate(
    has_npi = suppressWarnings({max(soc_dist, na.rm = T) + max(mov_rest, na.rm = T) +
      max(.data$pub_health, na.rm = T) + max(gov_soc_econ, na.rm = T) +
      max(lockdown, na.rm = T) > 0}),
    soc_dist = ifelse(has_npi, soc_dist, NA),
    mov_rest = ifelse(has_npi, mov_rest, NA),
    pub_health = ifelse(has_npi, pub_health, NA),
    gov_soc_econ = ifelse(has_npi, gov_soc_econ, NA),
    lockdown = ifelse(has_npi, lockdown, NA)
  ) %>%
  select(-has_npi) %>%
  ungroup() %>%
  mutate(timestamp = Sys.time())

saveRDS(merged, "cached_data/merged.RDS", version = 2)
joachim-gassen/tidycovid19 documentation built on March 21, 2024, 6:57 a.m.