# data-raw/covid_raw.R

# COVID Data
# Date of download: 27 August 2021
# Source: https://covid19.who.int/info/

# PACKAGES ----
library(tidyverse)

# LOAD DATASETS ----
## The three WHO COVID files are read with readr; the population file is read
## with base `read.csv()`.
covid_raw <- read_csv(file.path("data-raw", "covid_raw.csv"))
vaccination_data_raw <- read_csv(
  file.path("data-raw", "vaccination-data_raw.csv")
)
phsm_severity_data_raw <- read_csv(
  file.path("data-raw", "phsm-severity-data_raw.csv")
)
who_population <- read.csv(file.path("data-raw", "who_population.csv"))

# COVID CASES ----
## Store every character column as a factor, then clean up the column names.
## `mutate(across(where(...)))` replaces the superseded `mutate_if()`.
covid <- covid_raw %>%
  mutate(across(where(is.character), as.factor)) %>%
  janitor::clean_names()

# VACCINATION DATA ----

## Clean up the column names, then drop the "persons_" prefix from the four
## vaccination columns (each pair reads new_name = old_name).
vaccination_data <- vaccination_data_raw %>%
  janitor::clean_names() %>%
  rename(
    vaccinated_1plus_dose = persons_vaccinated_1plus_dose,
    vaccinated_1plus_dose_per100 = persons_vaccinated_1plus_dose_per100,
    fully_vaccinated = persons_fully_vaccinated,
    fully_vaccinated_per100 = persons_fully_vaccinated_per100
  )

# JOIN covid + vaccination_data ----
## Keep every row of `covid`; rows are matched on country name and WHO region.
covid <- covid %>%
  left_join(vaccination_data, by = c("country", "who_region"))

# COVID MEASURES ----

## Prepare the measures table in one pass:
##   1. clean the column names,
##   2. drop `country` because of potential naming conflicts,
##   3. rename `date_start` so the table can be joined by date too.
phsm <- phsm_severity_data_raw %>%
  janitor::clean_names() %>%
  select(-country) %>%
  rename(date_reported = date_start)

## join with rest of dataset, keeping every row of `covid`
covid <- covid %>%
  left_join(phsm, by = c("iso3", "who_region", "date_reported"))

# FINAL CLEAN UPS ----
## Print, in full, every distinct country name longer than 15 characters.
## The result is only printed; `covid` itself is not modified here.
covid %>%
  count(country) %>%
  select(country) %>%
  filter(str_length(country) > 15) %>%
  print(n = Inf)

## shorten long country names
## Each pair reads "new label" = "label used in the raw data".
## The Democratic Republic of the Congo is shortened to "DR Congo" rather than
## "Congo": `fct_recode()` merges levels that are given the same new name, so
## plain "Congo" would fold its rows into any existing "Congo" level.
## NOTE(review): the WHO country list is expected to contain a separate
## "Congo" entry (Republic of the Congo) -- confirm against the raw data.
covid <- covid %>%
  mutate(
    country = fct_recode(
      country,
      "Bolivia" = "Bolivia (Plurinational State of)",
      "DPR Korea" = "Democratic People's Republic of Korea",
      "DR Congo" = "Democratic Republic of the Congo",
      "Falkland Islands" = "Falkland Islands (Malvinas)",
      "Iran" = "Iran (Islamic Republic of)",
      "Laos" = "Lao People's Democratic Republic",
      "Micronesia" = "Micronesia (Federated States of)",
      "Palestinian territory (incl. Jerusalem)" =
        "occupied Palestinian territory, including east Jerusalem",
      "Venezuela" = "Venezuela (Bolivarian Republic of)",
      "United Kingdom" = "The United Kingdom"
    )
  )

## store the region, ISO code and data-source columns as factors and make
## sure the result is a tibble
covid <- covid %>%
  mutate(across(c(who_region, iso3, data_source), as_factor)) %>%
  as_tibble()

## add population data
## `Value` is stripped of comma separators, converted to a number and
## multiplied by 1000 (presumably the source reports thousands -- confirm).
## NOTE(review): the join below assumes one population row per `iso3`;
## duplicated codes in `who_population` would duplicate rows in `covid`.
pop_data <- who_population %>%
  mutate(
    population = as.numeric(str_remove_all(Value, ",")) * 1000,
    iso3 = as_factor(SpatialDimValueCode),
    region = as_factor(ParentLocation)
  ) %>%
  select(iso3, region, population) %>%
  arrange(population)

## combine with original dataset, keeping every row of `covid`
covid <- covid %>%
  left_join(pop_data, by = "iso3")

# SELECT ONLY VARIABLES OF INTEREST ----

covid <- covid %>%
  select(
    date_reported, iso3, country,
    new_cases, cumulative_cases, cumulative_deaths,
    fully_vaccinated,
    masks:global_index, # every column positioned from masks to global_index
    measures_in_place,
    region, population
  )

# Add cleaned data to package
## `use_data()` names the saved file after the object, so pass `covid` itself.
usethis::use_data(covid, overwrite = TRUE)
# ddauber/r4np documentation built on Jan. 15, 2025, 8:46 p.m.