#................................................................................................
## Purpose: Process Country datasets for analysis
##
## Notes: Process data functions and files are specific to regions
#................................................................................................
library(tidyverse)
source("R/process_data4.R")
#......................
# global data
#......................
# JHU data for new recast df
JHUdf <- readr::read_csv("data/raw/time_series_covid19_deaths_global.csv") %>%
tidyr::pivot_longer(., cols = -c("Province/State", "Country/Region", "Lat", "Long"),
names_to = "date", values_to = "deaths") %>%
magrittr::set_colnames(c("province", "country_region", "lat", "long", "date", "deaths")) %>%
dplyr::filter(is.na(province)) %>% # only want by country
dplyr::mutate(georegion = countrycode::countryname(country_region, destination = "iso3c"),
date = lubridate::mdy(date)) %>% # NB, we just convert this to a lubridate format and later within the process data function, dates are converted to international format
dplyr::group_by(georegion) %>% # group by region for daily deaths
dplyr::rename(cumdeaths = deaths) %>%
dplyr::mutate(deaths = cumdeaths - dplyr::lag(cumdeaths),
deaths = ifelse(is.na(deaths), 0, deaths), # take care of first value
deaths = ifelse(deaths < 0, 0, deaths)) %>% # take care of cumulative death correction
dplyr::select(c("date", "georegion", "deaths")) %>%
dplyr::filter(date <= lubridate::mdy("07-17-2020")) %>%
dplyr::ungroup(.)
# serovalidation
sero_valdf <- readr::read_tsv("data/raw/serovalidation_final_raw.tsv")
# seroprevalence
sero_prevdf <- readr::read_tsv("data/raw/seroprevalence_final_raw.tsv") %>%
dplyr::mutate(date_start_survey = lubridate::ymd(date_start_survey), # NB, we just convert this to a lubridate format and later within the process data function, dates are converted to international format
date_end_survey = lubridate::ymd(date_end_survey),
seroprevalence_unadjusted = ifelse(is.na(seroprevalence_unadjusted), n_positive/n_tested, seroprevalence_unadjusted),
age_high = ifelse(age_high == 0 & age_low == 0, 0.99, age_high)) # for cut
# cumulative deaths
deathsdf <- readr::read_tsv("data/raw/cumulative_deaths.tsv") %>%
dplyr::mutate(date_start_survey = lubridate::ymd(date_start_survey), # NB, we just convert this to a lubridate format and later within the process data function, dates are converted to international format
date_end_survey = lubridate::ymd(date_end_survey))
# demography (non-US Census data and BRA on its own)
populationdf <- readr::read_tsv("data/raw/non_usa_non_bra_population.tsv") %>%
dplyr::select(-c("reference")) %>%
dplyr::mutate(
age_high = ifelse(age_high == 0 & age_low == 0, 0.99, age_high)) # for cut
#..................................................................................
#---- Preprocess Latin America Data #-----
#..................................................................................
#............................................................
#---- BRA1 #----
#...........................................................
# process Brazil time series
bra_time_series <- readRDS("data/raw/2020-10-12_Brazil_state_age_sex_deaths.rds") %>%
dplyr::filter(date <= lubridate::mdy("07-17-2020")) %>%
dplyr::group_by(date) %>%
dplyr::summarise(deaths = sum(count)) %>%
dplyr::mutate(georegion = "BRA") %>%
dplyr::select(c("date", "georegion", "deaths"))
# process Brazil cum deaths
bra_cumdeaths <- readRDS("data/raw/2020-10-12_Brazil_state_age_sex_deaths.rds") %>%
dplyr::filter(date <= lubridate::mdy("07-17-2020")) %>%
magrittr::set_colnames(tolower(colnames(.))) %>%
dplyr::rename(gender = sex) %>%
dplyr::group_by(region, age, gender, .drop = FALSE) %>%
dplyr::summarise( deaths = sum(count) ) %>%
dplyr::ungroup(.) %>%
dplyr::mutate(
age = as.numeric(as.character(age)),
country = "BRA",
study_id = "BRA1",
age_low = age,
age_high = age,
age_breakdown = 1,
gender_breakdown = 1,
for_regional_analysis = 1) %>%
dplyr::ungroup(.) %>%
dplyr::rename(n_deaths = deaths) %>%
dplyr::arrange(region) %>%
dplyr::mutate(
age_high = ifelse(age_high == 0 & age_low == 0, 0.99, age_high))
# process Brazil population
bra_populationdf <- readr::read_csv("data/raw/Brazil_2020_Population_Data.csv") %>%
magrittr::set_colnames(tolower(colnames(.))) %>%
dplyr::mutate(
age_group = stringr::str_replace(age_group, "\\+", "-999"),
age_low = as.numeric(stringr::str_split_fixed(age_group, "-[0-9]+", n = 2)[,1]),
age_high = as.numeric(stringr::str_split_fixed(age_group, "[0-9]-", n = 2)[,2])
) %>%
dplyr::group_by(region, age_low, age_high, sex) %>%
dplyr::summarise(population = sum(population)) %>%
dplyr::rename(gender =sex) %>%
dplyr::mutate(
country = "BRA",
study_id = "BRA1",
age_breakdown = 1,
for_regional_analysis = 1,
gender_breakdown = 1) %>%
dplyr::ungroup(.)
# bra populations reported 0-5, 5-10 --> change to be 0-4, 5-9 (correct format)
bra_populationdf <- bra_populationdf %>%
dplyr::mutate(age_high = ifelse(age_high == 999, age_high, age_high - 1),
age_high = ifelse(age_high == 0 & age_low == 0, 0.99, age_high)) # for cut
#......................
# regions
#......................
BRA.regions.dat <- process_data4(cum_tp_deaths = bra_cumdeaths,
time_series_totdeaths_df = bra_time_series,
time_series_totdeaths_geocode = "BRA",
population = bra_populationdf,
sero_val = sero_valdf,
seroprev = sero_prevdf,
get_descriptive_dat = TRUE,
groupingvar = "region",
study_ids = "BRA1",
origin = lubridate::ymd("2020-01-01"))
#......................
# ages
#......................
BRA.agebands.dat <- process_data4(cum_tp_deaths = bra_cumdeaths,
time_series_totdeaths_df = bra_time_series,
time_series_totdeaths_geocode = "BRA",
population = bra_populationdf,
sero_val = sero_valdf,
seroprev = sero_prevdf,
get_descriptive_dat = TRUE,
groupingvar = "ageband",
study_ids = "BRA1",
origin = lubridate::ymd("2020-01-01"),
agebreaks = c(0, 9, 19, 29, 39,
49, 59, 69, 79, 999))
#............................................................
#---- BRA1 CITIES #----
#...........................................................
#......................
# pre-process Brazil cities data.
#......................
bra_sero <- read.csv("data/raw/bra1_city_sero.csv") %>%
dplyr::rename(city = City)
bra_city_d <- readr::read_csv("data/raw/bra1_city_deaths.csv")
bra_city_pop <- readr::read_csv("data/raw/bra1_city_pops.csv")
bra_city_pop_sum <- bra_city_pop %>%
dplyr::group_by(city) %>%
dplyr::summarise(population = sum(population))
bra_city <- dplyr::left_join(bra_city_d, bra_sero, by = "city")
bra_city <- dplyr::left_join(bra_city_pop_sum, bra_city)
bra_city <- bra_city %>%
dplyr::mutate(deaths = deaths_100k*population/100000)
readr::write_csv(bra_city, path = "data/derived/BRA1/BRA1_city.csv")
#......................
# MANUAL ADJUSTMENTS
#......................
# none needed with linelist
#......................
# save out
#......................
dir.create("data/derived/BRA1/", recursive = T)
saveRDS(BRA.regions.dat, "data/derived/BRA1/BRA1_regions.RDS")
saveRDS(BRA.agebands.dat, "data/derived/BRA1/BRA1_agebands.RDS")
#..................................................................................
#---- Preprocess European Data #----
#..................................................................................
#............................................................
#---- CHE1, Geneva #----
#...........................................................
CHE1TimeSeries <- readr::read_tsv("data/raw/deaths_time_series_subnat.tsv") %>%
dplyr::filter(study_id == "CHE1")
# sanity check
identical(CHE1TimeSeries$date_start_survey, CHE1TimeSeries$date_end_survey)
CHE1TimeSeries <- CHE1TimeSeries %>%
dplyr::rename(date = date_end_survey,
deaths = n_deaths,
georegion = region) %>%
dplyr::select(c("date", "georegion", "deaths")) %>%
dplyr::mutate(date = lubridate::dmy(date)) %>% # NB, we just convert this to a lubridate format and later within the process data function, dates are converted to international format
dplyr::filter(date <= lubridate::mdy("07-17-2020"))
#......................
# ages
#......................
CHE1.agebands.dat <- process_data4(cum_tp_deaths = deathsdf,
time_series_totdeaths_df = CHE1TimeSeries,
time_series_totdeaths_geocode = "Geneva", ## use study id in case we get more studies later.
population = populationdf,
sero_val = sero_valdf,
seroprev = sero_prevdf,
get_descriptive_dat = TRUE,
study_ids = "CHE1",
groupingvar = "ageband",
agebreaks = c(0, 9, 19, 29,
39, 49, 59, 69,
79, 999)) # for pop splits
#......................
# MANUAL ADJUSTMENTS
#......................
# Assume:
# (1) seroprevalence 5-9 is representative of ages 0-9
# (2) seroprevalence 10-19 is representative of ages 10-19
# (3) seroprevalence 20-49 is representative of ages 20-29 and 30-39 and 40-49
# (4) seroprevalence 50-64 is representative of ages 50-59 and 60-69
# (5) seroprevalence 65+ is representative of ages 70-79 and 80+
ageband <- unique(CHE1.agebands.dat$deaths_propMCMC$ageband)
che_adj_seroprev <- tibble::tibble(
ObsDaymin = unique(CHE1.agebands.dat$seroprevMCMC$ObsDaymin),
ObsDaymax = unique(CHE1.agebands.dat$seroprevMCMC$ObsDaymax))
che_adj_seroprev <- tidyr::expand_grid(che_adj_seroprev, ageband) %>%
dplyr::mutate(age_low = as.numeric(stringr::str_extract(ageband, "[0-9]+(?=\\,)")),
age_high= as.numeric(stringr::str_extract(ageband, "[0-9]+?(?=])"))) %>%
dplyr::arrange(age_low)
# pull out original
che_org_seroprev <- CHE1.agebands.dat$seroprev_group %>%
dplyr::select(c("ObsDaymin", "ObsDaymax", "age_low", "age_high", "n_positive", "n_tested", "seroprevalence_unadjusted"))
# because of multiple dates, this is a slightly harder wiggle than other
# assumption 1
che_org_seroprev_assum1 <- che_org_seroprev %>%
dplyr::filter(age_low == 5 & age_high == 9) %>%
dplyr::mutate(ageband = "(0,9]")
# assumption 2
che_org_seroprev_assum2 <- che_org_seroprev %>%
dplyr::filter(age_low == 10 & age_high == 19) %>%
dplyr::mutate(ageband = "(9,19]")
# assumption 3
che_org_seroprev_assum3.1 <- che_org_seroprev %>%
dplyr::filter(age_low == 20 & age_high == 49) %>%
dplyr::mutate(ageband = "(19,29]")
che_org_seroprev_assum3.2 <- che_org_seroprev %>%
dplyr::filter(age_low == 20 & age_high == 49) %>%
dplyr::mutate(ageband = "(29,39]")
che_org_seroprev_assum3.3 <- che_org_seroprev %>%
dplyr::filter(age_low == 20 & age_high == 49) %>%
dplyr::mutate(ageband = "(39,49]")
che_org_seroprev_assum3 <- dplyr::bind_rows(che_org_seroprev_assum3.1, che_org_seroprev_assum3.2, che_org_seroprev_assum3.3)
# assumption 4
che_org_seroprev_assum4.1 <- che_org_seroprev %>%
dplyr::filter(age_low == 50 & age_high == 64) %>%
dplyr::mutate(ageband = "(49,59]")
che_org_seroprev_assum4.2 <- che_org_seroprev %>%
dplyr::filter(age_low == 50 & age_high == 64) %>%
dplyr::mutate(ageband = "(59,69]")
che_org_seroprev_assum4 <- dplyr::bind_rows(che_org_seroprev_assum4.1, che_org_seroprev_assum4.2)
# assumption 5
che_org_seroprev_assum5.1 <- che_org_seroprev %>%
dplyr::filter(age_low == 65 & age_high == 999) %>%
dplyr::mutate(ageband = "(69,79]")
che_org_seroprev_assum5.2 <- che_org_seroprev %>%
dplyr::filter(age_low == 65 & age_high == 999) %>%
dplyr::mutate(ageband = "(79,999]")
che_org_seroprev_assum5 <- dplyr::bind_rows(che_org_seroprev_assum5.1, che_org_seroprev_assum5.2)
# bring toghether this monstrosity
che_org_seroprev <- dplyr::bind_rows(che_org_seroprev_assum1,
che_org_seroprev_assum2,
che_org_seroprev_assum3,
che_org_seroprev_assum4,
che_org_seroprev_assum5)
che_adj_seroprev <- dplyr::left_join(che_adj_seroprev, che_org_seroprev,
by = c("ObsDaymin", "ObsDaymax", "ageband")) %>%
dplyr::rename(SeroPrev = seroprevalence_unadjusted) %>%
dplyr::mutate(age_high = as.numeric(stringr::str_extract(ageband, "[0-9]+?(?=])"))) %>%
dplyr::select(c("ObsDaymin", "ObsDaymax", "age_high", "ageband", "n_positive", "n_tested", "SeroPrev")) %>%
dplyr::arrange(ObsDaymin, ObsDaymax, age_high) %>%
dplyr::select(-c("age_high"))
# bring together
CHE1.agebands.dat$seroprevMCMC <- che_adj_seroprev
#......................
# save out
#......................
dir.create("data/derived/CHE1", recursive = T)
saveRDS(CHE1.agebands.dat, "data/derived/CHE1/CHE1_agebands.RDS")
#............................................................
#---- CHE2, Zurich #----
#...........................................................
CHE2TimeSeries <- readr::read_tsv("data/raw/deaths_time_series_subnat.tsv") %>%
dplyr::filter(study_id == "CHE2")
# sanity check
identical(CHE2TimeSeries$date_start_survey, CHE2TimeSeries$date_end_survey)
CHE2TimeSeries <- CHE2TimeSeries %>%
dplyr::rename(date = date_end_survey,
deaths = n_deaths,
georegion = region) %>%
dplyr::select(c("date", "georegion", "deaths")) %>%
dplyr::mutate(date = lubridate::dmy(date)) %>% # NB, we just convert this to a lubridate format and later within the process data function, dates are converted to international format
dplyr::filter(date <= lubridate::mdy("07-17-2020"))
#......................
# ages
#......................
CHE2.agebands.dat <- process_data4(cum_tp_deaths = deathsdf,
time_series_totdeaths_df = CHE2TimeSeries,
time_series_totdeaths_geocode = "Zurich",
population = populationdf,
sero_val = sero_valdf,
seroprev = sero_prevdf,
get_descriptive_dat = TRUE,
groupingvar = "ageband",
study_ids = "CHE2",
agebreaks = c(0, 9, 19, 29,
39, 49, 59, 69,
79, 999)) # for pop splits
#......................
# MANUAL ADJUSTMENTS
#......................
# Assume:
# (1) seroprevalence 18-75 is representative of all ages
ageband <- unique(CHE2.agebands.dat$deaths_propMCMC$ageband)
che2_adj_seroprev <- tibble::tibble(
ObsDaymin = unique(CHE2.agebands.dat$seroprevMCMC$ObsDaymin),
ObsDaymax = unique(CHE2.agebands.dat$seroprevMCMC$ObsDaymax),
n_positive = unique(CHE2.agebands.dat$seroprevMCMC$n_positive),
n_tested = unique(CHE2.agebands.dat$seroprevMCMC$n_tested),
SeroPrev = unique(CHE2.agebands.dat$seroprevMCMC$SeroPrev))
che2_adj_seroprev <- tidyr::expand_grid(che2_adj_seroprev, ageband)
che2_adj_seroprev <- che2_adj_seroprev %>%
dplyr::mutate(age_high = as.numeric(stringr::str_extract(ageband, "[0-9]+?(?=])"))) %>%
dplyr::arrange(ObsDaymin, ObsDaymax, age_high) %>%
dplyr::select(-c("age_high"))
# bring together
CHE2.agebands.dat$seroprevMCMC <- che2_adj_seroprev
#......................
# save out
#......................
dir.create("data/derived/CHE2", recursive = T)
saveRDS(CHE2.agebands.dat, "data/derived/CHE2/CHE2_agebands.RDS")
#............................................................
#---- DNK1 #----
#...........................................................
DNK_timeseries<-read.csv("data/raw/DNK_timeseries.csv") %>%
dplyr::mutate(date = lubridate::dmy(date)) %>% # NB, we just convert this to a lubridate format and later within the process data function, dates are converted to international format
dplyr::filter(date<=lubridate::mdy("07-17-2020")) %>%
dplyr::arrange(date) %>%
dplyr::select(date,georegion,deaths)
DNK_timeseries$deaths[2:nrow(DNK_timeseries)]<-DNK_timeseries$deaths[2:nrow(DNK_timeseries)] -
DNK_timeseries$deaths[1:(nrow(DNK_timeseries)-1)]
### quick comparison of JHU vs govt date of deaths
DNKJHU<-filter(JHUdf,georegion=="DNK")
plot(DNK_timeseries$date,DNK_timeseries$deaths,xlab="date",ylab="deaths")
points(DNKJHU$date,DNKJHU$deaths,col="red")
legend("topright",c("Govt","JHU"),pch=1,col=c("black","red"))
#......................
# regions
#......................
dnk_rgn_sero <- sero_prevdf %>%
dplyr::filter(study_id == "DNK1") %>%
dplyr::mutate(for_regional_analysis = ifelse(region == "all", 0, 1))
DNK.regions.dat <- process_data4(cum_tp_deaths = deathsdf,
time_series_totdeaths_df = DNK_timeseries,
time_series_totdeaths_geocode = "DNK",
population = populationdf,
sero_val = sero_valdf,
seroprev = dnk_rgn_sero,
get_descriptive_dat = TRUE,
groupingvar = "region",
study_ids = "DNK1",
agebreaks = c(0, 59, 69, 79, 999))
## Use age sero data for age region model
dnk_age_sero<-read.csv("data/raw/DNK1_age.csv") %>%
dplyr::select(-c("ref", "notes")) %>%
dplyr::mutate(date_start_survey = lubridate::ymd(date_start_survey), # NB, we just convert this to a lubridate format and later within the process data function, dates are converted to international format
date_end_survey = lubridate::ymd(date_end_survey),
seroprevalence_unadjusted = ifelse(is.na(seroprevalence_unadjusted), n_positive/n_tested, seroprevalence_unadjusted))
DNK.agebands_age_sero.dat <- process_data4(cum_tp_deaths = deathsdf,
time_series_totdeaths_df = DNK_timeseries,
time_series_totdeaths_geocode = "DNK",
population = populationdf,
sero_val = sero_valdf,
seroprev = dnk_age_sero,
get_descriptive_dat = TRUE,
groupingvar = "ageband",
study_ids = "DNK1",
agebreaks = c(0, 59, 69, 79, 999))
#......................
# ages
#......................
DNK.agebands.dat <- process_data4(cum_tp_deaths = deathsdf,
time_series_totdeaths_df = DNK_timeseries,
time_series_totdeaths_geocode = "DNK",
population = populationdf,
sero_val = sero_valdf,
seroprev = sero_prevdf,
get_descriptive_dat = TRUE,
groupingvar = "ageband",
study_ids = "DNK1",
agebreaks = c(0, 59, 69, 79, 999))
#......................
# MANUAL ADJUSTMENTS
#......................
# Assume DNK blood donors 17-69 years representative of all age groups
dnkagebands <- unique(DNK.agebands.dat$deaths_propMCMC$ageband)
dnk_adj_seroprev <- tibble::tibble(
ObsDaymin = DNK.agebands.dat$seroprevMCMC$ObsDaymin,
ObsDaymax = DNK.agebands.dat$seroprevMCMC$ObsDaymax,
n_positive = NA,
n_tested = NA)
dnk_adj_seroprev <- lapply(dnkagebands, function(x){
dnk_adj_seroprev %>%
dplyr::mutate(ageband = x)}) %>%
dplyr::bind_rows() %>%
dplyr::arrange(., ObsDaymin, ObsDaymax, ageband)
DNKnatprev <- DNK.agebands.dat$seroprev_group %>%
dplyr::select(c("ObsDaymin", "ObsDaymax", "seroprevalence_unadjusted", "range_sero_low", "range_sero_high")) %>%
dplyr::rename(SeroPrev = seroprevalence_unadjusted,
SeroLCI = range_sero_low,
SeroUCI = range_sero_high)
dnk_adj_seroprev <- dplyr::left_join(dnk_adj_seroprev, DNKnatprev, by = c("ObsDaymin", "ObsDaymax"))
# overwrite
DNK.agebands.dat$seroprevMCMC <- dnk_adj_seroprev
#......................
# save out
#......................
dir.create("data/derived/DNK1", recursive = T)
saveRDS(DNK.regions.dat, "data/derived/DNK1/DNK1_regions.RDS")
saveRDS(DNK.agebands.dat, "data/derived/DNK1/DNK1_agebands.RDS")
# save for age region analysis:
saveRDS(DNK.agebands_age_sero.dat, "data/derived/DNK1/DNK1_agebands_age_sero.dat.RDS")
#............................................................
#--- ESP1-2 #----
#...........................................................
ESP_timeseries<-read.csv("data/raw/ESP_timeseries.csv") %>%
dplyr::mutate(date = lubridate::dmy(date)) %>% # NB, we just convert this to a lubridate format and later within the process data function, dates are converted to international format
dplyr::filter(date<=lubridate::mdy("07-17-2020")) %>%
dplyr::arrange(date) %>%
dplyr::select(date,georegion,deaths)
ESP_timeseries$deaths[2:nrow(ESP_timeseries)]<-ESP_timeseries$deaths[2:nrow(ESP_timeseries)] -
ESP_timeseries$deaths[1:(nrow(ESP_timeseries)-1)]
### quick comparison of JHU vs ESP govt date of deaths
ESPJHU<-filter(JHUdf,georegion=="ESP")
plot(ESP_timeseries$date,ESP_timeseries$deaths,xlab="date",ylab="deaths")
points(ESPJHU$date,ESPJHU$deaths,col="red")
legend("topright",c("Govt","JHU"),pch=1,col=c("black","red"))
#......................
# regions
#......................
ESP.regions.dat <- process_data4(cum_tp_deaths = deathsdf,
time_series_totdeaths_df = ESP_timeseries,
time_series_totdeaths_geocode = "ESP",
population = populationdf,
sero_val = sero_valdf,
seroprev = sero_prevdf,
get_descriptive_dat = TRUE,
groupingvar = "region",
study_ids = "ESP1-2",
agebreaks = c(0, 9, 19, 29, 39,
49, 59, 69, 79, 89, 999))
#......................
# agebands
#......................
ESP.agebands.dat <- process_data4(cum_tp_deaths = deathsdf,
time_series_totdeaths_df = ESP_timeseries,
time_series_totdeaths_geocode = "ESP",
population = populationdf,
sero_val = sero_valdf,
seroprev = sero_prevdf,
get_descriptive_dat = TRUE,
groupingvar = "ageband",
study_ids = "ESP1-2",
agebreaks = c(0, 9, 19, 29, 39,
49, 59, 69, 79, 89, 999))
#......................
# MANUAL ADJUSTMENTS
#......................
# No adjustments to serology as there is alignment of deaths and seroprevalence age groups.
#......................
# save out
#......................
dir.create("data/derived/ESP1-2/", recursive = T)
saveRDS(ESP.agebands.dat, "data/derived/ESP1-2/ESP1-2_agebands.RDS")
saveRDS(ESP.regions.dat, "data/derived/ESP1-2/ESP1-2_regions.RDS")
#............................................................
#---- GBR3 #----
#...........................................................
GBR3TimeSeries <- readr::read_tsv("data/raw/deaths_time_series_subnat.tsv") %>%
dplyr::filter(study_id == "GBR3") %>%
dplyr::rename(date = date_end_survey,
deaths = n_deaths) %>%
dplyr::mutate(date = lubridate::dmy(date), # NB, we just convert this to a lubridate format and later within the process data function, dates are converted to international format
georegion = "ENG") %>%
dplyr::select(c("date", "georegion", "deaths")) %>%
dplyr::filter(date <= lubridate::mdy("07-17-2020"))
#......................
# ages
#......................
GBR3.agebands.dat <- process_data4(cum_tp_deaths = deathsdf,
time_series_totdeaths_df = GBR3TimeSeries,
time_series_totdeaths_geocode = "ENG",
population = populationdf,
sero_val = sero_valdf,
seroprev = sero_prevdf,
get_descriptive_dat = TRUE,
groupingvar = "ageband",
study_ids = "GBR3",
agebreaks = c(0, 44, 64, 74, 999))
#......................
# regions
#......................
GBR3.regions.dat <- process_data4(cum_tp_deaths = deathsdf,
time_series_totdeaths_df = GBR3TimeSeries,
time_series_totdeaths_geocode = "ENG",
population = populationdf,
sero_val = sero_valdf,
seroprev = sero_prevdf,
get_descriptive_dat = TRUE,
groupingvar = "region",
study_ids = "GBR3",
agebreaks = c(0, 999), # right now GBR3 popdf not refined at all
filtRegions = c("North East",
"North West",
"Yorkshire",
"East Midlands",
"West Midlands",
"East of England",
"London",
"South East",
"South West"))
#......................
# MANUAL ADJUSTMENTS
#......................
# none needed with react adj
#......................
# save out
#......................
dir.create("data/derived/GBR3", recursive = T)
saveRDS(GBR3.agebands.dat, "data/derived/GBR3/GBR3_agebands.RDS")
saveRDS(GBR3.regions.dat, "data/derived/GBR3/GBR3_regions.RDS")
#............................................................
#---- ITA1 #----
#...........................................................
ITA_timeseries<-read.csv("data/raw/ITA_timeseries.csv") %>%
dplyr::mutate(date = lubridate::dmy(date)) %>% # NB, we just convert this to a lubridate format and later within the process data function, dates are converted to international format
dplyr::filter(date<=lubridate::mdy("07-17-2020")) %>%
dplyr::arrange(date) %>%
dplyr::select(date,georegion,deaths)
ITA_timeseries$deaths[2:nrow(ITA_timeseries)]<-ITA_timeseries$deaths[2:nrow(ITA_timeseries)] -
ITA_timeseries$deaths[1:(nrow(ITA_timeseries)-1)]
### quick comparison of JHU vs ITA govt date of deaths
ITAJHU<-filter(JHUdf,georegion=="ITA")
plot(ITA_timeseries$date,ITA_timeseries$deaths,xlab="date",ylab="deaths")
points(ITAJHU$date,ITAJHU$deaths,col="red")
legend("topright",c("Govt","JHU"),pch=1,col=c("black","red"))
# liftover the one negative death day in the govt (same as we do for JHU)
ITA_timeseries <- ITA_timeseries %>%
dplyr::mutate(deaths = ifelse(deaths < 0, 0, deaths))
#......................
# ages
#......................
ITA.agebands.dat <- process_data4(cum_tp_deaths = deathsdf,
time_series_totdeaths_df = ITA_timeseries,
time_series_totdeaths_geocode = "ITA",
population = populationdf,
sero_val = sero_valdf,
seroprev = sero_prevdf,
get_descriptive_dat = TRUE,
groupingvar = "ageband",
study_ids = "ITA1",
agebreaks = c(0, 9, 19,
29, 39, 49,
59, 69, 79,
89, 999))
#......................
# regions
#......................
# regional population data for ITA available
ITA.regions.dat <- process_data4(cum_tp_deaths = deathsdf,
time_series_totdeaths_df = ITA_timeseries,
time_series_totdeaths_geocode = "ITA",
population = populationdf,
sero_val = sero_valdf,
seroprev = sero_prevdf,
get_descriptive_dat = TRUE,
groupingvar = "region",
study_ids = "ITA1")
#......................
# MANUAL ADJUSTMENTS
#......................
# some lack of overlap between serology and deaths data.
# also no seropositives numbers, so must use logit method
# use sero 0-17 for 0-9 and 9-19 yr olds
# use the mean of 18-34 and 35-49 for the 30-39 group (they are very similar anyway)
agebands <- unique(ITA.agebands.dat$deaths_propMCMC$ageband)
ita_adj_seroprev <- tibble::tibble(
ObsDaymin = unique(ITA.agebands.dat$seroprevMCMC$ObsDaymin),
ObsDaymax = unique(ITA.agebands.dat$seroprevMCMC$ObsDaymax),
ageband = agebands,
n_positive = NA,
n_tested = NA,
SeroPrev = NA,
SeroLCI = NA,
SeroUCI = NA)
ita_adj_seroprev$SeroLCI[1:2] <- ITA.agebands.dat$seroprev_group$range_sero_low[1]
ita_adj_seroprev$SeroLCI[3] <- ITA.agebands.dat$seroprev_group$range_sero_low[2]
ita_adj_seroprev$SeroLCI[4] <- mean(ITA.agebands.dat$seroprev_group$range_sero_low[2:3])
ita_adj_seroprev$SeroLCI[5] <- ITA.agebands.dat$seroprev_group$range_sero_low[3]
ita_adj_seroprev$SeroLCI[6] <- ITA.agebands.dat$seroprev_group$range_sero_low[4]
ita_adj_seroprev$SeroLCI[7] <- ITA.agebands.dat$seroprev_group$range_sero_low[5]
ita_adj_seroprev$SeroLCI[8:10] <- ITA.agebands.dat$seroprev_group$range_sero_low[6]
ita_adj_seroprev$SeroUCI[1:2] <- ITA.agebands.dat$seroprev_group$range_sero_high[1]
ita_adj_seroprev$SeroUCI[3] <- ITA.agebands.dat$seroprev_group$range_sero_high[2]
ita_adj_seroprev$SeroUCI[4] <- mean(ITA.agebands.dat$seroprev_group$range_sero_high[2:3])
ita_adj_seroprev$SeroUCI[5] <- ITA.agebands.dat$seroprev_group$range_sero_high[3]
ita_adj_seroprev$SeroUCI[6] <- ITA.agebands.dat$seroprev_group$range_sero_high[4]
ita_adj_seroprev$SeroUCI[7] <- ITA.agebands.dat$seroprev_group$range_sero_high[5]
ita_adj_seroprev$SeroUCI[8:10] <- ITA.agebands.dat$seroprev_group$range_sero_high[6]
ita_adj_seroprev$SeroPrev[1:2] <- ITA.agebands.dat$seroprev_group$seroprevalence_unadjusted[1]
ita_adj_seroprev$SeroPrev[3] <- ITA.agebands.dat$seroprev_group$seroprevalence_unadjusted[2]
ita_adj_seroprev$SeroPrev[4] <- mean(ITA.agebands.dat$seroprev_group$seroprevalence_unadjusted[2:3])
ita_adj_seroprev$SeroPrev[5] <- ITA.agebands.dat$seroprev_group$seroprevalence_unadjusted[3]
ita_adj_seroprev$SeroPrev[6] <- ITA.agebands.dat$seroprev_group$seroprevalence_unadjusted[4]
ita_adj_seroprev$SeroPrev[7] <- ITA.agebands.dat$seroprev_group$seroprevalence_unadjusted[5]
ita_adj_seroprev$SeroPrev[8:10] <- ITA.agebands.dat$seroprev_group$seroprevalence_unadjusted[6]
# overwrite
ITA.agebands.dat$seroprevMCMC <- ita_adj_seroprev
# deal with only reporting confidence intervals for the region model
ita_rgn_lftovr <- ITA.regions.dat$seroprev_group %>%
dplyr::select(c("region", "seroprevalence_unadjusted", "range_sero_low", "range_sero_high")) %>%
dplyr::rename(SeroPrev = seroprevalence_unadjusted,
SeroLCI = range_sero_low,
SeroUCI = range_sero_high)
ITA.regions.dat$seroprevMCMC <- ITA.regions.dat$seroprevMCMC %>%
dplyr::select(-c("SeroPrev")) %>%
dplyr::left_join(., ita_rgn_lftovr)
#......................
# save out
#......................
dir.create("data/derived/ITA1", recursive = T)
saveRDS(ITA.agebands.dat, "data/derived/ITA1/ITA1_agebands.RDS")
saveRDS(ITA.regions.dat, "data/derived/ITA1/ITA1_regions.RDS")
#............................................................
#---- NLD1 #-----
#...........................................................
NLD_timeseries<-read.csv("data/raw/NLD_timeseries.csv") %>%
dplyr::mutate(date = lubridate::dmy(date)) %>% # NB, we just convert this to a lubridate format and later within the process data function, dates are converted to international format
dplyr::filter(date<=lubridate::mdy("07-17-2020")) %>%
dplyr::arrange(date) %>%
dplyr::select(date,georegion,deaths)
NLD_timeseries$deaths[2:nrow(NLD_timeseries)]<-NLD_timeseries$deaths[2:nrow(NLD_timeseries)] -
NLD_timeseries$deaths[1:(nrow(NLD_timeseries)-1)]
### quick comparison of JHU vs govt date of deaths
NLDJHU<-filter(JHUdf,georegion=="NLD")
plot(NLD_timeseries$date,NLD_timeseries$deaths)
points(NLDJHU$date,NLDJHU$deaths,col="red")
#......................
# ages
#......................
NLD.agebands.dat <- process_data4(cum_tp_deaths = deathsdf,
time_series_totdeaths_df = NLD_timeseries,
time_series_totdeaths_geocode = "NLD",
population = populationdf,
sero_val = sero_valdf,
seroprev = sero_prevdf,
get_descriptive_dat = TRUE,
groupingvar = "ageband",
study_ids = "NLD1",
agebreaks = c(0, 49, 59,
69, 79, 89,
999))
#......................
# MANUAL ADJUSTMENTS
#......................
# Netherlands seroprevalence and deaths not perfectly aligned.
# Assumptions:
# Let 18-30, 31-40, and 41-50 stand in for 0-49 ageband
# Let 51-60 stand in for 49-59 ageband
# Let 60-72 stand in for 59-69, 69-79, 79-89, 89++ ageband
timepoint1 <- NLD.agebands.dat$seroprev_group %>%
dplyr::filter(ObsDaymax == 106)
agebands <- unique(NLD.agebands.dat$deaths_propMCMC$ageband)
nld_adj_seroprev <- tibble::tibble(
ObsDaymin = 92, # only timepoint 1
ObsDaymax = 106,
ageband = agebands,
n_positive = NA,
n_tested = NA)
# first assumption, timepoint 1
nld_adj_seroprev$n_positive[nld_adj_seroprev$ageband == "(0,49]"] <- sum(timepoint1$n_positive[1:3])
nld_adj_seroprev$n_tested[nld_adj_seroprev$ageband == "(0,49]"] <- sum(timepoint1$n_tested[1:3])
# second assumption, timepoint 1
nld_adj_seroprev$n_positive[nld_adj_seroprev$ageband == "(49,59]"] <- timepoint1$n_positive[4]
nld_adj_seroprev$n_tested[nld_adj_seroprev$ageband == "(49,59]"] <- timepoint1$n_tested[4]
# third assumption, timepoint 1
nld_adj_seroprev$n_positive[nld_adj_seroprev$ageband %in%
c("(59,69]", "(69,79]", "(79,89]", "(89,999]")] <- timepoint1$n_positive[5]
nld_adj_seroprev$n_tested[nld_adj_seroprev$ageband %in%
c("(59,69]", "(69,79]", "(79,89]", "(89,999]")] <- timepoint1$n_tested[5]
# calculate seroprev for timepoints 1
nld_adj_seroprev <- nld_adj_seroprev %>%
dplyr::mutate(SeroPrev = n_positive/n_tested)
# time point2
timepoint2 <- tibble::tibble(
ObsDaymin = 131,
ObsDaymax = 141,
ageband = nld_adj_seroprev$ageband,
n_positive = 385,
n_tested = 7000) %>%
dplyr::mutate(SeroPrev = n_positive/n_tested)
nld_adj_seroprev <- dplyr::bind_rows(nld_adj_seroprev, timepoint2)
# overwrite
NLD.agebands.dat$seroprevMCMC <- nld_adj_seroprev
#......................
# save out
#......................
dir.create("data/derived/NLD1", recursive = T)
saveRDS(NLD.agebands.dat, "data/derived/NLD1/NLD1_agebands.RDS")
#............................................................
#--- SWE1 #----
#...........................................................
#......................
# ages
#......................
SWE1_timeseries<-read.csv("data/raw/SWE_timeseries.csv") %>%
dplyr::rename(date=Datum_avliden,
deaths=Antal_avlidna) %>%
dplyr::mutate(date = lubridate::dmy(date),
georegion="SWE") %>% # NB, we just convert this to a lubridate format and later within the process data function, dates are converted to international format
dplyr::filter(date<=lubridate::mdy("07-17-2020")) %>%
dplyr::select(date,georegion,deaths)
### quick comparison of JHU vs SWE govt date of deaths
SWEJHU<-filter(JHUdf,georegion=="SWE")
plot(SWE1_timeseries$date,SWE1_timeseries$deaths,xlab="date",ylab="deaths")
points(SWEJHU$date,SWEJHU$deaths,col="red")
legend("topright",c("Swedish govt","JHU"),pch=1,col=c("black","red"))
SWE.agebands.dat <- process_data4(cum_tp_deaths = deathsdf,
time_series_totdeaths_df = SWE1_timeseries,
time_series_totdeaths_geocode = "SWE",
population = populationdf,
sero_val = sero_valdf,
seroprev = sero_prevdf,
get_descriptive_dat = TRUE,
groupingvar = "ageband",
study_ids = "SWE1")
#......................
# MANUAL ADJUSTMENTS
#......................
ndays <- length(SWE.agebands.dat$seroprevMCMC$ObsDaymin)
agelen <- length(SWE.agebands.dat$prop_pop$ageband)
# assume blood donors stand in for all ages
swe_adj_seroprev <- tibble::tibble(
ObsDaymin = SWE.agebands.dat$seroprevMCMC$ObsDaymin,
ObsDaymax = SWE.agebands.dat$seroprevMCMC$ObsDaymax,
n_positive = NA,
n_tested = NA)
swe_adj_seroprev <- lapply(SWE.agebands.dat$prop_pop$ageband, function(x){
swe_adj_seroprev %>%
dplyr::mutate(ageband = x)}) %>%
dplyr::bind_rows() %>%
dplyr::mutate(age_high = as.numeric(stringr::str_extract(ageband, "[0-9]+?(?=-)"))) %>%
dplyr::arrange(ObsDaymin, ObsDaymax, age_high) %>%
dplyr::select(-c("age_high"))
SWEnatprev <- SWE.agebands.dat$seroprev_group %>%
dplyr::select(c("ObsDaymin", "ObsDaymax", "seroprevalence_unadjusted", "range_sero_low", "range_sero_high")) %>%
dplyr::rename(SeroPrev = seroprevalence_unadjusted,
SeroLCI = range_sero_low,
SeroUCI = range_sero_high)
swe_adj_seroprev <- dplyr::left_join(swe_adj_seroprev, SWEnatprev)
# overwrite
SWE.agebands.dat$seroprevMCMC <- swe_adj_seroprev
#......................
# save out
#......................
dir.create("data/derived/SWE1", recursive = T)
saveRDS(SWE.agebands.dat, "data/derived/SWE1/SWE1_agebands.RDS")
#..................................................................................
#---- Preprocess USA Data #-----
#..................................................................................
populationdf <- readr::read_csv("data/raw/USA_County_Demographic_Data.csv") %>%
dplyr::select(-c("Male_Total", "Female_Total")) %>%
dplyr::select(-c(dplyr::starts_with("Both"))) %>%
tidyr::pivot_longer(., cols = -c("State", "County"), names_to = "strata", values_to = "population") %>%
dplyr::mutate(
country = "USA",
Countysp = gsub(" County", "", County),
Countysp = gsub(" ", "-", Countysp),
region = paste0(State, "_", Countysp),
ageband = stringr::str_split_fixed(strata, "[A-Za-z]_", n = 2)[,2],
ageband = ifelse(stringr::str_detect(ageband, "\\+"),
paste0(stringr::str_extract_all(ageband, "[0-9]+", simplify = TRUE), "-", 999),
ageband),
age_low = as.numeric( stringr::str_split_fixed(ageband, "-[0-9]+", n = 2)[,1] ),
age_high = as.numeric( stringr::str_split_fixed(ageband, "[0-9]-", n = 2)[,2] ),
gender = stringr::str_extract_all(strata, "[A-Za-z]+", simplify = TRUE)[,1],
age_breakdown = 1,
for_regional_analysis = 1,
gender_breakdown = 1
) %>%
dplyr::select(c("country", "age_low", "age_high", "region", "gender", "population", "age_breakdown", "for_regional_analysis")) %>%
dplyr::left_join(., readr::read_csv("data/raw/usa_study_id_county_key.csv"), by = "region")
# fix USA coding 0-5, 5-9, 10-14 ... to match int format
populationdf <- populationdf %>%
dplyr::mutate(age_high = ifelse(age_high == 5 & age_low == 0, 4, age_high))
# JHU data for new recast df
JHUdf <- readr::read_csv("data/raw/time_series_covid19_deaths_US.csv") %>%
dplyr::select(-c("UID", "iso2", "code3", "FIPS", "Country_Region", "Lat", "Long_", "Combined_Key", "Population")) %>%
tidyr::pivot_longer(., cols = -c("iso3", "Admin2", "Province_State"),
names_to = "date", values_to = "cumdeaths") %>%
dplyr::filter(!is.na(Admin2)) %>%
dplyr::mutate(Admin2sp = sub(" ", "-", Admin2),
georegion = paste0(Province_State, "_", Admin2sp)) %>%
dplyr::mutate(date = lubridate::mdy(date)) %>% # NB, we just convert this to a lubridate format and later within the process data function, dates are converted to international format
dplyr::group_by(georegion) %>% # group by region for daily deaths
dplyr::mutate(deaths = cumdeaths - dplyr::lag(cumdeaths),
deaths = ifelse(is.na(deaths), 0, deaths), # take care of first value
deaths = ifelse(deaths < 0, 0, deaths)) %>% # take care of cumulative deaths corrections
dplyr::select(c("date", "georegion", "deaths")) %>%
dplyr::filter(date <= lubridate::mdy("07-17-2020")) %>%
dplyr::ungroup(.)
#............................................................
#---- NYS_1: New York State #-----
#...........................................................
#......................
# preprocess New York State
#......................
# groupings of seroprevalence in the study:
wr <- c("New York_Westchester|New York_Rockland")
long_island <- c("New York_Nassau|New York_Suffolk")
nyc <- "New York_New-York|New York_Kings|New York_Bronx|New York_Queens|New York_Richmond"
upstate <- c("Albany|Allegany|Broome|Cattaraugus|Cayuga|Chautauqua|Chemung|Chenango|Clinton|Columbia|Cortland|Delaware|Dutchess|Erie|Essex|Franklin|Fulton|Genessee|Greene|Hamilton|Herkimer|Jefferson|Lewis|Livingston|Madison|Monroe|Montgomery|Niagara|Oneida|Onondaga|Ontario|Orange|Orleans|Oswego|Otsego|Putnam|Rensselaer|St. Lawrence|Saratoga|Schenectady|Schoharie|Schuyler|Seneca|Steuben|Sullivan|Tioga|Tompkins|Ulster|Warren|Washington|Wayne|Wyoming|Yates")
NYSJHU <- JHUdf %>%
dplyr::filter(grepl("New York_", georegion))
wrd <- NYSJHU %>%
dplyr::filter(grepl(wr, georegion)) %>%
dplyr::mutate(georegion = "Westchester and Rockland")
long_islandd <- NYSJHU %>%
dplyr::filter(grepl(long_island, georegion)) %>%
dplyr::mutate(georegion = "Long Island")
nycd <- NYSJHU %>%
dplyr::filter(grepl(nyc, georegion)) %>%
dplyr::mutate(georegion = "New York City")
upstated <- NYSJHU %>%
dplyr::filter(grepl(upstate, georegion)) %>%
dplyr::mutate(georegion = "Upstate New York")
NYSJHU <- rbind(wrd,nycd, long_islandd,upstated)
NYSJHU <- NYSJHU %>%
dplyr::group_by(date, georegion) %>%
dplyr::summarise(deaths = sum(deaths))
NYSdeathsrgndf <- NYSJHU %>%
dplyr::group_by(georegion) %>%
dplyr::summarise(n_deaths = sum(deaths)) %>%
dplyr::mutate(country = "USA",
study_id = "NYS1",
age_low = 0,
age_high = 999,
region = georegion,
gender = "both",
age_breakdown = 0,
for_regional_analysis = 1,
gender_breakdown = 0)
NYSJHU_tseries<- NYSJHU %>%
dplyr::mutate(georegion="NYS") %>%
dplyr::group_by(date,georegion) %>%
dplyr::summarise(deaths = sum(deaths)) %>%
dplyr::ungroup()
NYSpopdf <- populationdf %>%
dplyr::filter(grepl("New York_", region))
wrp <- NYSpopdf %>%
dplyr::filter(grepl(wr,region)) %>%
dplyr::mutate(region="Westchester and Rockland")
long_islandp<-NYSpopdf %>%
dplyr::filter(grepl(long_island,region)) %>%
dplyr::mutate(region="Long Island")
nycp <- NYSpopdf %>%
dplyr::filter(grepl(nyc,region)) %>%
dplyr::mutate(region="New York City")
upstatep <- NYSpopdf %>%
dplyr::filter(grepl(upstate,region)) %>%
dplyr::mutate(region="Upstate New York")
NYpopdf <- rbind(wrp,long_islandp,nycp,upstatep)
NYpopdf <- NYpopdf %>%
dplyr::group_by(region, age_low, age_high, gender) %>%
dplyr::summarise(population = sum(population)) %>%
dplyr::mutate(
country = "USA",
study_id = "NYS1",
age_breakdown = 1,
for_regional_analysis = 1,
gender_breakdown = 1) %>%
dplyr::ungroup(.)
NYSdeathsdf <- deathsdf %>%
dplyr::filter(study_id == "NYS1") %>%
dplyr::mutate(
country = "USA",
study_id = "NYS1",
region = "NYS",
#age_low = 0,
#age_high = 999,
gender = "both",
age_breakdown = 1,
gender_breakdown = 1,
for_regional_analysis = 1)
#......................
# age
#......................
NYS.age.dat <- process_data4(cum_tp_deaths = NYSdeathsdf,
time_series_totdeaths_df = NYSJHU_tseries,
time_series_totdeaths_geocode = "NYS",
population = NYpopdf,
sero_val = sero_valdf,
seroprev = sero_prevdf,
get_descriptive_dat = TRUE,
groupingvar = "ageband",
study_ids = "NYS1",
agebreaks = c(0, seq(9, 79, 10), 999))
#......................
# regions
#......................
NYS.region.dat <- process_data4(cum_tp_deaths = NYSdeathsrgndf,
time_series_totdeaths_df = NYSJHU_tseries,
time_series_totdeaths_geocode = "NYS",
population = NYpopdf,
sero_val = sero_valdf,
seroprev = sero_prevdf,
get_descriptive_dat = TRUE,
groupingvar = "region",
study_ids = "NYS1",
agebreaks = c(0, 999))
#......................
# MANUAL ADJUSTMENTS
#......................
# Age bands deaths and seroprev exactly non overlapping. Set all ages to be the weighted population average except:
# assume 60+ has seroprevalence of 55+
# weighted pop average from the paper for the whole study area is 0.125
nys_adj_seroprev <- tibble::tibble(
ObsDaymin = unique(NYS.age.dat$seroprevMCMC$ObsDaymin),
ObsDaymax = unique(NYS.age.dat$seroprevMCMC$ObsDaymax),
ageband = unique(NYS.age.dat$deaths_propMCMC$ageband),
n_positive = NA,
n_tested = NA,
SeroPrev = NA)
nys_adj_seroprev <- nys_adj_seroprev %>%
dplyr::mutate(SeroPrev = ifelse(ageband=="(59,69]" | ageband=="(69,79]" | ageband=="(79,999]",
NYS.age.dat$seroprevMCMC$SeroPrev[4], 0.125),
n_tested = ifelse(ageband=="(59,69]" | ageband=="(69,79]" | ageband=="(79,999]" ,
NYS.age.dat$seroprevMCMC$n_tested[4],
sum(NYS.age.dat$seroprevMCMC$n_tested[1:3])),
n_positive=round(n_tested*SeroPrev))
# overwrite
NYS.age.dat$seroprevMCMC <- nys_adj_seroprev
# # removing the May 18 date where 4000 deaths from NYC were added on a single day
# likely due to probable deaths being retrospectively added back in
NYS.age.dat$deaths_TSMCMC$deaths[139] <- -1
#......................
# save out
#......................
saveRDS(NYS.region.dat, "data/derived/USA/NYS1_regions.RDS")
saveRDS(NYS.age.dat, "data/derived/USA/NYS1_agebands.RDS")
#..................................................................................
#---- Care Home Data Processing #-----
#..................................................................................
# note agebands won't be quite contiguous since we collapse any agebands that
# contains 65+ (i.e. mean of 59-69 is 64.5, so would NOT go into 65+ new ageband)
dir.create("data/derived/carehomes/", recursive = TRUE)
source("R/remove_carehome_deaths.R")
source("R/covidcurve_helper_functions.R")
# care home deaths
deaths_ch <- readr::read_csv("data/raw/care_home_deaths.csv")
### CHE1
CHE1.agebands_noCH.dat <- remove_ch_deaths(ageband_dat = CHE1.agebands.dat,
carehomesdf = deaths_ch,
studyid = "CHE1")
saveRDS(CHE1.agebands_noCH.dat, "data/derived/carehomes/CHE1_agebands_noCH.RDS")
### CHE2
CHE2.agebands_noCH.dat <- remove_ch_deaths(ageband_dat = CHE2.agebands.dat,
carehomesdf = deaths_ch,
studyid = "CHE2")
saveRDS(CHE2.agebands_noCH.dat, "data/derived/carehomes/CHE2_agebands_noCH.RDS")
## DNK 1
DNK.agebands_noCH.dat <- remove_ch_deaths(ageband_dat = DNK.agebands.dat,
carehomesdf = deaths_ch,
studyid = "DNK1")
saveRDS(DNK.agebands_noCH.dat, "data/derived/carehomes/DNK1_agebands_noCH.RDS")
### ESP1-2
ESP.agebands_noCH.dat <- remove_ch_deaths(ageband_dat = ESP.agebands.dat,
carehomesdf = deaths_ch,
studyid = "ESP1-2")
saveRDS(ESP.agebands_noCH.dat, "data/derived/carehomes/ESP1-2_agebands_noCH.RDS")
### GBR3
GBR3.agebands_noCH.dat <- remove_ch_deaths(ageband_dat = GBR3.agebands.dat,
carehomesdf = deaths_ch,
studyid = "GBR3")
saveRDS(GBR3.agebands_noCH.dat, "data/derived/carehomes/GBR3_agebands_noCH.RDS")
### SWE1
SWE1.agebands_noCH.dat <- remove_ch_deaths(ageband_dat = SWE.agebands.dat,
carehomesdf = deaths_ch,
studyid = "SWE1")
saveRDS(SWE1.agebands_noCH.dat, "data/derived/carehomes/SWE1_agebands_noCH.RDS")
### NYS1
NYS1.agebands_noCH.dat <- remove_ch_deaths(ageband_dat = NYS.age.dat,
carehomesdf = deaths_ch,
studyid = "NYS1")
saveRDS(NYS1.agebands_noCH.dat, "data/derived/carehomes/NYS1_agebands_noCH.RDS")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.