knitr::opts_chunk$set( fig.width = 12, fig.height = 6, echo = FALSE, warning = FALSE, message = FALSE, fig.path = "figures/" )
library(dplyr) library(stringr) library(ggplot2) library(purrr) library(EpiEstim) devtools::load_all()
n.sim <- params$n.sim adm0 <- params$ADM0 bin_from <- params$bin_from
We will assign the national case count to districts according to a multinomial distribution with the probabilities derived from gravity model or simply population density.
First we read in the data that is required by both methods.
mean_SI <- 14.2 CV_SI <- 9.6 / 14.2 SItrunc <- 40 SI_Distr <- sapply(0:SItrunc, function(e) DiscrSI(e, mean_SI, mean_SI * CV_SI)) SI_Distr <- SI_Distr / sum(SI_Distr)
This is the aggregated data that will be used to assign cases to districts.
hm_wide <- here::here( "data/CaseCounts/processed", "HealthMap_Ebola_wide.csv" ) %>% readr::read_csv(.)
We will work with weekly incidence counts.
hm_weekly <- daily.to.weekly(hm_wide)
Also add isoweek column so that we can compare across the three data sets.
hm_weekly$week_of_year <- paste0(lubridate::year(hm_weekly$Date), "-W", stringr::str_pad(lubridate::isoweek(hm_weekly$Date), 2, pad = "0")) hm_weekly$week_of_year <- factor(hm_weekly$week_of_year, ordered = TRUE)
who_wide <- here::here("data/CaseCounts/processed", "who_sitrep_wide_sl_2016-05-11.csv") %>% readr::read_csv() who_wide$week_of_year <- factor(who_wide$week_of_year, ordered = TRUE)
The performance of the model will be validated against the cleaned-up data.
sl_weekly <- here::here( "data/CaseCounts/processed", "sl_weekly_tall.csv" ) %>% readr::read_csv(.) sl_weekly$week_of_year <- paste0(lubridate::year(sl_weekly$Date), "-W", stringr::str_pad(lubridate::isoweek(sl_weekly$Date), 2, pad = "0")) sl_weekly$week_of_year <- factor(sl_weekly$week_of_year, ordered = TRUE)
Maybe get rid of PUJEHUN as it has low incidence (difficult to fit any model)
who_wide <- select(who_wide, -PUJEHUN) sl_weekly <- filter(sl_weekly, district != "PUJEHUN")
Suppose we are doing the disaggregation in the last week of 2014. Read in the latest information published by WHO. Start binning at date that ends week in variable ``bin_from''. It might be better to use recent data rather than all data available upto the point of projection.
use_last <- 6 ## weeks data trng_end_level <- which(levels(who_wide$week_of_year) == bin_from) trng_start_level <- if (trng_end_level < (use_last + 1)) 1 else trng_end_level - use_last trng_start <- levels(who_wide$week_of_year)[trng_start_level] trng_end <- bin_from training <- filter( who_wide, week_of_year >= trng_start, week_of_year <= trng_end )
Add 1 to avoid incidence::fit rejecting all 0s and throwing an error. Dubious but lets go with it for now.
ll_fit <- select_if(training, is.numeric) %>% map(function(I){ message(I) I <- I + 1 incid <- incidence::as.incidence(x = I, interval = 7, isoweek = TRUE) incidence::fit(incid) }) district_rates <- map(ll_fit, function(fit){ epitrix::r2R0(fit$info$r, SI_Distr)}) prob <- matrix(unlist(district_rates), nrow = 1) colnames(prob) <- colnames(training)[-1]
prob_df <- data.frame(prob) %>% tidyr::gather(district, prob) prob_df$rel_prob <- prob_df$prob / sum(prob_df$prob) prob_df <- arrange(prob_df, district) paste0(names(method)[which(method == TRUE)], "_probs.csv") %>% readr::write_csv(x = prob_df, path = .)
Extract the data we want to use for disaggreation.
bin_for <- 6 ## weeks num_weeks <- length(levels(hm_weekly$week_of_year)) start_at <- which(levels(hm_weekly$week_of_year) == bin_from) end_at <- min(num_weeks, start_at + bin_for) bin_end <- levels(hm_weekly$week_of_year)[end_at] bin_this <- filter( hm_weekly, week_of_year >= bin_from, week_of_year <= bin_end ) %>% droplevels() %>% select(week_of_year, `Sierra Leone`)
district_weekly <- split(bin_this, bin_this$week_of_year) %>% lapply(function(df) { mat <- disaggregate( total = df$`Sierra Leone`, pmatrix = prob ) colnames(mat) <- colnames(prob) probs <- c(0.025, 0.5, 0.975) distr <- disaggregate_distr(mat, probs = probs) distr }) %>% bind_rows(.id = "week_of_year") district_weekly$bin_from <- bin_from
Write out the output.
here::here("output", paste0( "projections_", bin_from, ".csv" )) %>% readr::write_csv(x = district_weekly, path = .)
Also write out the training data.
who_tall <- filter(who_wide, week_of_year >= trng_start & week_of_year <= bin_end) %>% tidyr::gather(district, incid, -week_of_year)
here::here("output", paste0( "training_", bin_from, ".csv" )) %>% readr::write_csv(x = who_tall, path = .)
compare <- left_join(bin_this, sl_weekly) compare$week_of_year <- factor(compare$week_of_year) district_weekly$week_of_year <- factor(district_weekly$week_of_year) district_weekly$district <- factor(district_weekly$district) ## Plot training data p <- ggplot(who_tall, aes(week_of_year, incid)) + geom_point(col = "red") p <- p + facet_wrap(~district, scale = "free_y") ## Plot validation data p <- p + geom_point(data = compare, aes(x = week_of_year, y = incid)) ## Plot projections p <- p + geom_line(data = district_weekly, aes(x = week_of_year, y = `0.5`, group = 1), col = "blue") p <- p + geom_ribbon(data = district_weekly, aes(x = week_of_year, ymin = `0.025`, ymax = `0.975`, group = 1), alpha = 0.5, inherit.aes = FALSE) p <- p + theme_classic() p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1)) p here::here("output", paste0(bin_from, ".png")) %>% ggsave(p)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.