knitr::opts_chunk$set( fig.width = 12, fig.height = 6, echo = FALSE, warning = FALSE, message = FALSE, fig.path = "figures/" )
library(ggplot2) library(dplyr) library(EpiEstim) devtools::load_all() pow_dist <- params$pow_dist t.proj <- params$t.proj n.sim <- params$n.sim n.dates.sim <- params$n.dates.sim p.stay <- params$p.stay
w.africa <- c("Guinea", "Liberia", "Sierra Leone")
Culled from literature
mean_SI <- 14.2 CV_SI <- 9.6 / 14.2 SItrunc <- 40 SI_Distr <- sapply(0:SItrunc, function(e) discr_si(e, mean_SI, mean_SI * CV_SI)) SI_Distr <- SI_Distr / sum(SI_Distr) time_window <- 7 * 7
pow_N_to <- pow_N_from <- 1 K <- 1
Read in cleaned-up and wide formatted data.
hm_wide <- here::here( "data/CaseCounts/processed", "HealthMap_Ebola_wide.csv" ) %>% readr::read_csv()
We now use the incidence count to estimate reproduction number.
start <- 2:(length(hm_wide$date) - time_window) end <- start + time_window end.dates <- hm_wide[end, "date"] r.estim <- select(hm_wide, -date) %>% map(function(x) { res <- EstimateR( x, T.Start = start, T.End = end, method = "NonParametricSI", SI.Distr = SI_Distr, plot = FALSE, CV.Posterior = 1, Mean.Prior = 1, Std.Prior = 0.5 ) res$R <- cbind(res$R, date = end.dates) res$R })
lapply(names(r.estim), function(country) { R <- r.estim[[country]] outfile <- paste0("output/", country, "-R.csv") write.csv(R, file = outfile, row.names = F, quote = F) })
imap(r.estim, function(R, country) { p <- ggplot(R, aes(date)) + geom_ribbon( aes( ymin = `Quantile.0.025(R)`, ymax = `Quantile.0.975(R)` ), fill = "grey70" ) + geom_line(aes(y = `Mean(R)`)) + theme_classic() + xlab("") + ylab("Reproduction Number R") ggsave(file = here::here("output", paste0(country, "-R.pdf")), p) })
We assume that the reproduction number remains unchanged for the time period over which we wish to project. For each location, distribution of r_t at t.proj is r_t over the next n.days.sim.
r.j.t <- r.estim %>% lapply(function(R){ cutoff <- which(R$Date %in% by.location_incid[t.proj, "Date"]) shape <- R[cutoff, "Mean(R)"]^2 / R[cutoff, "Std(R)"]^2 scale <- R[cutoff, "Std(R)"]^2 / R[cutoff, "Mean(R)"] return(rgamma(n.sim, shape = shape, scale = scale))}) %>% data.frame
Determine the flow matrix for the countries of interest only.
adm0_centroids <- here::here("data", "Geography/GravityModel/raw/adm0_centroids.tsv") %>% read.csv(stringsAsFactors = FALSE, sep = "\t", header = FALSE) %>% filter(V1 %in% w.africa) names(adm0_centroids) <- c("country", "id", "lon", "lat", "pop") flow.matrix <- flow_matrix(longitude = adm0_centroids[, "lon"], latitude = adm0_centroids[, "lat"], population = adm0_centroids[, "pop"], place_names = adm0_centroids[, "country"], model = "gravity", K = K, pow_N_from = pow_N_from, pow_N_to = pow_N_to, pow_dist = pow_dist) ## Relative risk relative.risk <- flow.matrix / rowSums(flow.matrix, na.rm=TRUE) ## matrix characterising the population movement between geographical units #p.stay <- 0.99 # this can be a vector p.movement <- probability_movement(relative.risk, p.stay)
At this point, all the pieces are in place. by.location_incid contains the incidence count r.j.t contains the estimates of reproduction numbers. p.movement conatins the probabilities. SI_Distr is the serial interval distribution. The model is: lambda_j_t = p.movement * (incidence * r_t) * serial_interval taking care of the dimensions of course. Now divide the dataset into training and validation sets.
We will now split our data into training and validation sets.
training <- by.location_incid[1:t.proj, ] validation <- by.location_incid[(t.proj + 1):nrow(by.location_incid), ]
incidence.count <- select(training, -Date) incid <- as.matrix(incidence.count) dates.all <- training$Date %>% c(seq(max(.) + 1, length.out = n.dates.sim, by = 1)) dates_pred <- seq(from = max(training$Date) + 1, length.out = n.dates.sim, by = 1) t.max <- nrow(incidence.count) + n.dates.sim - 1 daily.projections <- plyr::alply(r.j.t, 1, function(r.t){ r.t <- as.matrix(r.t) out <- project(incid, r.t, SI_Distr, p.movement, n.dates.sim) out <- data.frame(out) out$Date <- dates_pred out})
weekly.available <- c(training = list(training), validation = list(validation)) %>% lapply(daily.to.weekly) %>% bind_rows(.id = "Category") weekly.projections <- lapply(daily.projections, daily.to.weekly) %>% bind_rows(.) projections_distr <- projection_quantiles(weekly.projections) #outfile <- paste0("hm_summary_projections_", p.stay, "_", pow_dist, "_", t.proj, ".csv") #outfile <- here::here("output", outfile) #write.csv(projections_distr, file = outfile, row.names = F, quote = F) #outfile <- paste0("hm_summary_projections_", p.stay, "_", pow_dist, "_", t.proj, ".png") #outfile <- here::here("output", outfile)
num_samples <- 4550 countries <- select(daily.projections[[1]], -Date) %>% colnames() wafrica_all <- purrr::map_dfr(countries, function(x){ out <- sample_projections(daily.projections, x, num_samples) out$date <- dates_pred out }) aggregated <- split(wafrica_all, wafrica_all$date) %>% purrr::map(function(x) select(x, starts_with("sim")) %>% shuffle_cols() %>% colSums) aggregated <- purrr::map_dfr(aggregated, function(x) data.frame(matrix(x, nrow = 1, dimnames = list("1", names(x)))), .id = "Date") aggregated$Date %<>% as.Date wafrica_weekly <- daily.to.weekly(aggregated) wafrica_distr <- tidyr::gather(wafrica_weekly, sim, aggregated, -Date) %>% projection_quantiles
Stack them together and write them out for later.
projections_distr <- rbind(projections_distr, wafrica_distr) projections_distr_wide <- reshape(projections_distr, idvar = 'Date', timevar = 'Country', direction = 'wide') projections_distr_wide$projection_from <- max(training$Date) projections_distr_wide <- select(projections_distr_wide, projection_from, Date, y.Guinea:ymax.aggregated) outfile <- paste0("hm_projections_", p.stay, "_", pow_dist, "_", n.dates.sim, ".csv") outfile <- here::here("output", outfile) append <- file.exists(outfile) readr::write_csv(x = projections_distr_wide, path = outfile, append = append)
tmp <- select(weekly.available, -Category) %>% tidyr::gather(Country, Incidence, -Date) tmp$Date %<>% as.Date tmp2 <- dplyr::left_join(projections_distr, tmp, by = c("Date", "Country")) here::here("output", paste0("healthmap_goodness_fit_", t.proj, ".csv")) %>% readr::write_csv(x = tmp2, path = .) outfile <- paste0("hm_summary_projections_", p.stay, "_", pow_dist, "_", t.proj, ".png") outfile <- here::here("output", outfile) ## write.table(tmp, outfile, ## sep = ",", col.names = col.names, append = T, ## row.names = F, quote = F)
aggregated$place <- "aggregated" aggregated <- select(aggregated, starts_with("sim"), place, date = Date) wafrica_all <- rbind(wafrica_all, aggregated) wafrica_tall <- tidyr::gather(wafrica_all, sim, val, -date, -place) projections_range <- wafrica_tall %>% group_by(date, place) %>% summarise(sim_range = list(range(val))) %>% mutate(sim_range = purrr::map(sim_range, tibble::enframe, name = "range")) %>% tidyr::unnest() %>% tidyr::spread(range, value) outfile <- paste0("hm_projections_", p.stay, "_", pow_dist, "_", t.proj, "_", n.dates.sim, "_range.csv") outfile <- here::here("output", outfile) readr::write_csv(x = projections_range, path = outfile)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.