knitr::opts_chunk$set(
  fig.width = 12, fig.height = 6,
  echo = FALSE,
  warning = FALSE, message = FALSE,
  fig.path = "figures/"
)
library(dplyr)
library(stringr)
library(ggplot2)
library(purrr)
library(EpiEstim)
devtools::load_all()

Unpack parameters

n.sim <- params$n.sim
adm0 <- params$ADM0
bin_from <- params$bin_from

Model

We will assign the national case count to districts according to a multinomial distribution, with probabilities derived either from a gravity model or simply from population density.
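
As a minimal sketch of the idea, a national weekly total can be split across districts with a single multinomial draw; the totals and weights below are made up for illustration, and the package's disaggregate() helper used later may implement this differently (for example with repeated draws).

## Minimal sketch only: illustrative total and weights, not taken from the data.
national_total <- 120
district_weights <- c(KAILAHUN = 0.5, KENEMA = 0.3, KONO = 0.2)
rmultinom(n = 1, size = national_total, prob = district_weights)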

First we read in the data that is required by both methods.

Ebola Parameters

## Serial interval for Ebola: mean 14.2 days, SD 9.6 days.
mean_SI <- 14.2
CV_SI <- 9.6 / 14.2
SItrunc <- 40
## Discretise the serial interval distribution over 0:SItrunc days and renormalise.
SI_Distr <- sapply(0:SItrunc, function(e) DiscrSI(e, mean_SI, mean_SI * CV_SI))
SI_Distr <- SI_Distr / sum(SI_Distr)
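
As an optional sanity check (not part of the original pipeline), the discretised serial interval should sum to one after normalisation and can be inspected with a base plot:

## Optional check: SI_Distr is a probability mass function over 0:SItrunc days.
stopifnot(abs(sum(SI_Distr) - 1) < 1e-8)
plot(0:SItrunc, SI_Distr, type = "h", xlab = "Days", ylab = "Probability")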

Incidence data from HealthMap

This is the aggregated data that will be used to assign cases to districts.

hm_wide <- here::here(
  "data/CaseCounts/processed",
  "HealthMap_Ebola_wide.csv"
) %>%
  readr::read_csv(.)

We will work with weekly incidence counts.

hm_weekly <- daily.to.weekly(hm_wide)
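
daily.to.weekly() is a helper from this package, loaded via devtools::load_all(); its definition is not shown here. The sketch below shows the kind of aggregation it is assumed to perform, summing the numeric columns of the daily data by week; the actual helper may differ, for instance in how weeks are aligned.

## Assumed behaviour only, not the package implementation.
hm_weekly_sketch <- hm_wide %>%
  mutate(week = lubridate::floor_date(Date, unit = "week")) %>%
  group_by(week) %>%
  summarise_if(is.numeric, sum, na.rm = TRUE)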

Also add an ISO week column (week_of_year) so that we can compare across the three data sets.

hm_weekly$week_of_year <- paste0(lubridate::year(hm_weekly$Date),
                            "-W",
                            stringr::str_pad(lubridate::isoweek(hm_weekly$Date),
                                             2,
                                             pad = "0"))

hm_weekly$week_of_year <- factor(hm_weekly$week_of_year, ordered = TRUE)
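
For example, a date in late December 2014 gets the label "2014-W52". Note that lubridate::year() is paired with isoweek() here; for dates such as 29-31 December 2014, which fall in ISO week 1 of 2015, lubridate::isoyear() would match the ISO numbering exactly. The same construction is used for the validation data below, so those two sets of labels remain mutually consistent.

## Illustrative date only, not taken from the data.
d <- as.Date("2014-12-22")
paste0(lubridate::year(d), "-W",
       stringr::str_pad(lubridate::isoweek(d), 2, pad = "0"))
## [1] "2014-W52"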

Weekly Training Incidence Data

who_wide <- here::here("data/CaseCounts/processed",
                       "who_sitrep_wide_sl_2016-05-11.csv") %>%
    readr::read_csv()
who_wide$week_of_year <- factor(who_wide$week_of_year, ordered = TRUE)

Validation data

The performance of the model will be validated against the cleaned-up data.

sl_weekly <- here::here(
  "data/CaseCounts/processed",
  "sl_weekly_tall.csv"
) %>%
  readr::read_csv(.) 

sl_weekly$week_of_year <- paste0(lubridate::year(sl_weekly$Date),
                                 "-W",
                                 stringr::str_pad(lubridate::isoweek(sl_weekly$Date),
                                                  2,
                                                  pad = "0"))
sl_weekly$week_of_year <- factor(sl_weekly$week_of_year, ordered = TRUE)

Further cleaning

We drop PUJEHUN as its incidence is low, which makes it difficult to fit any model.

who_wide <- select(who_wide, -PUJEHUN)  
sl_weekly <- filter(sl_weekly, district != "PUJEHUN")

Method 1: Relative to the reproduction number from the recent past

3A: Use data published by WHO

Suppose we are doing the disaggregation in the last week of 2014. Read in the latest information published by WHO. Start binning at the date that ends the week given by the variable `bin_from`. It might be better to use recent data rather than all the data available up to the point of projection.

use_last <- 6 ## number of recent weeks of data used for training
trng_end_level <- which(levels(who_wide$week_of_year) == bin_from)
trng_start_level <- if (trng_end_level < (use_last + 1)) 1 else trng_end_level - use_last
trng_start <- levels(who_wide$week_of_year)[trng_start_level]
trng_end <- bin_from
training <- filter(
  who_wide,
  week_of_year >= trng_start,
  week_of_year <= trng_end
)
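
To illustrate the windowing arithmetic with hypothetical values (bin_from is in fact supplied via params): if bin_from were "2014-W52" and use_last is 6, the training window spans the seven weeks from 2014-W46 to 2014-W52 inclusive.

## Hypothetical week levels, for illustration only.
wks <- paste0("2014-W", stringr::str_pad(40:52, 2, pad = "0"))
end_level <- which(wks == "2014-W52")                   # 13
start_level <- if (end_level < 7) 1 else end_level - 6  # 7
wks[c(start_level, end_level)]                          # "2014-W46" "2014-W52"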

Fitting a log-linear model

We add 1 to every count so that incidence::fit does not reject all-zero series and throw an error. Dubious, but let's go with it for now.

ll_fit <- select_if(training, is.numeric) %>%
    map(function(I){
        message(I)
        I <- I + 1
        incid <- incidence::as.incidence(x = I,
                                         interval = 7,
                                         isoweek = TRUE)
        incidence::fit(incid)
    })

## Convert each district's growth rate to a reproduction number; these act as
## (unnormalised) weights for allocating the national counts.
district_rates <- map(ll_fit, function(fit) {
    epitrix::r2R0(fit$info$r, SI_Distr)
})

prob <- matrix(unlist(district_rates), nrow = 1)
## Use the fitted column names directly rather than assuming week_of_year is
## the first column of the training data.
colnames(prob) <- names(district_rates)

Assign cases to districts

prob_df <- data.frame(prob) %>% tidyr::gather(district, prob)
prob_df$rel_prob <- prob_df$prob / sum(prob_df$prob)
prob_df <- arrange(prob_df, district)
paste0(names(method)[which(method == TRUE)], "_probs.csv") %>%
  readr::write_csv(x = prob_df, path = .)

Extract the data we want to use for disaggregation.

bin_for <- 6 ## weeks
num_weeks <- length(levels(hm_weekly$week_of_year))
start_at <- which(levels(hm_weekly$week_of_year) == bin_from)
end_at <- min(num_weeks, start_at + bin_for)

bin_end <- levels(hm_weekly$week_of_year)[end_at]

bin_this <- filter(
  hm_weekly,
  week_of_year >= bin_from,
  week_of_year <= bin_end
) %>%
    droplevels() %>%
    select(week_of_year, `Sierra Leone`)
## disaggregate() and disaggregate_distr() are helpers from this package
## (loaded via devtools::load_all()); as used here, the first allocates each
## national total across districts and the second summarises the result at
## the requested quantiles.
district_weekly <- split(bin_this, bin_this$week_of_year) %>%
  lapply(function(df) {
    mat <- disaggregate(
      total = df$`Sierra Leone`,
      pmatrix = prob
    )
    colnames(mat) <- colnames(prob)
    probs <- c(0.025, 0.5, 0.975)
    distr <- disaggregate_distr(mat, probs = probs)
    distr
  }) %>%
  bind_rows(.id = "week_of_year")

district_weekly$bin_from <- bin_from

Write out the output.

here::here("output", paste0(
 "projections_",                        
  bin_from,
  ".csv"
)) %>%
    readr::write_csv(x = district_weekly, path = .)

Also write out the training data.

Training Data

who_tall <- filter(who_wide, week_of_year >= trng_start &
                             week_of_year <= bin_end) %>%
    tidyr::gather(district, incid, -week_of_year)
here::here("output", paste0(
  "training_",                         
  bin_from,
  ".csv"
)) %>%
    readr::write_csv(x = who_tall, path = .)

Visualizing the results

Validation data

compare <- left_join(bin_this, sl_weekly)
compare$week_of_year <- factor(compare$week_of_year)
district_weekly$week_of_year <- factor(district_weekly$week_of_year)
district_weekly$district <- factor(district_weekly$district)
## Plot training data
p <- ggplot(who_tall, aes(week_of_year, incid)) + geom_point(col = "red")
p <- p + facet_wrap(~district, scales = "free_y")
## Plot validation data
p <- p + geom_point(data = compare, aes(x = week_of_year, y = incid))
## Plot projections
p <- p + geom_line(data = district_weekly,
                   aes(x = week_of_year, y = `0.5`, group = 1),
                   col = "blue")
p <- p + geom_ribbon(data = district_weekly,
                     aes(x = week_of_year,
                         ymin = `0.025`,
                         ymax = `0.975`,
                         group = 1),
                     alpha = 0.5,
                     inherit.aes = FALSE)
p <- p + theme_classic()
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1))
p
here::here("output", paste0(bin_from, ".png")) %>% ggsave(p)

