WHO_SL <- WHO_bydistricts %>% filter(Country == "Sierra Leone") %>% droplevels %>% na.omit
ggplot(WHO_SL, aes(Date, incid)) + geom_point() + facet_wrap(~CL_DistrictRes, ncol = 3) + theme_minimal() + ggtitle("Incidence in the districts of Sierra Leone")
We will proceed as we did with HealthMap data - split the data set into training and validation sets, set up the parameters and press play.
SL_wide <- WHO_SL %>% ungroup %>% select(-Country) %>% tidyr::spread(CL_DistrictRes, incid, fill = 0)
SL_wide <- readr::read_csv("data/CaseCounts/who/sl_wide.csv")
We are now ready to estimate R and move forward with the projection exactly as we did for HealthMap data.
start <- 2:(length(SL_wide$Date) - time_window) end <- start + time_window end.dates <- SL_wide[end, "Date"] r.estim <- SL_wide %>% select(-Date) %>% plyr::alply(2, .dims = TRUE, function(incid) { I <- pull(incid, 1) res <- EstimateR(I, T.Start = start , T.End = end, method = "NonParametricSI", SI.Distr = SI_Distr, plot = FALSE , CV.Posterior = 1 , Mean.Prior = 1 , Std.Prior = 0.5) res$R %<>% cbind(Date = end.dates) return(res$R)})
r.estim %>% bind_rows(.id = "District") %>% ggplot(aes(Date, `Mean(R)`)) + geom_point() + facet_wrap(~District) + ggtitle("Mean R for each district in Sierra Leone")
Next, calculate the population flow matrix from gravity model and hence the relative risk for each district.
adm2_centroids <- "data/Geography/GravityModel/raw/adm2.txt" %>% read.csv(stringsAsFactors = FALSE, sep = "\t", header = TRUE) %>% filter(ADM0 == "Sierra Leone") %>% filter(ADM2 != "Western Rural") adm2_centroids$ADM2 %<>% gsub("Western Urban", "Western", .) adm2_centroids$ADM2 %<>% gsub(' ', '', .) %<>% toupper %<>% factor adm2_centroids$ADM2 %<>% factor(levels = levels(factor(WHO_SL$CL_DistrictRes))) adm2_centroids %<>% arrange(ADM2) flow.matrix <- flow_matrix(longitude = adm2_centroids[, "Centroid_Lon"], latitude = adm2_centroids[, "Centroid_Lat"], population = adm2_centroids[, "Pop"], place.names = adm2_centroids[, "ADM2"], model = "gravity", K = K, pow_N_from = pow_N_from, pow_N_to = pow_N_to, pow_dist = pow_dist) ## Relative risk relative.risk <- flow.matrix / rowSums(flow.matrix, na.rm=TRUE) p.movement <- probability_movement(relative.risk, p.stay) ## Test with random matrix. Delete later #prandom <- runif(169) #p.movement <- matrix(prandom, nrow = nrow(p.movement))
At this point, all the pieces are in place. SL_training contains the incidence count we will use for projecting. Estimate of R is in the data frame r.j.t with a row for each simulation we want to run. p.movement conatins the probabilities. We will project over a period of 6 weeks at three different time points - at the start of the epidemic, during the middle and towards the end.
r.j.t <- r.estim %>% lapply(function(R){ cutoff <- which(R$Date %in% SL_wide[t.proj, "Date"]) shape <- R[cutoff, "Mean(R)"]^2 / R[cutoff, "Std(R)"]^2 scale <- R[cutoff, "Std(R)"]^2 / R[cutoff, "Mean(R)"] return(rgamma(n.sim, shape = shape, scale = scale))}) %>% data.frame(check.names = F) SL_training <- SL_wide[1:t.proj, ] validation <- nrow(SL_wide) SL_validation <- SL_wide[(1 + t.proj):validation, ] incid <- as.matrix(select(SL_training, -Date)) dates.all <- SL_training[1:t.proj, ] %>% pull(Date) %>% c(seq(max(.) + 1, length.out = n.dates.sim, by = 1)) t.max <- t.proj + n.dates.sim - 1 sl_daily_projections <- plyr::alply(r.j.t, 1, function(r.t){ r.t <- as.matrix(r.t) out <- project(incid, r.t, SI_Distr, p.movement, n.dates.sim) incidence.proj <- rbind(incid, out) incidence.proj %<>% data.frame(check.names = F) incidence.proj %<>% cbind(Date = dates.all) return(incidence.proj[(nrow(SL_training) + 1):t.max, ])}) sl_weekly_projections <- lapply(sl_daily_projections, daily.to.weekly) %>% bind_rows(.)
At the same time, we will also obtain the weekly incidence count from available data.
weekly.available <- c(training = list(SL_training), validation = list(SL_validation)) %>% lapply(daily.to.weekly) %>% bind_rows(.id = "Category") projections_distr <- projection_quantiles(sl_weekly_projections) outfile <- paste0("output/sl_summary_projections_", p.stay, "_", pow_dist, "_", t.proj, ".csv") write.csv(projections_distr, file = outfile, row.names = F, quote = F) outfile <- paste0("output/sl_summary_projections_", p.stay, "_", pow_dist, "_", t.proj, ".png") t.min <- min(projections_distr$Date) - 63 t.max <- max(projections_distr$Date) + 49 weekly.available %<>% filter(Date >= t.min & Date < t.max) trng.start <- min(projections_distr$Date) - time_window valdtn.end <- max(projections_distr$Date) p <- plot.weekly3(weekly.available, projections_distr, trng.start, valdtn.end) ggsave(outfile, p)
end <- max(dates.all) + 42 weekly.available %<>% filter(Date < end) plots.list <- SL_training %>% select(-Date) %>% colnames %>% lapply(function(location){ available <- weekly.available %>% `[`(, c("Date", "Category", location)) projection <- sl_weekly_projections %>% `[`(, c("Date", location)) plot.weekly(available, projection)}) cowplot::plot_grid(plotlist = plots.list)
sl_projections <- bind_rows(sl_daily_projections) start <- min(sl_projections$Date) end <- max(sl_projections$Date) available_small <- filter(SL_wide, Date >= start & Date <= end)
districts <- select(sl_projections, -Date) %>% colnames names(districts) <- districts sl_loglh <- lapply(districts, function(district){ observed <- available_small[, c("Date", district)] predicted <- sl_projections[, c("Date", district)] colnames(observed) <- c("Date", "incid") colnames(predicted) <- c("Date", "incid") log_likelihoods_atj(observed, predicted) %>% sum}) sl_loglh %<>% data.frame sl_loglh %<>% cbind(data.frame(pow_dist = pow_dist, p.stay = p.stay), .) outfile <- "output/sierra-lh.csv" col.names <- ifelse(file.exists(outfile), FALSE, TRUE) write.table(sl_loglh, outfile, sep = ",", col.names = col.names, append = T, row.names = F, quote = F)
projection_50 <- sl_projections %>% group_by(Date) %>% summarise_all(quantile, probs = 0.5) ## --- goodness of fit --- error <- select(projection_50, -Date) - select(available_small, -Date) sl_rms <- summarise_all(error, funs(rms)) %>% cbind(data.frame(pow_dist = pow_dist, p.stay = p.stay), .) outfile <- "output/sierra-rms.csv" col.names <- ifelse(file.exists(outfile), FALSE, TRUE) write.table(sl_rms, outfile, sep = ",", col.names = col.names, append = T, row.names = F, quote = F)
We can compare the spatial spread of the predicted case counts with the available for the period over which we are projecting. which we are projecting
sierra <- rgdal::readOGR(dsn = "data/Geography/SLE_adm_shp", layer = "SLE_adm2") sierra@data$id <- sierra@data$NAME_2 sierra.points <- broom::tidy(sierra, region="id") sierra.points$id %<>% factor sierra.df <- left_join(sierra.points, sierra@data, by="id") sierra.df$NAME_2 %<>% toupper %<>% gsub(' ', '', .) %<>% factor sierra.df$NAME_2 <- plyr::revalue(sierra.df$NAME_2, c("WESTERNURBAN" = "WESTERN", "WESTERNRURAL" = "WESTERN")) start <- min(sl_weekly_projections$Date) end <- max(sl_weekly_projections$Date) available_small <- filter(weekly.available, Date > start & Date < end) %>% select(-c(Category, Date)) %>% colSums %>% data.frame available_small %<>% tibble::rownames_to_column() colnames(available_small) <- c("NAME_2", "incid") projection_50 <- sl_weekly_projections %>% group_by(Date) %>% summarise_all(quantile, probs = 0.5) %>% ungroup %>% select(-Date) %>% colSums %>% data.frame projection_50 %<>% tibble::rownames_to_column() colnames(projection_50) <- c("NAME_2", "incid") total_incid <- list(Observed = data.frame(available_small), Predicted = data.frame(projection_50)) %>% bind_rows(.id = "Category") incid_map <- left_join(sierra.df, total_incid) incid_map$Category %<>% factor(levels = c("Observed", "Predicted")) incid_map %<>% arrange(Category) p <- ggplot(incid_map) + aes(long, lat, group = group, fill = incid) + geom_polygon() + geom_path(color = "black", alpha = 0.1 ) + coord_equal() + facet_wrap(~Category, strip.position = "top") + scale_fill_gradient2(low = "grey50", high = "firebrick") p <- p + theme_bw() p <- p + theme(strip.text.x = element_text(size = 12, face = "bold")) p <- p + theme(panel.border = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_blank(), strip.background = element_blank()) p <- p + theme(axis.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank())
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.