WHO_Liberia <- WHO_bydistricts %>% filter(Country == "Liberia") %>% droplevels %>% na.omit
ggplot(WHO_Liberia, aes(Date, incid)) + geom_point() + facet_wrap(~CL_DistrictRes, ncol = 3) + theme_minimal() + ggtitle("Incidence in districts of Liberia")
Clean up the names of the districts because they will become column names further down. Remove all spaces, make them upper case.
WHO_Liberia$CL_DistrictRes %<>% gsub(' ', '', .) WHO_Liberia$CL_DistrictRes %<>% factor WHO_Liberia_wide <- WHO_Liberia %>% ungroup %>% select(-Country) %>% tidyr::spread(CL_DistrictRes, incid, fill = 0)
We are now ready to estimate R and move forward with the projection exactly as we did for HealthMap data.
start <- 2:(length(WHO_Liberia_wide$Date) - time_window) end <- start + time_window end.dates <- WHO_Liberia_wide[end, "Date"] r.estim <- WHO_Liberia_wide %>% select(-Date) %>% plyr::alply(2, .dims = TRUE, function(incid) { I <- pull(incid, 1) res <- EstimateR(I, T.Start = start , T.End = end, method = "NonParametricSI", SI.Distr = SI_Distr, plot = FALSE , CV.Posterior = 1 , Mean.Prior = 1 , Std.Prior = 0.5) res$R %<>% cbind(Date = end.dates) return(res$R)}) r.j.t <- r.estim %>% lapply(function(R){ cutoff <- which(R$Date %in% WHO_Liberia_wide[t.proj, "Date"]) shape <- R[cutoff, "Mean(R)"]^2 / R[cutoff, "Std(R)"]^2 scale <- R[cutoff, "Std(R)"]^2 / R[cutoff, "Mean(R)"] return(rgamma(n.sim, shape = shape, scale = scale))}) %>% data.frame
r.estim %>% bind_rows(.id = "County") %>% ggplot(aes(Date, `Mean(R)`)) + geom_point() + facet_wrap(~County) + ggtitle("Mean R for each county in Liberia")
Calculate the population flow matrix and hence the relative risk profile.
adm1_centroids <- "data/Geography/GravityModel/raw/adm1_centroids_fixed.tsv" %>% read.csv(stringsAsFactors = FALSE, sep = "\t", header = TRUE) %>% filter(ADM0 == "Liberia") adm1_centroids$ADM1 %<>% gsub(' ', '', .) %<>% toupper %<>% factor adm1_centroids$ADM1 %<>% factor(levels = levels(WHO_Liberia$CL_DistrictRes)) adm1_centroids %<>% arrange(ADM1) flow.matrix <- flow_matrix(longitude = adm1_centroids[, "Centroid_Lon"], latitude = adm1_centroids[, "Centroid_Lat"], population = adm1_centroids[, "Pop"], place.names = adm1_centroids[, "ADM1"], model = "gravity", K = K, pow_N_from = pow_N_from, pow_N_to = pow_N_to, pow_dist = pow_dist) ## Relative risk relative.risk <- flow.matrix / rowSums(flow.matrix, na.rm=TRUE) p.movement <- probability_movement(relative.risk, p.stay)
At this point, all the pieces are in place. Liberia_training contains the incidence count we will use for projecting. Estimate of R is in the data frame r.j.t with a row for each simulation we want to run. p.movement conatins the probabilities.
Liberia_training <- WHO_Liberia_wide[1:t.proj, ] Liberia_validation <- WHO_Liberia_wide[(1 + t.proj):nrow(WHO_Liberia_wide), ] incid <- as.matrix(select(Liberia_training, -Date)) dates.all <- Liberia_training[1:t.proj, ] %>% pull(Date) %>% c(seq(max(.) + 1, length.out = n.dates.sim, by = 1)) t.max <- t.proj + n.dates.sim - 1 daily.projections <- plyr::alply(r.j.t, 1, function(r.t){ r.t <- as.matrix(r.t) out <- project(incid, r.t, SI_Distr, p.movement, n.dates.sim) incidence.proj <- rbind(incid, out) incidence.proj %<>% data.frame incidence.proj %<>% cbind(Date = dates.all, .) colnames(incidence.proj) <- colnames(Liberia_training) return(incidence.proj[(nrow(Liberia_training) + 1):t.max, ])}) weekly.projections <- lapply(daily.projections, daily.to.weekly) %>% dplyr::bind_rows(.)
At the same time, we will also obtain the weekly incidence count from available data.
weekly.available <- c(training = list(Liberia_training), validation = list(Liberia_validation)) %>% lapply(daily.to.weekly) %>% dplyr::bind_rows(.id = "Category") weekly.available_small <- filter(weekly.available, Date < "2014-09-13") plots.list <- Liberia_training %>% select(-Date) %>% colnames %>% lapply(function(location){ available <- weekly.available_small %>% `[`(, c("Date", "Category", location)) projection <- weekly.projections %>% `[`(, c("Date", location)) plot.weekly(available, projection)}) cowplot::plot_grid(plotlist = plots.list)
weekly.available <- c(training = list(Liberia_training), validation = list(Liberia_validation)) %>% lapply(daily.to.weekly) %>% bind_rows(.id = "Category") start <- min(weekly.projections$Date) end <- max(weekly.projections$Date) available_small <- filter(weekly.available, Date >= start & Date <= end) %>% select(-Category) projection_50 <- weekly.projections %>% group_by(Date) %>% summarise_all(quantile, probs = 0.5) ## --- goodness of fit --- error <- select(projection_50, -Date) - select(available_small, -Date) liberia_rms <- summarise_all(error, funs(rms)) %>% cbind(data.frame(pow_dist = pow_dist, p.stay = p.stay), .) outfile <- "output/liberia-rms.csv" col.names <- ifelse(file.exists(outfile), FALSE, TRUE) write.table(liberia_rms, outfile, sep = ",", col.names = col.names, append = T, row.names = F, quote = F)
We will only plot the incidence data from 1 week before we started projection to the end of the projection period.
liberia <- rgdal::readOGR(dsn = "data/Geography/LBR_adm_shp", layer = "LBR_adm1") liberia@data$id <- liberia@data$NAME_1 liberia.points <- broom::tidy(liberia, region="id") liberia.points$id %<>% factor liberia.df <- left_join(liberia.points, liberia@data, by="id") liberia.df$NAME_1 %<>% toupper %<>% gsub(' ', '', .) %<>% factor start <- min(weekly.projections$Date) end <- max(weekly.projections$Date) available_small <- filter(weekly.available, Date > start & Date < end) %>% select(-c(Category, Date)) %>% colSums %>% data.frame available_small %<>% tibble::rownames_to_column() colnames(available_small) <- c("NAME_1", "incid") projection_50 <- weekly.projections %>% group_by(Date) %>% summarise_all(quantile, probs = 0.5) %>% ungroup %>% select(-Date) %>% colSums %>% data.frame projection_50 %<>% tibble::rownames_to_column() colnames(projection_50) <- c("NAME_1", "incid") total_incid <- list(training = data.frame(available_small), projection = data.frame(projection_50)) %>% bind_rows(.id = "Category") total_incid$NAME_1 %<>% gsub("GBARPOLU", "GBAPOLU", .) incid_map <- left_join(liberia.df, total_incid) incid_map$Category %<>% factor(levels = c("training", "projection")) incid_map %<>% arrange(Category) ggplot(incid_map) + aes(long, lat, group = group, fill = incid) + geom_polygon() + geom_path(color = "white" ) + coord_equal() + facet_grid(.~Category) + scale_fill_gradient2(low = "grey50", high = "firebrick")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.