The key thing is to get the latitudes, longitudes and population estimates for all the districts and determine population flow matrix.
guinea_sierra <- "data/Geography/GravityModel/raw/adm2-fixed.txt" %>% read.csv(stringsAsFactors = FALSE, sep = "\t", header = TRUE) %>% filter(ADM0 %in% c("Guinea", "Sierra Leone")) liberia <- "data/Geography/GravityModel/raw/adm1_centroids_fixed.tsv" %>% read.csv(stringsAsFactors = FALSE, sep = "\t", header = TRUE) %>% filter(ADM0 == "Liberia") guinea_sierra %<>% select(ADM2, Centroid_Lon, Centroid_Lat, Pop) %<>% rename(CL_DistrictRes = ADM2) liberia %<>% select(ADM1, Centroid_Lon, Centroid_Lat, Pop) %<>% rename(CL_DistrictRes = ADM1) all_centroids <- bind_rows(guinea_sierra, liberia) all_centroids$CL_DistrictRes %<>% iconv(to='ASCII//TRANSLIT') %<>% gsub("\\'", '', .) %<>% gsub(' ', '', .) %<>% toupper %<>% factor all_centroids$CL_DistrictRes %<>% gsub("WESTERNURBAN", "WESTERN", .)
Some of the prefectures of Guinea are not represented in the WHO data. These are: Gaoual, Koubia, Koundara, Labe, Lelouma, Mamou and Mandiana. Hence we will drop these from the data frame of centroids.
missing <- setdiff(unique(all_centroids$CL_DistrictRes), unique(WHO_bydistricts$CL_DistrictRes)) notmissing <- !(all_centroids$CL_DistrictRes %in% missing) all_centroids <- all_centroids[notmissing, ] all_centroids$CL_DistrictRes %<>% factor
After this data clean-up we can finally determine the flow matrix.
all_centroids <- read.csv("data/Geography/GravityModel/processed/WHOcentroids.csv")
WHO_bydistricts$CL_DistrictRes %<>% factor WHO_bydistricts$CL_DistrictRes %<>% plyr::mapvalues(from = c("KISSIDOUGO", "YOMOU", "N'ZEREKORE", "GBARPOLU" ), to = c("KISSIDOUGOU", "YAMOU", "NZEREKORE", "GBAPOLU")) all_centroids$CL_DistrictRes %<>% factor(levels = levels(WHO_bydistricts$CL_DistrictRes)) all_centroids %<>% arrange(CL_DistrictRes) flow.matrix <- flow_matrix(longitude = all_centroids[, "Centroid_Lon"], latitude = all_centroids[, "Centroid_Lat"], population = all_centroids[, "Pop"], place.names = all_centroids[, "CL_DistrictRes"], model = "gravity", K = K, pow_N_from = pow_N_from, pow_N_to = pow_N_to, pow_dist = pow_dist) ## Relative risk relative.risk <- flow.matrix / rowSums(flow.matrix, na.rm=TRUE) p.movement <- probability_movement(relative.risk, p.stay)
To estimate R, we convert from long format to the wide format.
infile <- here::here("data", "CaseCounts/processed/WHO_wide.csv") WHO_wide <- readr::read_csv(infile) start <- 2:(length(WHO_wide$Date) - time_window) end <- start + time_window end.dates <- WHO_wide[end, "Date"] r.estim <- WHO_wide %>% select(-Date) %>% plyr::alply(2, .dims = TRUE, function(incid) { I <- pull(incid, 1) res <- EstimateR(I, T.Start = start , T.End = end, method = "NonParametricSI", SI.Distr = SI_Distr, plot = FALSE , CV.Posterior = 1 , Mean.Prior = 1 , Std.Prior = 0.5) res$R %<>% cbind(Date = end.dates) return(res$R)}) r.j.t <- r.estim %>% lapply(function(R){ cutoff <- which(R$Date %in% WHO_wide[t.proj, "Date"]) shape <- R[cutoff, "Mean(R)"]^2 / R[cutoff, "Std(R)"]^2 scale <- R[cutoff, "Std(R)"]^2 / R[cutoff, "Mean(R)"] return(rgamma(n.sim, shape = shape, scale = scale))}) %>% data.frame
r.estim %>% bind_rows(.id = "District") %>% ggplot(aes(Date, `Mean(R)`)) + geom_point() + facet_wrap(~District) + ggtitle("Mean R for each district in Sierra Leone, Liberia and Guinea")
At this point, all the pieces are in place. alldistricts_training contains the incidence count we will use for projecting. Estimate of R is in the data frame r.j.t with a row for each simulation we want to run. p.movement conatins the probabilities.
alldistricts_training <- WHO_wide[1:t.proj, ] alldistricts_validation <- WHO_wide[(1 + t.proj):nrow(WHO_wide), ] incid <- as.matrix(select(alldistricts_training, -Date)) dates.all <- alldistricts_training[1:t.proj, ] %>% pull(Date) %>% c(seq(max(.) + 1, length.out = n.dates.sim, by = 1)) t.max <- t.proj + n.dates.sim - 1 daily.projections <- plyr::alply(r.j.t, 1, function(r.t){ r.t <- as.matrix(r.t) out <- project(incid, r.t, SI_Distr, p.movement, n.dates.sim) incidence.proj <- rbind(incid, out) incidence.proj %<>% data.frame incidence.proj %<>% cbind(Date = dates.all, .) colnames(incidence.proj) <- colnames(alldistricts_training) return(incidence.proj[(nrow(alldistricts_training) + 1):t.max, ])})
At the same time, we will also obtain the weekly incidence count from available data.
weekly.projections <- lapply(daily.projections, daily.to.weekly) %>% dplyr::bind_rows(.) weekly.available <- c(training = list(alldistricts_training), validation = list(alldistricts_validation)) %>% lapply(daily.to.weekly) %>% dplyr::bind_rows(.id = "Category") projections_distr <- projection_quantiles(weekly.projections) outfile <- paste0("output/alldistricts_summary_projections_", p.stay, "_", pow_dist, "_", t.proj, ".csv") write.csv(projections_distr, file = outfile, row.names = F, quote = F)
end <- max(dates.all) + 49 weekly.available_small <- filter(weekly.available, Date < end) plots.list <- alldistricts_training %>% select(-Date) %>% colnames %>% lapply(function(location){ available <- weekly.available_small %>% `[`(, c("Date", "Category", location)) projection <- weekly.projections %>% `[`(, c("Date", location)) plot.weekly(available, projection)}) cowplot::plot_grid(plotlist = plots.list)
daily.projections %<>% bind_rows(.) start <- min(daily.projections$Date) end <- max(daily.projections$Date) available_small <- filter(WHO_wide, Date >= start & Date <= end)
We now determine the likelihood of the parameters using the observed data.
incid <- as.matrix(select(WHO_wide, -Date)) t.max <- t.proj + n.dates.sim - 1 R <- r.j.t[1, ] all_loglh <- log_likelihood_1step(incid, window = t.proj, start = t.proj + 1, end = t.max, pij = p.movement, R = as.matrix(R), si = SI_Distr) %>% sum
districts <- select(daily.projections, -Date) %>% colnames names(districts) <- districts all_loglh <- lapply(districts, function(district){ observed <- available_small[, c("Date", district)] predicted <- daily.projections[, c("Date", district)] colnames(observed) <- c("Date", "incid") colnames(predicted) <- c("Date", "incid") log_likelihoods_atj(observed, predicted) %>% sum})
all_loglh %<>% data.frame all_loglh %<>% cbind(data.frame(pow_dist = pow_dist, p.stay = p.stay), .) outfile <- paste0("output/alldistricts-lh-tproj-", t.proj, ".csv") col.names <- ifelse(file.exists(outfile), FALSE, TRUE) write.table(all_loglh, outfile, sep = ",", col.names = col.names, append = T, row.names = F, quote = F)
projection_50 <- daily.projections %>% group_by(Date) %>% summarise_all(quantile, probs = 0.5) error <- select(projection_50, -Date) - select(available_small, -Date) all_rms <- summarise_all(error, funs(rms)) %>% cbind(data.frame(pow_dist = pow_dist, p.stay = p.stay), .) outfile <- paste0("output/alldistricts-rms-", t.proj, ".csv") col.names <- ifelse(file.exists(outfile), FALSE, TRUE) write.table(all_rms, outfile, sep = ",", col.names = col.names, append = T, row.names = FALSE, quote = FALSE)
liberia <- rgdal::readOGR(dsn = "data/Geography/LBR_adm_shp", layer = "LBR_adm2") liberia@data$id <- liberia@data$NAME_1 liberia.points <- broom::tidy(liberia, region="id") liberia.points$id %<>% factor liberia.df <- left_join(liberia.points, liberia@data, by="id") liberia.df$CL_DistrictRes <- liberia.df$NAME_1 sierra <- rgdal::readOGR(dsn = "data/Geography/SLE_adm_shp", layer = "SLE_adm2") sierra@data$id <- sierra@data$NAME_2 sierra.points <- broom::tidy(sierra, region="id") sierra.points$id %<>% factor sierra.df <- left_join(sierra.points, sierra@data, by="id") sierra.df$NAME_2 <- plyr::revalue(sierra.df$NAME_2, c("Western Rural" = "WESTERN", "Western Urban" = "WESTERN")) sierra.df$CL_DistrictRes <- sierra.df$NAME_2 guinea <- rgdal::readOGR(dsn = "data/Geography/GIN_adm_shp", layer = "GIN_adm2") guinea@data$id <- guinea@data$NAME_2 guinea.points <- broom::tidy(guinea, region="id") guinea.points$id %<>% factor guinea.df <- left_join(guinea.points, guinea@data, by="id") guinea.df$CL_DistrictRes <- guinea.df$NAME_2 wafrica <- rbind(guinea.df, liberia.df) %>% rbind(sierra.df) wafrica$CL_DistrictRes %<>% iconv(to='ASCII//TRANSLIT') %<>% gsub("\\'", '', .) %<>% gsub(' ', '', .) %<>% toupper %<>% factor start <- min(weekly.projections$Date) end <- max(dates.all) available_small <- filter(weekly.available, Date > start & Date < end) %>% select(-c(Category, Date)) %>% colSums %>% data.frame available_small %<>% tibble::rownames_to_column() colnames(available_small) <- c("CL_DistrictRes", "incid") projection_50 <- weekly.projections %>% group_by(Date) %>% summarise_all(quantile, probs = 0.5) %>% ungroup %>% select(-Date) %>% colSums %>% data.frame projection_50 %<>% tibble::rownames_to_column() colnames(projection_50) <- c("CL_DistrictRes", "incid") missing <- setdiff(unique(wafrica$CL_DistrictRes), unique(available_small$CL_DistrictRes)) available_small %<>% rbind(data.frame(CL_DistrictRes = missing, incid = rep(0, length(missing)))) projection_50 %<>% rbind(data.frame(CL_DistrictRes = missing, incid = rep(0, length(missing)))) total_incid <- list(training = data.frame(available_small), projection = data.frame(projection_50)) %>% bind_rows(.id = "Category") incid_map <- left_join(wafrica, total_incid) incid_map$Category %<>% factor(levels = c("training", "projection")) incid_map %<>% arrange(Category) ggplot(incid_map) + aes(long, lat, group = group, fill = incid) + geom_polygon() + geom_path(color = "white" ) + coord_equal() + facet_grid(.~Category) + scale_fill_gradient2(low = "grey50", high = "firebrick")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.