# Chunk defaults for the whole report: 12 x 6 figures written under
# "figures/", with code, warnings and messages hidden in the rendered output.
knitr::opts_chunk$set(
  fig.width = 12,
  fig.height = 6,
  echo = FALSE,
  warning = FALSE,
  message = FALSE,
  fig.path = "figures/"
)
library(magrittr) library(ggplot2) library(ggthemes) library(dplyr) devtools::load_all()
The pre-processing steps consist of turning the line list into a daily incidence series and reshaping the given tall data into wide format.
The current data set has three possible values for the epidemiological case definition: confirmed, probable and suspected. The definitions of probable and suspected cases differed across countries, while confirmed cases were those confirmed by a laboratory report. For the purpose of the current analysis, one approach could be to lump all of them together. The inferred date of onset (rather than the date reported) is used for estimation. The columns we use are: Country, EpiCaseDef (probably), DateOnsetInferred and CL_DistrictRes.
# Read the WHO case records, forcing the column types needed downstream.
WHO_raw <- read.csv(
  here::here("data", "CaseCounts/who/rstb20160308supp1.csv"),
  colClasses = c(
    Country = "factor",
    EpiCaseDef = "character",
    DateOnsetInferred = "Date",
    CL_DistrictRes = "factor"
  )
)

# Keep only the four columns used in this analysis and drop every record
# that has a missing value in any of them.
WHO_raw <- na.omit(
  select(WHO_raw, Country, EpiCaseDef, DateOnsetInferred, CL_DistrictRes)
)

# Remove spaces from the district names, then rebuild the factor so its
# levels reflect the cleaned names.
WHO_raw$CL_DistrictRes <- factor(gsub(' ', '', WHO_raw$CL_DistrictRes))
Some spelling variations between the WHO data and the data set containing the district co-ordinates need to be taken into account.
# Recode the district spellings listed in `from` to the ones in `to`, so the
# names agree with the data set holding the district co-ordinates.
WHO_raw$CL_DistrictRes <- plyr::mapvalues(
  factor(WHO_raw$CL_DistrictRes),
  from = c("KISSIDOUGO", "YOMOU", "N'ZEREKORE", "GBARPOLU"),
  to = c("KISSIDOUGOU", "YAMOU", "NZEREKORE", "GBAPOLU")
)
For each district in a country, count the number of records for each date to get the incidence count.
# One row per country / district / onset date; `incid` is the number of case
# records that fall in that combination.
WHO_bydistricts <- summarise(
  group_by(WHO_raw, Country, CL_DistrictRes, DateOnsetInferred),
  incid = n()
)
Within each district, if there is a date on which no cases are recorded, we assume that there were no cases on that date and add this to the record. At the end of this step, the incidence time series for each district should be a daily time series.
# Process each district separately with add_0incid() and stack the pieces
# back into one table. add_0incid() is not defined in this file (presumably it
# comes from the package loaded by devtools::load_all()); per the accompanying
# text it adds zero-incidence rows for dates with no recorded cases.
WHO_bydistricts <- WHO_bydistricts %>%
  split(.$CL_DistrictRes) %>%
  lapply(add_0incid) %>%
  bind_rows() %>%
  rename(Date = DateOnsetInferred)

# Country-level series: sum the district incidences for each date.
WHO_bycountry <- summarise(
  group_by(WHO_bydistricts, Country, Date),
  incid = sum(incid)
)
# Incidence over time, one panel per district.
# theme_tufte() is a complete theme: adding it to a plot replaces all theme
# settings made earlier in the chain. It therefore has to come BEFORE the
# theme() call, otherwise the rotated x-axis labels are silently discarded.
ggplot(WHO_bydistricts, aes(Date, incid)) +
  geom_point() +
  geom_rangeframe() +
  facet_wrap(~CL_DistrictRes) +
  ggtitle("Incidence in each district") +
  xlab("") +
  theme_tufte() +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.2))

# Incidence over time, one panel per country.
# The former theme_minimal() call was dropped: it was completely overridden
# by the later theme_tufte() and had no effect on the figure.
ggplot(WHO_bycountry, aes(Date, incid)) +
  geom_point() +
  geom_rangeframe() +
  facet_wrap(~Country) +
  ggtitle("Incidence in each country") +
  xlab("") +
  theme_tufte() +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.2))
Write the files.
# Save the district-level daily incidence (tall format).
outfile <- here::here("data", "CaseCounts/processed/WHO_bydistricts.csv")
WHO_bydistricts %>% readr::write_csv(outfile)

# Save the country-level daily incidence (tall format).
outfile <- here::here("data", "CaseCounts/processed/WHO_bycountry.csv")
WHO_bycountry %>% readr::write_csv(outfile)
Now we will convert the data from the tall to the wide format.
# One data frame per country; reused below when writing the wide files.
WHO_bydistricts_split <- WHO_bydistricts %>%
  split(.$Country)

# Make sure the output directory exists before ggsave() writes into it.
dir.create("figures", showWarnings = FALSE, recursive = TRUE)

# Save one PNG per country showing the incidence in each of its districts.
# lapply() is used only for its side effect here, so the result is wrapped in
# invisible() to keep the list of return values out of the rendered report.
invisible(lapply(WHO_bydistricts_split, function(df) {
  country <- df$Country[1]
  outfile <- paste0("figures/", country, "-incid.png")

  p <- ggplot(df, aes(Date, incid)) +
    geom_point(size = 1.1, stroke = 0, shape = 16) +
    facet_wrap(~CL_DistrictRes) +
    ggtitle(paste("Incidence in the districts of", country))
  p <- p + geom_rangeframe() + theme_tufte()

  ggsave(outfile, p)
}))
# Write one wide-format CSV per country: one row per date, one column per
# district, with 0 where a district has no entry for that date.
# The lapply() result (a list of the written data frames) is wrapped in
# invisible() so it is not printed into the rendered report.
invisible(lapply(WHO_bydistricts_split, function(df) {
  df <- na.omit(df)

  # File name stem: country name without spaces, in upper case.
  country <- toupper(gsub(' ', '', df$Country[1]))

  # Pass the file name as its own path component rather than pasting it onto
  # a directory string, so the result does not depend on a trailing "/"
  # surviving the path construction.
  outfile <- here::here(
    "data", "CaseCounts/processed", paste0(country, "_wide.csv")
  )

  df_wide <- df %>%
    ungroup() %>%
    select(-Country) %>%
    tidyr::spread(CL_DistrictRes, incid, fill = 0)

  readr::write_csv(df_wide, outfile)
}))
Also write out the data for all districts for later analysis.
# Wide table across all districts: drop the grouping and the Country column,
# then spread districts into columns, filling absent combinations with 0.
WHO_wide <- tidyr::spread(
  select(ungroup(WHO_bydistricts), -Country),
  CL_DistrictRes,
  incid,
  fill = 0
)

outfile <- here::here("data", "CaseCounts/processed/WHO_wide.csv")
WHO_wide %>% readr::write_csv(outfile)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.