First read in and clean up the data from ProMed. Since these data are at national level, we will also aggregate the WHO data at country level.
promed <- read.csv("data/CaseCounts/raw/ProMED_Ebola_2014-2016.csv", stringsAsFactors = F) promed$Issue.Date %<>% lubridate::dmy_hm(.) promed$Issue.Date %<>% format(format = "%m/%d/%y %H:%M")
The categories in ProMed data are: Cumulative SC (suspected cases),Cumulative SD, Cumulative CC, and Cumulative CD. For data from the WHO, the case status could be confirmed, probable or suspected. The final clinical outcome (alive/dead) for each case is also recorded. In comparing the two data sets, we sum across all four categories in ProMed and derive incidence data from cumulative data.
promed %<>% rename("SC" = "Cumulative.SC", "SD" = "Cumulative.SD", "CC" = "Cumulative.CC", "CD" = "Cumulative.CD", "HealthMap.Alert.ID" = "HM.Alert.ID") species <- "Humans" disease <- "Ebola" case.type <- "SCC" promed %<>% filter(species == species, disease == disease, Country %in% unique(WHO_raw$Country) ) promed_bycountry <- split(promed, promed$Country) promed_weekly <- promed_bycountry %>% lapply(function(case.count){ location <- case.count$Country[1] case.count %<>% incidence.from.DS1(species, disease, case.type, location, merge_rule = "median") %<>% daily.to.weekly }) %>% bind_rows(.id = "Country") promed_weekly$Date %<>% lubridate::ymd(.)
ggplot(promed_weekly, aes(Date, incid)) + facet_grid(Country~.) + geom_point() + theme_minimal()
WHO_weekly <- split(WHO_bycountry, WHO_bycountry$Country) %>% lapply(function(df){ df %>% ungroup %>% select(Date, incid) %>% daily.to.weekly}) %>% bind_rows(.id = "Country") WHO_weekly$Date %<>% as.Date color_scale <- c("Promed" = "black", "WHO" = "green") list( WHO = WHO_weekly, Promed = promed_weekly[, c("Country", "Date", "incid")]) %>% bind_rows(.id = "Source") %>% ggplot(aes(Date, incid, color = Source)) + geom_point() + facet_grid(Country ~.) + scale_x_date(date_labels="%d/%m/%y",date_breaks = "3 weeks") + theme_minimal() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + scale_colour_manual(values = color_scale)
promed_R <- promed_bycountry %>% lapply(function(case.count){ location <- case.count$Country[1] case.count %<>% incidence.from.DS1(species, disease, case.type, location, merge_rule = "median") start <- 1:(length(case.count$Date) - time_window) end <- start + time_window end.dates <- case.count$Date[end] res <- EstimateR(case.count$incid, T.Start = start , T.End = end, method = "NonParametricSI", SI.Distr = SI_Distr, plot = FALSE , CV.Posterior = 1 , Mean.Prior = 1 , Std.Prior = 0.5) res$R %<>% cbind(Date = end.dates) return(res$R)}) %>% bind_rows(.id = "Country") WHO_R <- WHO_bycountry %>% split(.$Country) %>% lapply(function(case.count){ location <- case.count$Country[1] start <- 1:(length(case.count$Date) - time_window) end <- start + time_window end.dates <- case.count$Date[end] res <- EstimateR(case.count$incid, T.Start = start , T.End = end, method = "NonParametricSI", SI.Distr = SI_Distr, plot = FALSE , CV.Posterior = 1 , Mean.Prior = 1 , Std.Prior = 0.5) res$R %<>% cbind(Date = end.dates) return(res$R)}) %>% bind_rows(.id = "Country") list( WHO = WHO_R, Promed = promed_R) %>% bind_rows(.id = "Source") %>% ggplot(aes(Date, `Mean(R)`, color = Source)) + geom_line() + facet_grid(Country ~.) + scale_x_date(date_labels="%d/%m/%y",date_breaks = "3 weeks") + theme_minimal() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + scale_colour_manual(values = color_scale)
The graphs show that the estimates from the two different sources closely track each other. Here's another way to visualise this.
promed_vs_WHO <- inner_join(WHO_R, promed_R, by = c("Date", "Country")) %>% select(Country, Date, `Mean(R).x`, `Mean(R).y`) %>% rename("WHO" = `Mean(R).x`, "Promed" = `Mean(R).y`) ggplot(promed_vs_WHO, aes(Promed, WHO)) + geom_point() + facet_wrap(~Country, nrow = 3) + xlab("R estimated from ProMed data") + ylab("R estimated from WHO data") + geom_abline(slope = 1)
As the above graph indicates, estimates of the reproduction number from Promed and WHO data are highly correlated (correlation coefficient = 0.76, 95% CI = (0.74, 0.78)).
cor.test(HM_vs_WHO$WHO, HM_vs_WHO$Promed)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.