# Report tooling and the project package.
library(knitr)
library(brew)
library(maragra)

## Global options
# R-level limit on how much is printed to the console.
options(max.print = "75")

# Defaults applied to every knitr chunk in this document.
opts_chunk$set(
  echo = FALSE,
  cache = TRUE,
  prompt = FALSE,
  tidy = TRUE,
  comment = NA,
  message = FALSE,
  warning = FALSE
)

# Package-level knitr option for output width.
opts_knit$set(width = 75)
Joe Brew, Laia Cirera, Kizito Gondo, Elton Dorkin, Elisa Sicuri
`r format(Sys.Date(), '%B %d, %Y')`
# Packages
library(dplyr)
library(readr)
library(ggplot2)
library(tidyr)
library(ggthemes)
library(RColorBrewer)
library(pander)
library(cism)

# Read data
# `clinic_agg` is the dataset object exposed by the maragra package; a local
# copy is taken so later chunks can refer to it by its bare name.
clinic_agg <- maragra::clinic_agg
The formalities and agreements pertaining to the transfer of data from Ilovo Maragra to CISM are more or less finished. With Dr. Gondo's help, Joe Brew has retrieved aggregated monthly summary data pertaining to malaria cases among workers. More time will be needed to record person-specific data, since it is all handwritten and will need to be digitized.
Beginning next week (November 28, 2016), the digitization of all positive cases will be carried out. For this, the following needs to take place.
Full data collection will be carried out over the coming weeks. All digitized data, in addition to being stored by the CISM for this project, will also be given to Ilovo-Maragra as an "in-kind" service for their willingness to have their data examined.
If we combine all workers, we can examine the total incidence of malaria since 2011.
# Collapse the clinic records to one row per date, across all worker groups.
#   p      - total positive results
#   pp     - positives as a percentage of tests
#   tested - total tests
# `pp` is listed before `tested` on purpose: once `tested` is reassigned inside
# summarise(), later expressions would see the summed value instead of the
# original column.
x <- clinic_agg %>%
  group_by(date) %>%
  summarise(
    p = sum(positive),
    pp = sum(positive) / sum(tested) * 100,
    tested = sum(tested)
  )

# Gather into long format: one row per date and series, with the series name
# in `key` and its count in `val`. The factor level order (tested, positive)
# fixes the order in which the series are drawn in later charts.
y <- rbind(
  x %>%
    dplyr::select(date, p) %>%
    rename(val = p) %>%
    mutate(key = 'positive'),
  x %>%
    dplyr::select(date, tested) %>%
    rename(val = tested) %>%
    mutate(key = 'tested')
) %>%
  mutate(key = factor(key, levels = c('tested', 'positive')))

# Bar chart of positive results per date, all workers combined.
# (Title spelling corrected: "microscopy".)
ggplot(data = x, aes(x = date, y = p)) +
  geom_bar(stat = 'identity') +
  xlab('Date') +
  ylab('Number of positive cases') +
  theme_fivethirtyeight() +
  ggtitle('Malaria microscopy positive results', 'All workers') +
  theme(title = element_text(size = 12))
The below charts show the total number of tests and positive cases, by month, for Mozambican and foreign workers, respectively, at Ilovo-Maragra.
# Build one line chart for a subset of `clinic_agg`: one line for `tested`
# and one for `positive`, both against `date`.
#   rows    - the rows to plot
#   heading - the chart title
plot_tested_and_positive <- function(rows, heading) {
  ggplot(data = rows, aes(x = date, y = tested)) +
    geom_line() +
    geom_line(aes(y = positive)) +
    theme_maragra() +
    labs(x = 'Date', y = 'People (tested and positive)', title = heading)
}

# Rows whose group is 'nacionais'.
plot_tested_and_positive(
  clinic_agg %>% filter(group == 'nacionais'),
  'Mozambicans'
)

# Rows of every other group.
plot_tested_and_positive(
  clinic_agg %>% filter(group != 'nacionais'),
  'Expatriates'
)
The below chart shows both the number of positive cases (all workers, in red) and the number of tests, by month.
# Area chart of the long-format table `y` (series in `key`, counts in `val`):
# tests in dark green, positive results in dark red.
# The y-axis label now names both series; it previously read
# 'Number of positive cases' although tests are plotted too.
# NOTE(review): position = 'stack' piles the two series on top of each other,
# so the upper edge reads as tested + positive. If positives are a subset of
# tests, position = 'identity' may be what is wanted -- confirm with authors.
ggplot(data = y, aes(x = date, y = val, group = key, fill = key)) +
  geom_area(stat = 'identity', position = 'stack', alpha = 0.5) +
  xlab('Date') +
  ylab('Number of tests and positive cases') +
  theme_cism() +
  scale_fill_manual(name = '', values = c('darkgreen', 'darkred')) +
  ggtitle(
    'Malaria microscopy tests and positive results',
    'All workers, absolute'
  ) +
  theme(title = element_text(size = 12))
The below chart shows the same data as above, but converts the number of positive cases to a percentage of all tests, rather than an absolute number.
# Area chart of `pp` (positive results as a percentage of tests) per date,
# all workers combined.
# The y-axis label previously read 'Number of positive cases', which does not
# describe a percentage; it now matches the plotted variable.
ggplot(data = x, aes(x = date, y = pp)) +
  geom_area(stat = 'identity', fill = 'darkblue', alpha = 0.5) +
  xlab('Date') +
  ylab('Percent of tests positive') +
  theme_cism() +
  ggtitle(
    'Malaria microscopy tests and positive results',
    'All workers, relative'
  ) +
  theme(title = element_text(size = 12))
We can examine the annual seasonality of positive cases by overlaying all years' data onto one axis.
# Turn month numbers into month names as produced by format(..., '%B').
# The year 2015 and day 01 are placeholders needed only to build a valid
# date; just the month part is kept.
#   number - month number(s), e.g. 1:12
make_month_name <- function(number) {
  format(as.Date(paste0('2015-', number, '-01')), '%B')
}

# Aggregate to one row per (month, year):
#   p  - total positive results
#   pp - positives as a percentage of tests
x <- clinic_agg %>%
  # Disabled alternative kept from the original: bin by day of year.
  # mutate(day_number = as.numeric(format(date, '%j'))) %>%
  # mutate(day_number = round(day_number, -1) + 5)
  mutate(month_number = as.numeric(format(date, '%m'))) %>%
  group_by(month_number, year) %>%
  summarise(
    p = sum(positive),
    pp = sum(positive) / sum(tested) * 100
  ) %>%
  # Ungroup before building the factors so that unique(year) sees every row
  # rather than one month at a time.
  ungroup() %>%
  # Year levels come from the data instead of a fixed 2011:2016 range, so a
  # year outside that range is no longer turned into NA.
  mutate(year = factor(year, levels = as.character(sort(unique(year))))) %>%
  # Month names as an ordered factor so the x axis runs January..December.
  mutate(
    month_number = factor(
      make_month_name(month_number),
      levels = make_month_name(1:12)
    )
  )

# One colour per year present in `x`, interpolated from the Spectral palette.
cols <- colorRampPalette(brewer.pal(n = 9, 'Spectral'))(length(unique(x$year)))

# Overlay chart: absolute positive results by month, one line per year.
ggplot(
  data = x,
  aes(x = month_number, y = p, group = year, color = year)
) +
  geom_line() +
  scale_color_manual(name = 'Year', values = cols) +
  theme_wsj() +
  ggtitle(
    'Malaria microscopy positive results',
    'Absolute cases, year overlapping'
  ) +
  theme(title = element_text(size = 12)) +
  xlab('Month') +
  ylab('Positive results') +
  theme(axis.text.x = element_text(angle = 90))
The below chart uses the same data as above, but instead of positive cases, it shows the percent of tests which were positive.
# Overlay chart: percent of tests that were positive, by month, one line per
# year. Uses `x` (month/year aggregate) and `cols` (one colour per year).
# The y aesthetic is `pp`; it was previously `p`, which redrew the absolute
# counts under a 'Percent positive' subtitle.
ggplot(
  data = x,
  aes(x = month_number, y = pp, group = year, color = year)
) +
  geom_line() +
  scale_color_manual(name = 'Year', values = cols) +
  theme_wsj() +
  ggtitle(
    'Malaria microscopy positive results',
    'Percent positive, year overlapping'
  ) +
  theme(title = element_text(size = 12)) +
  xlab('Month') +
  ylab('Percent positive') +
  theme(axis.text.x = element_text(angle = 90))
If we aggregate and view distributions (via "violin" charts) at the level of the month, seasonality is more apparent.
# Violin chart: for each month, the distribution of `p` (absolute positive
# results) across the rows of `x`.
# The subtitle previously read 'Percent positive, year overlapping', which
# matched neither the plotted variable (`p`) nor the geom; it now describes
# what is drawn.
# NOTE(review): if the percentage was intended, map y to `pp` instead and
# adjust the labels -- confirm with authors.
ggplot(data = x, aes(x = month_number, y = p)) +
  geom_violin(alpha = 0.6, fill = 'darkorange') +
  theme_wsj() +
  ggtitle(
    'Malaria microscopy positive results',
    'Absolute cases, distribution by month'
  ) +
  theme(title = element_text(size = 12)) +
  xlab('Month') +
  ylab('Positive results') +
  theme(axis.text.x = element_text(angle = 90))
The raw data collected so far appear as such.
# Render the selected columns of the clinic records as a table.
kable(
  dplyr::select(clinic_agg, year, month, group, tested, positive, negative)
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.