# All beamer themes at http://deic.uab.es/~iblanes/beamer_gallery/index_by_theme_and_color.html library(knitr) opts_chunk$set(comment = NA, echo = FALSE, warning = FALSE, message = FALSE, error = TRUE, cache = FALSE) opts_chunk$set(dev = 'pdf') # Packages library(ggplot2) library(cism) library(rgeos) library(maptools) library(rgdal) library(tidyr) library(RColorBrewer) library(dplyr) library(leaflet) library(readr) library(ggthemes) library(ggrepel) library(knitr) options(knitr.table.format = "latex") library(maragra)
# Print the author names and affiliations x <- make_authors(include_address = FALSE, include_country = TRUE, seperator = ' \n\n ', cat_it = FALSE) cat(x)
# Read in data ab <- maragra::ab ab_panel <- maragra::ab_panel bairros <- maragra::bairros census <- maragra::census clinic <- maragra::clinic clinic_agg <- maragra::clinic_agg irs <- maragra::irs mc <- maragra::mc workers <- maragra::workers source('../paper_prepare.R') # Define function fro breaking the days_since var break_days_since <- function(days_since){ out <- ifelse(days_since > 180, '181+', ifelse(days_since > 90, '091-180', ifelse(days_since > 60, '061-090', ifelse(days_since > 30, '031-060', ifelse(days_since >= 0, '000-030', ifelse(days_since < 0, 'Before', NA)))))) out <- factor(out, levels = c('Before', '000-030', '031-060', '061-090', '091-180', '181+')) return(out) } # Define function for making season make_season <- function(date){ out <- ifelse(as.numeric(format(date, '%m')) %in% c(11:12, 1:3), 'Malaria season', 'Off season') out <- factor(out, levels = c('Off season', 'Malaria season')) return(out) } library(MatchIt) set.seed(1234) # Create matching data for propensity scoring right_side <- irs %>% group_by(unidade) %>% tally %>% ungroup %>% mutate(received = ifelse(n > 0, TRUE, FALSE)) left_side <- workers %>% dplyr::select(oracle_number, unidade, permanent_or_temporary, department, sex, date_of_birth, perm_id) %>% # bring in census address left_join(census %>% dplyr::select(perm_id, longitude, latitude, maragra_bairro, maragra_fabrica)) psm <- left_join(x = left_side, y = right_side, by = 'unidade') %>% # Keep only those who are censed filter(!is.na(longitude), !is.na(latitude)) %>% # Keep only those who live within maragra filter(maragra_fabrica |maragra_bairro) %>% mutate(received = ifelse(is.na(received), FALSE, received)) %>% mutate(age = round((as.numeric(as.Date('2016-01-01') - date_of_birth)) / 365.25, digits = 2)) %>% filter(!is.na(sex), !is.na(age)) %>% dplyr::select(-unidade, -date_of_birth, -n) psmt1 <- psm names(psmt1) <- toupper(names(psmt1)) psmt1 <- psmt1 %>% mutate(RECEIVED = ifelse(RECEIVED == TRUE,'IRS', 'No IRS')) %>% mutate(STATUS = PERMANENT_OR_TEMPORARY) pacman::p_load(tableone) table1 <- CreateTableOne(vars = toupper(c('STATUS', 'department', 'age', 'sex', 'received')), data = psmt1, factorVars = toupper(c('STATUS', 'department', 'sex')), strata = 'RECEIVED') table1 <- print(table1, printToggle = FALSE, noSpaces = TRUE) match.it <- matchit(received ~ age + sex + permanent_or_temporary + department, data = psm, method="nearest", ratio=1) # Save matched samples matched <- match.data(match.it)[1:ncol(psm)] a <- summary(match.it) # Create model data model_data_propensity <- ab_panel %>% filter(oracle_number %in% matched$oracle_number) %>% left_join(irs) %>% left_join(matched) %>% mutate(days_since = break_days_since(days_since)) %>% filter(!is.na(days_since)) %>% # mutate(time_period = lendable::time_period_extract(date)) %>% # mutate(time_period = as.character(time_period)) %>% mutate(time_period = make_season(date = date)) %>% mutate(absent_sick = ifelse(is.na(absent_sick), FALSE, absent_sick)) # Create model data for all obs (not just propensity score) model_data <- ab_panel %>% # filter(oracle_number %in% matched$oracle_number) %>% left_join(irs) %>% # left_join(matched) %>% mutate(spray_month = format(date + days_since, '%b')) %>% mutate(days_since = break_days_since(days_since)) %>% filter(!is.na(days_since)) %>% # mutate(time_period = lendable::time_period_extract(date)) %>% # mutate(time_period = as.character(time_period)) %>% mutate(time_period = make_season(date = date)) %>% mutate(absent_sick = ifelse(is.na(absent_sick), FALSE, absent_sick)) %>% mutate(month = format(date, '%b')) # # remove the period 30 days after spraying # filter(days_since >= 30 | days_since <= 0) # Need to add seasonality # Need to adjust for houses on and off site fit_propensity <- glm(absent ~ days_since * time_period, family = binomial('logit'), data = model_data_propensity) x <- exp(coef(fit_propensity)) ors_propensity <- exp(confint(fit_propensity)) ors_propensity <- data.frame(ors_propensity) names(ors_propensity) <- c('Lower', 'Upper') ors_propensity <- cbind(x, ors_propensity) ors_propensity$Variable <- row.names(ors_propensity) ors_propensity <- ors_propensity %>% dplyr::rename(OR = x) %>% dplyr::select(Variable, OR, Lower, Upper) %>% mutate(OR = round(OR, digits = 3), Lower = round(Lower, digits = 3), Upper = round(Upper, digits = 3)) # Need to add seasonality # Need to adjust for houses on and off site fit_propensity_sick <- glm(absent_sick ~ days_since * time_period, family = binomial('logit'), data = model_data_propensity) x <- exp(coef(fit_propensity_sick)) ors_propensity_sick <- exp(confint(fit_propensity_sick)) ors_propensity_sick <- data.frame(ors_propensity_sick) names(ors_propensity_sick) <- c('Lower', 'Upper') ors_propensity_sick <- cbind(x, ors_propensity_sick) ors_propensity_sick$Variable <- row.names(ors_propensity_sick) ors_propensity_sick <- ors_propensity_sick %>% dplyr::rename(OR = x) %>% dplyr::select(Variable, OR, Lower, Upper) %>% mutate(OR = round(OR, digits = 3), Lower = round(Lower, digits = 3), Upper = round(Upper, digits = 3)) # new fit # new_fit <- lm(absent ~ days_since * time_period + month, data = model_data) # Need to adjust for houses on and off site fit <- glm(absent ~ days_since * time_period, family = binomial('logit'), data = model_data) x <- exp(coef(fit)) ors <- exp(confint(fit)) ors <- data.frame(ors) names(ors) <- c('Lower', 'Upper') ors <- cbind(x, ors) ors$Variable <- row.names(ors) ors <- ors %>% dplyr::rename(OR = x) %>% dplyr::select(Variable, OR, Lower, Upper) %>% mutate(OR = round(OR, digits = 3), Lower = round(Lower, digits = 3), Upper = round(Upper, digits = 3)) # Need to add seasonality # Need to adjust for houses on and off site fit_sick <- glm(absent_sick ~ days_since * time_period, family = binomial('logit'), data = model_data) x <- exp(coef(fit_sick)) ors_sick <- exp(confint(fit_sick)) ors_sick <- data.frame(ors_sick) names(ors_sick) <- c('Lower', 'Upper') ors_sick <- cbind(x, ors_sick) ors_sick$Variable <- row.names(ors_sick) ors_sick <- ors_sick %>% dplyr::rename(OR = x) %>% dplyr::select(Variable, OR, Lower, Upper) %>% mutate(OR = round(OR, digits = 3), Lower = round(Lower, digits = 3), Upper = round(Upper, digits = 3))
This paper provides new empirical evidence regarding the return on investment of privately managed malaria control activities (indoor residual spraying with pesticides) on worker absenteeism in Mozambique. We analyze 4 years of malaria control and worker health and absenteeism data from a large sugar processing facility in Mozambique. We find that the benefits outweight the costs (ie, there is a positive return on investment) even when the consideration of benefits is limited to those directly accrued by the company. These findings suggest that the private sector may have an important role to play in malaria control in endemic areas.
Malaria; Investment; Health; Productivity; Agriculture; Absenteeism
What is the investment case from the investor's perspective?
A significant sector of the economy in Mozambique is dominated by a full large-scale foreing direct investment projects [@Robbins2012], and the role of the private sector in health generally, and malaria specifically, is unequivocally important.
- Large agriculture and extractive industry firms take up wide swaths of land and employ hundreds of thousands [@German2013].
- The Mozambican state has encouraged large-scale entreprise with the aim of general economic development [@Buur2012].
- And where large firms exist, they often take on social roles such as housing and health care [@Winkler] [@Robbins2012] [@CastelBranco2014].
What is the short-term effect of IRS on worker absenteeism and clinical illness among sugarcane workers?
par(mfrow = c(2,2)) plot(cism::africa, col = 'grey', border = 'white', lwd = 0.1) title('Africa') for(i in (seq(0.5, 1.5, length = 2))^1.3){ points(32.779401, -25.452049, col = adjustcolor('red', alpha.f = 0.5), cex = i) } plot(cism::moz2, col = 'grey', border = 'white', lwd = 0.1) title('Mozambique') for(i in (seq(0.5, 2.5, length = 3))^1.3){ points(32.779401, -25.452049, col = adjustcolor('red', alpha.f = 0.5), cex = i) } man <- cism::man3 plot(man, col = 'grey', border = 'white', lwd = 0.1) title('Manhiça district') for(i in (seq(0.5, 3, length = 4))^1.3){ points(32.779401, -25.452049, col = adjustcolor('red', alpha.f = 0.5), cex = i) } plot(man[man@data$NAME_3 %in% c('Manhica - Sede', 'Maluana'),], col = 'grey', border = 'white', lwd = 0.1) title('Manhiça and Maluana posts') for(i in (seq(0.5, 3.5, length = 4))^1.3){ points(32.779401, -25.452049, col = adjustcolor('red', alpha.f = 0.5), cex = i) } par(mfrow = c(1,1))
\includegraphics{images/sat}
Maragra regularly employs IRS at on-site worker households in order to reduce those workers' (and their families') risk of malaria infection. Workers living off-site (our control group) also may have received IRS at some point during the study period (from government programs). Even though we do not have reliable person-level data on IRS carried out by the government, off-site workers are a suitable control in the sense that they represent "business as usual" (ie, what would happen if the company carried out no IRS and relied solely on public interventions). Using company HR and clinical records, we were able to identify absences and episodes of clinical malaria among all workers, as well as identify the time since the most recent IRS episode before the onsent of absence or illness.
We sought to understand the effect of IRS on individual workers' likelihood of absence from work as well as their likelihood of clinical malaria. To estimate this effect, we estimated separate models for absence and illness. We employed interrupted time series [@Lopez_Bernal_2016] and a linear probability approach using the following econometric model.
$$ \hat{Y_{it}} = \beta_{0} + (\beta_{1}) (\text{Season}_{t}) + (\beta_2{IRS}*\beta_3{IRS_t}) + ... + \epsilon $$
$\hat{Y}$ is the rate of absence. $\beta_{1}$ represents the clinical malaria incidence at that time in the entire district of Manhiça. Our demographic confounders (represented by $...$) are sex, age, and worker department (field, factory, or administrative).
Our formula for return on investment can be described in a straightforward fashion...
\begin{center} $R = \dfrac{P_{w} - S_{wa} - S_{wc}}{P_{w}}$
\end{center}
...where $R$ is the return on investment, $P$ is the malaria control program's total operating cost, $w$ refers to costs at the per-worker level, $a$ refers to savings through avoided absences, and $ c $ refers to savings through avoided clinical encounters. We define the malaria control program as "profitable" from an investment standpoint if ROI is greater than 100%, ie if the savings associated with the estimated effect of IRS is greater than the costs of the program's administration.
All data processing and analysis were carried out in R [@R] and all analysis code is freely available online [@brewgit]. Ethical approval for this project was obtained from the Institutional Ethics Review Board for Health at the CISM prior to data collection.
# BES incidence g1 <- ggplot(data = bes %>% mutate(season = ifelse(p >= median(p), 'High season', 'Low season')) %>% filter(weekdays(date) == 'Monday'), aes(x = date, y = p)) + geom_point(alpha = 0.6, aes(color = season), size = 0.2) + geom_line(alpha = 0.1) + theme_maragra(base_size = 9) + labs(x = 'Date', y = 'Incidence', title = 'A: Annualized district clinical\nmalaria incidence per 1,000') + scale_color_manual(name = '', values = c('red', 'black')) + guides(color = guide_legend(ncol = 1)) + theme(legend.position = c(.8, .8), legend.text = element_text(size = 5), legend.background = element_rect(fill=alpha('white', 0))) # Absenteeism rate g2_data <- ab_panel %>% group_by(month = as.Date(paste0(format(date, '%Y-%m'), '-01'))) %>% summarise(absences = length(which(absent)), absences_sick = length(which(absent_sick)), eligibles = n()) %>% ungroup %>% mutate(absenteeism_rate = absences / eligibles * 100, sick_absenteeism_rate = absences_sick / eligibles * 100) g2 <- ggplot(data = g2_data, aes(x = month, y = absenteeism_rate)) + geom_line(alpha = 0.6) + theme_maragra(base_size = 9) + labs(x = 'Date', y = 'Absenteeism rate', title = 'B: Monthly absenteeism rate') g3 <- ggplot(data = g2_data, aes(x = month, y = sick_absenteeism_rate)) + geom_line(alpha = 0.6) + theme_maragra(base_size = 9) + labs(x = 'Date', y = 'Absenteeism rate', title = 'C: Monthly sick absenteeism rate') # Weather g4 <- ggplot(data = weather %>% group_by(date = as.Date(paste0(format(date, '%Y-%m'), '-01'))) %>% summarise(precipitation = sum(precipitation, na.rm = TRUE)), aes(x = date, y = precipitation)) + geom_line(alpha = 0.6) + theme_maragra(base_size = 9) + labs(x = 'Date', y = 'Mililiters', title = 'D: Monthly precipitation') # Clinical incidence of malaria g5 <- clinic_agg %>% filter(date >= '2013-01-01', date <= '2016-12-31') %>% # mutate(date = as.Date(paste0(year, '-', month, '-01'))) %>% group_by(date) %>% summarise(tested = sum(tested), positive = sum(positive)) %>% ungroup %>% mutate(percent_positive = positive / tested * 100) %>% ggplot(aes(x = date, y = positive)) + geom_line(alpha = 0.6) + theme_maragra(base_size = 9) + labs(x = 'Date', y = 'Cases', title = 'E: Monthly malaria cases\nat company clinic') g6 <- clinic_agg %>% filter(date >= '2013-01-01', date <= '2016-12-31') %>% # mutate(date = as.Date(paste0(year, '-', month, '-01'))) %>% group_by(date) %>% summarise(tested = sum(tested), positive = sum(positive)) %>% ungroup %>% mutate(percent_positive = positive / tested * 100) %>% ggplot(aes(x = date, y = percent_positive)) + geom_line(alpha = 0.6) + theme_maragra(base_size = 9) + labs(x = 'Date', y = 'Cases', title = 'F: Monthly malaria test positivity rate\nat company clinic') Rmisc::multiplot(g1, g2, g3, g4, g5, g6, layout = matrix(1:6, nrow=2, byrow=TRUE))
plot_data <- ab_panel %>% # filter(oracle_number %in% matched$oracle_number) %>% left_join(irs) %>% mutate(months_since = days_since %/% 30) %>% filter(!is.na(months_since)) %>% # mutate(time_period = lendable::time_period_extract(date)) %>% # mutate(time_period = as.character(time_period)) %>% mutate(time_period = make_season(date = date)) %>% mutate(absent_sick = ifelse(is.na(absent_sick), FALSE, absent_sick)) x <- plot_data %>% group_by(months_since) %>% summarise(absences = length(which(absent)), sick_absences = length(which(absent_sick)), eligibles = length(absent)) %>% ungroup %>% mutate(absenteeism_rate = absences / eligibles * 100) %>% mutate(sick_absenteeism_rate = sick_absences / eligibles * 100) ggplot(data = x, aes(x = months_since, y = absenteeism_rate)) + geom_step(color = 'darkred', alpha = 0.8) + geom_vline(xintercept = 0, lty = 2, alpha = 0.5) + labs(x = 'Months relative to IRS', y = 'Absenteeism rate', title = 'Before/after IRS', subtitle = 'All absences') + theme_maragra()
plot_data <- ab_panel %>% # filter(oracle_number %in% matched$oracle_number) %>% left_join(irs) %>% mutate(months_since = days_since %/% 30) %>% filter(!is.na(months_since)) %>% # mutate(time_period = lendable::time_period_extract(date)) %>% # mutate(time_period = as.character(time_period)) %>% mutate(time_period = make_season(date = date)) %>% mutate(absent_sick = ifelse(is.na(absent_sick), FALSE, absent_sick)) x <- plot_data %>% group_by(months_since) %>% summarise(absences = length(which(absent)), sick_absences = length(which(absent_sick)), eligibles = length(absent)) %>% ungroup %>% mutate(absenteeism_rate = absences / eligibles * 100) %>% mutate(sick_absenteeism_rate = sick_absences / eligibles * 100) %>% mutate(before = months_since < 0) ggplot(data = x, aes(x = months_since, y = absenteeism_rate, group = before)) + geom_smooth() + geom_step(color = 'darkred', alpha = 0.8) + geom_vline(xintercept = 0, lty = 2, alpha = 0.5) + labs(x = 'Months relative to IRS', y = 'Absenteeism rate', title = 'Before/after IRS', subtitle = 'All absences') + theme_maragra()
plot_data <- ab_panel %>% # filter(oracle_number %in% matched$oracle_number) %>% left_join(irs) %>% mutate(months_since = days_since %/% 30) %>% filter(!is.na(months_since)) %>% # mutate(time_period = lendable::time_period_extract(date)) %>% # mutate(time_period = as.character(time_period)) %>% mutate(time_period = make_season(date = date)) %>% mutate(absent_sick = ifelse(is.na(absent_sick), FALSE, absent_sick)) x <- plot_data %>% group_by(months_since, time_period) %>% summarise(absences = length(which(absent)), sick_absences = length(which(absent_sick)), eligibles = length(absent)) %>% ungroup %>% mutate(absenteeism_rate = absences / eligibles * 100) %>% mutate(sick_absenteeism_rate = sick_absences / eligibles * 100) %>% mutate(before = months_since < 0) ggplot(data = x %>% mutate(time_period = paste0(time_period)), aes(x = months_since, y = absenteeism_rate)) + geom_step(color = 'darkred', alpha = 0.8) + facet_wrap(~time_period, ncol = 1) + geom_vline(xintercept = 0, lty = 2, alpha = 0.5) + labs(x = 'Months relative to IRS', y = 'Absenteeism rate') + theme_maragra()
ggplot(data = x %>% mutate(time_period = paste0(time_period)), aes(x = months_since, y = absenteeism_rate, group = before)) + geom_smooth() + geom_step(color = 'darkred', alpha = 0.8) + facet_wrap(~time_period, ncol = 1) + geom_vline(xintercept = 0, lty = 2, alpha = 0.5) + labs(x = 'Months relative to IRS', y = 'Absenteeism rate') + theme_maragra()
x <- model_data %>% group_by(days_since) %>% summarise(absences = length(which(absent)), eligibles = length(absent)) %>% ungroup %>% mutate(absenteeism_rate = absences / eligibles * 100) ggplot(data = x, aes(x = days_since, y = absenteeism_rate)) + geom_bar(stat = 'identity', alpha = 0.6, fill = 'darkblue') + theme_maragra() + labs(x = 'Days since IRS', y = 'Absenteeism rate', title = 'Raw absenteeism as a function of days since IRS') + geom_label(aes(label = paste0(round(absenteeism_rate, digits = 2), '%')))
x <- model_data %>% group_by(days_since, time_period) %>% summarise(absences = length(which(absent)), eligibles = length(absent)) %>% ungroup %>% mutate(absenteeism_rate = absences / eligibles * 100) ggplot(data = x %>% mutate(time_period = paste0(time_period)), aes(x = days_since, y = absenteeism_rate)) + geom_bar(stat = 'identity', alpha = 0.6, fill = 'darkblue') + theme_maragra() + labs(x = 'Days since IRS', y = 'Absenteeism rate', title = 'Raw absenteeism as a function of days since IRS by time_period') + # geom_label(aes(label = paste0(round(absenteeism_rate, digits = 2), '%'))) + facet_wrap(~time_period, ncol = 1) + theme(axis.text.x = element_text(angle = 90))
The malaria control program at Maragra has an annual operating budget of approximately XX, which includes the purchase of insecticide, the wages of IRS sprayers and drivers, transportation, record-keeping, and general administrative costs. Assuming linearity in costs, the program spends approximately XX per agregado sprayed. With each agregado containing an average of 2.2 workers, this translates to a cost of XX per worker protected per season. Much of the benefit of IRS goes to non-worker residents of sprayed agregados (who constitute a majority), but this benefit is purposefully ignored for this analysis.
Given the likelihood that clinical data does not fully capture all malaria cases, we do not quantify the costs of malaria infection to the company. Rather, we first estimate the reduction in absenteeism attributible to IRS, and then quantify the savings associated with prevented absences. Additionally, we calculate the clinical savings of IRS by first estimating the share of absences which are associated with an episode of clinical malaria, and then applying the clinical cost per case to the equivalent share of prevented absences. We intentionally ignore the savings accrued by the public health system, as well as the likely utility gains in secondary realms such as school absenteeism, producitivity, etc.
We create 4 worker fixed effects models. Different models for field vs not field, permanent vs temporary.
\includegraphics{images/fe1.png}
\includegraphics{images/fe2.png}
\includegraphics{images/fe3.png}
\includegraphics{images/fe4.png}
Details here on ROI calculation outcomes to go here.
# Get yearly absences / presences x <- model_data %>% mutate(time_period = make_season(date = date)) %>% filter(time_period == 'Malaria season') %>% group_by(months_since = days_since) %>% summarise(absences = length(which(absent)), eligibles = n()) %>% ungroup %>% mutate(absenteeism_rate = absences / eligibles * 100)
Savings
Costs
7% ROI (ignoring clinical costs)
Email: joe@economicsofmalaria.com
Paper: economicsofmalaria.com/maragra.pdf
Presentation: economicsofmalaria.com/maragraslides.pdf
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.