In joebrew/maragra: An analysis of malaria at the Maragra sugar company, Mozambique

# csl: journal-of-health-economics.csl

# Packages
library(tidyverse)
library(knitr)
library(Hmisc)
library(brew)
library(maragra)
library(RColorBrewer)
library(extrafont)
library(kableExtra)
library(raster)
loadfonts()
## Global options
options(max.print="75")
opts_chunk$set(echo=FALSE,
                 cache=FALSE,
               prompt=FALSE,
               fig.height = 4,
               fig.width = 5,
               tidy=TRUE,
               comment=NA,
               message=FALSE,
               warning=FALSE,
               dev = "cairo_pdf",
               fig.pos="!h",
               fig.align = 'center')
opts_knit$set(width=75)
options(xtable.comment = FALSE)

source('prepare_data.R')

\begin{center} \begin{large}

Maragra models

\end{large} \end{center}

\vspace{5mm}

\begin{center} \textbf{Overview}
\end{center}

\vspace{5mm} \begin{center} \begin{changemargin}{3cm}{3cm}

This document contains an overview of the modeling approach and simulations results.

\end{changemargin} \end{center}

\vspace{20mm}

\noindent\fbox{% \parbox{\textwidth}{% \subsection*{Main points} \begin{itemize} \item We have a new modeling approach \item We have carried out basic simulations for several malaria control strategies \item Results are consistent/coherent \item We have not yet implemented geographical variation \end{itemize} \vspace{2mm} }% }

\vfill \null

\subsection*{Desinataires} \textbf{Elisa Sicuri; Menno Pradhan}

\vspace{3mm}

\newpage

Current paper

Draft of the paper (\href{https://docs.google.com/document/d/1bUWRBCgVcgjSPHchIQxiTG8Vwv5hV1GLU4Tlu386sWA/edit#}{HERE}

Code

In R, our model looks like this

final_model$call

Where protection is the sum of all nearby IRS, weighted by the inverse of the distance from the residence of the person (including the person's own residence).

\newpage

Reproduction of charts

Number of workers by department

plot_data <- ab_panel %>%
  left_join(workers %>% dplyr::select(oracle_number,
                                      department,
                                      permanent_or_temporary)) %>%
  mutate(year_month = as.Date(paste0(format(date, '%Y-%m'), '-01'))) %>%
  mutate(group = department) %>%
  # mutate(field = ifelse(department == 'Field', 
  #                            'Field worker',
  #                            'Not field worker')) %>%
  # mutate(group = paste0(permanent_or_temporary, ' ',
  #                         tolower(field))) %>%
  group_by(group, year_month) %>%
  summarise(n = length(unique(oracle_number)))

ggplot(data = plot_data,
       aes(x = year_month,
           y = n,
           color = group)) +
  geom_line()

Number of temporary workers over time

plot_data <- ab_panel %>%
  left_join(workers %>% dplyr::select(oracle_number,
                                      department,
                                      permanent_or_temporary)) %>%
  mutate(year_month = as.Date(paste0(format(date, '%Y-%m'), '-01'))) %>%
  # mutate(group = department) %>%
  mutate(group = ifelse(department == 'Field',
                             'Field worker',
                             'Not field worker')) %>%
  # mutate(group = paste0(permanent_or_temporary, ' ',
  #                         tolower(field))) %>%
  group_by(group, year_month) %>%
  summarise(n = length(unique(oracle_number)))

ggplot(data = plot_data,
       aes(x = year_month,
           y = n,
           color = group)) +
  geom_line()

Absenteeism rate over time

plot_data <- ab_panel %>%
  left_join(workers %>% dplyr::select(oracle_number,
                                      department,
                                      permanent_or_temporary)) %>%
  mutate(year_month = as.Date(paste0(format(date, '%Y-%m'), '-01'))) %>%
  mutate(group = department) %>%
  # mutate(group = ifelse(department == 'Field',
  #                            'Field worker',
  #                            'Not field worker')) %>%
  # mutate(group = paste0(permanent_or_temporary, ' ',
  #                         tolower(field))) %>%
  group_by(group, year_month) %>%
  summarise(abs = length(which(absent)),
            eligibles = n()) %>%
  mutate(ab_rate = abs / eligibles * 100)

ggplot(data = plot_data,
       aes(x = year_month,
           y = ab_rate,
           color = group)) +
  geom_line()

Absenteeism rate over time by worker type

plot_data <- ab_panel %>%
  left_join(workers %>% dplyr::select(oracle_number,
                                      department,
                                      permanent_or_temporary)) %>%
  mutate(year_month = as.Date(paste0(format(date, '%Y-%m'), '-01'))) %>%
  # mutate(group = department) %>%
  mutate(field = ifelse(department == 'Field',
                             'Field worker',
                             'Not field worker')) %>%
  mutate(group = paste0(permanent_or_temporary, ' ',
                          tolower(field))) %>%
  group_by(group, year_month) %>%
  summarise(abs = length(which(absent)),
            eligibles = n()) %>%
  mutate(ab_rate = abs / eligibles * 100)

ggplot(data = plot_data,
       aes(x = year_month,
           y = ab_rate,
           color = group)) +
  geom_line()

Number of workers over time by worker type

plot_data <- ab_panel %>%
  left_join(workers %>% dplyr::select(oracle_number,
                                      department,
                                      permanent_or_temporary)) %>%
  mutate(year_month = as.Date(paste0(format(date, '%Y-%m'), '-01'))) %>%
  # mutate(group = department) %>%
  mutate(field = ifelse(department == 'Field',
                             'Field worker',
                             'Not field worker')) %>%
  mutate(group = paste0(permanent_or_temporary, ' ',
                          tolower(field))) %>%
  group_by(group, year_month) %>%
  summarise(n = length(unique(oracle_number)))

ggplot(data = plot_data,
       aes(x = year_month,
           y = n,
           color = group)) +
  geom_line()

Descriptive overview

Herd-only protection score

pq <- quantile(final_data$herd_protection)
x <- final_data %>%
  # mutate(protection_cat = protection_var) %>%
  mutate(protection_cat = ifelse(herd_protection <= pq[2],
                                 'Protection 1',
                                 ifelse(herd_protection <= pq[3],
                                        'Protection 2',
                                        ifelse(herd_protection <= pq[4],
                                               'Protection 3',
                                               ifelse(herd_protection <= pq[5],
                                                      'Protection 4',
                                                      NA))))) %>%
  group_by(protection_cat) %>%
  summarise(ab_rate = length(which(absent))/ n() * 100)

names(x) <- c('Quantile', 'Absenteeism rate')
kable(x)

ggplot(data = x,
       aes(x = `Quantile`,
           y = `Absenteeism rate`)) +
  geom_bar(stat = 'identity',
           alpha = 0.6,
           color = 'black') +
  labs(x = 'Protection score quantile')

\newpage

Individual-only protection score

pq <- quantile(final_data$protection,
               seq(0, 1, 0.1))
pq <- sort(unique(pq))
x <- final_data %>%
  # mutate(protection_cat = protection_var) %>%
  mutate(protection_cat = ifelse(protection <= pq[2],
                                 'Protection 1',
                                 ifelse(protection <= pq[3],
                                        'Protection 2',
                                        ifelse(protection <= pq[4],
                                               'Protection 3',
                                               ifelse(protection <= (pq[5] + 0.1),
                                                      'Protection 4',
                                                      NA))))) %>%
  group_by(protection_cat) %>%
  summarise(ab_rate = length(which(absent))/ n() * 100)

names(x) <- c('Quantile', 'Absenteeism rate')
kable(x)

ggplot(data = x,
       aes(x = `Quantile`,
           y = `Absenteeism rate`)) +
  geom_bar(stat = 'identity',
           alpha = 0.6,
           color = 'black') +
  labs(x = 'Protection score quantile')

\newpage

Overall protection score

pq <- quantile(final_data$protection_var)
x <- final_data %>%
  # mutate(protection_cat = protection_var) %>%
  mutate(protection_cat = ifelse(protection_var <= pq[2],
                                 'Protection 1',
                                 ifelse(protection_var <= pq[3],
                                        'Protection 2',
                                        ifelse(protection_var <= pq[4],
                                               'Protection 3',
                                               ifelse(protection_var <= pq[5],
                                                      'Protection 4',
                                                      NA))))) %>%
  group_by(protection_cat, group) %>%
  summarise(ab_rate = length(which(absent))/ n() * 100) %>%
  ungroup

names(x) <- c('Quantile', 'Group', 'Absenteeism rate')
kable(x)

ggplot(data = x,
       aes(x = `Quantile`,
           y = `Absenteeism rate`)) +
  geom_bar(stat = 'identity',
           alpha = 0.6,
           color = 'black') +
  labs(x = 'Protection score quantile') +
  facet_wrap(~Group)

\newpage

Precipitation lag

rq <- quantile(final_data$rain_var, na.rm = TRUE)
x <- final_data %>%
  # mutate(protection_cat = protection_var) %>%
  mutate(protection_cat = ifelse(rain_var <= rq[2],
                                 'Protection 1',
                                 ifelse(rain_var <= rq[3],
                                        'Protection 2',
                                        ifelse(rain_var <= rq[4],
                                               'Protection 3',
                                               ifelse(rain_var <= rq[5],
                                                      'Protection 4',
                                                      NA))))) %>%
  group_by(protection_cat, group) %>%
  summarise(ab_rate = length(which(absent))/ n() * 100) %>%
  ungroup %>%
  filter(!is.na(protection_cat))

names(x) <- c('Quantile', 'Group', 'Absenteeism rate')
kable(x)

ggplot(data = x,
       aes(x = `Quantile`,
           y = `Absenteeism rate`)) +
  geom_bar(stat = 'identity',
           alpha = 0.6,
           color = 'black') +
  labs(x = 'Lagged precipitation score quantile') +
  facet_wrap(~Group)

\newpage

Results

Regression table

The below table shows the results of the model devised thus far.

# make_final_table(model = final_model, the_caption = "Model results", type = 'latex', multiplier = 10000)

summary(final_model)

Intepretation

The below charts show predicted absenteeism rates at different rain and protection levels.

fake <- expand.grid(#group = sort(unique(monthly$group)),
  rain_var = rq,
  protection_var = pq)#,
fake$predicted <- fake$predicted_upr <- fake$predicted_lwr<- NA

fe <- getfe(final_model)
predictions <- exp(predict(final_model_lm, fake, interval = 'confidence'))-1
# predictions <- data.frame(predictions)
# # Add the fixed effects
# fixed_effects <- fe$effect

fake$predicted<- predictions[,1]
fake$predicted_upr <- predictions[,3]
fake$predicted_lwr <- predictions[,2]

Precipitation's effect

ggplot(data = fake %>% filter(protection_var == pq[1]),
       aes(x = rain_var,
           y = predicted)) +
  geom_point() +
  geom_line() +
  geom_ribbon(aes(ymin = predicted_lwr,
                  ymax = predicted_upr),
              alpha = 0.4,
              fill = 'darkorange') +
  labs(y = 'Predicted absenteeism rate',
       x = '1-3 month precipitation lag',
       title = 'Lagged precipitation and absenteeism')

IRS protection's effect

ggplot(data = fake %>% filter(rain_var == rq[1]),
       aes(x = protection_var,
           y = predicted)) +
  geom_point() +
  geom_line() +
  geom_ribbon(aes(ymin = predicted_lwr,
                  ymax = predicted_upr),
              alpha = 0.4,
              fill = 'darkorange') +
  labs(y = 'Predicted absenteeism rate',
       x = 'Protection index',
       title = 'IRS-afforded protection and absenteeism')

Simulations

sim_data <- simulations
sim_data$idx <- sim_data$oracle_number
fe <- getfe(final_model)
predictions <- exp(predict(final_model_lm, sim_data, interval = 'confidence'))-1
predictions <- data.frame(predictions)
# Add the fixed effects
fixed_effects <- left_join(sim_data,
                           data.frame(fe) %>% dplyr::select(idx, effect) %>%
                             mutate(idx = as.character(idx))) %>%
  .$effect
fixed_effects <- as.numeric(fixed_effects)

predictions$fit <- predictions$fit + fixed_effects
predictions$lwr <- predictions$lwr + fixed_effects
predictions$upr <- predictions$upr + fixed_effects

# combine other info
predictions <- predictions %>% bind_cols(sim_data %>% dplyr::select(date, absent, strategy)) %>%
  mutate(absent = as.numeric(absent)) %>%
  filter(!is.na(fit))

simple <- predictions %>%
  group_by(strategy) %>%
  summarise(n = n(),
            actual_absences = sum(absent),
            predicted_absences = sum(fit, na.rm = TRUE),
            actual = mean(absent, na.rm = TRUE),
            predicted = mean(fit, na.rm = TRUE)) %>%
  dplyr::rename(value = predicted) %>%
  dplyr::select(strategy, value, predicted_absences)

kable(simple)

ggplot(data = simple,
       aes(x = strategy,
           y = value)) +
  geom_bar(stat = 'identity',
           alpha = 0.6,
           color = 'black') +
  labs(y = 'Absenteeism rate',
       x = 'Strategy') +
  geom_label(aes(label = round(value, digits = 3)),
             nudge_y = -0.005)

ggplot(data = simple,
       aes(x = strategy,
           y = predicted_absences)) +
  geom_bar(stat = 'identity',
           alpha = 0.6,
           color = 'black') +
  labs(y = 'Absenteeism rate',
       x = 'Strategy')

Costs

x <- final_data %>%
  left_join(irs %>% dplyr::select(date, unidade, days_since))

out <- data.frame(
  strategy = c('Max',
               'Real',
               'Time-optimized',
               'Zero'),
  sprays = c(nrow(final_data) / 60,
             length(which(x$days_since == 0)),
             length(which(format(final_data$date, '%m-%d') == '12-11')),
             0
  )
) %>%
  mutate(sprays = sprays / length(unique(final_data$oracle_number)))

kable(out)
ggplot(data = out,
       aes(x = strategy,
           y = sprays)) +
  geom_bar(stat = 'identity',
           alpha = 0.6,
           color = 'black') +
  labs(y = 'Number of sprays per person',
       x = 'Strategy')

# Introduction is really all that matters - specifying contribution, summarizing results
# Journal of Health Economics (1) and then Health Economics (2).
# Elisa: American Journal of Health Economics (from American Health Economic Society)
# Ensure that we explicitly explain these 21 days issue to rule out reverse causality.
# Large firms are big enough to capture externalities
# What is the effect of anti-malaria campaign on worker producitivity
# Data special
# Identification problems (no external spraying, no home spraying, unobserved govt - omitted variable bias dealt with through worker and time fixed effects.)
# Exploring herd protection through various functional forms

Bibliography

joebrew/maragra documentation built on Aug. 11, 2020, 8:39 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

joebrew/maragra
An analysis of malaria at the Maragra sugar company, Mozambique

In joebrew/maragra: An analysis of malaria at the Maragra sugar company, Mozambique

Current paper

Code

Reproduction of charts

Number of workers by department

Number of temporary workers over time

Absenteeism rate over time

Absenteeism rate over time by worker type

Number of workers over time by worker type

Descriptive overview

Herd-only protection score

Individual-only protection score

Overall protection score

Precipitation lag

Results

Regression table

Intepretation

Precipitation's effect

IRS protection's effect

Simulations

Costs

Bibliography

R Package Documentation

Browse R Packages

We want your feedback!

joebrew/maragra An analysis of malaria at the Maragra sugar company, Mozambique

In joebrew/maragra: An analysis of malaria at the Maragra sugar company, Mozambique

Current paper

Code

Reproduction of charts

Number of workers by department

Number of temporary workers over time

Absenteeism rate over time

Absenteeism rate over time by worker type

Number of workers over time by worker type

Descriptive overview

Herd-only protection score

Individual-only protection score

Overall protection score

Precipitation lag

Results

Regression table

Intepretation

Precipitation's effect

IRS protection's effect

Simulations

Costs

Bibliography

R Package Documentation

Browse R Packages

We want your feedback!

joebrew/maragra
An analysis of malaria at the Maragra sugar company, Mozambique