inst/rmd/paper_prepare.R

# Packages
library(sp)
library(tidyverse)
library(knitr)
library(Hmisc)
library(brew)
library(maragra)
library(knitr)
library(RColorBrewer)
# library(lme4)
library(lfe)
library(restorepoint)
library(regtools)
if('prepared_data.RData' %in% dir()){
  load('prepared_data.RData')
} else {
  # Read in data
  ab <- maragra::ab
  ab_panel <- maragra::ab_panel
  bairros <- maragra::bairros
  bes <- maragra::bes
  census <- maragra::census
  clinic <- maragra::clinic
  clinic_agg <- maragra::clinic_agg
  mc <- maragra::mc
  weather <- maragra::weather
  workers <- maragra::workers
  
  # Create model data
  
  model_data <-
    ab_panel %>%
    left_join(irs, by = c('unidade', 'date')) %>%
    mutate(days_since = days_since %/% 30) %>%
    mutate(days_since = ifelse(is.na(days_since), 'Never',
                               ifelse(days_since < 0, 'Before',
                                      ifelse(days_since >= 12, '12+', as.character(add_zero(days_since, n = 2)))))) 
  
  model_data <-
    model_data %>%
    mutate(absent_sick = ifelse(is.na(absent_sick), FALSE, absent_sick)) %>%
    # Add malaria incidence
    left_join(bes %>%
                dplyr::select(date, p) %>%
                dplyr::rename(incidence = p),
              by = 'date') %>%
    mutate(season = ifelse(incidence >= median(incidence),
                           'high',
                           'low')) %>%
    mutate(season = factor(season, levels = c('low', 'high'))) %>%
    # Bring in some information on workers
    left_join(workers %>%
                dplyr::select(oracle_number,
                              permanent_or_temporary,
                              department,
                              sex,
                              date_of_birth,
                              perm_id,
                              census_name_match_score) %>%
                # void the permids of anyone with a match score of greater than 0.2
                mutate(perm_id = ifelse(census_name_match_score > 0.25,
                                        NA,
                                        perm_id))) %>%
    # Bring in some info from the census
    left_join(census %>%
                filter(!duplicated(perm_id)) %>%
                dplyr::select(perm_id,
                              maragra_bairro,
                              maragra_fabrica,
                              education,
                              floor_material)) %>%
    # Bring in some data for weather
    left_join(weather %>%
                dplyr::select(date, precipitation,
                              temp) %>%
                mutate(precipitation = ifelse(is.na(precipitation),
                                              0,
                                              precipitation))) %>%
    mutate(rainy = precipitation > 0)
  
  # Aggregate months since 
  model_data <- model_data %>%
    mutate(months_since = days_since) %>%
    mutate(months_since = as.character(months_since)) %>%
    mutate(months_since = ifelse(months_since %in% c('00', '01',
                                                     '02', '03', '04', '05'),
                                 'After',
                                 'Before')) %>%
    # Define whether every sprayed
    ungroup %>%
    group_by(oracle_number) %>%
    mutate(ever_sprayed = length(which(unidade %in% mc$unidade)) > 0) %>%
    ungroup %>%
    mutate(months_since = as.character(months_since)) %>%
    mutate(on_site = ever_sprayed)
  
  model_data <- model_data %>%
    mutate(months_since = factor(months_since, levels = unique(c('Before', sort(unique(months_since)))))) 
  
  # Since ad and factory are same, keep same
  model_data$field <- ifelse(model_data$department == 'Field', 
                             'Field worker',
                             'Not field worker')

  model_data <- model_data %>% 
    mutate(group = paste0(permanent_or_temporary, ' ',
                          tolower(field)))

  # Add a month column
  model_data$month_number <- add_zero(format(model_data$date, '%m'), 2)
  
  # Get a malaria season var
  model_data <- 
    model_data %>%
    mutate(calendar_month = as.numeric(format(date, '%m'))) %>%
    mutate(calendar_year = as.numeric(format(date, '%Y'))) %>%
    mutate(malaria_year = ifelse(calendar_month < 6,
                                 paste0(calendar_year-1, '-', calendar_year),
                                 paste0(calendar_year, '-', calendar_year+1)))
  
  # Get geographic info for externality analysis
  # Get latitude / longitude into model_data
  model_data <- model_data %>%
    left_join(census %>%
                filter(!duplicated(unidade)) %>%
                dplyr::select(unidade,
                              longitude_aura,
                              latitude_aura),
              by = 'unidade') %>%
    dplyr::select(-days_since) %>%
    left_join(irs %>%
                dplyr::select(date, unidade, days_since),
              by = c('date', 'unidade'))
  
  model_data$p <- ifelse(model_data$months_since %in% c('Before', 'Never'), 0, 1)
  
  # Estimate a protection factor based on the weighted protection 
  # scores of nearby houses
  dates <- sort(unique(model_data$date))
  out_list <- list()
  
  weighter <- function(x){
    # x[x == 0] <- 0.01
    out <- (1 / x)#^1.2
    # out[is.infinite(out)] <- 0
    return(out)
  }
  counter <- 0
  
  bairro_loc <- apply(coordinates(maragra::bairros_maragra_bairro), 2, mean)
  for(i in 1:length(dates)){
    this_date <- dates[i]
    message(this_date, ' : ', i, ' of ', length(dates))
    this_model_data <- model_data %>% 
      filter(#!is.na(longitude_aura), 
             #!is.na(latitude_aura),
             date == this_date,
             census_name_match_score <= 0.2) %>%
      # if no geography (due to no census matching, just use average geo)
      mutate(longitude_aura = ifelse(is.na(longitude_aura), 
                                     bairro_loc[1],
                                     longitude_aura)) %>%
      
      mutate(latitude_aura = ifelse(is.na(latitude_aura), 
                                     bairro_loc[2],
                                    latitude_aura)) 
    this_model_data_spatial <- this_model_data
    coordinates(this_model_data_spatial) <- ~longitude_aura+latitude_aura
    proj4string(this_model_data_spatial) <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
    # Loop through each house, getting the nearby ones
    distances <- spDists(x = this_model_data_spatial)
    
    # Go through each house and get the distances, protection
    for(j in 1:nrow(this_model_data_spatial)){
      # message('---house number ', j, ' of ', nrow(this_model_data_spatial))
      sub_distances <- distances[j,]
      x <- this_model_data_spatial$protection
      w <- weighter(sub_distances)
      x <- x[is.finite(w)]
      # # Protection is 1 divided months since irs
      # p <- ifelse(this_model_data_spatial$months_since == 'No IRS',
      #             0,
      #             as.numeric(this_model_data_spatial$months_since) + 1)
      
      p <- this_model_data_spatial$p
      p[is.infinite(p)] <- 0
      p <- p[is.finite(w)]
      
      # The weight
      w <- w[is.finite(w)]
      # Number of houses within 1k
      n <- length(which(sub_distances <= 1))
      # # # The weighted protection score (average)
      # # positivity <- stats::weighted.mean(x = x,
      # #                                    w = w,
      # #                                    na.rm = TRUE)
      # # The weighted additive protection score
      # s <- sum(x[sub_distances <= 1] * w[sub_distances <= 1], na.rm = TRUE)
      
      # Product of weighted distance multipled by time since IRS
      # (this is for modeling herd protection naively)
      herdy <- p * w
      herdy <- sum(herdy, na.rm = TRUE)
      w_sum <- sum(w)
      
      
      counter <- counter + 1
      id <- this_model_data_spatial$oracle_number[j]
      # message('-----', id, ': ', positivity)
      
      # Update model_data
      out_data <- data_frame(oracle_number = id,
                             date = this_date,
                             # herd = positivity,
                             n = n,
                             # s = s,
                             herdy = herdy,
                             w_sum = w_sum)
      out_list[[counter]] <- out_data
    }
  }
  
  # Combine all of the time-place risk factor scores into one dataframe
  protection_df <- bind_rows(out_list)
  protection_df <- protection_df %>%
    group_by(oracle_number, date) %>%
    summarise(#herd = mean(herd, na.rm = TRUE),
              n = mean(n, na.rm = TRUE),
              #s = mean(s, na.rm = TRUE),
              herdy = mean(herdy, na.rm = TRUE),
              w_sum = mean(w_sum, na.rm = TRUE))
  
  # Join to model data
  model_data <- left_join(model_data, protection_df,
                 by = c('oracle_number', 'date'))
  # model_data$herd <- model_data$s #model_data$n * model_data$herd
  model_data$herd <- NULL
  model_data$herd <- model_data$herdy
  model_data$rainy_day <- model_data$precipitation >= 0.01
  
  # Get hiv prevalence
  hiv_prevalence <- maragra::hiv_prevalence
  prevs <- model_data %>%
    group_by(longitude_aura,
             latitude_aura) %>%
    summarise(n = n()) %>%
    filter(!is.na(longitude_aura)) %>%
    mutate(x = longitude_aura,
           y = latitude_aura) %>%
    dplyr::select(-n) %>%
    ungroup
  coordinates(prevs) <- ~x+y
  x <- raster::extract(x = hiv_prevalence, y = prevs)
  prevs$hiv_prevalence <- x
  prevs$hiv_prevalence[is.na(prevs$hiv_prevalence)] <- mean(prevs$hiv_prevalence,na.rm = TRUE)
  model_data <- left_join(x = model_data,
                          y = prevs@data,
                          by = c('longitude_aura',
                                 'latitude_aura'))
  
  save.image('temp.RData')

  # Remove nas
  model_data <- model_data %>%
    filter(!is.na(season),
           !is.na(months_since),
           !is.na(oracle_number),
           !is.na(absent),
           !is.na(incidence),
           !is.na(rainy_day),
           !is.na(herd),
           !is.na(malaria_year))
  
  # # Cut down from 4 to 3 groups
  # model_data <- model_data %>%
  #   filter(group != 'Temporary not field worker') %>%
  #   mutate(group = ifelse(group == 'Permanent not field worker',
  #                         'Non-field worker',
  #                         group))
  
  # # REMOVE THE NEVERS
  # model_data <- model_data %>%
  #   filter(ever_sprayed)
  # 
  fe_models <- list()
  sick_models <- list()
  # Secondary analysis
  protection_models <- list()
  groups <- sort(unique(model_data$group))
  # library(lmerTest)
  # library(nlme)
  for (i in 1:length(groups)){
    message(i)
    this_group <- groups[i]
    message(this_group)
    these_data <- model_data %>% filter(group == this_group)

    this_model <- felm(absent ~ season*months_since + rainy_day  | oracle_number + malaria_year | 0 | 0,
                       data = these_data)
    this_sick_model <- felm(absent_sick ~ season*months_since + rainy_day  | oracle_number + malaria_year| 0 | 0,
                            data = these_data)
    this_protection_model <- felm(absent ~ season*months_since + rainy_day  | oracle_number + malaria_year | 0 | 0,
                       data = these_data)
    fe_models[[i]] <- this_model
    sick_models[[i]] <- this_sick_model
    protection_models[[i]] <- this_protection_model
  }
  names(fe_models) <- groups
  names(sick_models) <- groups
  names(protection_models) <- groups
  
  # Get the herd protection score assuming that everyone nearby was protected
  groups <- sort(unique(model_data$group))
  herd_ideal <- 
    model_data %>%
    filter(!is.na(longitude_aura),
           !is.na(latitude_aura)) %>%
    group_by(oracle_number) %>%
    summarise(lng = dplyr::first(longitude_aura),
              lat = dplyr::first(latitude_aura)) %>%
    ungroup
  coordinates(herd_ideal) <- ~lng+lat
  proj4string(herd_ideal) <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
  # Get distances
  distances <- spDists(x = herd_ideal)
  out <- apply(distances, 1, function(x){
    x <- weighter(x)
    x <- x[is.finite(x)]
    x <- sum(x, na.rm = TRUE)
    return(x)
  })
  herd_ideal$herd_ideal <- out
  herd_ideal <- herd_ideal@data
  # Join back to model_data
  model_data <-
    left_join(model_data,
              herd_ideal)
  
  # devtools::install_github('skranz/regtools')
  library(regtools)
  # Define function similiar to "predict" but for felm
  predict_felm <- function(model, data,
                           irs0 = FALSE,
                           irs_all = FALSE,
                           herd0 = FALSE,
                           herd_max = FALSE){
    
    # Make overwrites if necessary
    # If IRS is 0 (for prediction purposes, overwrite)
    if(irs0){
      data$months_since <- factor('Before', levels = c('Before', 'After'))
    }
    if(herd0){
      data$herd <- 0
    }
    if(irs_all){
      data$months_since <- factor('After', levels = c('Before', 'After'))
    }
    if(herd_max){
      data$herd <- data$herd_ideal
    }
    predicted <- regtools::predict.felm(object = model,
                                        newdata = data,
                                        use.fe = TRUE)
    return(predicted)
  }
  
  model_data$predicted <-
    model_data$predicted_no_irs <-
    model_data$predicted_no_herd <- 
    model_data$predicted_no_herd_no_irs <-
    model_data$predicted_max_irs <- 
    model_data$predicted_max_herd <-
    model_data$predicted_max_herd_max_irs <- 
    NA
  
  for (i in 1:length(groups)){
    message(i)
    this_group <- groups[i]
    indices <- which(model_data$group == this_group)
    model <- protection_models[[this_group]]
    data <- model_data[indices,]  
    model_data$predicted[indices] <- 
      predict_felm(model = model,
              data = data)
    model_data$predicted_no_irs[indices] <-
      predict_felm(model = model,
                   data = data,
                   irs0 = TRUE)
    model_data$predicted_no_herd[indices] <-
      predict_felm(model = model,
                   data = data,
                   herd0 = TRUE)
    model_data$predicted_no_herd_no_irs[indices] <-
      predict_felm(model = model,
                   data = data,
                   herd0 = TRUE,
                   irs0 = TRUE)
    model_data$predicted_max_irs[indices] <-
      predict_felm(model = model,
                   data = data,
                   irs_all = TRUE)
    model_data$predicted_max_herd[indices] <- 
      predict_felm(model = model,
                   data = data,
                   herd_max = TRUE)
    model_data$predicted_max_herd_max_irs[indices] <- 
      predict_felm(model = model,
                   data = data,
                   herd_max = TRUE,
                   irs_all = TRUE)
  }
  
  
  # Plots of maps
  # Libraries
  library(tidyverse)
  library(raster)
  library(tidyr)
  library(ggplot2)
  library(RColorBrewer)
  library(broom)
  library(ggthemes)
  
  # Get each countries shapefile
  countries <- c('Mozambique')
  iso3s <- c('MOZ')
  for(i in 1:length(countries)){
    message('Fetching data for ', countries[i])
    x <- getData(name = 'GADM', level = 2, country = iso3s[i])
    assign(tolower(countries[i]),
           x,
           envir = .GlobalEnv)
  }
  
  # Define which districts are "special" -------------
  specials <- 'Manhiça'
  
  # Mozambique
  mozambique@data$special <- FALSE
  mozambique@data$special[mozambique@data$NAME_2 %in% specials] <- TRUE
  
  # Combine all data into a "long" / "tidy" format
  make_long <- function(x, region = "NAME_2"){
    shp_df <- broom::tidy(x, region = region)
    return(shp_df)
  }
  mozambique_long <- make_long(mozambique) %>% mutate(country = 'Mozambique')
  combined <- mozambique_long
  combined$special <- combined$id %in% specials
  # combined$special[!combined$special] <- NA
  # Get a map of africa to use as a background
  # from the cism package!
  africa <- cism::africa
  africa_long <- make_long(africa, region = 'COUNTRY')
  africa_long$special <- africa_long$id %in% countries
  
  g1 <-
    ggplot() +
    geom_polygon(data = africa_long,
                 aes(x = long,
                     y = lat,
                     group = group),
                 fill = grey(0.6),
                 alpha = 1,
                 color = 'white',
                 lwd = 0.3) +
    geom_polygon(data = combined,
                 aes(x = long,
                     y = lat,
                     group = group),
                 fill = grey(0.3)) +
    geom_polygon(data = combined %>% filter(!is.na(special) & special),
                 aes(x = long,
                     y = lat,
                     group = group,
                     fill = special)) +
    scale_fill_manual(name = '',
                      values = c('darkorange'),
                      na.value = NA) +
    coord_cartesian() +
    ggthemes::theme_map() +
    theme(legend.position = 'none') +
    # redraw country lines
    geom_polygon(data = africa_long,
                 aes(x = long,
                     y = lat,
                     group = group),
                 fill = NA,
                 alpha = 1,
                 color = 'white',
                 lwd = 0.3) + coord_map() +
    labs(title = 'i.')
  country_map <- function(the_country = 'Gabon'){
    ggplot(data = combined %>% filter(country == the_country),
           aes(x = long,
               y = lat,
               group = group,
               fill = special)) +
      geom_polygon(#alpha = 0.8,
        lwd = 0.3,
        color = grey(0.6)) +
      theme_map() +
      coord_map() + 
      scale_fill_manual(name = '', values = c('darkgrey', 'darkred')) +
      theme(legend.position = 'none')
  }
  g2 <- country_map('Mozambique') +
    labs(title = 'ii.')
  
  mar <- data.frame(lat = -25.4498802, long = 32.777661)
  man3_fortified <- cism::man3_fortified
  g3 <- ggplot(data = man3_fortified,
               aes(x = long,
                   y = lat)) +
    geom_polygon(aes(group = group),
                 # alpha = 0.8,
                 lwd = 0.3,
                 color = grey(0.6)) +
    geom_point(data = mar,
               aes(x = long,
                   y = lat),
               color = 'red') +
    geom_point(data = mar,
               aes(x = long,
                   y = lat),
               color = 'red',
               size = 4, 
               pch = 1) +
    geom_point(data = mar,
               aes(x = long,
                   y = lat),
               color = 'red',
               size = 6, 
               pch = 1) +
    geom_point(data = mar,
               aes(x = long,
                   y = lat),
               color = 'red',
               size = 9, 
               pch = 1) +
    theme_map() +
    coord_map() + 
    theme(legend.position = 'none') +
    labs(title = 'iii')
  
  library(ggmap)
  # if('.hdf.RData' %in% dir()){
  load('.hdf.RData')
  # } else {
  # hdf <- ggmap::get_map(location = c(lon = mar$long, lat = mar$lat), maptype = 'satellite', zoom = 14)
  #   save(hdf, file = '.hdf.RData')
  # }
  
  g4 <- ggmap::ggmap(hdf) +
    theme_map() +
    labs(title = 'iv')
  
  map_list <- list(g1,g2,g3,g4)
  
  save.image(file = 'prepared_data.RData')
  }

clean_up_model <- function(x, multiplier = 100){
  # extract coefficients
  coefs <- data.frame(coef(summary(x)))
  # use normal distribution to approximate p-value
  coefs$p.z <- 2 * (1 - pnorm(abs(coefs$t.value)))
  ps <- coefs$p.z
  x = broom::tidy(x)
  for(j in 2:(ncol(x) -1)){
    x[,j] <- x[,j] * multiplier
  }
  x$term <- gsub('seasonhigh', 'Malaria season', x$term)    
  x$term <- gsub('seasonlow', 'Low malaria season', x$term)
  # x$term <- gsub('months_since', 'Months since IRS: ', x$term)
  x$term <- gsub('months_since', 'IRS status=', x$term)
  x$term <- gsub('department', 'Department: ', x$term)
  x$term <- gsub('precipitation', 'Precipitation (mm) ', x$term)
  x$term <- gsub('maragra_fabricaTRUE', 'Living at factory', x$term)
  x$term <- gsub('sexM', 'Male', x$term)
  x$term <- gsub('permanent_or_temporaryTemporary', 'Temp contract', x$term)
  x$term <- gsub('rainy_dayTRUE', 'Rainy day', x$term)
  x$term <- gsub('rainyTRUE', 'Rainy day', x$term)
  x$term <- gsub('on_siteTRUE', 'On site', x$term)
  x$term <- gsub('on_siteFALSE', 'Off site', x$term)
  x$term <- gsub('fieldNot field worker', 'Not field worker', x$term)
  x$term <- gsub('herd', 'Herd protection', x$term)
  x$term <- gsub('before_after', 'IRS time=', x$term)
  x$term <- gsub('time_since', 'Months after=', x$term)
  x$term <- gsub('incidence', 'Incidence', x$term)
  names(x) <- Hmisc::capitalize(names(x))
  names(x) <- gsub('.', ' ', names(x), fixed = TRUE)
  x$`P value` <- NA
  x$`P value`[1:length(ps)] <- ps
  x <- x %>% 
    mutate(Estimate = paste0(round(Estimate, 3), ' (P',
                             ifelse(`P value` <= 0.001,
                                    '<0.001',
                                    paste0('=', round(`P value`, 3))
                             ), ')')) %>%
    dplyr::select(Term, Estimate)
  x <- x %>% filter(!grepl('sd_', Term))
  x <- x %>% filter(!grepl('10+', Term, fixed = TRUE))
  return(x)
}

make_models_table <- function(model_list, the_caption = "Models with worker fixed effects", type = 'html', multiplier = 100){
  
  out_list <- list()
  for(i in 1:length(model_list)){
    message(i)
    this_model <- names(model_list)[i]
    the_model <- model_list[[which(names(model_list) == this_model)]]
    the_model <- clean_up_model(the_model, multiplier = multiplier)
    names(the_model)[2] <- this_model
    out_list[[i]] <- the_model
  }
  
  df <- out_list[[1]]
  namer <- names(df)[2]
  names(df)[2] <- 'Estimate'
  
  for(i in 2:length(out_list)){
    temp <- out_list[[i]]
    namer[i] <- names(temp)[2]
    names(temp)[2] <- 'Estimate'
    df <- bind_rows(df, temp)
  }
  
  library(kableExtra)
  breaker <- nrow(out_list[[1]])
  lengther <- length(out_list)
  breaks <- c(0, breaker * 1:(lengther))
  k <- kable(df, format = type, caption = the_caption, booktabs = T, longtable = TRUE) %>%
    kable_styling(latex_options = c("hold_position", "repeat_header"))
  for (i in 1:(length(breaks)-1)){
    message(i)
    k <- k %>%
      group_rows(namer[i], 
                 breaks[i] + 1,
                 breaks[i] + breaker,
                 latex_gap_space = "1.5em") 
  }
  print(k)
}

make_models_table(model_list = protection_models)

irs$months_since <- irs$days_since %/% 30

# Define function for making season
make_season <- function(date){
  out <- ifelse(as.numeric(format(date, '%m')) %in% c(11:12, 1:3),
                'Malaria season', 'Off season')
  out <- factor(out, levels = c('Off season', 'Malaria season'))
  return(out)
}

# Create a year-month variable
model_data$year_month <-
  paste0(model_data$calendar_year, 
         '_', model_data$calendar_month)

# Create a months since variable for menno's model (actually months since)
model_data <-
  model_data %>%
  mutate(months_since_menno = days_since %/% 30) %>%
  mutate(months_since_menno = months_since_menno + 1) %>%
  mutate(months_since_menno = ifelse(months_since_menno < 1 |
                                       months_since_menno > 6,
                                     'Before',
                                     add_zero(months_since_menno, n = 2))) %>%
  mutate(months_since_menno = ifelse(is.na(months_since_menno), 'Before',
                                     months_since_menno)) %>%
  mutate(months_since_menno = factor(months_since_menno,
                                     levels = unique(c('Before', sort(unique(months_since_menno))))))

if(!'model_data.csv' %in% dir()){
  write_csv(model_data, 'model_data.csv')
}
if(!'dummy_data.csv' %in% dir()){
  ids <- sample(unique(model_data$oracle_number), 200)
  dummy_data <- model_data %>%
    filter(oracle_number %in% ids) %>%
    mutate(oracle_number = as.numeric(factor(oracle_number)))
  dummy_data$year_month <- paste0(dummy_data$calendar_year, dummy_data$calendar_month)
  
  dummy_data <- dummy_data %>%
    dplyr::select(oracle_number,
                  date,
                  absent,
                  incidence,
                  season,
                  permanent_or_temporary,
                  department,
                  sex,
                  precipitation,
                  rainy_day,
                  months_since,
                  ever_sprayed,
                  group,
                  calendar_year,
                  calendar_month,
                  year_month,
                  malaria_year,
                  days_since,
                  herd,
                  months_since_menno)
  write_csv(dummy_data, 'dummy_data.csv')
  dummy_dictionary <- data_frame(
    variable = c('oracle_number',
                 'date',
                 'absent',
                 'incidence',
                 'season',
                 'permanent_or_temporary',
                 'department',
                 'sex',
                 'precipitation',
                 'rainy_day',
                 'months_since',
                 'ever_sprayed',
                 'group',
                 'calendar_year',
                 'calendar_month',
                 'malaria_year',
                 'days_since',
                 'herd',
                 'months_since_menno'),
    meaning = c('Unique ID Number',
                'The calendar date of the observation',
                'Whether the worker was absent or present',
                'Local clinical malaria incidence',
                'Whether it was high or low malaria season, per district incidence, on the date in question',
                'Whether the worker had a permanent or temporary contract',
                'The department of the worker',
                'The sex of the worker',
                'The amount of precipitation in ml on the date in question',
                'Whether there was any rain on the date in question',
                'Binary: whether the date was before IRS (ie, any time greater than 6 months after previous IRS, or a case where IRS never occurred), or after IRS (ie, 184 days or fewer after IRS spraying)',
                'Whether the house in question was ever sprayed',
                'The combination of permanent/temporary status and work site',
                'The year',
                'The month',
                'The malaria season',
                'The number of days since IRS; in the case of multiple IRS episodes, the most recent IRS episode is the only one taken into account.',
                'The indirect protection score afforded by neighbors on the date in question',
                'The amount of time since IRS (using months, rather than days)'))
  write_csv(dummy_dictionary, 'dummy_dictionary.csv')
  save(dummy_data, file = 'dummy_data.RData')
} else {
  dummy_data <- read_csv('dummy_data.csv')
}

# Secondary analysis
make_models_table(model_list = protection_models)
joebrew/maragra documentation built on Aug. 11, 2020, 8:39 p.m.