knitr::opts_chunk$set(fig.width=12, fig.height=8, fig.path='Figs/',
                      warning=FALSE, message=FALSE)
knitr::opts_knit$set(root.dir="../")
options(width = 250)
myround <- function(x, digits=1) {
  if(digits < 1) stop("This is intended for the case digits >= 1.")
  if(length(digits) > 1) {
    digits <- digits[1]
    warning("Using only digits[1]")
  }
  tmp <- sprintf(paste("%.", digits, "f", sep=""), x)
  # deal with "-0.00" case
  zero <- paste0("0.", paste(rep("0", digits), collapse=""))
  tmp[tmp == paste0("-", zero)] <- zero
  tmp
}

cap <- function(x) {
  s <- strsplit(x, " ")[[1]]
  paste(toupper(substring(s, 1,1)), substring(s, 2),
      sep="", collapse=" ")
}
library(MLlibrary)
library(dplyr)
library(purrr)

THRESHOLD <- 0.4
all_names <- c('niger_pastoral', 'niger_agricultural', 'tanzania_2008', 'tanzania_2010', 'tanzania_2012', 'ghana_pe', 'mexico', 'south_africa_w1', 'south_africa_w2', 'south_africa_w3', 'iraq', 'brazil')
countries <- strsplit(all_names, '_') %>% 
  map(first) %>%
  unique() %>%
  map(cap)

pmt_names <- c('niger_pastoral_pmt', 'niger_agricultural_pmt', 'ghana')
table_stats <- function(tables) {
  lapply(names(tables), function(name) {
    df <- tables[[name]]
    value_name <- colnames(df)[[2]]
    df$dataset <- name
    reshape::cast(df, dataset ~ method, value=value_name)
  })
}
ds_stats <- lapply(c(all_names, pmt_names), function(name) {
  df <- load_dataset(name)
  row_count <- nrow(df)
  col_count <- ncol(df)
  data.frame(dataset=name, N=row_count, K=col_count)
})
ds_stats <- bind_rows(ds_stats)
get_reaches <- function(ds_names) {
  reaches <- lapply(ds_names, function(name) {
    output <- load_validation_models(name)
    reach_by_pct_targeted(output, threshold=THRESHOLD)
  })
  names(reaches) <- ds_names
  reaches
}

get_reach_table <- function(reaches) {
  tables <- lapply(reaches, table_stat)
  combine_tables(tables) %>%
    select(dataset, N, K, ols, enet, ensemble, forest, opf) %>%
    rename(ols_plus_forest=opf)
}

get_budget_table <- function(reaches) {
  tables <- lapply(reaches, budget_change)
  combine_tables(tables)
}

combine_tables <- function(tables) {
  table_stats(tables) %>%
    bind_rows() %>%
    merge(ds_stats, by='dataset') %>%
    select(dataset, N, K, ols, everything()) %>%
    arrange(N)
}

difference_table <- function(reaches) {
  reach_table <- get_reach_table(reaches)
  reach_differences <- reach_table %>%
    mutate(reach_improvement=ensemble-ols) %>%
    mutate(relative_reach_improvement=(ensemble-ols)/ols) %>%
    select(N, K, dataset, reach_improvement, relative_reach_improvement)
  budget_table <- get_budget_table(reaches) %>%
    mutate(budget_reduction=-1 * ensemble) %>%
    select(dataset, budget_reduction)
  merge(reach_differences, budget_table, by='dataset') %>%
    arrange(N)
}

reaches <- get_reaches(all_names)
reacht <- get_reach_table(reaches)
difft  <- difference_table(reaches)

Figures

Table 1

print(reacht, digits=4)

Table 2

print(difft, digits=4)

Table 2

pmt_reaches <- get_reaches(pmt_names)
pmt_table <- difference_table(pmt_reaches)
print(pmt_table, digits=4)


ml-e/ML-library documentation built on May 23, 2019, 2:03 a.m.