knitr::opts_chunk$set(fig.width=12, fig.height=8, fig.path='Figs/',
                      warning=FALSE, message=FALSE)
knitr::opts_knit$set(root.dir="../")
options(width = 250)
library(MLlibrary)
library(dplyr)
ds_names <- c('tanzania', 'ghana_pe', 'niger_pastoral', 'niger_agricultural', 'mexico')
print_tables <- function(name_suffix=NULL) {
  r_squared <- NULL
  ensemble <- NULL
  forest <- NULL
  lpf <- NULL
  ols <- NULL
  rse <- NULL
  ensemble_budget_difference <- NULL
  ensemble_budget_difference_swf <- NULL
  row_count <- NULL
  column_count <- NULL
  for (ds_name in ds_names) {
    if (!is.null(name_suffix)){
      ds_name <- paste(ds_name, name_suffix, sep='_')
    }
    data <- load_dataset(ds_name)
    data <- select(data, -one_of('X'))
    if (grepl('niger', ds_name)) {
      x <- select(data, -one_of('y_real'))
      y <- data[rownames(x), 'y_real']
    } else if (grepl('ghana', ds_name)) {
      x <- select(data, -one_of('lnwelfare'))
      y <- data[rownames(x), 'lnwelfare']
    } else {
      x <- select(data, -one_of('lconsPC'))
      y <- data[rownames(x), 'lconsPC']
    }

    joined <- load_models(ds_name)
    forest_res <- filter(joined, method == 'forest')
    first_forest <- filter(forest_res, fold==1)
    ids <- first_forest$id
    forest_pred <- first_forest$predicted

    test <- x[ids, ]
    ols_on_forest <- lm(forest_pred ~ ., data=data.frame(forest_pred=forest_pred, test))
    r_sq <- summary(ols_on_forest)$r.squared
    rse <- c(rse, summary(ols_on_forest)$sigma)

    forest_reach <- calculate_reach_(forest_res)$reach
    ols_res <- filter(joined, method == 'least_squares')
    ols_reach<- calculate_reach_(ols_res)$reach
    lpf_res <- filter(joined, method == 'linear_plus_forest')
    lpf_reach<- calculate_reach_(lpf_res)$reach
    ensemble_res <- filter(joined, method == 'ensemble')
    ensemble_reach<- calculate_reach_(ensemble_res)$reach

    ensemble_and_ls <- filter(joined, method == 'ensemble' | method == 'least_squares')
    budgets_df <- calculate_budget_reduction_(ensemble_and_ls)
    budgets <- as.list(budgets_df$percent_pop_included)
    names(budgets) <- budgets_df$method
    budget_difference <- (budgets$ensemble - budgets$least_squares) / budgets$least_squares
    ensemble_budget_difference <- c(ensemble_budget_difference, budget_difference)

    budgets_df <- calculate_budget_reduction_swf_(ensemble_and_ls)
    budgets <- as.list(budgets_df$percent_pop_included)
    names(budgets) <- budgets_df$method
    budget_difference <- (budgets$ensemble - budgets$least_squares) / budgets$least_squares
    ensemble_budget_difference_swf <- c(ensemble_budget_difference_swf, budget_difference)

    r_squared <- c(r_squared, r_sq)
    forest <- c(forest, forest_reach)
    ols <- c(ols, ols_reach)
    lpf <- c(lpf, lpf_reach)
    ensemble <- c(ensemble, ensemble_reach)
    row_count <- c(row_count, nrow(data))
    column_count <- c(column_count, ncol(data))
  }

  results <- data.frame(
    dataset=ds_names,
    row_count=row_count,
    column_count=column_count,
    ols=ols,
    forest=forest,
    linear_plus_forest=lpf,
    ensemble=ensemble,
    r_squared=r_squared,
    rse=rse)

  results <- results[order(results$row_count), ]
  print(results, digits=4, row.names=FALSE)

  ensemble_results <- data.frame(
    dataset=ds_names,
    row_count=row_count,
    reach_difference=(ensemble - ols) / ols,
    budget_difference=ensemble_budget_difference,
    budget_difference_sw=ensemble_budget_difference_swf)
  ensemble_results <- ensemble_results[order(ensemble_results$row_count), ]
  print('ensemble')
  print(ensemble_results, digits=4, row.names=FALSE)
}
print_tables()

25 features

print_tables('25')

PMT Variables

ds_names <- c('ghana', 'niger_pastoral_pmt', 'niger_agricultural_pmt')
print_tables()

Weights

ds_names <- c('tanzania_weighted', 'ghana_pe_weighted', 'mexico_weighted')
for (name in ds_names) {
  print(name)
  joined <- load_models(name)
  print(calculate_budget_reduction_swf_(joined))
  plot(plot_swf_(joined))
}

Waste vs Reach plots

ds_names <- c( 'niger_pastoral', 'niger_agricultural', 'tanzania', 'ghana_pe', 'mexico')
for (ds_name in ds_names) {
  print(ds_name)
  joined <- load_models(ds_name)
  p <- plot_reach_vs_waste_(joined, POINT_COUNT=1000, THRESHOLD = 0.4)
  plot(p)
}

Social welfare plots

ds_names <- c( 'niger_pastoral', 'niger_agricultural', 'tanzania', 'ghana_pe', 'mexico')

for (ds_name in ds_names) {
  print(ds_name)
  joined <- load_models(ds_name)
  p <- plot_swf_(joined, POINT_COUNT=1000)
  plot(p)
}


ml-e/ML-library documentation built on May 23, 2019, 2:03 a.m.