Test binary allocation queue with Lalonde training dataset. There are 722 observations, 297 in the treatment group, 425 in the control group.
Already completed optimal ranking analysis and regressions for wage and employment. Here, I combine the results together and generate some joint graphs analyzing:
rm(list = ls(all.names = TRUE)) options(knitr.duplicate.label = 'allow')
library(dplyr) library(tidyr) library(tibble) library(purrr) library(stringr) library(broom) library(haven) library(ggplot2) library(forcats) library(REconTools) library(PrjOptiAlloc) library(knitr) library(kableExtra) bl_save_img = FALSE
Generate four categories by initial height and mother's education levels combinations.
spt_img_save <- '../_img/' spt_img_save_draft <- 'C:/Users/fan/Documents/Dropbox (UH-ECON)/repos/HgtOptiAlloDraft/_img/'
# Load Data data(df_opt_lalonde_training) data(df_opt_lalonde_training_wage) data(df_opt_lalonde_training_employ) data(df_opt_lalonde_training_empage) # dfj, dataframe joint dfj <- df_opt_lalonde_training_employ %>% left_join(df_opt_lalonde_training_wage, by = 'id') %>% left_join(df_opt_lalonde_training_empage, by = 'id') %>% left_join(df_opt_lalonde_training %>% rename(id=X) %>% select(id, re74, re75)) # drop the .y variables, clean the .x out dfj <- dfj %>% select(-contains(".y"), -contains(".x")) # %>% # rename_at(vars(ends_with(".x")), funs(str_replace(., ".x", ""))) # order and organize variables dfj <- dfj %>% select(id, starts_with("A_"), starts_with("alpha_"), starts_with("beta_"), contains("rank"), contains("rho"), everything()) # treatment column to numeric dfj$trt <- as.numeric(dfj$trt) - 1 # Summarize # str(dfj) summary(dfj)
The observed random allocation, number of spots available.
ar_bin_observed <- dfj %>% pull(trt) it_total_spots <- sum(ar_bin_observed) print(it_total_spots)
There are 297 training spots.
A simple allocation rule is to just give training spots to those who had the lowest levels of wage in the initial year. This kind of simple initial condition based allocation rule is often used in practice.
There happen to be 289 individuals with zero wages in 1975. So the Naive allocation could provision to these individuals plus 8 additional individuals with the lowest non-zero wages in 1975.
# 1975 mean wage dfj %>% group_by(trt) %>% summarize_at(vars(contains('re')), .funs = list(mean = ~mean(.))) # 1974 Wage the number of zeros: re75_n297 <- dfj %>% arrange(re75) %>% filter(row_number() <= it_total_spots) %>% select(id, re75) REconTools::ff_summ_percentiles(re75_n297)
# The 297 lowest wages in 1975 dfj <- dfj %>% arrange(re75) %>% mutate(re75_zero = case_when(re75 == 0 ~ 1, re75 != 0 ~ 0)) %>% mutate(re75_rank = row_number()) %>% mutate(alloc_naive = case_when(re75_rank <= it_total_spots ~ 1, TRUE ~ 0)) REconTools::ff_summ_percentiles(dfj %>% select(re75_zero, re75_rank))
Compute Resource Equivalent Variations. Assume here that i use here the same ar_rho vector as used in emloyment and wage regressions.
Note that for each rho, the optimal ranking is different, and there are different optimal rankings for wage and employment outcomes.
Prepare for Various Inputs.
# Plann Preference Array ar_rho = c(-100, -0.001, 0.95) ar_rho <- 1 - (10^(c(seq(-2,2, length.out=30)))) ar_rho <- unique(ar_rho) # Sort all Individuals dfj <- dfj %>% arrange(id) # Alternative Allocationc ar_bin_observed <- dfj %>% pull(trt) ar_bin_alloc_naive <- dfj %>% pull(alloc_naive) # A and alpha Parameter ar_A_wage <- dfj %>% pull(A_i_wage) ar_A_employ <- dfj %>% pull(A_i_employ) ar_A_empage <- dfj %>% pull(A_i_empage) ar_alpha_wage <- dfj %>% pull(alpha_i_wage) ar_alpha_employ <- dfj %>% pull(alpha_i_employ) ar_alpha_empage <- dfj %>% pull(alpha_i_empage) # Relative Preference Weights ar_beta_wage <- dfj %>% pull(beta_i_wage) ar_beta_employ <- dfj %>% pull(beta_i_employ) ar_beta_empage <- dfj %>% pull(beta_i_empage)
Solve for Resource Equivalent Variations:
# REV Results Storage ar_rev_wage_v_rand <- rep(0, length(ar_rho)) ar_rev_employ_v_rand <- rep(0, length(ar_rho)) ar_rev_empage <- rep(0, length(ar_rho)) ar_rev_employ_v_re75rank <- rep(0, length(ar_rho)) # Loop Over for (it_rho_ctr in seq(1, length(ar_rho))) { fl_rho <- ar_rho[it_rho_ctr] svr_rho_employ <- paste0('rho_c', it_rho_ctr, '_rk_employ') svr_rho_wage <- paste0('rho_c', it_rho_ctr, '_rk_wage') svr_rho_empage <- paste0('rho_c', it_rho_ctr, '_rk_empage') ar_queue_optimal_wage <- dfj %>% pull(svr_rho_wage) ar_queue_optimal_employ <- dfj %>% pull(svr_rho_employ) ar_queue_optimal_empage <- dfj %>% pull(svr_rho_empage) # 1. REV of Optimal All X Wage Regression relative to random observed ar_rev_wage_v_rand[it_rho_ctr] <- ffp_opt_sobin_rev(ar_queue_optimal_wage, ar_bin_observed, ar_A_wage, ar_alpha_wage, ar_beta_wage, fl_rho) # 2. REV of Optimal Age X only Employment Regression relative to random observed ar_rev_empage[it_rho_ctr] <- ffp_opt_sobin_rev(ar_queue_optimal_empage, ar_bin_observed, ar_A_empage, ar_alpha_empage, ar_beta_empage, fl_rho) # 3. REV of Optimal All X Employment Regression relative to random observed ar_rev_employ_v_rand[it_rho_ctr] <- ffp_opt_sobin_rev(ar_queue_optimal_employ, ar_bin_observed, ar_A_employ, ar_alpha_employ, ar_beta_employ, fl_rho) # 4. REV of Optimal All X Employment Regression relative to NAIVE ALLOCATION by Initial ar_rev_employ_v_re75rank[it_rho_ctr] <- ffp_opt_sobin_rev(ar_queue_optimal_employ, ar_bin_alloc_naive, ar_A_employ, ar_alpha_employ, ar_beta_employ, fl_rho) } # Print results REconTools::ff_sup_lst2str(round(ar_rev_wage_v_rand,2)) REconTools::ff_sup_lst2str(round(ar_rev_employ_v_rand,2)) REconTools::ff_sup_lst2str(round(ar_rev_empage,2)) REconTools::ff_sup_lst2str(round(ar_rev_employ_v_re75rank,2))
Combine REV results to one dataframe:
# combine results mt_combine <- cbind(ar_rho, ar_rev_wage_v_rand*100, ar_rev_employ_v_rand*100, ar_rev_empage*100, ar_rev_employ_v_re75rank*100) colnames(mt_combine) <- c('rho', 'rev_expected_wage', 'rev_employment_prob', 'rev_employage_prob', 'rev_employ_v_re75rank') tb_rev_wage_employ <- as_tibble(mt_combine) %>% rowid_to_column(var = "eval") # Transform x-scale to 1-rho tb_rev_wage_employ <- tb_rev_wage_employ %>% mutate(one_minus_rho = 1 - rho) # Reshape to long tb_rev_wage_employ_long <- tb_rev_wage_employ %>% pivot_longer( cols = starts_with('rev'), names_to = c('Outcome'), names_pattern = paste0('rev', "_(.*)"), values_to = 'rev' ) print(round(tb_rev_wage_employ %>% mutate(rand_v_init = rev_employment_prob/rev_employ_v_re75rank),2))
Draw the REV graph. Following this graph code.
# x-labels x.labels <- c('λ=0.99', 'λ=0.90', 'λ=0', 'λ=-10', 'λ=-100') x.breaks <- c(0.01, 0.10, 1, 10, 100) # title line 2 # title_line1 <- sprintf("Percentage of Training Spots Misallocated, NSW Lalonde (AER, 1986)") # title_line2 <- sprintf("REV (Resource Equivalent Variation) Along Planner Spectrum") st_title <- sprintf(paste0('How much Fewer Resources are Needed (Shares) to Achieve the Same Welfare')) title_line1 <- sprintf("Compare alternative allocations to optimal allocations given observables and estimates") title_line2 <- sprintf("Solid Red Line: train 297 random NSW treatment individuals vs. optimally allocating 297 spots") title_line3 <- sprintf("Dashed Blue Line: train 297 lowest baseline wage individuals vs. optimally allocating 297 spots") # Relabel Variable Outcome_levels <- c("random (RCT)" = "employment_prob", "baseline wage" = "employ_v_re75rank") tb_rev_wage_employ_long_fig <- tb_rev_wage_employ_long %>% filter(Outcome == 'employment_prob' | Outcome == 'employ_v_re75rank') %>% mutate(Outcome = as_factor(Outcome)) %>% mutate(Outcome = fct_recode(Outcome, !!!Outcome_levels)) # Graph Results--Draw line.plot <- tb_rev_wage_employ_long_fig %>% ggplot(aes(x=one_minus_rho, y=rev, group=Outcome, colour=Outcome, linetype=Outcome, shape=Outcome)) + geom_line() + geom_point() + # geom_vline(xintercept=c(1), linetype="dotted") + labs(title = st_title, subtitle = paste0(title_line1, '\n', title_line2, '\n', title_line3), x = 'log10 Rescale of λ, Log10(1-λ)\nλ=1 Utilitarian (Maximize Average), λ=-infty Rawlsian (Maximize Minimum)', y = paste0('100 x REV (Resource Equivalent Variations)'), caption = 'Based on a logistic regression of the employment effects of a training RCT. Data: Lalonde (AER, 1986).') + scale_x_continuous(trans='log10', labels = x.labels, breaks = x.breaks) + theme_bw(base_size=8) + ylim(0, 100) # + # guides(colour=FALSE) # Labeling line.plot$labels$linetype <- "REV\nOptimal\nvs.\nAlternatives" line.plot$labels$colour <- "REV\nOptimal\nvs.\nAlternatives" line.plot$labels$shape <- "REV\nOptimal\nvs.\nAlternatives" # Print print(line.plot) if (bl_save_img) { snm_cnts <- 'Lalonde_employ_rev.png' png(paste0(spt_img_save, snm_cnts), width = 135, height = 96, units='mm', res = 300, pointsize=7) print(line.plot) dev.off() png(paste0(spt_img_save_draft, snm_cnts), width = 135, height = 96, units='mm', res = 300, pointsize=5) print(line.plot) dev.off() }
Generate some tables where the distributions of $A$ and $\alpha$ are compared.
# Binary Marginal Effects and Prediction without Binary ggplot.A.alpha <- function(df, svr_alpha = 'alpha_i', svr_A = "A_i", slb_title = 'A_i and alpha_i (red)'){ scatter <- ggplot(df, aes(x=!!sym(svr_A))) + geom_point(aes(y=!!sym(svr_alpha)), size=4, shape=4, color="red") + labs(title = paste0(slb_title), x = 'A_i = Prob Employ no Training', y = 'alpha_i = Effect of Training of Prob Employ', caption = paste0('Logit Regression on training. Control for age, educ, race. Age <= 23 or > 23 interaction.')) + theme_bw() return(scatter) } # Plot over multiple ggplot.A.alpha(df = dfj, svr_alpha = 'alpha_i_wage', svr_A = "A_i_wage", slb_title = 'A_i and alpha_i, wage') ggplot.A.alpha(df = dfj, svr_alpha = 'alpha_i_employ', svr_A = "A_i_employ", slb_title = 'Employment A_i and alpha_i Joint Distribution')
What is the distribution of A given alpha. This is relevant in the linear regression case
# Keep only relevant columns, and reshape data dfj_hist <- dfj %>% select(id, A_i_wage, alpha_i_wage) dfj_hist$alpha_i_wage <- factor(dfj_hist$alpha_i_wage) dfj_hist$alpha_i_wage <- factor(dfj_hist$alpha_i_wage, labels = c('Age <= 23\nalpha_i=$454', 'Age > 23\nalpha_i=$1071')) title_line1 <- sprintf("Wage A_i Distribution (Linear Wage), NSW Training Lalonde (AER, 1986)") title_line2 <- sprintf("By Two Unique Levels of alpha_i: for Age <= 23 and for Age > 23") # Graph dfj_hist %>% ggplot(aes(x=A_i_wage, fill=alpha_i_wage)) + geom_density( color="#e9ecef", alpha=0.6, position = 'identity') + scale_fill_manual(values=c("#69b3a2", "#404080")) + labs(fill="") %>% labs(title = paste0(title_line1, '\n', title_line2), x = 'A_i = Expected Wage no Training (in thousands, 1979)', y = 'density', caption = paste0('Linear Wage Regression on training. Control for age, educ, race. Age <= 23 or > 23 interaction.'))
The max calculated by ffp_opt_anlyz_rhgin_bin is the top rank, small in number. The min calculated by the function is the lowest ranked, largest number.
Looked highest rank reached for each (highest rank (smallest number ever reached)).
# Generate min and max gaps dfj_highest <- dfj %>% select(id, rank_max_wage, rank_max_employ) # Wide to long st_gap_prefix <- 'rank_max' dfj_highest_long <- dfj_highest %>% pivot_longer( cols = starts_with(st_gap_prefix), names_to = c('employvswwage'), names_pattern = paste0(st_gap_prefix, "_(.*)"), values_to = 'highestrank' ) # Rank change to categories dfj_highest_long <- dfj_highest_long %>% mutate(highestrank_grp = case_when(highestrank == 1 ~ "Top A 1", highestrank <= 10 & highestrank > 1 ~ "Top B 10", highestrank <= 50 & highestrank > 10 ~ "Top C 11 to 50", highestrank <= 100 & highestrank > 50 ~ "Top D 51 to 100", highestrank <= 297 & highestrank > 100 ~ "Top E 101 to 297", highestrank > 297 ~ "Top F > 297")) # Graph dfj_highest_long %>% ggplot(aes(x=highestrank, fill=employvswwage)) + geom_histogram(color="#e9ecef", alpha=0.6, position = 'identity') + scale_fill_manual(values=c("#69b3a2", "#404080")) + labs(fill="") # Table dfj_highest_long %>% group_by(employvswwage, highestrank_grp) %>% summarize(count = n())
From the wage and employment analysis, each generates min and max rank
# Generate min and max gaps dfj_gap <- dfj %>% mutate(rank_gap_wage = rank_min_wage - rank_max_wage, rank_gap_employ = rank_min_employ - rank_max_employ) %>% select(id, rank_gap_wage, rank_gap_employ) # Wide to long st_gap_prefix <- 'rank_gap' dfj_gap_long <- dfj_gap %>% pivot_longer( cols = starts_with(st_gap_prefix), names_to = c('employvswwage'), names_pattern = paste0(st_gap_prefix, "_(.*)"), values_to = 'rank_gap' ) # Rank change to categories dfj_gap_long <- dfj_gap_long %>% mutate(rank_gap_grp = case_when(rank_gap == 0 ~ "change A no change", rank_gap <= 50 & rank_gap > 0 ~ "change B 50 positions", rank_gap <= 100 & rank_gap > 50 ~ "change C 51 to 100", rank_gap <= 200 & rank_gap > 100 ~ "change D 101 to 200", rank_gap <= 400 & rank_gap > 200 ~ "change E 201 to 400", rank_gap > 400 ~ "change F more than 401")) # Graph dfj_gap_long %>% ggplot(aes(x=rank_gap, fill=employvswwage)) + geom_histogram(color="#e9ecef", alpha=0.6, position = 'identity') + scale_fill_manual(values=c("#69b3a2", "#404080")) + labs(fill="") # Table dfj_gap_long %>% group_by(employvswwage, rank_gap_grp) %>% summarize(count = n())
# # # Calculat the maximum rank reached by each from all the rhos we have # # The difference between this and the other max, this computes across all rhos, employment and wage # # dfj_top10 <- dfj %>% # select(contains("rho_")) %>% # reduce(pmin) %>% # mutate(dfj, max_rank_rhos = .) # # dfj_top10 <- dfj_top10 %>% filter(max_rank_rhos <= 10) %>% # select(id, age, educ, black, race, # rho_c1_rk_employ, rho_c16_rk_employ, rho_c30_rk_employ, # rho_c1_rk_wage, rho_c16_rk_wage, rho_c30_rk_wage) %>% # rename(emp_util = rho_c1_rk_employ, # emp_CD = rho_c16_rk_employ, # emp_rawl = rho_c30_rk_employ, # wag_util = rho_c1_rk_wage, # wag_CD = rho_c16_rk_wage, # wag_rawl = rho_c30_rk_wage, # ) %>% # arrange(age, educ, race) # # dim(dfj_top10) # # # Graphing Library # library(kableExtra) # # Load Data # dt <- mtcars[1:5, 1:6] # # Generate latex string variable # st_out_tex <- kable(dfj_top10, "latex") # print(st_out_tex) # # File out # # fileConn <- file("./../../_file/tex/tex_sample_a_tab.tex") # fileConn <- file("C:/users/fan/HgtOptiAlloDraft/_tab/Lalonde_wage_employ_top10.tex") # writeLines(st_out_tex, fileConn) # close(fileConn) # # # Display results here # dfj_top10 %>% # kable() %>% # kable_styling(bootstrap_options = c("striped", "hover", "responsive"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.