Test discrete optimal allocation solution line by line without function. Use the California student test score dataset. Regress student English and Math test scores on Student-Teacher-Ratio.
This function produces the same results as DISCRETE--Discrete Optimal Allocation California Teacher Student Ratio (Line by Line). The differences are:
This file, by invoking functions, is much shorter than the line by line program.
rm(list = ls(all.names = TRUE)) options(knitr.duplicate.label = 'allow')
library(dplyr) library(tidyr) library(tibble) library(stringr) library(broom) library(ggplot2) library(REconTools) library(PrjOptiAlloc) library(knitr) library(kableExtra)
Set Preference Array:
ar_rho <- 1 - (10^(c(seq(-2,2, length.out=4)))) ar_rho <- unique(ar_rho) it_w_agg <- 5
id_i <- c(1,1,1,2,2,2,3,3,3) id_il <- c(1,2,3,4,5,6,7,8,9) D_max_i <- c(3,3,3,3,3,3,3,3,3) D_il <- c(1,2,3,1,2,3,1,2,3) A_il_i1 <- c(4,3,2,1) A_il_i2 <- c(2.7,2.2,1.5,1.1) A_il_i3 <- c(1.9,1.8,1.1,0.1) A_il <- c(A_il_i1[2:4], A_il_i2[2:4], A_il_i3[2:4]) alpha_il <- c(diff(-A_il_i1), diff(-A_il_i2), diff(-A_il_i3)) beta_i <- c(1/3,1/3,1/3,1/3,1/3,1/3,1/3,1/3,1/3) df_handinput_input_il <- as_tibble(cbind(id_i, id_il, D_max_i, D_il, A_il, alpha_il, beta_i)) print(df_handinput_input_il)
ls_df_queue_hand <- PrjOptiAlloc::ffp_opt_anlyz_rhgin_dis(ar_rho, it_w_agg, bl_df_alloc_il = TRUE, bl_return_V = TRUE, bl_return_allQ_V = TRUE, bl_return_inner_V = TRUE, df_input_il = df_handinput_input_il) df_queue_il_long <- ls_df_queue_hand$df_queue_il_long df_queue_il_wide <- ls_df_queue_hand$df_queue_il_wide df_alloc_i_long <- ls_df_queue_hand$df_alloc_i_long df_rho_gini <- ls_df_queue_hand$df_rho_gini df_alloc_il_long <- ls_df_queue_hand$df_alloc_il_long
print(df_queue_il_long) print(df_queue_il_wide) print(df_alloc_i_long) print(df_rho_gini) print(df_alloc_il_long)
These are data thata were generated in the DISCRETE--Discrete Optimal Allocation California Teacher Student Ratio (Line by Line) function.
# Load Data data(df_opt_caschool_prep_i) data(df_opt_caschool_input_il) data(df_opt_caschool_input_ib) # Show Variables str(df_opt_caschool_prep_i) str(df_opt_caschool_input_il) str(df_opt_caschool_input_ib) # Modifying Beta Value # When individuals have the same weights, beta values do not matter df_opt_caschool_input_il <- df_opt_caschool_input_il %>% mutate(beta_i = 1) df_opt_caschool_input_ib <- df_opt_caschool_input_ib %>% mutate(beta_i = 1)
Note that input_il function already is based on fl_fi_max.
# 100 percent teacher at most per school, discretize floor as needed # This is not a parameter that matters here, already a part of the input_il function fl_fi_max = 1.00 # 20 percent total additional of all teachers fl_fa_max = 0.20 # What is the number of teachers we can increase by fl_teacher_increase_number <- sum(df_opt_caschool_prep_i$teachers)*fl_fa_max fl_teacher_increase_number <- floor(fl_teacher_increase_number) # Rho values to consider ar_rho <- 1 - (10^(c(seq(-2,2, length.out=4)))) ar_rho <- unique(ar_rho)
Use the discrete allocation function across preference ffp_opt_anlyz_rhgin_dis.html function. This computes optimal allocation for multiple planner inequality aversion $\lambda$ parameters at the same time. Note that in the function $\lambda=\rho$.
# Optimal Allocation ls_df_queue <- PrjOptiAlloc::ffp_opt_anlyz_rhgin_dis(ar_rho, fl_teacher_increase_number, bl_df_alloc_il = TRUE, df_input_il = df_opt_caschool_input_il, svr_rho = 'rho', svr_id_i = 'id_i', svr_id_il = 'id_il', svr_D_max_i = 'D_max_i', svr_D_il = 'D_il', svr_D_star_i = 'D_star_i', svr_F_star_i = 'F_star_i', svr_inpalc = 'Q_il', svr_D_Wbin_il = 'D_Wbin_il', svr_A_il = 'A_il', svr_alpha_il = 'alpha_il', svr_beta_i = 'beta_i', svr_expout = 'opti_exp_outcome', st_idcol_prefix = 'sid_') df_queue_il_long <- ls_df_queue$df_queue_il_long df_queue_il_wide <- ls_df_queue$df_queue_il_wide df_alloc_i_long <- ls_df_queue$df_alloc_i_long df_rho_gini <- ls_df_queue$df_rho_gini df_alloc_il_long <- ls_df_queue$df_alloc_il_long
# Display Results print(head(df_queue_il_long, 10)) # print(str(df_queue_il_long)) print(head(df_queue_il_wide, 10)) # print(str(df_queue_il_wide)) print(head(df_alloc_i_long, 10)) # print(str(df_queue_il_wide)) print(df_rho_gini) # print(str(df_alloc_i_long)) print(head(df_alloc_il_long, 10))
tb_rho_rev <- PrjOptiAlloc::ffp_opt_anlyz_sodis_rev(ar_rho, fl_teacher_increase_number, df_input_ib = df_opt_caschool_input_ib, df_queue_il_long_with_V = df_queue_il_long)
# Display Results print(tb_rho_rev)
To generate some graphs and more easily readable results, rather than using all school districts, select some random subset of school districts from the existing data frames.
# select four school districts # cas = california schools it_O <- 4 set.seed(1) df_cas_prep_sub_i <- df_opt_caschool_prep_i %>% filter(teachers <= 100 & teachers >= 50) df_cas_prep_sub_i <- df_cas_prep_sub_i[sample(dim(df_cas_prep_sub_i)[1], it_O, replace=FALSE),] ar_cas_id_selected <- df_cas_prep_sub_i %>% pull(id_i) # Select from il and ib only ids that are randomly selected df_cas_input_sub_il <- df_opt_caschool_input_il %>% filter(id_i %in% ar_cas_id_selected) df_cas_input_sub_ib <- df_opt_caschool_input_ib %>% filter(id_i %in% ar_cas_id_selected) # Print print(df_cas_prep_sub_i) print(df_cas_input_sub_ib) print(df_cas_input_sub_il)
Now we also have a new total number of teacher increase, no longer based on all districts. The policy here is to potentially increase each school district by fl_fi_max, already coded into the input_il file, so that can not change. The total number of available new teachers is fl_fa_max times the existing number of teachers in the selected districts overall
fl_fa_max = 0.20 # What is the number of teachers we can increase by fl_teacher_increase_sub_number <- sum(df_cas_prep_sub_i$teachers)*fl_fa_max fl_teacher_increase_sub_number <- floor(fl_teacher_increase_sub_number)
Use the discrete allocation function across preference ffp_opt_anlyz_rhgin_dis.html function. This computes optimal allocation for multiple planner inequality aversion $\lambda$ parameters at the same time. Note that in the function $\lambda=\rho$.
# Rho values to consider ar_rho <- 1 - (10^(c(seq(-2,2, length.out=4)))) ar_rho <- unique(ar_rho) # Optimal Allocation ls_df_sub_queue <- PrjOptiAlloc::ffp_opt_anlyz_rhgin_dis(ar_rho, fl_teacher_increase_sub_number, df_input_il = df_cas_input_sub_il, bl_df_alloc_il = TRUE) df_queue_il_sub_long <- ls_df_sub_queue$df_queue_il_long df_queue_il_sub_wide <- ls_df_sub_queue$df_queue_il_wide df_alloc_i_long <- ls_df_sub_queue$df_alloc_i_long df_rho_gini <- ls_df_sub_queue$df_rho_gini df_alloc_il_long <- ls_df_sub_queue$df_alloc_il_long # REV ar_util_rev_loop_func <- ffp_opt_anlyz_sodis_rev(ar_rho,fl_teacher_increase_sub_number, df_input_ib = df_cas_input_sub_ib, df_queue_il_long_with_V = df_queue_il_sub_long)
Show Results
# Display Results print(df_queue_il_sub_long) print(df_alloc_i_long) print(df_rho_gini) print(df_alloc_il_long) print(ar_util_rev_loop_func)
Consider the population of schools is measure one, there are $N$ distinct types of schools, and the mass of each unique type is different, but they sum up to 1. If each school can have 5 additional teachers, than the mass of teachers is measure 5.
Use the number of students per school as its mass, and introduce a new vector for the allocation function that is this mass. This is to illustrate ideas. The mass should not be the number of students, but the number of schools of the same type.
Note that under discrete allocation, there were 10840 available teachers to be allocated. But in the problem below, the idea is:
Several things to note:
Analyze previous allocation not based on measure, do this only for one planner:
Note that this is to be done inside the allocation function, after queue has been found, cumulative sum mass along the queue, and set cut-off at the total measure of resources available.
# Cumulative Mass df_queue_il_long_mass_cumu <- df_queue_il_long %>% filter(rho_val == ar_rho[1]) %>% select(-rho, -rho_val) %>% left_join(df_opt_caschool_prep_i %>% mutate(mass = enrltot/sum(enrltot)) %>% select(id_i, mass), by='id_i') %>% arrange(Q_il) %>% mutate(mass_cumu_queue = cumsum(mass)) # View Around the Teacher Available Cut-off mass df_queue_il_long_mass_cumu %>% filter(Q_il >= fl_teacher_increase_number - 5 & Q_il <= fl_teacher_increase_number + 5) # Summarize REconTools::ff_summ_percentiles(df_queue_il_long_mass_cumu, bl_statsasrows=FALSE)
The mass variable is not contained in df_opt_caschool_prep_i, but is contained in df_opt_caschool_prep_i, merge together to get that number of students as mass. This variable does not need to sum to one. Reweighting to sum to one should not change results.
# Summarize REconTools::ff_summ_percentiles(df_opt_caschool_input_il, bl_statsasrows=FALSE) REconTools::ff_summ_percentiles(df_opt_caschool_input_ib, bl_statsasrows=FALSE) # Mass of Students for IL file df_opt_caschool_input_mass_il <- df_opt_caschool_input_il %>% left_join(df_opt_caschool_prep_i %>% mutate(mass_i = enrltot/sum(enrltot)) %>% select(id_i, mass_i) , by='id_i') %>% ungroup() # Mass of Students for IL file # IB file done with the Assumption that fl_fa_max additional teachers per school # Since mass per teacher/school same, total mass per school for ib allocation # is determined by mass_i still. Do not nuum to multiply mass by the number of teachers # assigned. df_opt_caschool_input_mass_ib <- df_opt_caschool_input_ib %>% left_join(df_opt_caschool_prep_i %>% mutate(mass_i = enrltot/sum(enrltot)) %>% mutate(cumu_sum_group_i = mass_i*round(teachers*fl_fa_max)) %>% select(id_i, mass_i, cumu_sum_group_i, teachers) , by='id_i') %>% ungroup() # Total mass sum(df_opt_caschool_input_mass_il$mass_i) sum(df_opt_caschool_input_mass_ib$cumu_sum_group_i) # Alternative Allocation and Mass for Alternative Allocations REconTools::ff_summ_percentiles(df_opt_caschool_input_mass_il, bl_statsasrows=FALSE) REconTools::ff_summ_percentiles(df_opt_caschool_input_mass_ib, bl_statsasrows=FALSE)
Within Group Cumulative Mass Sum with Allocations:
# Within Group Cumulative Sum of Mass # For the Value Calculation, the A and alpha are updated to be given allocation up to that point # that means the mass calculation is not mass at a particular unit of allocation, but mass # for that individual group/type, up to the point. This is a within group cumulative sum, # Note that so for welfare and for allocation queue, in the welfare case, we need cumulative within # individual group mass, and in the allocation queue, need point mass df_opt_caschool_input_mass_il %>% arrange(id_i, D_il) %>% group_by(id_i) %>% arrange(D_il) %>% mutate(cumu_sum_group_i = cumsum(mass_i)) %>% arrange(id_i, D_il) %>% ungroup()
If want to analyze more closely what is happening in a subset of cases, use the subsetting option:
it_O <- 2 set.seed(1) df_cas_prep_sub_i <- df_opt_caschool_prep_i %>% filter(teachers <= 100 & teachers >= 50) df_cas_prep_sub_i <- df_cas_prep_sub_i[sample(dim(df_cas_prep_sub_i)[1], it_O, replace=FALSE),] ar_cas_id_selected <- df_cas_prep_sub_i %>% pull(id_i) # Select from il and ib only ids that are randomly selected bl_subsetting <- FALSE if (bl_subsetting) { df_opt_caschool_input_mass_il <- df_opt_caschool_input_mass_il %>% filter(id_i %in% ar_cas_id_selected) df_opt_caschool_input_mass_ib <- df_opt_caschool_input_mass_ib %>% filter(id_i %in% ar_cas_id_selected) } # Summarize REconTools::ff_summ_percentiles(df_opt_caschool_input_mass_il, bl_statsasrows=FALSE) REconTools::ff_summ_percentiles(df_opt_caschool_input_mass_ib, bl_statsasrows=FALSE)
Copying the code from prior, use fl_teacher_increase_sub_number and df_opt_caschool_input_mass_il without the mass column mass_i specified.
# Measure of Available Teachers fl_teacher_increase_sub_number <- sum(df_opt_caschool_input_mass_ib$teachers)*fl_fa_max fl_teacher_increase_sub_number <- floor(fl_teacher_increase_sub_number) # Allocate Based on the Measure of Available Teachers ls_df_queue <- PrjOptiAlloc::ffp_opt_anlyz_rhgin_dis( ar_rho, fl_teacher_increase_sub_number, bl_df_alloc_il = TRUE, df_input_il = df_opt_caschool_input_mass_il, svr_rho = 'rho', svr_id_i = 'id_i', svr_id_il = 'id_il', svr_D_max_i = 'D_max_i', svr_D_il = 'D_il', svr_D_star_i = 'D_star_i', svr_F_star_i = 'F_star_i', svr_inpalc = 'Q_il', svr_D_Wbin_il = 'D_Wbin_il', svr_A_il = 'A_il', svr_alpha_il = 'alpha_il', svr_beta_i = 'beta_i', svr_expout = 'opti_exp_outcome', st_idcol_prefix = 'sid_') df_queue_il_long_count <- ls_df_queue$df_queue_il_long df_queue_il_wide_count <- ls_df_queue$df_queue_il_wide df_alloc_i_long_count <- ls_df_queue$df_alloc_i_long df_rho_gini_count <- ls_df_queue$df_rho_gini df_alloc_il_long_count <- ls_df_queue$df_alloc_il_long
Analyze results using the total measure threshold from the measure allocation
# Cumulative Mass df_queue_il_long_mass_cntcumu <- df_queue_il_long_count %>% filter(rho_val == ar_rho[1]) %>% select(-rho, -rho_val) %>% left_join(df_opt_caschool_prep_i %>% mutate(mass_i = enrltot/sum(enrltot)) %>% select(id_i, mass_i), by='id_i') %>% arrange(Q_il) %>% mutate(mass_cumu_il = cumsum(mass_i)) # View Around the Teacher Available Cut-off mass df_queue_il_long_mass_cntcumu %>% filter(Q_il >= fl_teacher_increase_sub_number - 4 & Q_il <= fl_teacher_increase_sub_number + 4) # Summarize REconTools::ff_summ_percentiles(df_queue_il_long_mass_cntcumu, bl_statsasrows=FALSE)
Same as before, but now include the mass column:
# Measure of Available Teachers fl_measure_teacher_increase_number <- sum(df_opt_caschool_input_mass_ib$cumu_sum_group_i) # Allocate Based on the Measure of Available Teachers ls_df_queue <- PrjOptiAlloc::ffp_opt_anlyz_rhgin_dis( ar_rho, fl_measure_teacher_increase_number, bl_df_alloc_il = TRUE, df_input_il = df_opt_caschool_input_mass_il, svr_rho = 'rho', svr_id_i = 'id_i', svr_id_il = 'id_il', svr_D_max_i = 'D_max_i', svr_D_il = 'D_il', svr_D_star_i = 'D_star_i', svr_F_star_i = 'F_star_i', svr_inpalc = 'Q_il', svr_D_Wbin_il = 'D_Wbin_il', svr_A_il = 'A_il', svr_alpha_il = 'alpha_il', svr_beta_i = 'beta_i', svr_measure_i = 'mass_i', svr_expout = 'opti_exp_outcome', st_idcol_prefix = 'sid_') df_queue_il_long_mass <- ls_df_queue$df_queue_il_long df_queue_il_wide_mass <- ls_df_queue$df_queue_il_wide df_alloc_i_long_mass <- ls_df_queue$df_alloc_i_long df_rho_gini_mass <- ls_df_queue$df_rho_gini df_alloc_il_long_mass <- ls_df_queue$df_alloc_il_long
Analyze the Allocation Results based on measure:
df_queue_il_long_mass %>% filter(rho_val == ar_rho[3]) %>% select(-rho, -rho_val) %>% arrange(Q_il) %>% filter(mass_cumu_il >= fl_measure_teacher_increase_number - 0.01 & mass_cumu_il <= fl_measure_teacher_increase_number + 0.01) # Summarize REconTools::ff_summ_percentiles(df_queue_il_long_mass_cumu, bl_statsasrows=FALSE)
Under the Utilitarian filter(rho_val == ar_rho[1]):
Based on count, there were 30 teacher spots to allocate, and we move up to queue position 30 for allocation. Based on relative measure/weight, because the schools with more teacher spots assigned under alternative allocation also had higher weight, so we end up with a measure such that the 10 + 20 teachers assigned to each type of school, when looked at on the queue, sorted by queue position, and cumulatively summed with the weight of school school type, ends up giving us enough to go to the 38th queue position. So ignoring the weight of the schools, we would have allocated incorrectly, because we thought there were only 30 slots to be allocated, but given weights, there are actually up to the 38th spot on the queue.
Under filter(rho_val == ar_rho[2], [3], [4]):
Under alternative planners, importantly, the queue is different, hence cumulative weight up the queue is different, so the number of "spots" that could be allocated is different. There are 37, 31, 24 spots that are allocated under increasing Rawlsian preferences. This reflects the fact that under more Rawlsian preferences, the districts more more students are ranked higher on the queue, and they take up more mass. At the extreme, given N=2, all the mass from the smaller mass type go to the larger mass type, so the queue goes up to how many spots the larger school districts were assigned uniformly, and additional spot for the larger school if the school smallers' additional teachers are equally assigned to larger schools given relative mass of smaller and larger schools.
tb_rho_rev_count <- PrjOptiAlloc::ffp_opt_anlyz_sodis_rev(ar_rho, fl_teacher_increase_sub_number, df_input_ib = df_opt_caschool_input_mass_ib, df_queue_il_long_with_V = df_queue_il_long_count, svr_beta_i = 'beta_i')
# Display Results print(tb_rho_rev_count)
tb_rho_rev_mass <- PrjOptiAlloc::ffp_opt_anlyz_sodis_rev(ar_rho, fl_measure_teacher_increase_number, df_input_ib = df_opt_caschool_input_mass_ib, df_queue_il_long_with_V = df_queue_il_long_mass, svr_beta_i = 'beta_i', svr_measure_i = 'mass_i')
# Display Results print(tb_rho_rev_mass)
Since we are comparing against uniform allocation, the REV difference is the smallest when we consider close to cobb-douglas preferences. The Utilitarian as well as the Rawlsian planner are both interested in allocating in an unequal way. The Utilitarian wants to allocate more to for higher alpha, the Rawlsian wants to allocate more to lower A. The Cobb Douglas planner seeks a balance between the two alternatives.
Depending on the Alternative allocation, the REV results might be decreasing, increasing, or have different patterns.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.