Test discrete optimal allocation solution line by line without function. Use the California student test score dataset. Regress student English and Math test scores on Student-Teacher-Ratio.

This function produces the same results as DISCRETE--Discrete Optimal Allocation California Teacher Student Ratio (Line by Line). The differences are:

  1. the program here uses the dataframes created by the line by line file as Inputs
  2. the program here uses functions that are identical to the line by line function, but are saved in the R folder.

This file, by invoking functions, is much shorter than the line by line program.

rm(list = ls(all.names = TRUE))
options(knitr.duplicate.label = 'allow')
library(dplyr)
library(tidyr)
library(tibble)
library(stringr)
library(broom)
library(ggplot2)
library(REconTools)

library(PrjOptiAlloc)

library(knitr)
library(kableExtra)

Hand Input Data Example

Set Preference Array:

ar_rho <- 1 - (10^(c(seq(-2,2, length.out=4))))
ar_rho <- unique(ar_rho)
it_w_agg <- 5
id_i    <- c(1,1,1,2,2,2,3,3,3)
id_il   <- c(1,2,3,4,5,6,7,8,9)
D_max_i <- c(3,3,3,3,3,3,3,3,3)
D_il    <- c(1,2,3,1,2,3,1,2,3)

A_il_i1 <- c(4,3,2,1)
A_il_i2 <- c(2.7,2.2,1.5,1.1)
A_il_i3 <- c(1.9,1.8,1.1,0.1)
A_il <- c(A_il_i1[2:4], A_il_i2[2:4], A_il_i3[2:4])
alpha_il <- c(diff(-A_il_i1), diff(-A_il_i2), diff(-A_il_i3))
beta_i <- c(1/3,1/3,1/3,1/3,1/3,1/3,1/3,1/3,1/3)

df_handinput_input_il <- as_tibble(cbind(id_i, id_il, D_max_i, D_il, A_il, alpha_il, beta_i))

print(df_handinput_input_il)
ls_df_queue_hand <- PrjOptiAlloc::ffp_opt_anlyz_rhgin_dis(ar_rho, it_w_agg,
                                            bl_df_alloc_il = TRUE,
                                            bl_return_V = TRUE,
                                            bl_return_allQ_V = TRUE,
                                            bl_return_inner_V = TRUE,
                                            df_input_il = df_handinput_input_il)
df_queue_il_long <- ls_df_queue_hand$df_queue_il_long
df_queue_il_wide <- ls_df_queue_hand$df_queue_il_wide
df_alloc_i_long  <- ls_df_queue_hand$df_alloc_i_long
df_rho_gini  <- ls_df_queue_hand$df_rho_gini
df_alloc_il_long <- ls_df_queue_hand$df_alloc_il_long
print(df_queue_il_long)
print(df_queue_il_wide)
print(df_alloc_i_long)
print(df_rho_gini)
print(df_alloc_il_long)

Load Pre-Generated Data

These are data thata were generated in the DISCRETE--Discrete Optimal Allocation California Teacher Student Ratio (Line by Line) function.

# Load Data
data(df_opt_caschool_prep_i)
data(df_opt_caschool_input_il)
data(df_opt_caschool_input_ib)

# Show Variables
str(df_opt_caschool_prep_i)
str(df_opt_caschool_input_il)
str(df_opt_caschool_input_ib)

# Modifying Beta Value
# When individuals have the same weights, beta values do not matter
df_opt_caschool_input_il <- df_opt_caschool_input_il %>% mutate(beta_i = 1)
df_opt_caschool_input_ib <- df_opt_caschool_input_ib %>% mutate(beta_i = 1)

Solve All Districts Discrete Optimal Allocation Across Planner Preferences

Specify Policy Parameters

Note that input_il function already is based on fl_fi_max.

# 100 percent teacher at most per school, discretize floor as needed
# This is not a parameter that matters here, already a part of the input_il function
fl_fi_max = 1.00
# 20 percent total additional of all teachers
fl_fa_max = 0.20
# What is the number of teachers we can increase by
fl_teacher_increase_number <- sum(df_opt_caschool_prep_i$teachers)*fl_fa_max
fl_teacher_increase_number <- floor(fl_teacher_increase_number)

# Rho values to consider
ar_rho <- 1 - (10^(c(seq(-2,2, length.out=4))))
ar_rho <- unique(ar_rho)

Optimal Queue and Allocations

Use the discrete allocation function across preference ffp_opt_anlyz_rhgin_dis.html function. This computes optimal allocation for multiple planner inequality aversion $\lambda$ parameters at the same time. Note that in the function $\lambda=\rho$.

# Optimal Allocation
ls_df_queue <- PrjOptiAlloc::ffp_opt_anlyz_rhgin_dis(ar_rho,
                                       fl_teacher_increase_number,
                                       bl_df_alloc_il = TRUE,
                                       df_input_il = df_opt_caschool_input_il,
                                       svr_rho = 'rho',
                                       svr_id_i = 'id_i', svr_id_il = 'id_il',
                                       svr_D_max_i = 'D_max_i', svr_D_il = 'D_il',
                                       svr_D_star_i = 'D_star_i', svr_F_star_i = 'F_star_i',
                                       svr_inpalc = 'Q_il', svr_D_Wbin_il = 'D_Wbin_il',
                                       svr_A_il = 'A_il', svr_alpha_il = 'alpha_il', svr_beta_i = 'beta_i',
                                       svr_expout = 'opti_exp_outcome',
                                       st_idcol_prefix = 'sid_')
df_queue_il_long <- ls_df_queue$df_queue_il_long
df_queue_il_wide <- ls_df_queue$df_queue_il_wide
df_alloc_i_long <- ls_df_queue$df_alloc_i_long
df_rho_gini  <- ls_df_queue$df_rho_gini
df_alloc_il_long <- ls_df_queue$df_alloc_il_long
# Display Results
print(head(df_queue_il_long, 10))
# print(str(df_queue_il_long))
print(head(df_queue_il_wide, 10))
# print(str(df_queue_il_wide))
print(head(df_alloc_i_long, 10))
# print(str(df_queue_il_wide))
print(df_rho_gini)
# print(str(df_alloc_i_long))
print(head(df_alloc_il_long, 10))

REV Compute

tb_rho_rev <-
  PrjOptiAlloc::ffp_opt_anlyz_sodis_rev(ar_rho,
                          fl_teacher_increase_number,
                          df_input_ib = df_opt_caschool_input_ib,
                          df_queue_il_long_with_V = df_queue_il_long)
# Display Results
print(tb_rho_rev)

Optimal Allocation Among Subset of Districts

Select a Small Subset of Districts

To generate some graphs and more easily readable results, rather than using all school districts, select some random subset of school districts from the existing data frames.

  1. Select a random subset of district IDs
  2. New input df that only includes these IDs.
# select four school districts
# cas = california schools
it_O <- 4
set.seed(1)
df_cas_prep_sub_i <- df_opt_caschool_prep_i %>% filter(teachers <= 100 & teachers >= 50)
df_cas_prep_sub_i <- df_cas_prep_sub_i[sample(dim(df_cas_prep_sub_i)[1], it_O, replace=FALSE),]
ar_cas_id_selected <- df_cas_prep_sub_i %>% pull(id_i)

# Select from il and ib only ids that are randomly selected
df_cas_input_sub_il <- df_opt_caschool_input_il %>% filter(id_i %in% ar_cas_id_selected)
df_cas_input_sub_ib <- df_opt_caschool_input_ib %>% filter(id_i %in% ar_cas_id_selected)

# Print
print(df_cas_prep_sub_i)
print(df_cas_input_sub_ib)
print(df_cas_input_sub_il)

Now we also have a new total number of teacher increase, no longer based on all districts. The policy here is to potentially increase each school district by fl_fi_max, already coded into the input_il file, so that can not change. The total number of available new teachers is fl_fa_max times the existing number of teachers in the selected districts overall

fl_fa_max = 0.20
# What is the number of teachers we can increase by
fl_teacher_increase_sub_number <- sum(df_cas_prep_sub_i$teachers)*fl_fa_max
fl_teacher_increase_sub_number <- floor(fl_teacher_increase_sub_number)

Optimal Queue, Allocations and REV

Use the discrete allocation function across preference ffp_opt_anlyz_rhgin_dis.html function. This computes optimal allocation for multiple planner inequality aversion $\lambda$ parameters at the same time. Note that in the function $\lambda=\rho$.

# Rho values to consider
ar_rho <- 1 - (10^(c(seq(-2,2, length.out=4))))
ar_rho <- unique(ar_rho)

# Optimal Allocation
ls_df_sub_queue <- PrjOptiAlloc::ffp_opt_anlyz_rhgin_dis(ar_rho, fl_teacher_increase_sub_number,
                                           df_input_il = df_cas_input_sub_il,
                                           bl_df_alloc_il = TRUE)
df_queue_il_sub_long <- ls_df_sub_queue$df_queue_il_long
df_queue_il_sub_wide <- ls_df_sub_queue$df_queue_il_wide
df_alloc_i_long <- ls_df_sub_queue$df_alloc_i_long
df_rho_gini <- ls_df_sub_queue$df_rho_gini
df_alloc_il_long <- ls_df_sub_queue$df_alloc_il_long

# REV
ar_util_rev_loop_func <- ffp_opt_anlyz_sodis_rev(ar_rho,fl_teacher_increase_sub_number,
                                                 df_input_ib = df_cas_input_sub_ib,
                                                 df_queue_il_long_with_V = df_queue_il_sub_long)

Show Results

# Display Results
print(df_queue_il_sub_long)
print(df_alloc_i_long)
print(df_rho_gini)
print(df_alloc_il_long)
print(ar_util_rev_loop_func)

Differential Mass Optimal Allocation

Consider the population of schools is measure one, there are $N$ distinct types of schools, and the mass of each unique type is different, but they sum up to 1. If each school can have 5 additional teachers, than the mass of teachers is measure 5.

Use the number of students per school as its mass, and introduce a new vector for the allocation function that is this mass. This is to illustrate ideas. The mass should not be the number of students, but the number of schools of the same type.

Note that under discrete allocation, there were 10840 available teachers to be allocated. But in the problem below, the idea is:

  1. suppose there is some system that currently allocates teachers by school attributes
  2. we know the number of teachers allocated to each type of school
  3. we know the proportion of each type of school in the school population.
  4. given a population measure of 1 as the sum of all schools, if on average 4 teachers are allocated to each school, the total of teachers allocated is measure 4. In discrete terms, that just means, if there are in total 1000 different schools (not different unique type, just different schools), and on average each school gets 4 teachers, we are allocating 4000 teachers.

Several things to note:

  1. Get the total measure of resources used by the alternative allocation, this is not summing up just the mass of all individuals, which sums to one, but sums the measure of resources allocated given individual mass, and how much is allocated to each individual
  2. Allocation queue: sort by allocation queue, and cumulative sum over individual mass, repeatedly over each incremental allocation for each individual
  3. Welfare calculation: the welfare/utility is not by cumulative mass, but by probability mass for each type, this does not vary depending on how much is allocated. Allocation changes alpha and A, not the mass in utility.

Analyze Previous Level Allocation with Measure Units

Analyze previous allocation not based on measure, do this only for one planner:

  1. sort by queue
  2. cumulative sum the mass variable: mass_cumu_queue, cumulative mass (resource allocated) by this queue point.

Note that this is to be done inside the allocation function, after queue has been found, cumulative sum mass along the queue, and set cut-off at the total measure of resources available.

# Cumulative Mass
df_queue_il_long_mass_cumu <- df_queue_il_long %>% filter(rho_val == ar_rho[1]) %>%
  select(-rho, -rho_val) %>%
  left_join(df_opt_caschool_prep_i %>% 
              mutate(mass = enrltot/sum(enrltot)) %>%
              select(id_i, mass), by='id_i') %>%
  arrange(Q_il) %>%
  mutate(mass_cumu_queue = cumsum(mass))
# View Around the Teacher Available Cut-off mass
df_queue_il_long_mass_cumu %>% 
  filter(Q_il >= fl_teacher_increase_number - 5 
          & Q_il <= fl_teacher_increase_number + 5)
# Summarize
REconTools::ff_summ_percentiles(df_queue_il_long_mass_cumu, bl_statsasrows=FALSE)

Generate The mass Variable

The mass variable is not contained in df_opt_caschool_prep_i, but is contained in df_opt_caschool_prep_i, merge together to get that number of students as mass. This variable does not need to sum to one. Reweighting to sum to one should not change results.

# Summarize
REconTools::ff_summ_percentiles(df_opt_caschool_input_il, bl_statsasrows=FALSE)
REconTools::ff_summ_percentiles(df_opt_caschool_input_ib, bl_statsasrows=FALSE)

# Mass of Students for IL file 
df_opt_caschool_input_mass_il <- df_opt_caschool_input_il %>% 
  left_join(df_opt_caschool_prep_i %>% 
              mutate(mass_i = enrltot/sum(enrltot)) %>%
              select(id_i, mass_i)
            , by='id_i') %>% ungroup()

# Mass of Students for IL file 
# IB file done with the Assumption that fl_fa_max additional teachers per school
# Since mass per teacher/school same, total mass per school for ib allocation 
# is determined by mass_i still. Do not nuum to multiply mass by the number of teachers
# assigned. 
df_opt_caschool_input_mass_ib <- df_opt_caschool_input_ib %>%
  left_join(df_opt_caschool_prep_i %>% 
              mutate(mass_i = enrltot/sum(enrltot)) %>%
              mutate(cumu_sum_group_i = mass_i*round(teachers*fl_fa_max)) %>%
              select(id_i, mass_i, cumu_sum_group_i, teachers)
            , by='id_i') %>% ungroup()

# Total mass
sum(df_opt_caschool_input_mass_il$mass_i)
sum(df_opt_caschool_input_mass_ib$cumu_sum_group_i)
# Alternative Allocation and Mass for Alternative Allocations
REconTools::ff_summ_percentiles(df_opt_caschool_input_mass_il, bl_statsasrows=FALSE)
REconTools::ff_summ_percentiles(df_opt_caschool_input_mass_ib, bl_statsasrows=FALSE)

Within Group Cumulative Mass Sum with Allocations:

# Within Group Cumulative Sum of Mass
# For the Value Calculation, the A and alpha are updated to be given allocation up to that point
# that means the mass calculation is not mass at a particular unit of allocation, but mass
# for that individual group/type, up to the point. This is a within group cumulative sum, 
# Note that so for welfare and for allocation queue, in the welfare case, we need cumulative within
# individual group mass, and in the allocation queue, need point mass

df_opt_caschool_input_mass_il %>%
  arrange(id_i, D_il) %>% 
  group_by(id_i) %>% 
  arrange(D_il) %>% 
  mutate(cumu_sum_group_i = cumsum(mass_i)) %>%
  arrange(id_i, D_il) %>%
  ungroup()

Select Subset of Individuals For Allocaitons

If want to analyze more closely what is happening in a subset of cases, use the subsetting option:

it_O <- 2
set.seed(1)
df_cas_prep_sub_i <- df_opt_caschool_prep_i %>% filter(teachers <= 100 & teachers >= 50)
df_cas_prep_sub_i <- df_cas_prep_sub_i[sample(dim(df_cas_prep_sub_i)[1], it_O, replace=FALSE),]
ar_cas_id_selected <- df_cas_prep_sub_i %>% pull(id_i)

# Select from il and ib only ids that are randomly selected
bl_subsetting <- FALSE
if (bl_subsetting) {
  df_opt_caschool_input_mass_il <- df_opt_caschool_input_mass_il %>% filter(id_i %in% ar_cas_id_selected)
  df_opt_caschool_input_mass_ib <- df_opt_caschool_input_mass_ib %>% filter(id_i %in% ar_cas_id_selected)
}

# Summarize
REconTools::ff_summ_percentiles(df_opt_caschool_input_mass_il, bl_statsasrows=FALSE)
REconTools::ff_summ_percentiles(df_opt_caschool_input_mass_ib, bl_statsasrows=FALSE)

Optimal Allocations

Optimal Allocation with Number of Teachers as before

Copying the code from prior, use fl_teacher_increase_sub_number and df_opt_caschool_input_mass_il without the mass column mass_i specified.

# Measure of Available Teachers
fl_teacher_increase_sub_number <- sum(df_opt_caschool_input_mass_ib$teachers)*fl_fa_max
fl_teacher_increase_sub_number <- floor(fl_teacher_increase_sub_number)
# Allocate Based on the Measure of Available Teachers
ls_df_queue <- PrjOptiAlloc::ffp_opt_anlyz_rhgin_dis(
  ar_rho,
  fl_teacher_increase_sub_number,
  bl_df_alloc_il = TRUE,
  df_input_il = df_opt_caschool_input_mass_il,
  svr_rho = 'rho',
  svr_id_i = 'id_i', svr_id_il = 'id_il',
  svr_D_max_i = 'D_max_i', svr_D_il = 'D_il',
  svr_D_star_i = 'D_star_i', svr_F_star_i = 'F_star_i',
  svr_inpalc = 'Q_il', svr_D_Wbin_il = 'D_Wbin_il',
  svr_A_il = 'A_il', svr_alpha_il = 'alpha_il', 
  svr_beta_i = 'beta_i',
  svr_expout = 'opti_exp_outcome',
  st_idcol_prefix = 'sid_')
df_queue_il_long_count <- ls_df_queue$df_queue_il_long
df_queue_il_wide_count <- ls_df_queue$df_queue_il_wide
df_alloc_i_long_count <- ls_df_queue$df_alloc_i_long
df_rho_gini_count  <- ls_df_queue$df_rho_gini
df_alloc_il_long_count <- ls_df_queue$df_alloc_il_long
Analyze Measure at Count Allocation

Analyze results using the total measure threshold from the measure allocation

# Cumulative Mass
df_queue_il_long_mass_cntcumu <- df_queue_il_long_count %>% filter(rho_val == ar_rho[1]) %>%
  select(-rho, -rho_val) %>%
  left_join(df_opt_caschool_prep_i %>% 
            mutate(mass_i = enrltot/sum(enrltot)) %>%
            select(id_i, mass_i), by='id_i') %>%  
  arrange(Q_il) %>%
  mutate(mass_cumu_il = cumsum(mass_i))
# View Around the Teacher Available Cut-off mass
df_queue_il_long_mass_cntcumu %>% 
  filter(Q_il >= fl_teacher_increase_sub_number - 4
          & Q_il <= fl_teacher_increase_sub_number + 4)
# Summarize
REconTools::ff_summ_percentiles(df_queue_il_long_mass_cntcumu, bl_statsasrows=FALSE)

Optimal Allocation with Mass

Same as before, but now include the mass column:

# Measure of Available Teachers
fl_measure_teacher_increase_number <- sum(df_opt_caschool_input_mass_ib$cumu_sum_group_i)
# Allocate Based on the Measure of Available Teachers
ls_df_queue <- PrjOptiAlloc::ffp_opt_anlyz_rhgin_dis(
  ar_rho,
  fl_measure_teacher_increase_number,
  bl_df_alloc_il = TRUE,
  df_input_il = df_opt_caschool_input_mass_il,
  svr_rho = 'rho',
  svr_id_i = 'id_i', svr_id_il = 'id_il',
  svr_D_max_i = 'D_max_i', svr_D_il = 'D_il',
  svr_D_star_i = 'D_star_i', svr_F_star_i = 'F_star_i',
  svr_inpalc = 'Q_il', svr_D_Wbin_il = 'D_Wbin_il',
  svr_A_il = 'A_il', svr_alpha_il = 'alpha_il', 
  svr_beta_i = 'beta_i',
  svr_measure_i = 'mass_i',
  svr_expout = 'opti_exp_outcome',
  st_idcol_prefix = 'sid_')
df_queue_il_long_mass <- ls_df_queue$df_queue_il_long
df_queue_il_wide_mass <- ls_df_queue$df_queue_il_wide
df_alloc_i_long_mass <- ls_df_queue$df_alloc_i_long
df_rho_gini_mass  <- ls_df_queue$df_rho_gini
df_alloc_il_long_mass <- ls_df_queue$df_alloc_il_long
Analyze Measure at Mass Allocation

Analyze the Allocation Results based on measure:

df_queue_il_long_mass %>% filter(rho_val == ar_rho[3]) %>%
  select(-rho, -rho_val) %>% arrange(Q_il) %>%
  filter(mass_cumu_il >= fl_measure_teacher_increase_number - 0.01
          & mass_cumu_il <= fl_measure_teacher_increase_number + 0.01)
# Summarize
REconTools::ff_summ_percentiles(df_queue_il_long_mass_cumu, bl_statsasrows=FALSE)

Compare Measure and Count Allocation Results

Under the Utilitarian filter(rho_val == ar_rho[1]):

Based on count, there were 30 teacher spots to allocate, and we move up to queue position 30 for allocation. Based on relative measure/weight, because the schools with more teacher spots assigned under alternative allocation also had higher weight, so we end up with a measure such that the 10 + 20 teachers assigned to each type of school, when looked at on the queue, sorted by queue position, and cumulatively summed with the weight of school school type, ends up giving us enough to go to the 38th queue position. So ignoring the weight of the schools, we would have allocated incorrectly, because we thought there were only 30 slots to be allocated, but given weights, there are actually up to the 38th spot on the queue.

Under filter(rho_val == ar_rho[2], [3], [4]):

Under alternative planners, importantly, the queue is different, hence cumulative weight up the queue is different, so the number of "spots" that could be allocated is different. There are 37, 31, 24 spots that are allocated under increasing Rawlsian preferences. This reflects the fact that under more Rawlsian preferences, the districts more more students are ranked higher on the queue, and they take up more mass. At the extreme, given N=2, all the mass from the smaller mass type go to the larger mass type, so the queue goes up to how many spots the larger school districts were assigned uniformly, and additional spot for the larger school if the school smallers' additional teachers are equally assigned to larger schools given relative mass of smaller and larger schools.

REV Compute

REV Compute with Count

tb_rho_rev_count <-
  PrjOptiAlloc::ffp_opt_anlyz_sodis_rev(ar_rho,
                          fl_teacher_increase_sub_number,
                          df_input_ib = df_opt_caschool_input_mass_ib,
                          df_queue_il_long_with_V = df_queue_il_long_count,
                          svr_beta_i = 'beta_i')
# Display Results
print(tb_rho_rev_count)

REV Compute with Mass

tb_rho_rev_mass <-
  PrjOptiAlloc::ffp_opt_anlyz_sodis_rev(ar_rho,
                          fl_measure_teacher_increase_number,
                          df_input_ib = df_opt_caschool_input_mass_ib,
                          df_queue_il_long_with_V = df_queue_il_long_mass,
                          svr_beta_i = 'beta_i', svr_measure_i = 'mass_i')
# Display Results
print(tb_rho_rev_mass)

REV results Analysis

Since we are comparing against uniform allocation, the REV difference is the smallest when we consider close to cobb-douglas preferences. The Utilitarian as well as the Rawlsian planner are both interested in allocating in an unequal way. The Utilitarian wants to allocate more to for higher alpha, the Rawlsian wants to allocate more to lower A. The Cobb Douglas planner seeks a balance between the two alternatives.

Depending on the Alternative allocation, the REV results might be decreasing, increasing, or have different patterns.



FanWangEcon/PrjOptiAlloc documentation built on Jan. 25, 2022, 6:55 a.m.