Test lower-bounded linear-continuous optimal allocation solution line by line without function. Use the Guatemala-Cebu scrambled nutrition and height early childhood data. Regress the effect of nutritional supplements on height.
The objective of this file is to solve the linear $N_i$ and $H_i$ problem without invoking any functions. This function first tested out the linear solution algorithm.
File name ffv_opt_solin_relow_allrw:
There is a dataset with child attributes, nutritional inputs and outputs. Run regression to estimate some input output relationship first. Then generate required inputs for code.
Doing this allows for lagged intereaction that are time specific in an arbitrary way.
rm(list = ls(all.names = TRUE)) options(knitr.duplicate.label = 'allow')
library(dplyr) library(tidyr) library(stringr) library(broom) library(ggplot2) library(REconTools) library(knitr) library(kableExtra)
Generate four categories by initial height and mother's education levels combinations.
# Load Library # Select Cebu Only df_hw_cebu_m24 <- df_hgt_wgt %>% filter(S.country == 'Cebu' & svymthRound == 24 & prot > 0 & hgt > 0) %>% drop_na() # Generate Discrete Version of momEdu df_hw_cebu_m24 <- df_hw_cebu_m24 %>% mutate(momEduRound = cut(momEdu, breaks=c(-Inf, 10, Inf), labels=c("MEduLow","MEduHigh"))) %>% mutate(hgt0med = cut(hgt0, breaks=c(-Inf, 50, Inf), labels=c("h0low","h0high"))) df_hw_cebu_m24$momEduRound = as.factor(df_hw_cebu_m24$momEduRound) df_hw_cebu_m24$hgt0med = as.factor(df_hw_cebu_m24$hgt0med) # Attach attach(df_hw_cebu_m24)
Estimation Production functions or any other function. Below, Regression height outcomes on input interacted with the categories created before. This will generate category specific marginal effects.
# Input Matrix mt_lincv <- model.matrix(~ hgt0 + wgt0) mt_linht <- model.matrix(~ sex:hgt0med - 1) # Regress Height At Month 24 on Nutritional Inputs with controls rs_hgt_prot_lin = lm(hgt ~ prot:mt_linht + mt_lincv - 1) print(summary(rs_hgt_prot_lin)) rs_hgt_prot_lin_tidy = tidy(rs_hgt_prot_lin)
Now log-linear regressions, where inptut coefficient differs by input groups.
# Input Matrix Generation mt_logcv <- model.matrix(~ hgt0 + wgt0) mt_loght <- model.matrix(~ sex:hgt0med - 1) # Log and log regression for month 24 rs_hgt_prot_log = lm(log(hgt) ~ log(prot):mt_loght + mt_logcv - 1) print(summary(rs_hgt_prot_log)) rs_hgt_prot_log_tidy = tidy(rs_hgt_prot_log)
Multiply coefficient vector by covariate matrix to generate A vector that is child/individual specific.
# Generate A_i ar_Ai_lin <- mt_lincv %*% as.matrix(rs_hgt_prot_lin_tidy %>% filter(!str_detect(term, 'prot')) %>% select(estimate)) ar_Ai_log <- mt_logcv %*% as.matrix(rs_hgt_prot_log_tidy %>% filter(!str_detect(term, 'prot')) %>% select(estimate)) # Generate alpha_i ar_alphai_lin <- mt_linht %*% as.matrix(rs_hgt_prot_lin_tidy %>% filter(str_detect(term, 'prot')) %>% select(estimate)) ar_alphai_log <- mt_loght %*% as.matrix(rs_hgt_prot_log_tidy %>% filter(str_detect(term, 'prot')) %>% select(estimate)) # Child Weight ar_beta <- rep(1/length(ar_Ai_lin), times=length(ar_Ai_lin)) # Initate Dataframe that will store all estimates and optimal allocation relevant information # combine identifying key information along with estimation A, alpha results # note that we only need indi.id as key mt_opti <- cbind(ar_alphai_lin, ar_Ai_lin, ar_beta) ar_st_varnames <- c('alpha', 'A', 'beta') df_esti_alpha_A_beta <- as_tibble(mt_opti) %>% rename_all(~c(ar_st_varnames)) tb_key_alpha_A_beta <- bind_cols(df_hw_cebu_m24, df_esti_alpha_A_beta) %>% select(one_of(c('S.country', 'vil.id', 'indi.id', 'svymthRound', ar_st_varnames))) # Unique beta, A, and alpha groups tb_opti_unique <- tb_key_alpha_A_beta %>% group_by(!!!syms(ar_st_varnames)) %>% arrange(!!!syms(ar_st_varnames)) %>% summarise(n_obs_group=n())
# Child Count df_hw_cebu_m24_full <- df_hw_cebu_m24 it_obs = dim(df_hw_cebu_m24)[1] # Total Resource Count ar_prot_data = df_hw_cebu_m24$prot fl_N_agg = sum(ar_prot_data) # Vector of Planner Preference ar_rho <- c(-100, 0.8) ar_rho <- c(-50, -25, -10) ar_rho <- c(-100, -5, -1, 0.1, 0.6, 0.8) ar_rho <- c(seq(-200, -100, length.out=5), seq(-100, -25, length.out=5), seq(-25, -5, length.out=5), seq(-5, -1, length.out=5), seq(-1, -0.01, length.out=5), seq(0.01, 0.25, length.out=5), seq(0.25, 0.90, length.out=5)) ar_rho <- unique(ar_rho)
This also works with any CRS CES.
# Optimal Linear Equation # accumulate allocation results tb_opti_alloc_all_rho <- tb_key_alpha_A_beta # A. First Loop over Planner Preference # Generate Rank Order for (it_rho_ctr in seq(1,length(ar_rho))) { rho = ar_rho[it_rho_ctr] # B. Generate V4, Rank Index Value, rho specific # tb_opti <- tb_opti %>% mutate(!!paste0('rv_', it_rho_ctr) := A/((alpha*beta))^(1/(1-rho))) tb_opti <- tb_key_alpha_A_beta %>% mutate(rank_val = A/((alpha*beta))^(1/(1-rho))) # c. Generate Rank Index tb_opti <- tb_opti %>% arrange(rank_val) %>% mutate(rank_idx = row_number()) # d. Populate lowest index alpha, beta, and A to all rows tb_opti <- tb_opti %>% mutate(lowest_rank_A = A[rank_idx==1]) %>% mutate(lowest_rank_alpha = alpha[rank_idx==1]) %>% mutate(lowest_rank_beta = beta[rank_idx==1]) # e. relative slope and relative intercept with respect to lowest index tb_opti <- tb_opti %>% mutate(rela_slope_to_lowest = (((lowest_rank_alpha*lowest_rank_beta)/(alpha*beta))^(1/(rho-1))*(lowest_rank_alpha/alpha)) ) %>% mutate(rela_intercept_to_lowest = ((((lowest_rank_alpha*lowest_rank_beta)/(alpha*beta))^(1/(rho-1))*(lowest_rank_A/alpha)) - (A/alpha)) ) # f. cumulative sums tb_opti <- tb_opti %>% mutate(rela_slope_to_lowest_cumsum = cumsum(rela_slope_to_lowest) ) %>% mutate(rela_intercept_to_lowest_cumsum = cumsum(rela_intercept_to_lowest) ) # g. inverting cumulative slopes and intercepts tb_opti <- tb_opti %>% mutate(rela_slope_to_lowest_cumsum_invert = (1/rela_slope_to_lowest_cumsum) ) %>% mutate(rela_intercept_to_lowest_cumsum_invert = ((-1)*(rela_intercept_to_lowest_cumsum)/(rela_slope_to_lowest_cumsum)) ) # h. Relative x-intercept points tb_opti <- tb_opti %>% mutate(rela_x_intercept = (-1)*(rela_intercept_to_lowest/rela_slope_to_lowest) ) # i. Inverted relative x-intercepts tb_opti <- tb_opti %>% mutate(opti_lowest_spline_knots = (rela_intercept_to_lowest_cumsum + rela_slope_to_lowest_cumsum*rela_x_intercept) ) # j. Sort by order of receiving transfers/subsidies tb_opti <- tb_opti %>% arrange(rela_x_intercept) # k. Find position of subsidy tb_opti <- tb_opti %>% arrange(opti_lowest_spline_knots) %>% mutate(tot_devi = opti_lowest_spline_knots - fl_N_agg) %>% arrange((-1)*case_when(tot_devi < 0 ~ tot_devi)) %>% mutate(allocate_lowest = case_when(row_number() == 1 ~ rela_intercept_to_lowest_cumsum_invert + rela_slope_to_lowest_cumsum_invert*fl_N_agg)) %>% mutate(allocate_lowest = allocate_lowest[row_number() == 1]) %>% mutate(opti_allocate = rela_intercept_to_lowest + rela_slope_to_lowest*allocate_lowest) %>% mutate(opti_allocate = case_when(opti_allocate >= 0 ~ opti_allocate, TRUE ~ 0)) %>% mutate(allocate_total = sum(opti_allocate, na.rm=TRUE)) # l. Predictes Expected choice: A + alpha*opti_allocate tb_opti <- tb_opti %>% mutate(opti_exp_outcome = A + alpha*opti_allocate) # m. Keep for df collection individual key + optimal allocation # _on stands for optimal nutritional choices # _eh stands for expected height tb_opti_main_results <- tb_opti %>% select(-one_of(c('lowest_rank_alpha', 'lowest_rank_beta'))) tb_opti_allocate_wth_key <- tb_opti %>% select(one_of('indi.id','opti_allocate', 'opti_exp_outcome')) %>% rename(!!paste0('rho_c', it_rho_ctr, '_on') := !!sym('opti_allocate'), !!paste0('rho_c', it_rho_ctr, '_eh') := !!sym('opti_exp_outcome')) # n. merge optimal allocaiton results from different planner preference tb_opti_alloc_all_rho <- tb_opti_alloc_all_rho %>% left_join(tb_opti_allocate_wth_key, by='indi.id') # print subset if (it_rho_ctr == 1 | it_rho_ctr == length(ar_rho) | it_rho_ctr == round(length(ar_rho)/2)) { print('') print('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx') print(paste0('xxx rho:', rho)) print('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx') print('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx') print(summary(tb_opti_main_results)) } } # o. print results print(summary(tb_opti_alloc_all_rho)) str(tb_opti_alloc_all_rho)
Compute Gini coefficient for each expected outcome and input vector, and store results in separate vectors for input allocation and expected outcomes, then combine.
See how apply works here: R use Apply, Sapply and dplyr Mutate to Evaluate one Function Across Rows of a Matrix from R4Econ.
See how gini function works: ff_dist_gini_vector_pos from REconTools.
# Select out columns for _on = optimal nutrition mt_opti_alloc_all_rho <- data.matrix(tb_opti_alloc_all_rho %>% select(ends_with('_on'))) mt_expc_outcm_all_rho <- data.matrix(tb_opti_alloc_all_rho %>% select(ends_with('_eh'))) # Compute gini for each rho ar_opti_alloc_gini <- suppressMessages(apply(t(mt_opti_alloc_all_rho), 1, ff_dist_gini_vector_pos)) ar_expc_outcm_gini <- suppressMessages(apply(t(mt_expc_outcm_all_rho), 1, ff_dist_gini_vector_pos)) # Combine results # column names look like: rho_c1_on rho_c2_on rho_c3_on rho_c1_eh rho_c2_eh rho_c3_eh tb_gini_onerow_wide <- cbind(as_tibble(t(ar_opti_alloc_gini)), as_tibble(t(ar_expc_outcm_gini))) tb_gini_long <- tb_gini_onerow_wide %>% pivot_longer( cols = starts_with("rho"), names_to = c("it_rho_ctr", "oneh"), names_pattern = "rho_c(.*)_(.*)", values_to = "gini" ) %>% pivot_wider( id_cols = it_rho_ctr, names_from = oneh, values_from = gini ) planner_elas <- log(1/(1-ar_rho)+2) tb_gini_long <- cbind(tb_gini_long, planner_elas)
scatter <- tb_gini_long %>% ggplot(aes(x=on, y=eh)) + geom_point(size=planner_elas) + labs(title = 'Scatter Plot of Optimal Allocation and Expected Outcomes Gini', x = paste0('GINI for Optimal Allocations'), y = paste0('GINI for Exp Outcome given Optimal Allocations'), caption = 'Optimal Allocations and Expected Outcomes, Cebu Height and Protein Example (Wang 2020)') + theme_bw() print(scatter)
# p. graph scatter of opposing preference correlation svr_lowrho_ctr <- 1 svr_higrho_ctr <- length(ar_rho)-1 svr_lowrho <- paste0('rho_c', svr_lowrho_ctr, '_on') svr_higrho <- paste0('rho_c', svr_higrho_ctr, '_eh') tb_min_max_rho <- tb_opti_alloc_all_rho %>% select(one_of(c(svr_lowrho, svr_higrho))) scatter <- tb_min_max_rho %>% ggplot(aes(x=!!sym(svr_lowrho), y=!!sym(svr_higrho))) + geom_point(size=1) + labs(title = 'Scatter Plot of Optimal Allocation Low and High Rho', x = paste0('Allocation High Rho = ', ar_rho[svr_higrho_ctr], ', More Efficient'), y = paste0('Allocation Low Rho = ', ar_rho[svr_lowrho_ctr], ', More Equal'), caption = 'Optimal linear allocation results, given height reg on protein results. (Wang 2020)') + theme_bw() print(scatter) # lineplot <- tb_opti %>% # gather(variable, value, -month) %>% # ggplot(aes(x=month, y=value, colour=variable, linetype=variable)) + # geom_line() + # geom_point() + # labs(title = 'Mean and SD of Temperature Acorss US Cities', # x = 'Months', # y = 'Temperature in Fahrenheit', # caption = 'Temperature data 2017') + # scale_x_continuous(labels = as.character(df_temp_mth_summ$month), # breaks = df_temp_mth_summ$month)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.