The effects of mother's smoking status on birth weight. If we aim to improve health outcomes, what is the priority sequence of mothers whom we would want to discourage from smoking.

Load Packages and Data

Load Dependencies

rm(list = ls(all.names = TRUE))
options(knitr.duplicate.label = 'allow')
library(dplyr)
library(tidyr)
library(tibble)
library(stringr)
library(broom)
library(forcats)

library(ggplot2)

library(REconTools)

library(PrjOptiAlloc)

library(knitr)
library(kableExtra)

Load Data

Generate four categories by initial height and mother's education levels combinations.

# Load Data
data(df_opt_birthwt)

# Summarize
str(df_opt_birthwt)
summary(df_opt_birthwt)

# Generate Discrete Version of continuous variables
df_opt_birthwt <- df_opt_birthwt %>%
    mutate(momwgtLowHigh = cut(lwt,
                               breaks=c(-Inf, 129, Inf),
                               labels=c("LW","HW"))) %>%
    mutate(mombirthage = cut(age,
                               breaks=c(-Inf, 24, Inf),
                               labels=c("young","older"))) %>%
    mutate(ftvm3 = cut(ftv,
                               breaks=c(-Inf, 1, 2, Inf),
                               labels=c("novisit","1visit","morevisit"))) %>%
    mutate(ftvm3 = cut(ftv,
                               breaks=c(-Inf, 1, 2, Inf),
                               labels=c("novisit","1visit","morevisit")))

# Generate Not Smoke Variable
not_smoke_levels <- c("1" = "0", "0" = "1")
df_opt_birthwt <- df_opt_birthwt %>% 
    mutate(notsmoke = case_when(smoke == 0 ~ 1, TRUE ~ 0))

attach(df_opt_birthwt)
summary(df_opt_birthwt)

Exploration

Tabulation

Tabulate:

# tabulate
df_opt_birthwt %>%
  group_by(race, smoke, ftv) %>%
  summarize(freq = n()) %>%
  pivot_wider(names_from = race, values_from = freq)

Summarize all, conditional on smoking or not:

# summarize all
REconTools::ff_summ_percentiles(df_opt_birthwt, bl_statsasrows = FALSE)
# summarize smoking only
REconTools::ff_summ_percentiles(df_opt_birthwt %>% filter(smoke == 0), bl_statsasrows = FALSE)
# summarize non-smoking only, 74 smokers
REconTools::ff_summ_percentiles(df_opt_birthwt %>% filter(smoke == 1), bl_statsasrows = FALSE) 

Explorational Regressions

# birth weight on mother age, race, ht and ui
summary(lm(bwt ~ lwt + factor(mombirthage) + factor(race) + factor(ht) + factor(ui)))
# # summary(lm(bwt ~ factor(ftv) - 1))
# summary(lm(bwt ~ factor(ftv) + factor(ht) + factor(ui) + factor(ptl) - 1))

# Birth wegith on mother's weight last menstral cycle, and mother age, race and smoking interaction
summary(lm(bwt ~ lwt + age + factor(race) + factor(race):factor(notsmoke) - 1))

# summary(lm(bwt ~ lwt + factor(race) + (ftv) - 1))

Regression with Data and Construct Input Arrays

Binary Problem Regress Birth Weight on Mother's Smoking Status

# A. Regress child birth weight on mother's age, race and mother's weight
# print(summary(lm(bwt ~ lwt + factor(mombirthage) + factor(race))))
# print(summary(lm(bwt ~ lwt + factor(mombirthage) + factor(race) + factor(ht) + factor(ui))))
# print(summary(lm(bwt ~ lwt + age + factor(race) + factor(ht) + factor(ui) + factor(ptl))))
# 
# print(summary(lm(bwt ~ lwt + age + factor(race) + factor(smoke))))
# print(summary(lm(bwt ~ lwt + age + factor(race) + factor(ht) + factor(ui) +  factor(smoke))))
# 
# print(summary(lm(bwt ~ lwt + age + factor(momwgtLowHigh):factor(smoke) - 1)))
# print(summary(lm(bwt ~ lwt + age + factor(ht) + factor(ui) + factor(race) + factor(race):factor(smoke))))
print(summary(lm(bwt ~ lwt + age + factor(race) + factor(race):factor(smoke) - 1)))

# Preferred way of appearing, in the following regression we can se:
# 1. more more power less MPG, Straight engine slightly higher MPG
# 2. V-shape engine car, going from auto to manual trans gain 4.1 MPG
# 2. straight shape engine, going from auto to manual trans gain 6.67 MPG

# Store Regression Results
mt_model <- model.matrix(~ lwt + age + factor(race) + factor(notsmoke):factor(race))
rs_wgt_on_smke = lm(bwt ~ mt_model - 1)
print(summary(rs_wgt_on_smke))
rs_wgt_on_smke_tidy = tidy(rs_wgt_on_smke)
rs_wgt_on_smke_tidy

Construct Input Arrays $A_i$ and $\alpha_i$

Multiply coefficient vector by covariate matrix to generate A vector that is child/individual specific.

# Estimates Table
head(rs_wgt_on_smke_tidy, 6)
# Covariates
head(mt_model, 5)

# Covariates coefficients from regression (including constant)
ar_fl_cova_esti <- as.matrix(rs_wgt_on_smke_tidy %>% filter(!str_detect(term, 'notsmoke')) %>% select(estimate))
ar_fl_main_esti <- as.matrix(rs_wgt_on_smke_tidy %>% filter(str_detect(term, 'notsmoke')) %>% select(estimate))
head(ar_fl_cova_esti, 5)
head(ar_fl_main_esti, 5)

# Select Matrix subcomponents
mt_cova <- as.matrix(as_tibble(mt_model) %>% select(-contains("notsmoke")))
mt_intr <- model.matrix(~ factor(race) - 1)

# Generate A_i, use mt_cova_wth_const
ar_A_m <- mt_cova %*% ar_fl_cova_esti
head(ar_A_m, 5)

# Generate alpha_i
ar_alpha_m <- mt_intr %*% ar_fl_main_esti
head(ar_alpha_m, 5)

Individual Weight

# Child Weight
ar_beta_m <- rep(1/length(ar_A_m), times=length(ar_A_m))

Matrix with Required Inputs for Allocation

# Initate Dataframe that will store all estimates and optimal allocation relevant information
# combine identifying key information along with estimation A, alpha results
# note that we only need indi.id as key
mt_opti <- cbind(ar_alpha_m*0.01, ar_A_m*0.01, ar_beta_m)
ar_st_varnames <- c('alpha', 'A', 'beta')
df_esti_alpha_A_beta <- as_tibble(mt_opti) %>% rename_all(~c(ar_st_varnames))
tb_key_alpha_A_beta <- bind_cols(df_opt_birthwt, df_esti_alpha_A_beta) %>%
              select(one_of(c('Index', 'age', 'lwt', 'race', 'notsmoke', 'ptl', 'ht', 'ui', ar_st_varnames)))

# Need to only include the smokers here
tb_key_alpha_A_beta <- tb_key_alpha_A_beta %>% filter(smoke == 1)

# Randomly select 15 smokers for easier visualization.
set.seed(999)
it_include <- 15
tb_key_alpha_A_beta <- tb_key_alpha_A_beta[sample(dim(tb_key_alpha_A_beta)[1], it_include, replace=FALSE),]

# Unique beta, A, and alpha check
tb_opti_unique <- tb_key_alpha_A_beta %>% group_by(!!!syms(ar_st_varnames)) %>%
                    arrange(!!!syms(ar_st_varnames)) %>%
                    summarise(n_obs_group=n())

# Show cars
head(tb_key_alpha_A_beta, 32)

Optimal Linear Allocations

Parameters for Optimal Allocation

# Child Count
it_obs = dim(tb_opti_unique)[1]

# Vector of Planner Preference
ar_rho <- 1 - (10^(c(seq(-2, 2, length.out=30))))
ar_rho <- unique(ar_rho)
print(ar_rho)

Optimal binary Allocation

This also works with any CRS CES.

# Optimal Linear Equation
svr_inpalc <- 'rank'

# Solve with Function
ls_bin_solu_all_rhos <-
  ffp_opt_anlyz_rhgin_bin(tb_key_alpha_A_beta, svr_id_i = 'Index',
                          svr_A_i = 'A', svr_alpha_i = 'alpha', svr_beta_i = 'beta',
                          ar_rho = ar_rho,
                          svr_inpalc = svr_inpalc,
                          svr_expout = 'opti_exp_outcome')

tb_opti_alloc_all_rho <- ls_bin_solu_all_rhos$df_all_rho
tb_opti_alloc_all_rho_long <- ls_bin_solu_all_rhos$df_all_rho_long

# Converge rho to numeric so the graph below sorts along x-axis properly 
tb_opti_alloc_all_rho_long <- tb_opti_alloc_all_rho_long %>% mutate(rho_num = as.numeric(rho))

# Merge to get Race Data
tb_opti_alloc_all_rho_long <- tb_opti_alloc_all_rho_long %>% 
  left_join(tb_key_alpha_A_beta %>% select(Index, race), by='Index')

Bump Plot for Optimal Binary Allocations

st_title = paste0("Binary Deallocation Rank, Birth Weight and Mother Smoking")
st_subtitle = paste0("Which individual to encourage to stop smoking first\n",
                     "The expected outcome of interest is child birth weight\n",
                     "Deallocate smoking among 15 randomly selected smokers\n",
                     "Rank = 1, encourage this woman to stop smoking first")
st_caption = paste0("Linear regression of birth weight on smoking. Data from Hosmer et. al. (1998)")
st_x_lab = paste0("Efficiency (Utilitarian) --> Cobb-Douglas --> Equity (Rawlsian)")
st_y_lab = paste0("Rank Along Binary Allocation Queue ( 1 = highest ranked)")

tb_opti_alloc_all_rho_long %>%
  ggplot(aes(x = rho_num, y = !!sym(svr_inpalc), group = Index)) +
    geom_line(aes(color = race, alpha = 1), size = 2) +
    geom_point(aes(color = race, alpha = 1), size = 4) +
    scale_x_discrete(expand = c(0.85,0))+
    scale_y_reverse(breaks = 1:nrow(tb_opti_alloc_all_rho_long))+
    theme(legend.position = "none") +
    labs(x = st_x_lab,
         y = st_y_lab, 
         title = st_title, 
         subtitle = st_subtitle,
         caption = st_caption) +
    ffy_opt_ghthm_dk() +
    geom_text(data =tb_opti_alloc_all_rho,aes(y=rho_c1_rk,x=0.6,label=Index),hjust="right")


FanWangEcon/PrjOptiAlloc documentation built on Jan. 25, 2022, 6:55 a.m.