The effects of mother's smoking status on birth weight. If we aim to improve health outcomes, what is the priority sequence of mothers whom we would want to discourage from smoking.
rm(list = ls(all.names = TRUE)) options(knitr.duplicate.label = 'allow')
library(dplyr) library(tidyr) library(tibble) library(stringr) library(broom) library(forcats) library(ggplot2) library(REconTools) library(PrjOptiAlloc) library(knitr) library(kableExtra)
Generate four categories by initial height and mother's education levels combinations.
# Load Data data(df_opt_birthwt) # Summarize str(df_opt_birthwt) summary(df_opt_birthwt) # Generate Discrete Version of continuous variables df_opt_birthwt <- df_opt_birthwt %>% mutate(momwgtLowHigh = cut(lwt, breaks=c(-Inf, 129, Inf), labels=c("LW","HW"))) %>% mutate(mombirthage = cut(age, breaks=c(-Inf, 24, Inf), labels=c("young","older"))) %>% mutate(ftvm3 = cut(ftv, breaks=c(-Inf, 1, 2, Inf), labels=c("novisit","1visit","morevisit"))) %>% mutate(ftvm3 = cut(ftv, breaks=c(-Inf, 1, 2, Inf), labels=c("novisit","1visit","morevisit"))) # Generate Not Smoke Variable not_smoke_levels <- c("1" = "0", "0" = "1") df_opt_birthwt <- df_opt_birthwt %>% mutate(notsmoke = case_when(smoke == 0 ~ 1, TRUE ~ 0)) attach(df_opt_birthwt) summary(df_opt_birthwt)
Tabulate:
# tabulate df_opt_birthwt %>% group_by(race, smoke, ftv) %>% summarize(freq = n()) %>% pivot_wider(names_from = race, values_from = freq)
Summarize all, conditional on smoking or not:
# summarize all REconTools::ff_summ_percentiles(df_opt_birthwt, bl_statsasrows = FALSE) # summarize smoking only REconTools::ff_summ_percentiles(df_opt_birthwt %>% filter(smoke == 0), bl_statsasrows = FALSE) # summarize non-smoking only, 74 smokers REconTools::ff_summ_percentiles(df_opt_birthwt %>% filter(smoke == 1), bl_statsasrows = FALSE)
# birth weight on mother age, race, ht and ui summary(lm(bwt ~ lwt + factor(mombirthage) + factor(race) + factor(ht) + factor(ui))) # # summary(lm(bwt ~ factor(ftv) - 1)) # summary(lm(bwt ~ factor(ftv) + factor(ht) + factor(ui) + factor(ptl) - 1)) # Birth wegith on mother's weight last menstral cycle, and mother age, race and smoking interaction summary(lm(bwt ~ lwt + age + factor(race) + factor(race):factor(notsmoke) - 1)) # summary(lm(bwt ~ lwt + factor(race) + (ftv) - 1))
# A. Regress child birth weight on mother's age, race and mother's weight # print(summary(lm(bwt ~ lwt + factor(mombirthage) + factor(race)))) # print(summary(lm(bwt ~ lwt + factor(mombirthage) + factor(race) + factor(ht) + factor(ui)))) # print(summary(lm(bwt ~ lwt + age + factor(race) + factor(ht) + factor(ui) + factor(ptl)))) # # print(summary(lm(bwt ~ lwt + age + factor(race) + factor(smoke)))) # print(summary(lm(bwt ~ lwt + age + factor(race) + factor(ht) + factor(ui) + factor(smoke)))) # # print(summary(lm(bwt ~ lwt + age + factor(momwgtLowHigh):factor(smoke) - 1))) # print(summary(lm(bwt ~ lwt + age + factor(ht) + factor(ui) + factor(race) + factor(race):factor(smoke)))) print(summary(lm(bwt ~ lwt + age + factor(race) + factor(race):factor(smoke) - 1))) # Preferred way of appearing, in the following regression we can se: # 1. more more power less MPG, Straight engine slightly higher MPG # 2. V-shape engine car, going from auto to manual trans gain 4.1 MPG # 2. straight shape engine, going from auto to manual trans gain 6.67 MPG # Store Regression Results mt_model <- model.matrix(~ lwt + age + factor(race) + factor(notsmoke):factor(race)) rs_wgt_on_smke = lm(bwt ~ mt_model - 1) print(summary(rs_wgt_on_smke)) rs_wgt_on_smke_tidy = tidy(rs_wgt_on_smke) rs_wgt_on_smke_tidy
Multiply coefficient vector by covariate matrix to generate A vector that is child/individual specific.
# Estimates Table head(rs_wgt_on_smke_tidy, 6) # Covariates head(mt_model, 5) # Covariates coefficients from regression (including constant) ar_fl_cova_esti <- as.matrix(rs_wgt_on_smke_tidy %>% filter(!str_detect(term, 'notsmoke')) %>% select(estimate)) ar_fl_main_esti <- as.matrix(rs_wgt_on_smke_tidy %>% filter(str_detect(term, 'notsmoke')) %>% select(estimate)) head(ar_fl_cova_esti, 5) head(ar_fl_main_esti, 5) # Select Matrix subcomponents mt_cova <- as.matrix(as_tibble(mt_model) %>% select(-contains("notsmoke"))) mt_intr <- model.matrix(~ factor(race) - 1) # Generate A_i, use mt_cova_wth_const ar_A_m <- mt_cova %*% ar_fl_cova_esti head(ar_A_m, 5) # Generate alpha_i ar_alpha_m <- mt_intr %*% ar_fl_main_esti head(ar_alpha_m, 5)
# Child Weight ar_beta_m <- rep(1/length(ar_A_m), times=length(ar_A_m))
# Initate Dataframe that will store all estimates and optimal allocation relevant information # combine identifying key information along with estimation A, alpha results # note that we only need indi.id as key mt_opti <- cbind(ar_alpha_m*0.01, ar_A_m*0.01, ar_beta_m) ar_st_varnames <- c('alpha', 'A', 'beta') df_esti_alpha_A_beta <- as_tibble(mt_opti) %>% rename_all(~c(ar_st_varnames)) tb_key_alpha_A_beta <- bind_cols(df_opt_birthwt, df_esti_alpha_A_beta) %>% select(one_of(c('Index', 'age', 'lwt', 'race', 'notsmoke', 'ptl', 'ht', 'ui', ar_st_varnames))) # Need to only include the smokers here tb_key_alpha_A_beta <- tb_key_alpha_A_beta %>% filter(smoke == 1) # Randomly select 15 smokers for easier visualization. set.seed(999) it_include <- 15 tb_key_alpha_A_beta <- tb_key_alpha_A_beta[sample(dim(tb_key_alpha_A_beta)[1], it_include, replace=FALSE),] # Unique beta, A, and alpha check tb_opti_unique <- tb_key_alpha_A_beta %>% group_by(!!!syms(ar_st_varnames)) %>% arrange(!!!syms(ar_st_varnames)) %>% summarise(n_obs_group=n()) # Show cars head(tb_key_alpha_A_beta, 32)
# Child Count it_obs = dim(tb_opti_unique)[1] # Vector of Planner Preference ar_rho <- 1 - (10^(c(seq(-2, 2, length.out=30)))) ar_rho <- unique(ar_rho) print(ar_rho)
This also works with any CRS CES.
# Optimal Linear Equation svr_inpalc <- 'rank' # Solve with Function ls_bin_solu_all_rhos <- ffp_opt_anlyz_rhgin_bin(tb_key_alpha_A_beta, svr_id_i = 'Index', svr_A_i = 'A', svr_alpha_i = 'alpha', svr_beta_i = 'beta', ar_rho = ar_rho, svr_inpalc = svr_inpalc, svr_expout = 'opti_exp_outcome') tb_opti_alloc_all_rho <- ls_bin_solu_all_rhos$df_all_rho tb_opti_alloc_all_rho_long <- ls_bin_solu_all_rhos$df_all_rho_long # Converge rho to numeric so the graph below sorts along x-axis properly tb_opti_alloc_all_rho_long <- tb_opti_alloc_all_rho_long %>% mutate(rho_num = as.numeric(rho)) # Merge to get Race Data tb_opti_alloc_all_rho_long <- tb_opti_alloc_all_rho_long %>% left_join(tb_key_alpha_A_beta %>% select(Index, race), by='Index')
st_title = paste0("Binary Deallocation Rank, Birth Weight and Mother Smoking") st_subtitle = paste0("Which individual to encourage to stop smoking first\n", "The expected outcome of interest is child birth weight\n", "Deallocate smoking among 15 randomly selected smokers\n", "Rank = 1, encourage this woman to stop smoking first") st_caption = paste0("Linear regression of birth weight on smoking. Data from Hosmer et. al. (1998)") st_x_lab = paste0("Efficiency (Utilitarian) --> Cobb-Douglas --> Equity (Rawlsian)") st_y_lab = paste0("Rank Along Binary Allocation Queue ( 1 = highest ranked)") tb_opti_alloc_all_rho_long %>% ggplot(aes(x = rho_num, y = !!sym(svr_inpalc), group = Index)) + geom_line(aes(color = race, alpha = 1), size = 2) + geom_point(aes(color = race, alpha = 1), size = 4) + scale_x_discrete(expand = c(0.85,0))+ scale_y_reverse(breaks = 1:nrow(tb_opti_alloc_all_rho_long))+ theme(legend.position = "none") + labs(x = st_x_lab, y = st_y_lab, title = st_title, subtitle = st_subtitle, caption = st_caption) + ffy_opt_ghthm_dk() + geom_text(data =tb_opti_alloc_all_rho,aes(y=rho_c1_rk,x=0.6,label=Index),hjust="right")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.