In mustafaascha/nsqipr: Using ACS-NSQIP in R (unofficially)

knitr::opts_chunk$set(echo = FALSE, warning = FALSE, error = FALSE, message = FALSE)
library(pander)
library(tidyverse)
library(mice)

devtools::document()
#devtools::build()
devtools::load_all()

otl <- outcomes_list <- list()
otl[["everyone"]] <- paste0(c("returnor", "supinfec"), "_01")
otl[["cosmetics"]] <- paste0(c("returnor", "supinfec"), "_01")
otl[["no_cosmetic"]] <- paste0(c("returnor", "supinfec"), "_01")

nsqip <- 
  map(c("everyone", "cosmetics", "no cosmetics"), 
      function(which_df) make_nsqip(1:6, which_df)) 
names(nsqip) <- c("everyone", "cosmetics", "no_cosmetic")

#tbl_ones_pgy <- map(nsqip, tbl_one, strat = "pgy_bin")

nsqip <- map(nsqip, function(df) mutate(df, admqtr_t = ifelse(admqtr == "0", "Any Quarter", admqtr)))
tbl_ones <- 
  map(nsqip, 
      function(df) {
        print(tableone::CreateTableOne(c("surgspec", "pgy_bin", 
                                         paper$mona$predictors, 
                                         paper$vti$outcomes), 
                             strata = "admqtr_t", data = df), printToggle = FALSE)
  }) %>% 
  map(fix_to_names)

nsqip <- nsqip %>% map(function(df){
  preds <- nsqipr::paper$mona$predictors
  the_form <- paste0("pgy01 ~ ", paste0(preds, collapse = " + "))
  df[["propensity_score"]] <-
    predict(glm(the_form, "binomial", df), df, type = "response")
  df
})

paper$mona[["predictors"]] <-
   c(nsqipr::paper$mona[["predictors"]], "propensity_score")

nsqip <- 
  nsqip %>% 
  map(binarize_outcomes) %>% 
  map(~ filter(.x, 
               attend != "Attending Not Present, but Available" & 
               attend != "Not entered")
      )
#check which variables are cool
#map(nsqip, function(x) map(x[paper$mona$outcomes], table))

nsqip <- 
  map(nsqip, function(df){
    list(select(df, "pgy01", "pgy_bin", "admqtr", "admqtr23_01",
                one_of(nsqipr::paper$mona$predictors, 
                       unique(unlist(otl)))), 
         select(df,  "propensity_score"))
    })
pscores <- map(nsqip, 2)
nsqip <- 
  map(nsqip, 1) %>% 
  map(function(df) select(df, -admqtr23_01))

not_imputed_nsqip <- nsqip

impute_fn <- function(df) mice(df, m=1, maxit=50, seed=400, ridge=0.01)
nsqip <- map(nsqip, function(df) mice::complete(impute_fn(df), 1))
nsqip <- map2(nsqip, pscores, function(x, y) na.omit(bind_cols(x, y)))

matched_data <- 
  map(nsqip, function(df) {
  MatchIt::match.data(MatchIt::matchit(pgy01 ~ 
                       age + sex + bmi + race + smoke + 
                       diabetes + hypermed + workrvu + 
                       optime + inout + tothlos + wndclas + 
                       asaclas + attend, 
                     method = "nearest", data = df))
  })

unmatched_data <- nsqip

#r_rules <- map2(unmatched_data, matched_data, rubin_rules)

#match_fn(nsqip[[1]])

Missingness

proportion_missing <- 
  map_df(not_imputed_nsqip, 
      function(df) map_dbl(df, ~ length(which(is.na(.x))) / length(.x))) %>% 
  mutate(average = (everyone + cosmetics + no_cosmetic) / 3, 
         Variable = names(not_imputed_nsqip[[1]]))
pander(proportion_missing, caption = "Proportion of missing observations for each variable. These were imputed.")

glms <- do_glms(matched_data, unmatched_data)

aucs <- 
  map2(glms, names(glms), classification_metrics) %>% 
  flatten_dfr() %>% 
  modify_at(c("c_stat", "h_l_stat"), ~ sprintf("%.3f", .x))

aucs$outcome <- gsub("_01$", "", aucs$outcome)


tidied_models <-
  map2(glms, names(glms),
       function(dflist, dfnm) {
         map(dflist, function(mdl) {
           mutate(broom::tidy(mdl[[1]]), glm = dfnm, outcome = mdl[[2]])
             })
         }) %>% 
  flatten_dfr


confint_models <-
  map2(glms, names(glms),
       function(dflist, dfnm) {
         map(dflist, function(mdl) {
           cbind(confint(mdl[[1]]), glm = dfnm, outcome = mdl[[2]])
           })
         }) %>% 
  (function(matrices_list){
    matrices_list <- flatten(matrices_list)
    the_rownames <- unlist(map(matrices_list, rownames))
    the_df <- bind_rows(map(matrices_list, data.frame, stringsAsFactors = FALSE))
    the_df[["rownames"]] <- the_rownames
    names(the_df) <- c("conf.lower", "conf.upper", "glm", "outcome", "term")
    the_df
  })



models_df <- full_join(tidied_models, confint_models)
models_df <-
  models_df[,!grepl("1$", names(models_df))] %>%
  select(-statistic, -std.error)

to_exp <- c("estimate", "conf.lower", "conf.upper")
models_df[,to_exp] <- lapply(models_df[,to_exp], function(x) {
  sprintf("%.4f", exp(as.numeric(x)))})

for_show <- c("term", "glm", "outcome", "estimate", "conf.lower", "conf.upper", "p.value")
models_df <- models_df[,for_show]
models_df$outcome <- gsub("_01$", "", models_df$outcome)

models_df <- models_df %>% 
  filter(!(term %in% c("(Intercept)", "propensity_score"))) 

models_df$propensity <- 
  ifelse(grepl("no_prop", models_df$glm), "Unmatched", "Matched")

models_df$population <- gsub("no_prop_", "", models_df$glm)
models_df$population <- gsub("_", " ", models_df$population)

to_replace <- 
  c("admqtr2", "admqtr3", 
    "pgy_binFour or above", "pgy_binThree\\ or\\ lower")
replacements <- c("Q2", "Q3", "PGY4+", "PGY3-")

models_df$term <- 
  reduce2(to_replace, replacements, 
          function(innit, tr, rep) gsub(tr, rep, innit), 
          .init = models_df$term)

fxr_uppers <- 
  with(models_df, as.numeric(estimate) == 0 |
                 as.numeric(conf.upper) > 10 |   
                 is.na(conf.lower))
fxr_uppers <- ifelse(is.na(fxr_uppers), TRUE, fxr_uppers)

models_df[fxr_uppers,c("estimate", "conf.lower", "conf.upper", "p.value")] <- 
  NA

to_show <- 
  c("term", "population", "propensity", "outcome", 
    "estimate", "conf.lower", "conf.upper", "p.value", 
    #"c_stat", 
    "h_l_stat")

models_df <- left_join(models_df, aucs)

models_df <- models_df[,to_show]

\pagebreak

Table ones

Whole Population

pander(tbl_ones[[1]][,1:3])

Cosmetics procedures

\pagebreak

pander(tbl_ones[[2]][,1:3])

Non-cosmetics procedures

\pagebreak

pander(tbl_ones[[3]][,1:3])

\pagebreak

Odds of outcome during 3rd quarter

Here, we are looking at an estimate of the odds ratio of an outcome for the third quarter compared to the second quarter, PGY3 and below versus PGY4 and above. Each estimate in the following two tables that corresponds to the same outcome, population, and "propensity" label is the result of a single logistic regression model, where these models are predicted by each of admission quarter (3 versus any other), resident involvement, and propensity score + matching for the "Matched" populations.

models_df %>% 
  filter(term == "Q3") %>% 
  arrange(desc(outcome), propensity) %>% 
  pander(split.table = Inf)

models_df %>% 
  filter(term != "Q3") %>% 
  arrange(desc(outcome), propensity) %>% 
  pander(split.table = Inf)

Propensity score visualization

These visualizations are meant to ensure comparable propensity scores. If one of the populations has a visually different number of observations in a particular propensity score range, then we would be concerned that propensity scoring didn't work.

propplot <- function(df) ggplot(df, aes(propensity_score, fill = admqtr)) + geom_histogram()
map(nsqip, propplot)

Odds ratios visualization

pmodels_df <- 
  models_df %>%
    modify_at("population", stringr::str_to_title) %>%
    modify_at("outcome",
              ~ case_when(
                .x == "returnor" ~ "Return to OR", 
                .x == "supinfec" ~ "SSI"
              )) %>% 
    modify_at("population", 
              ~ case_when(
                .x == "Everyone" ~ "All",
                .x == "No Cosmetic" ~ "Reconstructive",
                .x == "Cosmetics" ~ "Cosmetic"
              )) %>% 
    modify_at("term", 
              ~ case_when(
                .x == "Q3" ~ "Quarter 3", 
                .x == "PGY3-" ~ "Junior"
              )) %>% 
    modify_at(c("estimate", "conf.lower", "conf.upper"), as.numeric)

p <- 
  ggplot(pmodels_df, aes(x = population, 
               y = estimate, 
               color = propensity, 
               shape = term)) + 
      geom_hline(yintercept = 1, color = "grey70", linetype = 2) + 
      geom_errorbar(aes(ymin = conf.lower, ymax = conf.upper), 
                    position = position_dodge(width = 0.75), 
                    width = 0.25) + 
      geom_point(position = position_dodge(width = 0.75)) + 
      facet_wrap(~ outcome + term, scales = "free") + 
      scale_shape_manual(values = c(19, 1)) + 
      scale_color_grey(start = 0, end = 0.5) + 
      theme(legend.position = "bottom") + 
      labs(color = "", shape = "", x = "", y = "Odds ratio")

p