knitr::opts_chunk$set(echo = FALSE, warning = FALSE, error = FALSE, message = FALSE) library(pander) library(tidyverse) library(mice) devtools::document() #devtools::build() devtools::load_all() otl <- outcomes_list <- list() otl[["everyone"]] <- paste0(c("returnor", "supinfec"), "_01") otl[["cosmetics"]] <- paste0(c("returnor", "supinfec"), "_01") otl[["no_cosmetic"]] <- paste0(c("returnor", "supinfec"), "_01")
nsqip <- map(c("everyone", "cosmetics", "no cosmetics"), function(which_df) make_nsqip(1:6, which_df)) names(nsqip) <- c("everyone", "cosmetics", "no_cosmetic")
#tbl_ones_pgy <- map(nsqip, tbl_one, strat = "pgy_bin") nsqip <- map(nsqip, function(df) mutate(df, admqtr_t = ifelse(admqtr == "0", "Any Quarter", admqtr))) tbl_ones <- map(nsqip, function(df) { print(tableone::CreateTableOne(c("surgspec", "pgy_bin", paper$mona$predictors, paper$vti$outcomes), strata = "admqtr_t", data = df), printToggle = FALSE) }) %>% map(fix_to_names)
nsqip <- nsqip %>% map(function(df){ preds <- nsqipr::paper$mona$predictors the_form <- paste0("pgy01 ~ ", paste0(preds, collapse = " + ")) df[["propensity_score"]] <- predict(glm(the_form, "binomial", df), df, type = "response") df }) paper$mona[["predictors"]] <- c(nsqipr::paper$mona[["predictors"]], "propensity_score") nsqip <- nsqip %>% map(binarize_outcomes) %>% map(~ filter(.x, attend != "Attending Not Present, but Available" & attend != "Not entered") ) #check which variables are cool #map(nsqip, function(x) map(x[paper$mona$outcomes], table)) nsqip <- map(nsqip, function(df){ list(select(df, "pgy01", "pgy_bin", "admqtr", "admqtr23_01", one_of(nsqipr::paper$mona$predictors, unique(unlist(otl)))), select(df, "propensity_score")) }) pscores <- map(nsqip, 2) nsqip <- map(nsqip, 1) %>% map(function(df) select(df, -admqtr23_01)) not_imputed_nsqip <- nsqip impute_fn <- function(df) mice(df, m=1, maxit=50, seed=400, ridge=0.01) nsqip <- map(nsqip, function(df) mice::complete(impute_fn(df), 1)) nsqip <- map2(nsqip, pscores, function(x, y) na.omit(bind_cols(x, y))) matched_data <- map(nsqip, function(df) { MatchIt::match.data(MatchIt::matchit(pgy01 ~ age + sex + bmi + race + smoke + diabetes + hypermed + workrvu + optime + inout + tothlos + wndclas + asaclas + attend, method = "nearest", data = df)) }) unmatched_data <- nsqip #r_rules <- map2(unmatched_data, matched_data, rubin_rules) #match_fn(nsqip[[1]])
proportion_missing <- map_df(not_imputed_nsqip, function(df) map_dbl(df, ~ length(which(is.na(.x))) / length(.x))) %>% mutate(average = (everyone + cosmetics + no_cosmetic) / 3, Variable = names(not_imputed_nsqip[[1]])) pander(proportion_missing, caption = "Proportion of missing observations for each variable. These were imputed.")
glms <- do_glms(matched_data, unmatched_data) aucs <- map2(glms, names(glms), classification_metrics) %>% flatten_dfr() %>% modify_at(c("c_stat", "h_l_stat"), ~ sprintf("%.3f", .x)) aucs$outcome <- gsub("_01$", "", aucs$outcome) tidied_models <- map2(glms, names(glms), function(dflist, dfnm) { map(dflist, function(mdl) { mutate(broom::tidy(mdl[[1]]), glm = dfnm, outcome = mdl[[2]]) }) }) %>% flatten_dfr confint_models <- map2(glms, names(glms), function(dflist, dfnm) { map(dflist, function(mdl) { cbind(confint(mdl[[1]]), glm = dfnm, outcome = mdl[[2]]) }) }) %>% (function(matrices_list){ matrices_list <- flatten(matrices_list) the_rownames <- unlist(map(matrices_list, rownames)) the_df <- bind_rows(map(matrices_list, data.frame, stringsAsFactors = FALSE)) the_df[["rownames"]] <- the_rownames names(the_df) <- c("conf.lower", "conf.upper", "glm", "outcome", "term") the_df }) models_df <- full_join(tidied_models, confint_models) models_df <- models_df[,!grepl("1$", names(models_df))] %>% select(-statistic, -std.error) to_exp <- c("estimate", "conf.lower", "conf.upper") models_df[,to_exp] <- lapply(models_df[,to_exp], function(x) { sprintf("%.4f", exp(as.numeric(x)))}) for_show <- c("term", "glm", "outcome", "estimate", "conf.lower", "conf.upper", "p.value") models_df <- models_df[,for_show] models_df$outcome <- gsub("_01$", "", models_df$outcome) models_df <- models_df %>% filter(!(term %in% c("(Intercept)", "propensity_score"))) models_df$propensity <- ifelse(grepl("no_prop", models_df$glm), "Unmatched", "Matched") models_df$population <- gsub("no_prop_", "", models_df$glm) models_df$population <- gsub("_", " ", models_df$population) to_replace <- c("admqtr2", "admqtr3", "pgy_binFour or above", "pgy_binThree\\ or\\ lower") replacements <- c("Q2", "Q3", "PGY4+", "PGY3-") models_df$term <- reduce2(to_replace, replacements, function(innit, tr, rep) gsub(tr, rep, innit), .init = models_df$term) fxr_uppers <- with(models_df, as.numeric(estimate) == 0 | as.numeric(conf.upper) > 10 | is.na(conf.lower)) fxr_uppers <- ifelse(is.na(fxr_uppers), TRUE, fxr_uppers) models_df[fxr_uppers,c("estimate", "conf.lower", "conf.upper", "p.value")] <- NA to_show <- c("term", "population", "propensity", "outcome", "estimate", "conf.lower", "conf.upper", "p.value", #"c_stat", "h_l_stat") models_df <- left_join(models_df, aucs) models_df <- models_df[,to_show]
\pagebreak
pander(tbl_ones[[1]][,1:3])
\pagebreak
pander(tbl_ones[[2]][,1:3])
\pagebreak
pander(tbl_ones[[3]][,1:3])
\pagebreak
Here, we are looking at an estimate of the odds ratio of an outcome for the third quarter compared to the second quarter, PGY3 and below versus PGY4 and above. Each estimate in the following two tables that corresponds to the same outcome, population, and "propensity" label is the result of a single logistic regression model, where these models are predicted by each of admission quarter (3 versus any other), resident involvement, and propensity score + matching for the "Matched" populations.
models_df %>% filter(term == "Q3") %>% arrange(desc(outcome), propensity) %>% pander(split.table = Inf)
models_df %>% filter(term != "Q3") %>% arrange(desc(outcome), propensity) %>% pander(split.table = Inf)
These visualizations are meant to ensure comparable propensity scores. If one of the populations has a visually different number of observations in a particular propensity score range, then we would be concerned that propensity scoring didn't work.
propplot <- function(df) ggplot(df, aes(propensity_score, fill = admqtr)) + geom_histogram() map(nsqip, propplot)
pmodels_df <- models_df %>% modify_at("population", stringr::str_to_title) %>% modify_at("outcome", ~ case_when( .x == "returnor" ~ "Return to OR", .x == "supinfec" ~ "SSI" )) %>% modify_at("population", ~ case_when( .x == "Everyone" ~ "All", .x == "No Cosmetic" ~ "Reconstructive", .x == "Cosmetics" ~ "Cosmetic" )) %>% modify_at("term", ~ case_when( .x == "Q3" ~ "Quarter 3", .x == "PGY3-" ~ "Junior" )) %>% modify_at(c("estimate", "conf.lower", "conf.upper"), as.numeric) p <- ggplot(pmodels_df, aes(x = population, y = estimate, color = propensity, shape = term)) + geom_hline(yintercept = 1, color = "grey70", linetype = 2) + geom_errorbar(aes(ymin = conf.lower, ymax = conf.upper), position = position_dodge(width = 0.75), width = 0.25) + geom_point(position = position_dodge(width = 0.75)) + facet_wrap(~ outcome + term, scales = "free") + scale_shape_manual(values = c(19, 1)) + scale_color_grey(start = 0, end = 0.5) + theme(legend.position = "bottom") + labs(color = "", shape = "", x = "", y = "Odds ratio") p
The log scale makes more sense because logistic regression relies on a log transformation, so the following version uses a log scale on the y-axis:
p + scale_y_log10()
\pagebreak
devtools::session_info()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.