knitr::opts_chunk$set( echo = TRUE, eval = TRUE, collapse = TRUE, fig.width = 6)
library(DataTrainCausalLearning) # required for data management and plots library(data.table) library(tidyverse) library(ggplot2) # required for analysis library(stdReg) library(ipw) library(survey) library(cobalt) library(sandwich) library(AIPW) library(SuperLearner)
data(bcrot)
Compare descriptively QOL for those who do and do not take hormonal therapy
# qol ggplot(bcrot, aes(x=qol, after_stat(density))) + geom_histogram(fill="#69b3a2", color="#e9ecef", alpha=0.9, binwidth = 5) + labs(x="Quality of life", y="Density") + theme_light() # age ~ hormon plot.age <- ggplot(bcrot, aes(x=age, after_stat(density)), fill=as.factor(hormon)) + geom_histogram(aes(fill=as.factor(hormon)), color=c("#e9ecef"), binwidth = 2) + facet_grid(as.factor(hormon) ~ .) + labs(x = "Age", y = "Density") + scale_fill_manual(values=c("#69b3a2", "#337CA0"), name="Hormonal\ntreatment", breaks=c("0", "1"), labels=c("no", "yes")) + theme_light() plot.age ggplot(bcrot, aes(y=age, x=as.factor(hormon), fill=as.factor(hormon))) + geom_boxplot() + labs(y = "Age", x = "Treatment") + scale_fill_manual(values=c("#69b3a2", "#337CA0"), name="Hormonal\ntreatment", breaks=c("0", "1"), labels=c("no", "yes")) + theme_light() # Lymph nodes ~ hormon plot.nodes <- ggplot(bcrot, aes(x=nodes, after_stat(density)), fill=as.factor(hormon)) + geom_histogram(aes(fill=as.factor(hormon)), color=c("#e9ecef"), binwidth = 1) + facet_grid(as.factor(hormon) ~ .) + labs(x = "Number of positive lymph nodes", y = "Density") + scale_fill_manual(values=c("#69b3a2", "#337CA0"), name="Hormonal\ntreatment", breaks=c("0", "1"), labels=c("no", "yes")) + theme_light() plot.nodes ggplot(bcrot, aes(x=enodes, after_stat(density)), fill=as.factor(hormon)) + geom_histogram(aes(fill=as.factor(hormon)), color=c("#e9ecef"), binwidth = 0.1) + facet_grid(as.factor(hormon) ~ .) + labs(x = "Number of positive lymph nodes (transformed: exp(-0.12 * nodes))", y = "Density") + scale_fill_manual(values=c("#69b3a2", "#337CA0"), name="Hormonal\ntreatment", breaks=c("0", "1"), labels=c("no", "yes")) + theme_light() ggplot(bcrot, aes(x=as.factor(hormon), y=enodes, fill=as.factor(hormon))) + geom_boxplot() + labs(y = "Number of positive lymph nodes (transformed: exp(-0.12 * nodes))", x = "Treatment") + scale_fill_manual(values=c("#69b3a2", "#337CA0"), name="Hormonal\ntreatment", breaks=c("0", "1"), labels=c("no", "yes")) + theme_light() # PgR (fmol/l), log ~ hormon ggplot(bcrot, aes(x=pr_1, after_stat(density)), fill=as.factor(hormon)) + geom_histogram(aes(fill=as.factor(hormon)), color=c("#e9ecef"), binwidth = 0.5) + facet_grid(as.factor(hormon) ~ .) + labs(x = "PgR [fmol/l] (transformed: log(pr)", y = "Density") + scale_fill_manual(values=c("#69b3a2", "#337CA0"), name="Hormonal\ntreatment", breaks=c("0", "1"), labels=c("no", "yes")) + theme_light() ggplot(bcrot, aes(y=pr_1, x=as.factor(hormon), fill=as.factor(hormon))) + geom_boxplot() + labs(y = "PgR [fmol/l] (transformed: log(pr)", x = "Density") + scale_fill_manual(values=c("#69b3a2", "#337CA0"), name="Hormonal\ntreatment", breaks=c("0", "1"), labels=c("no", "yes")) + theme_light()
# Unadjusted linear regression lm.ua <- lm(qol ~ hormon, data = bcrot) summary(lm.ua)
# Main-effects linear regression lm.a <- lm(qol ~ hormon + age + enodes + pr_1, data = bcrot) summary(lm.a)
ps <- glm(hormon ~ age + enodes + pr_1, data = bcrot, family = binomial(link="logit")) # add fitted values to data set bcrot$ps <- fitted(ps) # plot density ggplot(bcrot, aes(ps, fill = as.factor(hormon))) + geom_density(alpha = 0.5) + labs(x = "Propensity score", y = "Density", fill = "Hormone\ntreatment") + scale_fill_manual(values=c("#69b3a2", "#337CA0"), name="Hormonal\ntreatment", breaks=c("0", "1"), labels=c("no", "yes")) + theme_light() # Histogram ggplot(data = bcrot, aes(x = ps, after_stat(density), fill = as.factor(hormon))) + #geom_histogram(alpha = 0.5, position = "identity", binwidth = 0.05) + geom_histogram(aes(fill=as.factor(hormon)), color=c("#e9ecef"), binwidth = 0.05) + facet_grid(hormon ~ .) + labs(x = "Propensity score", y = "Density") + scale_fill_manual(values=c("#69b3a2", "#337CA0"), name="Hormonal\ntreatment", breaks=c("0", "1"), labels=c("no", "yes")) + theme_light()
bcrot$w <- (as.numeric(as.character(bcrot$hormon)) / bcrot$ps) + ((1 - as.numeric(as.character(bcrot$hormon))) / (1 - bcrot$ps)) summary(bcrot$w) # Plot inverse probability weights vs. index ggplot(bcrot, aes(x = 1:nrow(bcrot), y = w)) + geom_point() + xlab(" ") + ylab(" ") + ylim(0, 65) + theme_minimal() # Covariate balance plot (love plot) cobalt::love.plot(hormon ~ age + enodes + pr_1, data = bcrot, weights = bcrot$w, s.d.denom = "pooled", thresholds = c(m = .1))
plot.age plot.nodes bcrot2 <- bcrot %>% filter(age >= 40, nodes > 0) %>% mutate(age.2 = age * age) %>% # add age² select(-c(ps, w)) table(bcrot2$hormon) table(bcrot$hormon)
ipw2 <- ipw::ipwpoint(exposure = hormon, family = "binomial", link = "logit", denominator = ~ age + age.2 + enodes + age*pr_1, data = bcrot2) bcrot2$ipw <- ipw2$ipw.weights # Plot Inverse Probability Weights summary(ipw2$ipw.weights) ipw::ipwplot(weights = ipw2$ipw.weights, logscale = FALSE, main = "Stabilized weights", xlab = "Weights", xlim = c(1, 10)) # Marginal structural model for the causal effect of hormon on qol # corrected for confounding using inverse probability weighting # with robust standard error from the survey package. model_sm2 <- survey::svyglm(qol ~ hormon, design = svydesign(~ 1, weights = ~ipw, data = bcrot2)) summary(model_sm2) confint(model_sm2)
ps <- glm(hormon ~ age + age.2 + enodes + age*pr_1, data = bcrot2, family = binomial(link="logit")) bcrot2$ps <- fitted(ps) bcrot2$w <- (as.numeric(as.character(bcrot2$hormon)) / bcrot2$ps) + ((1 - as.numeric(as.character(bcrot2$hormon))) / (1 - bcrot2$ps)) # Weights based on ~age + age.2 + enodes + age*pr_1 model_w <- lm(qol ~ hormon , weights = w, data = bcrot2) summary(model_w) # Variance estimation using the robust sandwich variance estimator library(sandwich) (sandwich_se <- diag(vcovHC(model_w, type = "HC"))^0.5) # confidence interval sandwichCI <- c(coef(model_w)[2] - 1.96 * sandwich_se[2], coef(model_w)[2] + 1.96 * sandwich_se[2])
msm.out <- rbind(cbind(coef(model_w), confint(model_w))[2,], c(coef(model_w)[2], sandwichCI), cbind(coef(model_sm2), confint(model_sm2))[2,]) dimnames(msm.out) <- list(c("MSM:", "MSM, robust SE (sandwich):","MSM, robust SE (ipw):"), c("est", colnames(msm.out)[2:3])) msm.out
# truncate weights wq <- quantile(bcrot2$ipw, probs = c(0.025, 0.975)) bcrot2$wt <- if_else(bcrot2$ipw < wq[1], wq[1], bcrot2$ipw) bcrot2$wt <- if_else(bcrot2$wt > wq[2], wq[2], bcrot2$wt) summary(bcrot2$ipw) summary(bcrot2$wt) # Plot weight vs. index without and with truncation ggplot(bcrot2, aes(x = 1:nrow(bcrot2), y = ipw)) + geom_point() + xlab(" ") + ylab(" ") + ylim(0, 65) + labs(title = "Not truncated") + theme_minimal() ggplot(bcrot2, aes(x = 1:nrow(bcrot2), y = wt)) + geom_point() + xlab(" ") + ylab(" ") + ylim(0, 10) + labs(title="Truncated weights") + theme_minimal() ggplot(bcrot2, aes(ipw, fill = as.factor(hormon))) + geom_density(alpha = 0.5) + labs(x = "Propensity score", y = "Density", fill = "Hormone\ntreatment") + scale_fill_manual(values=c("#69b3a2", "#337CA0"), name="Hormonal\ntreatment", breaks=c("0", "1"), labels=c("no", "yes")) + labs(title="Not truncated") + theme_light() ggplot(bcrot2, aes(wt, fill = as.factor(hormon))) + geom_density(alpha = 0.5) + labs(x = "Propensity score", y = "Density", fill = "Hormone\ntreatment") + scale_fill_manual(values=c("#69b3a2", "#337CA0"), name="Hormonal\ntreatment", breaks=c("0", "1"), labels=c("no", "yes")) + labs(title="Truncated weights") + theme_light() # loveplot without and with truncation cobalt::love.plot(hormon ~ age + age.2 + enodes + age*pr_1, data = bcrot2, weights = bcrot2$ipw, s.d.denom = "pooled", thresholds = c(m = .1), title = "Not truncated" ) cobalt::love.plot(hormon ~ age + age.2 + enodes + age*pr_1, data = bcrot2, weights = bcrot2$wt, s.d.denom = "pooled", thresholds = c(m = .1), title = "Truncated weights")
fit <- glm(qol ~ hormon*age + hormon*age.2 + hormon*enodes + hormon*pr_1 + age*pr_1, data = bcrot2) fit.std <- stdGlm(fit = fit, data = as.data.frame(bcrot2), X = "hormon") print(summary(fit.std)) plot(fit.std) # Confidence interval: Mean difference summary(fit.std, contrast = "difference", reference = "0")
# Build SuperLearner libraries for outcome (Q) and exposure models (g) sl.lib <- c("SL.mean","SL.glm") # Construct an aipw object for later estimations AIPW_SL <- AIPW::AIPW$new(Y = bcrot2$qol, A = as.integer(as.character(bcrot2$hormon)), W = subset(bcrot2, select = c("enodes", "age", "pr_1")), Q.SL.library = sl.lib, g.SL.library = sl.lib, k_split = 10, verbose = TRUE) # Fit the data to the AIPW object and check the balance of propensity scores raipw <- AIPW_SL$fit()$summary() AIPW_SL$fit()$plot.p_score()$plot.ip_weights() # Truncate weights (default truncation is set to 0.025) AIPW_SL$fit()$summary(g.bound = c(0.05, 0.95))$plot.p_score()$plot.ip_weights() # Calculate average treatment effects among the treated/untreated (controls) (ATT/ATC) AIPW_SL$stratified_fit()$summary() # extract weights for loveplots AIPW_SL$plot.ip_weights() bcrot2$aipw <- AIPW_SL$ip_weights.plot$data$ip_weights # loveplots + AIPW cobalt::love.plot(hormon ~ age*enodes*pr_1 + age.2*enodes*pr_1, data = bcrot2, weights = bcrot2$aipw, s.d.denom = "pooled", thresholds = c(m = .1))
# Main-effects linear regression lm.ar <- lm(qol ~ hormon + age + enodes + pr_1, data = bcrot2)
out <- rbind(c(2.07, rep(NA,3)), c(coef(summary(lm.ar))[2,1:2],confint(lm.ar)[2,]), c(summary(model_sm2)$coefficients[2,1:2], confint(model_sm2)[2,]), summary(fit.std, contrast = "difference", reference = "0")$est.table[2,], raipw$estimates$RD) dimnames(out) <- list(c("True ATE", "LM", "IPW", "RS", "AIPW"), c("Estimate", "SE", "lower", "upper")) print(round(out,3))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.