In bips-hb/DataTrainCausalLearning: Practicals for the Data Train Course "Causal learning" 2021 (V Didelez)

knitr::opts_chunk$set(
  echo = TRUE,
  eval = TRUE,
  collapse = TRUE,
  fig.width = 6)

Load packages

library(DataTrainCausalLearning)
# required for data management and plots
library(data.table)
library(tidyverse)
library(ggplot2)

# required for analysis
library(stdReg)
library(ipw)
library(survey)
library(cobalt)
library(sandwich)
library(AIPW)
library(SuperLearner)

Load the Rotterdam Breast Cancer data set

data(bcrot)

Descriptive Analysis

Compare descriptively QOL for those who do and do not take hormonal therapy

# qol
ggplot(bcrot, aes(x=qol, after_stat(density))) +
  geom_histogram(fill="#69b3a2", color="#e9ecef", alpha=0.9, binwidth = 5) +
  labs(x="Quality of life", y="Density") +
  theme_light()

# age ~ hormon
plot.age <- ggplot(bcrot, aes(x=age, after_stat(density)), fill=as.factor(hormon)) +
    geom_histogram(aes(fill=as.factor(hormon)), color=c("#e9ecef"), binwidth = 2) +
    facet_grid(as.factor(hormon) ~ .) +
    labs(x = "Age", y = "Density") +
    scale_fill_manual(values=c("#69b3a2", "#337CA0"),
                      name="Hormonal\ntreatment",
                      breaks=c("0", "1"),
                      labels=c("no", "yes")) +
    theme_light()
plot.age

ggplot(bcrot, aes(y=age, x=as.factor(hormon), fill=as.factor(hormon))) +
  geom_boxplot() +
  labs(y = "Age", x = "Treatment") +
  scale_fill_manual(values=c("#69b3a2", "#337CA0"),
                    name="Hormonal\ntreatment",
                    breaks=c("0", "1"),
                    labels=c("no", "yes")) +
  theme_light()


# Lymph nodes ~ hormon
plot.nodes <- ggplot(bcrot, aes(x=nodes, after_stat(density)), fill=as.factor(hormon)) +
    geom_histogram(aes(fill=as.factor(hormon)), color=c("#e9ecef"), binwidth = 1) +
    facet_grid(as.factor(hormon) ~ .) +
    labs(x = "Number of positive lymph nodes", y = "Density") +
    scale_fill_manual(values=c("#69b3a2", "#337CA0"),
                      name="Hormonal\ntreatment",
                      breaks=c("0", "1"),
                      labels=c("no", "yes")) +
    theme_light()
plot.nodes

ggplot(bcrot, aes(x=enodes, after_stat(density)), fill=as.factor(hormon)) +
  geom_histogram(aes(fill=as.factor(hormon)), color=c("#e9ecef"), binwidth = 0.1) +
  facet_grid(as.factor(hormon) ~ .) +
  labs(x = "Number of positive lymph nodes (transformed: exp(-0.12 * nodes))", 
       y = "Density") +
  scale_fill_manual(values=c("#69b3a2", "#337CA0"),
                    name="Hormonal\ntreatment",
                    breaks=c("0", "1"),
                    labels=c("no", "yes")) +
  theme_light()

ggplot(bcrot, aes(x=as.factor(hormon), y=enodes, fill=as.factor(hormon))) +
  geom_boxplot() +
  labs(y = "Number of positive lymph nodes (transformed: exp(-0.12 * nodes))", 
       x = "Treatment") +
  scale_fill_manual(values=c("#69b3a2", "#337CA0"),
                    name="Hormonal\ntreatment",
                    breaks=c("0", "1"),
                    labels=c("no", "yes")) +
  theme_light()


# PgR (fmol/l), log ~ hormon
ggplot(bcrot, aes(x=pr_1, after_stat(density)), fill=as.factor(hormon)) +
  geom_histogram(aes(fill=as.factor(hormon)), color=c("#e9ecef"), binwidth = 0.5) +
  facet_grid(as.factor(hormon) ~ .) +
  labs(x = "PgR [fmol/l] (transformed: log(pr)", y = "Density") +
  scale_fill_manual(values=c("#69b3a2", "#337CA0"),
                    name="Hormonal\ntreatment",
                    breaks=c("0", "1"),
                    labels=c("no", "yes")) +
  theme_light()

ggplot(bcrot, aes(y=pr_1, x=as.factor(hormon), fill=as.factor(hormon))) +
  geom_boxplot() +
  labs(y = "PgR [fmol/l] (transformed: log(pr)", x = "Density") +
  scale_fill_manual(values=c("#69b3a2", "#337CA0"),
                    name="Hormonal\ntreatment",
                    breaks=c("0", "1"),
                    labels=c("no", "yes")) +
  theme_light()

Analysis

Linear Model

# Unadjusted linear regression
lm.ua <- lm(qol ~ hormon, data = bcrot)
summary(lm.ua)

# Main-effects linear regression
lm.a <- lm(qol ~ hormon + age + enodes + pr_1, data = bcrot)
summary(lm.a)

Propensity score

ps <- glm(hormon ~ age + enodes + pr_1, 
          data = bcrot, 
          family = binomial(link="logit"))

# add fitted values to data set
bcrot$ps <- fitted(ps)

# plot density
ggplot(bcrot, aes(ps, fill = as.factor(hormon))) +
  geom_density(alpha = 0.5) +
  labs(x = "Propensity score",
       y = "Density",
       fill = "Hormone\ntreatment") +
  scale_fill_manual(values=c("#69b3a2", "#337CA0"),
                    name="Hormonal\ntreatment",
                    breaks=c("0", "1"),
                    labels=c("no", "yes")) +
  theme_light()


# Histogram
ggplot(data = bcrot, aes(x = ps, after_stat(density), fill = as.factor(hormon))) +
  #geom_histogram(alpha = 0.5, position = "identity", binwidth = 0.05) +
  geom_histogram(aes(fill=as.factor(hormon)), color=c("#e9ecef"), binwidth = 0.05) +
  facet_grid(hormon ~ .) +
  labs(x = "Propensity score", y = "Density") +
  scale_fill_manual(values=c("#69b3a2", "#337CA0"),
                    name="Hormonal\ntreatment",
                    breaks=c("0", "1"),
                    labels=c("no", "yes")) +
  theme_light()

Inverse Probability Treatment Weighting

bcrot$w <- (as.numeric(as.character(bcrot$hormon)) / bcrot$ps) + 
           ((1 - as.numeric(as.character(bcrot$hormon))) / (1 - bcrot$ps))
summary(bcrot$w)

# Plot inverse probability weights vs. index
ggplot(bcrot, aes(x = 1:nrow(bcrot), y = w)) + 
  geom_point()  +
  xlab(" ") +
  ylab(" ") + 
  ylim(0, 65) +
  theme_minimal()


# Covariate balance plot (love plot)
cobalt::love.plot(hormon ~ age + enodes + pr_1, 
                  data = bcrot, 
                  weights = bcrot$w,
                  s.d.denom = "pooled",
                  thresholds = c(m = .1))

Use subset

plot.age
plot.nodes

bcrot2 <- bcrot %>% filter(age >= 40,
                           nodes > 0) %>%
                    mutate(age.2 = age * age) %>%    # add age²
                    select(-c(ps, w))  

table(bcrot2$hormon)
table(bcrot$hormon)

Estimate Marginal structural model using the IPW package

ipw2 <- ipw::ipwpoint(exposure = hormon, 
                      family = "binomial", link = "logit",
                      denominator = ~ age + age.2 + enodes + age*pr_1,
                      data = bcrot2)
bcrot2$ipw <- ipw2$ipw.weights

# Plot Inverse Probability Weights
summary(ipw2$ipw.weights)
ipw::ipwplot(weights = ipw2$ipw.weights, 
             logscale = FALSE,
             main = "Stabilized weights", 
             xlab = "Weights",
             xlim = c(1, 10))

# Marginal structural model for the causal effect of hormon on qol
# corrected for confounding using inverse probability weighting
# with robust standard error from the survey package.
model_sm2 <- survey::svyglm(qol ~ hormon,
                            design = svydesign(~ 1, 
                                               weights = ~ipw,
                                               data = bcrot2))
summary(model_sm2)
confint(model_sm2)

Estimate Marginal structural model "by hand"

ps <- glm(hormon ~ age + age.2 + enodes + age*pr_1,
          data = bcrot2, 
          family = binomial(link="logit"))
bcrot2$ps <- fitted(ps)
bcrot2$w <- (as.numeric(as.character(bcrot2$hormon)) / bcrot2$ps) + 
           ((1 - as.numeric(as.character(bcrot2$hormon))) / (1 - bcrot2$ps))

# Weights based on ~age + age.2 + enodes + age*pr_1
model_w <- lm(qol ~ hormon , weights = w, data = bcrot2)
summary(model_w)

# Variance estimation using the robust sandwich variance estimator
library(sandwich)
(sandwich_se <- diag(vcovHC(model_w, type = "HC"))^0.5)
# confidence interval
sandwichCI <- c(coef(model_w)[2] - 1.96 * sandwich_se[2],
                      coef(model_w)[2] + 1.96 * sandwich_se[2])

Compare confidence intervals

msm.out <- rbind(cbind(coef(model_w), confint(model_w))[2,],
                 c(coef(model_w)[2], sandwichCI),
                 cbind(coef(model_sm2), confint(model_sm2))[2,])
dimnames(msm.out) <- list(c("MSM:", "MSM, robust SE (sandwich):","MSM, robust SE (ipw):"), 
                          c("est", colnames(msm.out)[2:3]))
msm.out

Investigate (extreme) weights

# truncate weights
wq <- quantile(bcrot2$ipw, probs = c(0.025, 0.975)) 
bcrot2$wt <- if_else(bcrot2$ipw < wq[1], wq[1], bcrot2$ipw)
bcrot2$wt <- if_else(bcrot2$wt  > wq[2], wq[2], bcrot2$wt)

summary(bcrot2$ipw)
summary(bcrot2$wt)

# Plot weight vs. index without and with truncation
ggplot(bcrot2, aes(x = 1:nrow(bcrot2), y = ipw)) + 
  geom_point()  +
  xlab(" ") +
  ylab(" ") + 
  ylim(0, 65) +
  labs(title = "Not truncated") +
  theme_minimal()

ggplot(bcrot2, aes(x = 1:nrow(bcrot2), y = wt)) + 
  geom_point()  +
  xlab(" ") +
  ylab(" ") + 
  ylim(0, 10) +
  labs(title="Truncated weights") + 
  theme_minimal()


ggplot(bcrot2, aes(ipw, fill = as.factor(hormon))) +
  geom_density(alpha = 0.5) +
  labs(x = "Propensity score",
       y = "Density",
       fill = "Hormone\ntreatment") +
  scale_fill_manual(values=c("#69b3a2", "#337CA0"),
                    name="Hormonal\ntreatment",
                    breaks=c("0", "1"),
                    labels=c("no", "yes")) +
  labs(title="Not truncated") +
  theme_light()


ggplot(bcrot2, aes(wt, fill = as.factor(hormon))) +
  geom_density(alpha = 0.5) +
  labs(x = "Propensity score",
       y = "Density",
       fill = "Hormone\ntreatment") +
  scale_fill_manual(values=c("#69b3a2", "#337CA0"),
                    name="Hormonal\ntreatment",
                    breaks=c("0", "1"),
                    labels=c("no", "yes")) +
  labs(title="Truncated weights") +
  theme_light()


# loveplot without and with truncation
cobalt::love.plot(hormon ~ age + age.2 + enodes + age*pr_1, 
                     data = bcrot2, 
                     weights = bcrot2$ipw,
                     s.d.denom = "pooled",
                     thresholds = c(m = .1),
                     title = "Not truncated"
                     )

cobalt::love.plot(hormon ~ age + age.2 + enodes + age*pr_1, 
                      data = bcrot2, 
                      weights = bcrot2$wt,
                      s.d.denom = "pooled",
                      thresholds = c(m = .1),
                      title = "Truncated weights")

Regression standardization

fit <- glm(qol ~ hormon*age + hormon*age.2 + hormon*enodes + hormon*pr_1 + age*pr_1,
           data = bcrot2)
fit.std <- stdGlm(fit = fit, data = as.data.frame(bcrot2), X = "hormon")
print(summary(fit.std))
plot(fit.std)

# Confidence interval: Mean difference 
summary(fit.std, contrast = "difference", reference = "0")

Double machine learning - Augmented Inverse Probability Weighting

# Build SuperLearner libraries for outcome (Q) and exposure models (g)
sl.lib <- c("SL.mean","SL.glm")

# Construct an aipw object for later estimations 
AIPW_SL <- AIPW::AIPW$new(Y = bcrot2$qol,
                          A = as.integer(as.character(bcrot2$hormon)),
                          W = subset(bcrot2, select = c("enodes", "age", "pr_1")), 
                          Q.SL.library = sl.lib,
                          g.SL.library = sl.lib,
                          k_split = 10,
                          verbose = TRUE)

# Fit the data to the AIPW object and check the balance of propensity scores
raipw <- AIPW_SL$fit()$summary()
AIPW_SL$fit()$plot.p_score()$plot.ip_weights()

# Truncate weights (default truncation is set to 0.025)
AIPW_SL$fit()$summary(g.bound = c(0.05, 0.95))$plot.p_score()$plot.ip_weights()

# Calculate average treatment effects among the treated/untreated (controls) (ATT/ATC)
AIPW_SL$stratified_fit()$summary()


# extract weights for loveplots
AIPW_SL$plot.ip_weights()
bcrot2$aipw <- AIPW_SL$ip_weights.plot$data$ip_weights

# loveplots + AIPW
cobalt::love.plot(hormon ~ age*enodes*pr_1 + 
                           age.2*enodes*pr_1, 
                  data = bcrot2, 
                  weights = bcrot2$aipw,
                  s.d.denom = "pooled",
                  thresholds = c(m = .1))

Linear Model in the restricted sample

# Main-effects linear regression
lm.ar <- lm(qol ~ hormon + age + enodes + pr_1, data = bcrot2)

Compare results in the restricted sample

out <- rbind(c(2.07, rep(NA,3)), 
             c(coef(summary(lm.ar))[2,1:2],confint(lm.ar)[2,]),
             c(summary(model_sm2)$coefficients[2,1:2], confint(model_sm2)[2,]),
             summary(fit.std, contrast = "difference", reference = "0")$est.table[2,],
             raipw$estimates$RD)
dimnames(out) <- list(c("True ATE", "LM", "IPW", "RS", "AIPW"),
                      c("Estimate", "SE", "lower", "upper"))
print(round(out,3))

bips-hb/DataTrainCausalLearning documentation built on Dec. 5, 2023, 8:32 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

bips-hb/DataTrainCausalLearning
Practicals for the Data Train Course "Causal learning" 2021 (V Didelez)

In bips-hb/DataTrainCausalLearning: Practicals for the Data Train Course "Causal learning" 2021 (V Didelez)

Load packages

Load the Rotterdam Breast Cancer data set

Descriptive Analysis

Analysis

Linear Model

Propensity score

Inverse Probability Treatment Weighting

Use subset

Estimate Marginal structural model using the IPW package

Estimate Marginal structural model "by hand"

Compare confidence intervals

Investigate (extreme) weights

Regression standardization

Double machine learning - Augmented Inverse Probability Weighting

Linear Model in the restricted sample

Compare results in the restricted sample

R Package Documentation

Browse R Packages

We want your feedback!

bips-hb/DataTrainCausalLearning Practicals for the Data Train Course "Causal learning" 2021 (V Didelez)

In bips-hb/DataTrainCausalLearning: Practicals for the Data Train Course "Causal learning" 2021 (V Didelez)

Load packages

Load the Rotterdam Breast Cancer data set

Descriptive Analysis

Analysis

Linear Model

Propensity score

Inverse Probability Treatment Weighting

Use subset

Estimate Marginal structural model using the IPW package

Estimate Marginal structural model "by hand"

Compare confidence intervals

Investigate (extreme) weights

Regression standardization

Double machine learning - Augmented Inverse Probability Weighting

Linear Model in the restricted sample

Compare results in the restricted sample

R Package Documentation

Browse R Packages

We want your feedback!

bips-hb/DataTrainCausalLearning
Practicals for the Data Train Course "Causal learning" 2021 (V Didelez)