knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(ltmle)

Marginal Structural Models (MSMs) - Multiple regimes with a single outcome

In this example there are 5 time points with a treatment and time varying covariate. At each, treatment can be 0 or 1. There is a single outcome Y.

L_0 L_1 A_1 L_2 A_2 ... L_5 A_5 Y

There are 2^5 = 32 regimes of interest. Some of these may have limited support because there are few patients who follow a particular regime. We pool over all regimes using a working marginal structural model. In this example we want to know the effect of time on treatment on Y. We include time.on.treatment and time.on.treatment^2.

rexpit <- function(x) rbinom(n=length(x), size=1, prob=plogis(x))
n <- 5000
time.points <- 5
prev.L <- rnorm(n)
prev.A <- rep(0, n)
sum.A <- rep(0, n)
data <- data.frame(L_0 = prev.L)
for (t in 1:time.points) {
  L <- 0.1 * prev.L + 0.3 * prev.A + rnorm(n)
  A <- rexpit(L)

  data1 <- data.frame(L, A)
  names(data1) <- paste0(c("L_", "A_"), t)
  data <- cbind(data, data1)

  prev.A <- A
  prev.L <- L

  sum.A <- sum.A + A
}
data$Y <- rexpit(sum.A / time.points + L)
head(data)
regime.matrix <- as.matrix(expand.grid(rep(list(0:1), time.points)))
dim(regime.matrix)
head(regime.matrix, 20)
num.regimes <- 2^time.points
regimes <- array(dim = c(n, time.points, num.regimes)) #n x numAnodes x numRegimes = n x time.points x 2^time.points
summary.measures <- array(dim = c(num.regimes, 1, 1)) #numRegimes x num.summary.measures x num.final.Ynodes = 2^time.points x 1 x 1
for (i in 1:num.regimes) {
  regimes[, , i] <- matrix(regime.matrix[i, ], byrow = TRUE, nrow = n, ncol = time.points)
  summary.measures[i, 1, 1] <- sum(regime.matrix[i, ])
}
colnames(summary.measures) <- "time.on.treatment"
regimes[1:3, , 1:3]
summary.measures

For large numbers of regimes, variance.method = "ic" is much faster, but may give anticonservative confidence intervals. You may want to use variance.method = "ic" first to make sure the MSM coefficients look reasonable and then use variance.method = "tmle" (the default) for your final estimates.

result1 <- ltmleMSM(data, Anodes = paste0("A_", 1:time.points), 
                    Lnodes = paste0("L_", 0:time.points), Ynodes = "Y", 
                    regimes = regimes, summary.measures = summary.measures, 
                    working.msm = "Y ~ time.on.treatment + I(time.on.treatment^2)",
                    variance.method = "ic")
result2 <- ltmleMSM(data, Anodes = paste0("A_", 1:time.points), 
                    Lnodes = paste0("L_", 0:time.points), Ynodes = "Y", 
                    regimes = regimes, summary.measures = summary.measures, 
                    working.msm = "Y ~ time.on.treatment + I(time.on.treatment^2)")
summary(result1)
summary(result2)

Suppose we are only interested in the effect of time on treatment on Y considering regimes that include at least 3 periods on treatment.

at.least.3 <- summary.measures[, "time.on.treatment", 1] >= 3
regimes.3 <- regimes[, , at.least.3]
summary.measures.3 <- summary.measures[at.least.3, , , drop = F]
dim(regimes.3)
summary.measures.3

result <- ltmleMSM(data, Anodes = paste0("A_", 1:time.points), 
                   Lnodes = paste0("L_", 0:time.points), 
                   Ynodes = "Y", regimes = regimes.3, 
                   summary.measures = summary.measures.3, 
                   working.msm = "Y ~ time.on.treatment + I(time.on.treatment^2)")
summary(result)

Marginal Structural Models (MSMs) - Switching time example - Multiple regimes and outcomes

Given data over 3 time points where A switches to 1 once and then stays 1. We want to know how death varies as a function of gender, time and an indicator of whether a patient's intended regime was to switch before time. Note that working.msm includes time and switch.time, which are columns of summary.measures; working.msm also includes male, which is ok because it is a baseline covariate (it comes before any A/C/L/Y nodes).

data(sampleDataForLtmleMSM)
head(sampleDataForLtmleMSM$data, 20)
dim(sampleDataForLtmleMSM$regimes)
sampleDataForLtmleMSM$regimes[1:5, , ]
sampleDataForLtmleMSM$summary.measures
Anodes <- c("A0", "A1", "A2")
Lnodes <- c("CD4_1", "CD4_2")
Ynodes <- c("Y1", "Y2", "Y3")

Here msm.weights is just an example. It could also be a 200x3x4 array or NULL (for no weights), or "empirical" (the default).

msm.weights <- matrix(1:12, nrow=4, ncol=3)  
result.regimes <- ltmleMSM(sampleDataForLtmleMSM$data, Anodes=Anodes, 
                   Lnodes=Lnodes, Ynodes=Ynodes, 
                   survivalOutcome=TRUE,
                   regimes=sampleDataForLtmleMSM$regimes, 
                   summary.measures=sampleDataForLtmleMSM$summary.measures,
                   final.Ynodes=Ynodes, 
                   working.msm="Y ~ male + time + pmax(time - switch.time, 0)", 
                   msm.weights=msm.weights, estimate.time=FALSE)
print(summary(result.regimes))

regimes can also be specified as a list of rule functions where each rule is a function applied to each row of data which returns a numeric vector of the same length as Anodes.

regimesList <- list(function(row) c(1,1,1),
                     function(row) c(0,1,1),
                     function(row) c(0,0,1),
                     function(row) c(0,0,0))
result.regList <- ltmleMSM(sampleDataForLtmleMSM$data, Anodes=Anodes, 
                   Lnodes=Lnodes, Ynodes=Ynodes, 
                   survivalOutcome=TRUE, regimes=regimesList, 
                   summary.measures=sampleDataForLtmleMSM$summary.measures,
                   final.Ynodes=Ynodes, 
                   working.msm="Y ~ male + time + pmax(time - switch.time, 0)", 
                   msm.weights=msm.weights, estimate.time=FALSE)

This should be the same as result.regimes:

print(summary(result.regList))         

Suppose we are only interested in pooling over the result at Y1 and Y3.

result <- ltmleMSM(sampleDataForLtmleMSM$data, Anodes=Anodes, 
                   Lnodes=Lnodes, Ynodes=Ynodes, 
                   survivalOutcome=TRUE,
                   regimes=sampleDataForLtmleMSM$regimes, 
                   summary.measures=sampleDataForLtmleMSM$summary.measures[, , c(1, 3)],
                   final.Ynodes=c("Y1", "Y3"), 
                   working.msm="Y ~ male + time + pmax(time - switch.time, 0)", 
                   estimate.time=FALSE)
summary(result)

We could be only interested in the result at Y3. time is now a constant in working.msm, so let's remove it.

result <- ltmleMSM(sampleDataForLtmleMSM$data, Anodes=Anodes, 
                    Lnodes=Lnodes, Ynodes=Ynodes, 
                    survivalOutcome=TRUE,
                    regimes=sampleDataForLtmleMSM$regimes, 
                    summary.measures=sampleDataForLtmleMSM$summary.measures[, , 3],
                    final.Ynodes="Y3", 
                    working.msm="Y ~ male + switch.time", 
                    estimate.time=FALSE)
summary(result)

Marginal Structural Models (MSMs) - Dynamic Treatment

In this example there are two treatment nodes and one outcome: W A1 L A2 Y W is normally distributed and L is continuous in (0, 1). We are interested in treatments where A1 is set to either 0 or 1 and A2 is set dynamically. The treatment for A2 is indexed by theta between 0 and 1. If $L > theta$, set A2 to 1, otherwise set A2 to 0.
Here is a function that can be used to generate observed data (if abar = NULL) or generate counterfactual truth (if abar is a list with a1 and theta):

GenerateData <- function(n, abar = NULL) {
  W <- rnorm(n)
  if (is.null(abar)) {
    A1 <- rexpit(W)
  } else {
    A1 <- abar$a1
  }
  L <- plogis(rnorm(n) + 0.3 * W + 0.5 * A1)
  if (is.null(abar)) {
    A2 <- rexpit(-0.5 * W + A1 - 0.6 * L)
  } else {
    A2 <- as.integer(L > abar$theta)
  }
  Y <- rexpit(-2 + W + A1 + L + 2 * A2)
  if (is.null(abar)) {
    return(data.frame(W, A1, L, A2, Y))
  } else {
    return(mean(Y))
  }
}

Set up regimes and summary.measures:

set.seed(11)
n <- 10000
data <- GenerateData(n)
regimes <- array(dim = c(n, 2, 10)) #n x num.Anodes x num.regimes
theta.set <- seq(0, 1, length.out = 5)
summary.measures <- array(theta.set, dim = c(10, 2, 1))
colnames(summary.measures) <- c("a1", "theta")
cnt <- 0
for (a1 in 0:1) {
  for (theta.index in 1:5) {
    cnt <- cnt + 1
    regimes[, 1, cnt] <- a1
    regimes[, 2, cnt] <- data$L > theta.set[theta.index]
    summary.measures[cnt, , 1] <- c(a1, theta.set[theta.index])
  }
}
summary.measures
head(data, 3)
regimes[1:3, , ]
working.msm <- "Y ~ a1*theta"
summary(ltmleMSM(data, Anodes = c("A1", "A2"), Lnodes = "L", Ynodes = "Y", 
              regimes = regimes, summary.measures = summary.measures, 
              working.msm = working.msm))

Let's compare to the true coefficients of the MSM. First we find the true value of $E[Y_{a1, theta}]$ for 5 values of theta.

truth <- rep(NA_real_, 10)
cnt <- 0
for (a1 in 0:1) {
  for (theta.index in 1:5) {
    cnt <- cnt + 1
    truth[cnt] <- GenerateData(n = 1e6, 
                    abar = list(a1 = a1, theta = theta.set[theta.index]))
  }
}

Fit a working MSM to the true values of $E[Y_{a1, theta}]$.

m.true <- glm(working.msm, 
              data = data.frame(Y = truth, summary.measures[, , 1]), 
              family = "quasibinomial")
m.true

The estimated MSM coefficients are close to the true MSM coefficients.



joshuaschwab/ltmle documentation built on April 20, 2023, 12:05 p.m.