knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.retina = 3, # change to 3 
  fig.align = "center",
  fig.width = 7,
  fig.height = 4,
  #dev.args = list(bg = "transparent"),
  #fig.ext = "svg",
  echo = FALSE#,
  #dpi = 320
)
library(CauseSpecCovarMI)
library(ggplot2)
library(ggpubr)

theme_set(
  theme_bw(base_size = 11)
)

set.seed(1984)

1 Event times

Please refer to the main paper for introduction of the notation. Given standard normal $Z$ and a standard normal or binary $X$, event times were generated using the following latent parametrisation:

\begin{align} T_1 &\sim \text{Weibull}(\kappa_1, \lambda_1 = \lambda_{10}e^{\beta_1 X + \gamma_1 Z}), \ T_2 &\sim \text{Weibull}(\kappa_2, \lambda_2 = \lambda_{20}e^{\beta_2 X + \gamma_2 Z}), \ C &\sim \text{Exp}(\lambda_C), \end{align} where $\text{Weibull}(\cdot)$ is a Weibull distribution with hazard $h(t) = \lambda\kappa t^{\kappa -1}$, with $\kappa$ and $\lambda$ the shape and rate parameters respecitively. We varied $\beta_1 = {0, 0.5, 1}$, and fixed $\gamma_1 = 1$, $\beta_2 = 0.5$ and $\gamma_2 = 0.5$. The censoring rate was also fixed at $\lambda_C = 0.14$.

Concerning the baseline hazard shapes, there were two parametrisations: 'similar' and 'different':

baseline_evs <- CauseSpecCovarMI::mds_shape_rates

# Set event-generating parameters for REL
ev1_pars <- list(
  "a1" = baseline_evs[baseline_evs$state == "REL", "shape"], 
  "h1_0" = baseline_evs[baseline_evs$state == "REL", "rate"],
  "b1" = 0, 
  "gamm1" = 0
)    

# For different hazards condition
ev1_pars_diff <- list(
  "a1" = 1.5, 
  "h1_0" = 0.04,
  "b1" = 0, 
  "gamm1" = 0
)  

# NRM
ev2_pars <- list(
  "a2" = baseline_evs[baseline_evs$state == "NRM", "shape"], 
  "h2_0" = baseline_evs[baseline_evs$state == "NRM", "rate"], 
  "b2" = 0, 
  "gamm2" = 0
)

The figures below visualise the baseline hazards, as well as the corresponding baseline cumulative incidences.

# Over first 10 years
baseline_hazards <- data.table::data.table("times" = seq(0.01, 10, by = 0.1))

baseline_hazards[, ':=' (
  haz_rel_similar = haz_weib(alph = ev1_pars$a1, lam = ev1_pars$h1_0, t = times),
  haz_rel_different = haz_weib(alph = ev1_pars_diff$a1, lam = ev1_pars_diff$h1_0, t = times),
  haz_cens = baseline_evs[baseline_evs$state == "EFS", "rate"],
  haz_nrm = haz_weib(alph = ev2_pars$a2, lam = ev2_pars$h2_0, t = times)
)]

# Plot of similar
p_similar_hazards <- data.table::melt.data.table(
  data = baseline_hazards,
  id.vars = "times",
  value.name = "hazard",
  measure.vars = c("haz_cens", "haz_rel_similar", "haz_nrm"), 
  variable.name = "event"
) %>% 
  ggplot(aes(times, hazard, col = event)) +
  geom_line(size = 1, alpha = 0.8) +
  scale_colour_manual(
    labels = c("Cens", "REL", "NRM"),
    values = 1:3
  ) +
  labs(
    x = "Time (years)",
    y = "Hazard",
    col = "Event",
    title = "Similar hazard shapes"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

# Different ones
p_different_hazards <- data.table::melt.data.table(
  data = baseline_hazards,
  id.vars = "times",
  value.name = "hazard",
  measure.vars = c("haz_cens", "haz_rel_different", "haz_nrm"), 
  variable.name = "event"
) %>% 
  ggplot(aes(times, hazard, col = event)) +
  geom_line(size = 1, alpha = 0.8) +
  scale_colour_manual(
    labels = c("Cens", "REL", "NRM"),
    values = 1:3
  ) +
  labs(
    x = "Time (years)",
    y = "Hazard",
    col = "Event",
    title = "Different hazard shapes"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

# Plot it
ggpubr::ggarrange(
  p_similar_hazards, 
  p_different_hazards,
  ncol = 2, 
  common.legend = TRUE,
  legend = "bottom"
)
# Get true cuminc - similar and different
baseline_cuminc_similar <- get_true_cuminc(
  ev1_pars = ev1_pars,
  ev2_pars = ev2_pars,
  combo = data.frame("val_X" = 0, "val_Z" = 0),
  times = baseline_hazards$times
)

p_similar_cumincs <- data.table::melt.data.table(
  data = data.table::data.table(baseline_cuminc_similar),
  id.vars = "times",
  value.name = "cuminc",
  measure.vars = paste0("true_pstate", 1:3),
  variable.name = "state"
) %>% 
  ggplot(aes(times, cuminc, col = state)) +
  geom_line(size = 1, alpha = 0.8) +
  scale_colour_manual(
    labels = c("Cens", "REL", "NRM"),
    values = 1:3
  ) +
  labs(
    x = "Time (years)",
    y = "Cumulative incidence",
    col = "Event",
    title = "Similar hazard shapes"
  ) +
  theme(plot.title = element_text(hjust = 0.5))


baseline_cuminc_different <- get_true_cuminc(
  ev1_pars = ev1_pars_diff,
  ev2_pars = ev2_pars,
  combo = data.frame("val_X" = 0, "val_Z" = 0),
  times = baseline_hazards$times
)

p_different_cumincs <- data.table::melt.data.table(
  data = data.table::data.table(baseline_cuminc_different),
  id.vars = "times",
  value.name = "cuminc",
  measure.vars = paste0("true_pstate", 1:3),
  variable.name = "state"
) %>% 
  ggplot(aes(times, cuminc, col = state)) +
  geom_line(size = 1, alpha = 0.8) +
  scale_colour_manual(
    labels = c("EFS", "REL", "NRM"),
    values = 1:3
  ) +
  labs(
    x = "Time (years)",
    y = "Cumulative incidence",
    col = "State",
    title = "Difference hazard shapes"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

# Plot it
ggpubr::ggarrange(
  p_similar_cumincs, 
  p_different_cumincs,
  ncol = 2, 
  common.legend = TRUE,
  legend = "bottom"
)

2 Missingness mechanisms

As defined in section 5.1 of the main paper, $R_X$ denotes whether elements of $X$ are fully observed ($R_X = 1$) or missing ($R_X = 0$). There were four different missingness mechanisms for $X$:

Setting $\eta_1 = -2$ and solving for $\eta_0$ such that the average probability of missingness was 50\%, we can visualise each of the mechanisms as follows:

miss_mechs <- c("MCAR", "MAR", "MAR_GEN", "MNAR")

miss_plotlist <- lapply(miss_mechs, function(miss_mech) {

  # Check if MCAR
  eta1 <- ifelse(miss_mech == "MCAR", NA, -2)
  x_axis <- ifelse(miss_mech == "MAR_GEN", "t", "Z")

  # Generate data
  dat <- generate_dat(
    n = 500,
    X_type = "continuous",
    r = 0.5,
    ev1_pars = ev1_pars,
    ev2_pars = ev2_pars,
    rate_cens = baseline_evs[baseline_evs$state == "EFS", "rate"], 
    mech = miss_mech,
    p = 0.5,
    eta1 = eta1
  )

  # Make plot
  p_title <- ifelse(miss_mech == "MAR_GEN", "MAR-T", miss_mech)
  p <- ggplot(data = dat, aes_string(x_axis, "X_orig")) +
    geom_point(alpha = .75, aes(col = factor(miss_ind))) +
    labs(
      y = "X",
      x = ifelse(miss_mech == "MAR_GEN", "T (observed)", x_axis),
      title = p_title
    ) +
    scale_colour_manual(
      "Missing indicator",
      labels = c("Observed", "Missing"),
      values = c(9, 6)
    ) +
    theme(plot.title = element_text(hjust = 0.5))

  return(p)
})

ggpubr::ggarrange(
  plotlist = miss_plotlist,
  ncol = 2,
  nrow = 2,
  common.legend = TRUE,
  legend = "bottom"
)

2.1 Mechanism strength

The $\eta_1$ values represents the strength of the mechanism. Using the MAR mechanism as an example, we can visualise the effect of changing $\eta_1$ while fixing the proportion of missing values at 50\% as follows.

In term of probability of $X$ missing as a function of $Z$:

etas <- c(0, -0.25, -0.5, -0.75, -1, -2)
names(etas) <- etas
Z <- rnorm(500)

etas_MAR <- lapply(etas, function(eta) {

  cbind.data.frame(
    "times" = baseline_hazards$times, 
    "Z" = Z,
    #"prob" = plogis(eta * scale(log(dat$t))),
    "prob" = plogis(eta * Z),
    "eta1" = eta
  )
})

data.table::rbindlist(etas_MAR) %>% 
  ggplot(aes(Z, prob, col = factor(eta1), group = factor(eta1))) +
  geom_line(size = 1.5) +
  scale_color_brewer(palette = "Dark2") +
  labs(
    y = "Probability of X missing",
    colour = expression(eta[1])
  )

With the data itself:

etas_MAR_dats <- lapply(etas, function(eta) {

  dat <- generate_dat(
    n = 500,
    X_type = "continuous",
    r = 0.5,
    ev1_pars = ev1_pars,
    ev2_pars = ev2_pars,
    rate_cens = baseline_evs[baseline_evs$state == "EFS", "rate"], 
    mech = "MAR",
    p = 0.5,
    eta1 = eta
  )
})

ploto <- data.table::rbindlist(etas_MAR_dats, idcol = "eta1") 

ploto[, ':=' (
  miss_ind = factor(miss_ind),
  eta1 = factor(
    eta1, levels = as.character(etas), labels = paste0("eta1 = ", as.character(etas))
  )
)]

ploto %>% 
  ggplot(aes(Z, X_orig, col = miss_ind)) +
  geom_point(alpha = 0.75, size = 1) +
  scale_colour_manual(
    "Missing indicator",
    labels = c("Observed", "Missing"),
    values = c(9, 6)
  ) +
  xlab("Z") +
  ylab("X") +
  facet_wrap(. ~ eta1) +
  theme(legend.position = "top")


survival-lumc/CauseSpecCovarMI documentation built on June 16, 2022, 9:51 a.m.