knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.retina = 3, # change to 3 fig.align = "center", fig.width = 7, fig.height = 4, #dev.args = list(bg = "transparent"), #fig.ext = "svg", echo = FALSE#, #dpi = 320 )
library(CauseSpecCovarMI) library(ggplot2) library(ggpubr) theme_set( theme_bw(base_size = 11) ) set.seed(1984)
Please refer to the main paper for introduction of the notation. Given standard normal $Z$ and a standard normal or binary $X$, event times were generated using the following latent parametrisation:
\begin{align} T_1 &\sim \text{Weibull}(\kappa_1, \lambda_1 = \lambda_{10}e^{\beta_1 X + \gamma_1 Z}), \ T_2 &\sim \text{Weibull}(\kappa_2, \lambda_2 = \lambda_{20}e^{\beta_2 X + \gamma_2 Z}), \ C &\sim \text{Exp}(\lambda_C), \end{align} where $\text{Weibull}(\cdot)$ is a Weibull distribution with hazard $h(t) = \lambda\kappa t^{\kappa -1}$, with $\kappa$ and $\lambda$ the shape and rate parameters respecitively. We varied $\beta_1 = {0, 0.5, 1}$, and fixed $\gamma_1 = 1$, $\beta_2 = 0.5$ and $\gamma_2 = 0.5$. The censoring rate was also fixed at $\lambda_C = 0.14$.
Concerning the baseline hazard shapes, there were two parametrisations: 'similar' and 'different':
Similar, ${\kappa_1,\lambda_{10},\kappa_2,\lambda_{20}} = {0.58,0.19,0.53,0.21}$
Different, ${\kappa_1,\lambda_{10},\kappa_2,\lambda_{20}} = {1.5,0.04,0.53,0.21}$
baseline_evs <- CauseSpecCovarMI::mds_shape_rates # Set event-generating parameters for REL ev1_pars <- list( "a1" = baseline_evs[baseline_evs$state == "REL", "shape"], "h1_0" = baseline_evs[baseline_evs$state == "REL", "rate"], "b1" = 0, "gamm1" = 0 ) # For different hazards condition ev1_pars_diff <- list( "a1" = 1.5, "h1_0" = 0.04, "b1" = 0, "gamm1" = 0 ) # NRM ev2_pars <- list( "a2" = baseline_evs[baseline_evs$state == "NRM", "shape"], "h2_0" = baseline_evs[baseline_evs$state == "NRM", "rate"], "b2" = 0, "gamm2" = 0 )
The figures below visualise the baseline hazards, as well as the corresponding baseline cumulative incidences.
# Over first 10 years baseline_hazards <- data.table::data.table("times" = seq(0.01, 10, by = 0.1)) baseline_hazards[, ':=' ( haz_rel_similar = haz_weib(alph = ev1_pars$a1, lam = ev1_pars$h1_0, t = times), haz_rel_different = haz_weib(alph = ev1_pars_diff$a1, lam = ev1_pars_diff$h1_0, t = times), haz_cens = baseline_evs[baseline_evs$state == "EFS", "rate"], haz_nrm = haz_weib(alph = ev2_pars$a2, lam = ev2_pars$h2_0, t = times) )] # Plot of similar p_similar_hazards <- data.table::melt.data.table( data = baseline_hazards, id.vars = "times", value.name = "hazard", measure.vars = c("haz_cens", "haz_rel_similar", "haz_nrm"), variable.name = "event" ) %>% ggplot(aes(times, hazard, col = event)) + geom_line(size = 1, alpha = 0.8) + scale_colour_manual( labels = c("Cens", "REL", "NRM"), values = 1:3 ) + labs( x = "Time (years)", y = "Hazard", col = "Event", title = "Similar hazard shapes" ) + theme(plot.title = element_text(hjust = 0.5)) # Different ones p_different_hazards <- data.table::melt.data.table( data = baseline_hazards, id.vars = "times", value.name = "hazard", measure.vars = c("haz_cens", "haz_rel_different", "haz_nrm"), variable.name = "event" ) %>% ggplot(aes(times, hazard, col = event)) + geom_line(size = 1, alpha = 0.8) + scale_colour_manual( labels = c("Cens", "REL", "NRM"), values = 1:3 ) + labs( x = "Time (years)", y = "Hazard", col = "Event", title = "Different hazard shapes" ) + theme(plot.title = element_text(hjust = 0.5)) # Plot it ggpubr::ggarrange( p_similar_hazards, p_different_hazards, ncol = 2, common.legend = TRUE, legend = "bottom" )
# Get true cuminc - similar and different baseline_cuminc_similar <- get_true_cuminc( ev1_pars = ev1_pars, ev2_pars = ev2_pars, combo = data.frame("val_X" = 0, "val_Z" = 0), times = baseline_hazards$times ) p_similar_cumincs <- data.table::melt.data.table( data = data.table::data.table(baseline_cuminc_similar), id.vars = "times", value.name = "cuminc", measure.vars = paste0("true_pstate", 1:3), variable.name = "state" ) %>% ggplot(aes(times, cuminc, col = state)) + geom_line(size = 1, alpha = 0.8) + scale_colour_manual( labels = c("Cens", "REL", "NRM"), values = 1:3 ) + labs( x = "Time (years)", y = "Cumulative incidence", col = "Event", title = "Similar hazard shapes" ) + theme(plot.title = element_text(hjust = 0.5)) baseline_cuminc_different <- get_true_cuminc( ev1_pars = ev1_pars_diff, ev2_pars = ev2_pars, combo = data.frame("val_X" = 0, "val_Z" = 0), times = baseline_hazards$times ) p_different_cumincs <- data.table::melt.data.table( data = data.table::data.table(baseline_cuminc_different), id.vars = "times", value.name = "cuminc", measure.vars = paste0("true_pstate", 1:3), variable.name = "state" ) %>% ggplot(aes(times, cuminc, col = state)) + geom_line(size = 1, alpha = 0.8) + scale_colour_manual( labels = c("EFS", "REL", "NRM"), values = 1:3 ) + labs( x = "Time (years)", y = "Cumulative incidence", col = "State", title = "Difference hazard shapes" ) + theme(plot.title = element_text(hjust = 0.5)) # Plot it ggpubr::ggarrange( p_similar_cumincs, p_different_cumincs, ncol = 2, common.legend = TRUE, legend = "bottom" )
As defined in section 5.1 of the main paper, $R_X$ denotes whether elements of $X$ are fully observed ($R_X = 1$) or missing ($R_X = 0$). There were four different missingness mechanisms for $X$:
Setting $\eta_1 = -2$ and solving for $\eta_0$ such that the average probability of missingness was 50\%, we can visualise each of the mechanisms as follows:
miss_mechs <- c("MCAR", "MAR", "MAR_GEN", "MNAR") miss_plotlist <- lapply(miss_mechs, function(miss_mech) { # Check if MCAR eta1 <- ifelse(miss_mech == "MCAR", NA, -2) x_axis <- ifelse(miss_mech == "MAR_GEN", "t", "Z") # Generate data dat <- generate_dat( n = 500, X_type = "continuous", r = 0.5, ev1_pars = ev1_pars, ev2_pars = ev2_pars, rate_cens = baseline_evs[baseline_evs$state == "EFS", "rate"], mech = miss_mech, p = 0.5, eta1 = eta1 ) # Make plot p_title <- ifelse(miss_mech == "MAR_GEN", "MAR-T", miss_mech) p <- ggplot(data = dat, aes_string(x_axis, "X_orig")) + geom_point(alpha = .75, aes(col = factor(miss_ind))) + labs( y = "X", x = ifelse(miss_mech == "MAR_GEN", "T (observed)", x_axis), title = p_title ) + scale_colour_manual( "Missing indicator", labels = c("Observed", "Missing"), values = c(9, 6) ) + theme(plot.title = element_text(hjust = 0.5)) return(p) }) ggpubr::ggarrange( plotlist = miss_plotlist, ncol = 2, nrow = 2, common.legend = TRUE, legend = "bottom" )
The $\eta_1$ values represents the strength of the mechanism. Using the MAR mechanism as an example, we can visualise the effect of changing $\eta_1$ while fixing the proportion of missing values at 50\% as follows.
In term of probability of $X$ missing as a function of $Z$:
etas <- c(0, -0.25, -0.5, -0.75, -1, -2) names(etas) <- etas Z <- rnorm(500) etas_MAR <- lapply(etas, function(eta) { cbind.data.frame( "times" = baseline_hazards$times, "Z" = Z, #"prob" = plogis(eta * scale(log(dat$t))), "prob" = plogis(eta * Z), "eta1" = eta ) }) data.table::rbindlist(etas_MAR) %>% ggplot(aes(Z, prob, col = factor(eta1), group = factor(eta1))) + geom_line(size = 1.5) + scale_color_brewer(palette = "Dark2") + labs( y = "Probability of X missing", colour = expression(eta[1]) )
With the data itself:
etas_MAR_dats <- lapply(etas, function(eta) { dat <- generate_dat( n = 500, X_type = "continuous", r = 0.5, ev1_pars = ev1_pars, ev2_pars = ev2_pars, rate_cens = baseline_evs[baseline_evs$state == "EFS", "rate"], mech = "MAR", p = 0.5, eta1 = eta ) }) ploto <- data.table::rbindlist(etas_MAR_dats, idcol = "eta1") ploto[, ':=' ( miss_ind = factor(miss_ind), eta1 = factor( eta1, levels = as.character(etas), labels = paste0("eta1 = ", as.character(etas)) ) )] ploto %>% ggplot(aes(Z, X_orig, col = miss_ind)) + geom_point(alpha = 0.75, size = 1) + scale_colour_manual( "Missing indicator", labels = c("Observed", "Missing"), values = c(9, 6) ) + xlab("Z") + ylab("X") + facet_wrap(. ~ eta1) + theme(legend.position = "top")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.