```r
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```
Let's examine the relationship between MCAR, MAR, and MNAR in more depth. Even in simulations, which are by definition clean and tidy, these are not crisp delineations; in the real world, the distinctions between these types of drop-out are murkier still.
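As a reminder of what those labels mean mechanically, here is a minimal sketch of how each type of drop-out could be generated for a response at timepoint t, given the previous response. The function `make_missing()`, its variable names, and the logistic form are all hypothetical illustrations, not the implementation used by `SPR::dropout()`:

```r
# Illustrative sketch only - make_missing() and its logistic form are hypothetical,
# not the implementation used by SPR::dropout().
make_missing <- function(y.t, y.tm1, type, base.rate = 0.3) {
  p.miss <- switch(type,
    mcar = rep(base.rate, length(y.t)),   # unrelated to any response
    mar  = plogis(-1 + 1.5 * y.tm1),      # depends only on the previously *observed* response
    mnar = plogis(-1 + 1.5 * y.t)         # depends on the current, *unobserved* response
  )
  ifelse(runif(length(y.t)) < p.miss, NA, y.t)
}
```

The key contrast: under MAR the probability of going missing is a function of data you still get to see, whereas under MNAR it depends on the very value that goes missing; MCAR ignores the data entirely.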
Take-away: MNAR drop-out is ugly, but if the correlations across timepoints are high, the MMRM can still get you close to unbiased estimates.
Let's see if we can illustrate these relationships via simulation.
We just saw that the MMRM improves the estimates under MNAR drop-out. Let's explore by how much. Increase the correlation to 0.95, so that a subject's responses are nearly interchangeable across timepoints.
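To see what "nearly interchangeable across timepoints" means here, it helps to write out the AR(1) correlation matrix implied by `cor.value = 0.95` for four timepoints (a quick standalone check, not part of `sim_dat()`):

```r
rho <- 0.95
round(outer(1:4, 1:4, function(i, j) rho^abs(i - j)), 2)
# Even the two timepoints furthest apart (lag 3) are correlated at 0.95^3, about 0.86,
# so every earlier response is highly informative about every later one.
```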
```r
library(MASS)
library(emmeans)
library(nlme)
library(SPR)

set.seed(03232021)

out <- sim_dat(N = 2000,
               number.groups = 2,
               number.timepoints = 4,
               reg.formula = formula(~ Time + Group + Time*Group),
               Beta = 2,
               corr = 'ar1',
               cor.value = 0.95,
               var.values = 2)

dat <- out$dat
dat <- dropout(dat = dat, type_dropout = c('mnar'), prop.miss = 0.3)

# OLS regression models
mod.ols1 <- lm(Y_comp ~ Time + Group + Time*Group, data = dat)
mod.ols2 <- lm(Y_mnar ~ Group + Time + Group*Time, data = dat)

# MMRM
library(nlme)
mod.gls1 <- gls(Y_comp ~ Group + Time + Group*Time,
                data = dat,
                correlation = corSymm(form = ~ 1 | USUBJID),  # unstructured correlation
                weights = varIdent(form = ~ 1 | Time),        # freely estimate the variance at each timepoint
                na.action = na.exclude)

mod.gls2 <- gls(Y_mnar ~ Group + Time + Group*Time,
                data = dat,
                correlation = corSymm(form = ~ 1 | USUBJID),  # unstructured correlation
                weights = varIdent(form = ~ 1 | Time),        # freely estimate the variance at each timepoint
                na.action = na.exclude)

# Compute marginal mean contrasts.
# Don't worry about the degrees of freedom; we aren't focused on Type I error or power here.
# Just look at the parameter estimates to check for bias.
library(emmeans)
mod.emms.ols1 <- emmeans(mod.ols1, pairwise ~ Group | Time, adjust = 'none', mode = 'df.error')
mod.emms.ols2 <- emmeans(mod.ols2, pairwise ~ Group | Time, adjust = 'none', mode = 'df.error')
mod.emms.gls1 <- emmeans(mod.gls1, pairwise ~ Group | Time, adjust = 'none', mode = 'df.error')
mod.emms.gls2 <- emmeans(mod.gls2, pairwise ~ Group | Time, adjust = 'none', mode = 'df.error')

# Examine output:
out$Beta

# Look at drop-out:
aggregate(Y_mnar ~ Time, FUN = function(x) mean(is.na(x)), data = dat, na.action = na.pass)
aggregate(Y_mnar ~ Group + Time, FUN = function(x) mean(is.na(x)), data = dat, na.action = na.pass)
# Substantially different drop-out rates across the treatment arms

# Descriptive statistics
# With MCAR drop-out these were sufficient - the complete and observed data aligned.
# With MNAR drop-out they no longer do:
aggregate(Y_comp ~ Group + Time, FUN = function(x) round(mean(x, na.rm = TRUE), 2), data = dat, na.action = na.pass)
aggregate(Y_mnar ~ Group + Time, FUN = function(x) round(mean(x, na.rm = TRUE), 2), data = dat, na.action = na.pass)
# > 1.15 + .29
# [1] 1.44
# > 1.44/2
# [1] 0.72

# OLS likewise has substantial bias in the parameter estimates with MNAR drop-out, as we expect
mod.emms.ols1$contrasts
mod.emms.ols2$contrasts

mod.emms.gls1$contrasts
mod.emms.gls2$contrasts
# The MMRM yields decent estimates - some bias remains, but much less.
# This suggests that the MMRM can still help you,
out$cor.mat  # especially if your residuals are highly correlated

# Hypothesis: if your correlations are very high, then your response at timepoint t is very well
# predicted by your response at timepoint t-1; thus, whether or not you drop out at timepoint t
# is also very well predicted by your response at timepoint t-1.
# The latter is the definition of MAR.
# The distinction between MAR and MNAR becomes murkier as the correlation across timepoints increases.
```
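To make the hypothesis in those last comments concrete, we can check directly how well the last observed response predicts drop-out at the next visit. This is a rough sketch; it assumes `dat` is in long format with one row per `USUBJID` and `Time`, and that sorting by `USUBJID` and `Time` puts the visits in chronological order:

```r
# Sketch: does the previous observed response predict drop-out at the next visit?
dat.ord <- dat[order(dat$USUBJID, dat$Time), ]
prev    <- ave(dat.ord$Y_mnar, dat.ord$USUBJID,
               FUN = function(x) c(NA, head(x, -1)))  # lag the observed response within subject
dropped <- is.na(dat.ord$Y_mnar)

# With correlations near 0.95, the previous response should be a strong predictor of
# dropping out - i.e., the observed data carry most of the information driving the
# missingness, which is what MAR requires.
summary(glm(dropped ~ prev, family = binomial))
```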
Let's explore this hypothesis further by going to the other extreme: keep the MNAR drop-out, but reduce the correlation across timepoints to 0.05.
```r
set.seed(03232021)

out <- sim_dat(N = 2000,
               number.groups = 2,
               number.timepoints = 4,
               reg.formula = formula(~ Time + Group + Time*Group),
               Beta = 2,
               corr = 'ar1',
               cor.value = 0.05,
               var.values = 2)

dat <- out$dat
dat <- dropout(dat = dat, type_dropout = c('mnar'), prop.miss = 0.3)

# OLS regression models
mod.ols1 <- lm(Y_comp ~ Time + Group + Time*Group, data = dat)
mod.ols2 <- lm(Y_mnar ~ Group + Time + Group*Time, data = dat)

# MMRM
library(nlme)
mod.gls1 <- gls(Y_comp ~ Group + Time + Group*Time,
                data = dat,
                correlation = corSymm(form = ~ 1 | USUBJID),  # unstructured correlation
                weights = varIdent(form = ~ 1 | Time),        # freely estimate the variance at each timepoint
                na.action = na.exclude)

mod.gls2 <- gls(Y_mnar ~ Group + Time + Group*Time,
                data = dat,
                correlation = corSymm(form = ~ 1 | USUBJID),  # unstructured correlation
                weights = varIdent(form = ~ 1 | Time),        # freely estimate the variance at each timepoint
                na.action = na.exclude)

# Compute marginal mean contrasts.
# Don't worry about the degrees of freedom; we aren't focused on Type I error or power here.
# Just look at the parameter estimates to check for bias.
library(emmeans)
mod.emms.ols1 <- emmeans(mod.ols1, pairwise ~ Group | Time, adjust = 'none', mode = 'df.error')
mod.emms.ols2 <- emmeans(mod.ols2, pairwise ~ Group | Time, adjust = 'none', mode = 'df.error')
mod.emms.gls1 <- emmeans(mod.gls1, pairwise ~ Group | Time, adjust = 'none', mode = 'df.error')
mod.emms.gls2 <- emmeans(mod.gls2, pairwise ~ Group | Time, adjust = 'none', mode = 'df.error')

# Examine output:
out$Beta

# Look at drop-out:
aggregate(Y_mnar ~ Group + Time, FUN = function(x) mean(is.na(x)), data = dat, na.action = na.pass)
# Substantially different drop-out rates across the treatment arms

# Descriptive statistics
# With MCAR drop-out these were sufficient - the complete and observed data aligned.
# With MNAR drop-out they no longer do:
aggregate(Y_comp ~ Group + Time, FUN = function(x) round(mean(x, na.rm = TRUE), 2), data = dat, na.action = na.pass)
aggregate(Y_mnar ~ Group + Time, FUN = function(x) round(mean(x, na.rm = TRUE), 2), data = dat, na.action = na.pass)
# Again, not good

# OLS likewise has substantial bias in the parameter estimates with MNAR drop-out, as we expect
mod.emms.ols1$contrasts
mod.emms.ols2$contrasts  # bad again

mod.emms.gls1$contrasts
mod.emms.gls2$contrasts

out$cor.mat
```
This time the MMRM doesn't help at all: its estimates are just as biased as the OLS estimates. The correlation matrix explains why - your score at timepoint t-1 gives you essentially no information about what your score would have been at timepoint t. This is the most extreme version of MNAR, and there is not much you can do about it.
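We can repeat the quick drop-out check from above on this low-correlation data set (a sketch, with the same assumptions about the format and ordering of `dat`):

```r
# Same sketch as before, now with correlations near zero:
dat.ord <- dat[order(dat$USUBJID, dat$Time), ]
prev    <- ave(dat.ord$Y_mnar, dat.ord$USUBJID,
               FUN = function(x) c(NA, head(x, -1)))  # lag the observed response within subject
dropped <- is.na(dat.ord$Y_mnar)

# Compare the strength of this association to the high-correlation case: the previous
# response now tells us much less about who drops out, so the observed data no longer
# stand in for the unobserved values that actually drive the missingness.
summary(glm(dropped ~ prev, family = binomial))
```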
If our hypothesis about the correlations is accurate, then we should be able to predict what happens in the reverse case as well: MAR drop-out with a correlation approaching zero (0.05, say) should behave approximately like MCAR drop-out, because drop-out then depends on a previous response that carries almost no information about the current one. In other words, the bias in the descriptive statistics under MAR drop-out should go away.
```r
dat <- out$dat
dat <- dropout(dat = dat, type_dropout = c('mar'), prop.miss = 0.3)

# OLS regression model on the observed data
mod.ols2 <- lm(Y_mar ~ Group + Time + Group*Time, data = dat)

# MMRM
library(nlme)
mod.gls2 <- gls(Y_mar ~ Group + Time + Group*Time,
                data = dat,
                correlation = corSymm(form = ~ 1 | USUBJID),  # unstructured correlation
                weights = varIdent(form = ~ 1 | Time),        # freely estimate the variance at each timepoint
                na.action = na.exclude)

# Compute marginal mean contrasts.
# Don't worry about the degrees of freedom; we aren't focused on Type I error or power here.
# Just look at the parameter estimates to check for bias.
library(emmeans)
mod.emms.ols1 <- emmeans(mod.ols1, pairwise ~ Group | Time, adjust = 'none', mode = 'df.error')
mod.emms.ols2 <- emmeans(mod.ols2, pairwise ~ Group | Time, adjust = 'none', mode = 'df.error')
mod.emms.gls1 <- emmeans(mod.gls1, pairwise ~ Group | Time, adjust = 'none', mode = 'df.error')
mod.emms.gls2 <- emmeans(mod.gls2, pairwise ~ Group | Time, adjust = 'none', mode = 'df.error')

# Examine output:
out$Beta

# Look at drop-out:
aggregate(Y_mar ~ Group + Time, FUN = function(x) mean(is.na(x)), data = dat, na.action = na.pass)
# Substantially different drop-out rates across the treatment arms

# Descriptive statistics
# With near-zero correlations, the observed (MAR) means align with the complete data:
aggregate(Y_comp ~ Group + Time, FUN = function(x) round(mean(x, na.rm = TRUE), 2), data = dat, na.action = na.pass)
aggregate(Y_mar ~ Group + Time, FUN = function(x) round(mean(x, na.rm = TRUE), 2), data = dat, na.action = na.pass)
# No bias - looks good to go

# OLS on the observed data under MAR drop-out with near-zero correlations:
mod.emms.ols1$contrasts
mod.emms.ols2$contrasts  # No bias! Good to go - as with MCAR, just a little noise due to 30% drop-out

mod.emms.gls1$contrasts
mod.emms.gls2$contrasts  # Again, this turns out like MCAR drop-out

out$cor.mat
```
Our hypothesis seems validated: as the correlations across timepoints go to zero, MAR drop-out behaves more and more like MCAR drop-out.
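As one final check, possible only because this is a simulation, we can ask directly whether drop-out is related to the value we failed to observe, using the complete column `Y_comp` (a sketch; in real data `Y_comp` would not exist for the dropped visits):

```r
chk <- dat
chk$dropped <- is.na(chk$Y_mar)

# Mean of the *true* outcome among observed vs. dropped visits, by group and time:
aggregate(Y_comp ~ dropped + Group + Time,
          FUN = function(x) round(mean(x), 2), data = chk)
# Under MAR with near-zero correlations these means should be similar within each
# Group x Time cell - the missingness behaves essentially like MCAR with respect to Y.
```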