# Continuation
# Let's use Conditional MCAR
# Show use of the test for MCAR & CMCAR
#---------------------------------------------------------------------------------------
#
#
#
#
#
# VIGNETTE: HOW DO I HANDLE MISSING DATA? Part 2 - What is the relationship between MCAR/MAR/Conditional MCAR?
#---------
rm(list = ls())
gc()
library(MASS)
library(emmeans)
library(nlme)
source("C:/Users/ciaconangelo/Documents/RESEARCH_NEW_LAPTOP/R_CODE_Long_Mixed_Models/sim_dat.R")
source("C:/Users/ciaconangelo/Documents/RESEARCH_NEW_LAPTOP/R_CODE_Long_Mixed_Models/dropout.R")
source("C:/Users/ciaconangelo/Documents/RESEARCH_NEW_LAPTOP/R_CODE_Long_Mixed_Models/VarCov_gls.R")
# --------------------------------------------------------------------------------
# Conditional MCAR with correlation = 0.8
# Will use this to demonstrate the test of MCAR!
set.seed(03232021)
out <- sim_dat(N = 2000 ,
number.groups = 2 ,
number.timepoints = 4 ,
cond.mcar = T ,
#reg.formula = formula( ~ Time + Group + Time*Group),
Beta = 2 ,
corr = 'ar1' ,
cor.value = 0.8 ,
var.values = 2 )
dat <- out$dat
dat <- dropout(dat = dat,
type_dropout = c('cmcar'),
prop.miss = 0.3)
# Test for MCAR
# Does your score at a previous timepoint predict your score at the next timepoint?
dat1 <- dat[dat$Time == 'Time_3', 'Y_cmcar']
dat2 <- dat[dat$Time == 'Time_4', 'Y_cmcar']
dropped.out <- !is.na(dat1) & is.na(dat2) # these subjects dropped out at timepoint 4
table(dropped.out)
# Does the score at timepoint 3 predict drop-out at timepoint 4?
mod <- glm(dropped.out ~ dat1, family = 'binomial')
summary(mod)
# # TODO 3.23.21 - why is this not stat sig? resolve
# Return to the Covariate adjustment later
# What is the correctly specified model? Check data generation:
out$reg.formula
# OLS Regression Model
mod.ols1 <- lm(Y_comp ~ Group + Time + Covariate + Time * Group + Covariate * Time, data = dat)
# Correctly specified OLS model with missing data:
mod.ols2 <- lm(Y_cmcar ~ Group + Time + Covariate + Time * Group + Covariate * Time, data = dat)
# Misspecified OLS Model:
mod.ols3 <- lm(Y_cmcar ~ Group + Time + Time * Group, data = dat)
# MMRM
# library(nlme)
# mod.gls1 <- gls(Y_comp ~ Group + Time + Time * Group + Covariate * Time,
# data = dat,
# correlation = corSymm(form = ~ 1 | USUBJID), # unstructured correlation
# weights = varIdent(form = ~ 1 | Time), # freely estimate variance at subsequent timepoints
# na.action = na.exclude)
#
#
# mod.gls2 <- gls(Y_cmcar ~ Group + Time + Time * Group + Covariate * Time,
# data = dat,
# correlation = corSymm(form = ~ 1 | USUBJID), # unstructured correlation
# weights = varIdent(form = ~ 1 | Time), # freely estimate variance at subsequent timepoints
# na.action = na.exclude)
# Compute marginal mean contrasts
# Don't worry about the degrees of freedom, we aren't focused on the Type I error or Power
# Just look at the parameter estimates to check for bias
library(emmeans)
mod.emms.ols1 <- emmeans(mod.ols1, pairwise ~ Group | Time, adjust = 'none', mode = 'df.error')
mod.emms.ols2 <- emmeans(mod.ols2, pairwise ~ Group | Time, adjust = 'none', mode = 'df.error')
mod.emms.ols3 <- emmeans(mod.ols3, pairwise ~ Group | Time, adjust = 'none', mode = 'df.error')
# mod.emms.gls1 <- emmeans(mod.gls1, pairwise ~ Group | Time, adjust = 'none', mode = 'df.error')
# mod.emms.gls2 <- emmeans(mod.gls2, pairwise ~ Group | Time, adjust = 'none', mode = 'df.error')
# Examine output:
out$Beta
# Data generation done specifically to demonstrate this point
# Look at drop-out:
aggregate(Y_cmcar ~ Time, FUN = function(x) mean(is.na(x)), dat = dat, na.action = na.pass)
aggregate(Y_cmcar ~ Group + Time, FUN = function(x) mean(is.na(x)), dat = dat, na.action = na.pass)
# Substntially different drop out rates across the treatment arms
# Descriptive Statistics
aggregate(Y_comp ~ Group + Time, FUN = function(x) round(mean(x, na.rm = T),2), dat = dat, na.action = na.pass)
aggregate(Y_cmcar ~ Group + Time, FUN = function(x) round(mean(x, na.rm = T),2), dat = dat, na.action = na.pass)
# You can't adjust for the Covariate with descriptive statistics!
# OLS
mod.emms.ols1$contrasts
mod.emms.ols2$contrasts
mod.emms.ols3$contrasts
# Misspecified model leads to biased estimates
# With conditional MCAR data, the key is to control for the covariate causing dropout
# Once you do that, you're back to MCAR data, and a simple OLS works fine
# Of course, we can confirm that MMRM also yields unbiased estimates
#----------------------------------------------------------------------------------
# Test for MCAR
# Return to this - adjust for Covariate, recheck Test for MCAR:
dat1 <- dat[dat$Time == 'Time_3', 'Y_cmcar']
dat2 <- dat[dat$Time == 'Time_4', 'Y_cmcar']
dropped.out <- !is.na(dat1) & is.na(dat2) # these subjects dropped out at timepoint 4
table(dropped.out)
# Does the score at the previous timepoint predict drop-out AFTER adjusting for Covariate?
cov <- dat$Covariate[dat$Time == 'Time_4']
mod.mcar <- glm(dropped.out ~ dat1 + cov, family = 'binomial')
summary(mod.mcar)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.