sandbox/LMM_Vig_CMCAR_Part3.R

# Continuation

# Let's use Conditional MCAR
# Show use of the test for MCAR & CMCAR


#---------------------------------------------------------------------------------------
#
#
#
#
#
#   VIGNETTE: HOW DO I HANDLE MISSING DATA? Part 2 - What is the relationship between MCAR/MAR/Conditional MCAR?


#---------
rm(list = ls())
gc()

library(MASS)
library(emmeans)
library(nlme)
source("C:/Users/ciaconangelo/Documents/RESEARCH_NEW_LAPTOP/R_CODE_Long_Mixed_Models/sim_dat.R")
source("C:/Users/ciaconangelo/Documents/RESEARCH_NEW_LAPTOP/R_CODE_Long_Mixed_Models/dropout.R")
source("C:/Users/ciaconangelo/Documents/RESEARCH_NEW_LAPTOP/R_CODE_Long_Mixed_Models/VarCov_gls.R")

# --------------------------------------------------------------------------------
# Conditional MCAR with correlation = 0.8
# Will use this to demonstrate the test of MCAR!

 set.seed(03232021)

  out <- sim_dat(N = 2000  , 
                 number.groups = 2  , 
                 number.timepoints = 4  , 
                 cond.mcar = T  ,
                 #reg.formula = formula( ~ Time + Group + Time*Group),
                 Beta = 2  ,
                 corr = 'ar1'  , 
                 cor.value = 0.8  , 
                 var.values = 2  )
                
  
  dat <- out$dat
  dat <- dropout(dat = dat, 
                 type_dropout  = c('cmcar'), 
                 prop.miss = 0.3)

# Test for MCAR 
# Does your score at a previous timepoint predict your score at the next timepoint?
dat1 <- dat[dat$Time == 'Time_3', 'Y_cmcar']
dat2 <- dat[dat$Time == 'Time_4', 'Y_cmcar']
dropped.out <- !is.na(dat1) & is.na(dat2) # these subjects dropped out at timepoint 4
table(dropped.out)

# Does the score at timepoint 3 predict drop-out at timepoint 4?
mod <- glm(dropped.out ~ dat1, family = 'binomial')
summary(mod)  
# # TODO 3.23.21 - why is this not stat sig? resolve
# Return to the Covariate adjustment later
  
  
# What is the correctly specified model? Check data generation:
  out$reg.formula
  
  
#  OLS Regression Model 
  mod.ols1 <- lm(Y_comp ~ Group + Time + Covariate + Time * Group + Covariate * Time,  data = dat)

# Correctly specified OLS model with missing data:
  mod.ols2 <- lm(Y_cmcar ~ Group + Time + Covariate + Time * Group + Covariate * Time, data = dat)
  
  # Misspecified OLS Model:
  mod.ols3 <- lm(Y_cmcar ~ Group + Time + Time * Group, data = dat)

#   MMRM 
  # library(nlme)
  # mod.gls1 <- gls(Y_comp ~ Group + Time + Time * Group + Covariate * Time,
  #                 data = dat,
  #                 correlation = corSymm(form = ~ 1 | USUBJID),    #  unstructured correlation
  #                 weights = varIdent(form = ~ 1 | Time),          #  freely estimate variance at subsequent timepoints
  #                 na.action = na.exclude)
  # 
  # 
  # mod.gls2 <- gls(Y_cmcar ~ Group + Time + Time * Group + Covariate * Time,
  #                 data = dat,
  #                 correlation = corSymm(form = ~ 1 | USUBJID),    #  unstructured correlation
  #                 weights = varIdent(form = ~ 1 | Time),          #  freely estimate variance at subsequent timepoints
  #                 na.action = na.exclude)

  
  # Compute marginal mean contrasts
  # Don't worry about the degrees of freedom, we aren't focused on the Type I error or Power
  # Just look at the parameter estimates to check for bias
  library(emmeans)
  mod.emms.ols1 <- emmeans(mod.ols1, pairwise ~ Group | Time, adjust = 'none',  mode = 'df.error')
  mod.emms.ols2 <- emmeans(mod.ols2, pairwise ~ Group | Time, adjust = 'none',  mode = 'df.error')
  mod.emms.ols3 <- emmeans(mod.ols3, pairwise ~ Group | Time, adjust = 'none',  mode = 'df.error')

  # mod.emms.gls1 <- emmeans(mod.gls1, pairwise ~ Group | Time, adjust = 'none',  mode = 'df.error')
  # mod.emms.gls2 <- emmeans(mod.gls2, pairwise ~ Group | Time, adjust = 'none',  mode = 'df.error')

 # Examine output:
  out$Beta 
  # Data generation done specifically to demonstrate this point
  
  # Look at drop-out:
  aggregate(Y_cmcar ~ Time, FUN = function(x) mean(is.na(x)), dat = dat, na.action = na.pass)
  aggregate(Y_cmcar ~ Group + Time, FUN = function(x) mean(is.na(x)), dat = dat, na.action = na.pass)
  # Substntially different drop out rates across the treatment arms 
  
  # Descriptive Statistics 
  aggregate(Y_comp ~ Group + Time, FUN = function(x) round(mean(x, na.rm = T),2), dat = dat, na.action = na.pass)
  aggregate(Y_cmcar ~ Group + Time, FUN = function(x) round(mean(x, na.rm = T),2), dat = dat, na.action = na.pass)
 # You can't adjust for the Covariate with descriptive statistics!
  
  # OLS
  mod.emms.ols1$contrasts
  mod.emms.ols2$contrasts
  mod.emms.ols3$contrasts
 # Misspecified model leads to biased estimates
 # With conditional MCAR data, the key is to control for the covariate causing dropout
 # Once you do that, you're back to MCAR data, and a simple OLS works fine
 # Of course, we can confirm that MMRM also yields unbiased estimates 
  
#----------------------------------------------------------------------------------
# Test for MCAR
# Return to this - adjust for Covariate, recheck Test for MCAR:
dat1 <- dat[dat$Time == 'Time_3', 'Y_cmcar']
dat2 <- dat[dat$Time == 'Time_4', 'Y_cmcar']
dropped.out <- !is.na(dat1) & is.na(dat2) # these subjects dropped out at timepoint 4
table(dropped.out)

# Does the score at the previous timepoint predict drop-out AFTER adjusting for Covariate?
cov <- dat$Covariate[dat$Time == 'Time_4']
mod.mcar <- glm(dropped.out ~ dat1 + cov, family = 'binomial')
summary(mod.mcar)
CJangelo/SPR documentation built on Feb. 24, 2022, 5:02 a.m.