# knitr chunk options for this vignette: collapse source and output into a
# single block, and prefix printed output with "#>"
knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
The simulation study will evaluate Type I error and power. The family-wise error (FWE) rate will be computed, and the FDR adjustment for multiplicity is included as well.
We evaluate the performance of several different approaches to estimating the MMRM (mixed model for repeated measures), computing the Type I error and power under different conditions.
# Simulation study: for each replication, simulate longitudinal data with a
# null treatment effect (Beta = 0), impose MAR dropout, fit several MMRM-style
# models to both the complete and the MAR outcome, and record the per-timepoint
# group-contrast estimates and p-values.
library(SPR)
library(MASS)
library(glmmTMB)
# https://cran.r-project.org/web/packages/glmmTMB/vignettes/covstruct.html
library(emmeans)
library(nlme)
library(lme4)

# Accumulators: one row per replication, one column per model x timepoint
all.p <- vector()
all.est <- vector()

start.time <- Sys.time()
for (repl in 1:100) {

  set.seed(03232021 + repl)

  # Simulate 2 groups x 4 timepoints, AR(1) correlation 0.8, unit variance.
  # Beta = 0, so every rejection below is a Type I error.
  out <- sim_dat(N = 40,
                 number.groups = 2,
                 number.timepoints = 4,
                 reg.formula = formula( ~ Time + Group + Time*Group),
                 Beta = 0,
                 corr = 'ar1',
                 cor.value = 0.8,
                 var.values = 1)
  dat <- out$dat

  # Impose 30% missing-at-random dropout (creates Y_mar alongside Y_comp)
  dat <- dropout(dat = dat, type_dropout = c('mar'), prop.miss = 0.3)

  # OLS regression (ignores within-subject correlation)
  mod.ols1 <- lm(Y_comp ~ Time + Group + Time*Group, data = dat)
  mod.ols2 <- lm(Y_mar ~ Group + Time + Group*Time, data = dat)

  # MMRM via nlme::gls — unstructured correlation, freely estimated
  # variance at each timepoint
  mod.gls1 <- gls(Y_comp ~ Group + Time + Group*Time,
                  data = dat,
                  correlation = corSymm(form = ~ 1 | USUBJID),
                  weights = varIdent(form = ~ 1 | Time),
                  na.action = na.exclude)
  mod.gls2 <- gls(Y_mar ~ Group + Time + Group*Time,
                  data = dat,
                  correlation = corSymm(form = ~ 1 | USUBJID),
                  weights = varIdent(form = ~ 1 | Time),
                  na.action = na.exclude)

  # MMRM via glmmTMB — unstructured random effect per subject,
  # residual variance suppressed (dispformula = ~0)
  mod.us1 <- glmmTMB(Y_comp ~ Group + Time + Group*Time + us(Time + 0 | USUBJID),
                     data = dat, REML = TRUE, dispformula = ~0)
  mod.us2 <- glmmTMB(Y_mar ~ Group + Time + Group*Time + us(Time + 0 | USUBJID),
                     data = dat, REML = TRUE, dispformula = ~0)

  # MMRM via lme4 — https://bit.ly/2ZSxBRG
  # https://drive.google.com/file/d/1sOZUAFOc004H4jO8vuUc_4HyYHEgu45b/view?usp=sharing&usp=embed_facebook
  # The nobs-vs-random-effects check must be disabled for this saturated fit.
  mod.lmer1 <- lmer(Y_comp ~ Group + Time + Group*Time + ( 0 + Time | USUBJID),
                    data = dat,
                    control = lmerControl(check.nobs.vs.nRE = 'ignore'))
  mod.lmer2 <- lmer(Y_mar ~ Group + Time + Group*Time + (0 + Time | USUBJID),
                    data = dat,
                    control = lmerControl(check.nobs.vs.nRE = 'ignore'))

  # Marginal means: pairwise group contrasts within each timepoint,
  # no multiplicity adjustment at this stage
  mod.emms.ols1 <- emmeans(mod.ols1, pairwise ~ Group | Time,
                           adjust = 'none', mode = 'df.error')
  mod.emms.ols2 <- emmeans(mod.ols2, pairwise ~ Group | Time,
                           adjust = 'none', mode = 'df.error')
  mod.emms.gls1 <- emmeans(mod.gls1, pairwise ~ Group | Time,
                           adjust = 'none', mode = 'satterthwaite')
  mod.emms.gls2 <- emmeans(mod.gls2, pairwise ~ Group | Time,
                           adjust = 'none', mode = 'satterthwaite')
  mod.emms.us1 <- emmeans(mod.us1, pairwise ~ Group | Time,
                          adjust = 'none', mode = 'df.error')
  mod.emms.us2 <- emmeans(mod.us2, pairwise ~ Group | Time,
                          adjust = 'none', mode = 'df.error')
  # Kenward-Roger degrees of freedom for the lme4 fits
  mod.emms.lmer1 <- emmeans(mod.lmer1, pairwise ~ Group | Time,
                            adjust = 'none', mode = "kenward")
  mod.emms.lmer2 <- emmeans(mod.lmer2, pairwise ~ Group | Time,
                            adjust = 'none', mode = "kenward")

  # Post-processing: collect the contrast estimate and p-value at every
  # timepoint for each fitted model, with "<model>_timepoint_<t>" labels
  est <- vector()
  p <- vector()
  la <- vector()
  mod.list <- c('ols1', 'ols2', 'gls1', 'gls2', 'us1', 'us2', 'lmer1', 'lmer2')
  for (nn in mod.list) {
    mod.out <- get(paste0('mod.emms.', nn))
    mod.out <- as.data.frame(mod.out$contrasts)
    est <- c(est, mod.out[ , 'estimate'])
    p <- c(p, mod.out[ , 'p.value'])
    # One contrast row per timepoint; label by its position
    la <- c(la, paste0(nn, '_timepoint_', seq_len(nrow(mod.out))))
  }
  names(est) <- names(p) <- la

  all.est <- rbind(all.est, est)
  all.p <- rbind(all.p, p)

  cat(paste0('Replication: ', repl, '\n'))
}# end repl
end.time <- Sys.time()
end.time - start.time
# Empirical rejection rate for each model x timepoint contrast at alpha = 0.05
apply(all.p, 2, function(pv) mean(pv < 0.05))
# Average estimated group contrast per model x timepoint, rounded to 2 decimals
round(apply(all.est, 2, mean), 2)
# Family-wise error rate per model: the proportion of replications in which
# at least one of that model's per-timepoint contrasts is significant at 0.05.
out.FWE <- vector()
# Recover the model labels from column names of the form "<model>_timepoint_<t>"
mn <- unique((unlist(lapply(colnames(all.p),
                            function(x) strsplit(x, '_')[[1]][1]))))
for (nn in mn) {
  # Anchor the match on the "<model>_" prefix; an unanchored grep() could
  # accidentally match a different model whose name contains this one.
  pcomp <- all.p[ , startsWith(colnames(all.p), paste0(nn, '_'))]
  # TRUE when any timepoint is rejected within a replication
  any.sig <- apply(pcomp < 0.05, 1, any)
  print(table(any.sig))
  out.FWE <- rbind(out.FWE, mean(any.sig))
}
data.frame('Model' = mn, out.FWE)
A common multiplicity correction is the Benjamini–Hochberg false discovery rate (FDR) procedure. We evaluate how the FDR correction affects power.
# Benjamini-Hochberg FDR step-up procedure applied to each model's
# per-timepoint p-values; recomputes power after correction.
all.out <- vector()
for (nn in mn) {

  out <- all.p[ , grep(nn, colnames(all.p))]
  # Indicator matrix: 1 = contrast rejected after FDR correction
  out.FDR <- matrix(0, nrow = nrow(out), ncol = ncol(out))
  colnames(out.FDR) <- colnames(out)
  alpha0 <- 0.05
  # Number of comparisons per family (timepoints); derived from the data
  # rather than relying on an external `number.timepoints` variable.
  m <- ncol(out)

  for (repl in seq_len(nrow(out))) {
    # Order the p-values from largest to smallest; the step-up test proceeds
    # from the least to the most significant p-value.
    tmp <- sort(out[repl, ], decreasing = TRUE)
    # Eq 15 in the paper (FDR): BH critical values alpha0 * i/m, listed in
    # decreasing order to align with the decreasing sort above. This is
    # flipped from the 1995 Benjamini & Hochberg paper so that match() can
    # locate the first (largest) significant p-value.
    # (Eq 14, the Hochberg step-up test, would instead be
    #  alpha0 * (1/(1 + m - m:1)).)
    eq15 <- alpha0 * (m:1 / m)
    if (any(tmp <= eq15)) { # if something is stat sig; otherwise next repl
      # match() returns the position of the first TRUE — the largest p-value
      # meeting its threshold; reject it and every smaller p-value.
      stat.sig.endpoints <- names(tmp)[match(TRUE, tmp <= eq15):m]
      out.FDR[repl, stat.sig.endpoints] <- 1
    }# end if statement
  }# end loop over replications

  # Summarize: per-contrast rejection rate and count of replications with
  # at least one rejection
  all.out <- rbind(all.out, colMeans(out.FDR))
  print(table(apply(out.FDR, 1, function(x) sum(x) > 0)))
}# end loop over model types

# Power after FDR Correction:
data.frame('Model' = mn, all.out)
# FDR works well - next step is to compare to bootstrap approach
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.