# Development Simulation Study, version 2
# Missing Data Handing in COA analysis
# This illustrates that you can re-capture the correct values via MMRM
#-------------------------------------------------------
# Notes:
# 5.6.21
# Generate Data such that the variance is much greater than the Beta value
# This would seem to reflect reality
# It also is the only case where there is bias in the estimates
# Why this happens:
# IF you have a small variance relative to your Beta value,
# even after you have drop-out, the remaining values are close to the
# true value because your variance is minimal!
# Further explanation:
# Imagine having a variance of zero - even after half of the subjects are
# gone, you still have a few remaining subjects with perfect estimates
# 5.6.21 - okay so if I turn down my variance from 2 to 1,
# then there is no problem with the MAR drop-out!!!
# Why is that?
# If I crank variance up to 5, I get real serious drop-out --- why!?!?
# Do development here, then add functions
rm(list = ls())
gc()
library(ggplot2)
library(grid)
library(gridExtra)
library(COA34)
library(nlme)
library(emmeans)
library(dplyr)
number.repl <- 100
repl <- 4
ag <- c('Improved_2+', 'Improved_1', 'Maintained', 'Deteriorated_1', 'Deteriorated_2+')
comp.mean <- as.data.frame(matrix(ncol=length(ag),nrow=0, dimnames=list(NULL, ag)))
comp.mmrm <- as.data.frame(matrix(ncol=length(ag),nrow=0, dimnames=list(NULL, ag)))
mar.mean <- as.data.frame(matrix(ncol=length(ag),nrow=0, dimnames=list(NULL, ag)))
mar.mmrm <- as.data.frame(matrix(ncol=length(ag),nrow=0, dimnames=list(NULL, ag)))
for (repl in 1:number.repl){
set.seed(as.numeric(5072021 + repl))
# Generate data
##sim.out <- COA34::sim_pro_dat(N=1000, polychor.value = 0)
sim.out <- COA34::sim_pro_dat(N=50,
polychor.value = 0.4,
corr = 'ar1',
cor.value = 0.8,
var.values = c(5))
#sim.out <- sim_pro_dat2(N=1e4, polychor.value = 0.8)
dat <- sim.out$dat
# Implement drop-out
dat <- COA34::dropout(dat = dat,
type_dropout = c('mcar', 'mar', 'mnar'),
prop.miss = c(0, 0.1, 0.2, 0.3),
stochastic.component = 0.2)
# You already have the PGIS_bl and PGIS_delta in the generated data,
# you wouldn't have that in a real dataset, so drop that first
dat <- dat[, !(colnames(dat) %in% c('PGIS_bl', 'PGIS_delta'))]
# Compute PGIS delta
dat <- COA34::compute_anchor_delta(dat = dat,
subject.id = 'USUBJID',
time.var = 'Time',
anchor = 'PGIS')
# Compute PRO Score delta
dat <- COA34::compute_change_score(dat = dat,
subject.id = 'USUBJID',
time.var = 'Time',
score = c('Y_comp', 'Y_mcar', 'Y_mar', 'Y_mnar'))
# Compute anchor groups
dat <- COA34::compute_anchor_group(dat = dat,
anchor.variable = 'PGIS_delta',
number.of.anchor.groups = 5)
#-------------------------------------------------------------------------------
#------------------------------------------------------------
# Descriptive Statistics:
out <- aggregate(Y_comp_delta ~ anchor.groups, function(x) mean(x, na.rm = T), data = dat[dat$Time == 'Time_4',], na.action = na.pass)
tmp <- out$Y_comp_delta
names(tmp) <- paste0(out$anchor.groups)
comp.mean <- dplyr::bind_rows(comp.mean, tmp )
out2 <- aggregate(Y_mar_delta ~ anchor.groups, function(x) mean(x, na.rm = T), data = dat[dat$Time == 'Time_4',], na.action = na.pass)
tmp2 <- out2$Y_mar_delta
names(tmp2) <- paste0(out2$anchor.groups)
mar.mean <- dplyr::bind_rows(mar.mean, tmp2 )
# Complete
mod.gls1 <- gls(Y_comp_delta ~ PGIS_bl + anchor.groups*Time,
data = dat[dat$Time != 'Time_1',],
correlation = corSymm(form = ~ 1 | USUBJID), # unstructured correlation
weights = varIdent(form = ~ 1 | Time), # freely estimate variance at subsequent timepoints
na.action = na.exclude)
out <- emmeans(mod.gls1, ~ anchor.groups | Time, mode = 'df.error')
out <- as.data.frame(out)
out <- out[out$Time == 'Time_4', ]
tmp <- out$emmean
names(tmp) <- paste0(out$anchor.groups)
comp.mmrm <- dplyr::bind_rows(comp.mmrm, tmp )
#
# MAR
mod.gls2 <- gls(Y_mar_delta ~ PGIS_bl + anchor.groups*Time,
data = dat[dat$Time != 'Time_1',],
correlation = corSymm(form = ~ 1 | USUBJID), # unstructured correlation
weights = varIdent(form = ~ 1 | Time), # freely estimate variance at subsequent timepoints
na.action = na.exclude)
out <- emmeans(mod.gls2, ~ anchor.groups | Time, mode = 'df.error')
out <- as.data.frame(out)
out <- out[out$Time == 'Time_4', ]
tmp <- out$emmean
names(tmp) <- paste0(out$anchor.groups)
mar.mmrm <- dplyr::bind_rows(mar.mmrm, tmp )
cat(paste0('Replication: ', repl, '\n'))
}# end repl
#-----------------------------
table(complete.cases(mar.mean))
table(complete.cases(mar.mmrm))
which(!(complete.cases(mar.mean)))
which(!(complete.cases(mar.mmrm)))
# Complete Data:
# Means
round(colMeans(comp.mean),2)
# MMRM
round(colMeans(comp.mmrm),2)
# MAR drop out
# Means
round(colMeans(mar.mean, na.rm = T), 2)
# MMRM
round(colMeans(mar.mmrm, na.rm = T), 2)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.