MNAR_simulation_results: Missing not at random (MNAR) simulation results.

Description Usage Format Details Examples

Description

Results of a simulation study on the effect of missing data on estimates of parameters of a hidden Markov model. In the simulation, 1000 datasets were simulated, each consisting of 100 time series of length 50. The model generating the data was a hidden Markov model with 3 states. Missing values were generated for 5% of cases in state 1, 25% of cases in state 2, and 50% of cases in state 3 (i.e. data can be considered missing not at random, as missingness is state-dependent).

Usage

1
data("MNAR_simulation_results")

Format

A data.frame with the following variables:

parameter

Parameter name

true

True value of parameter

MAR_estimates_mean

Average of parameter estimates for a model that assumes MAR.

MAR_estimates_sd

Standard deviation of parameter estimates for a model that assumes MAR.

MAR_estimates_MAE

Mean Absolute Error of parameter estimates for a model that assumes MAR.

MNAR_estimates_mean

Average of parameter estimates for a model that assumes MNAR.

MNAR_estimates_sd

Standard deviation of parameter estimates for a model that assumes MNAR.

MNAR_estimates_MAE

Mean Absolute Error of parameter estimates for a model that assumes MNAR.

percMAE

Relative MAE of the MNAR model over the MAR model (e.g. MAE_MNAR/MAE_MAR)

Details

This data frame was generated with the following code

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
library(depmixS4)

### Start simulation

nsim <- 1000
nrep <- 100
nt <- 50

set.seed(1234)
randomSeeds <- sample(seq(1,nsim*1000),nsim)
out <- rep(list(vector("list",3)),nsim)

prior <- c(8,1,1)
prior <- prior/sum(prior)
transition <- 5*diag(3) + 1
transition <- transition/rowSums(transition)
means <- c(-1,0,1)
sds <- c(3,3,3)
pmiss <- c(.05,.25,.5)

truepars1 <- c(prior,as.numeric(t(transition)),as.numeric(rbind(means,sds)))
truepars2 <- c(prior,as.numeric(t(transition)),as.numeric(rbind(means,sds,1-pmiss,pmiss)))

for(sim in 1:nsim) {
  set.seed(randomSeeds[sim])
  truestate <- matrix(nrow=nt,ncol=nrep) 
  for(i in 1:nrep) {
    truestate[1,i] <- sample(1:3,size=1,prob=prior)
    for(t in 2:nt) {
      truestate[t,i] <- sample(1:3,size=1,prob=transition[truestate[t-1,i],])
    }
  }
  dat <- data.frame(trueState=as.numeric(truestate),trial=1:nt)
  dat$trueResponse <- rnorm(nrow(dat),mean=means[dat$trueState],sd=sds[dat$trueState])
  dat$missing <- rbinom(nrow(dat),size=1,prob=pmiss[dat$trueState])
  dat$response <- dat$trueResponse
  dat$response[dat$missing==1] <- NA
  
  set.seed(randomSeeds[sim])
  mod <- depmix(list(response~1),family=list(gaussian()),data=dat,nstates=3,ntimes=rep(nt,nrep))
  mod <- setpars(mod,truepars1)
  ok <- FALSE
  ntry <- 1
  while(!ok & ntry <= 50) {
    fmod <- try(fit(mod,emcontrol=em.control(maxit = 5000, random.start=FALSE),verbose=FALSE))
    if(!inherits(fmod,"try-error")) {
      if(fmod@message == "Log likelihood converged to within tol. (relative change)") ok <- TRUE
    }
    ntry <- ntry + 1
  }
  out[[sim]][[1]] <- list(pars=getpars(fmod),logLik=logLik(fmod),viterbi=posterior(fmod)[,1],trueState=dat$trueState)
  
  set.seed(randomSeeds[sim])
  mod <- depmix(list(response~1,missing~1),family=list(gaussian(),multinomial("identity")),data=dat,nstates=3,ntimes=rep(nt,nrep))
  mod <- setpars(mod,truepars2)
  ok <- FALSE
  ntry <- 1
  while(!ok & ntry <= 50) {
    fmod <- try(fit(mod,emcontrol=em.control(maxit = 5000, random.start = FALSE),verbose=FALSE))
    if(!inherits(fmod,"try-error")) {
      if(fmod@message == "Log likelihood converged to within tol. (relative change)") ok <- TRUE
    }
    ntry <- ntry + 1
  }
  out[[sim]][[2]] <- list(pars=getpars(fmod),logLik=logLik(fmod),viterbi=posterior(fmod)[,1],trueState=dat$trueState)
}

### End simulation

### Process results
library(reshape2)
library(dplyr)
library(tidyr)

simi <- out

bias1 <- matrix(0.0,ncol=length(truepars1),nrow=nsim)
bias2 <- matrix(0.0,ncol=length(truepars2),nrow=nsim)

colnames(bias1) <- names(simi[[1]][[1]][[1]])
colnames(bias2) <- names(simi[[1]][[2]][[1]])

pcorstate1 <- rep(0.0,nsim)
pcorstate2 <- rep(0.0,nsim)

for(sim in 1:nsim) {
  tmp <- simi[[sim]][[1]][[1]]
  pr <- tmp[1:3]
  trt <- matrix(tmp[4:12],ncol=3)
  ms <- tmp[c(13,15,17)]
  sds <- tmp[c(14,16,18)] 
  ord <- order(ms)
  bias1[sim,] <- c(pr[ord],trt[ord,ord],as.numeric(rbind(ms[ord],sds[ord]))) - truepars1
  fsta <- recode(simi[[sim]][[1]]$viterbi,`1` = which(ord == 1), `2` = which(ord == 2), `3` = which(ord == 3))
  pcorstate1[sim] <- sum(fsta == simi[[sim]][[1]]$trueState)/(nrep*nt)
  
  tmp <- simi[[sim]][[2]][[1]]
  pr <- tmp[1:3]
  trt <- matrix(tmp[4:12],ncol=3)
  ms <- tmp[c(13,17,21)]
  sds <- tmp[c(14,18,22)]
  ps0 <- tmp[c(15,19,23)]
  ps1 <- tmp[c(16,20,24)]
  ord <- order(ms)
  bias2[sim,] <- c(pr[ord],trt[ord,ord],as.numeric(rbind(ms[ord],sds[ord],ps0[ord],ps1[ord]))) - truepars2
  fsta <- recode(simi[[sim]][[2]]$viterbi,`1` = which(ord == 1), `2` = which(ord == 2), `3` = which(ord == 3))
  pcorstate2[sim] <- sum(fsta == simi[[sim]][[2]]$trueState)/(nrep*nt)
}

sim2_bias1 <- bias1
sim2_bias2 <- bias2
sim2_est1 <- t(t(bias1) + truepars1)
sim2_est2 <- t(t(bias2) + truepars2)
sim2_pcorstate1 <- pcorstate1
sim2_pcorstate2 <- pcorstate2

tmp <- as.data.frame(sim2_est1)
colnames(tmp) <- c("$\pi_1$","$\pi_2$","$\pi_3$","$a_{11}$","$a_{12}$","$a_{13}$","$a_{21}$","$a_{22}$","$a_{23}$","$a_{31}$","$a_{32}$", "$a_{33}$", "$\mu_1$","$\sigma_1$","$\mu_2$","$\sigma_2$","$\mu_3$","$\sigma_3$")
tmp$sim <- 1:nsim
tmp <- gather(tmp,key="param",value="estimate",-sim)
tmp$true <- rep(truepars1,each=nsim)
tmp$method <- "MAR"
tmp$variance <- "low"
tmp$missing <- "equal"
sim2_est1_long <- tmp

tmp <- as.data.frame(sim2_est2)
colnames(tmp) <- c("$\pi_1$","$\pi_2$","$\pi_3$","$a_{11}$","$a_{12}$","$a_{13}$","$a_{21}$","$a_{22}$","$a_{23}$","$a_{31}$","$a_{32}$", "$a_{33}$", "$\mu_1$","$\sigma_1$","pnm_1","$p(M=1|S=1)$","$\mu_2$","$\sigma_2$","pnm_2","$p(M=1|S=2)$","$\mu_3$","$\sigma_3$","pnm_3","$p(M=1|S=3)$")
tmp$sim <- 1:nsim
tmp <- gather(tmp,key="param",value="estimate",-sim)
tmp$true <- rep(truepars2,each=nsim)
tmp$method <- "MNAR"
tmp$variance <- "low"
tmp$missing <- "equal"
sim2_est2_long <- tmp

all_long <- rbind(
  sim2_est1_long,
  subset(sim2_est2_long,!(param 
)

my_table <- all_long 
  group_by(param, method, variance, missing) 
  summarize(true = mean(true),
            mean = mean(estimate),
            sd = sd(estimate),
            bias = mean(abs(true - estimate)/abs(true)),
            MAE = mean(abs(true - estimate))) 
  gather(measure,value,-c(param,method,variance,missing)) 
  recast(param + variance + missing  ~  method + measure)

MAR_simulation_results <- my_table 
  filter(variance == "low" & missing == "equal") 
  dplyr::select(param,MNAR_true,paste0("MAR_",c("mean","sd","MAE"),sep=""),
                paste0("MNAR_",c("mean","sd","MAE"),sep="")) 
  mutate(percMAE = MNAR_MAE/MAR_MAE) 

colnames(MNAR_simulation_results) <- c("parameter", "true", "MAR_estimates_mean", 
  "MAR_estimates_sd", "MAR_estimates_MAE", "MNAR_estimates_mean",
  "MNAR_estimates_sd", "MNAR_estimates_MAE", "percMAE")
MNAR_simulation_results <- MNAR_simulation_results[c(1:3,7:9,4:6,10:21),]

Examples

1

hmmr documentation built on May 27, 2021, 9:10 a.m.