Description Usage Format Details Examples
Results of a simulation study on the effect of missing data on estimates of parameters of a hidden Markov model. In the simulation, 1000 datasets were simulated, each consisting of 100 time series of length 50. The model generating the data was a hidden Markov model with 3 states. Missing values were generated for 5% of cases in state 1, 25% of cases in state 2, and 50% of cases in state 3 (i.e. data can be considered missing not at random, as missingness is state-dependent).
1 | data("MNAR_simulation_results")
|
A data.frame
with the following variables:
parameter
Parameter name
true
True value of parameter
MAR_estimates_mean
Average of parameter estimates for a model that assumes MAR.
MAR_estimates_sd
Standard deviation of parameter estimates for a model that assumes MAR.
MAR_estimates_MAE
Mean Absolute Error of parameter estimates for a model that assumes MAR.
MNAR_estimates_mean
Average of parameter estimates for a model that assumes MNAR.
MNAR_estimates_sd
Standard deviation of parameter estimates for a model that assumes MNAR.
MNAR_estimates_MAE
Mean Absolute Error of parameter estimates for a model that assumes MNAR.
percMAE
Relative MAE of the MNAR model over the MAR model (e.g. MAE_MNAR/MAE_MAR)
This data frame was generated with the following code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 | library(depmixS4)
### Start simulation
nsim <- 1000
nrep <- 100
nt <- 50
set.seed(1234)
randomSeeds <- sample(seq(1,nsim*1000),nsim)
out <- rep(list(vector("list",3)),nsim)
prior <- c(8,1,1)
prior <- prior/sum(prior)
transition <- 5*diag(3) + 1
transition <- transition/rowSums(transition)
means <- c(-1,0,1)
sds <- c(3,3,3)
pmiss <- c(.05,.25,.5)
truepars1 <- c(prior,as.numeric(t(transition)),as.numeric(rbind(means,sds)))
truepars2 <- c(prior,as.numeric(t(transition)),as.numeric(rbind(means,sds,1-pmiss,pmiss)))
for(sim in 1:nsim) {
set.seed(randomSeeds[sim])
truestate <- matrix(nrow=nt,ncol=nrep)
for(i in 1:nrep) {
truestate[1,i] <- sample(1:3,size=1,prob=prior)
for(t in 2:nt) {
truestate[t,i] <- sample(1:3,size=1,prob=transition[truestate[t-1,i],])
}
}
dat <- data.frame(trueState=as.numeric(truestate),trial=1:nt)
dat$trueResponse <- rnorm(nrow(dat),mean=means[dat$trueState],sd=sds[dat$trueState])
dat$missing <- rbinom(nrow(dat),size=1,prob=pmiss[dat$trueState])
dat$response <- dat$trueResponse
dat$response[dat$missing==1] <- NA
set.seed(randomSeeds[sim])
mod <- depmix(list(response~1),family=list(gaussian()),data=dat,nstates=3,ntimes=rep(nt,nrep))
mod <- setpars(mod,truepars1)
ok <- FALSE
ntry <- 1
while(!ok & ntry <= 50) {
fmod <- try(fit(mod,emcontrol=em.control(maxit = 5000, random.start=FALSE),verbose=FALSE))
if(!inherits(fmod,"try-error")) {
if(fmod@message == "Log likelihood converged to within tol. (relative change)") ok <- TRUE
}
ntry <- ntry + 1
}
out[[sim]][[1]] <- list(pars=getpars(fmod),logLik=logLik(fmod),viterbi=posterior(fmod)[,1],trueState=dat$trueState)
set.seed(randomSeeds[sim])
mod <- depmix(list(response~1,missing~1),family=list(gaussian(),multinomial("identity")),data=dat,nstates=3,ntimes=rep(nt,nrep))
mod <- setpars(mod,truepars2)
ok <- FALSE
ntry <- 1
while(!ok & ntry <= 50) {
fmod <- try(fit(mod,emcontrol=em.control(maxit = 5000, random.start = FALSE),verbose=FALSE))
if(!inherits(fmod,"try-error")) {
if(fmod@message == "Log likelihood converged to within tol. (relative change)") ok <- TRUE
}
ntry <- ntry + 1
}
out[[sim]][[2]] <- list(pars=getpars(fmod),logLik=logLik(fmod),viterbi=posterior(fmod)[,1],trueState=dat$trueState)
}
### End simulation
### Process results
library(reshape2)
library(dplyr)
library(tidyr)
simi <- out
bias1 <- matrix(0.0,ncol=length(truepars1),nrow=nsim)
bias2 <- matrix(0.0,ncol=length(truepars2),nrow=nsim)
colnames(bias1) <- names(simi[[1]][[1]][[1]])
colnames(bias2) <- names(simi[[1]][[2]][[1]])
pcorstate1 <- rep(0.0,nsim)
pcorstate2 <- rep(0.0,nsim)
for(sim in 1:nsim) {
tmp <- simi[[sim]][[1]][[1]]
pr <- tmp[1:3]
trt <- matrix(tmp[4:12],ncol=3)
ms <- tmp[c(13,15,17)]
sds <- tmp[c(14,16,18)]
ord <- order(ms)
bias1[sim,] <- c(pr[ord],trt[ord,ord],as.numeric(rbind(ms[ord],sds[ord]))) - truepars1
fsta <- recode(simi[[sim]][[1]]$viterbi,`1` = which(ord == 1), `2` = which(ord == 2), `3` = which(ord == 3))
pcorstate1[sim] <- sum(fsta == simi[[sim]][[1]]$trueState)/(nrep*nt)
tmp <- simi[[sim]][[2]][[1]]
pr <- tmp[1:3]
trt <- matrix(tmp[4:12],ncol=3)
ms <- tmp[c(13,17,21)]
sds <- tmp[c(14,18,22)]
ps0 <- tmp[c(15,19,23)]
ps1 <- tmp[c(16,20,24)]
ord <- order(ms)
bias2[sim,] <- c(pr[ord],trt[ord,ord],as.numeric(rbind(ms[ord],sds[ord],ps0[ord],ps1[ord]))) - truepars2
fsta <- recode(simi[[sim]][[2]]$viterbi,`1` = which(ord == 1), `2` = which(ord == 2), `3` = which(ord == 3))
pcorstate2[sim] <- sum(fsta == simi[[sim]][[2]]$trueState)/(nrep*nt)
}
sim2_bias1 <- bias1
sim2_bias2 <- bias2
sim2_est1 <- t(t(bias1) + truepars1)
sim2_est2 <- t(t(bias2) + truepars2)
sim2_pcorstate1 <- pcorstate1
sim2_pcorstate2 <- pcorstate2
tmp <- as.data.frame(sim2_est1)
colnames(tmp) <- c("$\pi_1$","$\pi_2$","$\pi_3$","$a_{11}$","$a_{12}$","$a_{13}$","$a_{21}$","$a_{22}$","$a_{23}$","$a_{31}$","$a_{32}$", "$a_{33}$", "$\mu_1$","$\sigma_1$","$\mu_2$","$\sigma_2$","$\mu_3$","$\sigma_3$")
tmp$sim <- 1:nsim
tmp <- gather(tmp,key="param",value="estimate",-sim)
tmp$true <- rep(truepars1,each=nsim)
tmp$method <- "MAR"
tmp$variance <- "low"
tmp$missing <- "equal"
sim2_est1_long <- tmp
tmp <- as.data.frame(sim2_est2)
colnames(tmp) <- c("$\pi_1$","$\pi_2$","$\pi_3$","$a_{11}$","$a_{12}$","$a_{13}$","$a_{21}$","$a_{22}$","$a_{23}$","$a_{31}$","$a_{32}$", "$a_{33}$", "$\mu_1$","$\sigma_1$","pnm_1","$p(M=1|S=1)$","$\mu_2$","$\sigma_2$","pnm_2","$p(M=1|S=2)$","$\mu_3$","$\sigma_3$","pnm_3","$p(M=1|S=3)$")
tmp$sim <- 1:nsim
tmp <- gather(tmp,key="param",value="estimate",-sim)
tmp$true <- rep(truepars2,each=nsim)
tmp$method <- "MNAR"
tmp$variance <- "low"
tmp$missing <- "equal"
sim2_est2_long <- tmp
all_long <- rbind(
sim2_est1_long,
subset(sim2_est2_long,!(param
)
my_table <- all_long
group_by(param, method, variance, missing)
summarize(true = mean(true),
mean = mean(estimate),
sd = sd(estimate),
bias = mean(abs(true - estimate)/abs(true)),
MAE = mean(abs(true - estimate)))
gather(measure,value,-c(param,method,variance,missing))
recast(param + variance + missing ~ method + measure)
MAR_simulation_results <- my_table
filter(variance == "low" & missing == "equal")
dplyr::select(param,MNAR_true,paste0("MAR_",c("mean","sd","MAE"),sep=""),
paste0("MNAR_",c("mean","sd","MAE"),sep=""))
mutate(percMAE = MNAR_MAE/MAR_MAE)
colnames(MNAR_simulation_results) <- c("parameter", "true", "MAR_estimates_mean",
"MAR_estimates_sd", "MAR_estimates_MAE", "MNAR_estimates_mean",
"MNAR_estimates_sd", "MNAR_estimates_MAE", "percMAE")
MNAR_simulation_results <- MNAR_simulation_results[c(1:3,7:9,4:6,10:21),]
|
1 |
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.