tests/test_mice.R
In qgcomp: Quantile G-Computation

cat("# multiple imputation through chained equations test\n")
library("qgcomp")

N = 100
set.seed(123)
dat <- data.frame(y=runif(N), x1=runif(N), x2=runif(N), z=runif(N))
true = qgcomp.noboot(f=y ~ z + x1 + x2, expnms = c('x1', 'x2'), 
                     data=dat, q=2, family=gaussian())
mdat <- dat
mdat$x1 = ifelse(mdat$x1>0.5, mdat$x1, NA)
mdat$x2 = ifelse(mdat$x2>0.75, mdat$x2, NA)
true <- qgcomp.noboot(f=y ~ z + x1 + x2, expnms = c('x1', 'x2'), 
                    data=dat, q=2, family=gaussian())
cc <- qgcomp.noboot(f=y ~ z + x1 + x2, expnms = c('x1', 'x2'), 
                    data=mdat[complete.cases(mdat),], q=2, family=gaussian())
                    

cdat = mdat
cdat$x1 = ifelse(is.na(cdat$x1), 0.5/sqrt(2), cdat$x1)
#data = cdat
ff <- function(data){
  nms = names(data)
  j = which(nms == "x2")
  f <- function(){
    nms = names(data)
    res = mice.impute.leftcenslognorm(y=cdat$x2, 
                              ry=!is.na(cdat$x2), 
                              x=cdat[,c("x1", "y", "z")], 
                              wy=is.na(cdat$x2), 
                              lod=NULL, 
                              debug=TRUE)
    res
  }
  f()
}

# works when LOD is not specified and based on minimum non-missing value
ff(cdat)


f0 <- function(data, j){
  print(sys.parent())
  mice.impute.leftcenslognorm(y=data$x2, 
                                    ry=!is.na(data$x2), 
                                    x=data[,c("x1", "y", "z")], 
                                    wy=is.na(data$x2), 
                                    lod=c(NA, 0.5, 0.75, NA), 
                                    debug=TRUE)
}

f1 <- function(data, j){
  print(sys.parent())
  #print(eval(as.name("data"), envir = parent.frame(n=1)))
  f0(data, j=j)
}

f2 <- function(data, j){
  print(sys.parent())
  f1(data, j)
}
f3 <- function(data, j){
  print(sys.parent())
  f2(data, j)
}
f4 <- function(data, j){
  print(sys.parent())
  f3(data, j)
}
# none of these appears to work because "j" is never found
f1(cdat, 3)
f2(cdat, 3)
f3(cdat, 3)
f4(cdat, 3)



#  library("mice")
#  library("survival")
#  set.seed(1231)
#  impdat = mice(data = mdat, 
#                method = c("", "leftcenslognorm", "leftcenslognorm", ""),
#                lod=c(NA, 0.5, 0.75, NA), debug=FALSE, maxit = 10, m = 5)
#  set.seed(1231)
#  impdat2 = mice(data = mdat, 
#                method = c("", "leftcenslognorm", "leftcenslognorm", ""),
#                lod=list(rep(NA, N), rep(0.5, N), rep(0.75, N), rep(NA, N)), debug=FALSE, maxit = 10, m = 5)
#  qc.fit.imp <- list(
#    call = call("qgcomp.noboot(y~., expnms = c('x1', 'x2'), family=gaussian())"),
#    call1 = impdat$call,
#    nmis = impdat$nmis,
#    analyses = lapply(1:5, function(x) qgcomp.noboot(y~., expnms = c("x1", "x2"),
#                                                     data=complete(impdat, x), family=gaussian(), bayes=FALSE))
#  )
#  qc.fit.imp2 <- list(
#    call = call("qgcomp.noboot(y~., expnms = c('x1', 'x2'), family=gaussian())"),
#    call1 = impdat$call,
#    nmis = impdat$nmis,
#    analyses = lapply(1:5, function(x) qgcomp.noboot(y~., expnms = c("x1", "x2"),
#                                                     data=complete(impdat2, x), family=gaussian(), bayes=FALSE))
#  )
#  qc1 = pool(qc.fit.imp)
#  qc2 = pool(qc.fit.imp2)
#  stopifnot(all.equal(qc1$pooled$estimate, qc2$pooled$estimate))
# 
#  
# newlods = data.frame(rep(NA, N), runif(N, 0, .01), runif(N, 0, 10), rep(NA, N))
# impdat3 = mice(data = mdat, 
#                method = c("", "leftcenslognorm", "leftcenslognorm", ""),
#                lod=newlods, debug=FALSE, maxit = 10, m = 5)
# qc.fit.imp2 <- list(
#   call = call("qgcomp.noboot(y~., expnms = c('x1', 'x2'), family=gaussian())"),
#   call1 = impdat$call,
#   nmis = impdat$nmis,
#   analyses = lapply(1:5, function(x) qgcomp.noboot(y~., expnms = c("x1", "x2"),
#                                                    data=complete(impdat2, x), family=gaussian(), bayes=FALSE))
# )
# xcomp = complete(impdat3, 1)
# stopifnot(all(xcomp[is.na(mdat[,2]),2]<  newlods[is.na(mdat[,2]),2]))
# stopifnot(all(xcomp[is.na(mdat[,3]),3] < newlods[is.na(mdat[,3]),3] ))


# # note the following example imputes from the wrong parametric model and is expected to be biased
# # as a result
# library("mice")
# library("survival")
# set.seed(1231)
# impdat = mice(data = mdat, 
#               method = c("", "leftcenslognorm", "leftcenslognorm", ""),
#               lod=c(NA, 0.5, 0.75, NA), debug=FALSE, maxit = 10, m = 50)
# qc.fit.imp <- list(
#   call = call("qgcomp.noboot(y~., expnms = c('x1', 'x2'), family=gaussian())"),
#   call1 = impdat$call,
#   nmis = impdat$nmis,
#   analyses = lapply(1:50, function(x) qgcomp.noboot(y~., expnms = c("x1", "x2"),
#                                                    data=complete(impdat, x), family=gaussian(), bayes=FALSE))
# )
# qc.fit.imp2 <- list(
#   call = call("qgcomp.noboot(y~., expnms = c('x1', 'x2'), family=gaussian())"),
#   call1 = impdat$call,
#   nmis = impdat$nmis,
#   analyses = lapply(1:50, function(x) lm(y~., data=complete(impdat, x)))
# )
# 
# summary(complete(impdat, 1))
# obj <- pool(as.mira(qc.fit.imp))# are results at a given seed numerically stable across versions?
# 
# 
# summary(obj)
# #                 estimate  std.error statistic       df      p.value
# #  (Intercept)  0.67956728 0.09189112  7.395353 64.56892 3.582918e-10
# #  psi1        -0.09551483 0.04876708 -1.958592 49.71615 5.578245e-02
# 
# true
# #              Estimate Std. Error Lower CI  Upper CI t value
# # (Intercept)  0.582729   0.068033  0.44939 0.716071 1.663e-13
# # psi1        -0.099867   0.086641 -0.26968 0.069947    0.2519
# 
# cc
# #             Estimate Std. Error Lower CI  Upper CI t value
# # (Intercept)  0.58390    0.11158  0.36520  0.802600  0.0008
# # psi1        -0.29313    0.12952 -0.54697 -0.039282  0.0534
# 
# 
# obj2 <- pool(as.mira(qc.fit.imp2))# are results at a given seed numerically stable across versions?
# true = lm(y~., data=dat)
# cc = lm(y~., data=cdat)
# 
# summary(obj2)
# #                estimate  std.error  statistic       df      p.value
# # (Intercept)  0.88780523 0.21350133  4.1583123 52.88431 0.0001180606
# # x1          -0.19722820 0.14363657 -1.3731057 77.01935 0.1737039541
# # x2          -0.35849252 0.24710409 -1.4507754 47.94448 0.1533539928
# # z           -0.08929342 0.09794892 -0.9116325 91.46288 0.3643587309
# 
# 
# summary(true)
# #             Estimate Std. Error t value Pr(>|t|)    
# # (Intercept)  0.62516    0.09538   6.554 2.79e-09 ***
# # x1          -0.10569    0.10982  -0.962    0.338    
# # x2          -0.07144    0.09854  -0.725    0.470    
# # z           -0.07624    0.09822  -0.776    0.440    
# 
# summary(cc)
# #             Estimate Std. Error t value Pr(>|t|)
# # (Intercept)  -0.4987     0.9012  -0.553    0.586
# # x1           -0.1079     0.3138  -0.344    0.734
# # x2            1.1194     0.8712   1.285    0.214
# # z            -0.0159     0.1771  -0.090    0.929
#