## See Paul T von Hippel, The American Statistician 58:160-164, 2004
mvector <- c(0,0)
msigma <- matrix(c(1,0.5,0.5,1), nrow=2)
library(mvtnorm)
library(Hmisc)
# XZ <- rmvnorm(1000, mvector, msigma)
mvrnorm <- function(n, p = 1, u = rep(0, p), S = diag(p)) {
Z <- matrix(rnorm(n * p), p, n)
t(u + t(chol(S)) %*% Z)
}
XZ <- mvrnorm(1000, 2, mvector, msigma)
U <- rnorm(1000)
Y <- XZ[,1]+XZ[,2]+U
summary(lm(Y ~ XZ))
X <- XZ[,1]
Z <- XZ[,2]
Z.ni <- Z
type <- c('random','X<0','Y<0','Z<0')[3]
i <- switch(type,
random= runif(1000) < .5,
'X<0' = X<0,
'Y<0' = Y<0,
'Z<0' = Z<0)
Zna <- Z
Zna[i] <- NA
summary(lm(Y ~ X + Zna))
#w <- aregImpute(~monotone(Y)+monotone(X)+monotone(Zna))
#w <- aregImpute(~I(Y)+I(X)+I(Zna),fweight=.75)
w <- aregImpute(~monotone(Y)+monotone(X)+monotone(Zna), n.impute=5,
type='regression')
plot(w)
Ecdf(Zna, add=T, col='red')
Ecdf(Z, add=T, col='green')
# plot(w$imputed$Zna, Z[is.na(Zna)]) # use if n.impute=1
# abline(a=0,b=1,lty=2)
# lm(Z[is.na(Zna)] ~ w$imputed$Zna)
coef(fit.mult.impute(Y~X+Zna, lm, w, data=data.frame(X,Zna,Y),pr=F))
#--------------------------------------------------------------------
## From Ewout Steyerberg
# Missing values: illustrate MCAR, MAR, MNAR mechanism
# linear models
library(rms)
## 1. x1 and x2 with y1 outcome
## A) X only
## B) X+Y
#########################
### Test Imputation ###
### use aregImpute in default settings
#########################
n <- 20000 # arbitrary sample size
x2 <- rnorm(n=n, mean=0, sd=1) # x2 standard normal
x1 <- sqrt(.5) * x2 + rnorm(n=n, mean=0, sd=sqrt(1-.5)) # x2 correlated with x1
y1 <- 1 * x1 + 1 * x2 + rnorm(n=n, mean=0, sd=sqrt(1-0)) # generate y
# var of y1 larger with correlated x1 - x2
x1MCAR <- ifelse(runif(n) < .5, x1, NA) # MCAR mechanism for 50% of x1
x1MARx <- ifelse(rnorm(n=n,sd=.8) < x2, x1, NA) # MAR on x2, R2 50%, 50% missing (since mean x2==0)
x1MARy <- ifelse(rnorm(n=n,sd=(sqrt(3)*.8)) >y1, x1, NA) # MAR on y, R2 50%, 50% missing (since mean y1==0)
# x1MNAR <- ifelse(rnorm(n=n,sd=.8) < x1, x1, NA) # MNAR on x1, R2 50%, 50% missing (since mean x1==0)
x1MNAR <- ifelse(rnorm(n=n,sd=.8) < x1, x1, NA) # MNAR on x1, R2 50%, 50% missing (since mean x1==0)
plot(x2, x1MARx)
plsmo(x2, is.na(x1MARx), datadensity=TRUE)
dd <- datadist(x2,x1MARx)
options(datadist='dd')
f <- lrm(is.na(x1MARx) ~ rcs(x2,4))
plot(Predict(f, x2, fun=plogis))
d <- data.frame(y1,x1,x2,x1MCAR, x1MARx,x1MARy,x1MNAR)
ols(y1~x1+x2)
ols(y1 ~ x1MARx + x2)
# MAR on x: 3 approaches; CC, MI with X, MI with X+Y
g <- aregImpute(~I(y1) + I(x1MARx) + I(x2), n.impute=5, data=d, pr=F,
type=c('pmm','regression')[1], match='closest', plotTrans=TRUE)
plot(g)
Ecdf(x1, add=TRUE, col='red',q=.5)
Ecdf(x1MARx, add=TRUE, col='blue',q=.5)
f <- fit.mult.impute(y1 ~ x1MARx + x2, ols, xtrans=g, data=d, pr=F)
g <- aregImpute(~y1 + x1MARx + x2, n.impute=5, data=d, pr=F, type='regression', plotTrans=TRUE)
f <- fit.mult.impute(y1 ~ x1MARx + x2, ols, xtrans=g, data=d, pr=F)
# MAR on y: 3 approaches; CC, MI with X, MI with X+Y
f <- ols(y1~x1MARy+x2)
if(FALSE) {
Mat.imputation[i,29:32] <- c(coef(f)[2:3], sqrt(diag(vcov(f)))[2:3])
g <- aregImpute(~x1MARy + x2, n.impute=5, data=d, pr=F, type='regression')
f <- fit.mult.impute(y1 ~ x1MARy + x2, ols, xtrans=g, data=d, pr=F)
Mat.imputation[i,33:36] <- c(coef(f)[2:3], sqrt(diag(vcov(f)))[2:3])
g <- aregImpute(~y1 + x1MARy + x2, n.impute=5, data=d, pr=F, type='regression')
f <- fit.mult.impute(y1 ~ x1MARy + x2, ols, xtrans=g, data=d, pr=F)
Mat.imputation[i,37:40] <- c(coef(f)[2:3], sqrt(diag(vcov(f)))[2:3])
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.