imi.lm <-
function(data.miss,
M0 = 2,
max.M = 500,
epsilon,
method = 'pmm',
resp,
regressors,
conv.plot = TRUE,
dis.method = 'mahalanobis',
mah.scale = 'combined',
successive.valid = 3,
print.progress=TRUE) {
# possible values for method:
# all possible things in MICE,
# in case mvn is chosen, then it switchs to amelia.
# possibilities for dis.method and mah.scale
# possible methods for distance
# dis.method='euclidean'
# dis.method='inf.norm'
# dis.method='mahalanobis'
# # possible methods for scale in Mahalanobis
# mah.scale='within'
# mah.scale='between'
# mah.scale='combined'
# successive.valid: length of distances successively smaller than epsilon to stop, minimum is zero
if (is.character(successive.valid) == FALSE) {
if (successive.valid < 1 |
floor(successive.valid) != successive.valid) {
stop('Please enter a postive integer successive.valid')
}
}
if (is.character(M0) == FALSE) {
if (M0 < 2) {
stop('Please select an initial number of imputations larger than 1')
}
}
if (is.character(M0) == FALSE) {
if (floor(M0) != M0) {
stop('Please select an integer initial number of imputations')
}
}
## nothing is manual
if (M0 != 'manual' & successive.valid != 'manual') {
if (print.progress==TRUE){
cat('We are working on the initial imputation.',fill = TRUE)
}
if (method == 'mvn') {
require(Amelia)
data.imp0 = amelia(data.miss, m = M0,p2s =0)$imputations
}
if (method != 'mvn' & method!='auto') {
require(mice)
data.imp0_ini = mice(data.miss, m = M0, method = method,print=FALSE)
data.imp0 = lapply(seq(M0), function(i)
complete(data.imp0_ini, action = i))
}
if (method=='auto') {
require(mice)
data.imp0_ini = mice(data.miss, m = M0,print=FALSE)
data.imp0 = lapply(seq(M0), function(i)
complete(data.imp0_ini, action = i))
}
# Now fitting the modle on these
# first creating the modle formula
model.expression <-
as.formula(paste(paste(resp, "~"), paste(regressors, collapse = "+")))
# fit the model to the first one to find the length of parameters
model.fit = lm(model.expression, data = data.imp0[[1]])
param.est1 = model.fit$coefficients
param.cov1 = vcov(model.fit)
param.est = matrix(0, M0, length(param.est1))
param.cov = NULL
param.est[1, ] = param.est1
param.cov[[1]] = param.cov1
# now find the estimate for the rest
for (i in 2:M0) {
model.fit = lm(model.expression, data = data.imp0[[i]])
param.est[i, ] = model.fit$coefficients
param.cov[[i]] = vcov(model.fit)
}
comb.mi0 = combine.mi(param.est, param.cov)
data.imp = data.imp0
# Now we should add imputations till we reach that point
# variable to define
#epsilon=0.05
M = M0
}
## interactive part: both manual
if (M0 == 'manual' & successive.valid == 'manual') {
t1 = proc.time()
if (method == 'mvn') {
require(Amelia)
data.imp0 = amelia(data.miss, m = 2,p2s =0)$imputations
}
if (method != 'mvn' & method!='auto') {
require(mice)
data.imp0_ini = mice(data.miss, m = 2, method = method,print=FALSE)
data.imp0 = lapply(seq(2), function(i)
complete(data.imp0_ini, action = i))
}
if (method=='auto') {
require(mice)
data.imp0_ini = mice(data.miss, m = 2,print=FALSE)
data.imp0 = lapply(seq(2), function(i)
complete(data.imp0_ini, action = i))
}
# Now fitting the modle on these
# first creating the modle formula
model.expression <-
as.formula(paste(paste(resp, "~"), paste(regressors, collapse = "+")))
# fit the model to the first one to find the length of parameters
model.fit = lm(model.expression, data = data.imp0[[1]])
param.est1 = model.fit$coefficients
param.cov1 = vcov(model.fit)
param.est = matrix(0, 2, length(param.est1))
param.cov = NULL
param.est[1, ] = param.est1
param.cov[[1]] = param.cov1
model.fit = lm(model.expression, data = data.imp0[[2]])
param.est[2, ] = model.fit$coefficients
param.cov[[2]] = vcov(model.fit)
comb.mi0 = combine.mi(param.est, param.cov)
data.imp = data.imp0
M = length(data.imp)
t2 = proc.time()
print(
paste(
'The time it takes (in seconds) to imput the data two times and fit the model to them is:',
round((t2 - t1)[3], 3)
)
)
M0.ini = readline('What is your choice of initial number of imputations?')
successive.valid.ini = readline('What is your choice for successive steps validation?')
if (print.progress==TRUE){
cat('We are working on the initial imputation.',fill = TRUE)
}
M0 = as.numeric(unlist(strsplit(M0.ini, ","))) - 2
successive.valid = as.numeric(unlist(strsplit(successive.valid.ini, ",")))
if (successive.valid < 1 |
floor(successive.valid) != successive.valid) {
stop('Please enter a postive integer successive.valid')
}
data.imp = data.imp0
if (floor(M0 + 1) != M0 + 1) {
stop('Please select an integer initial number of imputations')
}
if ((M0 + 2) < 2) {
stop('Please select an initial number of imputations larger than 1')
}
if (M0 > 0) {
if (method == 'mvn') {
require(Amelia)
data.imp0 = amelia(data.miss, m = M0,p2s =0)$imputations
}
if (method != 'mvn' & method!='auto') {
require(mice)
data.imp0_ini = mice(data.miss, m = M0, method = method,print=FALSE)
data.imp0 = lapply(seq(M0), function(i)
complete(data.imp0_ini, action = i))
}
if (method=='auto') {
require(mice)
data.imp0_ini = mice(data.miss, m = M0,print=FALSE)
data.imp0 = lapply(seq(M0), function(i)
complete(data.imp0_ini, action = i))
}
# Now fitting the modle on these
# first creating the modle formula
#model.expression<-as.formula(paste(paste(resp,"~"),paste(regressors,collapse="+")))
# fit the model to the first one to find the length of parameters
model.fit = lm(model.expression, data = data.imp0[[1]])
param.est1 = model.fit$coefficients
param.cov1 = vcov(model.fit)
#param.est=matrix(0,M0,length(param.est1))
#param.cov=NULL
param.est = rbind(param.est, param.est1)
param.cov[[1 + 2]] = param.cov1
# now find the estimate for the rest
if (M0 > 1) {
for (i in 2:M0) {
model.fit = lm(model.expression, data = data.imp0[[i]])
param.est = rbind(param.est, model.fit$coefficients)
param.cov[[i + 2]] = vcov(model.fit)
}
}
comb.mi0 = combine.mi(param.est, param.cov)
data.imp[3:(M0 + 2)] = data.imp0
# Now we should add imputations till we reach that point
# variable to define
#epsilon=0.05
M = length(data.imp)
}
}
## interactive part: M0 manual
if (M0 == 'manual' & successive.valid != 'manual') {
t1 = proc.time()
if (method == 'mvn') {
require(Amelia)
data.imp0 = amelia(data.miss, m = 2,p2s =0)$imputations
}
if (method != 'mvn' & method!='auto') {
require(mice)
data.imp0_ini = mice(data.miss, m = 2, method = method,print=FALSE)
data.imp0 = lapply(seq(2), function(i)
complete(data.imp0_ini, action = i))
}
if (method=='auto') {
require(mice)
data.imp0_ini = mice(data.miss, m = 2,print=FALSE)
data.imp0 = lapply(seq(2), function(i)
complete(data.imp0_ini, action = i))
}
# Now fitting the modle on these
# first creating the modle formula
model.expression <-
as.formula(paste(paste(resp, "~"), paste(regressors, collapse = "+")))
# fit the model to the first one to find the length of parameters
model.fit = lm(model.expression, data = data.imp0[[1]])
param.est1 = model.fit$coefficients
param.cov1 = vcov(model.fit)
param.est = matrix(0, 2, length(param.est1))
param.cov = NULL
param.est[1, ] = param.est1
param.cov[[1]] = param.cov1
model.fit = lm(model.expression, data = data.imp0[[2]])
param.est[2, ] = model.fit$coefficients
param.cov[[2]] = vcov(model.fit)
comb.mi0 = combine.mi(param.est, param.cov)
data.imp = data.imp0
M = length(data.imp)
t2 = proc.time()
print(
paste(
'The time it takes (in seconds) to imput the data two times and fit the model to them is:',
round((t2 - t1)[3], 3)
)
)
M0.ini = readline('What is your choice of initial number of imputations?')
if (print.progress==TRUE){
cat('We are working on the initial imputation.',fill = TRUE)
}
M0 = as.numeric(unlist(strsplit(M0.ini, ","))) - 2
data.imp = data.imp0
if (floor(M0 + 1) != M0 + 1) {
stop('Please select an integer initial number of imputations')
}
if ((M0 + 2) < 2) {
stop('Please select an initial number of imputations larger than 1')
}
if (M0 > 0) {
if (method == 'mvn') {
require(Amelia)
data.imp0 = amelia(data.miss, m = M0,p2s =0)$imputations
}
if (method != 'mvn' & method!='auto') {
require(mice)
data.imp0_ini = mice(data.miss, m = M0, method = method,print=FALSE)
data.imp0 = lapply(seq(M0), function(i)
complete(data.imp0_ini, action = i))
}
if (method=='auto') {
require(mice)
data.imp0_ini = mice(data.miss, m = M0,print=FALSE)
data.imp0 = lapply(seq(M0), function(i)
complete(data.imp0_ini, action = i))
}
# Now fitting the modle on these
# first creating the modle formula
#model.expression<-as.formula(paste(paste(resp,"~"),paste(regressors,collapse="+")))
# fit the model to the first one to find the length of parameters
model.fit = lm(model.expression, data = data.imp0[[1]])
param.est1 = model.fit$coefficients
param.cov1 = vcov(model.fit)
#param.est=matrix(0,M0,length(param.est1))
#param.cov=NULL
param.est = rbind(param.est, param.est1)
param.cov[[1 + 2]] = param.cov1
# now find the estimate for the rest
if (M0 > 1) {
for (i in 2:M0) {
model.fit = lm(model.expression, data = data.imp0[[i]])
param.est = rbind(param.est, model.fit$coefficients)
param.cov[[i + 2]] = vcov(model.fit)
}
}
comb.mi0 = combine.mi(param.est, param.cov)
data.imp[3:(M0 + 2)] = data.imp0
# Now we should add imputations till we reach that point
# variable to define
#epsilon=0.05
M = length(data.imp)
}
}
## interactive part: successive.valid is manual
if (M0 != 'manual' & successive.valid == 'manual') {
t1 = proc.time()
if (method == 'mvn') {
require(Amelia)
data.imp0 = amelia(data.miss, m = 2,p2s =0)$imputations
}
if (method != 'mvn' & method!='auto') {
require(mice)
data.imp0_ini = mice(data.miss, m = 2, method = method,print=FALSE)
data.imp0 = lapply(seq(2), function(i)
complete(data.imp0_ini, action = i))
}
if (method=='auto') {
require(mice)
data.imp0_ini = mice(data.miss, m = 2,print=FALSE)
data.imp0 = lapply(seq(2), function(i)
complete(data.imp0_ini, action = i))
}
# Now fitting the modle on these
# first creating the modle formula
model.expression <-
as.formula(paste(paste(resp, "~"), paste(regressors, collapse = "+")))
# fit the model to the first one to find the length of parameters
model.fit = lm(model.expression, data = data.imp0[[1]])
param.est1 = model.fit$coefficients
param.cov1 = vcov(model.fit)
param.est = matrix(0, 2, length(param.est1))
param.cov = NULL
param.est[1, ] = param.est1
param.cov[[1]] = param.cov1
model.fit = lm(model.expression, data = data.imp0[[2]])
param.est[2, ] = model.fit$coefficients
param.cov[[2]] = vcov(model.fit)
comb.mi0 = combine.mi(param.est, param.cov)
data.imp = data.imp0
M = length(data.imp)
t2 = proc.time()
print(
paste(
'The time it takes (in seconds) to imput the data two times and fit the model to them is:',
round((t2 - t1)[3], 3)
)
)
successive.valid.ini = readline('What is your choice for successive steps validation?')
if (print.progress==TRUE){
cat('We are working on the initial imputation.',fill = TRUE)
}
successive.valid = as.numeric(unlist(strsplit(successive.valid.ini, ",")))
if (successive.valid < 1 |
floor(successive.valid) != successive.valid) {
stop('Please enter a postive integer successive.valid')
}
data.imp = data.imp0
if (floor(M0 + 1) != M0 + 1) {
stop('Please select an integer initial number of imputations')
}
if ((M0 + 2) < 2) {
stop('Please select an initial number of imputations larger than 1')
}
if (M0 > 0) {
if (method == 'mvn') {
require(Amelia)
data.imp0 = amelia(data.miss, m = M0,p2s =0)$imputations
}
if (method != 'mvn' & method!='auto') {
require(mice)
data.imp0_ini = mice(data.miss, m = M0, method = method,print=FALSE)
data.imp0 = lapply(seq(M0), function(i)
complete(data.imp0_ini, action = i))
}
if (method=='auto') {
require(mice)
data.imp0_ini = mice(data.miss, m = M0,print=FALSE)
data.imp0 = lapply(seq(M0), function(i)
complete(data.imp0_ini, action = i))
}
# Now fitting the modle on these
# first creating the modle formula
#model.expression<-as.formula(paste(paste(resp,"~"),paste(regressors,collapse="+")))
# fit the model to the first one to find the length of parameters
model.fit = lm(model.expression, data = data.imp0[[1]])
param.est1 = model.fit$coefficients
param.cov1 = vcov(model.fit)
#param.est=matrix(0,M0,length(param.est1))
#param.cov=NULL
param.est = rbind(param.est, param.est1)
param.cov[[1 + 2]] = param.cov1
# now find the estimate for the rest
if (M0 > 1) {
for (i in 2:M0) {
model.fit = lm(model.expression, data = data.imp0[[i]])
param.est = rbind(param.est, model.fit$coefficients)
param.cov[[i + 2]] = vcov(model.fit)
}
}
comb.mi0 = combine.mi(param.est, param.cov)
data.imp[3:(M0 + 2)] = data.imp0
# Now we should add imputations till we reach that point
# variable to define
#epsilon=0.05
M = length(data.imp)
}
}
# Doing the rest of initial imputations
#max.M=500
dis.extra = epsilon + 1
dis.all = 0
while (sum(dis.extra > epsilon) > 0 & M < max.M) {
M = M + 1
if (print.progress=='TRUE'){
cat(paste('We are working on imputed dataset number ',M),fill = TRUE)
}
if (method == 'mvn') {
data.imp1 = amelia(data.miss, m = 1,p2s =0)$imputations$imp1
data.imp[[M]] = data.imp1
}
if (method != 'mvn' & method!='auto') {
data.imp1_ini = mice(data.miss, m = 1, method = method,print=FALSE)
data.imp1 = complete(data.imp1_ini, action = 1)
data.imp[[M]] = data.imp1
}
if (method=='auto') {
data.imp1_ini = mice(data.miss, m = 1,print=FALSE)
data.imp1 = complete(data.imp1_ini, action = 1)
data.imp[[M]] = data.imp1
}
# now fit the model to the new imputed data
model.fit = lm(model.expression, data = data.imp1)
param.est = rbind(param.est, model.fit$coefficients)
param.cov[[M]] = vcov(model.fit)
comb.mi1 = combine.mi(param.est, param.cov)
# now compute the distance between those two based on the specified method
diff.est = comb.mi1$param.est - comb.mi0$param.est
if (dis.method == 'euclidean') {
dis = sqrt(t(diff.est) %*% diff.est)
}
if (dis.method == 'inf.norm') {
dis = max(abs(diff.est))
}
if (dis.method == 'mahalanobis') {
require('MASS')
if (mah.scale == 'within') {
#S.mah=comb.mi0$within.cov + comb.mi1$within.cov
S.mah = comb.mi1$within.cov
}
if (mah.scale == 'between') {
#S.mah=comb.mi0$between.cov + comb.mi1$between.cov
S.mah = comb.mi1$between.cov
}
if (mah.scale == 'combined') {
#S.mah=comb.mi0$param.cov + comb.mi1$param.cov
S.mah = comb.mi1$param.cov
}
dis = sqrt((t(diff.est) %*% ginv(S.mah)) %*% diff.est)
}
comb.mi0 = comb.mi1
dis.all = c(dis.all, dis)
dis.extra = tail(dis.all, n = successive.valid)
}
dis.all = dis.all[-1]
if (conv.plot == TRUE) {
plot(dis.all, xlab = 'Number of imputations', ylab = 'Distance')
}
conv.status = 0
if (sum(dis.extra < epsilon) > 0) {
conv.status = 1
}
if (print.progress==TRUE){
if (conv.status==1){
cat(paste('We are done! The convergence is acheived and the sufficient number of imputations is ',M),
fill = TRUE)
}
if (conv.status==0){
cat('The convergence could not be achieved, please increase max.M or deacrese espsilon or
number of successive validations steps.',
fill = TRUE)
}
}
return(
list(
mi.param = comb.mi1,
data.imp = data.imp,
dis.steps = dis.all,
conv.status = conv.status,
M = M
)
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.