fit.mult.impute <- function(formula, fitter, xtrans, data,
n.impute=xtrans$n.impute, fit.reps=FALSE,
dtrans, derived, fun,
vcovOpts=NULL,
robust=FALSE, cluster,
robmethod=c('huber', 'efron'),
method=c('ordinary', 'stack', 'only stack'),
funstack=TRUE, lrt=FALSE,
pr=TRUE, subset, fitargs)
{
call <- match.call()
robmethod <- match.arg(robmethod)
method <- match.arg(method)
if(lrt) {
if(missing(fitargs)) fitargs <- list(x=TRUE, y=TRUE)
else {fitargs$x <- fitargs$y <- TRUE}
method <- 'stack'
fun <- function(fit) list(anova = anova(fit, test='LR'))
}
if(deparse(substitute(fitter))[1] == 'lm')
warning('If you use print, summary, or anova on the result, lm methods use the\nsum of squared residuals rather than the Rubin formula for computing\nresidual variance and standard errors. It is suggested to use ols\ninstead of lm.')
using.Design <- FALSE
used.mice <- any(class(xtrans)=='mids')
if (used.mice && !requireNamespace("mice", quietly = TRUE))
stop("This data requires the 'mice' package.")
if(used.mice && missing(n.impute)) n.impute <- xtrans$m
fits <- if(fit.reps) vector('list', n.impute)
stats.ok2average <- c('linear.predictors','fitted.values','stats', 'means',
'icoef', 'scale', 'center', 'y.imputed')
if(! missing(fun) && method != 'only stack')
funresults <- vector('list', n.impute)
if(! missing(data) && inherits(data, 'data.table'))
data <- as.data.frame(data)
if(! missing(cluster)) robust <- TRUE
if(robust) {
if(! requireNamespace('rms', quietly=TRUE))
stop('the rms package must be installed to use robust or cluster')
if(missing(fitargs)) fitargs <- list(x=TRUE, y=TRUE)
else { fitargs$x <- fitargs$y <- TRUE }
}
if(method != 'ordinary') Data <- vector('list', n.impute)
for(i in 1 : n.impute) {
if(! missing(fun) && pr) cat('Imputation', i, '\r')
if(used.mice) {
completed.data <- mice::complete(xtrans, i)
for(impvar in names(completed.data))
if(length(attr(completed.data[[impvar]], 'contrasts')))
attr(completed.data[[impvar]], 'contrasts') <- NULL
}
else {
completed.data <- data
imputed.data <-
impute.transcan(xtrans, imputation=i, data=data,
list.out=TRUE, pr=FALSE, check=FALSE)
## impute.transcan works for aregImpute
completed.data[names(imputed.data)] <- imputed.data
}
if(!missing(dtrans)) completed.data <- dtrans(completed.data)
if(! missing(derived)) {
stop('derived variables in fit.mult.imputed not yet implemented')
eval(derived, completed.data)
}
if(method != 'ordinary') Data[[i]] <- completed.data
if(method != 'only stack') { # individual fits needed
if(using.Design) options(Design.attr=da)
afit <- list(formula=formula,
data=if(missing(subset)) completed.data
else completed.data[subset,])
if(! missing(fitargs)) afit <- c(afit, fitargs)
f <- do.call(fitter, afit)
stats <- f$stats
if(robust) f <- if(missing(cluster))
rms::robcov(f, method=robmethod)
else rms::robcov(f, cluster, method=robmethod)
## For some reason passing subset= causes model.frame bomb in R
if(fit.reps) fits[[i]] <- f
if(! missing(fun) && method != 'only stack')
funresults[[i]] <- fun(f)
cof <- f$coef
v <- do.call('vcov', c(list(object=f, intercepts='all'), vcovOpts))
if(i == 1) {
if(inherits(f, 'orm') && length(f$na.action) &&
length(f$na.action$nmiss) && f$na.action$nmiss[1] > 0)
warning('When using fit.mult.impute with orm, there should not be any missing\nY values because different imputations will result in differing numbers\nof intercepts')
assign <- f$assign
ns <- num.intercepts(f)
ik <- coef.intercepts <- NULL
if(ns > 0) {
ik <- attr(v, 'intercepts') # intercepts kept in f$var
if(length(ik)) {
if(ik == 'all') ik <- 1 : ns else if(ik == 'none') ik <- 0
lenik <- length(ik); if(length(ik) == 1 && ik == 0) lenik <- 0
## Shift parameter indexes to left b/c of omitted intercepts for orm
if(lenik != ns) {
for(j in 1 : length(assign))
assign[[j]] <- assign[[j]] - (ns - lenik)
coef.intercepts <- ik
}
}
}
}
if(length(ik)) cof <- c(cof[ik], cof[-(1 : ns)])
## From Rainer Dyckerhoff to work correctly with models that have
## a scale parameter (e.g. psm). Check whether length of the
## coefficient vector is different from the the number of rows of
## the covariance matrix. If so, the model contains scale
## parameters that are not fixed at some value and we have to
## append the scale parameters to the coefficient vector.
nvar0 <- length(cof)
nvar <- nrow(v)
if(nvar > nvar0) {
cof <- c(cof, log(f$scale))
names(cof) <- c(names(f$coef),
if((nvar - nvar0) == 1) "Log(scale)"
else names(f$scale))
}
if(i==1) {
vavg <- 0*v
p <- length(cof)
bar <- rep(0, p)
vname <- names(cof)
cov <- matrix(0, nrow=p, ncol=p, dimnames=list(vname,vname))
astats <- NULL
fitcomp <- names(f)[names(f) %in% stats.ok2average]
if(length(fitcomp)) for(ncomp in fitcomp)
astats[[ncomp]] <- f[[ncomp]]
if(inherits(f,'Design') | inherits(f, 'rms')) {
using.Design <- TRUE
da <- f$Design
}
}
vavg <- vavg + v
bar <- bar + cof
cof <- as.matrix(cof)
cov <- cov + cof %*% t(cof)
if(i > 1 && length(fitcomp))
for(ncomp in fitcomp)
astats[[ncomp]] <- astats[[ncomp]] + f[[ncomp]]
} # end ordinary or stacked including individual fits
} # end main loop i over n.impute
if(method == 'only stack')
warning("standard errors for frequently missing variables will",
"be underestimated with method='only stack'")
else {
vavg <- vavg / n.impute ## matrix \bar{U} in Rubin's notation
bar <- bar / n.impute
bar <- as.matrix(bar)
## Matrix B in Rubin's notation:
cov <- (cov - n.impute * bar %*% t(bar)) / (n.impute - 1)
U <- diag(vavg)
B <- diag(cov) ## save the diagonals of U and B
cov <- vavg + (n.impute + 1) / n.impute * cov ## final covariance matrix
r <- diag(cov) / diag(vavg)
names(r) <- vname
tau <- (1 + 1/n.impute) * B / U
missingInfo <- tau / (1 + tau)
dfmi <- (n.impute - 1) * ((1 + 1/tau)^2)
## Same as dfmi <- (n.impute - 1) * (1 + U / (B * (1 + 1 / n.impute))) ^ 2
if(length(fitcomp))
for(ncomp in fitcomp)
f[[ncomp]] <- astats[[ncomp]] / n.impute
if(pr) {
cat('\nWald Statistic Information\n\nVariance Inflation Factors Due to Imputation:\n\n')
print(round(r,2))
cat('\nFraction of Missing Information:\n\n')
print(round(missingInfo,2))
cat('\nd.f. for t-distribution for Tests of Single Coefficients:\n\n')
print(round(dfmi,2))
if(length(fitcomp)) {
cat('\nThe following fit components were averaged over the',
n.impute, 'model fits:\n\n')
cat(' ', fitcomp, '\n\n')
}
}
f$coefficients <- drop(bar)
if(length(coef.intercepts))
attr(f$coefficients, 'intercepts') <- coef.intercepts
attr(cov, 'intercepts') <- ik
f$var <- cov
f$variance.inflation.impute <- r
f$missingInfo <- missingInfo
f$dfmi <- dfmi
f$fits <- fits
f$formula <- formula
f$assign <- assign
f$call <- call
if(! missing(fun)) f$funresults <- funresults
if(using.Design) options(Design.attr=NULL)
class(f) <- c('fit.mult.impute', class(f))
f$fmimethod <- 'ordinary'
f$n.impute <- n.impute
if(method == 'ordinary') return(f)
} # end ordinary
## Final result based on single fit of stacked data
## Stack individual data frames into one tall one
f <- do.call(fitter, c(list(formula=formula, data=do.call(rbind, Data)),
if(! missing(fitargs)) fitargs))
f$call <- call
## Fix variances/covariances to not reward > 1 row per observation
M <- n.impute
if(method != 'only stack') f$var <- cov
else if(length(f$var)) f$var <- f$var * M
## For statistics that are sample-size related such as frequency counts
## and chi-square divide them by the number of imputations
st <- f$stats
ns <- length(st)
sn <- intersect(names(st),
c('Model L.R.', 'Obs', 'Events', 'error d.f.',
'Sum of Weights', 'n'))
if(length(sn)) for(s in sn) st[s] <- st[s] / M
lr <- st['Model L.R.']
if('Model L.R.' %in% sn)
st['P'] <- 1. - pchisq(lr, st['d.f.'])
if('freq' %in% names(f)) f$freq <- f$freq / M
if('clusterInfo' %in% names(f)) f$clusterInfo$n <- f$clusterInfo$n / M
## For ols fits make the residual d.f. "real"
if(inherits(f, 'ols')) f$df.residual <- unname(st['n'] - st['d.f.'] - 1)
f$fmimethod <- method
f$n.impute <- n.impute
if(! missing(fun)) {
if(funstack) {
if(method == 'only stack') funresults <- fun(f)
else funresults[[n.impute + 1]] <- fun(f)
}
f$funresults <- funresults
}
f$stats <- st
f$x <- f$y <- NULL
class(f) <- c('fit.mult.impute', class(f))
f
}
## orm fit$var has only middle intercept
## fit.mult.impute from orm has all intercepts
vcov.fit.mult.impute <-
function(object, regcoef.only=TRUE, intercepts='mid', ...) {
if(inherits(object, 'orm'))
return(NextMethod('vcov', object, regcoef.only=regcoef.only,
intercepts=intercepts, ...))
ns <- num.intercepts(object)
v <- object$var
if(ns == 0) return(v)
vari <- attr(v, 'intercepts')
lvari <- length(vari)
if(is.character(intercepts)) {
switch(intercepts,
mid = {
if(lvari > 0L && lvari != 1L)
stop('requested middle intercept but more than one intercept stored in object$var')
return(v) },
all = {
if(lvari > 0 && lvari < ns)
stop('requested all intercepts but not all stored in object$var')
return(v) },
none = if(inherits(object, 'orm') && lvari == 1)
return(v[-1, -1, drop=FALSE]) else
return(v[-(1 : ns),-(1 : ns), drop=FALSE]))
}
## intercepts is integer scalar or vector
if(lvari && isTRUE(all.equal(sort(vari), sort(intercepts)))) return(v)
if(length(intercepts) == ns) return(v)
if(length(intercepts) > ns) stop('more intercepts requested than in model')
i <- c(intercepts, (ns + 1) : ncol(v))
v[i, i, drop=FALSE]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.