Nothing
#' Confidence Intervals after Multiple Imputation: Combination of Likelihood Profiles
#'
#' This function implements the new combination of likelihood profiles (CLIP) method described in
#' Heinze, Ploner and Beyea (2013). This method is useful for computing confidence intervals for
#' parameters after multiple imputation of data sets, if the normality assumption on parameter estimates and consequently the validity of applying Rubin's rules (pooling of variances) is in doubt. It
#' consists of combining the profile likelihoods into a posterior. The function CLIP.confint searches
#' for those values of a regression coefficient, at which the cumulative distribution function of the
#' posterior is equal to the values specified in the argument ci.level (usually 0.025 and 0.975). The
#' search is performed using R's optimize function.
#'
#' For each confidence limit, this function performs a binary search to evaluate the combined posterior,
#' which is obtained by first transforming the imputed-data likelihood profiles into cumulative distribution functions (CDFs), and then averaging the CDFs to obtain the CDF of the posterior. Usually,
#' the binary search manages to find the confidence intervals very quickly. The number of iterations
#' (mean and maximum) will be supplied in the output object. Further details on the method can be
#' found in Heinze, Ploner and Beyea (2013).
#'
#' @param obj Either a list of logistf fits (on multiple imputed data sets), or the result of analysis of a \code{mice} (multiply imputed) object using \code{with.mids}
#' @param variable The variable of interest, for which confidence intervals should be computed. If missing, confidence intervals for all variables will be computed.
#' @param data A list of data set corresponding to the model fits. Can be left blank if obj was obtained with the \code{dataout=TRUE} option or if obj was obtained by mice
#' @param firth If \code{TRUE}, applies the Firth correction. Should correspond to the entry in obj.
#' @param weightvar An optional weighting variable for each observation.
#' @param control Control parameters for \code{logistf}, usually obtained by \code{logistf.control()}
#' @param ci.level The two confidence levels for each tail of the posterior distribution.
#' @param pvalue If \code{TRUE}, will also compute a P-value from the posterior.
#' @param offset An optional offset variable
#' @param bound.lo Bounds (vector of length 2) for the lower limit. Can be left blank. Use only if problems are encountered.
#' @param bound.up Bounds (vector of length 2) for the upper limit. Can be left blank. Use only if problems are encountered.
#' @param legacy If \code{TRUE}, will use pure R code for all model fitting. Can be slow. Not recommended.
#'
#' @return An object of class \code{CLIP.confint}, with items:
#' \item{variable}{The variable(s) which were analyzed}
#' \item{estimate}{The pooled estimate (average over imputations)}
#' \item{ci}{The confidence interval(s)}
#' \item{pvalue}{The p-value(s)}
#' \item{imputations}{The number of imputed datasets}
#' \item{ci.level}{The confidence level (input)}
#' \item{bound.lo}{The bounds used for finding the lower confidence limit; usually not of interest. May be useful for error-tracing.}
#' \item{bound.up}{The bounds used for finding the upper confidence limit}
#' \item{iter}{The number of iterations (for each variable and each tail)}
#' \item{call}{The call object}
#'
#' @export CLIP.confint
#'
#' @examples
#' #generate data set with NAs
#' freq=c(5,2,2,7,5,4)
#' y<-c(rep(1,freq[1]+freq[2]), rep(0,freq[3]+freq[4]), rep(1,freq[5]), rep(0,freq[6]))
#' x<-c(rep(1,freq[1]), rep(0,freq[2]), rep(1,freq[3]), rep(0,freq[4]),
#' rep(NA,freq[5]),rep(NA,freq[6]))
#' toy<-data.frame(x=x,y=y)
#'
#' # impute data set 5 times
#' set.seed(169)
#' toymi<-list(0)
#' for(i in 1:5){
#' toymi[[i]]<-toy
#' y1<-toymi[[i]]$y==1 & is.na(toymi[[i]]$x)
#' y0<-toymi[[i]]$y==0 & is.na(toymi[[i]]$x)
#' xnew1<-rbinom(sum(y1),1,freq[1]/(freq[1]+freq[2]))
#' xnew0<-rbinom(sum(y0),1,freq[3]/(freq[3]+freq[4]))
#' toymi[[i]]$x[y1==TRUE]<-xnew1
#' toymi[[i]]$x[y0==TRUE]<-xnew0
#' }
#'
#' # logistf analyses of each imputed data set
#' fit.list<-lapply(1:5, function(X) logistf(data=toymi[[X]], y~x, pl=TRUE))
#'
#' # CLIP confidence limits
#' CLIP.confint(obj=fit.list, data = toymi)
#'
#' @author Georg Heinze and Meinhard Ploner
#' @references Heinze G, Ploner M, Beyea J (2013). Confidence intervals after multiple imputation: combining
#' profile likelihood information from logistic regressions. Statistics in Medicine, to appear.
#'
#' @seealso [logistf()] for Firth's bias-Reduced penalized-likelihood logistic regression.
#' @encoding UTF-8
#'
#' @rdname CLIP.confint
CLIP.confint <- function(obj=NULL, variable=NULL, data, firth=TRUE, weightvar=NULL,
control=logistf.control(), ci.level=c(0.025,0.975), pvalue=TRUE,
offset=NULL, bound.lo=NULL, bound.up=NULL, legacy=FALSE) {
which <- NULL
if(length(ci.level)==1) ci.level<-c((1-ci.level)/2, 1-(1-ci.level)/2)
if (!is.null(obj)) {
if (is.mira(obj)) {
# assuming a mira object with fits on each imputed data set
# data.imp<-texteval(paste("",obj$call$data,"",sep=""))
data.imp<-eval(obj$call$data) #############GEHT NOCH NICHT!!
nimp<-data.imp$m
data<-lapply(1:nimp, function(x) complete(data.imp, action=x))
fits<-obj$analyses
formula<-as.formula(fits[[1]]$call$formula)
}
else {
# assuming as input a list of data sets (data) and a list of fits (obj)
fits<-obj
if(missing(data)) if(is.null(fits[[1]]$data)) stop("Please provide data either as list of imputed data sets\n")
else data<-lapply(1:length(fits), function(X) fits[[X]]$data)
formula<-as.formula(fits[[1]]$call$formula)
nimp<-length(data)
}
}
else {
nimp<-length(data)
stop(paste("Please provide a list of fit objects or a mira object with fits on the ",nimp," imputed data sets.\n"))
}
if(is.null(variable)) variable<-names(fits[[1]]$coefficients)
nvar<-length(variable)
inner.CLIP<-function(myvar){
variable<-myvar
old<-legacy
imputations<-nimp
if (is.null(which) & is.null(variable))
stop("You must specify which (a one-sided formula) or variable (the name of the variable).")
res<-matrix(0,length(grid),3)
nperimp<-unlist(lapply(1:imputations,function(x) nrow(data[[x]])))
variable.names<-colnames(model.matrix(formula, data = data[[1]]))
# mat.data<-matrix(lapply(1:imputations,function(x) matrix(unlist(data[[x]]),nperimp[[x]],ncol(data[[x]]))),sum(nperimp),ncol(data[[1]]))
imputation.indicator<-unlist(sapply(1:imputations, function(x) rep(x,nperimp[x]))[TRUE]) #[TRUE]makes vector out of matrix
#mat.data<-matrix(0,sum(nperimp),ncol(data[[1]])) ### copy list of data set into a matrix
#for(i in 1:imputations) mat.data[imputation.indicator==i,1:ncol(data[[i]])]<-as.matrix(data[[i]])
#easier as above (and without converting everything to strings):
mat.data <- do.call(rbind, data)
# if(missing(weightvar)) {
# weightvar<-"weights"
# mat.data[,ncol(mat.data)]<-rep(1,nrow(mat.data))
# }
big.data<-data.frame(mat.data, stringsAsFactors = F)
colnames(big.data)<-colnames(data[[1]])
k<-length(variable.names)
xyw<-matrix(0,sum(nperimp),k+2)
xyw[,1:k]<- model.matrix(formula, data = big.data)
xyw[,k+1]<- model.response(model.frame(as.formula(formula),data=big.data))
if (!is.null(which)) {
cov.name <- variable.names
cov.name2 <- colnames(model.matrix(which, data = big.data))
pos <- match(cov.name2, cov.name)
variable <- cov.name2
}
else {
cov.name <- variable.names
cov.name2 <- variable
pos <- match(cov.name2, cov.name)
}
# fit <- logistf.fit(x, y, weight = weight, offset = offset,
# firth = firth, control = control)
# std.pos <- diag(fit$var)[pos]^0.5
# coefs <- fit$beta
# covs <- fit$var
# n <- nrow(x)
# cov.name <- labels(x)[[2]]
if (is.null(weightvar)) {
# data<-lapply(1:imputations, function(z) {
# data[[z]]$weightvar<-rep(1,nrow(data[[z]]))
# data[[z]]
# })
# weightvar<-"weightvar"
xyw[,k+2]<-rep(1,nrow(xyw))
}
else xyw[,k+2]<-big.data[,weightvar]
posweight<-k+2
lf<-function(index) logistf.fit(y=xyw[index,k+1], x=xyw[index,1:k], weight=xyw[index,k+2])
if (is.null(fits)) fits<-lapply(1:imputations, function(z) lf(imputation.indicator==z))
if(is.null(bound.lo)){
lower.collect<- (unlist(lapply(1:imputations,function(x) fits[[x]]$ci.lower[pos])))
lowerbound.lo<-min(lower.collect) ###von dem den index nehmen und davon das PL CI ausrechnen
upperbound.lo<-max(lower.collect)
} else {
lowerbound.lo<-min(bound.lo)
upperbound.lo<-max(bound.lo)
}
if(is.null(bound.up)){
upper.collect<- (unlist(lapply(1:imputations,function(x) fits[[x]]$ci.upper[pos])))
lowerbound.up<-min(upper.collect) ###von dem den index nehmen und davon das PL CI ausrechnen
upperbound.up<-max(upper.collect)
} else {
lowerbound.up<-min(bound.up)
upperbound.up<-max(bound.up)
}
if (lowerbound.lo == upperbound.lo){
lowerbound.lo <- lowerbound.lo - 1/2 # very crude!
upperbound.lo <- upperbound.lo + 1/2
}
if (lowerbound.up == upperbound.up){
lowerbound.up <- lowerbound.up - 1/2 # very crude!
upperbound.up <- upperbound.up + 1/2
}
estimate<-mean(unlist(lapply(1:imputations,function(x) fits[[x]]$coefficients[pos])))
iter<-numeric(0)
loglik<-unlist(lapply(1:imputations, function(x) fits[[x]]$loglik['full']))
beta<-t(matrix(unlist(lapply(1:imputations,function(x) fits[[x]]$coefficients)),k,imputations))
lpdf<-function(zz,z) logistf.pdf(x=xyw[imputation.indicator==zz,1:k], y=xyw[imputation.indicator==zz,k+1],
weight=xyw[imputation.indicator==zz,k+2], beta=beta[zz,],loglik=loglik[zz],
pos=pos, firth=firth, offset=offset, control=control, b=z, old=old)$pdf
f=function(z) mean(unlist(lapply(1:imputations, function(zz) {lpdf(zz,z)}
)))
f.lower<-f(lowerbound.lo)-ci.level[1]
f.upper<-f(upperbound.lo)-ci.level[1]
iter[1]<-2
itwhile<-0
while(f.lower > 0 & (upperbound.lo - lowerbound.lo) > 0 & itwhile<5) {
itwhile<-itwhile+1
lowerbound.lo<-lowerbound.lo - (upperbound.lo - lowerbound.lo)/2
f.lower<-f(lowerbound.lo)-ci.level[1]
iter[1]<-iter[1]+1
}
if (itwhile>=5 & f.upper<0) stop("pool.pl can not find a lower boundary for the lower confidence limit.\n Try to increase number of imputations or supply boundaries by bound.lo=c(x,xx).\n")
itwhile<-0
while(f.upper < 0 & (upperbound.lo - lowerbound.lo) > 0 & itwhile<5) {
itwhile<-itwhile+1
upperbound.lo<-upperbound.lo + (upperbound.lo - lowerbound.lo)/2
f.upper<-f(upperbound.lo)-ci.level[1]
iter[1]<-iter[1]+1
}
if (itwhile>=5 & f.upper<0) stop("pool.pl can not find an upper boundary for the lower confidence limit.\n Try to increase number of imputations or supply boundaries by bound.lo=c(x,xx).\n")
ci<-numeric(0)
res.ci<-uniroot(f=function(z) {f(z)-ci.level[1]},
lower=lowerbound.lo, upper=upperbound.lo, f.lower=f.lower, f.upper=f.upper)
ci[1]<-res.ci$root
iter[1]<-res.ci$iter+iter[1]
f.lower<-f(lowerbound.up)-ci.level[2]
f.upper<-f(upperbound.up)-ci.level[2]
iter[2]<-2
itwhile<-0
while(f.lower > 0 & (upperbound.up - lowerbound.up) > 0 & itwhile<5) {
itwhile<-itwhile+1
lowerbound.up<-lowerbound.up - (upperbound.up - lowerbound.up)/2
f.lower<-f(lowerbound.up)-ci.level[2]
iter[2]<-iter[2]+1
}
if (itwhile>=5 & f.lower>0) stop("pool.pl can not find a lower boundary for the upper confidence limit.\n Try to increase number of imputations or supply boundaries by bound.up=c(x,xx).\n")
itwhile<-0
while(f.upper < 0 & (upperbound.up - lowerbound.up) > 0 & itwhile<5) {
itwhile<-itwhile+1
upperbound.up<-upperbound.up + (upperbound.up - lowerbound.up)/2
f.upper<-f(upperbound.up)-ci.level[2]
iter[2]<-iter[2]+1
}
if (itwhile>=5 & f.upper<0) stop("pool.pl can not find an upper boundary for the upper confidence limit.\n Try to increase number of imputations or supply boundaries by bound.up=c(x,xx).\n")
res.ci<-uniroot(f=function(z) {f(z)-ci.level[2]},
lower=lowerbound.up, upper=upperbound.up, f.lower=f.lower, f.upper=f.upper)
ci[2]<-res.ci$root
iter[2]<-res.ci$iter+iter[2]
if (pvalue=="TRUE") {
pvalue1<-f(0)
pvalue<-2*min(pvalue1, (1-pvalue1)) # two-sided = twice smaller tail
}
else pvalue<-NA
#uniroot(f=function(x) pdf.imp(b=x, formula = formula, data = data,
# which=which, firth = firth, weights=weights, control=control, plcontrol=plcontrol)-ci.level[2],
# lower=lowerbound.up, upper=upperbound.up)
#ci[2]<-uniroot(f=function(x) pdf.combi(b=x, formula = formula, data = data,
# which=which, firth = firth, weights=weights, control=control, plcontrol=plcontrol)-ci.level[2], lower=lowerbound.up, upper=upperbound.up,
# formula = formula, data = data,
# which=which, firth = TRUE, weights=weights, control=control, plcontrol=plcontrol)
res<-list(
estimate=estimate,ci=ci,pvalue=pvalue, imputations=imputations,
ci.level=ci.level, myvar=myvar, call=match.call(),
bound.lo=c(lowerbound.lo, upperbound.lo),
bound.up=c(lowerbound.up, upperbound.up), iter=iter)
return(res)
}
estimate<-numeric(nvar)
ci<-matrix(0,nvar,2)
pvalue.out<-numeric(nvar)
bound.lo<-matrix(0,nvar,2)
bound.up<-matrix(0,nvar,2)
iter<-matrix(0,nvar,2)
for(i in 1:nvar) {
res.tmp<-inner.CLIP(myvar=variable[i])
estimate[i]<-res.tmp$estimate
ci[i,]<-res.tmp$ci
pvalue.out[i]<-res.tmp$pvalue
bound.lo[i,]<-res.tmp$bound.lo
bound.up[i,]<-res.tmp$bound.up
iter[i,]<-res.tmp$iter
}
res<-list(
variable=variable, estimate=estimate, ci=ci, pvalue=pvalue.out,
imputations=res.tmp$imputations, ci.level=ci.level, bound.lo=bound.lo,
bound.up=bound.up, iter=iter, call=match.call())
attr(res,"class") <- "CLIP.confint"
return(res)
}
#' @method print CLIP.confint
print.CLIP.confint <- function(
x,
exp=FALSE,
...
) {
object <- x
mat <- cbind(object$estimate, object$ci[,1], object$ci[,2], object$pvalue)
if(exp)
mat[,1:3]<-exp(mat[,1:3])
colnames(mat)<-c("Estimate", "Lower", "Upper", "P-value")
if(exp)
colnames(mat)[1]<-"Odds ratio"
rownames(mat)<-object$variable
print(object$call)
cat("Number of imputations: ",object$imputations,"\n\n")
cat("Iterations, mean: ", mean(object$iter), " max:", max(object$iter),"\n\n")
cat("Confidence level, lower:", object$ci.level[1]*100,"%, upper:",object$ci.level[2]*100,"%\n")
print(mat)
}
is.mira <- function(
object
) {
if(is.null(attr(object,"class")))
return(FALSE)
if(attr(object,"class")=="mira")
return(TRUE)
return(FALSE)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.