Nothing
#' Cluster-Adjusted Confidence Intervals And p-Values For mlogit
#'
#' Computes p-values and confidence intervals for multinomial logit models based on cluster-specific model estimation (Ibragimov and Muller 2010). A separate model is estimated in each cluster, and then p-values and confidence intervals are computed based on a t/normal distribution of the cluster-specific estimates.
#'
#' @param mod A model estimated using \code{mlogit}.
#' @param dat The data set used to estimate \code{mod}.
#' @param cluster A formula of the clustering variable.
#' @param ci.level What confidence level should CIs reflect?
#' @param report Should a table of results be printed to the console?
#' @param truncate Should outlying cluster-specific beta estimates be excluded?
#' @param return.vcv Should a VCV matrix and the means of cluster-specific coefficient estimates be returned?
#'
#' @return A list with the elements
#' \item{p.values}{A matrix of the estimated p-values.}
#' \item{ci}{A matrix of confidence intervals.}
#' @author Justin Esarey
#' @note Confidence intervals are centered on the cluster averaged estimate, which can diverge from original model estimates under several circumstances (e.g., if clusters have different numbers of observations). Consequently, confidence intervals may not be centered on original model estimates. Any cluster for which all coefficients cannot be estimated will be automatically dropped from the analysis. If truncate = TRUE, any cluster for which any coefficient is more than 6 times the interquartile range from the cross-cluster mean will also be dropped as an outlier.
#' @examples
#' \dontrun{
#'
#' # example: predict type of heating system installed in house
#' require(mlogit)
#' data("Heating", package = "mlogit")
#' H <- Heating
#' H$region <- as.numeric(H$region)
#' H.ml <- dfidx(H, shape="wide", choice="depvar", varying=c(3:12),
#' idx = list(c("idcase", "region")))
#' m <- mlogit(depvar~ic+oc, H.ml)
#'
#' # compute cluster-adjusted p-values
#' cluster.im.h <- cluster.im.mlogit(m, H.ml, ~ region)
#'
#' }
#' @rdname cluster.im.mlogit
#' @importFrom mlogit mlogit hmftest mFormula is.mFormula mlogit.optim cov.mlogit cor.mlogit rpar scoretest med rg stdev qrpar prpar drpar
#' @references Esarey, Justin, and Andrew Menger. 2017. "Practical and Effective Approaches to Dealing with Clustered Data." \emph{Political Science Research and Methods} forthcoming: 1-35. <URL:http://jee3.web.rice.edu/cluster-paper.pdf>.
#' @references Ibragimov, Rustam, and Ulrich K. Muller. 2010. "t-Statistic Based Correlation and Heterogeneity Robust Inference." \emph{Journal of Business & Economic Statistics} 28(4): 453-468. <DOI:10.1198/jbes.2009.08046>.
#' @import stats
#' @importFrom dfidx dfidx idx
#' @importFrom utils write.table
#' @export
cluster.im.mlogit<-function(mod, dat, cluster, ci.level = 0.95, report = TRUE, truncate = FALSE, return.vcv = FALSE){
# compensate for bizarre R formula updating bug
# thanks to Jason Thorpe for reporting!
form.old <- update(mod$formula, 1 ~ 1 )
while(form.old != mod$formula){
form.old <- mod$formula
invisible(mod <- update(mod, new = .~.))
}
form <- mod$formula # what is the formula of this model?
variables <- all.vars(form) # what variables are in this model?
used.idx <- which(rownames(dat) %in% rownames(mod$mod)) # what observations are used?
dat <- dat[used.idx,] # keep only active observations
ind.variables <- names(coefficients(mod)) # what independent variables are in this model?
"%w/o%" <- function(x, y) x[!x %in% y] # create a without function (see ?match)
dv <- variables %w/o% all.vars(update(form, 1 ~ .)) # what is the dependent variable?
# obtain the clustering variable
clust.name <- all.vars(cluster) # name of the cluster variable
dat.rs <- as.data.frame(subset(idx(dat), select = clust.name )) # select cluster variable from data set
dat.rs$id.zz <- idx(dat, n=1) # choice index
dat.rs$ti.zz <- idx(dat, n=2) # alternative index
clust <- reshape(dat.rs, timevar="ti.zz", # reshape long to wide, store as clust
idvar=c("id.zz", clust.name), direction="wide")[[clust.name]]
if(sum(is.na(clust)>0)){stop("missing cluster indices")} # check for missing cluster indices
G <- length(unique(clust)) # how many clusters are in this model?
b.clust<-matrix(data=NA, nrow=G, ncol=length(coefficients(mod))) # store bootstrapped beta values
dropped.nc <- 0 # store number of non-converged clusters
for(i in 1:G){
clust.ind <- which(dat[[clust.name]] == unlist(unique(clust))[i]) # select obs in cluster i
clust.dat <- dat[clust.ind,] # create the cluster i data set
clust.mod.call <- mod$call # get original model call
clust.mod.call[[3]] <- quote(clust.dat) # modify call for cluster data set
clust.mod <- suppressWarnings(tryCatch(eval(clust.mod.call), # estimate model on cluster dataset
error = function(e){return(NULL)}))
fail <- is.null(clust.mod) # detect model error
if(fail==F){ # if model ran...
# detect whether variables were dropped in individual clusters
if(length(names(coefficients(clust.mod))) != length(ind.variables)){
stop("cluster-specific model(s) dropped variables; ensure that all variables vary within clusters", call.=FALSE)
}
b.clust[i,] <- coefficients(clust.mod) # store the cluster i beta coefficient
}
}
# purge non-converged clusters
b.clust <- na.omit(b.clust)
dropped.nc <- length(attr(b.clust, "na.action")) # record number of models dropped
G.t <- dim(b.clust)[1]
if(G.t == 0){stop("all clusters were dropped (see help file).")}
# remove clusters with outlying betas
dropped <- 0 # store number of outlying estimates
if(truncate==TRUE){ # if drop outlying betas...
IQR <- apply(FUN=quantile, MARGIN=2, X=b.clust, probs=c(0.25, 0.75)) # calculate inter-quartile range
b.clust.save <- b.clust # non-outlying beta identifier
for(i in 1:dim(b.clust)[2]){
b.clust.save[,i] <- ifelse( abs(b.clust[,i]) > # determine which estimates to save
(abs(mean(b.clust[,i])) + 6*abs(IQR[2,i] - IQR[1,i])), 0, 1)
}
save.clust <- apply(X=b.clust.save, MARGIN=1, FUN=min) # which rows had outlying betas
dropped <- dim(b.clust)[1] - sum(save.clust) # how many clusters dropped?
b.clust.adj <- cbind(b.clust, save.clust) # append the outlier ID to beta matrix
b.clust <- subset(b.clust, # drop the outlying betas
subset=(save.clust==1), select=1:dim(b.clust)[2])
}
G <- dim(b.clust)[1]
if(G == 0){stop("all clusters were dropped (see help file).")}
b.hat <- colMeans(b.clust) # calculate the avg beta across clusters
b.dev <- sweep(b.clust, MARGIN = 2, STATS = b.hat) # sweep out the avg betas
#s.hat <- sqrt( (1 / (G-1)) * colSums(b.dev^2) ) # manually calculate the SE of beta estimates across clusters (deprecated)
vcv.hat <- cov(b.dev) # calculate VCV matrix
s.hat <- sqrt(diag(vcv.hat)) # calculate standard error
t.hat <- sqrt(G) * (b.hat / s.hat) # calculate t-statistic
names(b.hat) <- ind.variables
# compute p-val based on # of clusters
p.out <- 2*pmin( pt(t.hat, df = G-1, lower.tail = TRUE), pt(t.hat, df = G-1, lower.tail = FALSE) )
# compute CIs
ci.lo <- b.hat - qt((1-ci.level)/2, df=(G-1), lower.tail=FALSE)*(s.hat/sqrt(G))
ci.hi <- b.hat + qt((1-ci.level)/2, df=(G-1), lower.tail=FALSE)*(s.hat/sqrt(G))
out <- matrix(p.out, ncol=1)
out.p <- cbind( rownames(coefficients(summary(mod))), round(out,3) )
out.ci <- cbind(ci.lo, ci.hi)
rownames(out.ci) <- names(coefficients(summary(mod)))
colnames(out.ci) <- c("CI lower", "CI higher")
print.ci <- cbind(rownames(coefficients(summary(mod))), ci.lo, ci.hi)
print.ci <- rbind(c("variable name", "CI lower", "CI higher"), print.ci)
printmat <- function(m){
write.table(format(m, justify="right"), row.names=F, col.names=F, quote=F, sep = " ")
}
if(report==T){
cat("\n", "Cluster-Adjusted p-values: ", "\n", "\n")
printmat(out.p)
cat("\n", "Confidence Intervals (centered on cluster-averaged results):", "\n", "\n")
printmat(print.ci)
if(dropped.nc > 0){cat("\n", "Note:", dropped.nc, "clusters were dropped due to missing coefficients (see help file).", "\n", "\n")}
if(dropped > 0){cat("\n", "Note:", dropped, "clusters were dropped as outliers (see help file).", "\n", "\n")}
}
out.list <- list()
out.list[["p.values"]] <- out
out.list[["ci"]] <- out.ci
if(return.vcv == TRUE){out.list[["vcv.hat"]] <- vcv.hat}
if(return.vcv == TRUE){out.list[["beta.bar"]] <- b.hat}
return(invisible(out.list))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.