Nothing
#' Rare Events Logistic Regression for Dichotomous Dependent Variables
#'@param formula a symbolic representation of the model to be
#' estimated, in the form \code{y ~ x1 + x2}, where \code{y} is the
#' dependent variable and \code{x1} and \code{x2} are the explanatory
#' variables, and \code{y}, \code{x1}, and \code{x2} are contained in the
#' same dataset. (You may include more than two explanatory variables,
#' of course.) The \code{+} symbol means ``inclusion'' not
#' ``addition.'' You may also include interaction terms and main
#' effects in the form \code{x1*x2} without computing them in prior
#' steps; \code{I(x1*x2)} to include only the interaction term and
#' exclude the main effects; and quadratic terms in the form
#' \code{I(x1^2)}.
#'@param model the name of a statistical model to estimate.
#' For a list of other supported models and their documentation see:
#' \url{http://docs.zeligproject.org/articles/}.
#'@param data the name of a data frame containing the variables
#' referenced in the formula or a list of multiply imputed data frames
#' each having the same variable names and row numbers (created by
#' \code{Amelia} or \code{\link{to_zelig_mi}}).
#'@param ... additional arguments passed to \code{zelig},
#' relevant for the model to be estimated.
#'@param by a factor variable contained in \code{data}. If supplied,
#' \code{zelig} will subset
#' the data frame based on the levels in the \code{by} variable, and
#' estimate a model for each subset. This can save a considerable amount of
#' effort. You may also use \code{by} to run models using MatchIt
#' subclasses.
#'@param cite If is set to 'TRUE' (default), the model citation will be printed
#' to the console.
#'
#' @details
#' The relogit procedure supports four optional arguments in addition to the
#' standard arguments for zelig(). You may additionally use:
#' \itemize{
#' \item \code{tau}: a vector containing either one or two values for \code{tau},
#' the true population fraction of ones. Use, for example, tau = c(0.05, 0.1) to specify
#' that the lower bound on tau is 0.05 and the upper bound is 0.1. If left unspecified, only
#' finite-sample bias correction is performed, not case-control correction.
#' \item \code{case.control}: if tau is specified, choose a method to correct for case-control
#' sampling design: "prior" (default) or "weighting".
#' \item \code{bias.correct}: a logical value of \code{TRUE} (default) or \code{FALSE}
#' indicating whether the intercept should be corrected for finite sample (rare events) bias.
#' }
#'
#' Additional parameters avaialable to many models include:
#' \itemize{
#' \item \code{weights}: vector of weight values or a name of a variable in the dataset
#' by which to weight the model. For more information see:
#' \url{http://docs.zeligproject.org/articles/weights.html}.
#' \item \code{bootstrap}: logical or numeric. If \code{FALSE} don't use bootstraps to
#' robustly estimate uncertainty around model parameters due to sampling error.
#' If an integer is supplied, the number of boostraps to run.
#' For more information see:
#' \url{http://docs.zeligproject.org/articles/bootstraps.html}.
#' }
#' @return Depending on the class of model selected, \code{zelig} will return
#' an object with elements including \code{coefficients}, \code{residuals},
#' and \code{formula} which may be summarized using
#' \code{summary(z.out)} or individually extracted using, for example,
#' \code{coef(z.out)}. See
#' \url{http://docs.zeligproject.org/articles/getters.html} for a list of
#' functions to extract model components. You can also extract whole fitted
#' model objects using \code{\link{from_zelig_model}}.
#'
#' @examples
#' library(Zelig)
#' data(mid)
#' z.out1 <- zelig(conflict ~ major + contig + power + maxdem + mindem + years,
#' data = mid, model = "relogit", tau = 1042/303772)
#' summary(z.out1)
#'
#' @seealso Vignette: \url{http://docs.zeligproject.org/articles/zelig_relogit.html}
#' @import methods
#' @export Zelig-relogit
#' @exportClass Zelig-relogit
#'
#' @include model-zelig.R
#' @include model-glm.R
#' @include model-binchoice.R
#' @include model-logit.R
zrelogit <- setRefClass("Zelig-relogit",
contains = "Zelig",
fields = list(family = "character",
link = "character",
linkinv = "function"))
zrelogit$methods(
initialize = function() {
callSuper()
.self$name <- "relogit"
.self$description <- "Rare Events Logistic Regression for Dichotomous Dependent Variables"
.self$fn <- quote(relogit)
.self$family <- "binomial"
.self$link <- "logit"
.self$wrapper <- "relogit"
ref1 <- bibentry(
bibtype="Article",
title = "Logistic Regression in Rare Events Data",
author = c(
person("Gary", "King"),
person("Langche", "Zeng")
),
journal = "Political Analysis",
volume = 9,
number = 2,
year = 2001,
pages = "137--163")
ref2 <- bibentry(
bibtype="Article",
title = "Explaining Rare Events in International Relations",
author = c(
person("Gary", "King"),
person("Langche", "Zeng")
),
journal = "International Organization",
volume = 55,
number = 3,
year = 2001,
pages = "693--715")
.self$refs<-c(.self$refs,ref1,ref2)
}
)
zrelogit$methods(
show = function(odds_ratios = FALSE, ...) {
if (.self$robust.se) {
if (!.self$mi & !.self$bootstrap) {
# Replace standard errors with robust standard errors
cat("Model: \n")
f5 <- .self$copy()
obj <- f5$from_zelig_model()
summ <- summary(obj)
robust_model <- lmtest::coeftest(obj,
vcov = sandwich::vcovHC(obj, "HC1"))
summ$coefficients[, c(2:4)] <- robust_model[, c(2:4)]
if (odds_ratios) {
summ <- or_summary(summ, label_mod_se = "(OR, robust)")
}
else
colnames(summ$coefficients)[2] <-
paste(colnames(summ$coefficients)[2], "(robust)")
print(summ)
}
else if (.self$mi || .self$bootstrap)
stop("Weighted case control correction results are not currently available for multiply imputed or bootstrapped data.",
call. = FALSE)
}
else if (!.self$robust.se & odds_ratios & !.self$mi & !.self$bootstrap) {
cat("Model: \n")
f5 <- .self$copy()
obj <- f5$from_zelig_model()
summ <- summary(obj)
summ <- or_summary(summ)
print(summ)
}
else {
callSuper(...)
}
#print(base::summary(.self$zelig.out))
}
)
zrelogit$methods(
zelig = function(formula, ..., tau = NULL, bias.correct = NULL,
case.control = NULL, data, by = NULL, bootstrap = FALSE) {
if (!is.null(tau)) {
if (any(tau <= 0))
stop("tau is the population proportion of 1's for the response variable.\nIt must be > 0.",
call. = FALSE)
}
.self$zelig.call <- match.call(expand.dots = TRUE)
.self$model.call <- .self$zelig.call
# Catch NULL case.control
if (is.null(case.control))
case.control <- "prior"
if (case.control == "weighting") # See GitHub issue #295
.self$robust.se <- TRUE
else if (length(.self$robust.se) == 0)
.self$robust.se <- FALSE
# Catch NULL bias.correct
if (is.null(bias.correct))
bias.correct = TRUE
# Construct formula. Relogit models have the structure:
# cbind(y, 1-y) ~ x1 + x2 + x3 + ... + xN
# Where y is the response.
# form <- update(formula, cbind(., 1 - .) ~ .)
# .self$model.call$formula <- form
.self$model.call$case.control <- case.control
.self$model.call$bias.correct <- bias.correct
.self$model.call$tau <- tau
callSuper(formula = formula, data = data, ..., weights = NULL, by = by,
bootstrap = bootstrap)
}
)
zrelogit$methods(
modcall_formula_transformer = function() {
"Transform model call formula."
# Construct formula. Relogit models have the structure:
# cbind(y, 1-y) ~ x1 + x2 + x3 + ... + xN
# Where y is the response.
relogit_form <- update(.self$formula, cbind(., 1 - .) ~ .)
.self$model.call$formula <- relogit_form
}
)
zrelogit$methods(
qi = function(simparam, mm) {
.self$linkinv <- eval(call(.self$family, .self$link))$linkinv
coeff <- simparam
eta <- simparam %*% t(mm)
eta <- Filter(function (y) !is.na(y), eta)
theta <- matrix(.self$linkinv(eta), nrow = nrow(coeff))
ev <- matrix(.self$linkinv(eta), ncol = ncol(theta))
pv <- matrix(nrow = nrow(ev), ncol = ncol(ev))
for (j in 1:ncol(ev))
pv[, j] <- rbinom(length(ev[, j]), 1, prob = ev[, j])
levels(pv) <- c(0, 1)
return(list(ev = ev, pv = pv))
}
)
#' Estimation function for rare events logit models
#'
#' @details This is intended as an internal function. Regular users should
#' use \code{zelig} with \code{model = "relogit"}.
#'
#' @keywords internal
relogit <- function(formula,
data = sys.parent(),
tau = NULL,
bias.correct = TRUE,
case.control = "prior",
...){
mf <- match.call()
mf$tau <- mf$bias.correct <- mf$case.control <- NULL
if (!is.null(tau)) {
tau <- unique(tau)
if (length(case.control) > 1)
stop("You can only choose one option for case control correction.")
ck1 <- grep("p", case.control)
ck2 <- grep("w", case.control)
if (length(ck1) == 0 & length(ck2) == 0)
stop("choose either case.control = \"prior\" ",
"or case.control = \"weighting\"")
if (length(ck2) == 0)
weighting <- FALSE
else
weighting <- TRUE
}
else
weighting <- FALSE
if (length(tau) >= 2) {
stop("tau must be a vector of length less than or equal to 1. For multiple taus, estimate models individually.")
# else if (length(tau) == 2) {
# The following is not currently supported due to issue with summary
# mf[[1]] <- relogit
# res <- list()
# mf$tau <- min(tau)
# res$lower.estimate <- eval(as.call(mf), parent.frame())
# mf$tau <- max(tau)
# res$upper.estimate <- eval(as.call(mf), parent.frame())
# res$formula <- formula
# class(res) <- c("Relogit2", "Relogit")
# return(res)
}
else {
mf[[1]] <- glm
mf$family <- binomial(link = "logit")
y2 <- model.response(model.frame(mf$formula, data))
if (is.matrix(y2))
y <- y2[,1]
else
y <- y2
ybar <- mean(y)
if (weighting) {
w1 <- tau / ybar
w0 <- (1-tau) / (1-ybar)
wi <- w1 * y + w0 * (1 - y)
mf$weights <- wi
}
res <- eval(as.call(mf), parent.frame())
res$call <- match.call(expand.dots = TRUE)
res$tau <- tau
X <- model.matrix(res)
## bias correction
if (bias.correct){
pihat <- fitted(res)
if (is.null(tau)) # w_i = 1
wi <- rep(1, length(y))
else if (weighting)
res$weighting <- TRUE
else {
w1 <- tau/ybar
w0 <- (1 - tau) / (1 - ybar)
wi <- w1 * y + w0 * (1 - y)
res$weighting <- FALSE
}
W <- pihat * (1 - pihat) * wi
##Qdiag <- diag(X%*%solve(t(X)%*%diag(W)%*%X)%*%t(X))
Qdiag <- lm.influence(lm(y ~ X - 1, weights = W), do.coef = FALSE)$hat / W
if (is.null(tau)) # w_1=1 since tau=ybar
xi <- 0.5 * Qdiag * (2 * pihat - 1)
else
xi <- 0.5 * Qdiag * ((1 + w1) * pihat - w1) # returns ISQ (2001, eq. 11)
## xi <- 0.5 * Qdiag * ((1 + w0) * pihat - w0)
res$coefficients <- res$coefficients -
lm(xi ~ X - 1, weights = W)$coefficients
res$bias.correct <- TRUE
}
else
res$bias.correct <- FALSE
## prior correction
if (!is.null(tau) & !weighting){
if (tau <= 0 || tau >= 1)
stop("\ntau needs to be between 0 and 1.\n")
res$coefficients["(Intercept)"] <- res$coefficients["(Intercept)"] -
log(((1 - tau) / tau) * (ybar / (1 - ybar)))
res$prior.correct <- TRUE
res$weighting <- FALSE
}
else
res$prior.correct <- FALSE
if (is.null(res$weighting))
res$weighting <- FALSE
res$linear.predictors <- t(res$coefficients) %*% t(X)
res$fitted.values <- 1 / (1 + exp(-res$linear.predictors))
res$zelig <- "Relogit"
class(res) <- c("Relogit", "glm", "lm")
return(res)
}
}
zrelogit$methods(mcfun = function(x, b0 = 0, b1 = 1, alpha, mc.seed=123, keepall=FALSE, ..., sim = TRUE) {
set.seed(mc.seed)
mu <- 1/(1 + exp(-b0 - b1 * x))
y <- rbinom(n = length(x), size = 1, prob = mu)
if(keepall){
flag <- rep(TRUE, length(x))
}else{
select <- runif(length(x)) <alpha
flag <- ((y==0) & (select)) | (y==1)
}
if (sim) {
return(data.frame(y.sim=y[flag], x.sim=x[flag]))
} else {
return(data.frame(y.hat=mu[flag], x.seq=x[flag]))
}
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.