#' Estimating a Statistical Model
#'
#' The zelig function estimates a variety of statistical
#' models. Use \code{zelig} output with \code{setx} and \code{sim} to compute
#' quantities of interest, such as predicted probabilities, expected values, and
#' first differences, along with the associated measures of uncertainty
#' (standard errors and confidence intervals).
#'
#' This documentation describes the \code{zelig} Zelig 4 compatibility wrapper
#' function.
#'
#' @param formula a symbolic representation of the model to be
#' estimated, in the form \code{y \~\, x1 + x2}, where \code{y} is the
#' dependent variable and \code{x1} and \code{x2} are the explanatory
#' variables, and \code{y}, \code{x1}, and \code{x2} are contained in the
#' same dataset. (You may include more than two explanatory variables,
#' of course.) The \code{+} symbol means ``inclusion'' not
#' ``addition.'' You may also include interaction terms and main
#' effects in the form \code{x1*x2} without computing them in prior
#' steps; \code{I(x1*x2)} to include only the interaction term and
#' exclude the main effects; and quadratic terms in the form
#' \code{I(x1^2)}.
#' @param model the name of a statistical model to estimate.
#' For a list of other supported models and their documentation see:
#' \url{http://docs.zeligproject.org/articles/}.
#' @param data the name of a data frame containing the variables
#' referenced in the formula or a list of multiply imputed data frames
#' each having the same variable names and row numbers (created by
#' \code{Amelia} or \code{\link{to_zelig_mi}}).
#' @param ... additional arguments passed to \code{zelig},
#' relevant for the model to be estimated.
#' @param by a factor variable contained in \code{data}. If supplied,
#' \code{zelig} will subset
#' the data frame based on the levels in the \code{by} variable, and
#' estimate a model for each subset. This can save a considerable amount of
#' effort. For example, to run the same model on all fifty states, you could
#' use: \code{z.out <- zelig(y ~ x1 + x2, data = mydata, model = 'ls',
#' by = 'state')} You may also use \code{by} to run models using MatchIt
#' subclasses.
#' @param cite If is set to 'TRUE' (default), the model citation will be printed
#' to the console.
#'
#' @details
#' Additional parameters avaialable to many models include:
#' \itemize{
#' \item weights: vector of weight values or a name of a variable in the dataset
#' by which to weight the model. For more information see:
#' \url{http://docs.zeligproject.org/articles/weights.html}.
#' \item bootstrap: logical or numeric. If \code{FALSE} don't use bootstraps to
#' robustly estimate uncertainty around model parameters due to sampling error.
#' If an integer is supplied, the number of boostraps to run.
#' For more information see:
#' \url{http://docs.zeligproject.org/articles/bootstraps.html}.
#' }
#'
#' @return Depending on the class of model selected, \code{zelig} will return
#' an object with elements including \code{coefficients}, \code{residuals},
#' and \code{formula} which may be summarized using
#' \code{summary(z.out)} or individually extracted using, for example,
#' \code{coef(z.out)}. See
#' \url{http://docs.zeligproject.org/articles/getters.html} for a list of
#' functions to extract model components. You can also extract whole fitted
#' model objects using \code{\link{from_zelig_model}}.
#'
#' @seealso \url{http://docs.zeligproject.org/articles/}
#' @name zelig
#' @author Matt Owen, Kosuke Imai, Olivia Lau, and Gary King
#' @export
zelig <- function(formula,
model,
data,
...,
by = NULL,
cite = TRUE) {
# .Deprecated('\nz$new() \nz$zelig(...)') Check if required model argument is
# specified
if (missing(model))
stop("Estimation model type not specified.\nSelect estimation model type with the model argument.",
call. = FALSE)
# Zelig Core
zeligmodels <- system.file(file.path("JSON", "zelig5models.json"),
package = "Zelig")
models <- jsonlite::fromJSON(txt = readLines(zeligmodels))$zelig5models
# Zelig Choice
zeligchoicemodels <- system.file(file.path("JSON", "zelig5choicemodels.json"),
package = "ZeligChoice")
if (zeligchoicemodels != "")
models <- c(models, jsonlite::fromJSON(txt = readLines(zeligchoicemodels))$zelig5choicemodels)
# Zelig Panel
zeligpanelmodels <- system.file(file.path("JSON", "zelig5panelmodels.json"),
package = "ZeligPanel")
if (zeligpanelmodels != "")
models <- c(models, jsonlite::fromJSON(txt = readLines(zeligpanelmodels))$zelig5panelmodels)
# Zelig GAM
zeligammodels <- system.file(file.path("JSON", "zelig5gammodels.json"),
package = "ZeligGAM")
if (zeligammodels != "")
models <- c(models, jsonlite::fromJSON(txt = readLines(zeligammodels))$zelig5gammodels)
# Zelig Multilevel
zeligmixedmodels <- system.file(file.path("JSON", "zelig5mixedmodels.json"),
package = "ZeligMultilevel")
if (zeligmixedmodels != "")
models <- c(models, jsonlite::fromJSON(txt = readLines(zeligmixedmodels))$zelig5mixedmodels)
# Aggregating all available models
models4 <- list()
for (i in seq(models)) {
models4[[models[[i]]$wrapper]] <- names(models)[i]
}
model.init <- sprintf("z%s$new()", models4[[model]])
if (length(model.init) == 0)
stop(sprintf("%s is not a supported model type.", model), call. = FALSE)
z5 <- try(eval(parse(text = model.init)), silent = TRUE)
if ("try-error" %in% class(z5))
stop("Model '", model, "' not found")
## End: Zelig 5 models
mf <- match.call()
mf$model <- NULL
mf$cite <- NULL
mf[[1]] <- quote(z5$zelig)
mf <- try(eval(mf, environment()), silent = TRUE)
if ("try-error" %in% class(mf))
z5$zelig(formula = formula, data = data, ..., by = by)
if (cite)
z5$cite()
return(z5)
}
#' Setting Explanatory Variable Values
#'
#' The \code{setx} function uses the variables identified in
#' the \code{formula} generated by \code{zelig} and sets the values of
#' the explanatory variables to the selected values. Use \code{setx}
#' after \code{zelig} and before \code{sim} to simulate quantities of
#' interest.
#'
#' This documentation describes the \code{setx} Zelig 4 compatibility wrapper
#' function.
#'
#' @param obj output object from \code{\link{zelig}}
#' @param fn a list of functions to apply to the data frame
#' @param data a new data frame used to set the values of
#' explanatory variables. If \code{data = NULL} (the default), the
#' data frame called in \code{\link{zelig}} is used
#' @param cond a logical value indicating whether unconditional
#' (default) or conditional (choose \code{cond = TRUE}) prediction
#' should be performed. If you choose \code{cond = TRUE}, \code{setx}
#' will coerce \code{fn = NULL} and ignore the additional arguments in
#' \code{\dots}. If \code{cond = TRUE} and \code{data = NULL},
#' \code{setx} will prompt you for a data frame.
#' @param ... user-defined values of specific variables for overwriting the
#' default values set by the function \code{fn}. For example, adding
#' \code{var1 = mean(data\$var1)} or \code{x1 = 12} explicitly sets the value
#' of \code{x1} to 12. In addition, you may specify one explanatory variable
#' as a range of values, creating one observation for every unique value in
#' the range of values
#' @return The output is returned in a field to the Zelig object. For
#' unconditional prediction, \code{x.out} is a model matrix based
#' on the specified values for the explanatory variables. For multiple
#' analyses (i.e., when choosing the \code{by} option in \code{\link{zelig}},
#' \code{setx} returns the selected values calculated over the entire
#' data frame. If you wish to calculate values over just one subset of
#' the data frame, the 5th subset for example, you may use:
#' \code{x.out <- setx(z.out[[5]])}
#'
#' @examples
#' # Unconditional prediction:
#' data(turnout)
#' z.out <- zelig(vote ~ race + educate, model = 'logit', data = turnout)
#' x.out <- setx(z.out)
#' s.out <- sim(z.out, x = x.out)
#'
#' @author Matt Owen, Olivia Lau and Kosuke Imai
#' @seealso The full Zelig manual may be accessed online at
#' \url{http://docs.zeligproject.org/articles/}
#' @keywords file
#' @export
setx <- function(obj, fn = NULL, data = NULL, cond = FALSE, ...) {
# .Deprecated('\nz$new() \nz$zelig(...) \nz$setx() or z$setx1 or z$setrange')
if(!is_zelig(obj, fail = FALSE))
obj <- to_zelig(obj)
x5 <- obj$copy()
# This is the length of each argument in '...'s
s <- list(...)
if (length(s) > 0) {
hold <- rep(1, length(s))
for (i in 1:length(s)) {
hold[i] <- length(s[i][[1]])
}
} else {
hold <- 1
}
if (max(hold) > 1) {
x5$setrange(...)
} else {
x5$setx(...)
}
return(x5)
}
#' Setting Explanatory Variable Values for First Differences
#'
#' This documentation describes the \code{setx1} Zelig 4 compatibility wrapper
#' function. The wrapper is primarily useful for setting fitted values
#' for creating first differences in piped workflows.
#'
#' @param obj output object from \code{\link{zelig}}
#' @param fn a list of functions to apply to the data frame
#' @param data a new data frame used to set the values of
#' explanatory variables. If \code{data = NULL} (the default), the
#' data frame called in \code{\link{zelig}} is used
#' @param cond a logical value indicating whether unconditional
#' (default) or conditional (choose \code{cond = TRUE}) prediction
#' should be performed. If you choose \code{cond = TRUE}, \code{setx1}
#' will coerce \code{fn = NULL} and ignore the additional arguments in
#' \code{\dots}. If \code{cond = TRUE} and \code{data = NULL},
#' \code{setx1} will prompt you for a data frame.
#' @param ... user-defined values of specific variables for overwriting the
#' default values set by the function \code{fn}. For example, adding
#' \code{var1 = mean(data\$var1)} or \code{x1 = 12} explicitly sets the value
#' of \code{x1} to 12. In addition, you may specify one explanatory variable
#' as a range of values, creating one observation for every unique value in
#' the range of values
#' @return The output is returned in a field to the Zelig object. For
#' unconditional prediction, \code{x.out} is a model matrix based
#' on the specified values for the explanatory variables. For multiple
#' analyses (i.e., when choosing the \code{by} option in \code{\link{zelig}},
#' \code{setx1} returns the selected values calculated over the entire
#' data frame. If you wish to calculate values over just one subset of
#' the data frame, the 5th subset for example, you may use:
#' \code{x.out <- setx(z.out[[5]])}
#'
#' @examples
#' library(dplyr) # contains pipe operator %>%
#' data(turnout)
#'
#' # plot first differences
#' zelig(Fertility ~ Education, data = swiss, model = 'ls') %>%
#' setx(z4, Education = 10) %>%
#' setx1(z4, Education = 30) %>%
#' sim() %>%
#' plot()
#'
#' @author Christopher Gandrud, Matt Owen, Olivia Lau, Kosuke Imai
#' @seealso The full Zelig manual may be accessed online at
#' \url{http://docs.zeligproject.org/articles/}
#' @keywords file
#' @export
setx1 <- function(obj, fn = NULL, data = NULL, cond = FALSE, ...) {
is_zelig(obj)
x5 <- obj$copy()
# This is the length of each argument in '...'s
s <- list(...)
if (length(s) > 0) {
hold <- rep(1, length(s))
for (i in 1:length(s)) {
hold[i] <- length(s[i][[1]])
}
} else {
hold <- 1
}
if (max(hold) > 1) {
x5$setrange1(...)
} else {
x5$setx1(...)
}
return(x5)
}
#' Generic Method for Computing and Organizing Simulated Quantities of Interest
#'
#' Simulate quantities of interest from the estimated model
#' output from \code{zelig()} given specified values of explanatory
#' variables established in \code{setx()}. For classical \emph{maximum
#' likelihood} models, \code{sim()} uses asymptotic normal
#' approximation to the log-likelihood. For \emph{Bayesian models},
#' Zelig simulates quantities of interest from the posterior density,
#' whenever possible. For \emph{robust Bayesian models}, simulations
#' are drawn from the identified class of Bayesian posteriors.
#' Alternatively, you may generate quantities of interest using
#' bootstrapped parameters.
#'
#' This documentation describes the \code{sim} Zelig 4 compatibility wrapper
#' function.
#'
#' @param obj output object from \code{zelig}
#' @param x values of explanatory variables used for simulation,
#' generated by \code{setx}. Not if ommitted, then \code{sim} will look for
#' values in the reference class object
#' @param x1 optional values of explanatory variables (generated by a
#' second call of \code{setx})
#' particular computations of quantities of interest
#' @param y a parameter reserved for the computation of particular
#' quantities of interest (average treatment effects). Few
#' models currently support this parameter
#' @param num an integer specifying the number of simulations to compute
#' @param bootstrap currently unsupported
#' @param bootfn currently unsupported
#' @param cond.data currently unsupported
#' @param ... arguments reserved future versions of Zelig
#' @return The output stored in \code{s.out} varies by model. Use the
#' \code{names} function to view the output stored in \code{s.out}.
#' Common elements include:
#' \item{x}{the \code{\link{setx}} values for the explanatory variables,
#' used to calculate the quantities of interest (expected values,
#' predicted values, etc.). }
#' \item{x1}{the optional \code{\link{setx}} object used to simulate
#' first differences, and other model-specific quantities of
#' interest, such as risk-ratios.}
#' \item{call}{the options selected for \code{\link{sim}}, used to
#' replicate quantities of interest. }
#' \item{zelig.call}{the original function and options for
#' \code{\link{zelig}}, used to replicate analyses. }
#' \item{num}{the number of simulations requested. }
#' \item{par}{the parameters (coefficients, and additional
#' model-specific parameters). You may wish to use the same set of
#' simulated parameters to calculate quantities of interest rather
#' than simulating another set.}
#' \item{qi\$ev}{simulations of the expected values given the
#' model and \code{x}. }
#' \item{qi\$pr}{simulations of the predicted values given by the
#' fitted values. }
#' \item{qi\$fd}{simulations of the first differences (or risk
#' difference for binary models) for the given \code{x} and \code{x1}.
#' The difference is calculated by subtracting the expected values
#' given \code{x} from the expected values given \code{x1}. (If do not
#' specify \code{x1}, you will not get first differences or risk
#' ratios.) }
#' \item{qi\$rr}{simulations of the risk ratios for binary and
#' multinomial models. See specific models for details.}
#' \item{qi\$ate.ev}{simulations of the average expected
#' treatment effect for the treatment group, using conditional
#' prediction. Let \eqn{t_i} be a binary explanatory variable defining
#' the treatment (\eqn{t_i=1}) and control (\eqn{t_i=0}) groups. Then the
#' average expected treatment effect for the treatment group is
#' \deqn{ \frac{1}{n}\sum_{i=1}^n [ \, Y_i(t_i=1) -
#' E[Y_i(t_i=0)] \mid t_i=1 \,],}
#' where \eqn{Y_i(t_i=1)} is the value of the dependent variable for
#' observation \eqn{i} in the treatment group. Variation in the
#' simulations are due to uncertainty in simulating \eqn{E[Y_i(t_i=0)]},
#' the counterfactual expected value of \eqn{Y_i} for observations in the
#' treatment group, under the assumption that everything stays the
#' same except that the treatment indicator is switched to \eqn{t_i=0}. }
#' \item{qi\$ate.pr}{simulations of the average predicted
#' treatment effect for the treatment group, using conditional
#' prediction. Let \eqn{t_i} be a binary explanatory variable defining
#' the treatment (\eqn{t_i=1}) and control (\eqn{t_i=0}) groups. Then the
#' average predicted treatment effect for the treatment group is
#' \deqn{ \frac{1}{n}\sum_{i=1}^n [ \, Y_i(t_i=1) -
#' \widehat{Y_i(t_i=0)} \mid t_i=1 \,],}
#' where \eqn{Y_i(t_i=1)} is the value of the dependent variable for
#' observation \eqn{i} in the treatment group. Variation in the
#' simulations are due to uncertainty in simulating
#' \eqn{\widehat{Y_i(t_i=0)}}, the counterfactual predicted value of
#' \eqn{Y_i} for observations in the treatment group, under the
#' assumption that everything stays the same except that the
#' treatment indicator is switched to \eqn{t_i=0}.}
#'
#' @author Christopher Gandrud, Matt Owen, Olivia Lau and Kosuke Imai
#' @export
sim <- function(obj, x, x1, y = NULL, num = 1000, bootstrap = F,
bootfn = NULL, cond.data = NULL, ...) {
# .Deprecated('\nz$new() \n[...] \nz$sim(...)')
is_zelig(obj)
if (!missing(x)) s5 <- x$copy()
if (!missing(x1)) {
s15 <- x1$copy()
if (!is.null(s15$setx.out$x)) {
s5$setx.out$x1 <- s15$setx.out$x
s5$bsetx1 <- TRUE
}
if (!is.null(s15$setx.out$range)) {
s5$range1 <- s15$range
s5$setx.out$range1 <- s15$setx.out$range
s5$bsetrange1 <- TRUE
}
}
if (missing(x)) s5 <- obj$copy()
s5$sim(num = num)
return(s5)
}
#' Extract standard errors from a Zelig estimated model
#'
#' @param object an object of class Zelig
#' @author Christopher Gandrud
#' @export
get_se <- function(object) {
is_zelig(object)
out <- object$get_se()
return(out)
}
#' Extract p-values from a Zelig estimated model
#'
#' @param object an object of class Zelig
#' @author Christopher Gandrud
#' @export
get_pvalue <- function(object) {
is_zelig(object)
out <- object$get_pvalue()
return(out)
}
#' Extract quantities of interest from a Zelig simulation
#'
#' @param object an object of class Zelig
#' @param qi character string with the name of quantity of interest desired:
#' `"ev"` for expected values, `"pv"` for predicted values or
#' `"fd"` for first differences.
#' @param xvalue chracter string stating which of the set of values of `x`
#' should be used for getting the quantity of interest.
#' @param subset subset for multiply imputed data (only relevant if multiply
#' imputed data is supplied in the original call.)
#' @author Christopher Gandrud
#' @md
#' @export
get_qi <- function(object, qi = "ev", xvalue = "x", subset = NULL) {
is_zelig(object)
out <- object$get_qi(qi = qi, xvalue = xvalue, subset = subset)
return(out)
}
#' Compute simulated (sample) average treatment effects on the treated from
#' a Zelig model estimation
#'
#' @param object an object of class Zelig
#' @param treatment character string naming the variable that denotes the
#' treatment and non-treated groups.
#' @param treated value of `treatment` variable indicating treatment
#' @param num number of simulations to run. Default is 1000.
#' @examples
#' library(dplyr)
#' data(sanction)
#' z.att <- zelig(num ~ target + coop + mil, model = "poisson",
#' data = sanction) %>%
#' ATT(treatment = "mil") %>%
#' get_qi(qi = "ATT", xvalue = "TE")
#'
#' @author Christopher Gandrud
#' @md
#' @export
ATT <- function(object, treatment, treated = 1, num = NULL) {
is_zelig(object)
object$ATT(treatment = treatment, treated = treated,
quietly = TRUE, num = num)
return(object)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.