R/setx.R

#' Setting Explanatory Variable Values
#'
#' The \code{setx} command uses the variables identified in
#' the \code{formula} generated by \code{zelig} and sets the values of
#' the explanatory variables to the selected values.  Use \code{setx}
#' after \code{zelig} and before \code{sim} to simulate quantities of
#' interest.
#' @param obj the saved output from zelig
#' @param fn a list of functions to apply to the data frame
#' @param data a new data frame used to set the values of
#'   explanatory variables. If data = NULL (the default), the
#'   data frame called in zelig is used
#' @param cond   a logical value indicating whether unconditional
#'   (default) or conditional (choose \code{cond = TRUE}) prediction
#'   should be performed.  If you choose \code{cond = TRUE}, \code{setx}
#'   will coerce \code{fn = NULL} and ignore the additional arguments in 
#'   \code{\dots}.  If \code{cond = TRUE} and \code{data = NULL},
#'   \code{setx} will prompt you for a data frame.
#' @param ... user-defined values of specific variables for overwriting the
#'   default values set by the function \code{fn}.  For example, adding
#'   \code{var1 = mean(data\$var1)} or \code{x1 = 12} explicitly sets the value
#'   of \code{x1} to 12.  In addition, you may specify one explanatory variable
#'   as a range of values, creating one observation for every unique value in
#'   the range of values
#' @return For unconditional prediction, \code{x.out} is a model matrix based
#'   on the specified values for the explanatory variables.  For multiple
#'   analyses (i.e., when choosing the \code{by} option in \code{\link{zelig}},
#'   \code{setx} returns the selected values calculated over the entire
#'   data frame.  If you wish to calculate values over just one subset of
#'   the data frame, the 5th subset for example, you may use:  
#'   \code{x.out <- setx(z.out[[5]])}
#' @export
#' @examples
#'
#' # Unconditional prediction:
#' data(turnout)
#' z.out <- zelig(vote ~ race + educate, model = "logit", data = turnout)
#' x.out <- setx(z.out)
#' s.out <- sim(z.out, x = x.out)
#'
#' @author Matt Owen \email{mowen@@iq.harvard.edu}, Olivia Lau and Kosuke Imai 
#' @seealso The full Zelig manual may be accessed online at
#'   \url{http://gking.harvard.edu/zelig}
#' @keywords file
setx <- function(obj, fn=NULL, data=NULL, cond=FALSE, withdata=FALSE,...)
  UseMethod("setx")
#' Set explanatory variables
#'
#' Set explanatory variables
#' @usage \method{setx}{default}(obj, fn=NULL, data=NULL, cond=FALSE, ...)
#' @S3method setx default
#' @param obj a 'zelig' object
#' @param fn a list of key-value pairs specifying which function apply to
#'           columns of the keys data-types
#' @param data a data.frame
#' @param cond ignored
#' @param ... parameters specifying what to explicitly set each column as. This
#'            is used to produce counterfactuals
#' @return a 'setx' object
#' @author Matt Owen \email{mowen@@iq.harvard.edu}, Kosuke Imai, and Olivia Lau 
setx.default <- function(obj, fn=NULL, data=NULL, cond=FALSE, withdata=FALSE, ...) {

  # Warnings and errors
  if (!missing(cond))
    warning('"cond" is not currently supported by this version of Zelig')

  # Get formula used for the call to the model
  form <- formula(obj)

  # Parsed formula. This is an intermediate for used for processin design
  # matrices, etc.
  parsed.formula <- parseFormula(form, data)

  # If data.frame is not explicitly set, use the one from the Zelig call
  if (is.null(data))
    data <- obj$data

  # Get the dots as a set of expressions
  symbolic.dots <- match.call(expand.dots = FALSE)[["..."]]

  if(withdata){
  # Create a variable to hold the values of the dot parameters
    dots <- list()
  # Assign values to the dot parameters
    for (key in names(symbolic.dots)) {
      result <- with(data, eval(symbolic.dots[[key]]))   # Note this used to allow setx definition within a workspace generated b
      dots[[key]] <- result
    }
  }else{
  # Note, old with(,eval()) approach used to allow setx definition within a workspace generated by the data
  # so now need say: setx( z.out, x=quantile(data$x,0.2)) rather than setx( z.out, x=quantile(x,0.2))
  # Might be a way to allow both, seamlessly without additional argument.
  ## cchoirat: fix
    dots <- list(...)
  }
    
    
  # Extract information about terms
  # Note: the functions 'getPredictorTerms' and 'getOutcomeTerms' are in need
  # of a rewrite. At the moment, they are pretty kludgey (written by Matt O.).
  vars.obj <- getPredictorTerms(form)
  not.vars <- getResponseTerms(form)

  # Default the environment to the parent
  env.obj <- parent.frame()

  # explanatory variables
  explan.obj <- Filter(function (x) x %in% vars.obj, names(dots))

  # defaults for fn
  if (missing(fn) || !is.list(fn))
    # set fn to appropriate values, if NULL
    fn <- list(numeric = mean,
               ordered = Median,
               other   = Mode
               )

  # res
  res <- list()

  # compute values
  # if fn[[mode(data(, key))]] exists,
  # then use that function to compute result
  for (key in all.vars(form[[3]])) {
    # skip values that are explicitly set
    if (key %in% names(dots) || key %in% not.vars)
      next

    m <- class(data[,key])[[1]]

    # Match the class-type with the correct function to call
    if (m %in% names(fn))
      res[[key]] <- fn[[m]](data[ ,key])

    # If it is a numeric, then we just evaluate it like a numeric
    else if (is.numeric(data[,key]))
      res[[key]] <- fn$numeric(data[ ,key])

    # If it's ordered, then we take the median, because that's the best we got
    else if (is.ordered(data[,key]))
      res[[key]] <- fn$ordered(data[ ,key])

    # Otherwise we take the mode, because that always kinda makes sense.
    else
      res[[key]] <- fn$other(data[ ,key])
  }

  # Add explicitly set values
  for (key in names(symbolic.dots)) {
    if (! key %in% colnames(data)) {
      warning("`", key,
              "` is not an column in the data-set, and will be ignored")
      next
    }

    res[[key]] <- if (is.factor(data[,key])) {
      factor(dots[[key]], levels=levels(data[,key]))
    }
    else
      dots[[key]]
  }

  # Convert "res" into a list of lists. This makes atomic entries into lists.
  for (k in 1:length(res)) {
    if (!is.factor(res[[k]]))
      res[[k]] <- as.list(res[[k]])
  }

  # Combine all the sublists
  res <- do.call("mix", res)

  # A list containing paired design matrices and their corresponding data.frame's
  frames.and.designs <- list()

  # Iterate through all the results
  for (k in 1:length(res)) {
    #
    label <- paste(names(res[[k]]), "=", res[[k]], sep="", collapse=", ")

    # Get specified explanatory variables
    specified <- res[[k]]

    # Construct data-frame
    d <- constructDataFrame(data, specified)

    # Construct model/design matrix
    # NOTE: THIS NEEDS TO BE MORE ROBUST
    m <- constructDesignMatrix(d, parsed.formula)

    # Model matrix, as a data.frame
    dat <- tryCatch(as.data.frame(m), error = function (e) NA)

    # Specify information
    frames.and.designs[[label]] <- list(
      label = label,
      data.frame = d,
      model.matrix = m,
      as.data.frame = dat
      )
  }

  # Phonetically... setx's
  setexes <- list()

  for (key in names(frames.and.designs)) {
    mod <- frames.and.designs[[key]]$model.matrix
    d <- frames.and.designs[[key]]$data.frame
    dat <- frames.and.designs[[key]]$as.data.frame
    specified <- res[[k]]

    setexes[[key]] <- list(
      name   = obj$name,
      call   = match.call(),
      formula= form,
      matrix = mod,
      updated = d,
      data   = dat,
      values = specified,
      fn     = fn,
      cond   = cond,
      new.data = data,
      special.parameters = dots,
      symbolic.parameters = symbolic.dots,
      label = obj$label,
      explan = vars.obj,
      pred   = not.vars,
      package.name = obj$package.name
    )
    attr(setexes[[key]], "pooled") <- F
    class(setexes[[key]]) <- c(obj$name, "setx")
  }

  if (length(setexes) == 1) {
    attr(setexes, "pooled") <- FALSE
    setexes <- setexes[[1]]
    class(setexes) <- c(obj$name, "setx")
  }
  else {
    attr(setexes, "pooled") <- TRUE
    class(setexes) <- c(obj$name, "pooled.setx", "setx")
  }

  # Return
  setexes
}


#' Construct Data Frame
#' Construct and return a tiny (single-row) data-frame from a larger data-frame,
#' a list of specified values, and a formula
#' @param data a ``data.frame'' that will be used to create a small design matrix
#' @param specified a list with key-value pairs that will be used to explicitly
#' set several values
#' @return a ``data.frame'' containing a single row
constructDataFrame <- function (data, specified) {
  # Make a tiny data-frame with all the necessary columns
  d <- data[1,]

  # Give the computed values to those entries
  for (key in names(specified)) {
    val <- specified[[key]]

    if (is.factor(val) || !(is.numeric(val) || is.ordered(val)))
      val <- factor(val, levels=levels(data[,key]))

    d[, key] <- val
  }

 
  # Return tiny data-frame
  d
}

#' Construct Design Matrix from
#' Construct and return a design matrix based on a tiny data-frame (single-row).
#' @param data a ``data.frame'' (preferably single-rowed) that will be used to
#' create a small design matrix
#' @param formula a formula, whose predictor variables will be used to create a
#' design matrix
#' @return a design (model) matrix
constructDesignMatrix <- function (data, formula) {
  tryCatch(
           # Attempt to generate the design matrix of the formula
           model.matrix(formula, data), 

           # If there is a warning... probably do nothing
           # warning = function (w) w,

           # If there is an error, warn the user and specify the design
           # matrix as NA
           error = function (e) {
             NA
           }
           )
}
#' Set Explanatory Variables for Multiply Imputed Data-sets
#' This function simply calls setx.default once for every fitted model
#' within the 'zelig.MI' object
#' @usage \method{setx}{MI}(obj, ..., data=NULL)
#' @S3method setx MI
#' @param obj a 'zelig' object
#' @param ... user-defined values of specific variables for overwriting the
#'   default values set by the function \code{fn}
#' @param data a new data-frame
#' @return a 'setx.mi' object used for computing Quantities of Interest by the
#'   'sim' method
#' @author Matt Owen \email{mowen@@iq.harvard.edu}
#' @seealso \link{setx}
setx.MI <- function(obj, ..., data = NULL) {

  results.list <- list()

  for (key in names(obj)) {
    object <- obj[[key]]
    results.list[[key]] <- setx(object, ..., data = data)
  }

  class(results.list) <- c("setx.mi", "setx")
  results.list
}
IQSS/Zelig4 documentation built on May 9, 2019, 9:13 a.m.