utils::globalVariables("u")
#' Instrumental Variables: Extrapolation by Marginal Treatment Effects
#'
#' This function provides a general framework for using the marginal
#' treatment effect (MTE) to extrapolate. The model is the same binary
#' treatment instrumental variable (IV) model considered by Imbens and
#' Angrist (1994) (\doi{10.2307/2951620}) and Heckman and Vytlacil
#' (2005) (\doi{10.1111/j.1468-0262.2005.00594.x}). The framework on
#' which this function is based was developed by Mogstad, Santos and
#' Torgovitsky (2018) (\doi{10.3982/ECTA15463}). See also the recent
#' survey paper on extrapolation in IV models by Mogstad and
#' Torgovitsky (2018)
#' (\doi{10.1146/annurev-economics-101617-041813}). A detailed
#' description of the module and its features can be found in
#' \href{https://a-torgovitsky.github.io/shea-torgovitsky.pdf}{Shea
#' and Torgovitsky (2021)}.
#'
#' @import methods stats utils
#'
#' @param data \code{data.frame} or \code{data.table} used to estimate
#' the treatment effects.
#' @param target character, target parameter to be estimated. The
#' function allows for ATE (\code{'ate'}), ATT (\code{'att'}), ATU
#' (\code{'atu'}), LATE (\code{'late'}), and generalized LATE
#' (\code{'genlate'}).
#' @param late.from a named vector or a list declaring the baseline
#' values of Z used to define the LATE. The name associated with
#' each value should be the name of the corresponding variable.
#' @param late.to a named vector or a list declaring the comparison
#' set of values of Z used to define the LATE. The name associated
#' with each value should be the name of the corresponding
#' variable.
#' @param late.X a named vector or a list declaring the values to
#' condition on. The name associated with each value should be the
#' name of the corresponding variable.
#' @param genlate.lb lower bound value of unobservable \code{u} for
#' estimating the generalized LATE.
#' @param genlate.ub upper bound value of unobservable \code{u} for
#' estimating the generalized LATE.
#' @param target.weight0 user-defined weight function for the control
#' group defining the target parameter. A list of functions can be
#' submitted if the weighting function is in fact a spline. The
#' arguments of the function should be variable names in
#' \code{data}. If the weight is constant across all observations,
#' then the user can instead submit the value of the weight
#' instead of a function.
#' @param target.weight1 user-defined weight function for the treated
#' group defining the target parameter. See \code{target.weight0}
#' for details.
#' @param target.knots0 user-defined set of functions defining the
#' knots associated with spline weights for the control group. The
#' arguments of the function should consist only of variable names
#' in \code{data}. If the knots are constant across all
#' observations, then the user can instead submit the vector of
#' knots instead of a function.
#' @param target.knots1 user-defined set of functions defining the
#' knots associated with spline weights for the treated group. See
#' \code{target.knots0} for details.
#' @param m0 one-sided formula for the marginal treatment response
#' function for the control group. Splines may also be
#' incorporated using the expression \code{uSpline}, e.g.
#' \code{uSpline(degree = 2, knots = c(0.4, 0.8), intercept =
#' TRUE)}. The \code{intercept} argument may be omitted, and is
#' set to \code{TRUE} by default.
#' @param m1 one-sided formula for the marginal treatment response
#' function for the treated group. See \code{m0} for details.
#' @param uname variable name for the unobservable used in declaring
#' the MTRs. The name can be provided with or without quotation
#' marks.
#' @param m1.ub numeric value for upper bound on MTR for the treated
#' group. By default, this will be set to the largest value of the
#' observed outcome in the estimation sample.
#' @param m0.ub numeric value for upper bound on MTR for the control
#' group. By default, this will be set to the largest value of the
#' observed outcome in the estimation sample.
#' @param m1.lb numeric value for lower bound on MTR for the treated
#' group. By default, this will be set to the smallest value of
#' the observed outcome in the estimation sample.
#' @param m0.lb numeric value for lower bound on MTR for the control
#' group. By default, this will be set to the smallest value of
#' the observed outcome in the estimation sample.
#' @param mte.ub numeric value for upper bound on treatment effect
#' parameter of interest.
#' @param mte.lb numeric value for lower bound on treatment effect
#' parameter of interest.
#' @param m0.dec logical, set to \code{FALSE} by default. Set equal to
#' \code{TRUE} if the MTR for the control group should be weakly
#' monotone decreasing.
#' @param m0.inc logical, set to \code{FALSE} by default. Set equal to
#' \code{TRUE} if the MTR for the control group should be weakly
#' monotone increasing.
#' @param m1.dec logical, set to \code{FALSE} by default. Set equal to
#' \code{TRUE} if the MTR for the treated group should be weakly
#' monotone decreasing.
#' @param m1.inc logical, set to \code{FALSE} by default. Set equal to
#' \code{TRUE} if the MTR for the treated group should be weakly
#' monotone increasing.
#' @param mte.dec logical, set to \code{FALSE} by default. Set equal
#' to \code{TRUE} if the MTE should be weakly monotone decreasing.
#' @param mte.inc logical, set to \code{FALSE} by default. Set equal
#' to \code{TRUE} if the MTE should be weakly monotone increasing.
#' @param equal.coef one-sided formula to indicate which terms in
#' \code{m0} and \code{m1} should be constrained to have the same
#' coefficients. These terms therefore have no effect on the MTE.
#' @param ivlike formula or vector of formulas specifying the
#' regressions for the IV-like estimands. Which coefficients to
#' use to define the constraints determining the treatment effect
#' bounds (alternatively, the moments determining the treatment
#' effect point estimate) can be selected in the argument
#' \code{components}. If no argument is passed, then a linear
#' regression will be performed to estimate the MTR coefficients.
#' @param components a list of vectors of the terms in the regression
#' specifications to include in the set of IV-like estimands. No
#' terms should be in quotes. To select the intercept term,
#' include the name \code{intercept}. If the factorized
#' counterpart of a variable is included in the IV-like
#' specifications, e.g. \code{factor(x)} where \code{x = 1, 2, 3},
#' the user can select the coefficients for specific factors by
#' declaring the components \code{factor(x)-1, factor(x)-2,
#' factor(x)-3}. See \code{\link{l}} on how to input the
#' argument. If no components for a IV specification are given,
#' then all coefficients from that IV specification will be used
#' to define constraints in the partially identified case, or to
#' define moments in the point identified case.
#' @param subset a single subset condition or list of subset
#' conditions corresponding to each regression specified in
#' \code{ivlike}. The input must be logical. See \code{\link{l}}
#' on how to input the argument. If the user wishes to select
#' specific rows, construct a binary variable in the data set, and
#' set the condition to use only those observations for which the
#' binary variable is 1, e.g. the binary variable is \code{use},
#' and the subset condition is \code{use == 1}.
#' @param propensity formula or variable name corresponding to
#' propensity to take up treatment. If a formula is declared, then
#' the function estimates the propensity score according to the
#' formula and link specified in \code{link}. If a variable name
#' is declared, then the corresponding column in the data is taken
#' as the vector of propensity scores. A variable name can be
#' passed either as a string (e.g \code{propensity = 'p'}), a
#' variable (e.g. \code{propensity = p}), or a one-sided formula
#' (e.g. \code{propensity = ~p}).
#' @param link character, name of link function to estimate propensity
#' score. Can be chosen from \code{'linear'}, \code{'probit'}, or
#' \code{'logit'}. Default is set to \code{'logit'}. The link
#' should be provided with quoation marks.
#' @param treat variable name for treatment indicator. The name can be
#' provided with or without quotation marks.
#' @param outcome variable name for outcome variable. The name can be
#' provided with or without quotation marks.
#' @param solver character, name of the programming package in R used
#' to obtain the bounds on the treatment effect. The function
#' supports \code{'gurobi'}, \code{'cplexapi'}, \code{rmosek},
#' \code{'lpsolveapi'}. The name of the solver should be provided
#' with quotation marks.
#' @param solver.options list, each item of the list should correspond
#' to an option specific to the solver selected.
#' @param solver.presolve boolean, default set to \code{TRUE}. Set
#' this parameter to \code{FALSE} if presolve should be turned off
#' for the LP/QCQP problems.
#' @param solver.options.criterion list, each item of the list should
#' correspond to an option specific to the solver selected. These
#' options are specific for finding the minimum criterion.
#' @param solver.options.bounds list, each item of the list should
#' correspond to an option specific to the solver selected. These
#' options are specific for finding the bounds.
#' @param lpsolver character, deprecated argument for \code{lpsolver}.
#' @param lpsolver.options list, deprecated argument for
#' \code{solver.options}.
#' @param lpsolver.presolve boolean, deprecated argument for
#' \code{solver.presolve}.
#' @param lpsolver.options.criterion list, deprecated argument for
#' \code{solver.options.criterion}.
#' @param lpsolver.options.bounds list, deprecated argument for
#' \code{solver.options.bounds}.
#' @param criterion.tol tolerance for the criterion function, and is
#' set to 1e-4 by default. The criterion measures how well the
#' IV-like moments/conditional means are matched using the
#' l1-norm. Statistical noise may prohibit the theoretical LP/QCQP
#' problem from being feasible. That is, there may not exist a set
#' of MTR coefficients that are able to match all the specified
#' moments. The function thus first estimates the minimum
#' criterion, which is reported in the output under the name
#' 'minimum criterion', with a criterion of 0 meaning that all
#' moments were able to be matched. The function then relaxes the
#' constraints by tolerating a criterion up to \code{minimum
#' criterion * (1 + criterion.tol)}. Set \code{criterion.tol} to a
#' value greater than 0 to allow for more conservative bounds.
#' @param initgrid.nx integer determining the number of points of the
#' covariates used to form the initial constraint grid for
#' imposing shape restrictions on the MTRs.
#' @param initgrid.nu integer determining the number of points in the
#' open interval (0, 1) drawn from a Halton sequence. The end
#' points 0 and 1 are additionally included. These points are
#' always a subset of the points defining the audit grid (see
#' \code{audit.nu}). These points are used to form the initial
#' constraint grid for imposing shape restrictions on the \code{u}
#' components of the MTRs.
#' @param audit.nx integer determining the number of points on the
#' covariates space to audit in each iteration of the audit
#' procedure.
#' @param audit.nu integer determining the number of points in the
#' open interval (0, 1) drawn from a Halton sequence. The end
#' points 0 and 1 are additionally included. These points are used
#' to audit whether the shape restrictions on the \code{u}
#' components of the MTRs are satisfied. The initial grid used to
#' impose the shape constraints in the LP/QCQP problem are
#' constructed from a subset of these points.
#' @param audit.add maximum number of points to add to the initial
#' constraint grid for imposing each kind of shape constraint. For
#' example, if there are 5 different kinds of shape constraints,
#' there can be at most \code{audit.add * 5} additional points
#' added to the constraint grid.
#' @param audit.max maximum number of iterations in the audit
#' procedure.
#' @param audit.tol feasibility tolerance when performing the
#' audit. By default to set to be 1e-06, which is equal to the
#' default feasibility tolerances of Gurobi (\code{solver =
#' "gurobi"}), CPLEX (\code{solver = "cplexapi"}), and Rmosek
#' (\code{solver = "rmosek"}). This parameter should only be
#' changed if the feasibility tolerance of the solver is changed,
#' or if numerical issues result in discrepancies between the
#' solver's feasibility check and the audit.
#' @param rescale boolean, set to \code{TRUE} by default. This
#' rescalels the MTR components to improve stability in the
#' LP/QCQP optimization.
#' @param point boolean. Set to \code{TRUE} if it is believed that the
#' treatment effects are point identified. If set to \code{TRUE}
#' and IV-like formulas are passed, then a two-step GMM procedure
#' is implemented to estimate the treatment effects. Shape
#' constraints on the MTRs will be ignored under point
#' identification. If set to \code{TRUE} and the regression-based
#' criteria is used instead, then OLS will be used to estimate the
#' MTR coefficients used to estimate the treatment effect. If not
#' declared, then the function will determine whether or not the
#' target parameter is point identified.
#' @param point.eyeweight boolean, default set to \code{FALSE}. Set to
#' \code{TRUE} if the GMM point estimate should use the identity
#' weighting matrix (i.e. one-step GMM).
#' @param bootstraps integer, default set to 0. This determines the
#' number of bootstraps used to perform statistical inference.
#' @param bootstraps.m integer, default set to size of data
#' set. Determines the size of the subsample drawn from the
#' original data set when performing inference via the
#' bootstrap. This option applies only to the case of constructing
#' confidence intervals for treatment effect bounds, i.e. it does
#' not apply when \code{point = TRUE}.
#' @param bootstraps.replace boolean, default set to \code{TRUE}. This
#' determines whether the resampling procedure used for inference
#' will sample with replacement.
#' @param levels vector of real numbers between 0 and 1. Values
#' correspond to the level of the confidence intervals constructed
#' via bootstrap.
#' @param ci.type character, default set to \code{'both'}. Set to
#' \code{'forward'} to construct the forward confidence interval
#' for the treatment effect bound. Set to \code{'backward'} to
#' construct the backward confidence interval for the treatment
#' effect bound. Set to \code{'both'} to construct both types of
#' confidence intervals.
#' @param specification.test boolean, default set to
#' \code{TRUE}. Function performs a specification test for the
#' partially identified case when \code{bootstraps > 0}.
#' @param noisy boolean, default set to \code{TRUE}. If \code{TRUE},
#' then messages are provided throughout the estimation
#' procedure. Set to \code{FALSE} to suppress all messages,
#' e.g. when performing the bootstrap.
#' @param smallreturnlist boolean, default set to \code{FALSE}. Set to
#' \code{TRUE} to exclude large intermediary components
#' (i.e. propensity score model, LP/QCQP model, bootstrap
#' iterations) from being included in the return list.
#' @param debug boolean, indicates whether or not the function should
#' provide output when obtaining bounds. The option is only
#' applied when \code{solver = 'gurobi'} or \code{solver =
#' 'rmosek'}. The output provided is the same as what the Gurobi
#' API would send to the console.
#' @return Returns a list of results from throughout the estimation
#' procedure. This includes all IV-like estimands; the propensity
#' score model; bounds on the treatment effect; the estimated
#' expectations of each term in the MTRs; the components and
#' results of the LP/QCQP problem.
#'
#' @details When the function is used to estimate bounds, and
#' statistical inference is not performed, the function returns
#' the following objects.
#' \describe{
#' \item{audit.count}{the number of audits required until there were
#' no more violations; or the number of audits performed before the audit
#' procedure was terminated.}
#' \item{audit.criterion}{the minimum criterion.}
#' \item{audit.grid}{a list containing the points used to define the audit
#' grid, as well as a table of points where the shape constraints were
#' violated.}
#' \item{bounds}{a vector with the estimated lower and upper bounds of
#' the target treatment effect.}
#' \item{call.options}{a list containing all the model specifications and
#' call options generating the results.}
#' \item{gstar}{a list containing the estimate of the weighted means
#' for each component in the MTRs. The weights are determined by the
#' target parameter declared in \code{target}, or the weights defined
#' by \code{target.weight1}, \code{target.knots1},
#' \code{target.weight0}, \code{target.knots0}.}
#' \item{gstar.coef}{a list containing the coefficients on the treated
#' and control group MTRs.}
#' \item{gstar.weights}{a list containing the target weights used to
#' estimate \code{gstar}.}
#' \item{result}{a list containing the LP/QCQP model, and the full output
#' from solving the problem.}
#' \item{solver}{the solver used in estimation.}
#' \item{moments}{the number of elements in the S-set used to generate
#' achieve (partial) identification.}
#' \item{propensity}{the propensity score model. If a variable is fed
#' to the \code{propensity} argument when calling \code{ivmte}, then
#' the returned object is a list containing the name of variable given
#' by the user, and the values of that variable used in estimation.}
#' \item{s.set}{a list of all the coefficient estimates and weights
#' corresponding to each element in the S-set.}
#' \item{splines.dict}{a list including the specifications of each
#' spline declared in each MTR.}
#' \item{messages}{a vector of character strings logging the output of
#' the estimation procedure.}
#' }
#'
#' If \code{bootstraps} is greater than 0, then statistical inference
#' will be performed and the output will additionally contain the
#' following objects.
#' \describe{
#' \item{bootstraps}{the number of bootstraps.}
#' \item{bootstraps.failed}{the number of bootstraps that failed (e.g.
#' due to collinearity) and had to be repeated.}
#' \item{bounds.bootstraps}{the estimates of the bounds from every
#' bootstrap draw.}
#' \item{bounds.ci}{forward and/or backward confidence intervals for
#' the bound estimates at the levels specified in \code{levels}.}
#' \item{bounds.se}{bootstrap standard errors on the lower and upper
#' bound estimates.}
#' \item{p.value}{p-value for the estimated bounds. p-values are
#' constructed by finding the level at which the confidence interval
#' no longer contains 0.}
#' \item{propensity.ci}{confidence interval for coefficient estimates
#' of the propensity score model.}
#' \item{propensity.se}{standard errors for the coefficient estimates
#' of the propensity score model.}
#' \item{specification.p.value}{p-value from a specification test.
#' The specification test is only performed if the minimum criterion
#' is not 0.}
#' }
#'
#' If \code{point = TRUE} and \code{bootstraps = 0}, then point
#' estimation is performed using two-step GMM. The output will contain
#' the following objects.
#' \describe{
#' \item{j.test}{test statistic and results from the asymptotic J-test.}
#' \item{moments}{a vector. Each element is the GMM criterion for each
#' moment condition used in estimation.}
#' \item{mtr.coef}{coefficient estimates for the MTRs.}
#' \item{point.estimate}{point estimate of the treatment effect.}
#' \item{redundant}{indexes for the moment conditions (i.e. elements
#' in the S set) that were linearly independent and could be dropped.}
#' }
#'
#' If \code{point = TRUE} and \code{bootstraps} is not 0, then
#' point estimation is performed using two-step GMM, and additional
#' statistical inference is performed using the bootstrap samples.
#' The output will contain the following additional objects.
#' \describe{
#' \item{bootstraps}{the number of bootstraps.}
#' \item{bootstraps.failed}{the number of bootstraps that failed (e.g.
#' due to collinearity) and had to be repeated.}
#' \item{j.test}{test statistic and result from the J-test performed
#' using the bootstrap samples.}
#' \item{j.test.bootstraps}{J-test statistic from each bootstrap.}
#' \item{mtr.bootstraps}{coefficient estimates for the MTRs from
#' each bootstrap sample. These are used to construct the confidence
#' intervals and standard errors for the MTR coefficients.}
#' \item{mtr.ci}{confidence intervals for each MTR coefficient.}
#' \item{mtr.se}{standard errors for each MTR coefficient estimate.}
#' \item{p.value}{p-value for the treatment effect point estimate
#' estimated using the bootstrap.}
#' \item{point.estimate.bootstraps}{treatment effect point estimate
#' from each bootstrap sample. These are used to construct the
#' confidence interval, standard error, and p-value for the treatment
#' effect.}
#' \item{point.estimate.ci}{confidence interval for the treatment
#' effect.}
#' \item{point.estimate.se}{standard error for the treatment effect
#' estimate.}
#' \item{propensity.ci}{confidence interval for the coefficients in
#' the propensity score model, constructed using the bootstrap.}
#' \item{propensity.se}{standard errors for the coefficient estimates
#' of the propensity score model.}
#' }
#'
#' @examples
#' dtm <- ivmte:::gendistMosquito()
#'
#' ivlikespecs <- c(ey ~ d | z,
#' ey ~ d | factor(z),
#' ey ~ d,
#' ey ~ d | factor(z))
#' jvec <- l(d, d, d, d)
#' svec <- l(, , , z %in% c(2, 4))
#'
#' ivmte(ivlike = ivlikespecs,
#' data = dtm,
#' components = jvec,
#' propensity = d ~ z,
#' subset = svec,
#' m0 = ~ u + I(u ^ 2),
#' m1 = ~ u + I(u ^ 2),
#' uname = u,
#' target = "att",
#' m0.dec = TRUE,
#' m1.dec = TRUE,
#' bootstraps = 0,
#' solver = "lpSolveAPI")
#'
#' @export
ivmte <- function(data, target, late.from, late.to, late.X,
genlate.lb, genlate.ub, target.weight0 = NULL,
target.weight1 = NULL, target.knots0 = NULL,
target.knots1 = NULL, m0, m1, uname = u, m1.ub,
m0.ub, m1.lb, m0.lb, mte.ub, mte.lb, m0.dec, m0.inc,
m1.dec, m1.inc, mte.dec, mte.inc,
equal.coef, ivlike,
components, subset, propensity, link = 'logit',
treat, outcome,
solver, solver.options,
solver.presolve,
solver.options.criterion, solver.options.bounds,
lpsolver, lpsolver.options,
lpsolver.presolve,
lpsolver.options.criterion, lpsolver.options.bounds,
criterion.tol = 1e-4,
initgrid.nx = 20, initgrid.nu = 20, audit.nx = 2500,
audit.nu = 25, audit.add = 100, audit.max = 25,
audit.tol, rescale,
point, point.eyeweight = FALSE,
bootstraps = 0, bootstraps.m,
bootstraps.replace = TRUE,
levels = c(0.99, 0.95, 0.90), ci.type = 'backward',
specification.test = TRUE,
noisy = FALSE,
smallreturnlist = FALSE, debug = FALSE) {
## Try-catch is implemented to deal with sinking of
## output. Specifically, if the function ends prematurely, the
## sink can still be reset at the 'finally' segment.
tryCatch({
## Keep track of sinks
origSinks <- sink.number()
## Save log output
logNameCount <- Sys.getpid()
logName <- paste0(".ivmte.R.tmp.log", logNameCount)
logNameExists <- file.exists(logName)
while(logNameExists) {
logNameCount <- logNameCount + 1
logName <- gsub((logNameCount - 1), logNameCount, logName)
logNameExists <- file.exists(logName)
}
unlink(logName)
tmpOutput <- file(logName)
if (noisy) {
sink(tmpOutput, split = TRUE)
} else {
sink(tmpOutput)
}
call <- match.call(expand.dots = FALSE)
envList <- list(m0 = environment(m0),
m1 = environment(m1),
parent = parent.frame())
if (hasArg(equal.coef)) envList$equal.coef <- environment(equal.coef)
direct <- TRUE
if (hasArg(ivlike) && !is.null(ivlike)) {
direct <- FALSE
envList$ivlike <- environment(ivlike)
}
if (!direct) {
if (hasArg(rescale)) {
warning(gsub('\\s+', ' ',
"The 'rescale' option is currently ignored
unless a direct regression is performed."),
call. = FALSE)
}
rescale <- FALSE
} else {
if (!hasArg(rescale)) rescale <- TRUE
}
envProp <- try(environment(propensity), silent = TRUE)
if (class(envProp) != "environment") {
envList$propensity <- parent.frame()
} else {
envList$propensity <- envProp
}
for (i in 1:length(envList)) {
if (is.null(envList[[i]])) envList[[i]] <- parent.frame()
}
##---------------------------
## 1. Check linear programming dependencies
##---------------------------
## Check if deprecated arguments are used
deprecatedMsg <- NULL
if (hasArg(lpsolver) | hasArg(lpsolver.options) |
hasArg(lpsolver.presolve) |
hasArg(lpsolver.options.criterion) |
hasArg(lpsolver.options.bounds)) {
deprecatedMsg <-
c(deprecatedMsg,
gsub('\\s+', ' ',
"Arguments involving 'lpsolver' are deprecated,
use arguments with 'solver' instead, e.g.
'lpsolver.options' is deprecated, use
'solver.options' instead."))
}
if ((hasArg(lpsolver) && hasArg(solver)) |
(hasArg(lpsolver.options) && hasArg(solver.options)) |
(hasArg(lpsolver.presolve) && hasArg(solver.presolve)) |
(hasArg(lpsolver.options.criterion) &&
hasArg(solver.options.criterion)) |
(hasArg(lpsolver.options.bounds) &&
hasArg(solver.options.bounds))) {
deprecatedMsg <-
c(deprecatedMsg,
gsub('\\s+', ' ',
"If arguments involving 'lpsolver' and 'solver'
are both provided, then only the arguments
involving 'solver' will be used."))
}
if (!is.null(deprecatedMsg)) {
warning(paste(deprecatedMsg, collapse = ' '), call. = FALSE)
}
if (hasArg(lpsolver) | hasArg(solver)) {
if (hasArg(solver)) solver <- tolower(solver)
if (!hasArg(solver)) solver <- tolower(lpsolver)
} else {
if (requireNamespace("gurobi", quietly = TRUE)) {
solver <- "gurobi"
} else if (requireNamespace("lpSolveAPI", quietly = TRUE)) {
solver <- "lpsolveapi"
} else if (requireNamespace("cplexAPI", quietly = TRUE)) {
solver <- "cplexapi"
} else if (requireNamespace("Rmosek", quietly = TRUE)) {
solver <- "rmosek"
} else {
solver <- "none"
point <- TRUE
if (direct) tmp.method <- 'OLS,'
if (!direct) tmp.method <- 'GMM,'
warning(gsub("\\s+", " ",
paste("None of the compatible solvers are
installed, so estimation is only possible
under point identification.
MTR coefficients will be estimated
using", tmp.method, "and an estimate of
the target parameter will be returned
only if the MTR coefficients are point
identified.")),
call. = FALSE)
}
}
if (! solver %in% c("gurobi",
"cplexapi",
"rmosek",
"lpsolveapi",
"none")) {
stop(gsub("\\s+", " ",
paste0("Estimator is incompatible with solver '", solver,
"'. Please install one of the
following solvers instead:
gurobi (version 7.5-1 or later);
cplexAPI (version 1.3.3 or later);
Rmosek (version 9.2.38 or later);
lpSolveAPI (version 5.5.2.0 or later).")),
call. = FALSE)
}
if (hasArg(solver.options) | hasArg(lpsolver.options)) {
if (!hasArg(solver.options)) {
solver.options <- lpsolver.options
}
if (!is.list(solver.options)) {
stop(gsub("\\s+", " ",
paste0("'solver.options' must be a list.
Each item in the list should correspond to an
option to be passed to the solver.
The name of the item should match the name
of the option, and the value of the item
should be the value to set the option to.")),
call. = FALSE)
}
if (hasArg(solver.options.criterion) |
hasArg(solver.options.bounds) |
hasArg(lpsolver.options.criterion) |
hasArg(lpsolver.options.bounds)) {
stop(gsub("\\s+", " ",
paste0("Either declare 'solver.options'; or declare
'solver.options.criterion' and/or
'solver.options.bounds'; but not both.
In the case of
the latter, if only one set of options is
declared, then a set of default options will
be provided for the other.")),
call. = FALSE)
}
}
if (hasArg(solver.options.criterion) |
hasArg(lpsolver.options.criterion)) {
if (!hasArg(solver.options.criterion)) {
solver.options.criterion <- lpsolver.options.criterion
}
if (!is.list(solver.options.criterion)) {
stop(gsub("\\s+", " ",
paste0("'solver.options.criterion' must be a list.
Each item in the list should correspond to an
option to be passed to the solver.
The name of the item should match the name
of the option, and the value of the item
should be the value to set the option to.")),
call. = FALSE)
}
}
if (hasArg(solver.options.bounds) | hasArg(lpsolver.options.bounds)) {
if (!hasArg(solver.options.bounds)) {
solver.options.bounds <- lpsolver.options.bounds
}
if (!is.list(solver.options.bounds)) {
stop(gsub("\\s+", " ",
paste0("'solver.options.bounds' must be a list.
Each item in the list should correspond to an
option to be passed to the solver.
The name of the item should match the name
of the option, and the value of the item
should be the value to set the option to.")),
call. = FALSE)
}
}
if (hasArg(solver.presolve) | hasArg(lpsolver.presolve)) {
if (!hasArg(solver.presolve)) {
solver.presolve <- lpsolver.presolve
}
if (!is.logical(solver.presolve)) {
stop("'solver.presolve' must either be TRUE or FALSE.",
call. = FALSE)
}
if (solver != "gurobi") {
warning(gsub("\\s+", " ",
paste0("The 'presolve' option is only implemented
if the solver is Gurobi. For CPLEX and
lp_solve, set the presolve parameter using
'lpsolve.options', 'lpsolve.options.criterion',
and 'lpsolve.options.bounds'.")),
call. = FALSE)
}
if (((hasArg(solver.options) | hasArg(lpsolver.options)) &&
!is.null(solver.options$presolve)) |
((hasArg(solver.options.criterion) |
hasArg(lpsolver.options.criterion)) &&
!is.null(solver.options.criterion$presolve)) |
((hasArg(solver.options.bounds) |
hasArg(lpsolver.options.bounds)) &&
!is.null(solver.options.bounds$presolve))) {
warning(gsub("\\s+", " ",
paste0("The 'presolve' option overrides the
presolve parameters set in 'lpsolve.options',
'lpsolve.options.criterion', and
'lpsolve.options.bounds'.")),
call. = FALSE)
}
}
if (direct && !(solver %in% c('gurobi', 'rmosek', 'none'))) {
stop(gsub("\\s+", " ",
paste0("A direct regression may only be peformed if
the solver is Gurobi or MOSEK. Please install
either solver and set 'solver = \"gurobi\"'
or 'solver = \"rmosek\"'.")),
call. = FALSE)
}
if (debug) {
if (! solver %in% c("gurobi", "rmosek")) {
if (requireNamespace("gurobi", quietly = TRUE)) {
solver <- 'gurobi'
warning(gsub("\\s+", " ",
"'debug = TRUE' is only permitted if
'solver = \"gurobi\"' or
'solver = \"rmosek\"'. Output and solutions will
be obtained via Gurobi."),
call. = FALSE, immediate. = TRUE)
} else {
if (requireNamespace("Rmosek", quietly = TRUE)) {
solver <- 'rmosek'
warning(gsub("\\s+", " ",
"'debug = TRUE' is only permitted if
'solver = \"gurobi\"' or
'solver = \"rmosek\"'. Output and solutions will
be obtained via MOSEK."),
call. = FALSE, immediate. = TRUE)
} else {
stop(gsub("\\s+", " ",
"'debug = TRUE' is only permitted if
'solver = \"gurobi\"' or
'solver = \"rmosek\"'. Please install
either solver."),
call. = FALSE, immediate. = TRUE)
}
}
}
}
##---------------------------
## 2. Check format of non-numeric arguments
##---------------------------
## Convert uname into a string
uname <- deparse(substitute(uname))
uname <- gsub("~", "", uname)
uname <- gsub("\\\"", "", uname)
## Ensure data can be converted to a data.frame
if (("data.frame" %in% class(data)) |
("matrix" %in% class(data))) {
data <- as.data.frame(data)
} else {
stop(gsub("\\s+", " ",
"'data' argument must either be a data.frame,
data.table, tibble, or matrix."),
call. = FALSE)
}
## Ensure MTRs are formulas
if (classFormula(m0) && classFormula(m1)) {
if (all(length(Formula::as.Formula(m0)) == c(0, 1)) &&
all(length(Formula::as.Formula(m1)) == c(0, 1))) {
mtrFail <- FALSE
} else {
mtrFail <- TRUE
}
} else {
mtrFail <- TRUE
}
if (mtrFail) {
stop(gsub("\\s+", " ",
"Arguments 'm0' and 'm1' must be one-sided formulas,
e.g. m0 = ~ 1 + u + x:I(u ^ 2)."),
call. = FALSE)
}
## Character arguments will be converted to lowercase
if (hasArg(target)) target <- tolower(target)
if (hasArg(link)) link <- tolower(link)
if (hasArg(ci.type)) ci.type <- tolower(ci.type)
## Convert ivlike formula into a list (i.e. a one-element list
## with one formula), which is a more robust framework
if (!direct) {
if (classFormula(ivlike)) {
ivlike <- c(ivlike)
}
## Convert formula, components, and subset inputs into lists
length_formula <- length(ivlike)
userComponents <- FALSE
if (hasArg(components)) {
tmpComp <- deparse(substitute(components))
if (substr(tmpComp, 1, 2) != "l(" &&
substr(tmpComp, 1, 2) == "c(") {
stop(gsub("\\s+", " ",
"The 'components' argument should be declared
using 'l()' instead of 'c()'."),
call. = FALSE)
}
if (!is.null(components)) {
if (length(components) > 1) {
userComponents <- TRUE
}
if (length(components) == 1) {
if (Reduce(paste, deparse(components)) != "list()"){
userComponents <- TRUE
}
}
}
}
if (userComponents) {
length_components <- length(components)
if (length_formula == 1) {
## When a single formula is provided, then the list of
## components should be treated as a single vector of
## components. The way in which the user declares the
## components can be problematic. The function must figure
## out if the components list is entered directly, or as a
## variable.
componentsTmp <-
gsub("\\s+", " ",
Reduce(paste,
deparse(substitute(components))))
if (substr(componentsTmp, 1, 2) == "l(") {
components <- deparse(substitute(components))
components <-
gsub("\\s+", " ", Reduce(paste, components))
if (substr(componentsTmp, 1, 4) != "l(c(") {
internals <- substr(components, 3,
nchar(components) - 1)
charList <- unique(unlist(strsplit(x = internals,
split = "")))
if (length(charList) == 2 &&
all(charList == c(",", " "))) {
internals <- ""
}
components <- paste0("l(c(", internals, "))")
}
components <- eval(parse(text = components))
length_components <- 1
} else {
components <- unlist(lapply(components, deparse))
components <-
gsub("\\s+", " ", Reduce(paste, components))
if (substr(components, 1, 2) == "c(") {
components <- paste0("l(", components, ")")
} else {
components <- paste0("l(c(", components, "))")
}
components <- eval(parse(text = components))
}
}
} else {
length_components <- length_formula
components <- as.list(replicate(length_formula, ""))
}
if (length_formula > length_components & length_components > 0) {
warning(gsub("\\s+", " ",
"List of components not the same length of list of
IV-like specifications: more specifications than
component vectors. Specifications without corresponding
component vectors will include all covariates when
constructing the S-set."),
call. = FALSE)
components[(length(components) + 1) : length(ivlike)] <- ""
}
if (length_formula < length_components) {
warning(gsub("\\s+", " ",
"List of components not the same length of list of
IV-like specifications: more component vectors than
specifications. Component vectors without corresponding
specifications will be dropped."),
call. = FALSE)
components <- components[1 : length(ivlike)]
}
} else {
length_formula <- 1
if (hasArg(components)) {
warning(gsub("\\s+", " ",
"If the 'ivlike' argument is not passed, then the
estimation procedure no longer estimates IV-like
moments and the
'components' argument becomes redundant."),
call. = FALSE)
}
}
## Check the subset input---of the three lists that are input,
## only this can be omitted by the user, in which case no
## subsetting is used
if (hasArg(subset)) {
## If subsetting is not a list, convert it to a list
if (!(classList(subset))) {
## Check if character, if so, then may need to
## deparse. Also check if logical, in which case less
## needs to be done.
subsetChar <- suppressWarnings(
try(is.character(subset), silent = TRUE))
subsetLogic <- suppressWarnings(
try(is.logical(subset), silent = TRUE))
if (subsetChar == TRUE) {
stop(gsub("\\s+", " ",
"Subset conditions should be logical
expressions involving variable names from the
data set and logical operators."),
call. = FALSE)
} else if (subsetLogic == TRUE) {
## Currently prohobit logical vectors. Instead
## restrict to expressions only. Below is the code to
## revert to the case where logical vectors are
## allowed.
## subsetList <- list()
## subsetList[[1]] <- subset
## subset <- subsetList
stop(gsub("\\s+", " ",
"Subset conditions should be logical
expressions involving variable names from the
data set and logical operators. Make sure the name of
the variable is not the same as that of another
object in the workspace. If so, rename the object
in the workspace, or the variable in the data set."),
call. = FALSE)
} else {
subsetStrSubs <- Reduce(paste, deparse(substitute(subset)))
subsetStr <- suppressWarnings(
try(Reduce(paste, deparse(subset)), silent = TRUE))
if (class(subsetStr) == "try-error") { ## i.e. logical
subset <- paste0("l(", subsetStrSubs, ")")
} else { ## i.e. variable, for looping
subset <- paste0("l(", subsetStr, ")")
}
}
if (subsetLogic != TRUE) {
subsetLogicOp <- 0
for (operator in c("==", "<", ">", "%in%")) {
subsetLogicOp <- subsetLogicOp + grepl(operator, subset)
}
if (subsetLogicOp == 0) {
stop(gsub("\\s+", " ",
"Subset conditions should be logical
expressions involving variable names from the
data set and logical operators."),
call. = FALSE)
}
subset <- eval(parse(text = subset))
}
}
## Fill in any missing subset slots
if (length(subset) > 1 && length(subset) != length_formula) {
if (!direct) {
stop(gsub("\\s+", " ",
"Number of subset conditions not equal to
number of IV specifications.
Either declare a single subset
condition to be applied to all IV specifications; or
declare a list of subset conditions, one for each IV
specificaiton. An empty element in the list of subset
conditions corresponds to using the full sample."),
call. = FALSE)
} else {
stop(gsub("\\s+", " ",
"More than one subset condition provided.
If no argument is passed for 'ivlike', then
the estimation procedure performs a single
linear regression to fit the data. Only one
subset condition may therefore be passed."),
call. = FALSE)
}
}
if (length(subset) == 1 && length_formula > 1) {
subset <- rep(subset, length_formula)
}
## Check if all subseting conditions are logical
nonLogicSubset <- NULL
for (i in 1:length_formula) {
if (subset[[i]] == "") {
ssubset <- replicate(nrow(data), TRUE)
} else {
ssubset <- subset[[i]]
}
if (!is.logical(head(eval(substitute(ssubset), data)))) {
nonLogicSubset <- c(nonLogicSubset, i)
}
}
if (length(nonLogicSubset) > 0) {
stop(gsub("\\s+", " ",
paste0("The conditions in the following
positions of the subset list are not
logical: ",
paste(nonLogicSubset, collapse = ", "),
". Please change the conditions so they
are logical.")),
call. = FALSE)
}
if(length(subset) < length_formula) {
warning(gsub("\\s+", " ",
"List of subset conditions not the same length
of list IV-like specifications: more
specifications than subsetting conditions.
Specifications without corresponding subset
conditions will include all observations."),
call. = FALSE)
subset[(length(subset) + 1) : length_formula] <- ""
}
if(length(subset) > length_formula) {
warning(gsub("\\s+", " ",
"List of subset conditions not the same length
of list IV-like specifications: more subset
conditions than IV-like specifications.
Subset conditions without corresponding
specifications will be dropped."),
call. = FALSE)
subset <- subset[1 : length_formula]
}
} else {
## if no subset input, then we construct it
subset <- as.list(replicate(length_formula, ""))
}
##---------------------------
## 3. Check numeric arguments and case completion
##---------------------------
## Variable and weight checks
if (hasArg(treat)) {
treatStr <- deparse(substitute(treat))
treatStr <- gsub("~", "", treatStr)
treatStr <- gsub("\\\"", "", treatStr)
if (! treatStr %in% colnames(data)) {
stop("Declared treatment indicator not found in data",
call. = FALSE)
}
}
if (hasArg(outcome)) {
if (direct) {
outcomeStr <- deparse(substitute(outcome))
outcomeStr <- gsub("~", "", outcomeStr)
outcomeStr <- gsub("\\\"", "", outcomeStr)
if (! outcomeStr %in% colnames(data)) {
stop("Declared outcome variable not found in data",
call. = FALSE)
}
} else {
warning(gsub('\\s+', ' ',
"If an argument is passed for 'ivlike', then
the outcome variable will be inferred from the
IV-like formulas, and the 'outcome' argument
will be ignored."),
call. = FALSE)
}
}
if (direct && !hasArg(outcome)) {
stop(gsub('\\s+', ' ',
"If 'ivlike' is not passed, then a regression of the
outcome variable on the MTR will be performed.
Please provide the outcome variable in the
'outcome 'argument."),
call. = FALSE)
}
if (hasArg(target)) {
if (! target %in% c("ate", "att", "atu", "late", "genlate")) {
stop(gsub("\\s+", " ",
"Specified target parameter is not recognized.
Choose from 'ate', 'att', 'atu', 'late', or 'genlate'."),
call. = FALSE)
}
if (target == "late") {
## Check the LATE arguments are declared properly.
if (!(hasArg(late.to) & hasArg(late.from))) {
stop(gsub("\\s+", " ",
"Target paramter 'late' requires arguments
'late.to', and 'late.from'."),
call. = FALSE)
}
if (classList(late.to)) late.to <- unlist(late.to)
if (classList(late.from)) late.from <- unlist(late.from)
zIsVec <- substr(deparse(substitute(late.Z)), 1, 2) == "c("
zIsList <- substring(deparse(substitute(late.Z)), 1, 2) == "l("
if (zIsVec) {
late.Z <- restring(substitute(late.Z), substitute = FALSE)
} else if (zIsList) {
late.Z <- restring(substitute(late.Z), substitute = FALSE,
command = "l")
} else {
late.Z <- deparse(substitute(late.Z))
}
late.Z = sort(names(late.from))
late.Ztmp = sort(names(late.to))
if (length(late.to) != length(late.from) |
length(late.to) != length(late.Z)) {
stop(gsub("\\s+", " ",
"The number of variables/values declared in
'late.to' and 'late.from' must be equal."),
call. = FALSE)
}
if (!all(late.Z == late.Ztmp)) {
stop(gsub("\\s+", " ",
"The variables declared in 'late.to' and
'late.from' must be the same."),
call. = FALSE)
}
if (length(unique(late.Z)) != length(late.Z)) {
stop(gsub("\\s+", " ",
"Each variable in 'late.to' and 'late.from' can
only be assigned one value."),
call. = FALSE)
}
if (!is.numeric(late.to) | !is.numeric(late.from)) {
stop("Vectors 'late.from' and 'late.to' must be numeric.",
call. = FALSE)
}
late.from <- late.from[late.Z]
late.to <- late.to[late.Z]
if (all(late.to == late.from)) {
stop(gsub("\\s+", " ",
"'late.to' must be different from 'late.from'."),
call. = FALSE)
}
}
if (target == "genlate") {
if (genlate.lb < 0 | genlate.ub > 1) {
stop(gsub("\\s+", " ",
"'genlate.lb' and 'genlate.ub' must be between 0
and 1."), call. = FALSE)
}
if (genlate.lb >= genlate.ub) {
stop(gsub("\\s+", " ",
"'genlate.lb' must be strictly less
than 'genlate.ub'."),
call. = FALSE)
}
}
if (target == "late" | target == "genlate") {
## Check that included insturments are declared properly
if (hasArg(late.X)) {
eval.X <- unlist(late.X)
late.X <- names(late.X)
if (!is.numeric(eval.X)) {
stop("Vector 'eval.X' must be numeric.",
call. = FALSE)
}
}
}
if (target != "genlate" &
(hasArg("genlate.lb") | hasArg("genlate.ub"))) {
warning(gsub("\\s+", " ",
"Unless target parameter is 'genlate',
'genlate.lb' and 'genlate.ub' arguments will not
be used."))
}
if ((target != "late" & target != "genlate") &
(hasArg("eval.X") | hasArg("late.X"))) {
warning(gsub("\\s+", " ",
"Unless target parameter is 'late' or
'genlate', the arguments eval.X' and
'late.X' will not be used."))
}
if ((hasArg(target.weight0) | hasArg(target.weight1))) {
stop(gsub("\\s+", " ",
"A preset target weight is chosen, and a custom target
weight is provided. Please provide an input for 'target'
only, or for 'target.weight0' and 'target.weight1'."),
call. = FALSE)
}
target.weight0 <- NULL
target.weight1 <- NULL
} else {
if (!(hasArg(target.weight0) & hasArg(target.weight1))) {
stop(gsub("\\s+", " ",
"Only one target weight function is provided. If a
custom target weight is to be used, inputs for both
target.weight0 and target.weight1 must be provided."),
call. = FALSE)
}
## Convert all entries into lists
target.weight0 <- c(target.weight0)
target.weight1 <- c(target.weight1)
target.knots0 <- c(target.knots0)
target.knots1 <- c(target.knots1)
## Weight check
weightCheck1 <- unlist(lapply(target.weight0, is.function))
weightCheck2 <- unlist(lapply(target.weight0, function(x)
is.numeric(x) * (length(x) == 1)))
fails0 <- which(weightCheck1 + weightCheck2 == 0)
if (length(fails0) > 0) {
stop(gsub("\\s+", " ",
paste0("Each component of the custom weight
vectors/lists
must either be functions or constants. The
following entries of target.weight0 are neither: ",
paste(fails0, collapse = ", "),
".")), call. = FALSE)
}
weightCheck1 <- unlist(lapply(target.weight1, is.function))
weightCheck2 <- unlist(lapply(target.weight1, function(x)
is.numeric(x) * (length(x) == 1)))
fails1 <- which(weightCheck1 + weightCheck2 == 0)
if (length(fails1) > 0) {
stop(gsub("\\s+", " ",
paste0("Each component of the custom weight
vectors/lists
must either be functions or constants. The
following entries of target.weight1 are neither: ",
paste(fails1, collapse = ", "),
".")), call. = FALSE)
}
if (length(target.weight0) != (length(target.knots0) + 1)) {
stop(gsub("\\s+", " ",
paste0("The number of weight functions declared in
target.weight0 must be exactly equal to the
number of knots declared in target.knots0
plus 1. Currently, the number of weights
declared is ", length(target.weight0),
", and the number of knots declared is ",
length(target.knots0), ".")),
call. = FALSE)
}
if (length(target.weight1) != (length(target.knots1) + 1)) {
stop(gsub("\\s+", " ",
paste0("The number of weight functions declared in
target.weight1 must be exactly equal to the
number of knots declared in target.knots1
plus 1. Currently, the number of weights
declared is ", length(target.weight1),
", and the number of knots declared is ",
length(target.knots1), ".")),
call. = FALSE)
}
## Knots check
if (!is.null(target.knots0)) {
knotsCheck1 <- unlist(lapply(target.knots0, is.function))
knotsCheck2 <- unlist(lapply(target.knots0, function(x)
is.numeric(x) * (length(x) == 1)))
fails0 <- which(knotsCheck1 + knotsCheck2 == 0)
if (length(fails0) > 0) {
stop(gsub("\\s+", " ",
paste0("Each component of the custom knots
vectors/lists
must either be functions or constants. The
following entries of target.knots0 are neither: ",
paste(fails0, collapse = ", "),
".")), call. = FALSE)
}
}
if (!is.null(target.knots1)) {
knotsCheck1 <- unlist(lapply(target.knots1, is.function))
knotsCheck2 <- unlist(lapply(target.knots1, function(x)
is.numeric(x) * (length(x) == 1)))
fails1 <- which(knotsCheck1 + knotsCheck2 == 0)
if (length(fails1) > 0) {
stop(gsub("\\s+", " ",
paste0("Each component of the custom knots
vectors/lists
must either be functions or constants. The
following entries of target.knots1 are neither: ",
paste(fails0, collapse = ", "),
".")), call. = FALSE)
}
}
}
if (hasArg(link)) {
if (! link %in% c("linear", "logit", "probit")) {
stop(gsub("\\s+", " ",
"Specified link is not recognized. Choose from
'linear', 'logit', or 'probit'."), call. = FALSE)
}
}
if (hasArg(point)) {
if (point == TRUE && !direct) {
if (hasArg(m0.dec) | hasArg(m0.inc) |
hasArg(m1.dec) | hasArg(m1.inc) |
hasArg(mte.dec) | hasArg(mte.inc) |
hasArg(audit.nu) | hasArg(audit.nx) | hasArg(audit.add) |
hasArg(initgrid.nu) | hasArg(initgrid.nx)|
hasArg(audit.max) | hasArg(audit.tol)) {
warning(gsub("\\s+", " ",
"If argument 'point' is set to TRUE, then shape
restrictions on m0 and m1 are ignored, and the
audit procedure is not implemented."),
call. = FALSE)
}
}
}
## Audit checks
if (!(is.numeric(criterion.tol) & criterion.tol >= 0)) {
stop("Cannot set 'criterion.tol' below 0.", call. = FALSE)
}
if (!((initgrid.nu %% 1 == 0) & initgrid.nu >= 0)) {
stop(gsub("\\s+", " ",
"'initgrid.nu' must be an integer greater than or equal to
0 (end points 0 and 1 are always included)."),
call. = FALSE)
}
if (!((initgrid.nx %% 1 == 0) & initgrid.nx >= 0)) {
stop("'initgrid.nx' must be an integer greater than or equal to 0.",
call. = FALSE)
}
if (!((audit.nx %% 1 == 0) & audit.nx > 0)) {
stop("'audit.nx' must be an integer greater than or equal to 1.",
call. = FALSE)
}
if (audit.nx < initgrid.nx) {
stop("'audit.nx' must be larger than 'initgrid.nx'.")
}
if (audit.nu < initgrid.nu) {
stop("'audit.nu' must be larger than 'initgrid.nu'.")
}
if (!((audit.add %% 1 == 0) & audit.add > 0)) {
stop("'audit.add' must be an integer greater than or equal to 1.",
call. = FALSE)
}
if (hasArg(audit.tol)) {
if (!is.numeric(audit.tol) |
audit.tol < 0 |
length(audit.tol) > 1) {
stop("'audit.tol' must be a positive scalar.",
call. = FALSE)
}
}
if (hasArg(m0.dec) | hasArg(m0.inc) |
hasArg(m1.dec) | hasArg(m1.inc) |
hasArg(mte.dec) | hasArg(mte.inc) |
hasArg(m0.lb) | hasArg(m0.ub) |
hasArg(m1.lb) | hasArg(m1.ub) |
hasArg(mte.lb) | hasArg(mte.ub)) {
noshape = FALSE ## indicator for whether shape restrictions declared
if (!((audit.nu %% 1 == 0) & audit.nu >= 0)) {
stop(gsub("\\s+", " ",
"'audit.nu' must be an integer
greater than or equal to 0
(end points 0 and 1 are always included)."),
call. = FALSE)
}
if ((hasArg(m0.dec) && !is.logical(m0.dec)) |
(hasArg(m1.dec) && !is.logical(m1.dec)) |
(hasArg(mte.dec) && !is.logical(mte.dec)) |
(hasArg(m0.inc) && !is.logical(m0.inc)) |
(hasArg(m1.inc) && !is.logical(m1.inc)) |
(hasArg(mte.inc) && !is.logical(mte.inc))) {
stop(gsub("\\s+", " ",
"Monotonicity constraints 'm0.dec', 'm1.dec',
'mte.dec', etc. must be either TRUE or FALSE."),
call. = FALSE)
}
if ((hasArg(m0.lb) && !is.numeric(m0.lb)) |
(hasArg(m1.lb) && !is.numeric(m1.lb)) |
(hasArg(mte.lb) && !is.numeric(mte.lb)) |
(hasArg(m0.ub) && !is.numeric(m0.ub)) |
(hasArg(m1.ub) && !is.numeric(m1.ub)) |
(hasArg(mte.ub) && !is.numeric(mte.ub))) {
stop(gsub("\\s+", " ",
"Boundedness constraints 'm0.lb', 'm1.lb', 'mte.lb',
etc. must be numeric."),
call. = FALSE)
}
} else {
noshape = TRUE
if (!((audit.nu %% 1 == 0) & audit.nu >= 0)) {
stop(gsub("\\s+", " ",
"'audit.nu' must be an integer
greater than or equal to 0
(end points 0 and 1 are always included)."),
call. = FALSE)
}
}
if (!((audit.max %% 1 == 0) & audit.max > 0)) {
stop("'audit.max' must be an integer greater than or equal to 1.",
call. = FALSE)
}
## Bootstrap checks
if (bootstraps < 0 | bootstraps %% 1 != 0 | bootstraps == 1) {
stop(gsub("\\s+", " ",
"'bootstraps' must either be 0, or be an integer greater
than or equal to 2."), call. = FALSE)
}
if (hasArg(bootstraps.m)) {
if (bootstraps.m < 0 | bootstraps.m %% 1 != 0) {
stop(gsub("\\s+", " ",
"'bootstraps.m' must be an integer greater than or
equal to 1."), call. = FALSE)
}
if (hasArg(point) && point == TRUE) {
warning(gsub("\\s+", " ",
"Argument 'bootstrap.m' is only used for
partial identification, and will be ignored
under point identification."), call. = FALSE)
}
} else {
bootstraps.m <- nrow(data)
}
if (!is.logical(bootstraps.replace)) {
stop("'bootstraps.replace' must be TRUE or FALSE.", call. = FALSE)
}
if (max(levels) >= 1 | min(levels) <= 0) {
stop(gsub("\\s+", " ",
"'levels' must be a vector of values strictly between 0
and 1."), call. = FALSE)
}
levels <- sort(levels)
if (! ci.type %in% c("forward", "backward", "both")) {
stop(gsub("\\s+", " ",
"'ci.types' selects the type of confidence intervals to be
constructed for the treatment effect bound. It must be set to
either 'forward', 'backward', or 'both'."), call. = FALSE)
}
if (hasArg(point.eyeweight) && point == FALSE) {
warning(gsub("\\s+", " ",
"Argument 'point.eyeweight' is only used for
point identification, and will be ignored when
point = FALSE."), call. = FALSE)
}
if (hasArg(point.eyeweight) && direct) {
warning(gsub("\\s+", " ",
"Argument 'point.eyeweight' is only used for
point identification when IV-like estimands are
provided through the 'ivlike' argument, and will be
ignored."), call. = FALSE)
}
##---------------------------
## 4. Restrict data to complete observations
##---------------------------
## Restrict data used for all parts of procedure to be the same.
## Collect list of all terms used in formula
vars_y <- NULL
vars_formulas_x <- c()
vars_formulas_z <- c()
vars_subsets <- c()
vars_mtr0 <- c()
vars_mtr1 <- c()
vars_mtr <- c()
vars_weights <- c()
vars_propensity <- c()
vars_components <- c()
terms_formulas_x <- c()
terms_formulas_z <- c()
terms_mtr0 <- c()
terms_mtr1 <- c()
terms_components <- c()
if (!direct) {
if (classList(ivlike)) {
if (!min(unlist(lapply(ivlike, classFormula)))) {
stop(gsub("\\s+", " ",
"Not all elements in list of formulas
are specified correctly."), call. = FALSE)
} else {
vars_formulas_x <- unlist(lapply(ivlike,
getXZ,
inst = FALSE))
vars_formulas_z <- unlist(lapply(ivlike,
getXZ,
inst = TRUE))
vars_y <-
unique(unlist(lapply(ivlike,
function(x) all.vars(x)[[1]])))
terms_formulas_x <- lapply(ivlike,
getXZ,
inst = FALSE,
terms = TRUE)
terms_formulas_z <- lapply(ivlike,
getXZ,
inst = TRUE,
terms = TRUE)
if (length(vars_y) > 1) {
stop(gsub("\\s+", " ",
"Multiple response variables specified
in list of
IV-like specifications."),
call. = FALSE)
}
}
} else {
stop(gsub("\\s+", " ",
"'ivlike' argument must either be a formula, a vector
of formulas, or NULL/not passed."), call. = FALSE)
}
} else {
vars_y <- outcomeStr
rm(outcomeStr)
}
## Collect list of all terms in subsetting condition
if (hasArg(subset)) {
vnames <- colnames(data)
if (classList(subset)) {
svec <- paste(unlist(lapply(subset, deparse)), collapse = " ")
} else {
svec <- gsub("\\s+", " ",
Reduce(paste, deparse(substitute(subset))))
}
svec <- subsetclean(svec)
checklist <- c()
checklist <- c(checklist, svec[!isfunctionstring(svec)])
for (w in svec[isfunctionstring(svec)]) {
arguments <- argstring(w)
checklist <- c(checklist, unlist(strsplit(arguments,
split = ",")))
}
for (w in checklist) {
if (w %in% vnames) vars_subsets <- c(vars_subsets, w)
}
}
## Collect list of all terms used in MTRs
parentFrame <- parent.frame()
origm0 <- m0
origm1 <- m1
if (length(attr(terms(origm0), which = "term.labels")) == 0 &&
attr(terms(origm0), which = "intercept") == 0) {
stop(gsub("\\s+", " ",
"Formula for 'm0' is invalid. There must be at least one
non-zero term."), call. = FALSE)
}
if (length(attr(terms(origm1), which = "term.labels")) == 0 &&
attr(terms(origm1), which = "intercept") == 0) {
stop(gsub("\\s+", " ",
"Formula for 'm1' is invalid. There must be at least one
non-zero term."), call. = FALSE)
}
splinesobj <- list(removeSplines(m0, env = parentFrame),
removeSplines(m1, env = parentFrame))
if (is.null(splinesobj[[1]]$formula)) {
m0uCheck <- NULL
} else {
m0uCheck <- checkU(splinesobj[[1]]$formula, uname)
}
if (is.null(splinesobj[[2]]$formula)) {
m1uCheck <- NULL
} else {
m1uCheck <- checkU(splinesobj[[2]]$formula, uname)
}
if (is.null(splinesobj[[1]]$splinesdict)) {
m0uSplineCheck <- NULL
} else {
m0uSplineCheck <-
checkU(as.formula(paste("~",
paste(unlist(
splinesobj[[1]]$splineslist),
collapse = "+"))), uname)
}
if (is.null(splinesobj[[2]]$splinesdict)) {
m1uSplineCheck <- NULL
} else {
m1uSplineCheck <-
checkU(as.formula(paste("~",
paste(unlist(
splinesobj[[2]]$splineslist),
collapse = "+"))), uname)
}
if (length(m0uCheck) + length(m0uSplineCheck) > 0) {
message0 <- paste(" m0:",
paste(c(m0uCheck, m0uSplineCheck),
collapse = ", "),
"\n")
} else {
message0 <- NULL
}
if (length(m1uCheck) + length(m1uSplineCheck) > 0) {
message1 <- paste(" m1:",
paste(c(m1uCheck, m1uSplineCheck),
collapse = ", "),
"\n")
} else {
message1 <- NULL
}
if (!is.null(message0) | !is.null(message1)) {
if (uname != "x") {
egv <- "x"
} else {
egv <- "v"
}
e1 <- paste0(uname, ", I(", uname, "^3)")
e2 <- paste0(egv, ":", uname, ", ", egv, ":I(", uname, "^3)")
e3 <- paste0("exp(", uname, "), I((", egv, " * ", uname, ")^2)")
stop(gsub("\\s+", " ",
"The following terms are not declared properly."),
"\n", message0, message1,
gsub("\\s+", " ",
paste0("The unobserved variable '",
uname, "' must be declared as a
monomial, e.g. ", e1, ". The monomial can be
interacted with other variables, e.g. ",
e2, ". Expressions where the unobservable term
is not a monomial are either not permissible or
will not be parsed correctly,
e.g. ", e3, ". Try to rewrite the expression so
that '", uname, "' is only included in monomials.")),
call. = FALSE)
}
m0 <- splinesobj[[1]]$formula
m1 <- splinesobj[[2]]$formula
vars_mtr0 <- all.vars(splinesobj[[1]]$formula)
vars_mtr1 <- all.vars(splinesobj[[2]]$formula)
vars_mtr <- c(vars_mtr0, vars_mtr1)
if (!is.null(splinesobj[[1]]$formula)) {
terms_mtr0 <- attr(terms(splinesobj[[1]]$formula),
"term.labels")
}
if (!is.null(splinesobj[[2]]$formula)) {
terms_mtr1 <- attr(terms(splinesobj[[2]]$formula),
"term.labels")
}
if (!is.null(splinesobj[[1]]$splineslist)) {
sf0 <- as.formula(paste("~",
paste(unlist(splinesobj[[1]]$splineslist),
collapse = " + ")))
vars_mtr <- c(vars_mtr, all.vars(sf0))
terms_mtr0 <- c(terms_mtr0, attr(terms(sf0), "term.labels"))
}
if (!is.null(splinesobj[[2]]$splineslist)) {
sf1 <- as.formula(paste("~",
paste(unlist(splinesobj[[2]]$splineslist),
collapse = " + ")))
vars_mtr <- c(vars_mtr, all.vars(sf1))
terms_mtr1 <- c(terms_mtr1, attr(terms(sf1), "term.labels"))
}
if (hasArg(equal.coef)) {
if (!classFormula(equal.coef)) {
stop(gsub('\\s+', ' ',
"'equal.coef' must be a one-sided formula.
The terms in the formula must be contained in both
'm0' and 'm1', and determine which terms in
the MTRs are constrained to have the same
coefficient."),
call. = FALSE)
} else {
equal.coef <- Formula::as.Formula(equal.coef)
if (!all(length(equal.coef) == c(0, 1))) {
stop(gsub('\\s+', ' ',
"'equal.coef' must be a one-sided formula.
The terms in the formula must be contained in both
'm0' and 'm1', and determine which terms in
the MTRs are constrained to have the same
coefficient."),
call. = FALSE)
}
}
splinesobj.equal <- removeSplines(equal.coef, env = parentFrame)
orig.equal.coef <- equal.coef
equal.coef <- splinesobj.equal$formula
## Check that the non-spline terms in equal.coef are
## contained in the MTRs
if (!is.null(equal.coef)) {
tmpTerms0Check <-
all(getXZ(equal.coef, terms = TRUE) %in%
getXZ(splinesobj[[1]]$formula, terms = TRUE))
tmpTerms1Check <-
all(getXZ(equal.coef, terms = TRUE) %in%
getXZ(splinesobj[[2]]$formula, terms = TRUE))
} else {
tmpTerms0Check <- TRUE
tmpTerms1Check <- TRUE
}
## Check that the spline terms in equal.coef are contained
## in the MTRs
tmpSplinesCheck <- TRUE
if (!is.null(splinesobj.equal$splineslist)) {
for (sc in names(splinesobj.equal$splineslist)) {
tmpEqualFm <-
as.formula(
paste("~", paste(splinesobj.equal$splineslist[[sc]],
collapse = " + ")))
tmpSplineIndex <- NULL
for (i in 1:2) {
if (! sc %in% names(splinesobj[[i]]$splineslist)) {
tmpSplinesCheck <- FALSE
} else {
tmpMtrFm <-
as.formula(
paste("~",
paste(splinesobj[[i]]$splineslist[[sc]],
collapse = " + ")))
tmpSplinePos <-
which(names(splinesobj[[i]]$splineslist) == sc)
tmpSplineIndex <-
c(tmpSplineIndex,
splinesobj[[i]]$splinesdict[[tmpSplinePos]]$gstar.index)
if (! all(getXZ(tmpEqualFm, terms = TRUE) %in%
getXZ(tmpMtrFm, terms = TRUE))) {
tmpSplinesCheck <- FALSE
}
}
}
if (length(tmpSplineIndex) == 2) {
tmpSplinePos <-
which(names(splinesobj.equal$splineslist) == sc)
splinesobj.equal$splinesdict[[tmpSplinePos]]$gstar.index <-
tmpSplineIndex
}
}
## Now duplicate the splines object for equal.coef to
## mimic the structue of the splines object for m0 and
## m1. The spline indexes of equal.coef are adjusted
## to match those of m0 and m1.
splinesobj.equal0 <- splinesobj.equal
splinesobj.equal1 <- splinesobj.equal
for (i in 1:length(splinesobj.equal$splinesdict)) {
splinesobj.equal0$splinesdict[[i]]$gstar.index <-
splinesobj.equal0$splinesdict[[i]]$gstar.index[1]
splinesobj.equal1$splinesdict[[i]]$gstar.index <-
splinesobj.equal1$splinesdict[[i]]$gstar.index[2]
}
splinesobj.equal <- list(splinesobj.equal0,
splinesobj.equal1)
rm(splinesobj.equal0, splinesobj.equal1)
}
if (!all(c(tmpTerms0Check, tmpTerms1Check, tmpSplinesCheck))) {
stop(gsub('\\s+', ' ',
"Not all terms in 'equal.coef' are contained in
both 'm0' and 'm1'. The terms in 'equal.coef' must
be declared to
exactly match the subset of terms in 'm0' and 'm1'
whose coefficients should be constrained to be
equal."),
call. = FALSE)
}
rm(tmpTerms0Check, tmpTerms1Check)
}
## Collect list of variables used in custom weights (if defined)
if (!hasArg(target)) {
for (i in 1:length(target.weight0)) {
if (is.function(target.weight0[[i]])) {
vars_weights <- c(vars_weights,
formalArgs(target.weight0[[i]]))
}
if (i < length(target.weight0)) {
if (is.function(target.knots0[[i]])) {
vars_weights <- c(vars_weights,
formalArgs(target.knots0[[i]]))
}
}
}
for (i in 1:length(target.weight1)) {
if (is.function(target.weight1[[i]])) {
vars_weights <- c(vars_weights,
formalArgs(target.weight1[[i]]))
}
if (i < length(target.weight1)) {
if (is.function(target.knots1[[i]])) {
vars_weights <- c(vars_weights,
formalArgs(target.knots1[[i]]))
}
}
}
}
## Collect list of all terms used in propensity formula
if (hasArg(propensity)) {
if (classFormula(propensity)) {
vars_propensity <- all.vars(propensity)
if (length(propensity) == 3) {
ptreat <- all.vars(propensity)[1]
if (hasArg(target) && target == "late") {
if (!all(late.Z %in% vars_propensity)) {
stop (gsub("\\s+", " ",
"All variables in 'late.to' and
'late.from' must be included in the
propensity score model."), call. = FALSE)
}
}
if (hasArg(treat)) {
if (ptreat != treatStr) {
stop(gsub("\\s+", " ",
"Variable listed in 'treat' argument
differs from dependent variable in propensity
score formula. Dependent variable from
propensity score formula will be used as the
treatment variable."),
call. = FALSE)
}
treat <- ptreat
} else {
treat <- ptreat
}
if (length(Formula::as.Formula(propensity))[1] == 0 &&
length(all.vars(propensity)) > 1) {
stop(gsub("\\s+", " ",
paste0("'propensity' argument must either be a
two-sided formula (if the propensity score is to be
estimated from the data), or a one-sided formula
containing a single variable on the RHS (where the
variable listed is included in the data, and
corresponds to propensity scores.")),
call. = FALSE)
}
} else if (length(propensity) == 2) {
if (!hasArg(treat)) {
stop(gsub("\\s+", " ",
"Treatment variable is undetermined. Either
provide two-sided formula in the 'propensity'
argument, where the left hand variable is the
treatment variable, or declare the treatment variable
using the argument 'treat'."),
call. = FALSE)
} else {
treat <- treatStr
}
if (hasArg(target) && target == 'late') {
stop(gsub("\\s+", " ",
"Target parameter 'late' requires a propensity
score model. Agrument 'propensity' should be
a two-sided formula."),
call. = FALSE)
}
}
} else {
if (hasArg(target) && target == 'late') {
stop(gsub("\\s+", " ",
"Target parameter 'late' requires a propensity
score model. Agrument 'propensity' should be
a two-sided formula."),
call. = FALSE)
}
propStr <- deparse(substitute(propensity))
propStr <- gsub("~", "", propStr)
propStr <- gsub("\\\"", "", propStr)
if (!propStr %in% colnames(data)) {
stop(gsub("\\s+", " ",
"Propensity score argument is interpreted as a
variable name, but is not found in the data set."),
call. = FALSE)
}
vars_propensity <- c(vars_propensity,
propStr)
## Determine treatment variable
if (hasArg(treat)) {
treat <- treatStr
vars_propensity <- c(vars_propensity,
treat)
} else {
stop(gsub("\\s+", " ",
"Treatment variable is undetermined. Either
provide two-sided formula in the 'propensity'
argument, where the left hand variable is the
treatment variable, or declare the treatment variable
using the argument 'treat'."),
call. = FALSE)
}
propensity <- propStr
propensity <- Formula::as.Formula(paste("~", propensity))
if (length(all.vars(propensity)) > 1) {
stop(gsub("\\s+", " ",
paste0("'propensity' argument must either be a
two-sided formula (if the propensity score is to be
estimated from the data), or a one-sided formula
containing a single variable on the RHS (where the
variable listed is included in the data, and
corresponds to propensity scores).")),
call. = FALSE)
}
}
} else {
if (direct) {
stop(gsub("\\s+", " ",
paste0("'propensity' argument must either be a
two-sided formula (if the propensity score is to be
estimated from the data), or a one-sided formula
containing a single variable on the RHS (where the
variable listed is included in the data, and
corresponds to propensity scores).")),
call. = FALSE)
}
## If there is no propensity argument, then the propensity
## function will have to be constructed. Firstd determine
## treatment variable
if (hasArg(treat)) {
treat <- treatStr
vars_propensity <- treat
} else if (is.list(ivlike)) {
warning(gsub("\\s+", " ",
"First independent variable of first IV regression
is selected as the treatment variable."))
treat <- all.vars(ivlike[[1]])[2]
} else {
stop("Treatment variable indeterminable.", call. = FALSE)
}
## Construct propensity formula
terms_propensity <- c(unlist(terms_formulas_x),
unlist(terms_formulas_z),
unlist(terms_mtr0),
unlist(terms_mtr1))
## Remove all u terms
uterms <- c()
um1 <- which(terms_propensity == uname)
um2 <- grep(paste0("^[", uname, "][[:punct:]]"),
terms_propensity)
um3 <- grep(paste0("[[:punct:]][", uname, "]$"),
terms_propensity)
um4 <- grep(paste0("[[:punct:]][",
uname,
"][[:punct:]]"),
terms_propensity)
um5 <- grep(paste0("[[:punct:]][", uname, "]\\s+"),
terms_propensity)
um6 <- grep(paste0("\\s+[", uname, "][[:punct:]]"),
terms_propensity)
um7 <- grep(paste0("\\s+[", uname, "]\\s+"),
terms_propensity)
um8 <- grep("uSplines", terms_propensity)
for (i in 1:8) {
uterms <- c(uterms, terms_propensity[get(paste0("um", i))])
}
terms_propensity <-
terms_propensity[!(terms_propensity %in% uterms) &
!(terms_propensity == treat)]
propensity <- paste(treat,
paste(unique(c("intercept", terms_propensity)),
collapse = " + "),
sep = " ~ ")
propensity <- gsub("intercept", "1", propensity)
propWarn <- paste("No propensity score formula or propensity score
variable name provided. By default, the function will
create a propensity score formula using all the
covariates in the IV-like specifications and the
MTRs. The propensity score formula generated is:",
propensity)
warning(gsub("\\s+", " ", propWarn), call. = FALSE)
propensity <- as.formula(propensity)
}
## Check that the treatment variable is not contained in the
## MTRs
if (treat %in% vars_mtr) {
stop(gsub("\\s+", " ",
"Treatment variable cannot be included in the MTRs."),
call. = FALSE)
}
## Collect all variables declared, and remove unobserved variable
## from list
allvars <- c(vars_y,
vars_formulas_x,
vars_formulas_z,
vars_subsets,
vars_mtr,
vars_weights,
vars_propensity)
## If the MTRs do not contain the unobserved random variable,
## provide warning that monotonicity constraints are
## redundant.
if (!(uname %in% vars_mtr |
!is.null(splinesobj[[1]]$splineslist) |
!is.null(splinesobj[[2]]$splineslist))) {
if (hasArg(m0.dec) | hasArg(m0.inc) |
hasArg(m1.dec) | hasArg(m1.inc) |
hasArg(mte.dec) | hasArg(mte.inc)) {
warning(gsub('\\s+', ' ',
paste0("Neither 'm0' nor 'm1' contains the
unobserved variable '", uname, "'.
Monotonicity constraints only apply to the
unobserved variable, so will be ignored.")),
call. = FALSE, immediate. = FALSE)
}
}
## For the components, since they may be terms, we first collect
## all terms, and then break it down into variables.
if (!direct) {
vars_components <- NULL
if (userComponents) {
for (comp in components) {
compString <- try(gsub("\\s+", " ",
Reduce(paste, deparse(comp))),
silent = TRUE)
if (class(compString) != "try-error") {
if (substr(compString, 1, 2) == "c(") {
vars_components <- c(vars_components,
restring(comp,
substitute = FALSE))
} else {
vars_components <- c(vars_components,
restring(comp,
substitute = FALSE,
command = ""))
}
## Remove all factor value specifications
vars_components <- strsplit(vars_components, ":")
vars_components <-
lapply(vars_components,
function(x) {
xTmp <- sapply(x,
strsplit,
split = " - ")
xTmp <- lapply(xTmp,
function(x) x[1])
unlist(xTmp)
})
vars_components <- unique(unlist(vars_components))
}
}
}
vars_components <- vars_components[vars_components != '""']
## Break component terms into variables.
vars_components_tmp <-
paste(".qqq ~",
paste(vars_components[vars_components != "components"],
collapse = " + "))
if (! "intercept" %in% vars_components) {
vars_components_tmp <- paste(vars_components_tmp, " - 1")
}
vars_components <- getXZ(as.formula(vars_components_tmp),
components = TRUE)
}
## Collect all variables, and remove the variable name
## corresponding to the unobservable.
if (!direct) allvars <- c(allvars, vars_components)
allvars <- unique(allvars)
allvars <- allvars[allvars != uname]
## Fill in components list if necessary
if (!direct) {
comp_filler <- lapply(terms_formulas_x,
function(x) as.character(unstring(x)))
if (userComponents) {
compMissing1 <- unlist(lapply(components, function(x) {
Reduce(paste, deparse(x)) == ""
}))
compMissing2 <- unlist(lapply(components, function(x) x == ""))
compMissing3 <- unlist(lapply(components,
function(x) x == "c()"))
compMissing <- as.logical(compMissing1 + compMissing2 +
compMissing3)
if (sum(compMissing) > 0) {
components[compMissing] <- comp_filler[compMissing]
}
} else {
components <- comp_filler
}
}
## Check that all LATE variables are included in the propensity
## formula, if a propensity score formula is provided
if (hasArg(target) && target == "late" &&
length(Formula::as.Formula(propensity))[1] != 0) {
if (!all(late.Z %in% vars_propensity)) {
stop(gsub("\\s+", " ",
"Variables in 'late.Z' argument must be contained
in the propensity score model."),
call. = FALSE)
}
nLateX <- 0
if (hasArg(late.X)) {
nLateX <- length(late.X)
if (!all(late.X %in% vars_propensity)) {
stop(gsub("\\s+", " ",
"Variables in 'late.X' argument must be contained
in the propensity score model."),
call. = FALSE)
}
}
}
## Keep only complete cases
varFound1 <- sapply(allvars, exists, where = data)
vars_data <- allvars[varFound1]
missingVars <- allvars[!varFound1]
missingVars <- missingVars[!missingVars %in% c("intercept", uname)]
for (i in 1:length(envList)) {
varFound2 <- sapply(missingVars, exists, where = envList[[i]])
missingVars <- missingVars[!varFound2]
}
if (length(missingVars) > 0) {
varError <- paste0("The following variables are not contained
in the data set or in the parent.frame(): ",
paste(missingVars, collapse = ", "),
".")
stop(gsub("\\s+", " ", varError), call. = FALSE)
}
origDataRows <- nrow(data)
data <- data[complete.cases(data[, vars_data]), ]
## Inform the user if observations have been dropped
if (origDataRows != nrow(data)) {
if (origDataRows - nrow(data) == 1) {
warning(gsub('\\s+', ' ',
paste0(origDataRows - nrow(data),
' incomplete observation dropped.')),
call. = FALSE,
immediate. = TRUE)
} else {
warning(gsub('\\s+', ' ',
paste0(origDataRows - nrow(data),
' incomplete observations dropped.')),
call. = FALSE,
immediate. = TRUE)
}
}
## Ensure there is variation in the treated variable
if (var(data[, treat]) == 0) {
stop(gsub('\\s+', ' ',
paste0('Estimation only uses the subsample of complete
observations, in which there
is no variation in the treatment variable, ',
treat, '.')), call. = FALSE)
}
## Adjust row names to handle bootstrapping
rownames(data) <- as.character(seq(1, nrow(data)))
## Construct a list of what variables interact with the
## spline.
splinesobj <- interactSplines(splinesobj, origm0, origm1, data, uname)
if (exists('splinesobj.equal')) {
splinesobj.equal <- interactSplines(splinesobj.equal,
orig.equal.coef,
orig.equal.coef,
data, uname)
}
## Check that all boolean variables have non-zero variance, and
## that all factor variables are complete.
allterms <- unlist(c(terms_formulas_x, terms_formulas_z, terms_mtr0,
terms_mtr1, terms_components))
allterms <- unique(unlist(sapply(allterms, strsplit, split = ":")))
allterms <- parenthBoolean(allterms)
## Check factors
factorPos <- grep("factor\\([0-9A-Za-z._]*\\)", allterms)
if (length(factorPos) > 0) {
factorDict <- list()
for (i in allterms[factorPos]) {
var <- substr(i, start = 8, stop = (nchar(i) - 1))
factorDict[[var]] <- sort(unique(data[, var]))
}
} else {
factorDict <- NULL
}
## To check for binary variables
binMinCheck <- apply(data, 2, min)
binMaxCheck <- apply(data, 2, max)
binUniqueCheck <- apply(data, 2, function(x) length(unique(x)))
binaryPos <- as.logical((binMinCheck == 0) *
(binMaxCheck == 1) *
(binUniqueCheck == 2))
binaryVars <- colnames(data)[binaryPos]
if (length(binaryVars) == 0) binaryVars <- NULL
## To check booleans expressions, you just need to ensure there is
## non-zero variance in the design matrix.
boolPos <- NULL
for (op in c("==", "!=", ">=", "<=", ">", "<")) {
boolPos <- c(boolPos, grep(op, allterms))
}
if (length(boolPos) > 0) {
boolPos <- unique(boolPos)
boolVars <- allterms[boolPos]
} else {
boolVars <- NULL
}
## Save call options for return
opList <- modcall(call,
newcall = list,
keepargs = c('target', 'late.from', 'late.to',
'late.X', 'genlate.lb',
'genlate.ub', 'm0', 'm1', 'm1.ub',
'm0.ub', 'm1.lb', 'm0.lb',
'mte.ub', 'mte.lb', 'm0.dec',
'm0.inc', 'm1.dec', 'm1.inc',
'mte.dec', 'mte.inc', 'link'))
opList <- eval(opList)
if (!direct) {
opList$ivlike <- ivlike
opList$components <- components
} else {
opList$outcome <- vars_y
}
if (hasArg(point)) opList$point <- point
if (hasArg(subset)) opList$subset <- subset
if (hasArg(propensity)) opList$propensity <- propensity
if (hasArg(uname)) opList$uname <- uname
if (hasArg(treat)) opList$treat <- treat
if (hasArg(target.weight0)) opList$target.weight0 <- target.weight0
if (hasArg(target.weight1)) opList$target.weight1 <- target.weight1
if (hasArg(target.knots0)) opList$target.knots0 <- target.knots0
if (hasArg(target.knots1)) opList$target.knots1 <- target.knots1
##---------------------------
## 5. Perform estimation
##---------------------------
estimateCall <- modcall(call,
newcall = ivmteEstimate,
dropargs = c("m0", "m1", "equal.coef",
"bootstraps", "data",
"bootstraps.m",
"bootstraps.replace",
"subset",
"levels", "ci.type",
"treat", "outcome",
"propensity",
"components",
"solver",
"solver.options",
"solver.presolve",
"solver.options.criterion",
"solver.options.bounds",
"lpsolver",
"lpsolver.options",
"lpsolver.presolve",
"lpsolver.options.criterion",
"lpsolver.options.bounds",
"rescale",
"target.weight0", "target.weight1",
"target.knots0", "target.knots1",
"late.Z", "late.to", "late.from",
"late.X", "eval.X",
"specification.test",
"noisy"),
newargs = list(m0 = quote(m0),
m1 = quote(m1),
target.weight0 =
quote(target.weight0),
target.weight1 =
quote(target.weight1),
target.knots0 =
quote(target.knots0),
target.knots1 =
quote(target.knots1),
data = quote(data),
subset = quote(subset),
solver = quote(solver),
rescale = rescale,
noisy = TRUE,
vars_y = quote(vars_y),
vars_mtr = quote(vars_mtr),
vars_data = quote(vars_data),
terms_mtr0 = quote(terms_mtr0),
terms_mtr1 = quote(terms_mtr1),
treat = quote(treat),
propensity = quote(propensity),
splinesobj = quote(splinesobj),
components = quote(components),
environments = quote(envList)))
if (hasArg(solver.options) | hasArg(lpsolver.options)) {
estimateCall <-
modcall(call = estimateCall,
newargs = list(solver.options = solver.options))
}
if (hasArg(solver.presolve) | hasArg(lpsolver.presolve)) {
estimateCall <-
modcall(call = estimateCall,
newargs = list(solver.presolve = solver.presolve))
}
if (hasArg(solver.options.criterion) |
hasArg(lpsolver.options.criterion)) {
estimateCall <-
modcall(call = estimateCall,
newargs = list(solver.options.criterion =
solver.options.criterion))
}
if (hasArg(solver.options.bounds) |
hasArg(lpsolver.options.bounds)) {
estimateCall <-
modcall(call = estimateCall,
newargs = list(solver.options.bounds =
solver.options.bounds))
}
if (hasArg(target) && (target == "late" | target == "genlate")) {
if (target == "late") {
estimateCall <- modcall(estimateCall,
newargs = list(late.Z = late.Z,
late.to = late.to,
late.from = late.from))
}
if (hasArg(late.X)) {
estimateCall <- modcall(estimateCall,
newargs = list(late.X = late.X,
eval.X = eval.X))
}
}
if (hasArg(equal.coef)) {
estimateCall <-
modcall(estimateCall,
newargs = list(equal.coef = equal.coef,
splinesobj.equal = splinesobj.equal))
}
if (solver == "none") {
estimateCall <-
modcall(estimateCall,
dropargs = c("point"),
newargs = list(point = TRUE))
}
## Estimate bounds
##
## If 'point' is not passed, then determine whether the model
## is point identified.
origEstimate <- eval(estimateCall)
if ('point.estimate' %in% names(origEstimate)) point <- TRUE
if (!'point.estimate' %in% names(origEstimate)) point <- FALSE
if (direct) specification.test <- FALSE
## Now estimate the bounds.
if (point == FALSE) {
## Estimate bounds without resampling
if (bootstraps == 0) {
output <- origEstimate
output$call.options <- opList
output <- output[sort(names(output))]
## If there are errors, return the errors along with
## whatever output is available.
if ('errorTypes' %in% names(output)) {
stop(output$error, call. = FALSE)
}
class(output) <- "ivmte"
} else {
## Obtain audit grid from original estimate
audit.grid <- list(support = origEstimate$audit.grid$audit.x,
uvec = origEstimate$audit.grid$audit.u)
if (is.null(audit.grid$support)) audit.grid$noX <- TRUE
if (!is.null(audit.grid$support)) audit.grid$noX <- FALSE
## Estimate bounds with resampling
boundEstimates <- NULL
propEstimates <- NULL
b <- 1
bootCriterion <- NULL
bootFailN <- 0
bootFailNote <- ""
bootFailIndex <- NULL
## Turn off specification test if criterion is 0
if (origEstimate$audit.criterion == 0) {
specification.test <- FALSE
}
if (specification.test) {
origSset <- lapply(origEstimate$s.set, function(x) {
x[c("ivspec", "beta", "g0", "g1")]
})
origCriterion <- origEstimate$audit.criterion
} else {
origSset <- NULL
origCriterion <- NULL
}
if (!hasArg(bootstraps.m)) bootstraps.m <- nrow(data)
if (bootstraps.m > nrow(data) && bootstraps.replace == FALSE) {
stop(gsub("\\s+", " ",
"Cannot draw more observations than the
number of rows in the data set when
'bootstraps.replace = FALSE'."),
call. = FALSE)
}
## The following function is what will be run for the
## bootstrap. This can be used by the 'future.apply'
## package.
tmpBootCall <- function(bootIndex, noisy = FALSE) {
if (noisy) cat("Bootstrap iteration ", bootIndex,
"...\n", sep = "")
output <- list(bootFailN = 0)
tmpSuccess <- FALSE
while(!tmpSuccess) {
bootIDs <- sample(x = seq(1, nrow(data)),
size = bootstraps.m,
replace = bootstraps.replace)
bdata <- data[bootIDs, ]
bootCall <-
modcall(estimateCall,
dropargs = c("data", "noisy",
"audit.grid",
"count.moments", "point"),
newargs = list(data = quote(bdata),
noisy = FALSE,
audit.grid = audit.grid,
orig.sset = origSset,
orig.criterion =
origCriterion,
count.moments = FALSE,
point = FALSE,
bootstrap = TRUE))
bootEstimate <- try(eval(bootCall), silent = TRUE)
if (is.list(bootEstimate)) {
tmpSuccess <- TRUE
output$bound <- bootEstimate$bound
if (specification.test) {
output$specification.test <-
bootEstimate$specification.test
}
if (direct) {
output$audit.criterion <-
bootEstimate$audit.criterion
}
if (!"propensity.coef" %in% names(bootEstimate) &&
(class(bootEstimate$propensity$model)[1] == "lm"
|
class(bootEstimate$propensity$model)[1] ==
"glm")) {
output$propensity.coef <-
bootEstimate$propensity$model$coef
} else {
output$propensity.coef <-
bootEstimate$propensity.coef
}
## If the future package is used, then
## bootstrap output should be
## suppressed. If the bootstrap is
## sequential, then produce output.
if (noisy) {
cat(" Audit count: ",
bootEstimate$audit.count, "\n", sep = "")
cat(" Minimum criterion: ",
fmtResult(bootEstimate$audit.criterion),
"\n", sep = "")
cat(" Bounds: ",
paste0("[",
fmtResult(bootEstimate$bounds[1]),
", ",
fmtResult(bootEstimate$bounds[2]),
"]"), "\n\n", sep = "")
}
} else {
if (noisy) cat(" Error, resampling...\n",
sep = "")
output$bootFailN <- output$bootFailN + 1
}
}
return(output)
}
## Check if user wants to use 'future'. If neither
## 'future' nor 'future.apply' is loaded, then it is
## assumed the user does not wish to use futures. If
## 'future' is loaded, but 'future.apply' is not
## installed, then a warning is thrown out to inform
## the user that the bootstrap cannot be parallelized
## until 'future.apply' is installed.
tmpFuture <- ('future.apply' %in% loadedNamespaces() |
'future' %in% loadedNamespaces())
## TESTING ####################
tmpFuture <- FALSE
## END TESTING ###################
if (tmpFuture) {
if (!requireNamespace('future.apply', quietly = TRUE)) {
tmpFuture <- FALSE
warning(gsub('\\s+', ' ',
"'future' package is not used in the
bootstrap since the 'future.apply' package
is not installed. Please install the
'future.apply' package to parallelize the
bootstrap using the 'future' package."),
immediate. = FALSE,
call. = FALSE)
}
}
## If neither 'future' nor 'future.apply' is loaded
## into the session, then the bootstrap will be
## performed sequentially.
if (!tmpFuture) {
for (b in 1:bootstraps) {
bootEstimate <- tmpBootCall(bootIndex = b,
noisy = noisy)
boundEstimates <- rbind(boundEstimates,
bootEstimate$bound)
if (specification.test) {
bootCriterion <- c(bootCriterion,
bootEstimate$specification.test)
}
if (direct) {
bootCriterion <- c(bootCriterion,
bootEstimate$audit.criterion)
}
propEstimates <-
cbind(propEstimates,
bootEstimate$propensity.coef)
bootFailIndex <- c(bootFailIndex,
bootEstimate$bootFailN)
rm(bootEstimate)
}
} else {
## If 'future' is loaded into the session, and
## 'future.apply' is available, then perform
## bootstrap in parallel
bootEstimate <-
future.apply::future_lapply(X = 1:bootstraps,
FUN = tmpBootCall,
future.seed = TRUE,
noisy = noisy)
boundEstimates <- Reduce(rbind,
lapply(bootEstimate,
function(x) {
x$bound
}))
if (specification.test) {
bootCriterion <- Reduce(rbind,
lapply(bootEstimate,
function(x) {
x$specification.test
}))
}
if (direct) {
bootCriterion <- Reduce(rbind,
lapply(bootEstimate,
function(x) {
x$audit.criterion
}))
}
propEstimates <- Reduce(cbind,
lapply(bootEstimate,
function(x) {
x$propensity.coef
}))
bootFailIndex <- Reduce(c,
lapply(bootEstimate,
function(x) {
x$bootFailN
}))
rm(bootEstimate)
}
bootFailN <- sum(bootFailIndex)
if (bootFailN > 0) {
if (bootFailN < 10) {
warning(gsub("\\s+", " ",
paste0("Bootstrap iteration(s) ",
paste(which(bootFailIndex != 0),
collapse = ", "),
" failed. Failed bootstraps are
repeated.")),
call. = FALSE)
} else {
warning(gsub("\\s+", " ",
paste0(bootFailN, " bootstrap
iterations failed.
Failed bootstraps are
repeated.")),
call. = FALSE)
}
}
rownames(boundEstimates) <- seq(bootstraps)
colnames(boundEstimates) <- c('lower', 'upper')
cat("--------------------------------------------------\n")
cat("Results", "\n")
cat("--------------------------------------------------\n")
cat("\n")
## Some output must be returned, even if noisy = FALSE
cat("Bounds on the target parameter: [",
fmtResult(origEstimate$bounds[1]), ", ",
fmtResult(origEstimate$bounds[2]), "]\n",
sep = "")
if (origEstimate$audit.count == 1) rs <- "round."
if (origEstimate$audit.count > 1) rs <- "rounds."
if (origEstimate$audit.count < audit.max) {
cat("Audit terminated successfully after",
origEstimate$audit.count,
rs, "\n")
}
if (origEstimate$audit.count == audit.max) {
if (is.null(origEstimate$audit.grid$violations)) {
cat("Audit terminated successfully after",
origEstimate$audit.count,
rs, "\n")
} else {
warning(gsub("\\s+", " ",
"Audit reached audit.max.
Try increasing audit.max."),
call. = FALSE,
immediate. = TRUE)
}
}
## Obtain standard errors of bounds
bootSE <- apply(boundEstimates, 2, sd)
## Construct confidence intervals for bounds
ci <- boundCI(bounds = origEstimate$bounds,
bounds.resamples = boundEstimates,
n = nrow(data),
m = bootstraps.m,
levels = levels,
type = "both")
## Construct confidence intervals for propensity scores
if (!is.null(propEstimates)) {
propensity.ci <- list()
if (!"propensity.coef" %in% names(origEstimate) &&
class(origEstimate$propensity$model)[1]
!= "data.frame" &&
class(origEstimate$propensity$model)[1]
!= "data.table") {
propCoef <- origEstimate$propensity$model$coef
} else {
propCoef <- origEstimate$propensity.coef
}
propSE <- apply(propEstimates, 1, sd)
for (level in levels) {
pLower <- (1 - level) / 2
pUpper <- 1 - (1 - level) / 2
probVec <- c(pLower, pUpper)
tmpPropCi1 <- apply(propEstimates, 1, quantile,
probs = probVec,
type = 1,
na.rm = TRUE)
tmpPropCi2 <- sweep(x = tcrossprod(c(qnorm(pLower),
qnorm(pUpper)),
propSE), MARGIN = 2,
propCoef, FUN = "+")
colnames(tmpPropCi2) <- colnames(tmpPropCi1)
rownames(tmpPropCi2) <- rownames(tmpPropCi1)
propensity.ci$nonparametric[[paste0("level",
level * 100)]] <-
t(tmpPropCi1)
propensity.ci$normal[[paste0("level", level * 100)]] <-
t(tmpPropCi2)
}
}
## Obtain p-value
pvalue <- c(boundPvalue(bounds = origEstimate$bounds,
bounds.resamples = boundEstimates,
n = nrow(data),
m = bootstraps.m,
type = "backward"),
boundPvalue(bounds = origEstimate$bounds,
bounds.resamples = boundEstimates,
n = nrow(data),
m = bootstraps.m,
type = "forward"))
names(pvalue) <- c("backward", "forward")
if (ci.type == "both") {
ciTypes <- c("backward", "forward")
} else {
ciTypes <- ci.type
}
ciN <- 1
for (i in ciTypes) {
cat("\nBootstrapped confidence intervals (",
i, "):\n", sep = "")
for (j in 1:length(levels)) {
cistr <- paste0("[",
fmtResult(ci[[i]][j, 1]),
", ",
fmtResult(ci[[i]][j, 2]),
"]")
cat(" ",
levels[j] * 100,
"%: ",
cistr, "\n", sep = "")
}
cat("p-value: ", fmtResult(pvalue[ciN]), "\n", sep = "")
ciN <- ciN + 1
}
## Obtain specification test
if (specification.test) {
criterionPValue <- mean(origEstimate$audit.criterion <=
bootCriterion)
cat("Bootstrapped specification test p-value: ",
criterionPValue, "\n", sep = "")
}
## Print bootstrap statistics
cat(sprintf("Number of bootstraps: %s",
bootstraps))
if (bootFailN > 0) {
cat(sprintf(" (%s failed and redrawn)\n",
bootFailN))
} else {
cat("\n")
}
cat("\n")
## Return output
output <- c(origEstimate,
list(bounds.se = bootSE,
bounds.bootstraps = boundEstimates,
bounds.ci = ci,
p.value = pvalue,
bootstraps = bootstraps,
bootstraps.failed = bootFailN))
if (specification.test) {
output$specification.p.value <- criterionPValue
}
if (!is.null(propEstimates)) {
output$propensity.se <- propSE
output$propensity.ci <- propensity.ci
}
if (direct) {
output$audit.criterion.bootstraps <- bootCriterion
}
output$call.options <- opList
output <- output[sort(names(output))]
class(output) <- "ivmte"
}
}
## Point estimate without resampling
if (point == TRUE & bootstraps == 0) {
output <- origEstimate
output$call.options = opList
output <- output[sort(names(output))]
class(output) <- "ivmte"
}
## Point estimate with resampling
if (point == TRUE & bootstraps > 0) {
teEstimates <- NULL
mtrEstimates <- NULL
propEstimates <- NULL
jstats <- NULL
SSRs <- NULL
b <- 1
bootFailN <- 0
bootFailNote <- ""
bootFailIndex <- NULL
if (!hasArg(bootstraps.m)) bootstraps.m <- nrow(data)
if (bootstraps.m > nrow(data) && bootstraps.replace == FALSE) {
stop(gsub("\\s+", " ",
"Cannot draw more observations than the number of rows
in the data set when 'bootstraps.replace = FALSE'."),
call. = FALSE)
}
missingFactors <- 0
factorMessage <- 0
factorText <- gsub("\\s+", " ",
"Insufficient variation in categorical variables
(i.e. factor variables, binary variables,
boolean expressions) in the bootstrap sample.
Additional bootstrap samples will be drawn.")
## The following function is what will be run for the
## bootstrap. This can be used by the 'future.apply'
## package.
tmpBootCall <- function(bootIndex, noisy = FALSE) {
if (noisy) cat("Bootstrap iteration ", bootIndex,
"...\n", sep = "")
output <- list(bootFailN = 0,
missingFactors = 0)
tmpSuccess <- FALSE
while(!tmpSuccess) {
bootIDs <- sample(x = seq(1, nrow(data)),
size = bootstraps.m,
replace = bootstraps.replace)
bdata <- data[bootIDs, ]
## Check if the bootstrap data contains sufficient
## variation in all boolean and factor expressions.
if (!is.null(factorDict)) {
for (i in 1:length(factorDict)) {
factorCheck <-
suppressWarnings(
all(sort(unique(bdata[, names(
factorDict)[i]]))
== factorDict[[i]]))
if (!factorCheck) break
}
if (!factorCheck) {
if (factorMessage == 0) {
factorMessage <- 1
warning(factorText, call. = FALSE,
immediate. = TRUE)
}
output$missingFactors <- output$missingFactors + 1
next
}
}
## Check if the binary variables contain
## sufficient variation
if (!is.null(binaryVars)) {
binarySd <- apply(as.matrix(bdata[, binaryVars]), 2, sd)
if (! all(binarySd > 0)) {
if (factorMessage == 0) {
factorMessage <- 1
warning(factorText, call. = FALSE,
immediate. = TRUE)
}
output$missingFactors <- output$missingFactors + 1
next
}
}
## Check if the boolean variables contain
## sufficient variation
if (!is.null(boolVars)) {
boolFormula <-
as.formula(paste("~ 0 +", paste(boolVars,
collapse = " + ")))
boolDmat <- design(boolFormula, bdata)$X
boolSd <- apply(boolDmat, 2, sd)
if (! all(boolSd > 0)) {
if (factorMessage == 0) {
factorMessage <- 1
warning(factorText, call. = FALSE,
immediate. = TRUE)
}
output$missingFactors <- output$missingFactors + 1
next
}
}
## The following code is used to obtain the point estimate.
bootCall <-
modcall(estimateCall,
dropargs = c("data", "noisy", "point"),
newargs =
list(data = quote(bdata),
noisy = FALSE,
point.center =
origEstimate$moments$criterion,
point.redundant =
origEstimate$redundant,
point = TRUE,
bootstrap = TRUE))
bootEstimate <- try(eval(bootCall), silent = TRUE)
if (is.list(bootEstimate)) {
tmpSuccess <- TRUE
output$point.estimate <- bootEstimate$point.estimate
output$mtr.coef <- bootEstimate$mtr.coef
if (!"propensity.coef" %in% names(bootEstimate)) {
output$propensity.coef <-
bootEstimate$propensity$model$coef
} else {
output$propensity.coef <-
bootEstimate$propensity.coef
}
if (!is.null(bootEstimate$j.test)) {
output$jstat <- bootEstimate$j.test[1]
}
if (!is.null(bootEstimate$SSR)) output$SSR <-
bootEstimate$SSR
if (noisy) {
cat(" Point estimate:",
fmtResult(bootEstimate$point.estimate),
"\n\n", sep = "")
}
} else {
if (noisy) cat(" Error, resampling...\n", sep = "")
output$bootFailN <- output$bootFailN + 1
}
}
return(output)
}
## Check if user wants to use 'future'. If neither
## 'future' nor 'future.apply' is loaded, then it is
## assumed the user does not wish to use futures. If
## 'future' is loaded, but 'future.apply' is not
## installed, then a warning is thrown out to inform
## the user that the bootstrap cannot be parallelized
## until 'future.apply' is installed.
tmpFuture <- ('future.apply' %in% loadedNamespaces() |
'future' %in% loadedNamespaces())
if (tmpFuture) {
if (!requireNamespace('future.apply', quietly = TRUE)) {
tmpFuture <- FALSE
warning(gsub('\\s+', ' ',
"'future' package is not used in the
bootstrap since the 'future.apply' package
is not installed. Please install the
'future.apply' package to parallelize the
bootstrap using the 'future' package."),
immediate. = FALSE,
call. = FALSE)
}
}
## If neither 'future' nor 'future.apply' is loaded
## into the session, then the bootstrap will be performed sequentially.
if (!tmpFuture) {
for (b in 1:bootstraps) {
bootEstimate <- tmpBootCall(bootIndex = b,
noisy = noisy)
teEstimates <- c(teEstimates, bootEstimate$point.estimate)
mtrEstimates <- cbind(mtrEstimates, bootEstimate$mtr.coef)
propEstimates <- cbind(propEstimates,
bootEstimate$propensity.coef)
jstats <- c(jstats, bootEstimate$jstat)
SSRs <- c(SSRs, bootEstimate$SSR)
bootFailIndex <- c(bootFailIndex,
bootEstimate$bootFailN)
missingFactors <- c(missingFactors,
bootEstimate$missingFactors)
rm(bootEstimate)
}
} else {
## If 'future' is loaded into the session, and
## 'future.apply' is available, then perform
## bootstrap in parallel
bootEstimate <-
future.apply::future_lapply(X = 1:bootstraps,
FUN = tmpBootCall,
future.seed = TRUE,
noisy = noisy)
teEstimates <- Reduce(c, lapply(bootEstimate,
function(x) x$point.estimate))
mtrEstimates <- Reduce(cbind, lapply(bootEstimate,
function(x) x$mtr.coef))
propEstimates <- Reduce(cbind,
lapply(bootEstimate,
function(x) x$propensity.coef))
jstats <- Reduce(c, lapply(bootEstimate,
function(x) {
if (is.null(x$jstat)) NULL
if (!is.null(x$jstat)) x$jstat
}))
SSRs <- Reduce(c, lapply(bootEstimate,
function(x) x$SSR))
bootFailIndex <- Reduce(c, lapply(bootEstimate,
function(x) x$bootFailN))
missingFactors <-
Reduce(c, lapply(bootEstimate,
function(x) x$missingFactors))
rm(bootEstimate)
}
bootFailN <- sum(bootFailIndex)
if (bootFailN > 0) {
if (bootFailN < 10) {
warning(gsub("\\s+", " ",
paste0("Bootstrap iteration(s) ",
paste(which(bootFailIndex != 0),
collapse = ", "),
" failed. Failed bootstraps are
repeated.")),
call. = FALSE)
} else {
warning(gsub("\\s+", " ",
paste0(bootFailN, " bootstrap
iterations failed.
Failed bootstraps are
repeated.")),
call. = FALSE)
}
}
bootSE <- sd(teEstimates)
colnames(mtrEstimates) <- seq(ncol(mtrEstimates))
jstats <- unname(jstats)
mtrSE <- apply(mtrEstimates, 1, sd)
if (!is.null(propEstimates)) {
propSE <- apply(propEstimates, 1, sd)
}
## Construct p-values (point estimate and J-test)
pvalue <- c(nonparametric =
(sum(teEstimates - origEstimate$point.estimate >=
abs(origEstimate$point.estimate)) +
sum(teEstimates - origEstimate$point.estimate <=
-abs(origEstimate$point.estimate))) /
bootstraps,
parametric =
pnorm(-abs(origEstimate$point.estimate -
mean(teEstimates)) / bootSE) * 2)
if (!is.null(jstats)) {
jtest <- c(mean(jstats >= origEstimate$j.test[1]),
origEstimate$j.test)
names(jtest) <- c("Bootstrapped p-value",
names(origEstimate$j.test))
jtest <- jtest[c(2, 1, 3, 4)]
} else {
jtest <- NULL
}
## Construct confidence intervals for various levels
for (level in levels) {
pLower <- (1 - level) / 2
pUpper <- 1 - (1 - level) / 2
probVec <- c(pLower, pUpper)
## Conf. int. 1: quantile method (same as percentile
## method; nonparametric)
assign(paste0("ci1", level * 100),
quantile(x = teEstimates,
probs = probVec,
type = 1))
assign(paste0("mtrci1", level * 100),
apply(mtrEstimates, 1, quantile,
probs = probVec,
type = 1))
if (!is.null(propEstimates)) {
assign(paste0("propci1", level * 100),
apply(propEstimates, 1, quantile,
probs = probVec,
type = 1,
na.rm = TRUE))
}
## Conf. int. 2: percentile method using Z statistics
tmpCi2 <- origEstimate$point.estimate +
c(qnorm(pLower), qnorm(pUpper)) * bootSE
names(tmpCi2) <- paste0(probVec * 100, "%")
tmpMtrCi2 <- sweep(x = tcrossprod(c(qnorm(pLower),
qnorm(pUpper)),
mtrSE),
MARGIN = 2, origEstimate$mtr.coef, FUN = "+")
colnames(tmpMtrCi2) <- colnames(get(paste0("mtrci1",
level * 100)))
rownames(tmpMtrCi2) <- rownames(get(paste0("mtrci1",
level * 100)))
assign(paste0("ci2", level * 100), tmpCi2)
assign(paste0("mtrci2", level * 100), tmpMtrCi2)
## Special case for propensity scores, which may not exist
if (!"propensity.coef" %in% names(origEstimate)) {
propCoef <- origEstimate$propensity$model$coef
} else {
propCoef <- origEstimate$propensity.coef
}
if (!is.null(propEstimates)) {
tmpPropCi2 <- sweep(x = tcrossprod(c(qnorm(pLower),
qnorm(pUpper)),
propSE), MARGIN = 2,
propCoef, FUN = "+")
colnames(tmpPropCi2) <- colnames(get(paste0("propci1",
level * 100)))
rownames(tmpPropCi2) <- rownames(get(paste0("propci1",
level * 100)))
assign(paste0("propci2", level * 100), tmpPropCi2)
}
}
## Prepare output
output1 <- c(origEstimate,
list(point.estimate.se = bootSE,
mtr.se = mtrSE))
if (!is.null(propEstimates)) output1$propensity.se <- propSE
output1 <- c(output1,
list(point.estimate.bootstraps = teEstimates,
mtr.bootstraps = t(mtrEstimates)))
point.estimate.ci <- list()
mtr.ci <- list()
if (!is.null(propEstimates)) {
propensity.ci <- list()
}
for (level in levels) {
point.estimate.ci$nonparametric <-
rbind(point.estimate.ci$nonparametric,
get(paste0("ci1", level * 100)))
point.estimate.ci$normal <-
rbind(point.estimate.ci$normal,
get(paste0("ci2", level * 100)))
mtr.ci$nonparametric[[paste0("level", level * 100)]] <-
t(get(paste0("mtrci1", level * 100)))
mtr.ci$normal[[paste0("level", level * 100)]] <-
t(get(paste0("mtrci2", level * 100)))
if (!is.null(propEstimates)) {
propensity.ci$nonparametric[[paste0("level",
level * 100)]] <-
t(get(paste0("propci1", level * 100)))
propensity.ci$normal[[paste0("level", level * 100)]] <-
t(get(paste0("propci2", level * 100)))
}
}
rownames(point.estimate.ci$nonparametric) <- levels
rownames(point.estimate.ci$normal) <- levels
colnames(point.estimate.ci$nonparametric) <- c("lb", "ub")
colnames(point.estimate.ci$normal) <- c("lb", "ub")
output2 <- list(point.estimate.ci = point.estimate.ci,
mtr.ci = mtr.ci)
if (!is.null(propEstimates)) {
output2$propensity.ci = propensity.ci
}
output3 <- list(p.value = pvalue,
bootstraps = bootstraps,
bootstraps.failed = bootFailN)
if (!direct) {
output3$j.test <- jtest
output3$j.test.bootstraps <- jstats
}
if (direct) {
output3$SSR.bootstraps <- SSRs
}
if ("j.test" %in% names(output1) &&
"j.test" %in% names(output3)) {
output1$j.test <- NULL
}
output <- c(output1, output2, output3)
cat("--------------------------------------------------\n")
cat("Results", "\n")
cat("--------------------------------------------------\n")
cat("\nPoint estimate of the target parameter: ",
fmtResult(origEstimate$point.estimate), "\n",
sep = "")
cat("\nBootstrapped confidence intervals (nonparametric):\n")
for (level in levels) {
ci1str <- get(paste0("ci1", level * 100))
ci1str <- paste0("[",
fmtResult(ci1str[1]),
", ",
fmtResult(ci1str[2]),
"]")
cat(" ",
level * 100,
"%: ",
ci1str, "\n", sep = "")
}
cat("p-value: ",
fmtResult(pvalue[1]), "\n", sep = "")
if (!is.null(jtest)) {
cat("Bootstrapped J-test p-value: ",
fmtResult(jtest[2]), "\n", sep = "")
}
cat(sprintf("Number of bootstraps: %s",
bootstraps))
if (bootFailN > 0) {
cat(sprintf(" (%s failed and redrawn)\n",
bootFailN))
} else {
cat("\n")
}
cat("\n")
## cat("Bootstrapped confidence intervals (normal quantiles):\n")
## for (level in levels) {
## ci2str <- get(paste0("ci2", level * 100))
## ci2str <- paste0("[",
## fmtResult(ci2str[1]),
## ", ",
## fmtResult(ci2str[2]),
## "]")
## cat(" ",
## level * 100,
## "%: ",
## ci2str, "\n", sep = "")
## }
## cat("p-value: ",
## fmtResult(pvalue[2]), "\n\n", sep = "")
if (sum(missingFactors) > bootstraps) {
warning(gsub("\\s+", " ",
paste0("In order to obtain ", bootstraps,
" boostrap samples without omiting any
levels from all categorical variables,
a total of ", (bootstraps + sum(missingFactors)),
" samples had to be drawn.
This is due to factor variables
and/or boolean variables potentially being omitted from
boostrap samples.")), call. = FALSE)
}
output$call.options <- opList
output <- output[sort(names(output))]
class(output) <- "ivmte"
}
## Return log output
sink()
close(tmpOutput)
messageCheck <- try(output$messages <- readLines(logName),
silent = TRUE)
if (class(messageCheck) == "try-error") {
output$messages <-
c("Error in saving console messages.",
"This can only occur if the package is run in parallel.",
"Please post an issue on GitHub if otherwise.",
"The error is random, and pertains to the log file's name.",
"Nevertheless, the estimation is performed successfullly.")
}
rm(tmpOutput)
unlink(logName)
}, error = function(err) {
if (origSinks < sink.number()) {
## If there was an error, output should be returned so the
## user knows how far the function got before crashing.
sink()
close(tmpOutput)
if (!noisy) {
messages <- readLines(logName)
if (length(messages) > 0) {
cat(messages, sep = '\n')
if (exists("tmpOutput")) suppressWarnings(rm(tmpOutput))
}
unlink(logName)
}
}
stop(err)
}, finally = {
## Turn off sinks if there are interruptions
if (origSinks < sink.number()) {
sink()
if (exists("tmpOutput")) suppressWarnings(close(tmpOutput))
if (exists("tmpOutput")) suppressWarnings(rm(tmpOutput))
unlink(logName)
}
## Make sure temporary log files are delated
unlink(logName)
## And return output
if (exists('output')) {
if (!noisy) return(output)
if (noisy) return(invisible(output))
}
})
}
#' Construct confidence intervals for treatment effects under partial
#' identification
#'
#' This function constructs the forward and backward confidence
#' intervals for the treatment effect under partial identification.
#'
#' @param bounds vector, bounds of the treatment effects under partial
#' identification.
#' @param bounds.resamples matrix, stacked bounds of the treatment
#' effects under partial identification. Each row corresponds to a
#' subset resampled from the original data set.
#' @param n integer, size of original data set.
#' @param m integer, size of resampled data sets.
#' @param levels vector, real numbers between 0 and 1. Values
#' correspond to the level of the confidence intervals constructed
#' via bootstrap.
#' @param type character. Set to 'forward' to construct the forward
#' confidence interval for the treatment effect bounds. Set to
#' 'backward' to construct the backward confidence interval for
#' the treatment effect bounds. Set to 'both' to construct both
#' types of confidence intervals.
#' @return if \code{type} is 'forward' or 'backward', then the
#' corresponding type of confidence interval for each level is
#' returned. The output is in the form of a matrix, with each row
#' corresponding to a level. If \code{type} is 'both', then a list
#' is returned. One element of the list is the matrix of backward
#' confidence intervals, and the other element of the list is the
#' matrix of forward confidence intervals.
boundCI <- function(bounds, bounds.resamples, n, m, levels, type) {
if (type == "both") output <- list()
## Rescale and center bounds
boundLBmod <- sqrt(m) *
(bounds.resamples[, 1] - bounds[1])
boundUBmod <- sqrt(m) *
(bounds.resamples[, 2] - bounds[2])
## Construct backward confidence interval
if (type %in% c('backward', 'both')) {
backwardCI <- cbind(bounds[1] +
(1 / sqrt(n)) *
quantile(boundLBmod,
0.5 * (1 - levels),
type = 1),
bounds[2] +
(1 / sqrt(n)) *
quantile(boundUBmod,
0.5 * (1 + levels),
type = 1))
colnames(backwardCI) <- c("lb.backward", "ub.backward")
rownames(backwardCI) <- levels
if (type == "both") output$backward <- backwardCI
if (type == "backward") output <- backwardCI
}
## Construct forward confidence interval
if (type %in% c('forward', 'both')) {
forwardCI <- cbind(bounds[1] -
(1 / sqrt(n)) *
quantile(boundLBmod,
0.5 * (1 + levels),
type = 1),
bounds[2] -
(1 / sqrt(n)) *
quantile(boundUBmod,
0.5 * (1 - levels),
type = 1))
colnames(forwardCI) <- c("lb.forward", "ub.forward")
rownames(forwardCI) <- levels
if (type == "both") output$forward <- forwardCI
if (type == "forward") output <- forwardCI
}
return(output)
}
#' Construct p-values for treatment effects under partial
#' identification
#'
#' This function estimates the p-value for the treatment effect under
#' partial identification. p-values corresponding to forward and
#' backward confidence intervals can be returned.
#'
#' @param bounds vector, bounds of the treatment effects under partial
#' identification.
#' @param bounds.resamples matrix, stacked bounds of the treatment
#' effects under partial identification. Each row corresponds to a
#' subset resampled from the original data set.
#' @param n integer, size of original data set.
#' @param m integer, size of resampled data sets.
#' @param type character. Set to 'forward' to construct the forward
#' confidence interval for the treatment effect bounds. Set to
#' 'backward' to construct the backward confidence interval for
#' the treatment effect bounds. Set to 'both' to construct both
#' types of confidence intervals.
#' @return If \code{type} is 'forward' or 'backward', a scalar p-value
#' corresponding to the type of confidence interval is
#' returned. If \code{type} is 'both', a vector of p-values
#' corresponding to the forward and backward confidence intervals
#' is returned.
boundPvalue <- function(bounds, bounds.resamples, n, m, type) {
levels <- seq(0, 1, 1 / nrow(bounds.resamples))[-1]
cis <- boundCI(bounds, bounds.resamples, n, m, levels, type)
posNo0 <- apply(cis, MARGIN = 1, FUN = function(x) {
!((0 >= x[1]) && (0 <= x[2]))
})
if (all(posNo0)) {
return(0)
} else if (all(!posNo0)) {
return(1)
} else {
return(1 - max(levels[posNo0]))
}
}
#' Check polynomial form of the u-term
#'
#' This function ensures that the unobservable term enters into the
#' MTR in the correct manner. That is, it enters as a polynomial.
#'
#' @param formula a formula.
#' @param uname name of the unobserved variable.
#' @return If the unobservable term is entered correctly into the
#' formula, then \code{NULL} is returned. Otherwise, the vector of
#' incorrect terms is returned.
checkU <- function(formula, uname) {
termsList <- attr(terms(formula), "term.labels")
termsList <- unique(unlist(strsplit(termsList, ":")))
termsVarList <- lapply(termsList, function(x) {
all.vars(as.formula(paste("~", x)))
})
upos <- unlist(lapply(termsVarList, function(x) uname %in% x))
parPos <- unlist(lapply(termsList, function(x) grepl("\\(", x)))
termsList <- termsList[as.logical(upos * parPos)]
checkVec <- grepl(paste0("^I\\(", uname, "\\^[0-9]+\\)$"), termsList)
if (all(checkVec)) errorTermsFormula <- NULL
if (!all(checkVec)) errorTermsFormula <- termsList[!checkVec]
return(errorTermsFormula)
}
#' Single iteration of estimation procedure from Mogstad, Torgovitsky,
#' Santos (2018)
#'
#' This function estimates the treatment effect parameters, following
#' the procedure described in Mogstad, Santos and Torgovitsky (2018)
#' (\doi{10.3982/ECTA15463}). A detailed description of the module and
#' its features can be found in
#' \href{https://a-torgovitsky.github.io/shea-torgovitsky.pdf}{Shea
#' and Torgovitsky (2021)}. However, this is not the main function of
#' the module. See \code{\link{ivmte}} for the main function. For
#' examples of how to use the package, see the vignette, which is
#' available on the module's
#' \href{https://github.com/jkcshea/ivmte}{GitHub} page.
#'
#' The treatment effects parameters the user can choose from are the
#' ATE, ATT, ATU, LATE, and generalized LATE. The user is required to
#' provide a polynomial expression for the marginal treatment
#' responses (MTR), as well as a set of regressions.
#'
#' There are two approaches to estimating the treatment effect
#' parameters. The first approach restricts the set of MTR
#' coefficients on each term of the MTRs to be consistent with the
#' regression estimates from the specifications passed through
#' \code{ivlike}. The bounds on the treatment effect parameter
#' correspond to finding coefficients on the MTRs that maximize their
#' average difference. If the model is point identified, then GMM is
#' used for estimation. Otherwise, the function solves an LP
#' problem. The second approach restricts the set of MTR coefficients
#' to fit the conditional mean of the outcome variable. If the model
#' is point identified, then constrained least squares is used for
#' estimation. Otherwise, the function solves a QCQP.
#'
#' The estimation procedure relies on the propensity to take up
#' treatment. The propensity scores can either be estimated as part of
#' the estimation procedure, or the user can specify a variable in the
#' data set already containing the propensity scores.
#'
#' Constraints on the shape of the MTRs and marginal treatment effects
#' (MTE) can be imposed by the user. Specifically, bounds and
#' monotonicity restrictions are permitted. These constraints are
#' first enforced over a subset of points in the data. An iterative
#' audit procedure is then performed to ensure the constraints hold
#' more generally.
#'
#' @param late.Z vector of variable names used to define the LATE.
#' @param late.from baseline set of values of Z used to define the
#' LATE.
#' @param late.to comparison set of values of Z used to define the
#' LATE.
#' @param late.X vector of variable names of covariates to condition
#' on when defining the LATE.
#' @param eval.X numeric vector of the values to condition variables
#' in \code{late.X} on when estimating the LATE.
#' @param audit.grid list, contains the \code{A} matrix used in the
#' audit for the original sample, as well as the RHS vector used
#' in the audit from the original sample.
#' @param point.center numeric, a vector of GMM moment conditions
#' evaluated at a solution. When bootstrapping, the moment
#' conditions from the original sample can be passed through this
#' argument to recenter the bootstrap distribution of the
#' J-statistic.
#' @param point.redundant vector of integers indicating which
#' components in the S-set are redundant.
#' @param bootstrap boolean, indicates whether the estimate is
#' for the bootstrap.
#' @param count.moments boolean, indicate if number of linearly
#' independent moments should be counted.
#' @param orig.sset list, only used for bootstraps. The list contains
#' the gamma moments for each element in the S-set, as well as the
#' IV-like coefficients.
#' @param orig.criterion numeric, only used for bootstraps. The scalar
#' corresponds to the minimum observational equivalence criterion
#' from the original sample.
#' @param vars_y character, variable name of observed outcome
#' variable.
#' @param vars_mtr character, vector of variables entering into
#' \code{m0} and \code{m1}.
#' @param terms_mtr0 character, vector of terms entering into
#' \code{m0}.
#' @param terms_mtr1 character, vector of terms entering into
#' \code{m1}.
#' @param vars_data character, vector of variables that can be found
#' in the data.
#' @param splinesobj list of spline components in the MTRs for treated
#' and control groups. Spline terms are extracted using
#' \code{\link{removeSplines}}. This object is supposed to be a
#' dictionary of splines, containing the original calls of each
#' spline in the MTRs, their specifications, and the index used
#' for naming each basis spline.
#' @param splinesobj.equal list of spline components in the MTRs for
#' treated and control groups. The structure of
#' \code{splinesobj.equal} is the same as \code{splinesobj},
#' except the splines are restricted to those whose MTR cofficients
#' should be constrained to be equal across treatment groups.
#' @param environments a list containing the environments of the MTR
#' formulas, the IV-like formulas, and the propensity score
#' formulas. If a formula is not provided, and thus no environment
#' can be found, then the parent.frame() is assigned by default.
#'
#' @inheritParams ivmte
#'
#' @return Returns a list of results from throughout the estimation
#' procedure. This includes all IV-like estimands; the propensity
#' score model; bounds on the treatment effect; the estimated
#' expectations of each term in the MTRs; the components and
#' results of the LP/QCQP problem.
# @export
ivmteEstimate <- function(data, target, late.Z, late.from, late.to,
late.X, eval.X, genlate.lb, genlate.ub,
target.weight0, target.weight1,
target.knots0 = NULL, target.knots1 = NULL,
m0, m1, uname = u, m1.ub, m0.ub, m1.lb,
m0.lb, mte.ub, mte.lb, m0.dec, m0.inc,
m1.dec, m1.inc, mte.dec, mte.inc,
equal.coef, ivlike, components, subset,
propensity, link = "logit", treat, solver,
solver.options, solver.presolve,
solver.options.criterion,
solver.options.bounds, criterion.tol = 0.01,
initgrid.nx = 20, initgrid.nu = 20,
audit.nx = 2500, audit.nu = 25,
audit.add = 100, audit.max = 25, audit.tol,
audit.grid = NULL, rescale = TRUE,
point = FALSE, point.eyeweight = FALSE,
point.center = NULL, point.redundant = NULL,
bootstrap = FALSE, count.moments = TRUE,
orig.sset = NULL, orig.criterion = NULL,
vars_y, vars_mtr, terms_mtr0, terms_mtr1,
vars_data, splinesobj, splinesobj.equal,
noisy = TRUE,
smallreturnlist = FALSE, debug = FALSE,
environments) {
call <- match.call(expand.dots = FALSE)
if (!hasArg(ivlike) | (hasArg(ivlike) && is.null(ivlike))) {
direct <- TRUE
} else {
direct <- FALSE
}
if (classFormula(ivlike)) ivlike <- c(ivlike)
## Character arguments will be converted to lowercase
if (hasArg(solver)) solver <- tolower(solver)
if (hasArg(target)) target <- tolower(target)
if (hasArg(link)) link <- tolower(link)
if (hasArg(ci.type)) ci.type <- tolower(ci.type)
## Convert uname into a string
uname <- deparse(substitute(uname))
uname <- gsub("~", "", uname)
uname <- gsub("\\\"", "", uname)
if (noisy == TRUE && hasArg(solver)) {
if (solver == "gurobi") cat("\nSolver: Gurobi ('gurobi')\n\n")
if (solver == "cplexapi") cat("\nSolver: CPLEX ('cplexAPI')\n\n")
if (solver == "rmosek") cat("\nSolver: MOSEK ('Rmosek')\n\n")
if (solver == "lpsolveapi") {
cat("\nSolver: lp_solve ('lpSolveAPI')\n\n")
warning(gsub("\\s+", " ",
"The R package 'lpSolveAPI' interfaces with 'lp_solve',
which is outdated and potentially unreliable. It is
recommended to use commercial solvers
Gurobi (solver = 'gurobi'),
CPLEX (solver = 'cplexAPI'), or
MOSEK (solver = 'Rmosek') instead.
Free academic licenses can be obtained for these
commercial solvers."),
"\n", call. = FALSE, immediate. = TRUE)
}
}
##---------------------------
## 1. Obtain propensity scores
##---------------------------
if (noisy == TRUE) {
cat("Obtaining propensity scores...\n")
}
## Estimate propensity scores
pcall <- modcall(call,
newcall = propensity,
keepargs = c("link", "late.Z", "late.X"),
dropargs = "propensity",
newargs = list(data = quote(data),
formula = propensity,
env = environments$propensity))
pmodel <- eval(pcall)
##---------------------------
## 2. Generate target moments/gamma terms
##---------------------------
if (noisy == TRUE) {
cat("\nGenerating target moments...\n")
}
## Parse polynomials
if (!is.null(m0)) {
m0call <- modcall(call,
newcall = polyparse,
keepargs = c("uname"),
newargs = list(formula = m0,
data = quote(data),
env = quote(environments$m0)))
pm0 <- eval(as.call(m0call))
rm(m0call)
} else {
pm0 <- NULL
}
if (!is.null(m1)) {
m1call <- modcall(call,
newcall = polyparse,
keepargs = c("uname"),
newargs = list(formula = m1,
data = quote(data),
env = quote(environments$m1)))
pm1 <- eval(as.call(m1call))
rm(m1call)
} else {
pm1 <- NULL
}
if (hasArg(equal.coef) && hasArg(splinesobj.equal)) {
## Get the names of the non-spline terms
pmequal.call <-
modcall(call,
newcall = polyparse,
keepargs = c("uname"),
newargs = list(formula = equal.coef,
data = unique(data),
env = quote(environments$equal.coef)))
pmequal <- eval(as.call(pmequal.call))$terms
rm(pmequal.call)
## Get the name of the spline terms
pmequal.splines0 <- NULL
pmequal.splines1 <- NULL
if (!is.null(splinesobj.equal[[1]]$splinesinter)) {
for (d in 0:1) {
tmpNames <- NULL
for (i in 1:length(splinesobj.equal[[1]]$splinesinter)) {
tmpIndex <-
splinesobj.equal[[(d + 1)]]$splinesdict[[i]]$gstar.index
tmpSplinesCall <-
gsub("uSpline\\(",
"uSplineInt(x = 1, ",
names(splinesobj.equal[[1]]$splineslist)[i])
tmpSplines <- eval(parse(text = tmpSplinesCall))
tmpInter <- splinesobj.equal[[(d + 1)]]$splinesinter[[i]]
for (j in 1:length(tmpInter)) {
tmpSplineName <-
paste0(paste0("u", d, "S", tmpIndex, "."),
seq(1, ncol(tmpSplines)),
paste0(":", tmpInter[j]))
tmpNames <- c(tmpNames, tmpSplineName)
}
}
assign(paste0('pmequal.splines', d), tmpNames)
rm(tmpIndex, tmpSplinesCall, tmpSplines, tmpInter,
tmpSplineName)
}
}
pmequal0 <- c(pmequal, pmequal.splines0)
pmequal1 <- c(pmequal, pmequal.splines1)
}
## Generate target weights
if (!hasArg(target.weight0) & !hasArg(target.weight1)) {
target.weight0 <- NULL
target.weight1 <- NULL
}
if (is.null(target.weight0) & is.null(target.weight1)) {
gentargetcall <- modcall(call,
newcall = genTarget,
keepargs = c("treat", "m1", "m0",
"target", "late.Z",
"late.from", "late.to",
"late.X", "eval.X",
"genlate.lb",
"genlate.ub",
"splinesobj", "noisy"),
dropargs = "data",
newargs = list(data = quote(data),
pmodobj = quote(pmodel),
pm0 = quote(pm0),
pm1 = quote(pm1)))
} else {
gentargetcall <- modcall(call,
newcall = genTarget,
keepargs = c("treat", "m1", "m0",
"target.weight0",
"target.weight1",
"target.knots0", "target.knots1",
"splinesobj", "noisy"),
dropargs = "data",
newargs = list(data = quote(data),
pmodobj = quote(pmodel),
pm0 = quote(pm0),
pm1 = quote(pm1)))
}
if (direct) {
gentargetcall <- modcall(gentargetcall,
dropargs = 'point')
}
targetGammas <- eval(gentargetcall)
rm(gentargetcall)
gstar0 <- targetGammas$gstar0
gstar1 <- targetGammas$gstar1
if (hasArg(equal.coef) && hasArg(splinesobj.equal)) {
if (! all(pmequal0 %in% names(gstar0), pmequal1 %in% names(gstar1))) {
## If equal.coef involves factor variables, then it is
## possible that not all terms in equal.coef will be in the
## MTR. Check that all mismatched terms involve factors.
tmpPmMismatch0 <- pmequal0[which(! pmequal0 %in% names(gstar0))]
tmpPmMismatch1 <- pmequal1[which(! pmequal1 %in% names(gstar1))]
tmpPmMismatch <- c(tmpPmMismatch0, tmpPmMismatch1)
tmpFactorCheck <- grepl("factor\\([0-9A-Za-z._]*\\)", tmpPmMismatch)
if (all(tmpFactorCheck)) {
pmequal0 <- pmequal0[which(pmequal0 %in% names(gstar0))]
pmequal1 <- pmequal1[which(pmequal1 %in% names(gstar1))]
rm(tmpPmMismatch0, tmpPmMismatch1, tmpPmMismatch)
} else {
## If a mismatched term does not pertain to factor
## variables, then request an error report.
stop(gsub('\\s+', ' ',
"Failed to match terms in 'equal.coef' with terms in
'm0' and 'm1'. Please report this issue on our GitHub
page at https://github.com/jkcshea/ivmte/issues."),
call. = FALSE)
}
}
}
##---------------------------
## 3. Generate moments/gamma terms for IV-like estimands
##---------------------------
if (noisy == TRUE) {
if (!direct) cat("\nGenerating IV-like moments...\n")
if (direct) cat("\nPerforming direct MTR regression...\n")
}
sset <- list() ## Contains all IV-like estimates and their
## corresponding moments/gammas
scount <- 1 ## counter for S-set constraints
subsetIndexList <- list()
if (classList(ivlike)) {
## Construct `sset' object when multiple IV-like
## specifications are provided
ivlikeCounter <- 1
ivlikeD <- NULL
for (i in 1:length(ivlike)) {
sformula <- ivlike[[i]]
if (all(length(Formula::as.Formula(sformula)) == c(1, 1))) {
if (treat %in% all.vars(Formula::as.Formula(sformula)[[3]])) {
ivlikeD <- c(ivlikeD, TRUE)
} else {
ivlikeD <- c(ivlikeD, FALSE)
}
}
if (all(length(Formula::as.Formula(sformula)) == c(1, 2))) {
if (treat %in%
all.vars(Formula::as.Formula(sformula)[[3]][[2]])) {
ivlikeD <- c(ivlikeD, TRUE)
} else {
ivlikeD <- c(ivlikeD, FALSE)
}
}
environment(sformula) <- environments$ivlike
scomponent <- components[[i]]
if (subset[[i]] == "") {
ssubset <- replicate(nrow(data), TRUE)
} else {
ssubset <- subset[[i]]
}
## Obtain coefficient estimates and S-weights
## corresponding to the IV-like estimands
sest <- ivEstimate(formula = sformula,
data = data[eval(substitute(ssubset), data), ],
components = scomponent,
treat = treat,
list = TRUE,
order = ivlikeCounter)
## Generate moments (gammas) corresponding to IV-like
## estimands
subset_index <- rownames(data[eval(substitute(ssubset), data), ])
if (length(subset_index) == nrow(data)) {
subsetIndexList[[ivlikeCounter]] <- NA
} else {
subsetIndexList[[ivlikeCounter]] <- as.integer(subset_index)
}
ncomponents <- sum(!is.na(sest$betas))
setobj <- genSSet(data = data,
sset = sset,
sest = sest,
splinesobj = splinesobj,
pmodobj = pmodel$phat[subset_index],
pm0 = pm0,
pm1 = pm1,
ncomponents = ncomponents,
scount = scount,
subset_index = subset_index,
means = FALSE,
yvar = vars_y,
dvar = treat,
noisy = noisy,
ivn = ivlikeCounter,
redundant = point.redundant)
## Update set of moments (gammas)
sset <- setobj$sset
scount <- setobj$scount
rm(setobj)
ivlikeCounter <- ivlikeCounter + 1
}
} else {
if (!direct) {
stop(gsub("\\s+", " ",
"'ivlike' argument must either be a formula or a vector of
formulas."),
call. = FALSE)
} else {
## Prepare direct MTR regresion approach.
if (subset[[1]] == "") {
ssubset <- replicate(nrow(data), TRUE)
} else {
ssubset <- subset[[1]]
}
subset_index <- rownames(data[eval(substitute(ssubset), data), ])
sw0 <- 1 / (1 - pmodel$phat)
sw1 <- 1 / pmodel$phat
sw0[data[[treat]] == 1] <- 0
sw1[data[[treat]] == 0] <- 0
sest <- list(sw0 = matrix(sw0[subset_index], ncol = 1),
sw1 = matrix(sw1[subset_index], ncol = 1))
setobj <- genSSet(data = data,
sset = sset,
sest = sest,
splinesobj = splinesobj,
pmodobj = pmodel$phat[subset_index],
pm0 = pm0,
pm1 = pm1,
ncomponents = 1,
scount = scount,
subset_index = subset_index,
means = FALSE,
yvar = vars_y,
dvar = treat,
noisy = noisy,
ivn = 0)
sset <- setobj$sset
sset$s1$ivspec <- NULL
sset$s1$beta <- NULL
rm(setobj)
## Now perform the MTR regression and check for
## collinearities
drY <- sset$s1$ys
drX <- cbind(sset$s1$g0, sset$s1$g1)
drN <- length(drY)
## Construct equality constraints
if (exists('pmequal0') && exists ('pmequal1')) {
tmp.equal.coef0 <- paste0('[m0]', pmequal0)
tmp.equal.coef1 <- paste0('[m1]', pmequal1)
equal.constraints <- lpSetupEqualCoef(tmp.equal.coef0,
tmp.equal.coef1,
colnames(drX))
}
if (!rescale) {
drFit <- lm.fit(x = drX, y = drY)
collinear <- any(is.na(drFit$coefficients))
drCoef <- drFit$coef
drSSR <- sum(drFit$resid^2)
if (!collinear && exists('pmequal0') && exists ('pmequal1')) {
## Perform separate regression with
## constraints. The lsei() function does not test
## for collinearity. It will simply return
## enormous coefficients. So the check for
## collinearity is performed without constraints
## using lm().
drCoef <- lsei::lsei(a = drX, b = drY,
c = equal.constraints$A,
d = equal.constraints$rhs)
names(drCoef) <- colnames(drX)
drSSR <- sum((drY - drX %*% drCoef)^2)
}
} else {
dVec <- data[subset_index, treat]
gn0 <- ncol(sset$s1$g0)
gn1 <- ncol(sset$s1$g1)
colNorms <- apply(drX, MARGIN = 2, function(x) sqrt(sum(x^2)))
colNorms[colNorms == 0] <- 1
resX <- sweep(x = drX, MARGIN = 2, STATS = colNorms, FUN = '/')
## Perform the regression. If the MTR is point
## identified without restrictions, then it should
## also be point identified with restrictions. This is
## done to correctly detect for collinerity.
drFit <- lm.fit(x = resX, y = drY)
collinear <- any(is.na(drFit$coefficients))
drSSR <- sum(drFit$resid^2)
## Reconstruct the coefficients
drCoef <- drFit$coef / colNorms
if (!collinear && exists('pmequal0') && exists ('pmequal1')) {
## Rescale equality constraints
equal.constraints$A <- sweep(x = equal.constraints$A,
MARGIN = 2,
STATS = colNorms,
FUN = '/')
## Perform separate regression with constraints
## (see comment pertaining to lsei() function, and
## how it does not really indicate collinearity).
drCoef <- lsei::lsei(a = resX, b = drY,
c = equal.constraints$A,
d = equal.constraints$rhs)
drCoef <- drCoef / colNorms
drSSR <- sum((drY - drX %*% drCoef)^2)
}
}
sset$s1$init.coef <- drCoef
sset$s1$SSR <- drSSR
## If no collinearities, return the implied target parameter
if (!collinear) {
if ((hasArg(point) && point == TRUE) |
!hasArg(point)) {
warning(gsub('\\s+', ' ',
'MTR is point identified via linear regression.
Shape constraints are ignored.'),
call. = FALSE)
point.estimate <- sum(c(gstar0, gstar1) * drCoef)
if (noisy == TRUE) {
cat("\nPoint estimate of the target parameter: ",
point.estimate, "\n\n", sep = "")
}
if (!smallreturnlist) {
return(list(point.estimate = point.estimate,
mtr.coef = drCoef,
X = drX,
SSR = drSSR,
propensity = pmodel))
} else {
output <- list(point.estimate = point.estimate,
mtr.coef = drCoef,
SSR = drSSR)
if (all(class(pmodel) != "NULL")) {
output$propensity.coef <- pmodel$coef
}
return(output)
}
}
} else {
## If there are collineariites, then function will
## move onto the QCQP problem. But for bootstraps, if
## the estimate from the original sample was point
## identified, then it is unnecssary to solve the
## QCQP.
if (bootstrap & point) return(gsub('\\s+', ' ',
'Failed point identification
bootstrap, although target
parameter is point
identified in the original
sample.'),
call. = FALSE)
## Move on to the QCQP problem
if (direct && noisy) {
cat(" MTR is not point identified.\n")
}
}
if (!requireNamespace("gurobi", quietly = TRUE) &&
!requireNamespace("Rmosek", quietly = TRUE)) {
stop(gsub('\\s+', ' ',
"The MTR is not point identified by a
direct regression. However, partial
identification involves solving a
quadratically constrained quadratic
program.
Please install one of the
following optimization packages:
gurobi (version 7.5-1 or later);
Rmosek (version 9.2.38 or later)."),
call. = FALSE)
}
}
}
rm(sest, subset_index)
if (!is.null(pm0)) {
pm0 <- list(exporder = pm0$exporder,
terms = pm0$terms,
xterms = pm0$xterms)
}
if (!is.null(pm1)) {
pm1 <- list(exporder = pm1$exporder,
terms = pm1$terms,
xterms = pm1$xterms)
}
if (smallreturnlist) pmodel <- pmodel$model
if (count.moments && !direct) {
wmat <- NULL
for (s in 1:length(sset)) {
if (!is.null(subsetIndexList)) {
wmatTmp <- rep(0, times = 2 * nrow(data))
if (!is.integer(subsetIndexList[[sset[[s]]$ivspec]])) {
wmatTmp <- c(sset[[s]]$w0$multiplier,
sset[[s]]$w1$multiplier)
} else {
wmatTmp[c(subsetIndexList[[sset[[s]]$ivspec]],
nrow(data) +
subsetIndexList[[sset[[s]]$ivspec]])] <-
c(sset[[s]]$w0$multiplier, sset[[s]]$w1$multiplier)
}
} else {
wmatTmp <- c(sset[[s]]$w0$multiplier,
sset[[s]]$w1$multiplier)
}
wmat <- cbind(wmat, wmatTmp)
rm(wmatTmp)
}
## Check number of linearly independent moments
nIndepMoments <- qr(wmat)$rank
if (noisy == TRUE) cat(" Independent moments:", nIndepMoments, "\n")
if (nIndepMoments < length(sset) && !all(ivlikeD)) {
warning(gsub("\\s+", " ",
paste0("The following IV-like specifications do not
include the treatment variable: ",
paste(which(!ivlikeD), collapse = ", "),
". This may result in fewer
independent moment conditions than expected.")),
call. = FALSE)
}
rm(wmat)
## Determine if point identified or not for IV-like case.
if (!hasArg(point)) {
if (nIndepMoments >= length(gstar0) + length(gstar1)) {
point <- TRUE
warning(gsub("\\s+", " ",
"MTR is point identified via GMM.
Shape constraints are ignored."),
call. = FALSE)
} else {
point <- FALSE
}
}
} else {
nIndepMoments <- NULL
}
if (!direct && point == FALSE) {
for (i in 1:length(sset)) {
sset[[i]]$g0 <- colMeans(sset[[i]]$g0)
sset[[i]]$g1 <- colMeans(sset[[i]]$g1)
sset[[i]]$ys <- NULL
}
}
if (!is.null(point.redundant)) point.redundant <- 0
## If bootstrapping, check that length of sset is equivalent in
## length to that of the original sset if bootstrapping. If not,
## then the function ends. The wrapper function will respond by
## drawing a new bootstrap sample and re-estimating.
if (!is.null(orig.sset)) {
if (length(sset) != length(orig.sset)) {
return("collinearity causing S-set to differ from original sample")
}
}
## Prepare GMM estimate estimate if `point' agument is set to TRUE
splinesCheck <- !(all(is.null(splinesobj[[1]]$splineslist)) &&
all(is.null(splinesobj[[2]]$splineslist)))
if (!direct && point == TRUE) {
## Obtain GMM estimate
gmmResult <- gmmEstimate(sset = sset,
gstar0 = gstar0,
gstar1 = gstar1,
center = point.center,
identity = point.eyeweight,
redundant = point.redundant,
subsetList = subsetIndexList,
n = nrow(data),
nMoments = nIndepMoments,
splines = splinesCheck,
noisy = noisy)
if (!smallreturnlist) {
return(list(s.set = sset,
gstar = list(g0 = gstar0,
g1 = gstar1),
propensity = pmodel,
point.estimate = gmmResult$point.estimate,
moments = list(count = length(gmmResult$moments),
criterion = gmmResult$moments),
redundant = gmmResult$redundant,
j.test = gmmResult$j.test,
mtr.coef = gmmResult$coef))
} else {
sset <- lapply(sset, function(x) {
output <- list()
output$ivspec <- x$ivspec
output$beta <- x$beta
output$g0 <- colMeans(x$g0)
output$g1 <- colMeans(x$g1)
return(output)
})
output <- list(s.set = sset,
gstar = list(g0 = gstar0,
g1 = gstar1),
point.estimate = gmmResult$point.estimate,
moments = list(count = length(gmmResult$moments),
criterion = gmmResult$moments),
redundant = gmmResult$redundant,
j.test = gmmResult$j.test,
mtr.coef = gmmResult$coef)
if (all(class(pmodel) != "NULL")) {
output$propensity.coef <- pmodel$coef
}
return(output)
}
}
##---------------------------
## 4. Define constraint matrices using the audit
##---------------------------
if (noisy == TRUE) {
cat("\nPerforming audit procedure...\n")
}
audit.args <- c("uname", "vars_data",
"initgrid.nu", "initgrid.nx",
"audit.nx", "audit.nu", "audit.add",
"audit.max", "audit.tol",
"audit.grid", "rescale",
"m1.ub", "m0.ub",
"m1.lb", "m0.lb",
"mte.ub", "mte.lb", "m0.dec",
"m0.inc", "m1.dec", "m1.inc", "mte.dec",
"mte.inc", "solver.options", "solver.presolve",
"solver.options.criterion", "solver.options.bounds",
"criterion.tol",
"orig.sset", "orig.criterion",
"smallreturnlist", "noisy", "debug")
audit_call <- modcall(call,
newcall = audit,
keepargs = audit.args,
newargs = list(data = quote(data),
m0 = quote(m0),
m1 = quote(m1),
pm0 = quote(pm0),
pm1 = quote(pm1),
splinesobj = quote(splinesobj),
vars_mtr = quote(vars_mtr),
terms_mtr0 = quote(terms_mtr0),
terms_mtr1 = quote(terms_mtr1),
sset = quote(sset),
gstar0 = quote(gstar0),
gstar1 = quote(gstar1),
solver = quote(solver)))
## Impose default upper and lower bounds on m0 and m1
if (!hasArg(m1.ub) | !hasArg(m0.ub)) {
maxy <- max(data[, vars_y])
if (!hasArg(m1.ub)) {
audit_call <- modcall(audit_call,
newargs = list(m1.ub = maxy,
m1.ub.default = TRUE))
}
if (!hasArg(m0.ub)) {
audit_call <- modcall(audit_call,
newargs = list(m0.ub = maxy,
m0.ub.default = TRUE))
}
}
if (!hasArg(m1.lb) | !hasArg(m0.lb)) {
miny <- min(data[, vars_y])
if (!hasArg(m1.lb)) {
audit_call <- modcall(audit_call,
newargs = list(m1.lb = miny,
m1.lb.default = TRUE))
}
if (!hasArg(m0.lb)) {
audit_call <- modcall(audit_call,
newargs = list(m0.lb = miny,
m0.lb.default = TRUE))
}
}
## Add equality constraints
if (hasArg(equal.coef) && hasArg(splinesobj.equal)) {
audit_call <- modcall(audit_call,
newargs = list(equal.coef0 = pmequal0,
equal.coef1 = pmequal1))
}
##---------------------------
## 5. Obtain the bounds
##---------------------------
autoExpand <- 0
autoExpandMax <- 3
newGrid.nu <- initgrid.nu
newGrid.nx <- initgrid.nx
## Select codes for reasons to stop or expand
if (!direct) {
codesStop <- c(0, 2, 5)
codesExpand <- c(3, 4, 6, 7)
} else {
codesStop <- c(0)
codesExpand <- c(2, 3, 4, 5, 6, 7)
}
while(autoExpand <= autoExpandMax) {
audit <- eval(audit_call)
if (is.null(audit$error)) {
autoExpand <- Inf
}
if (!is.null(audit$errorTypes)) {
if (any(codesStop %in% audit$errorTypes)) {
stop(audit$error, call. = FALSE)
}
if (initgrid.nx == audit.nx && initgrid.nu == audit.nu) {
errMessage1 <- audit$error
errMessage2 <-
gsub("\\s+", " ",
"Automatic grid expansion is also not
possible.
Consider imposing additional shape
constraints, or increasing the size of the
initial grid for the audit.
In order to increase the size of the
initial grid, it will be
necessary to increase the size of the audit
grid.
This is because the initial grid must be a
subset of the audit grid, and this constraint
is currently binding.
In order to allow for automatic grid
expansion, make sure audit.nx > initgrid.nx
or audit.nu > initgrid.nu.")
if (any(c(2, 3, 5) %in% audit$errorTypes)) {
tmp <- NULL
for (i in 1:length(audit$errorTypes)) {
tmp <- c(tmp, statusString(audit$errorTypes[[i]],
solver))
}
audit$errorTypes <- tmp
audit$audit.criterion.status <-
statusString(audit$audit.criterion.status,
solver)
audit$status.min <- statusString(audit$status.min,
solver)
audit$status.max <- statusString(audit$status.max,
solver)
audit$error <- paste(errMessage1, "\n\n", errMessage2)
return(audit)
}
}
if (any(codesExpand %in% audit$errorTypes)) {
tmpErrMessage <- NULL
if (2 %in% audit$errorTypes) {
tmpErrMessage <-
c(tmpErrMessage,
'was infeasible')
}
if (3 %in% audit$errorTypes) {
if (!direct) {
tmpErrMessage <-
c(tmpErrMessage,
gsub('\\s+', ' ',
'was infeasible or unbounded
(most likely unbounded)'))
} else {
tmpErrMessage <-
c(tmpErrMessage,
gsub('\\s+', ' ',
'was infeasible or unbounded'))
}
}
if (4 %in% audit$errorTypes) {
tmpErrMessage <-
c(tmpErrMessage,
'was unbounded')
}
if (5 %in% audit$errorTypes) {
tmpErrMessage <-
c(tmpErrMessage,
'resulted in numerical issues')
}
## if (6 %in% audit$errorTypes) {
## tmpErrMessage <-
## c(tmpErrMessage,
## 'suboptimal')
## }
## if (7 %in% audit$errorTypes) {
## tmpErrMessage <-
## c(tmpErrMessage,
## 'optimal but infeasible after rescaling')
## }
tmpErrMessage <- paste(tmpErrMessage, collapse = "; ")
cat(paste0(" Model ",
tmpErrMessage, ".\n"))