# R/mtr.R In ivmte: Instrumental Variables: Extrapolation by Marginal Treatment Effects

#### Documented in constructConstantfunEvalgenBasisSplinesgenGammagenGammaSplinesinteractSplinesmonoIntegralparenthBooleanpolyparsepolyProductremoveSplinesuSplineBasisuSplineIntvecextractwhichforlist

```#' Auxiliary function: \code{which} for lists
#'
#' Auxiliary function that makes it possible to use \code{which} with
#' a list.
#' @param vector the vector for which we want to check the entries of
#' @param obj the value for which we want the vector to match on.
#' @return a vector of positions where the elements in \code{vector}
#'     are equal to \code{obj}.
whichforlist <- function(vector, obj) {
which(vector == obj)
}

#' Auxiliary function: extracting elements from strings
#'
#' This auxiliary function extracts the (string) element in the
#' \code{position} argument of the \code{vector} argument.
#' @param vector the vector from which we want to extract the
#'     elements.
#' @param position the position in \code{vector} to extract.
#' @param truncation the number of characters from the front of the
#'     element being extracted that should be dropped.
#' @return A chracter/string.
vecextract <- function(vector, position, truncation = 0) {
elem <- vector[position]
elem <- substr(elem, truncation + 1, nchar(elem))
return(elem)
}

#' Parsing marginal treatment response formulas
#'
#' This function takes in an MTR formula, and then parses the formula
#' such that it becomes a polynomial in the unobservable \code{u}. It
#' then breaks these polynomials into monomials, and then integrates
#' each of them with respect to \code{u}. Each integral corresponds to
#' E[md | D, X, Z].
#' @param formula the MTR.
#' @param data \code{data.frame} for which we obtain E[md | D, X, Z]
#'     for each observation.
#' @param uname variable name for unobservable used in declaring the
#'     MTR.
#' @param env environment, the original environment in which
#'     the formula was declared.
#' @param as.function boolean, if \code{FALSE} then a list of the
#'     polynomial terms are returned; if \code{TRUE} then a list of
#'     functions corresponding to the polynomials are returned.
#' @return A list (of lists) of monomials corresponding to the
#'     original MTR (for each observation); a list (of lists) of the
#'     integrated monomials; a vector for the degree of each of the
#'     original monomials in the MTR; and a vector for the names of
#'     each variable entering into the MTR (note \code{x^2 + x} has
#'     only one term, \code{x}).
#'
#' @examples
#' dtm <- ivmte:::gendistMosquito()
#'
#' ## Declare MTR functions
#' formula1 = ~ 1 + u
#' formula0 = ~ 1 + u
#'
#' ## Construct MTR polynomials
#' polynomials0 <- polyparse(formula = formula0,
#'                           data = dtm,
#'                           uname = u,
#'                           as.function = FALSE)
#'
#' polynomials1 <- polyparse(formula = formula0,
#'                           data = dtm,
#'                           uname = u,
#'                           as.function = FALSE)
#'
#' @export
polyparse <- function(formula, data, uname = "u", env = parent.frame(),
as.function = FALSE) {
## update formula parsing
formula <- Formula::as.Formula(formula)
environment(formula) <- env
## Convert uname into a string
uname <- deparse(substitute(uname))
uname <- gsub("~", "", uname)
uname <- gsub("\\\"", "", uname)
## Include redundant variable u, so monomials in m0, m1
## specifications correspond to polynomial coefficients on u
## monomials
data[[uname]] <- 1
dmat <- design(formula, data)\$X
oterms <- colnames(dmat)
## Separate and parse the original terms
nterms <- oterms
## Address booleans and their splitting
for (op in c("==", "!=", ">", ">=", "<", "<=")) {
nterms <- sapply(nterms, gsub,
pattern = paste0(" ", op, " "),
replacement = op)
}
for (sep in c("I\\(", "\\)", "\\*", ":", "  ", "  ")) {
nterms <- lapply(nterms, gsub, pattern = sep, replacement = " ")
}
nterms <- strsplit(trimws(unlist(nterms)), " ")
## Correct the spltting for factors
nterms <- lapply(nterms, function(x) {
factorPos <- grep("factor\\(" , x = x)
if (length(factorPos) > 0) {
for (i in factorPos) {
x[i] <- paste0(x[i], ")")
}
}
return(x)
})
## Correct the spltting for booleans
nterms <- lapply(nterms, function(x) {
for (op in c("==", "!=", ">=", "<=", ">",  "<")) {
boolPos <- grep(op, x = x)
if (length(boolPos) > 0) {
for (i in boolPos) {
x[i] <- gsub(op,
paste0(" ", op, " "), x[i])
}
}
}
for (op in c(" < =", " > =")) {
boolPos <- grep(op, x = x)
if (length(boolPos) > 0) {
for (i in boolPos) {
x[i] <- gsub(op,
gsub(" ", "", op) ,
x[i])
}
}
}
return(x)
})
## Find monomials of degree 1 ('degree' is with respect to u)
u_pos <- lapply(nterms, whichforlist, obj = uname)
u_pos <- which(u_pos > 0)
## Find monomials of degree exceeding 1, and determine their degree
trunc_nterms <- lapply(nterms, substr, 0, nchar(uname) + 1)
uexp_pos     <- lapply(trunc_nterms, whichforlist, obj = paste0(uname, "^"))
uexp_pos     <- which(uexp_pos > 0)
uexp_subpos  <- lapply(trunc_nterms[uexp_pos],
whichforlist, obj =  paste0(uname, "^"))
deggtr2      <- FALSE
if (length(unlist(uexp_subpos)) > 0) deggtr2 <- TRUE
## Create a matrix stating the degree for each monomial with degree >= 1
if(length(u_pos) == 0){
exptab1 <- NULL
} else {
exptab1 <- cbind(u_pos, 1)
}
if (deggtr2) {
uexpStr <- mapply(vecextract,
nterms[uexp_pos],
position = uexp_subpos,
truncation = nchar(uname) + 1)
uexp <- sapply(uexpStr, function(x) eval(parse(text = x), envir = env))
exptab2 <- cbind(uexp_pos, uexp)
exptab  <- rbind(exptab1, exptab2)
} else {
exptab <- exptab1
}
if(!is.null(exptab)) {
colnames(exptab) <- c("term", "degree")
}
## Determine which terms do not involve u
if (length(oterms) > 0) {
nonuterms    <- unlist(oterms[!seq(1, length(oterms)) %in% exptab[, 1]])
nonutermspos <- which(oterms %in% nonuterms)
} else {
nonuterms    <- NULL
nonutermspos <- NULL
}
if (length(nonuterms) > 0) {
exptab0 <- cbind(nonutermspos, replicate(length(nonutermspos), 0))
rownames(exptab0) <- oterms[nonutermspos]
} else {
exptab0  <- NULL
}
exptab <- rbind(exptab0, exptab)
if (!is.null(dim(exptab))) exptab <- exptab[order(exptab[, 1]), ]
if (is.matrix(exptab)) {
exporder <- exptab[, 2]
colnames(exptab) <- c("term", "degree")
} else if (!is.matrix(exptab) & length(exptab) > 0) {
exporder <- exptab[2]
names(exptab) <- c("term", "degree")
} else {
exporder <- NULL
}
names(exporder) <- NULL
polymat <- as.matrix(dmat)
if (is.null(dim(polymat))) {
polymat <- matrix(polymat, ncol = 1)
rownames(polymat) <- rownames(data)
}
## Construct a dictionary of non-u terms, where the entry name is
## each term, and the entries are the list of non-u terms that are
## interacted with the u term.
xterms <- lapply(seq(length(exporder)), function(x) {
if (exporder[x] == 0) {
NULL
} else {
splitTerms <- unlist(strsplit(oterms[x], split = ":"))
if (length(splitTerms) == 1) {
NULL
} else {
if (exporder[x] == 1) {
rmPos <- which(splitTerms == uname)
if (length(rmPos) == 0) {
rmPos <- which(splitTerms == paste0("I(", uname, "^1)"))
}
splitTerms <- splitTerms[-rmPos]
paste(splitTerms, collapse = ":")
} else {
rmPos <- which(splitTerms ==
paste0("I(", uname, "^",
exporder[x], ")"))
splitTerms <- splitTerms[-rmPos]
paste(splitTerms, collapse = ":")
}
}
}
})
names(xterms) <- oterms
return(list(polymat = polymat,
exporder = exporder,
terms = oterms,
xterms = xterms))
}

#' Function to multiply polynomials
#'
#' This function takes in two vectors characterizing polynomials. It
#' then returns a vector characterizing the product of the two
#' polynomials.
#' @param poly1 vector, characerizing a polynomial.
#' @param poly2 vector, characerizing a polynomial.
#' @return vector, characterizing the product of the two polynomials
#'     characterized \code{poly1} and \code{poly2}.
polyProduct <- function(poly1, poly2) {
poly1 <- c(1, 2, 3)
poly2 <- c(2, 0, -1)
degreeAdd <- seq(0, length(poly2) - 1)
prodMat <- sapply(poly2, function(x) x * poly1)
prodMat <- sapply(degreeAdd, function(x) c(rep(0, x),
prodMat[, x + 1],
return(rowSums(prodMat))
}

#' Estimating expectations of terms in the MTR (gamma objects)
#'
#' This function generates the gamma objects defined in the paper,
#' i.e. each additive term in E[md], where md is a MTR.
#'
#' @param monomials [UPDATE DESCRIPTION] object containing list of
#'     list of monomials. Each element of the outer list represents an
#'     observation in the data set, each element in the inner list is
#'     a monomial from the MTR. The variable is the unobservable u,
#'     and the coefficient is the evaluation of any interactions with
#'     u.
#' @param lb vector of lower bounds for the interval of
#'     integration. Each element corresponds to an observation.
#' @param ub vector of upper bounds for the interval of
#'     integration. Each element corresponds to an observation.
#' @param multiplier a vector of the weights that enter into the
#'     integral. Each element corresponds to an observation.
#' @param subset The row names/numbers of the subset of observations
#'     to use.
#' @param means logical, if TRUE then function returns the terms of
#'     E[md]. If FALSE, then function instead returns each term of
#'     E[md | D, X, Z]. This is useful for testing the code,
#'     i.e. obtaining population estimates.
#' @param late.rows Boolean vector indicating which observations to
#'     include when conditioning on covariates X.
#' @return If \code{means = TRUE}, then the function returns a vector
#'     of the additive terms in Gamma (i.e. the expectation is over D,
#'     X, Z, and u). If \code{means = FALSE}, then the function
#'     returns a matrix, where each row corresponds to an observation,
#'     and each column corresponds to an additive term in E[md | D, X,
#'     Z] (i.e. only the integral with respect to u is performed).
#'
#' @examples
#' dtm <- ivmte:::gendistMosquito()
#'
#' ## Declare MTR formula
#' formula0 = ~ 1 + u
#'
#' ## Construct MTR polynomials
#' polynomials0 <- polyparse(formula = formula0,
#'                 data = dtm,
#'                 uname = u,
#'                 as.function = FALSE)
#'
#' ## Construct propensity score model
#' propensityObj <- propensity(formula = d ~ z,
#'                             data = dtm,
#'
#' ## Generate gamma moments, with S-weight equal to its default value
#' ## of 1
#' genGamma(monomials = polynomials0,
#'          lb = 0,
#'          ub = propensityObj\$phat)
#'
#' @export
genGamma <- function(monomials, lb, ub, multiplier = 1,
subset = NULL, means = TRUE, late.rows = NULL) {
if (is.null(late.rows)) late.rows <- rep(TRUE, nrow(monomials\$polymat))
exporder <- monomials\$exporder
polymat <- monomials\$polymat
if (!is.null(subset)) {
polymat <- as.matrix(polymat[subset, ])
late.rows <-
late.rows[which(rownames(polymat) %in% as.integer(subset))]
}
nmono <- length(exporder)
## Determine bounds of integrals (i.e. include weights)
if (length(ub) == 1) ub <- replicate(nrow(polymat), ub)
if (length(lb) == 1) lb <- replicate(nrow(polymat), lb)
uLbMat <- NULL
uUbMat <- NULL
for (exp in exporder) {
uLbMat <- cbind(uLbMat, monoIntegral(lb, exp) * multiplier)
uUbMat <- cbind(uUbMat, monoIntegral(ub, exp) * multiplier)
}
preGamma <- polymat * (uUbMat - uLbMat)
preGamma <- preGamma[late.rows, ]
if (means) {
if (is.matrix(preGamma)) {
gstar <- colMeans(preGamma)
} else {
gstar <- mean(preGamma)
}
names(gstar) <- monomials\$terms
return(gstar)
} else {
if (!is.matrix(preGamma)) preGamma <- matrix(preGamma, ncol = 1)
colnames(preGamma) <- monomials\$terms
return(preGamma)
}
}

#' Integrating and evaluating monomials
#'
#' Analytically integrates monomials and evalates them at a given
#' point. It is assumed that there is no constant multiplying the
#' monomial.
#'
#' @param u scalar, the point at which to evaluate the integral. If a
#'     vector is passed, then the integral is evaluated at all the
#'     elements of the vector.
#' @param exp The exponent of the monomial.
#' @return scalar or vector, depending on what \code{u} is.
monoIntegral <- function(u, exp) {
return((u ^ (exp + 1)) / (exp + 1))
}

#' Separating splines from MTR formulas
#'
#' This function separates out the function calls \code{uSpline()} and
#' \code{uSplines()} potentially embedded in the MTR formulas from the
#' rest of the formula. The terms involving splines are treated
#' separately from the terms that do not involve splines when creating
#' the gamma moments.
#' @param formula the formula that is to be parsed.
#' @param env environment in which to formulas. This is necessary as
#'     splines may be declared using objects, e.g. \code{knots = x},
#'     where \code{x = c(0.3, 0.64, 0.9)}.
#' @return a list containing two objects. One object is \code{formula}
#'     but with the spline components removed. The second object is a
#'     list. The name of each element is the
#'     \code{uSpline()}/\code{uSplines()} command, and the elements
#'     are a vector of the names of covariates that were interacted
#'     with the \code{uSpline()}/\code{uSplines()} command.
#'
#' @examples
#' ## Declare and MTR with a sline component.
#' m0 = ~ x1 + x1 : uSpline(degree = 2,
#'                           knots = c(0.2, 0.4)) +
#'             x2 : uSpline(degree = 2,
#'                           knots = c(0.2, 0.4)) +
#'             x1 : x2 : uSpline(degree = 2,
#'                                knots = c(0.2, 0.4)) +
#'             uSpline(degree = 3,
#'                      knots = c(0.2, 0.4),
#'                      intercept = FALSE)
#'
#' ## Now separate the spline component from the non-spline component
#' removeSplines(m0)
#'
#' @export
removeSplines <- function(formula, env = parent.frame()) {
fterms <- attr(terms(formula), "term.labels")
fterms <- gsub("\\s+", " ", fterms)
finter <- attr(terms(formula), "intercept")
if (length(fterms) == 0) {
whichspline <- 0
} else {
whichspline1 <- sapply(fterms,
function(y) grepl(x = y,
pattern = "uSplines\\("))
whichspline2 <- sapply(fterms,
function(y) grepl(x = y,
pattern = "uSpline\\("))
whichspline <- as.logical(whichspline1 + whichspline2)
}
if (max(whichspline) == 1) {
ftobj <- terms(formula)
splinepos <- which(whichspline == TRUE)
if (length(splinepos) == length(fterms)) {
if (finter == 0) nosplines <- NULL
if (finter == 1) nosplines <- ~ 1
} else {
nosplineterms <- fterms[-splinepos]
nosplineterms <- parenthBoolean(nosplineterms)
nosplines <- paste("~", paste(nosplineterms,
collapse = " + "))
if (finter != 1) {
nosplines <- paste0(nosplines, " + 0")
}
nosplines <- Formula::as.Formula(nosplines)
}
splineterms <- fterms[whichspline]
splineterms <- parenthBoolean(splineterms)
splineslist <- list()
splinescall <- list()
for (splineobj in splineterms) {
splinepos1 <- regexpr("uSplines\\(", splineobj)
splinepos2 <- regexpr("uSpline\\(", splineobj)
if (splinepos1 == -1) {
splinepos <- splinepos2
} else {
splinepos <- splinepos1
}
## Check if vectors or sequences are declared to adjust parsing
firstopen  <- regexpr("\\(",
substr(splineobj,
nchar(splineobj)))
firstclose <- regexpr("\\)",
substr(splineobj,
nchar(splineobj)))
secondclose <- regexpr("\\)",
substr(splineobj,
splinepos + splineposadd + 1 +
firstclose,
nchar(splineobj)))
## For the case where knots are explcitly declared
if ((firstopen < firstclose) & (secondclose != - 1)) {
splinecmd <- substr(splineobj,
splinepos,
splinepos + splineposadd + firstclose +
secondclose)
} else { ## For the case where knots are passed as an object
splinecmd <- substr(splineobj,
splinepos,
splinepos + splineposadd + 1 +
firstclose - 1)
}
## Separate uSpline/uSplines command from terms
## interacting with the spline
splinecmdstr <- gsub("\\)", "\\\\)",
gsub("\\(", "\\\\(", splinecmd))
splinecmdstr <- gsub("\\\$", "\\\\\$", splinecmdstr)
splinecmdstr <- gsub("\\.", "\\\\.", splinecmdstr)
splinecmdstr <- gsub("\\+", "\\\\+", splinecmdstr)
splinecmdstr <- gsub("\\*", "\\\\*", splinecmdstr)
splinecmdstr <- gsub("\\^", "\\\\^", splinecmdstr)
interobj <- gsub(paste0(":", splinecmdstr), "", splineobj)
interobj <- gsub(paste0(splinecmdstr, ":"), "", interobj)
interobj <- gsub("::", ":", interobj)
if (interobj == splinecmd) interobj <- "1"
if (splinecmd %in% names(splineslist)) {
splineslist[[splinecmd]] <- c(splineslist[[splinecmd]],
interobj)
} else {
splineslist[[splinecmd]] <- c(interobj)
}
## Recover original spline names
interobjStr <- gsub("\\)", "\\\\)",
gsub("\\(", "\\\\(", interobj))
interobjStr <- gsub("\\^", "\\\\^", interobjStr)
if (interobjStr != "1") {
vlist <- unlist(strsplit(interobjStr, ":"))
for (v in vlist) {
if (grepl(paste0("^", v, ":"), splineobj)) {
splineobj <- gsub(paste0("^", v, ":"), "", splineobj)
}
if (grepl(paste0(":", v, ":"), splineobj)) {
splineobj <- gsub(paste0(":", v, ":"), ":", splineobj)
}
if (grepl(paste0(":", v, "\$"), splineobj)) {
splineobj <- gsub(paste0(":", v, "\$"), "", splineobj)
}
}
}
splinescall[[splinecmd]] <- unique(c(splinescall[[splinecmd]],
splineobj))
}
## Now construct spline dictionary and spline keys
splinesDict <- list()
splinesKey <- NULL
for (i in 1:length(splineslist)) {
inDict <- FALSE
splinesSpecCmd <- gsub("uSplines\\(",
"list(",
names(splineslist)[i])
splinesSpecCmd <- gsub("uSpline\\(",
"list(",
splinesSpecCmd)
splinesSpec <- eval(parse(text = splinesSpecCmd),
envir = env)
if (! "intercept" %in% names(splinesSpec)) {
splinesSpec\$intercept = TRUE
}
## Check if the spline is already in the dictionary
if (length(splinesDict) > 0) {
j <- 1
while (j <= length(splinesDict) & inDict == FALSE) {
inDict <-
(all.equal(splinesDict[[j]]\$knots,
splinesSpec\$knots) == TRUE) &
(splinesDict[[j]]\$intercept == splinesSpec\$intercept) &
(splinesDict[[j]]\$degree == splinesSpec\$degree)
j <- j + 1
}
}
## Update dictionary and key
if (inDict == FALSE) {
splinesDict[[length(splinesDict) + 1]] <-
splinesSpec[order(names(splinesSpec))]
splinesDict[[length(splinesDict)]]\$gstar.index <- i
splinesKey <- rbind(splinesKey, c(i, length(splinesDict)))
} else {
splinesKey <- rbind(splinesKey, c(i, j - 1))
}
}
colnames(splinesKey) <- c("spline", "dictKey")
## Using dictionary, generate new condensed splines list. In
## addition, store all the original calls for each spline
## (this will be used to determine which variables interact
## with splines).
splinesList2 <- list()
splinesCall2 <- list()
for (j in 1:length(splinesDict)) {
dictKey <- paste0("uSpline(degree = ",
splinesDict[[j]]\$degree,
", knots = c(",
paste(splinesDict[[j]]\$knots,
collapse = ", "),
"), intercept = ",
splinesDict[[j]]\$intercept,
")")
newEntry <- sort(unlist(splineslist[splinesKey[splinesKey[, 2] == j,
1]]))
origNameEntry <- unlist(splinescall[splinesKey[splinesKey[, 2] == j,
1]])
names(origNameEntry) <- NULL
splinesCall2[[dictKey]] <- origNameEntry
## Convert I() as-is declarations to interactions, if possible
newEntry <- sapply(newEntry, function(x) {
multiply <- grepl("*", x)
asis <- substr(x, 1, 2) == "I("
if (! multiply * asis) {
return(x)
} else {
othops <- 0
for (j in c("\\^", "\\+", "-", "/")) {
othops <- max(othops, grepl(j, x))
}
if (othops == 1) {
return(x)
} else {
x <- substring(x, 3, nchar(x) - 1)
x <- gsub("\\*", ":", x)
x <- gsub(" ", "", x)
return(x)
}
}
})
names(newEntry) <- NULL
splinesList2[[dictKey]] <- newEntry
}
} else {
nosplines <- formula
splinesList2 <- NULL
splinesCall2 <- NULL
splinesDict <- NULL
}
return(list(formula = nosplines,
splineslist = splinesList2,
splinescall = splinesCall2,
splinesdict = splinesDict))
}

#' Integrated splines
#'
#' This function integrates out splines that the user specifies when
#' declaring the MTRs. This is to be used when generating the gamma
#' moments.
#' @param x the points to evaluate the integral of the the splines.
#' @param knots the knots of the spline.
#' @param degree the degree of the spline; default is set to 0
#'     (constant splines).
#' @param intercept boolean, set to TRUE if intercept term is to be
#'     included (i.e. an additional basis such that the sum of the
#'     splines at every point in \code{x} is equal to 1).
#' @return a matrix, the values of the integrated splines. Each row
#'     corresponds to a value of \code{x}; each column corresponds to
#'     a basis defined by the degrees and knots.
#'
#' @examples
#' ## Since the splines are declared as part of the MTR, you will need
#' ## to have parsed out the spline command. Thus, this command will be
#' ## called via eval(parse(text = .)). In the examples below, the
#' ## commands are parsed from the object \code{splineslist} generated
#' ## by \code{\link[MST]{removeSplines}}. The names of the elements in
#' ## the list are the spline commands, and the elements themselves are
#' ## the terms that interact with the splines.
#'
#' ## Declare MTR function
#' m0 = ~ x1 + x1 : uSpline(degree = 2,
#'                           knots = c(0.2, 0.4)) +
#'     x2 : uSpline(degree = 2,
#'                   knots = c(0.2, 0.4)) +
#'     x1 : x2 : uSpline(degree = 2,
#'                        knots = c(0.2, 0.4)) +
#'     uSpline(degree = 3,
#'              knots = c(0.2, 0.4),
#'              intercept = FALSE)
#'
#' ## Separate the spline components from the MTR function
#' splineslist <- removeSplines(m0)\$splineslist
#'
#' ## Delcare the points at which we wish to evaluate the integrals
#' x <- seq(0, 1, 0.2)
#'
#' ## Evaluate the splines integrals
#' eval(parse(text = gsub("uSpline\\(",
#'                        "ivmte:::uSplineInt(x = x, ",
#'                        names(splineslist)[1])))
#'
#'
#' eval(parse(text = gsub("uSpline\\(",
#'                        "ivmte:::uSplineInt(x = x, ",
#'                        names(splineslist)[2])))
uSplineInt <- function(x, knots, degree = 0, intercept = TRUE) {
## Note: warning below is suppressed since it will be provided
## when uSplineBasis is run.
if (any(knots < 0) || any(knots > 1)) {
stop(gsub("\\s+", " ",
"When defining splines, each knot must be inside the
[0, 1] interval."))
}
splines2::ibs(x = x,
knots = knots,
degree = degree,
intercept = intercept,
Boundary.knots = c(0, 1))
}

#' Spline basis function
#'
#' This function evaluates the splines that the user specifies when
#' declaring the MTRs. This is to be used for auditing, namely when
#' checking the boundedness and monotonicity conditions.
#' @param x the points to evaluate the integral of the the splines.
#' @param knots the knots of the spline.
#' @param degree the degree of the spline; default is set to 0
#'     (constant splines).
#' @param intercept boolean, set to TRUE if intercept term is to be
#'     included (i.e. an additional basis such that the sum of the
#'     splines at every point in \code{x} is equal to 1).
#' @return a matrix, the values of the integrated splines. Each row
#'     corresponds to a value of \code{x}; each column corresponds to
#'     a basis defined by the degrees and knots.
#'
#' @examples
#' ## Since the splines are declared as part of the MTR, you will need
#' ## to have parsed out the spline command. Thus, this command will be
#' ## called via eval(parse(text = .)). In the examples below, the
#' ## commands are parsed from the object \code{splineslist} generated
#' ## by \code{\link[MST]{removeSplines}}. The names of the elements in
#' ## the list are the spline commands, and the elements themselves are
#' ## the terms that interact with the splines.
#'
#' ## Declare MTR function
#' m0 = ~ x1 + x1 : uSpline(degree = 2,
#'                           knots = c(0.2, 0.4)) +
#'     x2 : uSpline(degree = 2,
#'                   knots = c(0.2, 0.4)) +
#'     x1 : x2 : uSpline(degree = 2,
#'                        knots = c(0.2, 0.4)) +
#'     uSpline(degree = 3,
#'              knots = c(0.2, 0.4),
#'              intercept = FALSE)
#'
#' ## Extract spline functions from MTR function
#' splineslist <- removeSplines(m0)\$splineslist
#'
#' ## Declare points at which we wish to evaluate the spline functions
#' x <- seq(0, 1, 0.2)
#'
#' ## Evaluate the splines
#' eval(parse(text = gsub("uSpline\\(",
#'                        "ivmte:::uSplineBasis(x = x, ",
#'                         names(splineslist)[1])))
#'
#' eval(parse(text = gsub("uSpline\\(",
#'                        "ivmte:::uSplineBasis(x = x, ",
#'                        names(splineslist)[2])))
uSplineBasis <- function(x, knots, degree = 0, intercept = TRUE) {
if (any(knots < 0) || any(knots > 1)) {
stop(gsub("\\s+", " ",
"When defining splines, each knot must be inside the
[0, 1] interval."))
}
splines2::bSpline(x = x,
knots = knots,
degree = degree,
intercept = intercept,
Boundary.knots = c(0, 1))
}

#' Generate Gamma moments for splines
#'
#' The user can declare that the unobservable enters into the MTRs in
#' the form of splines. This function generates the gamma moments for
#' the splines. The specifications for the spline must be passed as an
#' element generated by \code{\link{removeSplines}}. This function
#' accounts for the interaction between covariates and splines.
#' @param splinesobj a list generated by \code{\link{removeSplines}}
#'     applied to either the \code{m0} and \code{m1} argument.
#' @param data a \code{data.frame} object containing all the variables
#'     that interact with the spline components.
#' @param lb vector of lower bounds for the interval of
#'     integration. Each element corresponds to an observation.
#' @param ub vector of upper bounds for the interval of
#'     integration. Each element corresponds to an observation.
#' @param multiplier a vector of the weights that enter into the
#'     integral. Each element corresponds to an observation.
#' @param subset Subset condition used to select observations with
#'     which to estimate gamma.
#' @param d either 0 or 1, indicating the treatment status.
#' @param means boolean, default set to \code{TRUE}. Set to
#'     \code{TRUE} if estimates of the gamma moments should be
#'     returned. Set to \code{FALSE} if the gamma estimates for each
#'     observation should be returned.
#' @param late.rows Boolean vector indicating which observations to
#'     include when conditioning on covariates X.
#' @return a matrix, corresponding to the splines being integrated
#'     over the region specified by \code{lb} and \code{ub},
#'     accounting for the interaction terms. The number of rows is
#'     equal to the number of rows in \code{data}. The number of
#'     columns depends on the specifications of the spline. The name
#'     of each column takes the following form: "u[d]S[j].[b]", where
#'     "u" and "S" are fixed and stand for "unobservable" and
#'     "Splines" respectively. "[d]" will be either 0 or 1, depending
#'     on the treatment status. "[j]" will be an integer indicating
#'     which element of the list \code{splines} the column pertains
#'     to. "[b]" will be an integer reflect which component of the
#'     basis the column pertains to.
genGammaSplines <- function(splinesobj, data, lb, ub, multiplier = 1,
subset, d = NULL, means = TRUE,
late.rows = NULL) {
if (is.null(late.rows)) late.rows <- rep(TRUE, nrow(data))
splines <- splinesobj\$splineslist
inters  <- splinesobj\$splinesinter
if (is.null(splines)) {
return(list(gamma = NULL,
interactions = NULL))
} else {
if (!hasArg(subset)) {
subset <- replicate(nrow(data), TRUE)
} else {
late.rows <- late.rows[as.integer(subset)]
}
if (length(lb) == 1) lb <- replicate(nrow(data[subset, ]), lb)
if (length(ub) == 1) ub <- replicate(nrow(data[subset, ]), ub)
splinesGamma <- NULL
splinesNames <- NULL
splinesInter <- NULL
for (j in 1:length(splines)) {
## Construct design matrix for each interaction at a
## time. Only include the interactions contained in the
## 'inters' object.
inters[[j]][which(inters[[j]] == "1")] <- "(Intercept)"
nonSplinesDmat <- NULL
for (k in 1:length(splines[[j]])) {
if (splines[[j]][k] != "1") {
tmpDmat <- design(as.formula(paste("~ 0 +",
splines[[j]][k])),
data)\$X
nonSplinesDmat <- cbind(nonSplinesDmat, tmpDmat)
} else {
nonSplinesDmat <- cbind(nonSplinesDmat,
design(~ 1, data)\$X)
}
}
colnames(nonSplinesDmat) <- parenthBoolean(colnames(nonSplinesDmat))
currentNames <- colnames(nonSplinesDmat)
keepPos <- colnames(nonSplinesDmat) %in% inters[[j]]
nonSplinesDmat <- as.matrix(nonSplinesDmat[, keepPos])
colnames(nonSplinesDmat) <- currentNames[keepPos]
## Fill in the gstar with any missing variables---if
## assigned values of 0, they will not affect the LP
## optimization
missingVars <- inters[[j]][!inters[[j]] %in%
colnames(nonSplinesDmat)]
if (length(missingVars) > 0) {
nonSplinesDmat <- data.frame(nonSplinesDmat)
for (mv in missingVars) {
nonSplinesDmat[, mv] <- 0
}
nonSplinesDmat <- nonSplinesDmat[, inters[[j]]]
nonSplinesDmat <- as.matrix(nonSplinesDmat)
rownames(nonSplinesDmat) <- rownames(data)
}
## Spline integral matrices
splinesLB <- eval(parse(text = gsub("uSpline\\(",
"uSplineInt(x = lb, ",
names(splines)[j])))
splinesUB <- eval(parse(text = gsub("uSpline\\(",
"uSplineInt(x = ub, ",
names(splines)[j])))
splinesInt <- splinesUB - splinesLB
## Combine the design and integral matrices
for (l in 1:length(colnames(nonSplinesDmat))) {
tmpGamma <- sweep(splinesInt,
MARGIN = 1,
STATS = nonSplinesDmat[subset, l],
FUN = "*")
tmpGamma <- sweep(x = tmpGamma,
MARGIN = 1,
STATS = multiplier,
FUN = "*")
splinesNames <- c(splinesNames,
paste0(paste0("u", d, "S", j, "."),
seq(1, ncol(tmpGamma)),
paste0(":",
colnames(nonSplinesDmat)[l])))
splinesGamma <- cbind(splinesGamma, tmpGamma)
splinesInter <- c(splinesInter,
rep(colnames(nonSplinesDmat)[l],
ncol(tmpGamma)))
}
}
splinesNames <- gsub(":\\(Intercept\\)", ":1", splinesNames)
splinesNames <- gsub("\\(Intercept\\):", "1:", splinesNames)
splinesGamma <- splinesGamma[late.rows, ]
if (is.null(dim(splinesGamma))) {
if (length(late.rows) > 1) {
splinesGamma <- matrix(splinesGamma, ncol = 1)
} else {
splinesGamma <- matrix(splinesGamma, nrow = 1)
}
}
if (means == TRUE) {
splinesGamma <- colMeans(splinesGamma)
names(splinesGamma) <- splinesNames
} else {
colnames(splinesGamma) <- splinesNames
}
return(list(gamma = splinesGamma,
interactions = splinesInter))
}
}

#' Generate basis matrix for splines
#'
#' The user can declare that the unobservable enters into the MTRs in
#' the form of splines. This function generates the basis matrix for
#' the splines. The specifications for the spline must be passed as
#' the \code{\$splineslist} object generated by
#' \code{\link{removeSplines}}. Note that this function does not
#' account for any interactions between the splines and the
#' covariates. Interactions can be added simply by sweeping the basis
#' matrix by a vector for the values of the covariates.
#' @param splines a list. The name of each element should be the
#'     spline command, and each element should be a vector. Each entry
#'     of the vector is a covariate that the spline should be
#'     interacted with. Such an object can be generated by
#'     \code{\$splineslist}.
#' @param x the values of the unobservable at which the splines basis
#'     should be evaluated.
#' @param d either 0 or 1, indicating the treatment status.
#' @return a matrix. The number of rows is equal to the length of
#'     \code{x}, and the number of columns depends on the
#'     specifications of the spline. The name of each column takes the
#'     following form: "u[d]S[j].[b]", where "u" and "S" are fixed and
#'     stand for "unobservable" and "Splines" respectively. "[d]" will
#'     be either 0 or 1, depending on the treatment status. "[j]" will
#'     be an integer indicating which element of the list
#'     \code{splines} the column pertains to. "[b]" will be an integer
#'     reflect which component of the basis the column pertains to.
genBasisSplines <- function(splines, x, d = NULL) {
if (is.null(splines)) {
return(NULL)
} else {
bmatList <- list()
for (j in 1:length(splines)) {
splinesBasis <- NULL
splinesNames <- NULL
bmat <- eval(parse(text = gsub("uSpline\\(",
"uSplineBasis(x = x, ",
names(splines[j]))))
colnames(bmat) <- paste0(paste0("u", d, "S", j, "."),
seq(1, ncol(bmat)))
bmatList[[j]] <- bmat
}
return(bmatList)
}
}

#' Evaluate a particular function
#'
#' This function evaluates a single function in a list of functions.
#' @param fun the function to be evaluated.
#' @param values the values of the arguments to the function. Ordering
#'     is assumed to be the same as in \code{argnames}.
#' @param argnames the argument names corresponding to \code{values}.
#' @return the output of the function evaluated.
funEval <- function(fun, values = NULL, argnames = NULL) {
if (!is.null(values) & !is.null(argnames)) {
args <- as.list(values)
names(args) <- argnames
do.call(fun, args)
} else {
do.call(fun, list())
}
}

#' Construct constant function
#'
#' This function constructs another function that returns a
#' constant. It is used for constructing weight/knot functions.
#' @param x scalar, the constant the function evaluates to.
#' @return a function.
constructConstant <- function(x) {
fun <- function(...) {
x
}
return(fun)
}

#' Correct boolean expressions in terms lists
#'
#' This function takes a vector of terms and places parentheses around
#' boolean expressions.
#' @param termsList character vector, the vector of terms.
#' @return character vector.
parenthBoolean <- function(termsList) {
termsList <- lapply(termsList, function(x) {
tmpTerms <- unlist(strsplit(x, ":"))
for (op in c("==", "!=", ">", ">=", "<", "<=")) {
boolPos <- grep(op, tmpTerms)
if (length(boolPos > 0)) {
for (j in boolPos) {
if (substr(tmpTerms[j], 1, 1) != "(") {
if (substr(tmpTerms[j], 1, 2) != "I(") {
tmpTerms[j] <- paste0("(", tmpTerms[j], ")")
tmpTerms[j] <- gsub("TRUE)",
")TRUE",
tmpTerms[j])
tmpTerms[j] <- gsub("FALSE)",
")FALSE",
tmpTerms[j])
}
}
}
}
}
paste(tmpTerms, collapse = ":")
})
termsList <- unlist(termsList)
return(termsList)
}

#' Update splines object with list of interactions
#'
#' Certain interactions between factor variables and splines should be
#' dropped to avoid collinearity. Albeit collinearity in the MTR
#' specification will not impact the bounds, it can substantially
#' impact how costly it is to carry out the estimation. What this
#' function does is map each spline to a temporary variable. A design
#' matrix is then constructed using these temporary variables in place
#' the splines. If an interaction involving one of the temporary
#' variables is dropped, then one knows to also drop the corresponding
#' interaction with the spline. Note that only interaction terms need
#' to be omitted, so one does not need to worry about the formula
#' contained in removeSplines\$formula.
#' @param splinesobj list, consists of two elelments. The first is
#'     \code{removeSplines(m0)}, the second is
#'     \code{removeSplines(m1)}.
#' @param m0 one-sided formula for the marginal treatment response
#'     function for the control group. This should be the full MTR
#'     specificaiton (i.e. not the specification after removing the
#'     splines).
#' @param m1 one-sided formula for the marginal treatment response
#'     function for the treated group. This should be the full MTR
#'     specificaiton (i.e. not the specification after removing the
#'     splines).
#' @param data data.frame, restricted to complete observations.
#' @param uname string, name of the unobserved variable.
#' @return An updated version of \code{splinesobj}.
#' @export
interactSplines <- function(splinesobj, m0, m1, data, uname) {
tmpInterName <- "..t.i.n"
for (d in 0:1) {
if (!is.null(splinesobj[[d + 1]]\$splineslist)) {
mdata <- unique(data)
mdata[, uname] <- 1
if (d == 0) md <- m0
if (d == 1) md <- m1
md <- gsub("\\s+", " ", Reduce(paste, deparse(md)))
altNames <- list()
splineKeys <- names(splinesobj[[d + 1]]\$splinescall)
for (i in 1:length(splinesobj[[d + 1]]\$splinescall)) {
tmpName <- paste0(tmpInterName, i)
mdata[, tmpName] <- 1
altNames[splineKeys[i]] <- tmpName
for (j in 1:length(splinesobj[[d + 1]]\$splinescall[[i]])) {
origCall <- splinesobj[[d + 1]]\$splinescall[[i]][j]
origCall <- gsub("\\)", "\\\\)",
gsub("\\(", "\\\\(", origCall))
origCall <- gsub("\\\$", "\\\\\$", origCall)
origCall <- gsub("\\.", "\\\\.", origCall)
origCall <- gsub("\\+", "\\\\+", origCall)
origCall <- gsub("\\*", "\\\\*", origCall)
origCall <- gsub("\\^", "\\\\^", origCall)
md <- gsub(origCall, tmpName, md)
}
}
tmpColNames <- colnames(design(as.formula(md), mdata)\$X)
for (k in 1:length(altNames)) {
tmpName <- paste0(tmpInterName, k)
inter1 <- grep(paste0(":", altNames[[k]], "\$"),
tmpColNames)
inter2 <- grep(paste0("^", altNames[[k]], ":"),
tmpColNames)
inter3 <- grep(paste0(":", altNames[[k]], ":"),
tmpColNames)
inter4 <- altNames[[k]] %in% tmpColNames
interpos <- c(inter1, inter2, inter3)
if (length(interpos) > 0) {
## The case where there are interactions with splines
altNames[[k]] <- parenthBoolean(tmpColNames[interpos])
altNames[[k]] <- gsub(paste0(":", tmpName), "",
altNames[[k]])
altNames[[k]] <- gsub(paste0(tmpName, ":"), "",
altNames[[k]])
}
if (inter4) {
if (length(interpos) == 0) {
altNames[[k]] <- "1"
} else {
altNames[[k]] <- c("1", altNames[[k]])
}
}
}
splinesobj[[d + 1]]\$splinesinter <- altNames
} else {
splinesobj[[d + 1]]\$splinesinter <- NULL
}
}
return(splinesobj)
}
```

## Try the ivmte package in your browser

Any scripts or data that you put into this service are public.

ivmte documentation built on Sept. 17, 2021, 5:06 p.m.