Nothing
#' @include StudySpecification.R WeightedStudySpecification.R StudySpecificationAccessors.R SandwichLayerVariance.R confint_lm.R
NULL
# The above ensures that `StudySpecification`, `WeightedStudySpecification`, and
# `vcov_tee` are defined prior to `teeMod`
setClass("teeMod",
contains = "lm",
slots = c(StudySpecification = "StudySpecification",
lmitt_fitted = "logical",
absorbed_intercepts = "logical",
moderator = "character",
ctrl_means_model = "lm",
lmitt_call = "call"))
setValidity("teeMod", function(object) {
if (length(object@lmitt_fitted) != 1) {
return("@lmitt_fitted slot must be a single logical")
}
return(TRUE)
})
##' @title Show a \code{teeMod}
##' @description Display information about a \code{teeMod} object
##' @param object \code{teeMod} object, usually a result of a call to
##' [lmitt()].
##' @return \code{object}, invisibly.
##' @export
setMethod("show", "teeMod", function(object) {
coeffs <- object$coefficients
## Display only treatment effects, intercepts from supplementary
## regression of same y but w/o the covadj offset
if (object@lmitt_fitted) {
toprint <- (
## This should match any coefficients starting with the "txt." or "`txt."
grepl(paste0("^\\`?", var_names(object@StudySpecification, "t"), "\\."), names(coeffs)) |
## This should match the coefficients of the supplementary regression
grepl(paste0("^", formula(object)[[2L]], ":"), names(coeffs))
)
print(coeffs[toprint])
} else {
print(coeffs)
}
invisible(object)
})
##' @title Compute variance-covariance matrix for fitted \code{teeMod} model
##' @description An S3method for \code{stats::vcov} that computes standard
##' errors for \code{teeMod} models using \code{vcov_tee()}.
##' @details \code{vcov.teeMod()} wraps around \code{vcov_tee()}, so additional
##' arguments passed to \code{...} will be passed to the \code{vcov_tee()}
##' call. See documentation for \code{vcov_tee()} for information about
##' necessary arguments.
##' @param object a fitted \code{teeMod} model
##' @param ... additional arguments to \code{vcov_tee()}.
##' @inherit vcov_tee return
##' @exportS3Method
vcov.teeMod <- function(object, ...) {
cl <- match.call()
cl[[1L]] <- quote(vcov_tee)
names(cl)[2L] <- "x"
vmat <- eval(cl, parent.frame())
return(vmat)
}
##' @title Confidence intervals with standard errors provided by
##' \code{vcov.teeMod()}
##' @description An S3method for \code{stats::confint} that uses standard errors
##' computed using \code{vcov.teeMod()}. Additional arguments passed to this
##' function, such as \code{cluster} and \code{type}, specify the arguments of
##' the \code{vcov.teeMod()} call.
##' @details Rather than call \code{stats::confint.lm()},
##' \code{confint.teeMod()} calls \code{.confint_lm()}, a function internal to
##' the \code{propertee} package that ensures additional arguments in the
##' \code{...} of the \code{confint.teeMod()} call are passed to the internal
##' \code{vcov()} call.
##' @inheritParams .confint_lm
##' @inherit .confint_lm return
##' @exportS3Method
confint.teeMod <- function(object, parm, level = 0.95, ...) {
cl <- match.call()
cl[[1L]] <- quote(.confint_lm)
return(eval(cl, parent.frame()))
}
##' @title Extract empirical estimating equations from a \code{teeMod} model fit
##' @description An S3method for \code{sandwich::estfun} for producing a matrix
##' of contributions to the direct adjustment estimating equations.
##' @details If a prior covariance adjustment model has been passed to the
##' \code{offset} argument of the \code{teeMod} model using \code{cov_adj()},
##' \code{estfun.teeMod()} incorporates contributions to the estimating
##' equations of the covariance adjustment model.\cr\cr The covariance
##' adjustment sample may not fully overlap with the direct adjustment sample,
##' in which case \code{estfun.teeMod()} returns a matrix with the same number
##' of rows as the number of unique units of observation used to fit the two
##' models. Uniqueness is determined by matching units of assignment used to
##' fit the covariance adjustment model to units of assignment in the
##' \code{teeMod} model's \code{StudySpecification} slot; units of observation
##' within units of assignment that do not match are additional units that add
##' to the row count.\cr\cr The\code{by} argument in \code{cov_adj()} can
##' provide a column or a pair of columns (a named vector where the name
##' specifies a column in the direct adjustment sample and the value a column
##' in the covariance adjustment sample) that uniquely specifies units of
##' observation in each sample. This information can be used to align each
##' unit of observation's contributions to the two sets of estimating
##' equations. If no \code{by} argument is provided and units of observation
##' cannot be uniquely specified, contributions are aligned up to the unit of
##' assignment level. If standard errors are clustered no finer than that,
##' they will provide the same result as if each unit of observation's
##' contributions were aligned exactly.\cr\cr This method incorporates bias
##' corrections made to the residuals of \code{x} and, if applicable, the
##' covariance model stored in its \code{offset}. When its crossproduct is taken
##' (perhaps after suitable summing across rows within clusters), it provides
##' a heteroskedasticity- (or cluster-) robust estimate of the meat matrix of
##' the variance-covariance of the parameter estimates in \code{x}.
##'
##' @param x a fitted \code{teeMod} model
##' @param ... arguments passed to methods, most importantly those that define
##' the bias corrections for the residuals of \code{x} and, if applicable, a
##' \code{fitted_covariance_model} stored in its offset
##' @return An \eqn{n\times k} matrix of empirical estimating equations for \code{x}.
##' \code{k} includes the model intercept, main effects of treatment and
##' moderator variables, any moderator effects, and marginal and conditional
##' means of the outcome (and \code{offset}, if provided) in the control condition.
##' See Details for definition of \eqn{n}.
##' @exportS3Method
estfun.teeMod <- function(x, ...) {
# change model object's na.action to na.exclude so estfun returns NA rows
if (!is.null(x$na.action)) class(x$na.action) <- "exclude"
dots <- list(...)
# this sets the default to model-based
vcov_type <- match.arg(dots$vcov_type, c("MB", "HC", "CR", "DB"))
# sandwich::sandwich calls `NROW(estfun(x))` to get the scaling factor, so to
# allow users to use sandwich::sandwich, we need to accommodate that kind of
# call to `estfun` where itt_rcorrect isn't provided. we'll set a default
# option here (the correction doesn't change the dimension of the output) but
# make sure the user-facing variance estimation function `vcov_tee()` passes
# on an `itt_rcorrect` argument that correctly reflects the args provided there
if (is.null(itt_rcorrect <- dots$itt_rcorrect)) itt_rcorrect <- "HC0"
# MB and DB estfun without covariance adjustment; below, this will be replaced
# entirely if there's covariance adjustment
mat <- .base_S3class_estfun(x) - .estfun_DB_blockabsorb(x, ...)
if (vcov_type != "DB") {
# get estfun for ctrl mean regression
cm_ef <- estfun(x@ctrl_means_model)
q <- ncol(cm_ef)
cm_ef[is.na(cm_ef)] <- 0
mat <- cbind(mat, cm_ef)
} else {
q <- 0
cm_ef <- NULL
}
## if ITT model offset doesn't contain info about covariance model, estimating
## equations should be the ITT model estimating equations
if (is.null(sl <- x$model$`(offset)`) | !inherits(sl, "SandwichLayer")) {
# after setting the na.action to "exclude", `residuals` returns NA's, so
# we make sure the entries in the estfun are set to 0 rather than NA
resids <- stats::residuals(x, type = "working")
return(mat / replace(resids, is.na(resids), 1) *
replace(do.call(.rcorrect,
c(list(resids = resids, x = x, model = "itt", type = itt_rcorrect),
dots)), is.na(resids), 0))
}
## otherwise, extract/compute the rest of the relevant matrices/quantities
estmats <- .align_and_extend_estfuns(x, cm_ef, ...)
a11_inv <- .get_a11_inverse(x)
a21 <- .get_a21(x, ...)
## get scaling constants
nq <- nrow(stats::model.frame(x, na.action = "na.pass")) # this includes NA's as our other routines do
nc <- nrow(stats::model.frame(sl@fitted_covariance_model))
n <- nrow(estmats[["psi"]])
## form matrix of estimating equations
mat <- estmats[["psi"]] - nq / nc * estmats[["phi"]] %*% t(a11_inv) %*% t(a21)
mat[,1:(ncol(estmats[["psi"]]) - q)] <- mat[,1:(ncol(estmats[["psi"]])-q)] - .estfun_DB_blockabsorb(x, ...)
return(mat)
}
##' @title Extract bread matrix from a \code{teeMod} model fit
##' @description
##' An S3method for \code{sandwich::bread} that extracts the bread of the
##' direct adjustment model sandwich covariance matrix.
##'
##' @details This function is a thin wrapper around
##' \code{.get_tilde_a22_inverse()}.
##' @param x a fitted \code{teeMod} model
##' @param ... arguments passed to methods
##' @inherit vcov_tee return
##' @exportS3Method
bread.teeMod <- function(x, ...) .get_tilde_a22_inverse(x, ...)
##' @title Bias correct residuals contributing to standard errors of a \code{teeMod}
##' @param resids numeric vector of residuals to correct
##' @param x teeMod object
##' @param model string indicating which model the residuals are from. \code{"itt"}
##' indicates correction to the residuals of \code{x}, and \code{"cov_adj"}
##' indicates correction to the residuals of the covariance adjustment model.
##' This informs whether corrections should use information from \code{x} or the
##' \code{fitted_covariance_model} slot of the \code{SandwichLayer} object in the
##' \code{offset} for corrections
##' @param type string indicating the desired bias correction. Can be one of
##' \code{"(HC/CR/MB)0"}, \code{"(HC/CR/MB)1"}, or \code{"(HC/CR/MB)2"}
##' @param ... additional arguments passed from up the call stack; in particular,
##' the \code{cluster_cols} argument, which informs whether to cluster and provide
##' CR2 corrections instead of HC2 corrections, as well as the correction for the
##' number of clusters in the CR1 correction. This may also include a \code{by}
##' argument.
##' @keywords internal
.rcorrect <- function(resids, x, model, type, ...) {
if (type %in% paste0(c("HC", "CR", "MB", "DB"), "0")) {
cr <- resids
} else if (type %in% c(paste0(rep(c("MB", "HC", "CR"), 2), rep(c(1, 2), each = 3)))) {
dots <- list(...)
if (is.null(cluster_cols <- dots$cluster_cols)) cluster_cols <- var_names(x@StudySpecification, "u")
sl <- x$model$`(offset)`
cmod <- if (!is.null(sl) & inherits(sl, "SandwichLayer")) sl@fitted_covariance_model else NULL
if (substr(type, 3, 3) == 1) {
# HC/CR1 correction is based on n and k from propertee's estfun matrix
# for both sets of residuals
cls <- if ("cluster" %in% names(dots)) dots$cluster else {
if (is.null(by <- dots$by) & !is.null(cmod)) {
by <- setdiff(colnames(sl@keys),
c(var_names(x@StudySpecification, "u"), "in_Q"))
if (length(by) == 0) by <- NULL
}
if (is.null(by)) by <- cluster_cols
do.call(".make_uoa_ids",
list(x = x, vcov_type = substr(type, 1, 2), cluster = cluster_cols, by = by))
}
g <- length(unique(cls))
n <- length(cls)
k <- x$rank + if (!is.null(cmod) & inherits(cmod, "lmrob")) {
sum(!is.na(cmod$coefficients)) + 1
} else if (!is.null(cmod)) sum(!is.na(cmod$coefficients)) else 0
cr <- sqrt(g / (g-1) * (n-1) / (n-k)) * resids
} else {
# (HC/CR)2 correction; derived separately for the two regressions
model <- match.arg(model, c("itt", "cov_adj"))
if (model == "itt") {
mod <- x
efm <- .base_S3class_estfun(mod)
mf_data <- get("data", environment(formula(mod)))
} else {
if (is.null(cmod)) {
stop("x must have a SandwichLayer object as an offset")
}
mod <- sl@fitted_covariance_model
efm <- estfun(mod)
mf_data <- eval(mod$call$data, environment(formula(mod)))
}
if (cluster_cols[1] == "..uoa..") {
cls <- row.names(stats::model.frame(mod, na.action = na.pass))
} else {
cls <- Reduce(
function(l, r) paste(l, r, sep = "_"),
as.list(stats::expand.model.frame(mod, cluster_cols)[, cluster_cols, drop=FALSE])
)
}
if (any(nas <- is.na(cls))) cls[nas] <- replicate(
sum(nas), paste(sample(letters, 8, replace = TRUE), collapse = ""), simplify = TRUE
)
if (inherits(mod, "teeMod")) {
uoa_cols <- var_names(mod@StudySpecification, "u")
if (has_blocks(mod@StudySpecification) &
length(setdiff(cluster_cols, uoa_cols)) == 0 &
length(setdiff(uoa_cols, cluster_cols)) == 0) {
Q_obs <- .sanitize_Q_ids(mod, id_col = cluster_cols)
Q_obs_ids <- Q_obs$cluster
spec_blocks <- blocks(mod@StudySpecification)
uoa_block_ids <- apply(spec_blocks, 1, function(...) paste(..., collapse = ","))
small_blocks <- identify_small_blocks(mod@StudySpecification)
structure_w_small_blocks <- cbind(
mod@StudySpecification@structure,
small_block = small_blocks[uoa_block_ids],
block_replace_id = apply(spec_blocks, 1,
function(nms, ...) paste(paste(nms, ..., sep = ""), collapse = ","),
nms = colnames(spec_blocks))
)
Q_obs <- .merge_preserve_order(Q_obs, structure_w_small_blocks, by = uoa_cols, all.x = TRUE)
na_blocks <- apply(Q_obs[var_names(x@StudySpecification, "b")], 1, function(x) any(is.na(x)))
Q_obs$cluster[Q_obs$small_block & !na_blocks] <-
Q_obs$block_replace_id[Q_obs$small_block & !na_blocks]
cls <- Q_obs$cluster
}
}
g <- length(unique(cls))
n <- nrow(efm)
k <- ncol(efm)
if (g == n) {
pii <- stats::hatvalues(mod)
cr <- 1 / sqrt(1 - pii) * resids
} else {
if (inherits(mod, c("glmrob", "lmrob"))) stop("CR2 correction not implemented for robust fits")
wres <- stats::residuals(mod, type = "working")
XW <- sweep(efm, 1, wres, FUN = "/")
XW[is.na(XW)] <- 0
X <- stats::model.matrix(
stats::delete.response(stats::terms(mod)),
do.call("model.frame",
list(mod, mf_data, subset = eval(mod$call$subset, mf_data),
na.action = na.pass)),
contrasts.arg = mod$contrasts,
xlev = mod$xlevels
)[,colnames(XW),drop=FALSE] # estfun drops NA coeffs for XW, so we have to align with that
XTWX_inv <- solve(crossprod(XW, X))
cr <- numeric(length(cls))
for (cl in unique(cls)) {
cl_ix <- which(cls == cl)
crc <- rep(NA_real_, length(cl_ix))
nas <- stats::na.action(mod)
ok <- setdiff(cl_ix, nas)
if (inherits(mod, "teeMod")) {
iss <- cluster_iss(mod, cluster_unit = cl, cluster_ids = cls)
} else {
I_P_cc <- diag(length(ok)) - tcrossprod(
tcrossprod(X[ok,,drop=FALSE], XTWX_inv), XW[ok,,drop=FALSE])
schur <- eigen(I_P_cc)
iss <- schur$vectors %*% (solve(schur$vectors) / sqrt(schur$values))
}
crc[!(cl_ix %in% nas)] <- drop(iss %*% resids[ok])
cr[cl_ix] <- crc
}
}
}
} else {
stop(paste0("'", type, "' bias correction not available"))
}
return(cr)
}
##' @title (Internal) Align the dimensions and rows of direct adjustment and
##' covariance adjustment model estimating equations matrices
##' @details \code{.align_and_extend_estfuns()} first extracts the matrices of
##' contributions to the empirical estimating equations for the direct
##' adjustment and covariance adjustment models; then, it pads the matrices
##' with zeros to account for units of observation that appear in one
##' model-fitting sample but not the other; finally it orders the matrices so
##' units of observation (or if unit of observation-level ordering is
##' impossible, units of assignment) are aligned.
##' @param x a fitted \code{teeMod} model
##' @param ctrl_means_ef_mat optional, a matrix of estimating equations corresponding
##' to the estimates of the marginal (and possibly conditional) means of the outcome
##' and \code{offset} in the control condition. These are aligned and extended
##' in the same way as the matrix of estimating equations for \code{x} and
##' \code{cbind}ed to them
##' @param by optional, a character vector indicating columns that uniquely identify
##' rows in the dataframe used for fitting \code{x} and the dataframe passed to the
##' \code{data} argument of the covariance adjustment model fit. The default is
##' \code{NULL}, in which case the unit of assignment columns specified in the
##' \code{StudySpecification} slot of \code{x} are used.
##' @param ... mostly arguments passed to methods, but the special case is the argument
##' \code{loco_residuals}, which indicates the offsets in the residuals of \code{x}
##' should be replaced by versions that use leave-one-cluster-out estimates of
##' the covariance model
##' @return A list of two matrices, one being the aligned contributions to the
##' estimating equations for the direct adjustment model, and the other being
##' the aligned contributions to the covariance adjustment model.
##' @keywords internal
.align_and_extend_estfuns <- function(x, ctrl_means_ef_mat = NULL, by = NULL, ...) {
if (!inherits(x, "teeMod") | !inherits(x$model$`(offset)`, "SandwichLayer")) {
stop("`x` must be a fitted teeMod object with a SandwichLayer offset")
}
dots <- list(...)
dots$type <- NULL
## same in-line comment in `estfun.teeMod()` about setting itt_rcorrect applies
## to both args here
if (is.null(itt_rcorrect <- dots$itt_rcorrect)) itt_rcorrect <- "HC0"
if (is.null(cov_adj_rcorrect <- dots$cov_adj_rcorrect)) cov_adj_rcorrect <- "HC0"
uoa_cols <- var_names(x@StudySpecification, "u")
cluster_cols <- if (is.null(dots$cluster_cols)) uoa_cols else dots$cluster_cols
sl <- x$model$`(offset)`
cmod <- sl@fitted_covariance_model
# the ordering output by `.order_samples()` is explained in that function's
# documentation
id_order <- .order_samples(x, by = by, verbose = FALSE)
n <- length(c(id_order$Q_not_C, id_order$C_in_Q, id_order$C_not_Q))
nq <- length(c(id_order$Q_not_C, id_order$Q_in_C))
nc <- length(c(id_order$C_in_Q, id_order$C_not_Q))
## get the unaligned + unextended estimating equations
phi_r <- stats::residuals(cmod, type = "working") # for teeMod, lm, lmrob this gives desired type = "response"
phi <- estfun(cmod) / replace(phi_r, is.na(phi_r), 1) *
replace(do.call(.rcorrect,
c(list(resids = phi_r, x = x, model = "cov_adj", type = cov_adj_rcorrect, by = by),
dots)), is.na(phi_r), 0)
# use jackknife first-stage coefficient estimates if Q and C overlap
psi_r <- stats::residuals(x, type = "working")
if (!is.null(dots$loco_residuals) & sum(sl@keys$in_Q) > 0) {
new_psi_r <- .compute_loo_resids(x, cluster_cols, ...)
} else {
new_psi_r <- psi_r
}
psi <- .base_S3class_estfun(x) / replace(psi_r, is.na(psi_r), 1) *
replace(do.call(.rcorrect,
c(list(resids = new_psi_r, x = x, model = "itt", type = itt_rcorrect, by = by),
dots)), is.na(new_psi_r), 0)
Q_order <- c(as.numeric(names(id_order$Q_not_C)), as.numeric(names(id_order$Q_in_C)))
aligned_psi <- matrix(0, nrow = n, ncol = ncol(psi), dimnames = list(seq(n), colnames(psi)))
aligned_psi[1:nq,] <- psi[Q_order,,drop=FALSE]
C_order <- c(as.numeric(names(id_order$C_in_Q)), as.numeric(names(id_order$C_not_Q)))
aligned_phi <- matrix(0, nrow = n, ncol = ncol(phi),
dimnames = list(seq_len(n), colnames(phi)))
aligned_phi[(n-nc+1):n,] <- phi[C_order,,drop=FALSE]
out <- list(psi = aligned_psi, phi = aligned_phi)
if (!is.null(ctrl_means_ef_mat)) {
aligned_cm_ef <- matrix(0, nrow = n, ncol = ncol(ctrl_means_ef_mat),
dimnames = list(seq(n), colnames(ctrl_means_ef_mat)))
aligned_cm_ef[1:nq,] <- ctrl_means_ef_mat[Q_order,,drop=FALSE]
out[["psi"]] <- cbind(out[["psi"]], aligned_cm_ef)
}
return(out)
}
##' @title (Internal) Extract empirical estimating equations from a
##' \code{teeMod} model using the S3 method associated with its
##' \code{.S3Class} slot
##' @inheritParams estfun.teeMod
##' @return S3 method
##' @keywords internal
.base_S3class_estfun <- function(x) {
## this vector indicates the hierarchy of `sandwich::estfun` methods to use
## to extract ITT model's estimating equations
x$coefficients <- replace(x$coefficients, is.na(x$coefficients), 0)
valid_classes <- c("glm", "lmrob", "svyglm", "lm")
base_class <- match(x@.S3Class, valid_classes)
if (all(is.na(base_class))) {
stop(paste("Direct adjustment model must have been fitted using a function from the",
"`propertee`, `stats`, `robustbase`, or `survey` package"))
}
ef <- getS3method("estfun", valid_classes[min(base_class, na.rm = TRUE)])(x)
ef[is.na(ef)] <- 0
keep_coef <- if (is.null(aliased <- stats::alias(x)$Complete)) {
colnames(ef)
} else {
if (is.null(nms <- colnames(aliased))) 1 else nms
}
return(ef[, keep_coef,drop=FALSE])
}
#' @title Compute residuals for a \code{teeMod} object with leave-one-out estimates
#' of the \code{offset}
#' @details
#' The residual for any observation also used for fitting the \code{fitted_covariance_model}
#' stored in the \code{offset} of \code{x} is replaced by an estimated residual
#' that uses a cluster leave-one-out estimate of the \code{fitted_covariance_model}
#' for generating a value of the \code{offset}.
#' @param x a \code{teeMod} object
#' @param cluster vector of column names that identify clusters
#'
#' @importFrom stats model.frame
#' @keywords internal
.compute_loo_resids <- function(x, cluster, ...) {
## what's in the call
sl <- x$model$`(offset)`
cmod <- sl@fitted_covariance_model
## get cluster ID's
in_Q <- which(sl@keys$in_Q)
if (cluster[1] == "..uoa..") {
C_cls <- rownames(stats::model.frame(cmod, na.action = na.pass))
} else {
C_cls <- Reduce(
function(l, r) paste(l, r, sep = "_"),
as.list(stats::expand.model.frame(cmod, cluster)[, cluster, drop=FALSE])
)
}
jk_units <- unique(C_cls[in_Q])
## get LOO coefficients for each overlapping unit in Q and C
loo_cmod <- Reduce(
cbind,
mapply(
function(loo_unit, cmod, cls) {
cmod_cl <- stats::getCall(cmod)
cmod_cl$subset <- eval(cls != loo_unit)
cf <- eval(cmod_cl, envir = environment(formula(cmod)))$coefficients
matrix(cf, ncol = 1, dimnames = list(names(cf), NULL))
},
jk_units,
SIMPLIFY = FALSE,
MoreArgs = list(cmod = cmod, cls = C_cls)
)
)
colnames(loo_cmod) <- jk_units
loo_cmod[is.na(loo_cmod)] <- 0 # replace NA coefficients due to singularity with 0's
## make preds for units in Q using LOO estimates of covariance adjustment model
## (make sure to set treatment indicator to 0 if it was included in the model)
tt <- stats::delete.response(stats::terms(cmod))
Q_data <- get("data", envir = environment(formula(x)))
mf <- call("model.frame",
tt,
Q_data,
subset = eval(x@lmitt_call$subset, envir = Q_data),
na.action = na.pass,
xlev = cmod$xlevels)
mf <- eval(mf)
trt_name <- var_names(x@StudySpecification, "t")
if (trt_name %in% colnames(mf)) {
trts <- treatment(x@StudySpecification)[, 1]
if (is.numeric(trts)) {
mf[[trt_name]] <- min(abs(trts))
} else if (is.logical(trts)) {
mf[[trt_name]] <- FALSE
} else if (is.factor(trts)) {
mf[[trt_name]] <- levels(trts)[1]
}
}
mm <- stats::model.matrix(tt, mf, contrasts.arg = cmod$contrasts)
if (cluster[1] == "..uoa..") {
Q_cls <- rownames(stats::model.frame(x, na.action = na.pass))
} else {
Q_cls <- Reduce(
function(l, r) paste(l, r, sep = "_"),
as.list(stats::expand.model.frame(x, cluster)[, cluster, drop=FALSE])
)
}
Q_cls_in_C <- Q_cls %in% colnames(loo_cmod)
loo_preds <- rowSums(mm[Q_cls_in_C,,drop=FALSE] *
t(loo_cmod[, as.character(Q_cls[Q_cls_in_C])]))
if (!is.null(cmod$family)) loo_preds <- cmod$family$linkinv(loo_preds)
## slot them in to existing predictions
all_preds <- sl@.Data
all_preds[Q_cls_in_C] <- loo_preds
## make residuals (need to subtract original offset because it's included in fitted values)
y <- stats::model.response(stats::model.frame(x, na.action = na.pass))
os <- stats::model.offset(stats::model.frame(x, na.action = na.pass))
return(y - stats::fitted(x) + os - all_preds)
}
#' @title Make ID's to pass to the \code{cluster} argument of \code{vcov_tee()}
#' @description \code{.make_uoa_ids()} returns a factor vector of cluster ID's
#' that align with the order of the units of observations' contributions in
#' \code{estfun.teeMod()}. This is to ensure that when \code{vcov_tee()} calls
#' \code{sandwich::meatCL()}, the \code{cluster} argument aggregates the
#' correct contributions to estimating equations within clusters.
#' @param x a fitted \code{teeMod} object
#' @param vcov_type a string indicating model-based or design-based
#' covariance estimation. Currently, "MB", "CR", and "HC" are the only strings
#' registered as indicating model-based estimation.
#' @param cluster character vector or list; optional. Specifies column names
#' that appear in both the covariance adjustment and direct adjustment model
#' dataframes. Defaults to NULL, in which case unit of assignment columns
#' indicated in the StudySpecification will be used for clustering. If there
#' are multiple clustering columns, they are concatenated together for each
#' row and separated by "_".
#' @param ... arguments passed to methods
#' @return A vector with length equal to the number of unique units of
#' observation used to fit the two models. See Details of
#' \code{estfun.teeMod()} for the method for determining uniqueness.
#' @keywords internal
.make_uoa_ids <- function(x, vcov_type, cluster = NULL, ...) {
if (!inherits(x, c("teeMod", "mmm"))) {
stop("Must be provided a teeMod or mmm object")
} else if (inherits(x, "mmm")) {
# just use the first model to generate the ID's; this will be the same for
# mmm objects of models with the same Q (and possibly C) samples
x <- x[[1L]]
}
mc <- match.call()
# Must be a teeMod object for this logic to occur
if (!inherits(cluster, "character")) {
cluster <- var_names(x@StudySpecification, "u")
}
# get observation-level unit of assignment and cluster ID's for observations
# in Q
Q_obs <- .sanitize_Q_ids(x, id_col = cluster, ...)
Q_obs_ids <- Q_obs$cluster
# for model-based vcov calls on blocked specifications when clustering is
# called for at the assignment level, replace unit of assignment ID's with
# block ID's for small blocks
if (vcov_type %in% c("CR", "HC", "MB") & has_blocks(x@StudySpecification)) {
uoa_cols <- var_names(x@StudySpecification, "u")
if (length(setdiff(cluster, uoa_cols)) == 0 & length(setdiff(uoa_cols, cluster)) == 0) {
spec_blocks <- blocks(x@StudySpecification)
uoa_block_ids <- apply(spec_blocks, 1,
function(...) paste(..., collapse = ","))
small_blocks <- identify_small_blocks(x@StudySpecification)
structure_w_small_blocks <- cbind(
x@StudySpecification@structure,
small_block = small_blocks[uoa_block_ids],
block_replace_id = apply(spec_blocks, 1,
function(nms, ...) paste(paste(nms, ..., sep = ""), collapse = ","),
nms = colnames(spec_blocks))
)
Q_obs <- .merge_preserve_order(Q_obs, structure_w_small_blocks, by = uoa_cols, all.x = TRUE)
na_blocks <- apply(Q_obs[var_names(x@StudySpecification, "b")], 1, function(x) any(is.na(x)))
Q_obs$cluster[Q_obs$small_block & !na_blocks] <-
Q_obs$block_replace_id[Q_obs$small_block & !na_blocks]
Q_obs_ids <- Q_obs$cluster
}
}
# If there's no covariance adjustment info, return the ID's found in Q
if (!inherits(ca <- x$model$`(offset)`, "SandwichLayer")) {
return(factor(Q_obs_ids, levels = unique(Q_obs_ids)))
}
# `keys` may have columns in addition to "in_Q" and the uoa columns if a
# `by` argument was specified in `cov_adj()` or `as.SandwichLayer()`. If it
# does, use the columns exclusively specified in `by` to produce the order
if (is.null(by <- mc$by)) by <- setdiff(colnames(ca@keys),
c(var_names(x@StudySpecification, "u"), "in_Q"))
if (length(by) == 0) {
by <- cluster
}
ord_call <- call(".order_samples", quote(x), by = quote(by))
id_order <- eval(ord_call)
# if no "by" was specified in cov_adj(), cluster variable was used for ordering,
# so we can take the names of the sorted vector. Otherwise, we need to get
# ID's associated with the ordering.
if (length(setdiff(cluster, by)) == 0) {
ids <- Reduce(c, id_order[c("Q_not_C", "Q_in_C", "C_not_Q")])
} else {
C_ids <- .sanitize_C_ids(ca, cluster, verbose = FALSE, sorted = FALSE, ...)
ids <- c(
Q_obs_ids[as.numeric(names(id_order$Q_not_C))],
Q_obs_ids[as.numeric(names(id_order$Q_in_C))],
C_ids[as.numeric(names(id_order$C_not_Q))]
)
}
na_ids <- is.na(ids)
ids[na_ids] <- apply(
matrix(sample(c(letters, LETTERS), 8 * sum(na_ids), replace = TRUE), ncol = 8),
1, function(...) paste(..., collapse = "")
)
return(factor(ids, levels = unique(ids)))
}
#' @title (Internal) Order observations used to fit a \code{teeMod} model and a
#' prior covariance adjustment model
#' @details \code{.order_samples()} underpins the ordering for
#' \code{.make_uoa_ids()} and \code{estfun.teeMod()}. This function orders the
#' outputs of those functions, but also informs how the original matrices of
#' contributions to estimating equations need to be indexed to align units of
#' observations' contributions to both sets of estimating equations.\cr\cr
#' When a \code{by} argument is provided to \code{cov_adj()}, it is used to
#' construct the order of \code{.order_samples()}.
#' @param x a fitted \code{teeMod} model
#' @param by character vector of columns to get ID's for ordering from. Default
#' is NULL, in which case unit of assignment ID's are used for ordering.
#' @param ... arguments passed to methods
#' @return A list of four named vectors. The \code{Q_not_C} element holds the
#' ordering for units of observation in the direct adjustment sample but not
#' the covariance adjustment samples; \code{Q_in_C} and \code{C_in_Q}, the
#' ordering for units in both; and \code{C_not_Q}, the ordering for units in
#' the covariance adjustment sample only. \code{Q_in_C} and \code{C_in_Q}
#' differ in that the names of the \code{Q_in_C} vector correspond to row
#' indices of the original matrix of estimating equations for the direct
#' adjustment model, while the names of \code{C_in_Q} correspond to row
#' indices of the matrix of estimating equations for the covariance adjustment
#' model. Similarly, the names of \code{Q_not_C} and \code{C_not_Q} correspond
#' to row indices of the direct adjustment and covariance adjustment samples,
#' respectively. Ultimately, the order of \code{.make_uoa_ids()} and
#' \code{estfun.teeMod()} is given by concatenating the vectors stored in
#' \code{Q_not_C}, \code{Q_in_C}, and \code{C_not_q}.
#' @keywords internal
.order_samples <- function(x, by = NULL, ...) {
if (!inherits(x, "teeMod") | !inherits(ca <- x$model$`(offset)`, "SandwichLayer")) {
stop(paste("x must be a teeMod object with a SandwichLayer offset or",
"ca must be a SandwichLayer object to retrieve information about",
"the covariance adjustment model"))
}
## `keys` may have additional columns beyond "in_Q" and the uoa columns if a
## `by` argument was specified in `cov_adj()` or `as.SandwichLayer()`. If it
## does, use the columns exclusively specified in `by` to merge.
if (is.null(by) | length(setdiff(colnames(ca@keys),
c(var_names(x@StudySpecification, "u"), "in_Q"))) > 0) {
by <- setdiff(colnames(ca@keys), c(var_names(x@StudySpecification, "u"), "in_Q"))
}
## Should only hit this if a custom `cluster` argument hasn't been passed to
## vcov_tee or no `by` was specified in `cov_adj`
if (length(by) == 0) {
by <- var_names(x@StudySpecification, "u")
}
# The order, given by the names of the output vector, will be:
# Q not in C --> Q in C --> C not in Q. The values in the vector correspond to
# the rows to pull from the original estfun matrices
# get all ID's in Q
# Q_ids <- .sanitize_Q_ids(x, id_col = by, ...)[, "cluster"]
if (x@StudySpecification@unit_of_assignment_type == "none") {
Q_ids <- rownames(model.frame(x, na.action = NULL))
} else {
Q_ids <- stats::expand.model.frame(x, by)[, by, drop = FALSE]
Q_ids <- apply(Q_ids, 1, function(...) paste(..., collapse = "_"))
}
# get all ID's in C and replace NA's with unique ID
C_ids <- .sanitize_C_ids(ca, by, sorted = FALSE, ...)
# need Q_in_C and C_in_Q to have the same order so contributions are aligned
Q_in_C <- stats::setNames(Q_ids[which(Q_ids %in% C_ids)], which(Q_ids %in% C_ids))
Q_in_C <- sort(Q_in_C)
C_in_Q <- stats::setNames(C_ids[which(C_ids %in% Q_ids)], which(C_ids %in% Q_ids))
C_in_Q <- sort(C_in_Q)
if (length(Q_in_C) != length(C_in_Q)) {
stop(paste("Contributions to covariance adjustment and/or effect estimation",
"are not uniquely specified. Provide a `by` argument to `cov_adj()`",
"or `vcov.teeMod()`"))
}
out <- list(
Q_not_C = stats::setNames(Q_ids[!(Q_ids %in% C_ids)], which(!(Q_ids %in% C_ids))),
Q_in_C = Q_in_C,
C_in_Q = C_in_Q,
C_not_Q = stats::setNames(C_ids[!(C_ids %in% Q_ids)], which(!(C_ids %in% Q_ids)))
)
return(out)
}
#' @title (Internal) Return ID's used to order observations in the direct
#' adjustment sample
#' @param x a fitted \code{teeMod} model
#' @param id_col character vector; optional. Specifies column(s) whose ID's will
#' be returned. The column must exist in the data that created the
#' \code{StudySpecification} object. Default is NULL, in which case unit of
#' assignment columns indicated in the specification will be used to generate
#' ID's.
#' @param ... arguments passed to methods
#' @return A vector with length equal to the number of units of observation in
#' the direct adjustment sample
#' @keywords internal
.sanitize_Q_ids <- function(x, id_col = NULL, ...) {
# link the units of assignment in the StudySpecification with desired cluster ID's
uoa_cls_df <- .make_uoa_cluster_df(x@StudySpecification, id_col)
uoa_cols <- var_names(x@StudySpecification, "u")
if (nrow(uoa_cls_df) == nrow(x$model)) {
expand_cols <- unique(c(uoa_cols, id_col))
by.y <- if (length(expand_cols) == length(uoa_cols)) uoa_cols else c(uoa_cols, "cluster")
} else {
expand_cols <- by.y <- uoa_cols
}
if (x@StudySpecification@unit_of_assignment_type == "none") {
moddata <- x$call$data
moddata$..uoa.. <- rownames(moddata)
x$call$data <- moddata
}
obs_uoa_ids <- stats::expand.model.frame(x,
expand_cols)[, expand_cols, drop = FALSE]
out <- merge(cbind(obs_uoa_ids, .order_id = seq_len(nrow(obs_uoa_ids))),
uoa_cls_df, by.x = expand_cols, by.y = by.y, all.x = TRUE, sort = FALSE)
out <- out[sort(out$.order_id, index.return = TRUE)$ix,]
out$.order_id <- NULL
colnames(out) <- unique(c(by.y, "cluster"))
out$cluster <- as.character(out$cluster)
rownames(out) <- NULL
return(out)
}
#' @exportS3Method getCall teeMod
#' @importFrom stats getCall
getCall.teeMod <- function(x, ...) {
return(x$call)
}
#' @export
update.teeMod <- function(object, ...) {
stop(paste("teeMod objects do not support an `update` method.\n",
"You can use `update` on the formula object passed to `lmitt`",
"instead."))
}
#' @title Design-based estimating equations contributions
#' @param x a fitted \code{teeMod} object
#' @param ... arguments passed to methods
#' @details calculate contributions to empirical estimating equations from a
#' \code{teeMod} model with absorbed intercepts from the design-based
#' perspective
#' @return An \eqn{n\times k} matrix
#' @keywords internal
.estfun_DB_blockabsorb <- function (x, by = NULL, ...){
# return 0 if not asking for a design-based SE or DA does not absorb intercepts
cl <- match.call()
vcov_type <- match.arg(cl$vcov_type, c("MB", "HC", "CR", "DB"))
if (!(db <- vcov_type == "DB") | !x@absorbed_intercepts) {
return(0)
}
if (inherits(x$model$`(offset)`, "SandwichLayer")){
# if the model involves SandwichLayer covariance adjustment
temp <- .align_and_extend_estfuns(x, by = by, ...)[["psi"]]
}
else
temp <- .base_S3class_estfun(x)
phitilde <- .get_phi_tilde(x)
appinv_atp <- .get_appinv_atp(x)
if (!is.null(x$call$offset)){
# if the model involves covariance adjustment
# define the row ordering using .order_samples() and insert rows of 0's into
# the matrices where necessary while maintaining observation alignment
id_order <- .order_samples(x, by = by, verbose = FALSE)
aligned_phitilde <- matrix(
0, nrow = nrow(temp), ncol = ncol(phitilde),
dimnames = list(seq_len(nrow(temp)), colnames(phitilde)))
C_order <- c(as.numeric(names(id_order$C_in_Q)), as.numeric(names(id_order$C_not_Q)))
aligned_phitilde[which(!is.na(C_order)),] <- phitilde[C_order[!is.na(C_order)], ]
mat <- aligned_phitilde %*% appinv_atp
}
else{
mat <- phitilde %*% appinv_atp
}
mat <- cbind(matrix(0, nrow = nrow(mat), ncol = ncol(temp) - ncol(mat)), mat)
return(mat)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.