# R/declare_potential_outcomes.R In DeclareDesign: Declare and Diagnose Research Designs

#### Documented in expand_conditionspotential_outcomes_internal.formulapotential_outcomes_internal.NULL

```#' Declare potential outcomes
#'
#' @inheritParams declare_internal_inherit_params
#'
#' @return a function that returns a data.frame
#'
#' @export
#'
#' @details
#'
#' A \code{declare_potential_outcomes} function is used to create outcomes that each unit would express in each possible treatment condition.
#'
#' @examples
#'
#' # Potential outcomes can be declared in two ways:
#' # by using a formula or as separate variables.
#'
#'
#' # Using a formula
#' declare_population(N = 100, U = rnorm(N)) +
#'   declare_potential_outcomes(Y ~ 0.5*Z + U)
#'
#' # As separate variables
#' declare_population(N = 100, U = rnorm(N)) +
#'   declare_potential_outcomes(Y_Z_0 = U,
#'                              Y_Z_1 = U + 0.5)
#' # (notice the naming structure: outcome_assignment_condition: Y_Z_1)
#'
#'
#' # You can change the name of the outcome
#' declare_population(N = 100, U = rnorm(N)) +
#'   declare_potential_outcomes(Y2 ~ 0.5*Z + U)
#'
#' # You can change the name of the assignment_variable
#' declare_population(N = 100, U = rnorm(N)) +
#'   declare_potential_outcomes(Y ~ 0.5*D + U, assignment_variable = "D")
#'
#'
#' # `conditions` defines the "range" of the potential outcomes function
#' declare_population(N = 100, age = sample(18:65, N, replace = TRUE)) +
#'   declare_potential_outcomes(formula = Y ~ .05 + .25 * Z + .01 * age * Z,
#'                              conditions = 1:4)
#'
#' # Multiple assignment variables can be specified in `conditions`. For example,
#' # in a 2x2 factorial potential outcome:
#'
#' declare_population(N = 100, age = sample(18:65, N, replace = TRUE)) +
#'   declare_potential_outcomes(formula = Y ~ .05 + .25 * Z1 + .01 * age * Z2,
#'                              conditions = list(Z1 = 0:1, Z2 = 0:1))
#'
declare_potential_outcomes <- make_declarations(potential_outcomes_handler, "potential_outcomes")
### Default handler calls either the formula handler or non-formula handler
### this can be determind at declare time in the validation_fn, and the correct function returned instead
### If possible, we do so, even though much of the logic is essentially duplicated
### this makes tracing the execution in run_design much simpler

potential_outcomes_handler <- function(..., data, level) {
(function(formula, ...) UseMethod("potential_outcomes_internal"))(..., data = data, level = level)
}

validation_fn(potential_outcomes_handler) <- function(ret, dots, label) {
declare_time_error_if_data(ret)

# Below is a similar redispatch strategy, only at declare time
validation_delegate <- function(formula = NULL, ...) {
potential_outcomes_internal <- function(formula, ...) UseMethod("potential_outcomes_internal", formula)
for (c in class(formula)) {
s3method <- getS3method("potential_outcomes_internal", class(formula))
if (is.function(s3method)) return(s3method)
}
declare_time_error("Could not find appropriate implementation", ret)
}

s3method <- eval_tidy(quo(validation_delegate(!!!dots)))

# explicitly name all dots, for easier s3 handler validation
dots <- rename_dots(s3method, dots)

if ("level" %in% names(dots)) {
dots\$level <- reveal_nse_helper(dots\$level)
}

ret <- build_step(
currydata(s3method, dots),
handler = s3method,
dots = dots,
label = label,
step_type = attr(ret, "step_type"),
causal_type = attr(ret, "causal_type"),
call = attr(ret, "call")
)

if (has_validation_fn(s3method)) ret <- validate(s3method, ret, dots, label)

ret
}

#' @param formula a formula to calculate potential outcomes as functions of assignment variables.
#' @param conditions see \code{\link{expand_conditions}}. Provide values (e.g. \code{conditions = 1:4}) for a single assignment variable. If multiple assignment variables, provide named list (e.g. \code{conditions = list(Z1 = 0:1, Z2 = 0:1)}). Defaults to 0:1 if no conditions provided.
#' @param assignment_variables The name of the assignment variable. Generally not required as names are taken from \code{conditions}.
#' @param level a character specifying a level of hierarchy for fabricate to calculate at
#' @param data a data.frame
#' @importFrom fabricatr fabricate
#' @importFrom rlang quos := !! !!! as_quosure
#' @rdname declare_potential_outcomes
potential_outcomes_internal.formula <- function(formula,
conditions = c(0, 1),
assignment_variables = "Z", # only used to provide a default - read from names of conditions immediately after.
data,
level = NULL,
label = outcome_variable) {
outcome_variable <- as.character(formula[])

to_restore <- assignment_variables %icn% data
to_null <- setdiff(assignment_variables, to_restore)

# Build a single large fabricate call -
# fabricate( Z=1, Y_Z_1=f(Z), Z=2, Y_Z_2=f(Z), ..., Z=NULL)
condition_quos <- quos()

### If assn vars already present, swap them out
if (length(to_restore) > 0) {
restore_mangled <- paste(rep("_", max(nchar(colnames(data)))), collapse = "")

restore_mangled <- setNames(
lapply(to_restore, as.symbol),
paste0(".", restore_mangled, to_restore)
)

condition_quos <- c(condition_quos, quos(!!!restore_mangled))
}

# build call
expr <- as_quosure(formula)
for (i in seq_len(nrow(conditions))) {
condition_values <- conditions[i, , drop = FALSE]
out_name <- paste0(outcome_variable, "_", paste0(assignment_variables, "_", condition_values, collapse = "_"))

condition_quos <- c(condition_quos, quos(!!!condition_values, !!out_name := !!expr))
}

# clean up
if (length(to_restore) > 0) {
to_restore <- setNames(
lapply(names(restore_mangled), as.symbol),
to_restore
)
restore_mangled <- lapply(restore_mangled, function(x) NULL)
condition_quos <- c(condition_quos, quos(!!!to_restore), quos(!!!restore_mangled))
}

if (length(to_null) > 0) {
to_null <- lapply(setNames(nm = to_null), function(x) NULL)
condition_quos <- c(condition_quos, quos(!!!to_null))
}

if (is.character(level)) {
condition_quos <- quos(!!level := modify_level(!!!condition_quos))
}

### Actually do it and return
### Note ID_label=NA
structure(
fabricate(data = data, !!!condition_quos, ID_label = NA),
outcome_variable = outcome_variable,
assignment_variables = assignment_variables
)
}

validation_fn(potential_outcomes_internal.formula) <- function(ret, dots, label) {
dots\$formula <- eval_tidy(dots\$formula)
outcome_variable <- as.character(dots\$formula[])

if (length(dots\$formula) < 3) {
declare_time_error("Must provide an outcome in potential outcomes formula", ret)
}

if ("ID_label" %in% names(dots)) {
declare_time_error("Must not pass ID_label.", ret)
}

if ("assignment_variables" %in% names(dots)) {
dots\$assignment_variables <- reveal_nse_helper(dots\$assignment_variables)
}

dots\$conditions <- eval_tidy(quo(expand_conditions(!!!dots)))
dots\$assignment_variables <- names(dots\$conditions)

ret <- build_step(currydata(potential_outcomes_internal.formula,
dots),
handler = potential_outcomes_internal.formula,
dots = dots,
label = label,
step_type = attr(ret, "step_type"),
causal_type = attr(ret, "causal_type"),
call = attr(ret, "call")
)

# Note that this sets a design_validation callback for later use!!! see below
# step_meta is the data that design_validation will use for design time checks
structure(ret,
potential_outcomes_formula = formula,
step_meta = list(
outcome_variables = outcome_variable,
assignment_variables = names(dots\$conditions)
)
)
}

#' @importFrom fabricatr fabricate add_level modify_level
#' @rdname declare_potential_outcomes
potential_outcomes_internal.NULL <- function(formula = stop("Not provided"), ..., data, level = NULL) {
if (is.character(level)) {
fabricate(data = data, !!level := modify_level(...))
} else {
fabricate(data = data, ..., ID_label = NA)
}
}

validation_fn(potential_outcomes_internal.NULL) <- function(ret, dots, label) {
if ("ID_label" %in% names(dots)) {
declare_time_error("Must not pass ID_label.", ret)
}

if ("" %in% names(dots)) {
declare_time_warn("Unnamed declared argument in potential outcome", ret)
}

ret
}

#' Expand assignment conditions
#'
#' Internal helper to eagerly build assignment conditions for potential outcomes.
#'
#' If conditions is a data.frame, it is returned unchanged
#'
#' Otherwise, if conditions is a list, it is passed to expand.grid for expansion to a data.frame
#'
#' Otherwise, if condition is something else, box it in a list with assignment_variables for names,
#' and pass that to expand.grid.
#'
#' @param conditions the conditions
#' @param assignment_variables the name of assignment variables, if conditions is not already named.
#' @return a data.frame of potential outcome conditions
#' @keywords internal
expand_conditions <- function() {
if (!is.data.frame(conditions)) {
if (!is.list(conditions)) {
conditions <- rep(list(conditions), length(assignment_variables))
conditions <- setNames(conditions, assignment_variables)
}

conditions <- expand.grid(conditions, stringsAsFactors = FALSE)
}
conditions
}
formals(expand_conditions) <- formals(potential_outcomes_internal.formula)
formals(expand_conditions)["label"] <- list(NULL) # Fixes R CMD Check warning outcome is undefined
```

## Try the DeclareDesign package in your browser

Any scripts or data that you put into this service are public.

DeclareDesign documentation built on Aug. 21, 2021, 5:07 p.m.