#' Declare a design
#'
#' @param lhs A step in a research design, beginning with a function that draws the population. Steps are evaluated sequentially. With the exception of the first step, all steps must be functions that take a \code{data.frame} as an argument and return a \code{data.frame}. Typically, many steps are declared using the \code{declare_} functions, i.e., \code{\link{declare_population}}, \code{\link{declare_population}}, \code{\link{declare_sampling}}, \code{\link{declare_potential_outcomes}}, \code{\link{declare_estimand}}, \code{\link{declare_assignment}}, and \code{\link{declare_estimator}}.
#' @param rhs A second step in a research design
#'
#' @details
#'
#' Users can supply three kinds of functions to create a design:
#'
#' 1. Data generating functions. These include population, assignment, and sampling functions.
#'
#' 2. Estimand functions.
#'
#' 3. Estimator functions.
#'
#' The location of the estimand and estimator functions in the pipeline of functions determine *when* the values of the estimand and estimator are calculated. This allows users to, for example, differentiate between a population average treatment effect and a sample average treatment effect by placing the estimand function before or after sampling.
#'
#' Design objects declared with the + operator can be investigated with a series of post-declaration commands, such as \code{\link{draw_data}}, \code{\link{draw_estimands}}, \code{\link{draw_estimates}}, and \code{\link{diagnose_design}}.
#'
#' The print and summary methods for a design object return some helpful descriptions of the steps in your research design. If randomizr functions are used for any assignment or sampling steps, additional details about those steps are provided.
#'
#' @return a list of two functions, the \code{design_function} and the \code{data_function}. The \code{design_function} runs the design once, i.e. draws the data and calculates any estimates and estimands defined in \code{...}, returned separately as two \code{data.frame}'s. The \code{data_function} runs the design once also, but only returns the final data.
#'
#' @name declare_design
#'
#' @importFrom rlang quos eval_tidy quo_text is_formula is_symbol
#' @importFrom utils bibentry
#' @export
#'
#' @examples
#'
#' my_population <- declare_population(N = 500, noise = rnorm(N))
#'
#' my_potential_outcomes <- declare_potential_outcomes(Y ~ Z + noise)
#'
#' my_sampling <- declare_sampling(n = 250)
#'
#' my_assignment <- declare_assignment(m = 25)
#'
#' my_estimand <- declare_estimand(ATE = mean(Y_Z_1 - Y_Z_0))
#'
#' my_estimator <- declare_estimator(Y ~ Z, estimand = my_estimand)
#'
#' my_mutate <- declare_step(dplyr::mutate, noise_sq = noise^2)
#'
#' my_reveal <- declare_reveal()
#'
#' design <- my_population + my_potential_outcomes + my_sampling +
#' my_estimand + my_mutate +
#' my_assignment + my_reveal + my_estimator
#'
#' design
#'
#' df <- draw_data(design)
#'
#' estimates <- draw_estimates(design)
#' estimands <- draw_estimands(design)
#'
#' # You can add steps to a design
#'
#' design <- my_population + my_potential_outcomes
#' design + my_sampling
#'
#' # Special Cases
#'
#' # You may wish to have a design with only one step:
#'
#' design <- my_population + NULL
#' design
#'
#'
#' \dontrun{
#' diagnosis <- diagnose_design(design)
#'
#' summary(diagnosis)
#' }
#'
#' @importFrom rlang enexpr expr_deparse is_null is_missing enquos f_rhs
#'
#' @export
`+.dd` <- function(lhs, rhs) {
# two cases
# 1. lhs is a step
# 2. lhs is a design
if (missing(rhs)) {
rhs <- NULL
qs <- enquos(lhs)
} else {
qs <- enquos(lhs, rhs)
}
if (!inherits(rhs, "dd") && !inherits(rhs, "function") && !is.null(rhs)) {
stop("The right hand side of the + does not appear to be a ",
"DeclareDesign object or a function.",
call. = FALSE
)
}
lhs <- if (inherits(lhs, "design")) {
Filter(Negate(is_autogenerated), lhs)
} else {
wrap_step(lhs, f_rhs(qs[[1]]))
}
rhs <- if (inherits(rhs, "design")) {
Filter(Negate(is_autogenerated), rhs)
} else if (!is.null(rhs)) wrap_step(rhs, f_rhs(qs[[2]]))
unique_nms <- make.unique(c(names(lhs), names(rhs)), sep = "_")
if (!is.null(rhs)) {
names(rhs) <- unique_nms[(length(lhs) + 1):length(unique_nms)]
}
steps <- append(lhs, rhs)
construct_design(steps)
}
construct_design <- function(steps) {
ret <- structure(steps,
call = match.call(),
class = c("design", "dd")
)
# for each step in qs, eval, and handle edge cases (dplyr calls, non-declared functions)
for (i in seq_along(ret)) {
# Is it a non-declared function
if (is.function(ret[[i]]) && !inherits(ret[[i]], "design_step")) {
# warn if the function call does not have exactly data as arguments
# except: if it is a dplyr pipeline (class fseq)
if (!identical(names(formals(ret[[i]])), "data") &&
!inherits(ret[[i]], "fseq")) {
warning("Undeclared Step ", i, " function arguments are not exactly 'data'")
}
ret[[i]] <- build_step(
ret[[i]],
handler = NULL, dots = list(), label = names(ret)[i],
step_type = "undeclared", causal_type = "dgp", call = attr(ret[[i]], "call")
)
}
}
# If there is a design-time validation, trigger it
for (i in seq_along(ret)) {
step <- ret[[i]]
callback <- attr(step, "design_validation")
if (is.function(callback)) {
ret <- callback(ret, i, step)
}
}
# name new auto-reveal steps
names(ret)[sapply(ret, function(x) attr(x, "auto-generated") %||% FALSE)] <- "auto_reveal"
# ensure all names are unique
unique_nms <- make.unique(names(ret), sep = "_")
# Assert that all labels are unique
local({
labels <- sapply(ret, attr, "label")
function_types <- sapply(ret, attr, "step_type")
check_unique_labels <- function(labels, types, what) {
ss <- labels[types == what]
if (anyDuplicated(ss)) {
stop(
"You have ", what, "s with identical labels: ",
unique(ss[duplicated(ss)]),
"\nPlease provide ", what, "s with unique labels"
)
}
}
check_unique_labels(labels, function_types, "estimand")
check_unique_labels(labels, function_types, "estimator")
})
ret
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.