ddoldversion: Declare and Diagnose Research Designs

#' Declare a design 
#'
#' @param lhs A step in a research design, beginning with a function that draws the population. Steps are evaluated sequentially. With the exception of the first step, all steps must be functions that take a \code{data.frame} as an argument and return a \code{data.frame}. Typically, many steps are declared using the \code{declare_} functions, i.e., \code{\link{declare_population}}, \code{\link{declare_population}}, \code{\link{declare_sampling}}, \code{\link{declare_potential_outcomes}}, \code{\link{declare_estimand}}, \code{\link{declare_assignment}}, and \code{\link{declare_estimator}}.
#' @param rhs A second step in a research design
#'
#' @details
#'
#' Users can supply three kinds of functions to create a design:
#'
#' 1. Data generating functions. These include population, assignment, and sampling functions.
#'
#' 2. Estimand functions.
#'
#' 3. Estimator functions.
#'
#' The location of the estimand and estimator functions in the pipeline of functions determine *when* the values of the estimand and estimator are calculated. This allows users to, for example, differentiate between a population average treatment effect and a sample average treatment effect by placing the estimand function before or after sampling.
#'
#' Design objects declared with the + operator can be investigated with a series of post-declaration commands, such as \code{\link{draw_data}}, \code{\link{draw_estimands}}, \code{\link{draw_estimates}}, and \code{\link{diagnose_design}}.
#'
#' The print and summary methods for a design object return some helpful descriptions of the steps in your research design. If randomizr functions are used for any assignment or sampling steps, additional details about those steps are provided.
#'
#' @return a list of two functions, the \code{design_function} and the \code{data_function}. The \code{design_function} runs the design once, i.e. draws the data and calculates any estimates and estimands defined in \code{...}, returned separately as two \code{data.frame}'s. The \code{data_function} runs the design once also, but only returns the final data.
#'
#' @name declare_design
#'
#' @importFrom rlang quos eval_tidy quo_text is_formula is_symbol
#' @importFrom utils bibentry
#' @export
#'
#' @examples
#'
#' my_population <- declare_population(N = 500, noise = rnorm(N))
#'
#' my_potential_outcomes <- declare_potential_outcomes(Y ~ Z + noise)
#'
#' my_sampling <- declare_sampling(n = 250)
#'
#' my_assignment <- declare_assignment(m = 25)
#'
#' my_estimand <- declare_estimand(ATE = mean(Y_Z_1 - Y_Z_0))
#'
#' my_estimator <- declare_estimator(Y ~ Z, estimand = my_estimand)
#'
#' my_mutate <- declare_step(dplyr::mutate, noise_sq = noise^2)
#'
#' my_reveal <- declare_reveal()
#'
#' design <- my_population + my_potential_outcomes + my_sampling +
#'          my_estimand + my_mutate +
#'          my_assignment + my_reveal + my_estimator
#'
#' design
#'
#' df <- draw_data(design)
#'
#' estimates <- draw_estimates(design)
#' estimands <- draw_estimands(design)
#'
#' # You can add steps to a design
#'
#' design <- my_population + my_potential_outcomes
#' design + my_sampling
#'
#' # Special Cases
#'
#' # You may wish to have a design with only one step:
#'
#' design <- my_population + NULL
#' design
#'
#'
#' \dontrun{
#' diagnosis <- diagnose_design(design)
#'
#' summary(diagnosis)
#' }
#'
#' @importFrom rlang enexpr expr_deparse is_null is_missing enquos f_rhs
#'
#' @export
`+.dd` <- function(lhs, rhs) {

  # two cases
  # 1. lhs is a step
  # 2. lhs is a design

  if (missing(rhs)) {
    rhs <- NULL
    qs <- enquos(lhs)
  } else {
    qs <- enquos(lhs, rhs)
  }

  if (!inherits(rhs, "dd") && !inherits(rhs, "function") && !is.null(rhs)) {
    stop("The right hand side of the + does not appear to be a ",
      "DeclareDesign object or a function.",
      call. = FALSE
    )
  }

  lhs <- if (inherits(lhs, "design")) {
    Filter(Negate(is_autogenerated), lhs)
  } else {
    wrap_step(lhs, f_rhs(qs[[1]]))
  }

  rhs <- if (inherits(rhs, "design")) {
    Filter(Negate(is_autogenerated), rhs)
  } else if (!is.null(rhs)) wrap_step(rhs, f_rhs(qs[[2]]))

  unique_nms <- make.unique(c(names(lhs), names(rhs)), sep = "_")

  if (!is.null(rhs)) {
    names(rhs) <- unique_nms[(length(lhs) + 1):length(unique_nms)]
  }

  steps <- append(lhs, rhs)

  construct_design(steps)
}


construct_design <- function(steps) {
  ret <- structure(steps,
    call = match.call(),
    class = c("design", "dd")
  )

  # for each step in qs, eval, and handle edge cases (dplyr calls, non-declared functions)
  for (i in seq_along(ret)) {


    # Is it a non-declared function
    if (is.function(ret[[i]]) && !inherits(ret[[i]], "design_step")) {

      # warn if the function call does not have exactly data as arguments
      #  except: if it is a dplyr pipeline (class fseq)
      if (!identical(names(formals(ret[[i]])), "data") &&
        !inherits(ret[[i]], "fseq")) {
        warning("Undeclared Step ", i, " function arguments are not exactly 'data'")
      }

      ret[[i]] <- build_step(
        ret[[i]],
        handler = NULL, dots = list(), label = names(ret)[i],
        step_type = "undeclared", causal_type = "dgp", call = attr(ret[[i]], "call")
      )
    }
  }

  # If there is a design-time validation, trigger it
  for (i in seq_along(ret)) {
    step <- ret[[i]]
    callback <- attr(step, "design_validation")
    if (is.function(callback)) {
      ret <- callback(ret, i, step)
    }
  }

  # name new auto-reveal steps
  names(ret)[sapply(ret, function(x) attr(x, "auto-generated") %||% FALSE)] <- "auto_reveal"

  # ensure all names are unique
  unique_nms <- make.unique(names(ret), sep = "_")

  # Assert that all labels are unique
  local({
    labels <- sapply(ret, attr, "label")
    function_types <- sapply(ret, attr, "step_type")

    check_unique_labels <- function(labels, types, what) {
      ss <- labels[types == what]
      if (anyDuplicated(ss)) {
        stop(
          "You have ", what, "s with identical labels: ",
          unique(ss[duplicated(ss)]),
          "\nPlease provide ", what, "s with unique labels"
        )
      }
    }

    check_unique_labels(labels, function_types, "estimand")
    check_unique_labels(labels, function_types, "estimator")
  })

  ret
}

DeclareDesign/ddoldversion documentation built on Oct. 30, 2019, 5:17 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

DeclareDesign/ddoldversion
Declare and Diagnose Research Designs

R/construct_design.R
In DeclareDesign/ddoldversion: Declare and Diagnose Research Designs

Defines functions `+.dd` construct_design

R Package Documentation

Browse R Packages

We want your feedback!

DeclareDesign/ddoldversion Declare and Diagnose Research Designs

R/construct_design.R In DeclareDesign/ddoldversion: Declare and Diagnose Research Designs

Defines functions `+.dd` construct_design

R Package Documentation

Browse R Packages

We want your feedback!

DeclareDesign/ddoldversion
Declare and Diagnose Research Designs

R/construct_design.R
In DeclareDesign/ddoldversion: Declare and Diagnose Research Designs