R/blueprint.R

Defines functions capture_command is_blueprint print.blueprint blueprint_metadata_directory blueprint_metadata_name use_localized_metadata blueprint

Documented in blueprint

#' Create a blueprint
#'
#' @param name The name of the blueprint
#' @param command The code to build the target dataset
#' @param description An optional description of the dataset to be used for
#'   codebook generation
#' @param annotate If `TRUE`, during cleanup the metadata will "annotate"
#'   the dataset by adding variable attributes for each metadata field to
#'   make metadata provenance easier and responsive to code changes.
#' @param metadata The associated variable metadata for this dataset
#' @param metadata_file_type The kind of metadata file. Currently only CSV.
#' @param metadata_file_name The file name for the metadata file. If the
#'   option `blueprintr.use_local_metadata_path` is set to `TRUE`, then the
#'   default file name will be the name of the blueprint script, minus the .R
#'   extension. Otherwise, this will default to the name of the blueprint.
#' @param metadata_directory Where the metadata file will be stored. If the
#'   option `blueprintr.use_local_metadata_path` is set to `TRUE`, then the
#'   default location will be the folder where the blueprint script is
#'   located. Otherwise, this will default to `here::here("blueprints")`
#' @param metadata_file_path Overrides the metadata file path generated by
#'   `metadata_directory`, `name`, and `metadata_file_type` if not NULL.
#' @param extra_steps A `list()` of extra 'bpstep' objects, which add
#'   extra targets to the workflow after the desired dataset has completed
#'   its cleanup phase. Uses of this could include generating codebooks
#'   or other reports based on the built data.
#'   See [bp_add_bpstep()][bp_add_bpstep()] for more details.
#' @param ... Any other parameters and settings for the blueprint
#' @param class A subclass of blueprint capability, for future work
#'
#' @return A blueprint object
#'
#' @details # Cleanup Tasks
#' blueprintr offers some post-check tasks that attempt to match datasets to the
#' metadata as much as possible. There are two default tasks that run:
#'   1. Reorders variables to match metadata order.
#'   1. Drops variables marked with `dropped == TRUE` if the `dropped` variable
#'      exists in the metadata.
#'
#' The remaining tasks have to be enabled by the user:
#'   * If `labelled = TRUE` in the `blueprint()` command, all columns will be
#'     converted to [labelled()][haven::labelled()] columns, provided that at
#'     least the `description` field is filled in. If the `coding` column is
#'     present in the metadata, then categorical levels as specified by a
#'     [coding()][rcoder::coding()] will be added to the column as well. In case
#'     the `description` field is used for detailed column descriptions, the
#'     `title` field can be added to the metadata to act as short titles for the
#'     columns.
#'
#' @export
blueprint <- function(name,
                      command,
                      description = NULL,
                      metadata = NULL,
                      annotate = FALSE,
                      metadata_file_type = c("csv"),
                      metadata_file_name = NULL,
                      metadata_directory = NULL,
                      metadata_file_path = NULL,
                      extra_steps = NULL,
                      ...,
                      class = character()) {
  stopifnot(is.character(name) || is.null(name))
  stopifnot(is.null(description) || is.character(description))

  captured_command <- capture_command(substitute(command))
  metadata_file_type <- match.arg(metadata_file_type)

  metadata_file_name <- metadata_file_name %||% blueprint_metadata_name(name, .env = parent.frame())
  metadata_directory <- metadata_directory %||% blueprint_metadata_directory(.env = parent.frame())

  default_path <- file.path(
    metadata_directory,
    glue::glue("{metadata_file_name}.{metadata_file_type}")
  )
  path <- metadata_file_path %||% default_path

  structure(
    list(
      name = name,
      command = captured_command,
      description = description,
      annotate = annotate,
      metadata_file_path = path,
      extra_steps = extra_steps,
      ...
    ),
    class = c(class, "blueprint")
  )
}

use_localized_metadata <- function() {
  getOption("blueprintr.use_local_metadata_path", default = FALSE)
}

blueprint_metadata_name <- function(name, .env = parent.frame()) {
  if (exists("cur_blueprint_script", envir = .env)) {
    script_file <- basename(.env$cur_blueprint_script)
    script_name <- gsub("\\.[rR]$", "", script_file)

    if (use_localized_metadata()) {
      name <- script_name
    }
  }

  name
}

blueprint_metadata_directory <- function(.env = parent.frame()) {
  script_dir <- here::here("blueprints")

  if (exists("cur_blueprint_script", envir = .env)) {
    if (use_localized_metadata()) {
      script_dir <- dirname(.env$cur_blueprint_script)
    }
  }

  script_dir
}

#' @export
print.blueprint <- function(x, ...) {
  cat_line("<blueprint: {ui_value(x$name)}>") # nocov start
  cat_line()

  if (!is.null(x$description)) {
    cat_line("Description: {x$description}")
  } else {
    cat_line("No description provided")
  }

  cat_line("Annotations: {if (isTRUE(x$annotate)) 'ENABLED' else 'DISABLED'}")

  cat_line("Metadata location: {ui_value(metadata_path(x))}")
  cat_line()

  if (!is.null(x$checks)) {
    cat_line("-- Dataset content checks --")
    print(x$checks)
    cat_line()
  }

  cat_line("-- Command --")
  cat_line("Workflow command:")
  print(translate_macros(x$command))
  cat_line()
  cat_line("Raw command:")
  print(x$command)

  invisible(x) # nocov end
}

is_blueprint <- function(x) {
  inherits(x, "blueprint")
}

capture_command <- function(quoted_statement) {
  if (identical(quote(.), rlang::node_car(quoted_statement))) {
    return(eval(rlang::node_cdr(quoted_statement)[[1]]))
  }

  quoted_statement
}

handle_name_dispatch <- function(x, suffix = NULL, .env = parent.frame()) {
  stopifnot(is.null(suffix) || is.character(suffix))

  sym_x <- substitute(x, env = .env)

  if (!exists(as.character(sym_x))) {
    # User passed in a symbol to a macro which needs to be
    # converted to a string

    return(handle_name_dispatch(
      as.character(sym_x),
      suffix = suffix
    ))
  }

  if (is_blueprint(x)) {
    return(handle_name_dispatch(
      x$name,
      suffix = suffix
    ))
  }

  if (is.character(x)) {
    suffix <- if (!is.null(suffix)) paste0("_", suffix) else NULL
    return(paste0(x, suffix))
  }

  bp_err("Unhandled object passed to blueprint name retrieval system: {class(x)}")
}

blueprint_target_name <- function(x, .env = parent.frame()) {
  handle_name_dispatch(x, suffix = "initial", .env = .env)
}

blueprint_checks_name <- function(x, .env = parent.frame()) {
  handle_name_dispatch(x, suffix = "checks", .env = .env)
}

blueprint_final_name <- function(x, .env = parent.frame()) {
  handle_name_dispatch(x, .env = .env)
}

blueprint_reference_name <- function(x, .env = parent.frame()) {
  handle_name_dispatch(x, suffix = "blueprint", .env = .env)
}

blueprint_codebook_name <- function(x, .env = parent.frame()) {
  handle_name_dispatch(x, suffix = "codebook", .env = .env)
}
nyuglobalties/blueprintr documentation built on July 16, 2024, 10:27 a.m.