#  This file is part of the 'rstudio/pointblank' project.
#  Copyright (c) 2017-2024 pointblank authors
#  For full copyright and license information, please look at
#  https://rstudio.github.io/pointblank/LICENSE.html

#' Execute all agent and informant YAML tasks
#' @description
#' The `yaml_exec()` function takes all relevant **pointblank** YAML files in a
#' directory and executes them. Execution involves interrogation of agents for
#' YAML agents and incorporation of informants for YAML informants. Under the
#' hood, this uses [yaml_agent_interrogate()] and [yaml_informant_incorporate()]
#' and then [x_write_disk()] to save the processed objects to an output
#' directory. These written artifacts can be read in at any later time with the
#' [x_read_disk()] function or the [read_disk_multiagent()] function. This is
#' useful when data in the target tables are changing and the periodic testing
#' of such tables is part of a data quality monitoring plan.
#' The output RDS files are named according to the object type processed, the
#' target table, and the date-time of processing. For convenience and
#' modularity, this setup is ideal when a table store YAML file (typically named
#' `"tbl_store.yml"` and produced via the [tbl_store()] and [yaml_write()]
#' workflow) is available in the directory, and when table-prep formulas are
#' accessed by name through [tbl_source()].
#' A typical directory of files set up for execution in this way might have the
#' following contents:
#' - a `"tbl_store.yml"` file for holding table-prep formulas (created with
#' [tbl_store()] and written to YAML with [yaml_write()])
#' - one or more YAML *agent* files to validate tables (ideally using
#' [tbl_source()])
#' - one or more YAML *informant* files to provide refreshed metadata on tables
#' (again, using [tbl_source()] to reference table preparations is ideal)
#' - an output folder (default is `"output"`) to save serialized versions of
#' processed agents and informants
#' Minimal example files of the aforementioned types can be found in the
#' **pointblank** package through the following `system.file()` calls:
#' - `system.file("yaml", "agent-small_table.yml", package = "pointblank")`
#' - `system.file("yaml", "informant-small_table.yml", package = "pointblank")`
#' - `system.file("yaml", "tbl_store.yml", package = "pointblank")`
#' The directory itself can be accessed using `system.file("yaml", package =
#' "pointblank")`.
#' @param path The path that contains the YAML files for agents and informants.
#' @param files A vector of YAML files to use in the execution workflow. By
#'   default, `yaml_exec()` will attempt to process every valid YAML file in
#'   `path` but supplying a vector here limits the scope to the specified files.
#' @param write_to_disk Should the execution workflow include a step that writes
#'   output files to disk? This internally calls [x_write_disk()] to write RDS
#'   files and uses the base filename of the agent/informant YAML file as part
#'   of the output filename, appending the date-time to the basename.
#' @param output_path The output path for any generated output files. By
#'   default, this will be a subdirectory of the provided `path` called
#'   `"output"`.
#' @param keep_tbl,keep_extracts For agents, the table may be kept if it is a
#'   data frame object (database tables will never be pulled for storage) and
#'   *extracts*, collections of table rows that failed a validation step, may
#'   also be stored. By default, both of these options are set to `FALSE`.
#' @return Invisibly returns a named vector of file paths for the input files
#'   that were processed; file output paths (for wherever writing occurred) are
#'   given as the names.
#' @examples
#' if (interactive()) {
#' # The 'yaml' directory that is
#' # accessible in the package through
#' # `system.file()` contains the files
#' # 1. `agent-small_table.yml`
#' # 2. `informant-small_table.yml`
#' # 3. `tbl_store.yml`
#' # There are references in YAML files
#' # 1 & 2 to the table store YAML file,
#' # so, they all work together cohesively
#' # Let's process the agent and the
#' # informant YAML files with `yaml_exec()`;
#' # and we'll specify the working directory
#' # as the place where the output RDS files
#' # are written
#' output_dir <- getwd()
#' yaml_exec(
#'   path = system.file(
#'     "yaml", package = "pointblank"
#'   ),
#'   output_path = output_dir
#' )
#' # This generates two RDS files in the
#' # working directory: one for the agent
#' # and the other for the informant; each
#' # of them is automatically time-stamped
#' # so that periodic execution can be
#' # safely carried out without risk of
#' # overwriting 
#' }
#' @family pointblank YAML
#' @section Function ID:
#' 11-8
#' @export
yaml_exec <- function(
    path = NULL,
    files = NULL,
    write_to_disk = TRUE,
    output_path = file.path(path, "output"),
    keep_tbl = FALSE,
    keep_extracts = FALSE
) {

  # Record the caller's working directory up front: it anchors any relative
  # `output_path` and must exist whether or not `path` was supplied
  initial_wd <- fs::path_abs(fs::path_wd())

  # If `path` isn't provided then the working directory
  # is the path containing the input files
  if (is.null(path)) {

    path <- initial_wd

  } else {

    wd_path <- fs::path_abs(path)

    if (!fs::dir_exists(wd_path)) {
      stop(
        "The `path` provided (", as.character(wd_path), ") does not exist.",
        call. = FALSE
      )
    }

    # Input files are resolved against the working directory below, so
    # switch to `path` for the duration of the call and restore on exit
    if (initial_wd != wd_path) {
      setwd(as.character(wd_path))
      on.exit(setwd(as.character(initial_wd)), add = TRUE)
    }

    path <- wd_path
  }

  # Construct paths to files; without an explicit `files` vector, every
  # file in the working directory with a `.yml`/`.yaml` extension is a
  # candidate
  if (!is.null(files)) {
    files_paths <- fs::path(fs::path_wd(), files)
  } else {
    files_paths <- fs::path(fs::path_wd(), fs::dir_ls(regexp = "\\.ya?ml$"))
  }

  # Normalize the output path; the default for `output_path` is evaluated
  # lazily here, after `path` has been made absolute, and relative paths are
  # taken to be relative to the caller's working directory
  if (is.null(output_path)) {
    output_path <- fs::path_norm(initial_wd)
  } else {
    if (!fs::is_absolute_path(output_path)) {
      output_path <- fs::path_norm(fs::path(initial_wd, output_path))
    }
  }

  agent_file_paths <- c()
  informant_file_paths <- c()

  # Classify each candidate by its top-level YAML keys, reading every file
  # only once; the two checks are independent, so a file carrying both sets
  # of keys is treated as an agent and as an informant
  for (file_path in files_paths) {

    yaml_fields <- names(yaml::read_yaml(file_path))

    if (all(c("tbl", "tbl_name", "locale", "steps") %in% yaml_fields)) {
      agent_file_paths <- c(agent_file_paths, file_path)
    }

    if (all(c("table", "columns") %in% yaml_fields)) {
      informant_file_paths <- c(informant_file_paths, file_path)
    }
  }

  # Get the total number of files that are candidates for agents/informants
  total_files <- length(agent_file_paths) + length(informant_file_paths)

  # If there are no files to process, invisibly return NULL
  if (total_files == 0) {
    return(invisible(NULL))
  }

  # Create vectors for collecting files that were written and also read in
  files_written <- c()
  files_read <- c()

  # `{total_files}` is interpolated by cli when the header is emitted
  if (total_files == 1) {
    execution_progress_header <-
      "Execution Started - there is a single file to process"
  } else {
    execution_progress_header <-
      "Execution Started - there are {total_files} files to process"
  }

  cli::cli_h1(execution_progress_header)

  if (length(agent_file_paths) > 0) {

    for (agent_yml_file in agent_file_paths) {

      cli::cli_rule(left = basename(agent_yml_file))

      agent <- yaml_agent_interrogate(agent_yml_file)

      if (write_to_disk) {

        fs::dir_create(path = output_path)

        # Construct the filename/path for the output RDS file
        file_name <-
          fs::path(
            output_path,
            paste0(basename(tools::file_path_sans_ext(agent_yml_file)), ".rds")
          )

        # Write the file to disk, affixing the date and time to
        # the filename for ease of parsing by other functions
        x_write_disk(
          x = agent,
          filename = affix_datetime(
            filename = file_name,
            delimiter = "-"
          ),
          keep_tbl = keep_tbl,
          keep_extracts = keep_extracts
        )

        # The recorded name is the output path before the date-time is affixed
        files_written <- c(files_written, as.character(file_name))

      } else {
        files_written <- c(files_written, "")
      }

      files_read <- c(files_read, agent_yml_file)
    }
  }

  if (length(informant_file_paths) > 0) {

    for (informant_yml_file in informant_file_paths) {

      cli::cli_rule(left = basename(informant_yml_file))

      informant <- yaml_informant_incorporate(informant_yml_file)

      if (write_to_disk) {

        fs::dir_create(path = output_path)

        # Construct the filename/path for the output RDS file
        file_name <-
          fs::path(
            output_path,
            paste0(
              basename(tools::file_path_sans_ext(informant_yml_file)),
              ".rds"
            )
          )

        # Write the file to disk, affixing the date and time to
        # the filename for ease of parsing by other functions
        x_write_disk(
          x = informant,
          filename = affix_datetime(
            filename = file_name,
            delimiter = "-"
          )
        )

        # The recorded name is the output path before the date-time is affixed
        files_written <- c(files_written, as.character(file_name))

      } else {
        files_written <- c(files_written, "")
      }

      files_read <- c(files_read, informant_yml_file)
    }
  }

  cli::cli_h1("Execution Finished")

  # Input paths are the values; output paths ("" when nothing was written)
  # are the names
  files_in_out <- files_read
  names(files_in_out) <- files_written

  invisible(files_in_out)
}
