R/outpack_packet.R

Defines functions check_current_packet outpack_packet_finish outpack_packet_file_list outpack_packet_file_mark outpack_packet_add_custom outpack_packet_use_dependency outpack_packet_end outpack_packet_cancel outpack_packet_start

##' Start a packet build (`outpack_packet_start`), end one
##' (`outpack_packet_cancel`, `outpack_packet_end`) and interact with
##' one (`outpack_packet_use_dependency`).
##'
##' @title Start, interact with, and end a packet build
##'
##' @param path Path to the build / output directory.
##'
##' @param name The name of the packet
##'
##' @param parameters Optionally, a named list of parameters.  The
##'   names must be unique, and the values must all be non-NA scalar
##'   atomics (logical, integer, numeric, character)
##'
##' @param id Optionally, an outpack id via `orderly::outpack_id`. If
##'   not given a new id will be generated.
##'
##' @inheritParams outpack_metadata
##'
##' @return Invisibly, a copy of the packet data; this can be passed
##'   as the `packet` argument.
##'
##' @noRd
outpack_packet_start <- function(path, name, parameters = NULL, id = NULL,
                                 root = NULL) {
  root <- root_open(root, require_orderly = FALSE)

  assert_scalar_character(name)
  assert_is_directory(path)
  validate_parameters(parameters, NULL)

  if (is.null(id)) {
    id <- outpack_id()
  } else {
    validate_outpack_id(id)
  }

  time <- list(start = Sys.time())

  packet <- structure(
    list2env(
      list(
        name = name,
        id = id,
        path = path,
        parameters = parameters,
        files = list(),
        time = time,
        root = root),
      parent = emptyenv()),
    class = "outpack_packet")

  ## Human readable logging:
  cli::cli_alert_info(
    "Starting packet '{.pkg {name}}' {.code {id}} at {time$start}")
  if (length(parameters) > 0) {
    cli::cli_alert_info("Parameters:")
    cli::cli_li(sprintf("{.bold %s}: %s",
                        names(parameters), unname(parameters)))
  }

  invisible(packet)
}


outpack_packet_cancel <- function(packet) {
  packet <- check_current_packet(packet)
  cli::cli_alert_danger("Cancelling {packet$id}")
  outpack_packet_finish(packet)
}


##' @param insert Logical, indicating if we should insert the packet
##'   into the store. This is the default and generally what you
##'   want. The use-case we have for `insert = FALSE` is where you
##'   want to write out all metadata after a failure, and in this case
##'   you would not want to do a final insertion into the outpack
##'   archive. When `insert = FALSE`, we write out the json metadata
##'   that would have been written as `outpack.json` within the packet
##'   working directory.  Note that this skips a lot of validation
##'   (for example, validating that all files exist and that files
##'   marked immutable have not been changed)
##'
##' @noRd
outpack_packet_end <- function(packet, insert = TRUE) {
  packet <- check_current_packet(packet)
  packet$time$end <- Sys.time()
  hash_algorithm <- packet$root$config$core$hash_algorithm
  elapsed_str <- format(packet$time$end - packet$time$start)
  cli::cli_alert_info(
    "Finished {packet$id} at {packet$time$end} ({elapsed_str})")
  json <- outpack_metadata_create(packet$path, packet$name, packet$id,
                                  packet$time,
                                  files = NULL,
                                  depends = packet$depends,
                                  parameters = packet$parameters,
                                  git = git_info(packet$path),
                                  custom = packet$custom,
                                  file_hash = packet$files$immutable,
                                  file_ignore = packet$files$ignored,
                                  hash_algorithm = hash_algorithm)
  if (insert) {
    outpack_insert_packet(packet$path, json, packet$root)
  } else {
    ## TODO: I am not sure what the best filename to use here is -
    ## good options feel like 'outpack.json' and `<id>` or '<id>.json'
    ## (totally collision resistant)
    writeLines(json, file.path(packet$path, "outpack.json"))
  }
  outpack_packet_finish(packet)
}


##' @param query An [orderly_query()] object, or something
##'   (e.g., a string) that can be trivially converted into one.
##'
##' @param search_options Optional search options for restricting the
##'   search (see [orderly_search()] for details)
##'
##' @param envir Optional environment for `environment:` lookups in
##'   `query`, and for interpolating filenames in `files`; the default
##'   is to use the parent frame, but other suitable options are the
##'   global environment or the environment of the script you are
##'   running (this only relevant if you have `environment:` lookups
##'   in `query`).
##'
##' @param overwrite Overwrite files at the destination; this is
##'   typically what you want, but set to `FALSE` if you would prefer
##'   that an error be thrown if the destination file already exists.
##'
##' @inheritParams orderly_copy_files
##' @noRd
outpack_packet_use_dependency <- function(packet, query, files,
                                          envir = parent.frame(),
                                          search_options = NULL,
                                          overwrite = TRUE) {
  packet <- check_current_packet(packet)
  query <- as_orderly_query(query, arg = "query")
  search_options <- search_options %||% build_search_options()
  assert_is(search_options, "orderly_search_options")

  if (!query$info$single) {
    cli::cli_abort(
      c("The provided query is not guaranteed to return a single value",
        ">" = squote(deparse_query(query$value$expr, NULL, NULL)),
        "i" = "Did you forget latest(...)?"))
  }

  id <- orderly_search(query,
                       parameters = packet$parameters,
                       envir = envir,
                       location = search_options$location,
                       allow_remote = search_options$allow_remote,
                       fetch_metadata = search_options$fetch_metadata,
                       root = packet$root)
  if (is.na(id)) {
    explanation <- orderly_query_explain(
      query, parameters = packet$parameters, envir = envir,
      location = search_options$location,
      allow_remote = search_options$allow_remote,
      root = packet$root)
    cli::cli_abort(
      c("Failed to find packet for query '{format(query)}'",
        i = "See 'rlang::last_error()$explanation' for details"),
      explanation = explanation)
  }

  needs_pull <- search_options$allow_remote &&
    packet$root$config$core$require_complete_tree &&
    !(id %in% packet$root$index$unpacked())
  if (needs_pull) {
    orderly_location_pull(id, root = packet$root)
  }

  result <- orderly_copy_files(id, files = files, dest = packet$path,
                               location = search_options$location,
                               allow_remote = search_options$allow_remote,
                               fetch_metadata = search_options$fetch_metadata,
                               overwrite = overwrite,
                               envir = envir,
                               root = packet$root)

  query_str <- deparse_query(query$value$expr,
                             lapply(query$subquery, "[[", "expr"),
                             envir)

  ## Only update packet information after success, to reflect new
  ## metadata
  depends <- list(
    packet = id,
    query = query_str,
    files = result$files)
  packet$depends <- c(packet$depends, list(depends))

  outpack_packet_file_mark(packet, result$files$here, "immutable")

  invisible(result)
}


##' @section Custom metadata:
##'
##' The `outpack_packet_add_custom` function adds arbitrary
##'   additional metadata into a packet. It is primarily designed for
##'   use with applications that build on outpack to provide
##'   additional information beyond the minimal set provided by
##'   outpack.
##'
##' For example, orderly tracks "artefacts" which collect groups of
##'   file outputs into logical bundles.  To support this it needs to
##'   register additional data for each artefact with:
##'
##' * the description of the artefact (a short phrase)
##' * the format of the artefact (a string describing the data type)
##' * the contents of the artefact (an array of filenames)
##'
##' JSON for this might look like:
##'
##' ```json
##' {
##'   "artefacts": [
##'     {
##'       "description": "Data for onward use",
##'       "format": "data",
##'       "contents": ["results.rds", "summary.rds"]
##'     },
##'     {
##'       "description": "Diagnostic figures",
##'       "format": "staticgraph",
##'       "contents": ["fits.png", "inputs.png"]
##'     }
##'   ]
##' }
##' ```
##'
##' Here, we describe two artefacts, together collecting four files.
##'
##' We need to store these in outpack's final metadata, and we want to
##'   do this in a way that allows easy querying later on while
##'   scoping the data to your application.  To allow for this we
##'   group all data your application adds under an application key
##'   (e.g., `orderly`).  You can then store whatever data you want
##'   beneath this key.
##'
##' **NOTE1**: A limitation here is that the filenames above cannot be
##'   checked against the outpack list of files because outpack does
##'   not know that `contents` here refers to filenames.
##'
##' **NOTE2**: To allow for predictable serialisation to JSON, you
##'   must serialise your own data before passing through to
##'   `outpack_packet_add_custom`.
##'
##' @noRd
##'
##' @param application The name of the application (used to organise
##'   the data and query it later, see Details)
##'
##' @param data Additional metadata to add to the packet. This must be
##'   a string representing already-serialised json data.
outpack_packet_add_custom <- function(packet, application, data) {
  p <- check_current_packet(packet)

  assert_scalar_character(application)
  assert_scalar_character(data)

  parse_json(data, name = "custom metadata")

  if (application %in% vcapply(packet$custom, "[[", "application")) {
    cli::cli_abort(
      "metadata for '{application}' has already been added for this packet")
  }

  custom <- list(application = application, data = data)
  packet$custom <- c(packet$custom, list(custom))
  invisible()
}


##' Mark file within an in-progress packet. This will store the hash
##' of the file within the internal outpack structures and force an
##' error if the file is changed or deleted later.  The function
##' `outpack_packet_file_list()` will report on which files
##' are marked (or unmarked) within the directory.
##'
##' @title Mark files during packet run
##'
##' @param packet The current packet
##'
##' @param files A character vector of relative paths
##'
##' @param status A status to mark the file with. Must be "immutable"
##'   or "ignored"
##'
##' @return Depending on function
##'
##' * `outpack_packet_file_mark` returns nothing
##' * `outpack_packet_file_list` returns a [data.frame] with columns
##'   `path` and `status` (`immutable`, `ignored` or `unknown`)
##'
##' @noRd
outpack_packet_file_mark <- function(packet, files, status) {
  status <- match_value(status, c("immutable", "ignored"))
  packet <- check_current_packet(packet)

  assert_file_exists_relative(files, workdir = packet$path, name = "File",
                              call = environment())

  ## TODO: these are exclusive categories because we later return a
  ## 1:1 mapping of file to status
  if (status == "immutable") {
    hash_algorithm <- packet$root$config$core$hash_algorithm
    value <- withr::with_dir(packet$path,
                             hash_files(files, hash_algorithm, named = TRUE))

    if (any(files %in% packet$files$ignored)) {
      err <- intersect(files, packet$files$ignored)
      cli::cli_abort(c("Cannot mark ignored files as immutable",
                       set_names(err, "x")))
    }

    prev <- names(packet$files$immutable)
    validate_hashes(value, packet$files$immutable[intersect(files, prev)],
                    call = environment())
    value <- value[setdiff(names(value), prev)]
    packet$files$immutable <- c(packet$files$immutable, value)
  } else if (status == "ignored") {
    if (any(files %in% names(packet$files$immutable))) {
      err <- intersect(files, names(packet$files$immutable))
      cli::cli_abort(c("Cannot mark immutable files as ignored",
                       set_names(err, "x")))
    }

    packet$files$ignored <- union(packet$files$ignored, files)
  }
  invisible()
}


outpack_packet_file_list <- function(packet) {
  packet <- check_current_packet(packet)
  files <- withr::with_dir(packet$path,
                           dir(all.files = TRUE, recursive = TRUE, no.. = TRUE))
  status <- rep("unknown", length(files))
  status[files %in% names(packet$files$immutable)] <- "immutable"
  status[files %in% packet$files$ignored] <- "ignored"
  data_frame(path = files, status = status)
}



outpack_packet_finish <- function(packet) {
  packet$complete <- TRUE
}


## This is used in each of the functions that accept either 'packet'
## as an argument and which will fall back onto the global active
## packet.
check_current_packet <- function(packet) { # TODO: rename
  assert_is(packet, "outpack_packet")
  if (isTRUE(packet$complete)) {
    cli::cli_abort("Packet '{packet$id}' is complete")
  }
  packet
}

Try the orderly package in your browser

Any scripts or data that you put into this service are public.

orderly documentation built on Jan. 24, 2026, 1:07 a.m.