Nothing
#' @title Customize a target in [drake_plan()].
#' `r lifecycle::badge("stable")`
#' @description The `target()` function is a way to
#' configure individual targets in a `drake` plan.
#' Its most common use is to invoke static branching
#' and dynamic branching, and it can also set the values
#' of custom columns such as `format`, `elapsed`, `retries`,
#' and `max_expand`. Details are at
#' `https://books.ropensci.org/drake/plans.html#special-columns`.
#' Note: `drake_plan(my_target = my_command())`
#' is equivalent to
#' `drake_plan(my_target = target(my_command())`.
#' @details `target()` must be called inside [drake_plan()].
#' It is invalid otherwise.
#' @export
#' @inheritSection drake_plan Columns
#' @inheritSection drake_plan Keywords
#' @inheritSection drake_plan Formats
#' @seealso [drake_plan()], [make()]
#' @return A one-row workflow plan data frame with the named
#' arguments as columns.
#' @param command The command to build the target.
#' @param transform A call to [map()], [split()], [cross()], or [combine()]
#' to apply a *static* transformation. Details:
#' `https://books.ropensci.org/drake/static.html`
#' @param dynamic A call to [map()], [cross()], or [group()]
#' to apply a *dynamic* transformation. Details:
#' `https://books.ropensci.org/drake/dynamic.html`
#' @param ... Optional columns of the plan for a given target.
#' See the Columns section of this help file for a selection
#' of special columns that `drake` understands.
#' @examples
#' # Use target() to create your own custom columns in a drake plan.
#' # See ?triggers for more on triggers.
#' drake_plan(
#' website_data = target(
#' download_data("www.your_url.com"),
#' trigger = "always",
#' custom_column = 5
#' ),
#' analysis = analyze(website_data)
#' )
#' models <- c("glm", "hierarchical")
#' plan <- drake_plan(
#' data = target(
#' get_data(x),
#' transform = map(x = c("simulated", "survey"))
#' ),
#' analysis = target(
#' analyze_data(data, model),
#' transform = cross(data, model = !!models, .id = c(x, model))
#' ),
#' summary = target(
#' summarize_analysis(analysis),
#' transform = map(analysis, .id = c(x, model))
#' ),
#' results = target(
#' bind_rows(summary),
#' transform = combine(summary, .by = data)
#' )
#' )
#' plan
#' if (requireNamespace("styler", quietly = TRUE)) {
#' print(drake_plan_source(plan))
#' }
target <- function(
command = NULL,
transform = NULL,
dynamic = NULL,
...
) {
if (!nzchar(Sys.getenv("drake_target_silent"))) {
# 2019-12-05
stop0(
"target() in drake is not a standalone user-side function. ",
"It must be called from inside drake_plan(). Details: ",
"https://books.ropensci.org/drake/static.html"
)
}
call <- match.call(expand.dots = FALSE)
lst <- c(
command = call$command,
transform = call$transform,
dynamic = call$dynamic,
call$...
)
lst <- select_nonempty(lst)
lst <- lst[nzchar(names(lst))]
out <- data.frame(command = NA, stringsAsFactors = FALSE)
for (col in names(lst)) {
if (is.language(lst[[col]])) {
out[[col]] <- list(lst[[col]])
} else {
out[[col]] <- lst[[col]]
}
}
out
}
#' @title Customize the decision rules for rebuilding targets
#' `r lifecycle::badge("stable")`
#' @description Use this function inside a target's command
#' in your [drake_plan()] or the `trigger` argument to
#' [make()] or [drake_config()].
#' For details, see the chapter on triggers
#' in the user manual:
#' `https://books.ropensci.org/drake/triggers.html`
#' @details
#' A target always builds if it has not been built before.
#' Triggers allow you to customize the conditions
#' under which a pre-existing target *re*builds.
#' By default, the target will rebuild if and only if:
#' - Any of `command`, `depend`, or `file` is `TRUE`, or
#' - `condition` evaluates to `TRUE`, or
#' - `change` evaluates to a value different from last time.
#' The above steps correspond to the "whitelist" decision rule.
#' You can select other decision rules with the `mode` argument
#' described in this help file.
#' On another note, there may be a slight efficiency loss
#' if you set complex triggers
#' for `change` and/or `condition` because
#' `drake` needs to load any required dependencies
#' into memory before evaluating these triggers.
#' @export
#' @seealso [drake_plan()], [make()]
#' @return A list of trigger specification details that
#' `drake` processes internally when it comes time to decide
#' whether to build the target.
#' @param command Logical, whether to rebuild the target if the
#' [drake_plan()] command changes.
#' @param depend Logical, whether to rebuild if a
#' non-file dependency changes.
#' @param file Logical, whether to rebuild the target
#' if a [file_in()]/[file_out()]/[knitr_in()] file changes.
#' Also applies to external data tracked with
#' `target(format = "file")`.
#' @param seed Logical, whether to rebuild the target
#' if the seed changes. Only makes a difference if you set
#' a custom `seed` column in your [drake_plan()] at some point
#' in your workflow.
#' @param format Logical, whether to rebuild the target if the
#' choice of specialized data format changes: for example,
#' if you use `target(format = "qs")` one instance and
#' `target(format = "fst")` the next. See
#' `https://books.ropensci.org/drake/plans.html#special-data-formats-for-targets` # nolint
#' for details on formats.
#' @param condition R code (expression or language object)
#' that returns a logical. The target will rebuild
#' if the code evaluates to `TRUE`.
#' @param change R code (expression or language object)
#' that returns any value. The target will rebuild
#' if that value is different from last time
#' or not already cached.
#' @param mode A character scalar equal to `"whitelist"` (default) or
#' `"blacklist"` or `"condition"`. With the `mode` argument, you can choose
#' how the `condition` trigger factors into the decision to build
#' or skip the target. Here are the options.
#' - `"whitelist"` (default): we *rebuild* the target whenever `condition`
#' evaluates to `TRUE`. Otherwise, we defer to the other triggers.
#' This behavior is the same as the decision rule described in the
#' "Details" section of this help file.
#' - `"blacklist"`: we *skip* the target whenever `condition` evaluates
#' to `FALSE`. Otherwise, we defer to the other triggers.
#' - `"condition"`: here, the `condition` trigger is the only decider,
#' and we ignore all the other triggers. We *rebuild* target whenever
#' `condition` evaluates to `TRUE` and *skip* it whenever `condition`
#' evaluates to `FALSE`.
#' @examples
#' # A trigger is just a set of decision rules
#' # to decide whether to build a target.
#' trigger()
#' # This trigger will build a target on Tuesdays
#' # and when the value of an online dataset changes.
#' trigger(condition = today() == "Tuesday", change = get_online_dataset())
#' \dontrun{
#' isolate_example("Quarantine side effects.", {
#' if (suppressWarnings(require("knitr"))) {
#' load_mtcars_example() # Get the code with drake_example("mtcars").
#' # You can use a global trigger argument:
#' # for example, to always run everything.
#' make(my_plan, trigger = trigger(condition = TRUE))
#' make(my_plan, trigger = trigger(condition = TRUE))
#' # You can also define specific triggers for each target.
#' plan <- drake_plan(
#' x = sample.int(15),
#' y = target(
#' command = x + 1,
#' trigger = trigger(depend = FALSE)
#' )
#' )
#' # Now, when x changes, y will not.
#' make(plan)
#' make(plan)
#' plan$command[1] <- "sample.int(16)" # change x
#' make(plan)
#' }
#' })
#' }
trigger <- function(
command = TRUE,
depend = TRUE,
file = TRUE,
seed = TRUE,
format = TRUE,
condition = FALSE,
change = NULL,
mode = c("whitelist", "blacklist", "condition")
) {
command <- as.logical(command)
depend <- as.logical(depend)
file <- as.logical(file)
format <- as.logical(format)
condition <- rlang::quo_squash(rlang::enquo(condition))
change <- rlang::quo_squash(rlang::enquo(change))
mode <- match.arg(mode)
new_drake_triggers(
command = command,
depend = depend,
file = file,
seed = seed,
format = format,
condition = condition,
change = change,
mode = mode
)
}
#' @title Declare input files and directories.
#' `r lifecycle::badge("stable")`
#' @description `file_in()` marks individual files
#' (and whole directories) that your targets depend on.
#' @section URLs:
#' As of `drake` 7.4.0, `file_in()` and `file_out()` have
#' support for URLs. If the file name begins with
#' "http://", "https://", or "ftp://", [make()] attempts
#' to check the ETag to see if the data changed from last time.
#' If no ETag can be found, `drake` simply uses the ETag
#' from last [make()] and registers the file as unchanged
#' (which prevents your workflow from breaking if you lose
#' internet access). If your `file_in()` URLs require
#' authentication, see the `curl_handles` argument of
#' `make()` and `drake_config()` to learn how to supply credentials.
#'
#' @export
#' @inheritSection drake_plan Keywords
#' @seealso [file_out()], [knitr_in()], [ignore()], [no_deps()]
#' @return A character vector of declared input file or directory paths.
#' @param ... Character vector, paths to files and directories. Use
#' `.id_chr` to refer to the current target by name. `.id_chr` is not
#' limited to use in `file_in()` and `file_out()`.
#' @export
#' @examples
#' \dontrun{
#' isolate_example("contain side effects", {
#' # The `file_out()` and `file_in()` functions
#' # just takes in strings and returns them.
#' file_out("summaries.txt")
#' # Their main purpose is to orchestrate your custom files
#' # in your workflow plan data frame.
#' plan <- drake_plan(
#' out = write.csv(mtcars, file_out("mtcars.csv")),
#' contents = read.csv(file_in("mtcars.csv"))
#' )
#' plan
#' # drake knows "\"mtcars.csv\"" is the first target
#' # and a dependency of `contents`. See for yourself:
#'
#' make(plan)
#' file.exists("mtcars.csv")
#'
#' # You may use `.id_chr` inside `file_out()` and `file_in()`
#' # to refer to the current target. This works inside
#' # static `map()`, `combine()`, `split()`, and `cross()`.
#'
#' plan <- drake::drake_plan(
#' data = target(
#' write.csv(data, file_out(paste0(.id_chr, ".csv"))),
#' transform = map(data = c(airquality, mtcars))
#' )
#' )
#' plan
#'
#' # You can also work with entire directories this way.
#' # However, in `file_out("your_directory")`, the directory
#' # becomes an entire unit. Thus, `file_in("your_directory")`
#' # is more appropriate for subsequent steps than
#' # `file_in("your_directory/file_inside.txt")`.
#' plan <- drake_plan(
#' out = {
#' dir.create(file_out("dir"))
#' write.csv(mtcars, "dir/mtcars.csv")
#' },
#' contents = read.csv(file.path(file_in("dir"), "mtcars.csv"))
#' )
#' plan
#'
#' make(plan)
#' file.exists("dir/mtcars.csv")
#'
#' # See the connections that the file relationships create:
#' if (requireNamespace("visNetwork", quietly = TRUE)) {
#' vis_drake_graph(plan)
#' }
#' })
#' }
file_in <- function(...) {
as.character(c(...))
}
#' @title Declare output files and directories.
#' `r lifecycle::badge("stable")`
#' @description `file_out()` marks individual files
#' (and whole directories) that your targets create.
#' @export
#' @inheritSection drake_plan Keywords
#' @seealso [file_in()], [knitr_in()], [ignore()], [no_deps()]
#' @return A character vector of declared output file or directory paths.
#' @inheritParams file_in
#' @export
#' @examples
#' \dontrun{
#' isolate_example("contain side effects", {
#' # The `file_out()` and `file_in()` functions
#' # just takes in strings and returns them.
#' file_out("summaries.txt")
#' # Their main purpose is to orchestrate your custom files
#' # in your workflow plan data frame.
#' plan <- drake_plan(
#' out = write.csv(mtcars, file_out("mtcars.csv")),
#' contents = read.csv(file_in("mtcars.csv"))
#' )
#' plan
#' # drake knows "\"mtcars.csv\"" is the first target
#' # and a dependency of `contents`. See for yourself:
#'
#' make(plan)
#' file.exists("mtcars.csv")
#'
#' # You may use `.id_chr` inside `file_out()` and `file_in()`
#' # to refer to the current target. This works inside `map()`,
#' # `combine()`, `split()`, and `cross()`.
#'
#' plan <- drake::drake_plan(
#' data = target(
#' write.csv(data, file_out(paste0(.id_chr, ".csv"))),
#' transform = map(data = c(airquality, mtcars))
#' )
#' )
#'
#' plan
#'
#' # You can also work with entire directories this way.
#' # However, in `file_out("your_directory")`, the directory
#' # becomes an entire unit. Thus, `file_in("your_directory")`
#' # is more appropriate for subsequent steps than
#' # `file_in("your_directory/file_inside.txt")`.
#' plan <- drake_plan(
#' out = {
#' dir.create(file_out("dir"))
#' write.csv(mtcars, "dir/mtcars.csv")
#' },
#' contents = read.csv(file.path(file_in("dir"), "mtcars.csv"))
#' )
#' plan
#'
#' make(plan)
#' file.exists("dir/mtcars.csv")
#'
#' # See the connections that the file relationships create:
#' if (requireNamespace("visNetwork", quietly = TRUE)) {
#' vis_drake_graph(plan)
#' }
#' })
#' }
file_out <- file_in
#' @title Declare `knitr`/`rmarkdown` source files
#' as dependencies.
#' `r lifecycle::badge("stable")`
#' @description `knitr_in()` marks individual `knitr`/R Markdown
#' reports as dependencies. In `drake`, these reports are pieces
#' of the pipeline. R Markdown is a great tool for *displaying*
#' precomputed results, but not for running a large workflow
#' from end to end. These reports should do as little
#' computation as possible.
#' @details Unlike [file_in()] and [file_out()], `knitr_in()`
#' does not work with entire directories.
#' @export
#' @inheritSection drake_plan Keywords
#' @seealso [file_in()], [file_out()], [ignore()], [no_deps()]
#' @return A character vector of declared input file paths.
#' @param ... Character strings. File paths of `knitr`/`rmarkdown`
#' source files supplied to a command in your workflow plan data frame.
#' @examples
#' \dontrun{
#' isolate_example("contain side effects", {
#' if (requireNamespace("knitr", quietly = TRUE)) {
#' # `knitr_in()` is like `file_in()`
#' # except that it analyzes active code chunks in your `knitr`
#' # source file and detects non-file dependencies.
#' # That way, updates to the right dependencies trigger rebuilds
#' # in your report.
#' # The mtcars example (`drake_example("mtcars")`)
#' # already has a demonstration
#'
#' load_mtcars_example()
#' make(my_plan)
#'
#' # Now how did drake magically know that
#' # `small`, `large`, and `coef_regression2_small` were
#' # dependencies of the output file `report.md`?
#' # because the command in the workflow plan had
#' # `knitr_in("report.Rmd")` in it, so drake knew
#' # to analyze the active code chunks. There, it spotted
#' # where `small`, `large`, and `coef_regression2_small`
#' # were read from the cache using calls to `loadd()` and `readd()`.
#' }
#' })
#' }
knitr_in <- file_in
#' @title Ignore code
#' `r lifecycle::badge("stable")`
#' @description Ignore sections of commands and imported functions.
#' @details In user-defined functions and [drake_plan()] commands, you can
#' wrap code chunks in `ignore()` to
#' 1. Tell `drake` to not search for dependencies
#' (targets etc. mentioned in the code) and
#' 2. Ignore changes to the code so downstream targets remain up to date.
#' To enforce (1) without (2), use [no_deps()].
#' @export
#' @inheritSection drake_plan Keywords
#' @seealso [file_in()], [file_out()], [knitr_in()], [no_deps()]
#' @return The argument.
#' @param x Code to ignore.
#' @examples
#' \dontrun{
#' isolate_example("Contain side effects", {
#' # Normally, `drake` reacts to changes in dependencies.
#' x <- 4
#' make(plan = drake_plan(y = sqrt(x)))
#' x <- 5
#' make(plan = drake_plan(y = sqrt(x)))
#' make(plan = drake_plan(y = sqrt(4) + x))
#' # But not with ignore().
#' make(plan = drake_plan(y = sqrt(4) + ignore(x))) # Builds y.
#' x <- 6
#' make(plan = drake_plan(y = sqrt(4) + ignore(x))) # Skips y.
#' make(plan = drake_plan(y = sqrt(4) + ignore(x + 1))) # Skips y.
#'
#' # ignore() works with functions and multiline code chunks.
#' f <- function(x) {
#' ignore({
#' x <- x + 1
#' x <- x + 2
#' })
#' x # Not ignored.
#' }
#' make(plan = drake_plan(y = f(2)))
#' readd(x)
#' # Changes the content of the ignore() block:
#' f <- function(x) {
#' ignore({
#' x <- x + 1
#' })
#' x # Not ignored.
#' }
#' make(plan = drake_plan(x = f(2)))
#' readd(x)
#' })
#' }
ignore <- function(x = NULL) {
x
}
#' @title Suppress dependency detection.
#' `r lifecycle::badge("stable")`
#' @description Tell `drake` to not search for dependencies in a chunk of code.
#' @details `no_deps()` is similar to [ignore()], but it still lets `drake`
#' track meaningful changes to the code itself.
#' @export
#' @inheritSection drake_plan Keywords
#' @seealso [file_in()], [file_out()], [knitr_in()], [no_deps()]
#' @return The argument.
#' @param x Code for which dependency detection is suppressed.
#' @examples
#' \dontrun{
#' isolate_example("Contain side effects", {
#' # Normally, `drake` reacts to changes in dependencies.
#' x <- 4
#' make(plan = drake_plan(y = sqrt(x)))
#' x <- 5
#' make(plan = drake_plan(y = sqrt(x)))
#' make(plan = drake_plan(y = sqrt(4) + x))
#' # But not with no_deps().
#' make(plan = drake_plan(y = sqrt(4) + no_deps(x))) # Builds y.
#' x <- 6
#' make(plan = drake_plan(y = sqrt(4) + no_deps(x))) # Skips y.
#' # However, `drake` *does* react to changes
#' # to the *literal code* inside `no_deps()`.
#' make(plan = drake_plan(y = sqrt(4) + ignore(x + 1))) # Builds y.
#'
#' # Like ignore(), no_deps() works with functions and multiline code chunks.
#' z <- 1
#' f <- function(x) {
#' no_deps({
#' x <- z + 1
#' x <- x + 2
#' })
#' x
#' }
#' make(plan = drake_plan(y = f(2)))
#' readd(y)
#' z <- 2 # Changed dependency is not tracked.
#' make(plan = drake_plan(y = f(2)))
#' readd(y)
#' })
#' }
no_deps <- function(x = NULL) {
x
}
#' @title Cancel a target mid-build under some condition
#' `r lifecycle::badge("stable")`
#' @description Cancel a target mid-build if some logical condition is met.
#' Upon cancellation, `drake` halts the current target and moves to the
#' next one. The target's previous value and metadata, if they exist,
#' remain in the cache.
#' @export
#' @seealso cancel
#' @return Nothing.
#' @inheritParams cancel
#' @param condition Logical, whether to cancel the target.
#' @examples
#' \dontrun{
#' isolate_example("cancel_if()", {
#' f <- function(x) {
#' cancel_if(x > 1)
#' Sys.sleep(2) # Does not run if x > 1.
#' }
#' g <- function(x) f(x)
#' plan <- drake_plan(y = g(2))
#' make(plan)
#' # Does not exist.
#' # readd(y)
#' })
#' }
cancel_if <- function(condition, allow_missing = TRUE) {
envir_call()
if (length(condition) != 1L) {
stop("condition must be length 1 in cancel_if()")
}
if (!condition) {
return(invisible())
}
cancel(allow_missing = allow_missing)
}
#' @title Cancel a target mid-build `r lifecycle::badge("stable")`
#' @description Cancel a target mid-build.
#' Upon cancellation, `drake` halts the current target and moves to the
#' next one. The target's previous value and metadata, if they exist,
#' remain in the cache.
#' @export
#' @seealso cancel_if
#' @return Nothing.
#' @param allow_missing Logical. If `FALSE`, `drake` will not cancel
#' the target if it is missing from the cache (or if you removed the
#' key with `clean()`).
#' @examples
#' \dontrun{
#' isolate_example("cancel()", {
#' f <- function(x) {
#' cancel()
#' Sys.sleep(2) # Does not run.
#' }
#' g <- function(x) f(x)
#' plan <- drake_plan(y = g(1))
#' make(plan)
#' # Does not exist.
#' # readd(y)
#' })
#' }
cancel <- function(allow_missing = TRUE) {
envir <- envir_call()
if (!allow_missing) {
if (target_missing(envir$target, envir$config)) {
return(invisible())
}
}
stop(cancellation(allow_missing = allow_missing))
}
cancellation <- function(...) {
structure(
list(message = "cancel", call = NULL),
class = c("drake_cancel", "error", "condition")
)
}
#' @title Name of the current target `r lifecycle::badge("stable")`
#' @export
#' @description `id_chr()` gives you the name of the current target
#' while [make()] is running. For static branching in [drake_plan()],
#' use the `.id_chr` symbol instead. See the examples for details.
#' @inheritSection drake_plan Keywords
#' @return The name of the current target.
#' @examples
#' try(id_chr()) # Do not use outside the plan.
#' \dontrun{
#' isolate_example("id_chr()", {
#' plan <- drake_plan(x = id_chr())
#' make(plan)
#' readd(x)
#' # Dynamic branching
#' plan <- drake_plan(
#' x = seq_len(4),
#' y = target(id_chr(), dynamic = map(x))
#' )
#' make(plan)
#' readd(y, subtargets = 1)
#' # Static branching
#' plan <- drake_plan(
#' y = target(c(x, .id_chr), transform = map(x = !!seq_len(4)))
#' )
#' plan
#' })
#' }
id_chr <- function() {
envir_call()$target
}
#' @title Get the environment where drake builds targets
#' `r lifecycle::badge("questioning")`
#' @description Call this function inside the commands in your plan
#' to get the environment where `drake` builds targets.
#' Advanced users can use it to strategically remove targets from memory
#' while [make()] is running.
#' @details `drake` manages in-memory targets in 4 environments:
#' one with sub-targets, one with whole dynamic targets, one with
#' static targets, and one with imported global objects and functions.
#' This last environment is usually the environment
#' from which you call [make()].
#' Select the appropriate environment for your
#' use case with the `which` argument of `drake_envir()`.
#' @export
#' @inheritSection drake_plan Keywords
#' @seealso [from_plan()]
#' @return The environment where `drake` builds targets.
#' @param which Character of length 1, which environment
#' to select. See the details of this help file.
#' @examples
#' \dontrun{
#' isolate_example("contain side effects", {
#' plan <- drake_plan(
#' large_data_1 = sample.int(1e4),
#' large_data_2 = sample.int(1e4),
#' subset = c(large_data_1[seq_len(10)], large_data_2[seq_len(10)]),
#' summary = {
#' print(ls(envir = parent.env(drake_envir())))
#' # We don't need the large_data_* targets in memory anymore.
#' rm(large_data_1, large_data_2, envir = drake_envir("targets"))
#' print(ls(envir = drake_envir("targets")))
#' mean(subset)
#' }
#' )
#' make(plan, cache = storr::storr_environment(), session_info = FALSE)
#' })
#' }
drake_envir <- function(
which = c("targets", "dynamic", "subtargets", "imports")
) {
config <- envir_call()$config
which <- match.arg(which)
which <- ifelse(which == "imports", "envir", paste0("envir_", which))
config[[which]]
}
envir_call <- function() {
calls <- vcapply(sys.calls(), safe_deparse)
matching_calls <- grepl("^drake_with_call_stack_8a6af5\\(", calls)
if (!any(matching_calls)) {
envir_call_error()
}
pos <- max(which(matching_calls))
sys.frame(pos)
}
envir_call_error <- function() {
stop0(
"Could not find the environment where drake builds targets. ",
"Functions drake_envir(), id_chr(), cancel(), and cancel_if() ",
"can only be invoked through make()."
)
}
#' @title Row-bind together drake plans
#' `r lifecycle::badge("stable")`
#' @description Combine drake plans together in a way that
#' correctly fills in missing entries.
#' @export
#' @seealso [drake_plan()], [make()]
#' @param ... Workflow plan data frames (see [drake_plan()]).
#' @examples
#' # You might need to refresh your data regularly (see ?triggers).
#' download_plan <- drake_plan(
#' data = target(
#' command = download_data(),
#' trigger = "always"
#' )
#' )
#' # But if the data don't change, the analyses don't need to change.
#' analysis_plan <- drake_plan(
#' usage = get_usage_metrics(data),
#' topline = scrape_topline_table(data)
#' )
#' your_plan <- bind_plans(download_plan, analysis_plan)
#' your_plan
bind_plans <- function(...) {
sanitize_plan(drake_bind_rows(...))
}
#' @title Turn an R script file or `knitr` / R Markdown report
#' into a `drake` plan.
#' `r lifecycle::badge("questioning")`
#' @export
#' @seealso [drake_plan()], [make()], [plan_to_code()],
#' [plan_to_notebook()]
#' @description `code_to_plan()`, [plan_to_code()], and
#' [plan_to_notebook()] together illustrate the relationships
#' between `drake` plans, R scripts, and R Markdown documents.
#' @details This feature is easy to break, so there are some rules
#' for your code file:
#' 1. Stick to assigning a single expression to a single target at a time.
#' For multi-line commands, please enclose the whole command
#' in curly braces.
#' Conversely, compound assignment is not supported
#' (e.g. `target_1 <- target_2 <- target_3 <- get_data()`).
#' 2. Once you assign an expression to a variable,
#' do not modify the variable any more.
#' The target/command binding should be permanent.
#' 3. Keep it simple. Please use the assignment operators rather than
#' `assign()` and similar functions.
#' @param path A file path to an R script or `knitr` report.
#' @examples
#' plan <- drake_plan(
#' raw_data = read_excel(file_in("raw_data.xlsx")),
#' data = raw_data,
#' hist = create_plot(data),
#' fit = lm(Ozone ~ Temp + Wind, data)
#' )
#' file <- tempfile()
#' # Turn the plan into an R script a the given file path.
#' plan_to_code(plan, file)
#' # Here is what the script looks like.
#' cat(readLines(file), sep = "\n")
#' # Convert back to a drake plan.
#' code_to_plan(file)
code_to_plan <- function(path) {
stopifnot(file.exists(path))
txt <- readLines(path)
# From CodeDepends: https://github.com/duncantl/CodeDepends/blob/7c9cf7eceffaea1d26fe25836c7a455f059e13c1/R/frags.R#L74 # nolint
# Checks if the file is a knitr report.
if (any(grepl(knitr_pattern, txt))) { # nolint
txt <- get_tangled_text(path)
}
nodes <- parse(text = txt)
out <- lapply(nodes, node_plan)
out <- do.call(rbind, out)
out <- parse_custom_plan_columns(out)
sanitize_plan(out)
}
node_plan <- function(node) {
weak_tibble(
target = safe_deparse(node[[2]], backtick = FALSE),
command = safe_deparse(node[[3]], backtick = TRUE)
)
}
#' @title Turn a `drake` plan into a plain R script file.
#' `r lifecycle::badge("questioning")`
#' @export
#' @seealso [drake_plan()], [make()], [code_to_plan()],
#' [plan_to_notebook()]
#' @description `code_to_plan()`, [plan_to_code()], and
#' [plan_to_notebook()] together illustrate the relationships
#' between `drake` plans, R scripts, and R Markdown documents.
#' In the file generated by `plan_to_code()`, every target/command pair
#' becomes a chunk of code.
#' Targets are arranged in topological order
#' so dependencies are available before their downstream targets.
#' Please note:
#' 1. You are still responsible for loading your project's
#' packages, imported functions, etc.
#' 2. Triggers disappear.
#' @param plan Workflow plan data frame. See [drake_plan()]
#' for details.
#' @param con A file path or connection to write to.
#' @examples
#' plan <- drake_plan(
#' raw_data = read_excel(file_in("raw_data.xlsx")),
#' data = raw_data,
#' hist = create_plot(data),
#' fit = lm(Ozone ~ Temp + Wind, data)
#' )
#' file <- tempfile()
#' # Turn the plan into an R script a the given file path.
#' plan_to_code(plan, file)
#' # Here is what the script looks like.
#' cat(readLines(file), sep = "\n")
#' # Convert back to a drake plan.
#' code_to_plan(file)
plan_to_code <- function(plan, con = stdout()) {
writeLines(text = plan_to_text(plan), con = con)
}
#' @title Turn a `drake` plan into an R notebook.
#' `r lifecycle::badge("questioning")`
#' @export
#' @seealso [drake_plan()], [make()], [code_to_plan()],
#' [plan_to_code()]
#' @description `code_to_plan()`, [plan_to_code()], and
#' [plan_to_notebook()] together illustrate the relationships
#' between `drake` plans, R scripts, and R Markdown documents.
#' In the file generated by `plan_to_code()`, every target/command pair
#' becomes a chunk of code.
#' Targets are arranged in topological order
#' so dependencies are available before their downstream targets.
#' Please note:
#' 1. You are still responsible for loading your project's
#' packages, imported functions, etc.
#' 2. Triggers disappear.
#' @inheritParams plan_to_code
#' @examples
#' if (suppressWarnings(require("knitr"))) {
#' plan <- drake_plan(
#' raw_data = read_excel(file_in("raw_data.xlsx")),
#' data = raw_data,
#' hist = create_plot(data),
#' fit = lm(Ozone ~ Temp + Wind, data)
#' )
#' file <- tempfile()
#' # Turn the plan into an R notebook a the given file path.
#' plan_to_notebook(plan, file)
#' # Here is what the script looks like.
#' cat(readLines(file), sep = "\n")
#' # Convert back to a drake plan.
#' code_to_plan(file)
#' }
plan_to_notebook <- function(plan, con) {
out <- c(
"---",
"title: \"My Notebook\"",
"output: html_notebook",
"---",
"",
"```{r my_code}",
plan_to_text(plan),
"```"
)
writeLines(out, con = con)
}
plan_to_text <- function(plan) {
. <- NULL
graph <- drake_config(
plan[, c("target", "command")],
envir = new.env(parent = emptyenv()),
cache = storr::storr_environment(),
history = FALSE,
verbose = FALSE
)$graph
order <- igraph::topo_sort(graph)$name
order <- intersect(order, plan$target)
order <- match(order, table = plan$target)
plan <- plan[order, ]
if (!is.character(plan$command)) {
plan$command <- vapply(
plan$command,
safe_deparse,
FUN.VALUE = character(1),
backtick = TRUE
)
}
text <- paste(plan$target, "<-", plan$command)
if (requireNamespace("styler")) {
try(text <- styler::style_text(text), silent = TRUE)
}
text
}
#' @title Show the code required to produce a given `drake` plan
#' `r lifecycle::badge("stable")`
#' @description You supply a plan, and [drake_plan_source()]
#' supplies code to generate that plan. If you have the
#' `prettycode` package,
#' installed, you also get nice syntax highlighting in the console
#' when you print it.
#' @export
#' @seealso [drake_plan()]
#' @return a character vector of lines of text. This text
#' is a call to [drake_plan()] that produces the plan you provide.
#' @param plan A workflow plan data frame (see [drake_plan()])
#' @examples
#' plan <- drake::drake_plan(
#' small_data = download_data("https://some_website.com"),
#' large_data_raw = target(
#' command = download_data("https://lots_of_data.com"),
#' trigger = trigger(
#' change = time_last_modified("https://lots_of_data.com"),
#' command = FALSE,
#' depend = FALSE
#' ),
#' timeout = 1e3
#' )
#' )
#' print(plan)
#' if (requireNamespace("styler", quietly = TRUE)) {
#' source <- drake_plan_source(plan)
#' print(source) # Install the prettycode package for syntax highlighting.
#' file <- tempfile() # Path to an R script to contain the drake_plan() call.
#' writeLines(source, file) # Save the code to an R script.
#' }
drake_plan_source <- function(plan) {
assert_pkg("styler")
text <- drake_plan_call(plan)
text <- style_recursive(text, name = "", append_comma = FALSE)
class(text) <- c("drake_plan_source", "character")
text
}
#' @export
print.drake_plan_source <- function(x, ...) {
if (requireNamespace("prettycode", quietly = TRUE)) {
x <- prettycode::highlight(x)
}
cat(x, sep = "\n")
}
drake_plan_call <- function(plan) {
target_calls <- drake_pmap(plan, drake_target_call)
names(target_calls) <- plan$target
as.call(c(quote(drake_plan), target_calls))
}
drake_target_call <- function(...) {
args <- select_valid_args(list(...))
target <- parse(text = args$target)[[1]]
args$target <- NULL
if (is.character(args[["command"]])) {
args$command <- safe_parse(args[["command"]])
}
if ("trigger" %in% names(args)) {
if (is.character(args[["trigger"]])) {
args[["trigger"]] <- safe_parse(args[["trigger"]])
}
}
if (!identical(names(args), "command")) {
args[["command"]] <- as.call(c(quote(target), args))
}
args[["command"]]
}
select_valid_args <- function(x) {
index <- vapply(
X = x,
FUN = function(y) {
length(y) > 0 && !safe_is_na(y)
},
FUN.VALUE = logical(1)
)
x[index]
}
style_recursive <- function(expr, name, append_comma) {
text <- style_recursive_loop(expr)
head <- character(0)
if (nzchar(name)) {
head <- paste(name, "= ")
}
head <- paste0(head, safe_deparse(expr[[1]], backtick = FALSE), "(")
out <- c(head, paste0(" ", text), ")")
if (append_comma) {
out[length(out)] <- paste0(out[length(out)], ",")
}
out
}
style_recursive_loop <- function(expr) {
args <- expr[-1]
text <- character(0)
for (i in seq_along(args)) {
recurse <- is_target_call(args[[i]]) || is_trigger_call(args[[i]])
if (recurse) {
text <- c(
text,
style_recursive(
expr = args[[i]],
name = names(args)[i],
append_comma = i < length(args)
)
)
} else {
text <- c(
text,
style_leaf(
expr = args[[i]],
name = names(args)[i],
append_comma = i < length(args)
)
)
}
}
text
}
style_leaf <- function(name, expr, append_comma) {
text <- safe_deparse(expr, backtick = TRUE)
try(text <- styler::style_text(text), silent = TRUE)
text[1] <- paste(name, "=", text[1])
if (append_comma) {
text[length(text)] <- paste0(text[length(text)], ",")
}
text
}
is_trigger_call <- function(expr) {
tryCatch(
safe_deparse(expr[[1]], backtick = FALSE) %in% trigger_fns,
error = error_false
)
}
#' @title Turn a script into a function.
#' `r lifecycle::badge("stable")`
#' @description `code_to_function()` is a quick (and very dirty) way to
#' retrofit drake to an existing script-based project. It parses
#' individual `\*.R/\*.RMD` files into functions so they can be added
#' into the drake workflow.
#'
#' @details Most data science workflows consist of imperative scripts.
#' `drake`, on the other hand, assumes you write *functions*.
#' `code_to_function()` allows for pre-existing workflows to incorporate
#' drake as a workflow management tool seamlessly for cases where
#' re-factoring is unfeasible. So drake can monitor dependencies, the
#' targets are passed as arguments of the dependent functions.
#'
#' @export
#' @seealso [file_in()], [file_out()], [knitr_in()], [ignore()], [no_deps()],
#' [code_to_plan()], [plan_to_code()], [plan_to_notebook()]
#' @return A function to be input into the drake plan
#' @param path Character vector, path to script.
#' @param envir Environment of the created function.
#' @export
#' @examples
#' \dontrun{
#' isolate_example("contain side effects", {
#' if (requireNamespace("ggplot2", quietly = TRUE)) {
#' # The `code_to_function()` function creates a function that makes it
#' # available for drake to process as part of the workflow.
#' # The main purpose is to allow pre-existing workflows to incorporate drake
#' # into the workflow seamlessly for cases where re-factoring is unfeasible.
#' #
#'
#' script1 <- tempfile()
#' script2 <- tempfile()
#' script3 <- tempfile()
#' script4 <- tempfile()
#'
#' writeLines(c(
#' "data <- mtcars",
#' "data$make <- do.call('c',",
#' "lapply(strsplit(rownames(data), split=\" \"), `[`, 1))",
#' "saveRDS(data, \"mtcars_alt.RDS\")"
#' ),
#' script1
#' )
#'
#' writeLines(c(
#' "data <- readRDS(\"mtcars_alt.RDS\")",
#' "mtcars_lm <- lm(mpg~cyl+disp+vs+gear+make,data=data)",
#' "saveRDS(mtcars_lm, \"mtcars_lm.RDS\")"
#' ),
#' script2
#' )
#' writeLines(c(
#' "mtcars_lm <- readRDS(\"mtcars_lm.RDS\")",
#' "lm_summary <- summary(mtcars_lm)",
#' "saveRDS(lm_summary, \"mtcars_lm_summary.RDS\")"
#' ),
#' script3
#' )
#' writeLines(c(
#' "data<-readRDS(\"mtcars_alt.RDS\")",
#' "gg <- ggplot2::ggplot(data)+",
#' "ggplot2::geom_point(ggplot2::aes(",
#' "x=disp, y=mpg, shape=as.factor(vs), color=make))",
#' "ggplot2::ggsave(\"mtcars_plot.png\", gg)"
#' ),
#' script4
#' )
#'
#'
#' do_munge <- code_to_function(script1)
#' do_analysis <- code_to_function(script2)
#' do_summarize <- code_to_function(script3)
#' do_vis <- code_to_function(script4)
#'
#' plan <- drake_plan(
#' munged = do_munge(),
#' analysis = do_analysis(munged),
#' summary = do_summarize(analysis),
#' plot = do_vis(munged)
#' )
#'
#' plan
#' # drake knows "script1" is the first script to be evaluated and ran,
#' # because it has no dependencies on other code and a dependency of
#' # `analysis`. See for yourself:
#'
#' make(plan)
#'
#' # See the connections that the sourced scripts create:
#' if (requireNamespace("visNetwork", quietly = TRUE)) {
#' vis_drake_graph(plan)
#' }
#' }
#' })
#' }
code_to_function <- function(path, envir = parent.frame()) {
force(envir)
lines <- readLines(path)
if (any(grepl(knitr_pattern, lines))) {
lines <- get_tangled_text(path)
}
lines <- c(
"function(...) {",
lines,
"list(time = Sys.time(),tempfile = tempfile())",
"}"
)
text <- paste(lines, sep = "\n")
func <- eval(safe_parse(text), envir = envir)
func
}
knitr_pattern <- "^(### chunk number|<<[^>]*>>=|```\\{r.*\\})" # nolint
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.