#' Create a .yml makefile for a multi-task job
#'
#' Create a .yml makefile (for use with remake or scipiper) for a set of tasks
#' that together form a single job. The default target will be named after
#' `makefile` (specifically,
#' `indicatorfile=tools::file_path_sans_ext(basename(makefile))`) and can be
#' evoked from another remake file as
#' `make(I('indicatorfile'),remake_file='thismakefile.yml')` after replacing
#' `indicatorfile` and `thismakefile.yml` with their values.
#'
#' @param task_plan a task plan as produced by `create_task_plan()`
#' @param makefile character name of the remake file to create
#' @param include character vector of any remake .yml files to include within
#' this one. If any files must be quoted in the remake file, quote them with
#' inner single quotes, e.g. `c("unquoted", "'quoted file name.tsv'")`
#' @param packages character vector of any packages to load before running steps
#' @param sources character vector of any files that should be sourced before
#' running steps. If any files must be quoted in the remake file, quote them
#' with inner single quotes, e.g. `c("unquoted", "'quoted file name.tsv'")`
#' @param file_extensions character vector of file extensions to be added to the
#' defaults at `remake::file_extensions()`. Inclusion of `'ind'` is
#' recommended because this indicator file extension is commonly used by
#' scipiper.
#' @param template_file character name of the mustache template to render into
#' `makefile`. The default is recommended
#' @param final_targets a file path or object name (or vector of either) specifying final target(s)
#' to be created by `finalize_funs`
#' @param finalize_funs a string function name (or vector of function names, of equal
#' length to `final_targets`). Using NULL for `finalize_funs` will turn the
#' final steps from the task plan into depends for the default/all target.
#' @param as_promises hides the actual `final_targets` file/object from remake, and uses a
#' dummy target name instead, with suffix "_promise". This allows us to avoid cyclic
#' dependencies. Naming convention for `_promise` variables is to drop any dir structure
#' from the `final_targets`
#' @param tickquote_combinee_objects TRUE by default
#' @return the file name of the makefile that was created,
#' or the string output (if makefile = NULL)
#' @export
#' @examples
#' task_config <- data.frame(
#' id=c('AZ','CO','CA'),
#' capital=c('Phoeniz','Denver','Sacramento')
#' )
#' step1 <- create_task_step(
#' step_name = 'prep',
#' target = function(task_name, step_name, ...) {
#' sprintf('%s_%s', task_name, step_name)
#' },
#' depends = c('A','B'),
#' command = "process(target_name, I('C'))"
#' )
#' step2 <- create_task_step(
#' step_name = 'plot',
#' command = function(target_name, task_name, ...) {
#' capital <- task_config[task_config$id == task_name, 'capital']
#' sprintf('visualize(\'%s\', \'%s\')', task_name, capital)
#' }
#' )
#' step3 <- create_task_step('report')
#' task_plan <- create_task_plan(c('AZ','CA','CO'), list(step1, step2, step3),
#' final_steps='report', ind_dir='states/log')
#' task_makefile <- create_task_makefile(
#' task_plan, makefile=file.path(tempdir(), 'states.yml'),
#' file_extensions=c('ind'), packages='mda.streams')
#' cat(readLines(task_makefile), sep='\n')
#' @importFrom methods getPackageName
#' @importFrom utils packageVersion
create_task_makefile <- function(
task_plan, makefile,
include=c(), packages='scipiper', sources=c(), file_extensions=c('ind'),
template_file=system.file('extdata/task_makefile.mustache', package='scipiper'),
final_targets,
finalize_funs = 'combine_to_ind',
as_promises = TRUE,
tickquote_combinee_objects = TRUE) {
# prepare the overall job task: list every step of every job as a dependency.
# first mutate the makefile file name into an object name to use as the
# default/overall job target. this should be an acceptable target name (not
# conflicting with other targets) and allows the calling remake file to use
# ind_file as a target (even though this target is the one responsible for
# actually writing to ind_file)
# default is to use the makefile naming to assign job name
# when makefile isn't used (string output), we use the final_targets to create default job name
if (is.null(makefile)){
if (missing(final_targets) || length(final_targets) != 1){
job_name <- "all_tasks"
} else {
job_name <- paste0(tools::file_path_sans_ext(basename(final_targets)), '_all')
}
} else {
job_name <- tools::file_path_sans_ext(basename(makefile))
}
if(missing(final_targets)){
ind_ext <- getOption("scipiper.ind_ext") # put this here because if you want to control the extension, specify the final_targets
ind_dir <- attr(task_plan, 'ind_dir') # if you want to control the dir over default, specify the final_targets
if (is.null(ind_dir)){
final_targets <- as_ind_file(job_name, ind_ext)
} else {
final_targets <- file.path(ind_dir, as_ind_file(job_name, ind_ext))
}
}
if (!is.null(finalize_funs) & length(final_targets) != length(finalize_funs)){
stop('when specifying function names for `finalize_funs`, an equal number of `final_targets` need to be specified')
}
job_steps <- attr(task_plan, 'final_steps')
targets_to_combine <- unlist(lapply(task_plan, function(task) {
lapply(task$steps[job_steps], function(step) {
step$target_name
})
}), use.names=FALSE)
# prepare the task list for rendering
tasks <- unname(task_plan) # remove list element names where they'd interfere with whisker.render
for(task in seq_along(tasks)) {
# remove lmore ist element names where they'd interfere with whisker.render
tasks[[task]]$steps <- unname(tasks[[task]]$steps)
for(step in seq_along(tasks[[task]]$steps)) {
# add a logical for has_depends
tasks[[task]]$steps[[step]]$has_depends <- length(tasks[[task]]$steps[[step]]$depends) > 0
}
}
pos_extensions <- unique(c(remake::file_extensions(), file_extensions))
formatted_final_targets <- sapply(X = targets_to_combine, function(target){
if (('remake' %:::% 'target_is_file')(target, file_extensions = pos_extensions)){
paste0("'", target, "'")
} else {
if(tickquote_combinee_objects) paste0("`", target, "`") else target
}
})
if (is.null(finalize_funs)){
# when there is no finalize fun, the "jobs" are the final step targs and they have no new commands
job <- lapply(seq_len(length(formatted_final_targets)), function(i){
list(
target_name = unname(formatted_final_targets[i])
)
})
has_finalize_funs <- FALSE
} else {
has_finalize_funs <- TRUE
job <- lapply(seq_len(length(finalize_funs)), function(i){
list(
target_name = ifelse(as_promises, paste0(basename(final_targets[i]), "_promise"), final_targets[i]),
command = {
to_combine <- paste(formatted_final_targets, collapse=',\n ')
target_is_file <- ('remake' %:::% 'target_is_file')(final_targets[i], file_extensions = pos_extensions)
# hide the object/file changes as input when using `promises`
if (target_is_file){
ifelse(as_promises, {
sprintf("%s(I('%s'),\n %s)", finalize_funs[i], final_targets[i], to_combine)
}, {
sprintf("%s(target_name,\n %s)", finalize_funs[i], to_combine)
})
} else {
if (finalize_funs[i] == 'combine_to_ind')
stop('cannot use an object target for combine_to_ind(), must specify a file target',
'\nyou used ', final_targets[i])
combine_str <- "%s(\n %s)"
sprintf(combine_str, finalize_funs[i], to_combine)
}
})
})
}
# Gather info about how this function is being called
this_fun <- as.character(sys.call(0)[[1]])
calling_fun <- as.character(sys.call(-1)[[1]])
call_info <- if(length(calling_fun) == 0) {
sprintf('%s()', this_fun)
} else {
sprintf('%s() via %s()', this_fun, calling_fun)
}
# prepare the final list of variables to be rendered in the template
params <- list(
job = job,
target_default = job_name,
include = include,
has_include = length(include) > 0,
packages = packages,
has_packages = length(packages) > 0,
sources = sources,
has_sources = length(sources) > 0,
file_extensions = file_extensions,
has_file_extensions = length(file_extensions) > 0,
tasks = tasks,
has_finalize_funs = has_finalize_funs,
rendering_function = call_info
)
# read the template
template <- readLines(template_file)
# render the template
yml <- whisker::whisker.render(template, data=params)
yml <- gsub('[\n]{3,}', '\\\n\\\n', yml) # reduce 3+ line breaks to just 2
#yml <- trim(yml) # chop off any trailing white space or line endings (write_lines will add back a final line ending for us)
yml <- paste0(substring(yml, 1, nchar(yml)-1), gsub('\\n', '', substring(yml, nchar(yml)))) # chop off the final character if it's a newline
if (is.null(makefile)){
makefile <- yml
} else {
readr::write_lines(yml, path=makefile)
if(has_finalize_funs) {
how_to_run <- paste0(
"Run all tasks and finalizers with:\n",
paste(sapply(seq_along(job), function(job_id) {
finalizer <- job[[job_id]]
parent_target_name <- if(as_promises) final_targets[[job_id]] else sprintf("your_target_name_%d", job_id)
sprintf(
"%s:\n command: scmake(I('%s'), remake_file='%s')",
parent_target_name, finalizer$target_name, normalizePath(makefile, winslash='/'))
}), collapse='\n')
)
} else {
how_to_run <- sprintf(
"Run all tasks with\n%s:\n command: scmake(remake_file='%s')\n",
final_targets, makefile)
}
message(how_to_run)
}
# write the makefile and return the file path
return(makefile)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.