#' Get a list of all targets in a remake file
#'
#' @param remake_file filename of the remake YAML file from which targets should
#' be collected
#' @param recursive logical. if TRUE, result will include all targets from any
#' YAMLs listed in the include: section of the given remake_file, or any YAMLs
#' listed in the include: sections of those included YAMLs, etc.
#' @return vector of all target names explicitly declared in this remake_file
#' (and if recursive=TRUE, also the names of targets declared in remake files
#' included by this remake_file)
#' @export
#' @examples
#' \dontrun{
#' # assuming you have a file named remake.yml:
#' list_all_targets() # get status for all explicitly named targets in remake.yml
#'
#' # status for all targets in a different remake YAML:
#' list_all_targets('other_remake.yml')
#'
#' # status for all targets in remake.yml and any remake YAMLs included by remake.yml
#' list_all_targets(recursive=TRUE)
#' }
list_all_targets <- function(remake_file=getOption('scipiper.remake_file'), recursive=FALSE) {
# load the remake file as a yaml and as remake loads it
remake_list <- yaml::yaml.load_file(remake_file)
# get all explicitly defined targets
targets <- names(remake_list$targets)
# exclude remake keyword targets, which can be explicit even though they're
# usually not
targets <- setdiff(targets, c('tidy','clean','purge'))
# if requested, include targets of the included ymls
if(isTRUE(recursive)) {
includes <- remake_list$include
nested_targets <- unlist(lapply(includes, list_all_targets))
targets <- c(targets, nested_targets)
}
# if we wanted to add more info about these targets, we could return the following instead:
# remake_object <- ('remake' %:::% 'remake')(remake_file=remake_file, verbose=FALSE, load_sources=FALSE)
# remake_object$targets[targets]
# return a simple vector of target names
targets
}
#' list targets in remake file that are fake/dummy/group targets
#'
#' @param remake_file filename of the remake YAML file from which targets should
#' be collected
#'
#' @details this is an internal file. We may want to make it recursive, but I
#' don't see a use-case right now for that.
#' @keywords internal
list_group_targets <- function(remake_file=getOption('scipiper.remake_file')){
# load the remake file as a yaml and as remake loads it
remake_list <- yaml::yaml.load_file(remake_file)
# get all explicitly defined targets
targets <- names(remake_list$targets)
targets[sapply(remake_list$targets, FUN = function(x) is.null(x$command), USE.NAMES = FALSE)]
}
#' Produce a table describing the remake build status relative to 1+ targets
#'
#' Describes the hash, build time, and dependencies as of the most recent build
#' of each target. If the dependencies have changed since that last build, those
#' new dependencies are not described here. See why_dirty() to compare the old
#' and new dependencies.
#'
#' Runs faster when RDSify_first = FALSE, but the default is TRUE so that you
#' can usually pretend that there's no extra step between a git pull and
#' updating the remake database.
#'
#' @param target_names character vector of targets for which to determine build
#' status, including status for dependencies of the named targets. If NULL
#' will return status for the default target and its dependencies.
#' @param remake_file filename of the remake YAML file from which status should
#' be determined
#' @param format if "nested", returnes a nested tibble with tibble list columns
#' for 'depends' and 'functions'. if "flat", returns character columns for
#' 'depends' and 'functions' that are ugly, flattened versions of the tibbles.
#' "flat" can be useful for comparing status before and after an operation
#' using dplyr joins (e.g., within `scmake()`).
#' @param RDSify_first logical. Should the info in build/status/*.yml files be
#' copied over to the remake RDS-based status database before querying for
#' target cleanliness?
#' @export
#' @examples
#' \dontrun{
#' # assuming you have a file named remake.yml:
#' get_remake_status() # get status for the default target and its dependencies
#' get_remake_status(list_all_targets(), RDSify_first=FALSE) # status for all targets in remake.yml
#'
#' # or to get status for all targets in a different remake YAML:
#' get_remake_status(list_all_targets('other_remake.yml'), 'other_remake.yml')
#' }
get_remake_status <- function(target_names=NULL, remake_file=getOption('scipiper.remake_file'), format=c('nested', 'flat'), RDSify_first=TRUE) {
format <- match.arg(format)
# sync the remake status database with the scipiper status database if requested
if(isTRUE(RDSify_first)) {
RDSify_build_status(new_only=FALSE, remake_file=remake_file)
}
# collect information about the current remake database. do load sources to get the dependencies right
remake_object <- ('remake' %:::% 'remake')(remake_file=remake_file, verbose=FALSE, load_sources=TRUE)
# make sure target_names is concrete and sensible
if(is.null(target_names)) target_names <- remake_object$default_target
unknown_targets <- setdiff(target_names, names(remake_object$targets))
if(length(unknown_targets) > 0) stop('unknown targets: ', paste(unknown_targets, collapse=', '))
# create a table of all targets relevant to target_names, with T/F fields for
# dirty and dirty_by_descent. remake_status doesn't actually get used by
# remake::make, and the code is full of caveats that make me wonder if they
# believe it...but it's the nearest thing to a current status report that I've
# found so far
graph <- ('remake' %:::% 'remake_dependency_graph')(remake_object)
target <- current <- dirty <- dirty_by_descent <- '.dplyrvar'
currentness <- ('remake' %:::% 'remake_status')(remake_object, target_names, graph) %>% # matrix
as.data.frame() %>%
tibble::rownames_to_column(var='target') %>% as_tibble() %>% # convert to tibble with rownames as 'target' column
dplyr::mutate( # mutate maintains a 0-row tibble if it started that way
current = ('remake' %:::% 'remake_is_current')(remake_object, target)) %>%
dplyr::select(target, current, dirty, dirty_by_descent)
# extract dependency info for each target
dependencies <- lapply(currentness$target, function(target_name) {
# I can't decide whether this should be as_of 'now' or 'last_build', or if
# it even makes sense to report this stuff in a table that also reports
# current (which compares 'now' to 'last_build'). I'm trying last_build
# for now.
status_old <- get_dependency_status(target_name, remake_object, as_of='last_build', format='wide')
}) %>%
bind_rows()
# combine into a single tibble
status <- full_join(currentness, dependencies, by='target')
name <- hash <- info <- depends <- functions <- '.dplyrvar'
# return
if(format == 'nested') {
return(status)
} else {
flatten_nested <- function(nested_df) {
nested_df %>% mutate(info = paste(name, hash, sep=':')) %>% pull(info) %>% paste(collapse=':::')
}
status_flat <- status %>%
mutate(
depends = sapply(depends, flatten_nested),
functions = sapply(functions, flatten_nested))
return(status_flat)
}
}
#' Retrieve status of dependencies from the remake store
#'
#' Returns placeholders if the object doesn't yet exist. hash=NA when the target
#' hasn't yet been built, or '??' when the target is fake, or a hexcode string
#' otherwise.
#'
#' @param target_name length-1 name of a target
#' @param remake_object as produced by remake:::remake
#' @param as_of what status should be reflected - status as of the last build
#' (from the remake database) or status if we considered it built right now?
#' @param format return in remake's list format ('raw'), a one-row tibble with
#' nested tibbles for depends and code ('wide'), or a multi-row tibble with
#' columns for name, dependency type, and dependency hash ('long')?
#' @importFrom purrr map2_chr
get_dependency_status <- function(
target_name,
remake_object = ('remake' %:::% 'remake')(remake_file=getOption('scipiper.remake_file'), verbose=FALSE, load_sources=TRUE),
as_of=c('last_build', 'now'), format=c('raw', 'wide', 'long')) {
# process arguments
target <- remake_object$targets[[target_name]]
store <- remake_object$store
as_of <- match.arg(as_of)
format <- match.arg(format)
# helper
null_to_na <- function(x) {
if(is.null(x)) NA else x
}
# mimic remake's behavior, using existing remake functions where possible
status_list_default <- list(
version=store$version,
name=target_name,
type=null_to_na(target$type),
hash=as.character(NA),
time=as.POSIXct(NA),
depends=list(),
fixed=NULL,
code=list())
status_list <- if(is.null(target)) {
status_list_default
} else if(target$type == 'fake') {
replace(status_list_default, 'hash', '??')
} else if('target_file_implicit' %in% attr(target, 'class')) {
# target_file_implicits don't have a target rule (code) to check, so
# just check depends. and return time as the time of last file update,
# because Sys.time() just doesn't make sense
status_list_implicit <- ('remake' %:::% 'dependency_status')(target, store, missing_ok=TRUE, check='depends')
status_list_implicit$time <- file.mtime(target$name)
status_list_implicit
} else {
switch(
as_of,
'last_build' = tryCatch({
store$db$get(target_name) # hash is NA for unbuilt targets
}, error=function(e) {
status_list_default # target hasn't been built
}),
'now' = ('remake' %:::% 'dependency_status')(target, store, missing_ok=TRUE, check='all') # hash is NA for unbuilt targets
)
}
if(format == 'raw') return(status_list)
# override depends to include dependencies of fake targets, and to include
# fake targets as dependencies (remake does neither; only file and object
# dependencies of file or object targets are reported)
if(is.null(target) || length(target$depends_name) == 0) {
missing_depends <- tibble(type='', name='')[c(),]
} else if(target$type == 'fake') {
missing_depends <- tibble(type = target$depends_type, name = target$depends_name)
} else {
type <- '.dplyrvar'
missing_depends <- tibble(type = target$depends_type, name = target$depends_name) %>%
dplyr::filter(type == 'fake')
}
if(nrow(missing_depends) > 0) {
# we don't actually know which targets were dependencies during the
# last_build, so we've had to assume that they're the same as those that are
# dependencies now. we'll also take this into account in why_dirty().
if(target$type == 'fake' && as_of == 'last_build') {
#warning(sprintf("guessing names and returning hash='??' for depends of fake target '%s' during last build", target$name))
} else {
if(as_of == 'last_build') {
#warning(sprintf("guessing names and returning hash='??' for fake depends of target '%s' during last build", target$name))
} else if(any(missing_depends$type == 'fake')) {
# fake dependencies have no hash available through remake, hence the warning about hash=?? even when as_of=='now'
#warning(sprintf("returning hash='??' for fake depends of target '%s'", target$name))
}
}
status_list$depends <- c( # this element is a named vector of hashes
status_list$depends, # append the new dependencies to the current ones
setNames(
purrr::map2_chr(missing_depends$name, missing_depends$type, function(name, format) {
if(format %in% c('file', 'object')) {
switch(
as_of,
'last_build' = '??',
'now' = store$get_hash(name, format, missing_ok=TRUE)) # returns NA if missing
} else { # fake
'??'
}
}),
missing_depends$name))
}
# avoid "no visible binding for global variable ..." in R CMD check
hash <- name <- fixed <- '.dplyrvar'
# long format
status_long <- bind_rows(
tibble(type = 'target', name = status_list$name, hash = status_list$hash),
if(length(status_list$depends) > 0) {
tibble(type='depends', name=names(status_list$depends), hash=unlist(unname(status_list$depends)))
},
if(!is.null(status_list$fixed)) {
tibble(type='fixed', name=NA, hash=status_list$fixed)
},
if(length(status_list$code$functions) > 0) {
tibble(type='function', name=names(status_list$code$functions), hash=unlist(unname(status_list$code$functions)))
}
) %>%
mutate(
hash = ifelse(is.na(hash), 'none', hash)) # NAs are really inconvenient to deal with below, and 'none' is a little clearer anyway
if(format == 'long') return(status_long)
# wide format
status_wide <- tibble(
target = status_list$name,
type = status_list$type,
hash = status_list$hash,
time = status_list$time,
depends = status_long %>% dplyr::filter(type=='depends') %>% select(name, hash) %>% list(),
fixed = null_to_na(status_list$fixed),
functions = status_long %>% dplyr::filter(type=='function') %>% select(name, hash) %>% list()
) %>%
mutate(
fixed = as.character(fixed),
hash = ifelse(is.na(hash), 'none', hash))
if(format == 'wide') return(status_wide)
stop('a status report should have been returned by now')
}
#' List the targets for which current is FALSE (as a character vector)
#'
#' Runs faster when RDSify_first = FALSE.
#'
#' @param target_names character vector of targets for which to determine build
#' status, including status for dependencies of the named targets. If NULL
#' will return status for the default target and its dependencies.
#' @param remake_file filename of the remake YAML file from which status should
#' be determined
#' @param RDSify_first logical. Should the info in build/status/*.yml files be
#' copied over to the remake RDS-based status database before querying for
#' target cleanliness?
#' @export
#' @examples
#' \dontrun{
#' # assuming you have a file named remake.yml:
#' get_remake_status() # get status for the default target and its dependencies
#' get_remake_status(list_all_targets()) # get status for all explicitly named targets in remake.yml
#'
#' # or to get status for all targets in a different remake YAML:
#' get_remake_status(list_all_targets('other_remake.yml'), 'other_remake.yml')
#' }
which_dirty <- function(target_names=NULL, remake_file=getOption('scipiper.remake_file'), RDSify_first=TRUE) {
# get the full status information for all relevant targets
status <- get_remake_status(target_names=target_names, remake_file=remake_file, RDSify_first=RDSify_first)
# pick out the non-current targets. It's possible for a file to be current,
# !dirty, and yet dirty_by_descent, in which case remake will rebuild it, so
# define non-current as a combination of all three logicals
not_dirty <- current <- dirty <- dirty_by_descent <- target <- '.dplyrvar'
status %>%
mutate(not_dirty = current & !dirty & !dirty_by_descent) %>%
dplyr::filter(!not_dirty) %>%
pull(target)
}
#' Explain why a target is dirty
#'
#' Compares the dependency status of a target (1) as it currently appears in the
#' repo and (2) as documented in the remake database. Returns information about
#' the mismatches that are causing remake to consider this target dirty.
#'
#' @param target_name character name of the target for which to determine build
#' status, including status for dependencies of the named target. Exactly one
#' is required.
#' @param remake_file filename of the remake YAML file from which status should
#' be determined
#' @param RDSify_first logical. Should the info in build/status/*.yml files be
#' copied over to the remake RDS-based status database before querying for
#' target cleanliness? Defaults to FALSE because if you're asking why, you've
#' probably already queried to determine that the target is dirty
#' @return Interpretive messages are printed to the console. The return value is
#' a tibble. The first row is information about the target in question.
#' hash_old is the hash of that row's target (see 'name') as of the last
#' documented build. hash_new is the hash of the current file or object, which
#' might have changed if the file (or object) has been edited. hash_old and
#' hash_new are '??' if they refer to a fake target or unrecorded previous
#' build, 'none' if the file or object does not (or did not) exist, and a
#' hexcode hash otherwise. hash_mismatch is NA if there are '??' values in the
#' hashes (because we can't determine whether the hashes have changed).
#' 'dirty' is reported by remake and reports whether the object has altered
#' inputs (files, objects, fixed arguments, and/or functions).
#' 'dirty_by_descent' is as reported by remake and reports whether the
#' object's dependencies themselves have altered inputs. 'current' is the
#' output of remake::is_current().
#' @export
why_dirty <- function(target_name, remake_file=getOption('scipiper.remake_file'), RDSify_first=FALSE) {
# quick argument check
stopifnot(length(target_name) == 1 & is.character(target_name))
# avoid "no visible binding for global variable XX" in R CMD check
type <- name <- current <- target <- dirty <- dirty_by_descent <- hash_old <- hash_new <- hash_mismatch <- definitely_dirty <- '.dplyrvar'
# collect information about the current remake database. do load sources to get the dependencies right
remake_object <- ('remake' %:::% 'remake')(remake_file=remake_file, verbose=FALSE, load_sources=TRUE)
# target_name ought to be the name of a real target
if(!exists(target_name, remake_object$targets)) {
stop(sprintf("target_name '%s' not found in the remake files", target_name))
}
# get any pre-existing dependency information
old_status <- get_dependency_status(target_name, remake_object, as_of='last_build', format='long')
new_status <- get_dependency_status(target_name, remake_object, as_of='now', format='long')
# check for dependencies that haven't been rebuilt but need to be (dirty_by_descent)
new_depends <- dplyr::filter(new_status, type=='depends') %>% pull(name)
graph <- ('remake' %:::% 'remake_dependency_graph')(remake_object)
currentness <- ('remake' %:::% 'remake_status')(remake_object, target_name, graph) %>% # matrix
as.data.frame() %>%
tibble::rownames_to_column(var='target') %>% as_tibble() %>% # convert to tibble with rownames as 'target' column
dplyr::mutate( # mutate maintains a 0-row tibble if it started that way
current = ('remake' %:::% 'remake_is_current')(remake_object, target)) %>%
dplyr::select(target, dirty, dirty_by_descent, current)
# check that it's actually dirty. It's possible for a file to be current,
# !dirty, and yet dirty_by_descent, in which case remake will rebuild it, so
# define dirty as the combination of all three logicals. Also handle the special
# case where remake doesn't actually think it's dirty but the file has changed
if(!target_name %in% which_dirty(target_name, remake_file=remake_file, RDSify_first=FALSE)) {
old_target <- dplyr::filter(old_status, type=='target')
new_target <- dplyr::filter(new_status, type=='target')
if(!is.na(old_target$hash) && !is.na(new_target$hash) && (old_target$hash != new_target$hash)) {
message(sprintf("target '%s' is not dirty in remake's eyes, but its hashes are different so returning those.", target_name))
info_df <- full_join(old_target, new_target, by=c('type','name'), suffix=c('_old','_new')) %>%
mutate(hash_mismatch = TRUE) %>%
left_join(currentness, by=c(name='target'))
return(info_df)
} else {
stop(sprintf("target '%s' is not dirty", target_name))
}
}
# compare. Truth table for hash_mismatch:
# no ?? aa bb
# no F NA T T
# ?? NA NA NA NA
# aa T NA F T
# bb T NA T F
status_compare <- full_join(old_status, new_status, by=c('type','name'), suffix=c('_old', '_new')) %>%
mutate(hash_mismatch = ifelse(
hash_old == 'none' & hash_new == 'none',
FALSE, # never built, therefore hashes haven't changed
ifelse( # now we know there are no 'none's
hash_old == '??' | hash_new == '??',
NA, # we just can't know because we're missing information
hash_old != hash_new # the main case: built or not before and definitely built now, looks different
)
)) %>%
left_join(currentness, by=c('name'='target')) %>%
mutate( # fill in dirtiness for fixed and functions, which can never themselves by dirty
dirty_by_descent = ifelse(is.na(dirty_by_descent), FALSE, dirty_by_descent),
dirty = ifelse(is.na(dirty), FALSE, dirty),
current = ifelse(is.na(current), TRUE, current))
# interpret
dirty_rows <- dplyr::filter(status_compare, (hash_mismatch | is.na(hash_mismatch) | dirty | dirty_by_descent) & type != 'target') %>%
mutate(definitely_dirty = !is.na(hash_mismatch) | dirty | dirty_by_descent)
target_type <- remake_object$targets[[target_name]]$type
if((dplyr::filter(status_compare, type == 'target') %>% pull(hash_new)) == 'none') {
message(sprintf("The target '%s' does not exist", target_name))
} else if(target_type == 'fake') {
preamble <- "Fake targets are never 'current'."
def_dirty <- pull(dplyr::filter(dirty_rows, definitely_dirty), name)
pos_dirty <- pull(dplyr::filter(dirty_rows, !definitely_dirty), name)
if(length(c(def_dirty, pos_dirty)) > 0) {
preamble <- paste(preamble, sprintf("Also, the fake target '%s' has", target_name))
if(length(def_dirty) > 0) message(paste(c(paste(preamble, "these dirty dependencies:"), def_dirty), collapse='\n * '))
if(length(pos_dirty) > 0) message(paste(c(sprintf("%s these possibly dirty dependencies:", if(length(def_dirty) > 0) 'and' else preamble), pos_dirty), collapse='\n * '))
} else {
message(preamble)
}
} else {
if(nrow(dirty_rows) == 0) {
warning(sprintf("Uh oh, can't explain why '%s' is dirty. Please file a bug report.", target_name))
}
explanations <- sapply(1:nrow(dirty_rows), function(i) {
row <- dirty_rows[i,]
explanation <- if(isTRUE(row$hash_mismatch)) {
switch(
row$type,
'depends' = sprintf("the dependency '%s' has changed", row$name),
'fixed' = sprintf("the fixed arguments (character, logical, or numeric) to the target's command have changed", row$name),
'function' = sprintf("the function '%s' used by the target has changed", row$name)
)
} else if(is.na(row$hash_mismatch)) {
if(row$type == 'fixed') {
"the fixed arguments might have changed"
} else {
sprintf("the dependency '%s' might have changed", row$name)
}
} else if(row$dirty_by_descent) {
sprintf("the dependency '%s' depends on dirty targets", row$name)
} else if(row$dirty) {
sprintf("the dependency '%s' depends on files, objects, fixed arguments, or functions that have changed", row$name)
}
if(exists(row$name, remake_object$targets) && remake_object$targets[[row$name]]$type == 'fake') {
paste(explanation, "(but note that remake ignores fake targets when assessing currentness)")
}
explanation
})
message(paste(c(sprintf("Since the last build of the target '%s':", target_name), explanations), collapse='\n * '))
}
# return
return(status_compare)
}
#' Get the file path and name for a scipiper build/status/*.yml files
#'
#' Internal helper function with limited functionality for now. Before making
#' this an exported function, if that's ever needed, we'd need some error
#' checking to make sure these are indicator files, see whether the .yml files
#' actually exist, and maybe provide suggestions if they don't.
#' @param target_names names of targets. should be .ind files
#' @param remake_file name of the remake_file where target_names can be found
#' (recursively if needed)
locate_build_status_yml <- function(target_names, remake_file=getOption('scipiper.remake_file')) {
remake_object <- ('remake' %:::% 'remake')(remake_file=remake_file, verbose=FALSE, load_sources=FALSE)
mangled_keys <- get_mangled_key(target_names, remake_object$store$db)
sprintf('build/status/%s.yml', mangled_keys)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.