# R CMD CHECK generates a warning because 'found' is not a bound variable
# it's lifted from a column to a variable in 'stats::aggregate'
# This line of code tells R that this is intentional.
utils::globalVariables("found")
stash_filepath <- function(fun, argslist, postfix = NULL) {
file.path(
getOption("stash.cache_path"),
paste0(
paste0(
stable_digest(fun),
stable_digest(argslist),
postfix,
collapse = "-"
),
".rds"
)
)
}
lookup_in_dispatch <- function(metadata, envir) {
# TODO: what does each step here do and why?
df_names <- data.frame(name = unique(metadata$name))
df_names$digest <- sapply(df_names$name, function(nm) {stable_digest(envir[[nm]])}, USE.NAMES = F)
df_names$found <- T
df_merged <- merge(metadata, df_names, by = c("name", "digest"), all.x = T)
df_merged$found <- !is.na(df_merged$found)
df_agg <- aggregate(found ~ postfix, df_merged, all)
df_subset <- subset(df_agg, found)
return(df_subset)
}
#' Execute a Function Call and Stash the Result
#'
#' Similar to `do.call`, `do.call.stash` constructs and executes a function call.
#' Additionally, `do.call.stash` stores the result indexed by the function body,
#' its arguments, and its dependencies.
#'
#' @param fun Function to apply. Unlike `do.call`, strings naming the function are not supported
#' @param argslist List of arguments to `fun`, in order.
#'
#' @examples
#' \dontrun{
#' heavy_function <- function(x) {
#' Sys.sleep(3)
#' mean(x)
#' }
#'
#' result <- NULL
#' system.time({result <- do.call.stash(heavy_function, list(c(1:20)))})
#' result
#' system.time({result <- do.call.stash(heavy_function, list(c(1:20)))})
#' result
#' }
#'
#' @importFrom stats aggregate
#' @export do.call.stash
do.call.stash <- function(fun, argslist) {
envir <- fn_env(fun)
metadata <- NULL
# First, look for the default file.
first_fname <- stash_filepath(fun, argslist)
# If it's a "result",
if (file.exists(first_fname)) {
first_data <- readRDS(first_fname)
if (first_data$type == "result") {
# In this case, we're good to go. This body and argument values do not call
# anything else in the neighboring environment.
log_info("Loading cached file {first_fname}", namespace = 'stash')
return(first_data$data)
}
else if (first_data$type == "dispatch_table") {
# Ok, we need to do some matching.
metadata <- first_data$data
dispatch_table_matches <- lookup_in_dispatch(metadata, envir)
if (nrow(dispatch_table_matches) > 1) {
log_error("Improper number of matching rows in metadata table; ignoring cache.", namespace = 'stash')
} else if (nrow(dispatch_table_matches) == 1) {
result_fname <- stash_filepath(fun, argslist, dispatch_table_matches$postfix)
log_info("Loading cached file {result_fname}", namespace = 'stash')
return(readRDS(result_fname))
}
} else {
log_error("Metadata entry could not be read; ignoring cache.", namespace = 'stash')
}
}
# The code flow reaches here either if the file does not exist, the specific bunch of environment variables were not present,
# or the metadata could not be processed reasonably [along with warning.]
# Computing
log_info("Computing result to save", namespace = 'stash')
# Set up the fancy environment to run 'fun' within
wrapped_envir <- wrap_environment(envir)
fn_env(fun) <- wrapped_envir
# Run the computation
result <- do.call(fun, argslist)
# How many objects were accessed in the environment?
accessed <- wrapped_envir$..stash_accessed
if (length(accessed) == 0) {
# then just cache the result, this has no sourced dependencies.
saveRDS(list("type" = "result", "data" = result), file = first_fname)
log_info("Saving at {first_fname}", namespace = 'stash')
} else {
# ok, do i need a new metadata table?
if (is.null(metadata)) {
metadata <- data.frame(postfix = character(), name = character(), digest = character())
}
accessed_names <- names(accessed)
accessed_digest <- sapply(accessed_names, function(nm) {accessed[[nm]]}, USE.NAMES = F)
# compute postfix... (it doesn't need to be recomputable, just needs to have unlikely collisions)
postfix <- digest(accessed_digest)
# add entries to the metadata table
metadata <- rbind(metadata, data.frame(postfix = postfix, name = accessed_names, digest = accessed_digest))
result_fname <- stash_filepath(fun, argslist, postfix)
log_info("Saving metadata at {first_fname} and file at {result_fname}.", namespace = 'stash')
# save the metadata table and result
saveRDS(list("type" = "dispatch_table", "data" = metadata), file = first_fname)
saveRDS(result, file = result_fname)
}
return(result)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.