##' @importFrom utils head read.csv tail
NULL
## Other caching mechanisms in R:
## Biobase::cache Checks in cache for
## LHS of LHS<-RHS expression and only evaluates RHS if not found in
## cache
## R.cache package
## cacheSweave
## knitr with cache=TRUE
## SOAR package, not really the same thing, basically create lazyload dbs for
## things on the fly and take them out of memory?
## cacher
## memoize - Caches output of function call, immediately returns
## output if function is called again with same inputs. Not clear if
## it can track more than one set of inputs. I think it may actually
## modify the function itself akin to trace/debug
#this is a list of classes we know correspond to plots, and whose show
#functions are assumed to draw the plot. grobs (grid low level
#objects for image components) are drawn by default when they are
#created, but their show/print commands do NOT draw them, so they are
#excluded here
gclasses = c("trellis", "ggplot", "gg", "ggbio", "recordedplot")
#' @title Evaluation functions and return value handlers
#'
#' @description RCacheSuite allows users to specify custom evaluation functions,
#' which can be used to capture information about side effects, and return value
#' handlers, which can then use that information to recreate the side effects.
#'
#' In practice, the behavior of the default evaluation function
#' (\code{parseWithVis}) and return handler (\code{withVizHandler}) should meet
#' users' needs the majority of the time.
#'
#' @param code The code to evaluate
#' @param env The evaluation environment/environment the return handler should
#' populate
#' @param ... Generally unused.
#' @param val ANY. The object returned from the evaluation function
#' @param graphics PlotList. The graphics captured during evaluation
#' @param evaled Logical. Indicates whether evaluation occured (it is passed
#' \code{FALSE} when values are loaded from cache).
#' @param last Logical. Is the value being processed the output of the final
#' expression in the code being handled.
#'
#' @details
#' Evaluation functions must accept \code{code}, \code{env}, and \code{...}.
#'
#' Return value handlers must accept \code{val}, \code{graphics},
#' \code{env}, and \code{...}. They will also be passed \code{evaled}
#' and \code{last} when called by the caching machinery, and can
#' handle those specifically or catch them in \code{...}.
#'
#'\code{parseEval} simply parses and evaluates the code, returning the
#' value returned by the call to \code{eval}.
#'
#' \code{parseWithVis} evaluates the code using \code{withVis} and
#' returns a \code{WithVisValue} object with the resulting return
#' value and visibility information.
#'
#' \code{withVisHandler} accepts the \code{WithVisValue} object
#' returned by \code{parseWithVis} as well as printing the final
#' result if the \code{val$visible} is \code{TRUE}. It also draws any
#' plots stored in the cache in the case that \code{evaled} is
#' \code{FALSE} (if it were \code{TRUE} those plots would have
#' already been generated during the evaluation). Appropriate for use
#' with \code{parseWithVis}.
#'
#' \code{returnRaw} recreates any plots recorded in the cache (always,
#' not differentiating between evaluation and cache loading) and
#' returns \code{val} unmodified. Appropriate for use with \code{parseEval}
#'
#' \code{noGraphicsRaw} returns \code{val} unmodified without
#' recreating any plots.
#'
#' @return Evaluation functions should return the result of evaluating
#' the code, along with information sufficient to recreate any
#' non-plotting side-effects the user wishes to record and
#' recreate on loading from cache. Return value handlers should
#' recreate those side effects, and can then call \code{returnRaw}
#' or \code{withVisRaw} to recreate plots and return the value
#' from a raw return \code{val} or \code{WithVisValue} val object,
#' respectively. \code{WithVisValue} objects are created
#' automatically by the default evaluate function.
#' @author Gabriel Becker
#' @export
#' @rdname evalhandlers
parseEval = function(code, env, ...) eval(parse(text=code), envir=env)
#' @export
#' @rdname evalhandlers
parseWithVis = function(code, env, ...) {
# ret = withVisible(eval(parse(text=code), envir= env))
ret = withVisible(eval(code, envir = env))
new("WithVisValue", value = ret$value, visible = ret$visible)
}
#@val is an arbitrary object in the exact form returned by the evaluation function
#@graphics is a PlotList object (essentially a list) with 0 or more recordedplot objects in it
#' @export
#' @rdname evalhandlers
withVisHandler = function(val, graphics, env, evaled = FALSE, last = FALSE, ...)
{
raw = withVisRaw(val, graphics, env, evaled, last, ...)
invisible(raw@value)
}
#' @export
#' @rdname evalhandlers
withVisRaw = function(val, graphics, env, evaled = FALSE, last = FALSE, ...)
{
if(!is(val, "WithVisValue"))
stop("the withVisHandler return handler function expects an object of class 'WithVisValue', got an object of class: ", class(val))
if(length(graphics) && !evaled)
lapply(graphics, redrawPlot)
if(val@visible && last)
show(val@value)
if(!is(graphics, "PlotList")) {
if(!is.null(graphics))
graphics = list(graphics)
else
graphics = list()
graphics = as(graphics, "PlotList")
}
val = new("WithVisPlusGraphics", value = val@value, visible = val@visible && last, graphics = graphics)
invisible(val)
}
#' @export
#' @rdname evalhandlers
returnRaw = function(val, graphics, env, evaled = FALSE, ...)
{
if(length(graphics) && !evaled)
lapply(graphics, redrawPlot)
val
}
#' @export
#' @rdname evalhandlers
noGraphicsRaw = function(val, graphics, env, evaled = FALSE, ...)
{
val
}
missingornull = function(x) missing(x) || is.null(x)
#' @title evalWithCache
#'
#' @description This function accepts a piece of R code and performs
#' state (input variable value) dependent cached evaluation of it.
#'
#' @param code The code to evaluate
#' @param codeInfo A ScriptNodeInfo object, defaults to the result of
#' calling \code{getInputs} on the \code{code} with default
#' handling options
#' @param inputVars A vector of variable names which are inputs to the
#' code block. Overrides automatically detected inputs
#' @param outputVars A vector of variable name which are outputs
#' generated by the code block. Overrides automatically detected
#' outputs
#' @param cache The CachingEngine to use when evaluating the
#' code. Defaults to an engine which writes caches out
#' immediately, mimicing caching behavior of other systems. See
#' note.
#' @param eval_fun A function to perform evaluation of the
#' code. Defaults to the code evaluator associated with
#' \code{cache}
#' @param return_handler A function to be called on the value returned
#' by evaluating the code (or loading from cache). Defaults to
#' return handler associated with \code{cache}
#' @param env The environment in which to evaluate the code or load
#' the cached outputs
#' @param force A logical indicating whether to force a cache
#' refresh. If \code{TRUE}, cached values will NOT be used, but a
#' new cache will be written after evaluating
#' \code{code}. Defaults to \code{FALSE}
#' @param cacheRand A logical indicating whether to create/use caches
#' for expressions with a detected random component. Defaults to
#' FALSE
#' @param verbose Should extra informative messages be
#' emitted. Defaults to \code{FALSE}
#' @param gexts A vector of graphics file extensions for caching
#' graphics
#' @param gdev A vector of graphics devices to use to generate the
#' files indicated by \code{gexts} for caching graphics. Defaults
#' to functions named directly for their extensions (e.g.,
#' \code{png}).
#' @param last logical. Is this the last expression in the codeblock
#' being evaluated. Passed to return handler. Generally this
#' should not be manually set.
#' @param stopMissingInput Should an error be thrown when an input
#' variable is not present in the evaluation scope. If
#' \code{FALSE} a warning will be emitted. Defaults to
#' \code{FALSE}
#' @param singleEntFun A function, or NULL. If a function, it is
#' called on \code{codeInfo} to determine if \code{code} should be
#' treated as a single entity for caching purposes. If
#' \code{NULL}, or a function that returns \code{FALSE},
#' expressions in \code{code} are evaluated and cached
#' separately. Defautls to \code{sameOutVar}
#' @param unCacheable A function which accepts a ScriptNodeInfo object
#' and returns a logical value indicating whether the computation
#' is cacheable. Defaults to \code{mustForce}
#' @param ... Passed to \code{eval_fun} if \code{code} is evaluated.
#' @details This function performs state-aware cached evaluation of R
#' code. This means that a cache is considered to apply if and
#' only if both the code (after parsing) and the values of any
#' variables used as inputs to the code are identical to those
#' present when the cache was created.
#'
#' Cached evaluation is done via a caching engine (CachingEngine
#' object), which has a set of caches it already knows about, the
#' ability to create new caches, and a set of behaviors for when - if
#' ever - a cache should be written to disk, an evaluator function
#' which evaluates the code, and a return handler which processes the
#' object returned from the evaluator or loaded from a matching cache.
#'
#' The evaluator function is charged with evaluating code and
#' capturing any side-effects of the code that the user wishes to
#' cache and recreate, encoding them in the object it returns. The
#' return handler is tasked with processing that return value,
#' recreating any side effects (including plots, messages printed to
#' the console, etc) and then returning the raw result of the
#' evaluation.
#' @return The value generated by \code{cache$return_handler} when
#' called on the value returned by evaluating \code{code} using
#' \code{cache$eval_fun}.
#'
#' In other words, the value
#' \preformatted{
#' cache$return_handler(cache$eval_fun(code, env, ...))
#' }
#' is returned, though if a valid cache is found,
#' \code{cache$eval_fun} is not called and the cached return value (of the
#' evaluator function at the time of caching) is
#' passed to \code{cache$return_handler}.
#'
#' The return value of \code{cache$return_handler} is returned
#' invisibly. The \code{cache$return_handler} function itself is
#' responsible for the duplication of any side effects, including any
#' printing of errors, warnings, or messages as well as any necessary
#' regeneration of graphical output.
#' @author Gabriel Becker
#' @seealso \code{\link{cachingEngine}}, \code{\link[CodeDepends]{getInputs}}
#' @examples
#' \dontrun{
#' res = evalWithCache("x = 5; y = x+7;y")
#' }
#' @export
evalWithCache = function(code,
env = parent.frame(), #.GlobalEnv,
cache = cachingEngine(write_on_cache = TRUE),
codeInfo = NULL,
inputVars = NULL, #overrides automatically detected inputs
outputVars = NULL, #overrides automatically detected outputs
## The default here is write_on_cache=TRUE because we don't return
## the cache engine so if it isn't passed in its not retained
## anywhere.
## This replicates existing caching behaviors where the
## only caching action s are one-time read from/write to disk.
## Passing in an existing CachingEngine objects modifies that
## object in place and gives access to the full spectrum of
## caching behaviors offered by the package.
eval_fun = cache$eval_fun,
return_handler = cache$return_handler,
force = FALSE,
cacheRand = FALSE,
verbose = FALSE,
gexts = "png",
gdev = sapply(gexts, function(nm) get(nm, mode="function")),
last = TRUE,
stopMissingInput = FALSE,
singleEntFun= sameOutVar,
unCacheable = mustForce,
## passed to eval_fun
...)
{
if(is.character(code))
{
code = readScript("", "R", txt = code)
}
## we were passed a comment, most likely
if(length(code) == 0)
invisible(return(NULL))
if(is(code, "Script") && length(code) > 1)
{
if( (is.function(singleEntFun) && singleEntFun(code)))
{
oneExpr = paste("{", paste(as(code, "character"), collapse = "\n"), "}", collapse = "\n")
code = readScript("", "R", oneExpr)
} else {
#cache is a refclass, this modifies it in place!!
lapply(code[-length(code)], evalWithCache, cache = cache,
eval_fun = eval_fun,
return_handler = return_handler, env = env,
force = force, cacheRand = cacheRand,
verbose = verbose, gexts = gexts, gdev = gdev,
last = FALSE, ...)
return(evalWithCache(code[[length(code)]], cache = cache,
eval_fun = eval_fun,
return_handler = return_handler,
env = env, force = force,
cacheRand = cacheRand,
verbose = verbose, gexts = gexts,
gdev = gdev, last = TRUE, ...)
)
}
}
if(is.null(codeInfo))
{
codeInfo = getInputs(code)
if(!is(codeInfo, "ScriptNodeInfo"))
codeInfo = codeInfo[[1]]
}
## we can't cache certain types of expressions, calls to library,
## load, par, options, etc. The unCacheable argument accepts a
## function which accepts codeInfo for an expression and returns
## TRUE if the expression is uncacheable.
if(!force && unCacheable(codeInfo))
{
force = TRUE
}
if(is.null(inputVars)||is.null(outputVars))
{
if(is.null(inputVars))
inputVars = c(codeInfo@inputs, codeInfo@updates)
if(is.null(outputVars))
outputVars = c(codeInfo@outputs, codeInfo@updates)
}
if(is(code, "Script"))
code = code[[1]]
## parse and deparse the code to get rid of the annoying "adding a
## space invalidates the cache" issue other systems have
pcode = get_parsed_code(code)
if(verbose)
cat(paste(pcode, ": "))
## do the required inputs even exist in the current session?
in_exist = sapply(inputVars, exists, envir = env)
if(!all(in_exist))
{
if(stopMissingInput)
stop(paste("Missing variable(s) required to evaluate codeblock:",
inputVars[!in_exist], collapse = " "))
else
warning(paste("Missing inputs? Variables detected as inputs but not found:",
inputVars[!in_exist], collapse = " "))
}
## make the list we will fastdigest to get the hash. It includes
## the (properly handled) code as well as the current values of
## all the input variables
chash = fastdigest(pcode)
inputVals = mget(inputVars, envir = env, ifnotfound=list(NULL))
ivarhashes = sapply(inputVals, fastdigest)
names(ivarhashes) = inputVars
## this is doublehashing. Should provide fully equivalent uniqueness
## guarantee to hasing the list of actual values, but gives us access
## to the invidividual hashes for provenance.
ihash = fastdigest(ivarhashes)
fnd = cache$find_data(chash, ihash)
if(is(fnd, "CachedData") && !force)
{
if(verbose)
cat("using matching cache.\n")
fnd$retrieve_data(env)
xxx_returnvalue = get("xxx_returnvalue", env)
## The handler handles reproducing side effects such as
## printing, warning/error message, and graphics.
if(!is.null(env$xxx_handler))
returnvalue = env$xxx_handler(xxx_returnvalue,
env$xxx_graphics,
env = env,
evaled = FALSE,
last = last)
else
returnvalue = xxx_returnvalue
} else {
if(verbose)
cat("evaluating.\n")
if(exists(".Random.seed"))
oldRS = .Random.seed
else
oldRS = NULL
oldplot = if(dev.cur() > 1) recordPlot() else NULL
olddev = dev.cur()
xxx_returnvalue = eval_fun(code = code, env = env, evaled= TRUE, last = last, ...)
assign("xxx_returnvalue", xxx_returnvalue, envir = env)
assign("xxx_handler", return_handler, envir = env)
xxx_graphics = as(list(), "PlotList")
#check if there an active graphics device and if so if its contents are different than
#the contents
# Hadley's evaluate package does a more sophisticated version of this check, but do we want to depend on it just for that?
newdev = dev.cur()
if(newdev > 1 && (is.null(oldplot) || newdev != olddev || ! identical(oldplot, recordPlot() ) ) )
xxx_graphics[[length(xxx_graphics) + 1]] = recordPlot()
assign("xxx_graphics", xxx_graphics, envir = env)
#XXX previously outlist included pcode. why did I think i needed the pcode value? Am I using it somehwere?
outlist = c("xxx_returnvalue", "xxx_handler", "xxx_graphics", outputVars)
if(!is.list(gdev))
gdev = list(gext = gdev)
#XXX This won't catch everything, but it will catch a lot.
#See note/details in ?set.seed
rand = FALSE
if(exists(".Random.seed") && ! identical(oldRS, .Random.seed))
rand = TRUE
#What should we do when we know the result is random?
#Cache it with information on the object indicating its random, or not cache at all?
#we can control this with an argument but still need to decide the default...
if(!rand || cacheRand )
{
## we don't use mget here because we want it to fail if it can't
## find an expected output variable. Thatshould never happen
## and probably means the static analysis (or whatever it
## was overridden with) is wrong.
## sapply provides the names so no need to do that separately
outvarhashes = sapply(outputVars,
function(nm) fastdigest(get(nm, envir = env)))
# cset = cache$get_or_create_set(pcode, inputVars, outputVars)
#XXX right now it always assigns the cache to the first location in cache_dirs, even if there are more than one
provdf = makeProvDF(ivarhashes, outvarhashes, pcode, chash)
newcd = cachedData$new(code_hash = chash,
code = pcode,
inputs_hash = ihash,
provstore = provdf,
disk_location = file.path(cache$base_dir,
sprintf("code_%s", chash)),
tmp_disk_location = file.path(cache$tmp_base_dir,
sprintf("code_%s", chash)),
.data = new.env(),
file_stale = TRUE,
plot = xxx_graphics, gdevs = gdev,
write_allowed = cache$write_allowed)
for(o in outlist)
assign(o, get(o, envir = env), envir = newcd$.data)
cache$add_data(newcd)
}
returnvalue = return_handler(xxx_returnvalue, xxx_graphics, env, evaled = TRUE, last = last)
}
#clean up ugly detrius once we're done.
rm(list = c("xxx_returnvalue", "xxx_graphics", "xxx_handler"), envir = env)
invisible(returnvalue)
}
nocache = c("par", "options", "load", "rm", "source")
## This function checks for whether there are uncachable functions
## called by the expression.
##' @title mustForce
##' @description A simple heuristic function which returns false
##' (i.e., uncacheable) if the code calls any of the functions in
##' \code{nonCachedFuns}, which defaults to \code{par},
##' \code{options}, \code{load}, \code{rm}, and \code{source}.
##' @param codeInf A ScriptNodeInfo object for the code being
##' assessed
##' @param nonCachedFuns a vector of function names which should be
##' considered uncacheable
##' @return \code{TRUE} if any of the functions in \code{nonCachedFuns} are
##' called, \code{FALSE} otherwise.
##' @export
mustForce = function(codeInf, nonCachedFuns = nocache)
{
ret = FALSE
if(length(codeInf@libraries))
ret = TRUE
if(any(nonCachedFuns %in% codeInf@functions))
ret = TRUE
ret
}
#' @title sameOutVar
#' @description this function provides a simple heuristic for when a
#' block of code should be cached as a single entity. It returns
#' true when all expressions in the block create or modify the
#' same variable (or none at all), e.g.,
#' \code{
#' x=data.frame(y=rnorm(100), z=rnorm(100))
#' x$y[x$y > 3] = NA
#' x$col3 = sample(1:4, 100, TRUE)
#' x = x[1:50,]
#' }
#' @param code The code or ScriptNodeInfo to be assessed
#' @return TRUE if one (or less) unique variable is output or updated across all expressions in \code{code}, FALSE if two or more unique variables are.
#' @export
#think there's going to be a general "always right" heuristic, which
#is why the user can specify their own, or none at all.
sameOutVar = function(code)
{
cinfo = getInputs(code)
outputs = unlist(lapply(cinfo, function(x) c(x@outputs, x@updates)))
if(length(unique(outputs)) == 1)
TRUE
else
FALSE
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.