# Copyright (C) 2017 Harvard University, Mount Holyoke College
#
# This file is part of ProvR.
#
# ProvR is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# ProvR is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ProvR; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# This package was forked from <https://github.com/End-to-end-provenance/RDataTracker>
#
# Contact: Matthew Lau <matthewklau@fas.harvard.edu>
# prov.capture executes a script (r.script.path). r.script.path - the full path
# to the R script. If provided, a copy of the script will be saved with the DDG.
# If only r.script.path is provided, the script is sourced using ddg.source and a
# DDG is created for the script. annotate.inside.functions (optional) - if TRUE,
# functions are annotated. first.loop (optional) - the first loop to annotate in
# a for, while, or repeat statement. max.loops (optional) - the maximum number
# of loops to annotate in a for, while, or repeat statement. If max.loops = -1
# there is no limit. If max.loops = 0, no loops are annotated. If non-zero,
# if-statements are also annotated. save (optional) - if TRUE information is
# saved.
#' @export
prov.capture <- function(r.script.path = NULL, ddgdir = NULL, overwrite = TRUE,
enable.console = TRUE, annotate.inside.functions = FALSE, first.loop = 1, max.loops = 1,
max.snapshot.size = 10, debug = FALSE, save=FALSE) {
# Initiate ddg.
ddg.init(r.script.path, ddgdir, overwrite, enable.console, annotate.inside.functions,
first.loop, max.loops, max.snapshot.size, save)
# Set .ddg.is.sourced to TRUE if script provided.
if (!is.null(r.script.path))
.ddg.set(".ddg.is.sourced", TRUE)
# Save debug files to debug directory.
if (debug && save)
.ddg.set("ddg.save.debug", save.debug)
# If an R error is generated, get the error message and close the DDG.
tryCatch(
if (!is.null(r.script.path))
ddg.source(.ddg.get("ddg.r.script.path"), ddgdir = ddgdir, ignore.ddg.calls = FALSE, ignore.init = TRUE, force.console = FALSE)
, finally = {
if(save && !is.null(r.script.path)) .ddg.save(r.script.path)
})
invisible()
}
# prov.json return thr provenance json data corresponding to latest captured
# script.
#' @export
prov.json <- function() {
ddg.json <- .ddg.json.current()
return(ddg.json)
}
# ddg.init intializes a new DDG.
# r.script.path (optional) - the full path to the R script file that is being
# executed. If provided, a copy of the script will be saved with the DDG. ddgdir
# (optional) - the directory where the DDG should be saved. If not provided, the
# DDG will be saved in a subdirectory called 'ddg' in the current working
# directory. enable.console (optional) - if TRUE, console mode is turned on.
# annotate.inside.functions (optional) - if TRUE, functions are annotated.
# first.loop (optional) - the first loop to annotate in a for, while, or repeat
# statement. max.loops (optional) - the maximum number of loops to annotate in a
# for, while, or repeat statement. If max.loops = -1 there is no limit. If
# max.loops = 0, no loops are annotated. If non-zero, if-statements are also
# annotated. max.snapshot.size (optional) - the maximum size for objects that
# should be output to snapshot files. If 0, no snapshot files are saved. If -1,
# all snapshot files are saved. Size in kilobytes. Note that this tests the size
# of the object that will be turned into a snapshot, not the size of the
# resulting snapshot. Addition : overwrite (optional) - default TRUE, if FALSE,
# generates timestamp for ddg directory
ddg.init <- function(r.script.path = NULL, ddgdir = NULL, overwrite = TRUE, enable.console = TRUE,
annotate.inside.functions = TRUE, first.loop = 1, max.loops = 1, max.snapshot.size = 10,
save = TRUE) {
.ddg.init.tables()
# Setting the path for the ddg
if (is.null(ddgdir)) {
# Default is the file where the script is located
if (!is.null(r.script.path)) {
ddg.path <- paste(dirname(r.script.path), "/", basename(tools::file_path_sans_ext(r.script.path)),
"_ddg", sep = "")
} else {
ddg.path <- paste(getwd(), "/", "ddg", sep = "")
}
} else ddg.path <- normalizePath(ddgdir, winslash = "/", mustWork = FALSE)
if (save) {
.ddg.set("ddg.save.to.disk", TRUE)
# Overwrite default is
if (!overwrite) {
no.overwrite.folder <- paste(ddg.path, "_timestamps", sep = "")
if (!dir.exists(no.overwrite.folder)) {
dir.create(no.overwrite.folder)
}
ddg.path <- paste(no.overwrite.folder, "/", basename(tools::file_path_sans_ext(r.script.path)),
"_ddg_", .format.time(Sys.time()), sep = "")
}
.ddg.set("ddg.path", ddg.path)
# Remove files from DDG directory
.ddg.delete.save()
# Create DDG directories
.ddg.init.environ()
# Save copy of original script.
file.copy(r.script.path, paste(paste(.ddg.get("ddg.path"), "/scripts", sep = ""), "/", basename(r.script.path),
sep = ""))
} else {
.ddg.set("ddg.save.to.disk", FALSE)
}
# Reset r.script.path if RMarkdown file
if (!is.null(r.script.path) && tools::file_ext(r.script.path) == "Rmd") {
output.path <- paste(paste(.ddg.get("ddg.path"), "/scripts", sep = ""), "/", basename(tools::file_path_sans_ext(r.script.path)),
".R", sep = "")
.ddg.markdown(r.script.path, output.path)
.ddg.set("ddg.r.script.path", output.path)
} else {
.ddg.set("ddg.r.script.path", if (is.null(r.script.path))
NULL else normalizePath(r.script.path, winslash = "/"))
}
# Set environment constants.
.ddg.set(".ddg.enable.console", enable.console)
.ddg.set(".ddg.func.depth", 0)
.ddg.set(".ddg.explorer.port", 6096)
.ddg.set("details.omitted", FALSE)
# Initialize the information about the open start-finish blocks
.ddg.set(".ddg.starts.open", vector())
# Initialize the stack of commands and environments being executed in active
# functions
.ddg.set(".ddg.cur.cmd.stack", vector())
.ddg.set(".ddg.cur.expr.stack", vector())
# Mark graph as initilized.
.ddg.set(".ddg.initialized", TRUE)
# Store the starting graphics device.
.ddg.set("prev.device", dev.cur())
.ddg.set("possible.graphics.files.open", NULL)
.ddg.set("ddg.open.devices", vector())
if (interactive() && .ddg.get(".ddg.enable.console") && save) {
ddg.history.file <- paste(paste(.ddg.get("ddg.path"), "/data", sep = ""), "/.ddghistory", sep = "")
.ddg.set(".ddg.history.file", ddg.history.file)
# Empty file if it already exists, do the same with tmp file.
file.create(ddg.history.file, showWarnings = FALSE)
# One timestamp keeps track of last .ddg.save (the default).
.write.timestamp.to.history()
# Save the history if the platform supports it.
tryCatch(savehistory(ddg.history.file), error = function(e) {
})
}
# Store value of annotate.inside.
.ddg.set("ddg.annotate.inside", annotate.inside.functions)
# Store maximum number of loops to annotate.
if (max.loops < 0)
max.loops <- 10^10
.ddg.set("ddg.max.loops", max.loops)
# Store maximum snapshot size.
.ddg.set("ddg.max.snapshot.size", max.snapshot.size)
# If loops are not annotated, do not annotate functions called from inside a
# loop.
if (max.loops == 0)
ddg.loop.annotate.off()
# Initialize the counter that keeps track of nested levels of ifs and loops
ddg.set.inside.loop()
# Set number of first loop.
.ddg.set("ddg.first.loop", first.loop)
.ddg.set(".ddg.proc.start.time", .elapsed.time())
# Store time when script begins execution.
.ddg.set("ddg.start.time", .format.time(Sys.time()))
invisible()
}
# ddg.annotate.on enables annotation for the specified functions. Functions not
# on this list are not annotated. If fnames is NULL, all functions will be
# annotated
# fnames - a list of one or more function names passed in as strings.
ddg.annotate.on <- function(fnames = NULL) {
if (is.null(fnames)) {
.ddg.set("ddg.annotate.off", vector())
.ddg.set("ddg.annotate.inside", TRUE)
return()
}
# Add to the on list
on.list <- .ddg.get("ddg.annotate.on")
on.list <- union(on.list, fnames)
.ddg.set("ddg.annotate.on", on.list)
# Remove from the off list
off.list <- .ddg.get("ddg.annotate.off")
off.list <- Filter(function(off) !(off %in% fnames), off.list)
.ddg.set("ddg.annotate.off", off.list)
}
# ddg.annotate.off disables annotation for the specified functions. Functions
# not on this list are annotated. If fnames is NULL, no functions will be
# annotated fnames - a list of one or more function names passed in as strings.
ddg.annotate.off <- function(fnames = NULL) {
if (is.null(fnames)) {
.ddg.set("ddg.annotate.on", vector())
.ddg.set("ddg.annotate.inside", FALSE)
return()
}
# Add to the off list
off.list <- .ddg.get("ddg.annotate.off")
off.list <- union(off.list, fnames)
.ddg.set("ddg.annotate.off", off.list)
# Remove from the on list
on.list <- .ddg.get("ddg.annotate.on")
on.list <- Filter(function(on) !(on %in% fnames), on.list)
.ddg.set("ddg.annotate.on", on.list)
}
# ddg.source reads in an R script and executes it in the provided enviroment.
# ddg.source essentially mimics the behaviour of the R source command, having
# similar input parameters and results, but with additional parameters
# ignore.ddg.calls and ignore.init.
# file - the name of the R script file to source. ddgdir (optional) - the
# directory where the DDG will be saved. If not provided, the DDG will be saved
# in a directory called 'ddg' in the current working directory. local (optional)
# - the environment in which to evaluate parsed expressions. If TRUE, the
# environment from which ddg.source is called. If FALSE, the user's workspace
# (global environment). echo (optional) - print each expression after parsing.
# print.eval (optional) - print result of each evaluation. verbose (optional) -
# print extra diagnostics. max.deparse.length (optional) - maximum number of
# characters output for deparse of a single expression. chdir (optional) -
# change R working directory temporarily to the directory containing the file to
# be sourced. encoding (optional) - encoding to be assumed when file is a
# character string. ignore.ddg.calls (optional) - if TRUE, ignore DDG function
# calls. ignore.init (optional) - if TRUE, ignore ddg.init and ddg.run.
# force.console (optional) - if TRUE, turn console mode on.
ddg.source <- function(file, ddgdir = NULL, local = FALSE, echo = verbose, print.eval = echo,
verbose = getOption("verbose"), max.deparse.length = 150, chdir = FALSE, encoding = getOption("encoding"),
ignore.ddg.calls = TRUE, ignore.init = ignore.ddg.calls, force.console = ignore.init) {
# Store script number & name.
snum <- .ddg.get(".ddg.next.script.num")
sname <- basename(file)
if (snum == 0) {
df <- data.frame(snum, sname, stringsAsFactors = FALSE)
} else {
df <- rbind(.ddg.get(".ddg.sourced.scripts"), c(snum, sname))
}
.ddg.set(".ddg.sourced.scripts", df)
# Increment script number.
.ddg.inc(".ddg.next.script.num")
### CODE IN THIS SECTION IS BASICALLY REPLICATION OF source FUNCTION ###
# Get the environment under which the script should be executed.
envir <- if (isTRUE(local)) {
parent.frame()
} else if (identical(local, FALSE)) {
.GlobalEnv
} else if (is.environment(local)) {
local
} else stop("'local' must be TRUE, FALSE or an environment")
# Parse encoding information.
have_encoding <- !missing(encoding) && encoding != "unknown"
if (!missing(echo)) {
if (!is.logical(echo))
stop("'echo' must be logical")
if (!echo && verbose) {
warning("'verbose' is TRUE, 'echo' not; ... coercing 'echo <- TRUE'\n")
echo <- TRUE
}
}
# Print extra information about environment.
if (verbose) {
cat("'envir' chosen:")
print(envir)
}
# Parse input file and figure out encoding.
ofile <- file
from_file <- FALSE
srcfile <- NULL
if (is.character(file)) {
if (identical(encoding, "unknown")) {
enc <- utils::localeToCharset()
encoding <- enc[length(enc)]
} else enc <- encoding
if (length(enc) > 1L) {
encoding <- NA
owarn <- options("warn")
options(warn = 2)
for (e in enc) {
if (is.na(e))
next
zz <- file(file, encoding = e)
res <- tryCatch(readLines(zz, warn = FALSE), error = identity)
close(zz)
if (!inherits(res, "error")) {
encoding <- e
break
}
}
options(owarn)
}
if (is.na(encoding))
stop("unable to find a plausible encoding")
if (verbose)
cat(gettextf("encoding = \"%s\" chosen", encoding), "\n", sep = "")
if (file == "") {
filename <- "stdin"
file <- stdin()
srcfile <- "<stdin>"
} else {
filename <- file
file <- file(filename, "r", encoding = encoding)
on.exit(close(file))
lines <- readLines(file, warn = FALSE)
on.exit()
close(file)
srcfile <- srcfilecopy(filename, lines, file.info(filename)[1, "mtime"],
isFile = TRUE)
}
loc <- utils::localeToCharset()[1L]
encoding <- if (have_encoding)
switch(loc, `UTF-8` = "UTF-8", `ISO8859-1` = "latin1", "unknown") else "unknown"
} else {
filename <- "Connection"
lines <- readLines(file, warn = FALSE)
srcfile <- srcfilecopy(deparse(substitute(file)), lines)
}
# Parse the expressions from the file.
exprs <- if (!from_file) {
if (length(lines)) {
parse(stdin(), n = -1, lines, "?", srcfile, encoding, keep.source = TRUE)
} else expression()
} else {
parse(file, n = -1, NULL, "?", srcfile, encoding, keep.source = TRUE)
}
on.exit()
# Set the working directory for the current script and expressions.
if (from_file)
close(file)
if (verbose)
cat("--> parsed", "expressions; now eval(.)ing them:\n")
if (chdir) {
if (is.character(ofile)) {
isURL <- length(grep("^(ftp|http|file)://", ofile)) > 0L
if (isURL)
warning("'chdir = TRUE' makes no sense for a URL")
if (!isURL && (path <- dirname(ofile)) != ".") {
owd <- getwd()
if (is.null(owd)) {
stop("cannot 'chdir' as current directory is unknown")
on.exit(setwd(owd), add = TRUE)
setwd(path)
}
}
} else {
warning("'chdir = TRUE' makes no sense for a connection")
}
}
### END OF MODIFIED source CODE SECTION ###
# Calculate the regular expressions for what should be ignored and what
# shouldn't.
if (ignore.ddg.calls && !ignore.init) {
if (verbose)
warning("'ignore.ddg.calls' is TRUE, 'ignore.int' not; ... coercion 'ignore.init <- TRUE'\n")
ignore.init <- TRUE
}
# Ignore calculation of certain execution steps.
ignores <- c("^library[(]provR[)]$", if (ignore.ddg.calls) "^ddg." else if (ignore.init) c("^ddg.init",
"^prov.capture") else "a^")
# Now we can parse the commands as we normally would for a DDG.
if (length(exprs) > 0) {
# Turn on the console if forced to, keep track of previous setting, parse
# previous commands if necessary.
prev.on <- (.ddg.is.set(".ddg.initialized") && .ddg.get(".ddg.initialized")) && .ddg.get(".ddg.enable.console")
if (prev.on && interactive())
.ddg.console.node()
if (force.console)
.ddg.console.on()
# Let library know that we are sourcing a file.
prev.source <- (.ddg.is.set(".ddg.initialized") && .ddg.get(".ddg.initialized")) && (.ddg.is.set("from.source") && .ddg.get("from.source"))
# Initialize the tables for ddg.capture.
.ddg.set("from.source", TRUE)
# Parse the commands into a console node.
.ddg.parse.commands(exprs, sname, snum, environ = envir, ignore.patterns = ignores,
node.name = sname, echo = echo, print.eval = print.eval, max.deparse.length = max.deparse.length,
run.commands = TRUE)
# Save the DDG among other things, but don't return any values, TODO - should we
# do this? ddg.save()
.ddg.set("from.source", prev.source)
# Turn return console to previous state.
if (!prev.on)
.ddg.console.off() else .ddg.console.on()
}
invisible()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.