# Copyright (C) President and Fellows of Harvard College and
# Trustees of Mount Holyoke College, 2014, 2015, 2016, 2017, 2018.
# This program is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program. If not, see
# <http://www.gnu.org/licenses/>.
############################ API.R #############################
# This file contains files that are exported:
# prov.init - intializes a new provenance graph
# prov.save - saves the current provenance graph
# prov.quit - saves the current provenance graph and finalizes it
# prov.run - initiates execution of a script
# prov.json - returns the current provenance graph as a prov-json string
#' Provenance Collection Functions
#'
#' prov.init intializes a new provenance graph. Called by the user
#' in console mode.
#'
#' RDataTracker is an R package that collects provenance as an R script
#' executes. The resulting provenance provides a detailed record of the
#' execution of the script and includes information on the steps that were
#' performed and the intermediate data values that were created. The
#' resulting provenance can be used for a wide variety of applications
#' that include debugging scripts, cleaning code, and reproducing results.
#'
#' There are two ways in which a user can collect provenance. To collect
#' provenance from commands stored in a script file, use prov.run. This
#' will execute the commands that are in the script, collecting provenance
#' as it does so.
#'
#' The user can also collect provenance while executing commands in the
#' console. To do this, first execute prov.init. Then enter console
#' commands as normal. When done with the commands for which you want
#' provenance, use prov.quit. If you want to save the current provenance
#' without turning off provenance collection, call prov.save instead of
#' prov.quit. You can call prov.save multiple times before calling prov.quit.
#' Each call will append to the same provenance file.
#'
#' The provenance is stored in PROV-JSON format. For immediate use it may
#' be retrieved from memory using the prov.json function. For later use
#' the provenance is also written to the file prov.json. This file and
#' associated files are written by default to the R session temporary
#' directory. The user can change this location by (1) using the optional
#' parameter prov.dir in the prov.run or prov.init functions, or (2) setting
#' the prov.dir option (e.g. by using the R options command or editing the
#' Rprofile.site or .Rprofile file). If prov.dir is set to ".", the current working
#' directory is used.
#'
#' The level of detail collected by RDataTracker may be set using parameters
#' of the prov.run and prov.init functions. Options include collecting
#' provenance inside functions and inside control constructs and saving
#' snapshots of large intermediate values as separate files. These
#' features are turned off by default to optimize performance. Common
#' settings for the level of detail can also be set and managed using the
#' prov.set.detail and related functions.
#' @param r.script.path the full path to the R script file
#' that is being executed. If provided, a copy of the script will
#' be saved with the provenance graph.
#' @param prov.dir the directory where the provenance graph will be
#' saved. If not provided, the directory specified by the prov.dir
#' option is used. Otherwise the R session temporary directory
#' is used.
#' @param overwrite if FALSE, includes a time stamp in the provenance
#' graph directory name.
#' @param annotate.inside.functions if TRUE, provenance is collected
#' inside functions.
#' @param first.loop the first loop to collect provenance in a for,
#' while, or repeat statement.
#' @param max.loops the maximum number of loops to collect
#' provenance in a for, while, or repeat statement. If max.loops = -1,
#' there is no limit. If max.loops = 0, no loops are annotated.
#' If non-zero, it indicates the number of iterations of each loop for
#' which provenance should be collected. If max.loops is non-zero, provenance
#' is also collected inside if-statements.
#' @param max.snapshot.size the maximum size for snapshot files.
#' If 0, no snapshot files are saved.
#' If -1, the complete state of an object is stored in the snapshot
#' file. For other values, the head of the object, truncated to a size near
#' the specified limit, is saved. The size is in kilobytes.
#' @param hash.algorithm the hash algorithm to use for files.
#' Choices are md5 (default), sha1, crc32, sha256, sha512, xxhash32,
#' xxhash64 and murmur32. This feature uses the digest function from
#' the digest package.
#' @return prov.init initializes the provenance collector. The prov.init
#' function does not return a value.
#' @export
#' @rdname prov.run
#' @seealso \code{\link{prov.json}} for access to the JSON text of the provenance,
#' \code{\link{prov.display}} to view the provenance graphically.
#' \code{\link{prov.set.detail}} to see an alternative way to set the amount of
#' provenance collected.
#' \code{\link{prov.annotate.on}} and \code{\link{prov.annotate.off}} to see how to control
#' annotation of individual functions
prov.init <- function(r.script.path = NULL, prov.dir = NULL, overwrite = TRUE,
annotate.inside.functions = FALSE, first.loop = 1,
max.loops = 0, max.snapshot.size = 0,
hash.algorithm="md5") {
# Initialize tables
.ddg.init.tables()
# Save hash algorithm
.ddg.set (".ddg.hash.algorithm", hash.algorithm)
# Set path for provenance graph
.ddg.set.path (prov.dir, r.script.path, overwrite)
# Remove files from DDG directory
.ddg.flush.ddg()
# Create DDG directories
.ddg.init.environ()
# Reset r.script.path if RMarkdown file
if (!is.null(r.script.path) && tools::file_ext(r.script.path) == "Rmd") {
output.path <- paste(.ddg.path.scripts(), "/",
basename(tools::file_path_sans_ext(r.script.path)), ".R",
sep = "")
.ddg.markdown(r.script.path, output.path)
.ddg.set("ddg.r.script.path", output.path)
} else if (!is.null (r.script.path)) {
.ddg.set("ddg.r.script.path",
if (is.null(r.script.path)) NULL
else normalizePath(r.script.path, winslash = "/"))
}
else {
# If running from the console, set up a callback to be notified
# after each statement completes execution and build the
# corresponding portions of the ddg.
.ddg.trace.task <- function (task, result, success, printed) {
# Create the provenance for the new command
.ddg.parse.commands(as.expression(task),
environ = .GlobalEnv,
run.commands = FALSE)
return(TRUE)
}
.ddg.set (".ddg.taskCallBack.id", addTaskCallback(.ddg.trace.task))
}
# Set environment constants.
.ddg.set.details.omitted(FALSE)
# If ddg.detail is not set, use values of annotate.inside, max.loops
# and max.snapshot.size.
if (is.null(prov.get.detail())) {
# Store value of annotate.inside.
.ddg.set("ddg.annotate.inside", annotate.inside.functions)
# Store maximum number of loops to annotate.
if (max.loops < 0) max.loops <- 10^10
# Store maximum snapshot size.
.ddg.set("ddg.max.snapshot.size", max.snapshot.size)
}
# Intialize loops
.ddg.init.loops (first.loop, max.loops)
# Store time when script begins execution.
.ddg.set("ddg.start.time", .ddg.timestamp())
# Initialize the I/O tracing code
.ddg.init.iotrace ()
# Mark graph as initilized.
.ddg.set(".ddg.initialized", TRUE)
# Add a Console start node if running from the console.
if (is.null (r.script.path)) {
.ddg.add.start.node (node.name = "Console")
}
# Set functions to be used for script annotation
.ddg.set.annotation.functions()
invisible()
}
#' .ddg.set.path sets the path where the DDG will be stored. It creates the
#' directory if it does not currently exist.
#' The base directory is set as follows:
#' (1) the directory specified by the user in the parameter prov.dir, or
#' (2) the directory specified by the user as the value of the option "prov.dir", or
#' (3) the R session temporary directory. If the directory specified by the user
#' is a period (.), the base directory is set to the current working directory.
#' The provenance graph is stored in a subdirectory of the base directory called
#' "prov_console" in console mode or "prov_[script name]" in script mode. If overwrite =
#' FALSE, a timestamp is added to the directory name.
#' @param prov.dir name of directory. This can be a directory name, ".", or NULL.
#' @param r.script.path the path to the R script. If NULL, we are running from the console.
#' @param overwrite If FALSE, a timestamp is added to the directory name
#' @return the name of the directory where the ddg should be stored
.ddg.set.path <- function (prov.dir, r.script.path, overwrite) {
# Directory specified by prov.dir parameter
if (!is.null(prov.dir)) {
if (prov.dir == ".") {
base.dir <- getwd()
} else {
base.dir <- prov.dir
}
}
else {
# Directory specified as an option for prov.dir
prov.dir.option <- getOption("prov.dir")
if (!is.null(prov.dir.option) && prov.dir.option != "") {
if (prov.dir.option == ".") {
base.dir <- getwd()
} else {
base.dir <- getOption("prov.dir")
}
}
# R session temporary directory
else {
# Normalize path
base.dir <- normalizePath(tempdir(), winslash = "/", mustWork = FALSE)
}
}
# Remove final slash if present
base.dir <- sub("/$", "", base.dir)
# Console mode
if (is.null(r.script.path)) {
ddg.path <- paste(base.dir, "/prov_console", sep="")
# Script mode
} else {
ddg.path <- paste(base.dir, "/prov_",
basename(tools::file_path_sans_ext(r.script.path)), sep="")
}
# Add timestamp if overwrite = FALSE
if (!overwrite) ddg.path <- paste(ddg.path, "_", .ddg.timestamp(), sep="")
# Create directory if it does not exist
if (!dir.exists(ddg.path)) dir.create(ddg.path, recursive = TRUE)
.ddg.set("ddg.path", ddg.path)
}
#' .ddg.flush.ddg removes all files from the DDG directories unless the
#' the DDG directory is the working directory. If no DDG directory is
#' specified, the current DDG directory is assumed.
#' @param ddg.path path to DDG directory.
#' @return nothing
.ddg.flush.ddg <- function(ddg.path=NULL) {
# TODO: When ddg.flush.ddg is removed (from Obsolete.R, we
# can remove the ddg.path parameter and update the
# code below to always look up the path.
# Use current DDG directories if no directory is specified.
if (is.null(ddg.path)) {
ddg.path <- .ddg.path()
ddg.path.data <- .ddg.path.data()
ddg.path.debug <- .ddg.path.debug()
ddg.path.scripts <- .ddg.path.scripts()
}
# Remove files unless the DDG directory is the working directory.
if (ddg.path != getwd()) {
unlink(paste(ddg.path, "*.*", sep="/"))
unlink(paste(ddg.path.data, "*.*", sep="/"))
unlink(paste(ddg.path.data, ".ddghistory", sep="/"))
unlink(paste(ddg.path.debug, "*.*", sep="/"))
unlink(paste(ddg.path.scripts, "*.*", sep="/"))
}
invisible()
}
#' .ddg.set.annotation.functions sets the names of the functions used for script
#' annotation in the user's environment. This is a workaround to exporting
#' these functions and would not be allowed by CRAN.
#' @return nothing
.ddg.set.annotation.functions <- function () {
assign(".ddg.details.omitted", RDataTracker:::.ddg.details.omitted,
envir = globalenv())
assign(".ddg.eval", RDataTracker:::.ddg.eval,
envir = globalenv())
assign(".ddg.finish", RDataTracker:::.ddg.finish,
envir = globalenv())
assign(".ddg.first.loop", RDataTracker:::.ddg.first.loop,
envir = globalenv())
assign(".ddg.forloop", RDataTracker:::.ddg.forloop,
envir = globalenv())
assign(".ddg.function", RDataTracker:::.ddg.function,
envir = globalenv())
assign(".ddg.loop.annotate.off", RDataTracker:::.ddg.loop.annotate.off,
envir = globalenv())
assign(".ddg.loop.annotate.on", RDataTracker:::.ddg.loop.annotate.on,
envir = globalenv())
assign(".ddg.loop.count", RDataTracker:::.ddg.loop.count,
envir = globalenv())
assign(".ddg.loop.count.inc", RDataTracker:::.ddg.loop.count.inc,
envir = globalenv())
assign(".ddg.max.loops", RDataTracker:::.ddg.max.loops,
envir = globalenv())
assign(".ddg.max.snapshot.size", RDataTracker:::.ddg.max.snapshot.size,
envir = globalenv())
assign(".ddg.not.inside.loop", RDataTracker:::.ddg.not.inside.loop,
envir = globalenv())
assign(".ddg.reset.loop.count", RDataTracker:::.ddg.reset.loop.count,
envir = globalenv())
assign(".ddg.return.value", RDataTracker:::.ddg.return.value,
envir = globalenv())
assign(".ddg.set.inside.loop", RDataTracker:::.ddg.set.inside.loop,
envir = globalenv())
assign(".ddg.source", RDataTracker:::.ddg.source,
envir = globalenv())
assign(".ddg.start", RDataTracker:::.ddg.start,
envir = globalenv())
assign(".ddg.should.run.annotated", RDataTracker:::.ddg.should.run.annotated,
envir = globalenv())
invisible()
}
#' prov.save
#'
#' prov.save saves the current provenance graph to a prov-json file.
#' If more R statements are executed, the provenance for these statements
#' is added to the graph. The graph is finalized with prov.quit.
#' Called by the user in console mode.
#' @param save.debug If TRUE, debug files are saved to the debug directory.
#' This is intended for developers of the RDataTracker package.
#' @return prov.save writes the current provenance to a file but does not
#' return a value.
#' @export
#' @rdname prov.run
prov.save <- function(save.debug = FALSE) {
if (!.ddg.is.init()) return(invisible())
# If running from the console create a Console finish node.
# Also create a start node for the next segment.
if (is.null (.ddg.get ("ddg.r.script.path"))) {
.ddg.add.finish.node ()
.ddg.add.start.node (node.name = "Console")
}
# Save prov.json to file.
.ddg.json.write()
if (interactive()) print(paste("Saving prov.json in ", .ddg.path(), sep=""))
# Save debug files to debug directory.
if (save.debug || .ddg.save.debug()) {
.ddg.save.debug.files()
}
invisible()
}
#' prov.auit
#'
#' prov.quit saves and closes the current provenance graph.
#' Called by the user in console mode.
#' @return prov.quit writes the current provenance to a file but does not
#' return a value.
#' @export
#' @rdname prov.run
prov.quit <- function(save.debug = FALSE) {
if (!.ddg.is.init()) return(invisible())
# If running from the console create a Console finish node.
if (is.null (.ddg.get ("ddg.r.script.path"))) {
.ddg.add.finish.node ()
}
# If there are any connections still open when the script ends,
# create nodes and edges for them.
.ddg.create.file.nodes.for.open.connections ()
# If there is a display device open, grab what is on the display
if (length(grDevices::dev.list()) >= 1) {
tryCatch (.ddg.capture.graphics(called.from.save = TRUE),
error = function (e) print(e))
}
# Turn off the I/O tracing and console tracing.
.ddg.stop.iotracing()
if (.ddg.is.set (".ddg.taskCallBack.id")) {
removeTaskCallback (.ddg.get (".ddg.taskCallBack.id"))
}
# Delete temporary files.
.ddg.delete.temp()
# Shut down the DDG.
#.ddg.clear()
# Mark graph as initilized.
.ddg.set(".ddg.initialized", FALSE)
# Save prov.json to file.
.ddg.json.write()
if (interactive()) print(paste("Saving prov.json in ", .ddg.path(), sep=""))
# Save debug files to debug directory.
if (save.debug || .ddg.save.debug()) {
.ddg.save.debug.files()
}
invisible()
}
#' prov.run
#'
#' prov.run initiates execution of a script and collects provenance as
#' the script executes.
#'
#' @param f a function to run. If supplied, the function f is executed
#' with calls to prov.init and prov.save so that provenance for the
#' function is captured. Exactly one of f and r.script.path should be provided.
#' @param display if TRUE, the provenance graph is displayed in DDG Explorer
#'
#' @return prov.run runs a script, collecting provenance as it does so.
#' It does not return a value.
#' @export
#' @rdname prov.run
#' @examples
#' \dontrun{prov.run ("script.R")}
#' prov.init ()
#' a <- 1
#' b <- 2
#' prov.save ()
#' ab <- a + b
#' prov.quit ()
prov.run <- function(r.script.path = NULL, prov.dir = NULL, overwrite = TRUE,
f = NULL, annotate.inside.functions = FALSE, first.loop = 1,
max.loops = 0, max.snapshot.size = 0, save.debug = FALSE,
display = FALSE, hash.algorithm="md5") {
# Initialize ddg.
prov.init(r.script.path, prov.dir, overwrite, annotate.inside.functions,
first.loop, max.loops, max.snapshot.size, hash.algorithm)
# Set .ddg.is.sourced to TRUE if script provided.
.ddg.set(".ddg.is.sourced", !is.null(r.script.path))
# Save debug files to debug directory.
.ddg.set("ddg.save.debug", save.debug)
# If an R error is generated, get the error message and close
# the DDG.
tryCatch(
if (!is.null(r.script.path)) {
.ddg.source(
.ddg.get("ddg.r.script.path"),
ignore.ddg.calls = FALSE)
}
else if (!is.null(f)) f()
else stop("r.script.path and f cannot both be NULL"),
finally={
# Add finish nodes for anything left open due to errors
.ddg.close.blocks()
prov.quit()
if (display==TRUE){
prov.display()
}
}
)
invisible()
}
#' .ddg.source
#'
#' .ddg.source reads and executes an R script in the specified
#' environment. .ddg.source mimics the behaviour of the R source command,
#' with similar input parameters and results, but with the additional
#' parameter ignore.ddg.calls.
#' @param file the name of the R script file to source.
#' @param local the environment in which to evaluate parsed
#' expressions. If TRUE, the environment from which .ddg.source is
#' called. If FALSE, the user's workspace (global environment).
#' @param echo print each expression after parsing
#' @param print.eval print the result of each evaluation
#' @param verbose print extra diagnostics
#' @param max.deparse.length the maximum number of characters in output
#' for deparse of a single expression
#' @param chdir change the R working directory temporarily to
#' the directory containing the file to be sourced
#' @param encoding encoding to be assumed when file is a
#' character string
#' @param ignore.ddg.calls if TRUE, ignore DDG function calls
#' @param calling.script the number of the calling script
#' @param startLine the line that the source call starts on
#' @param startCol the column that the source call starts on
#' @param endLine the line that the source call ends on
#' @param endCol the column that the source call ends on
#' @return nothing
.ddg.source <- function (file, local = FALSE, echo = verbose, print.eval = echo,
verbose = getOption("verbose"), max.deparse.length = 150, chdir = FALSE,
encoding = getOption("encoding"), ignore.ddg.calls = TRUE, calling.script=NA,
startLine=NA, startCol=NA, endLine=NA, endCol=NA){
# Store script number & name.
sname <- basename(file)
snum <- .ddg.store.script.info (sname)
# Save a copy of the script
file.copy(file, paste(.ddg.path.scripts(), basename(sname), sep="/"))
### CODE IN THIS SECTION IS A SLIGHT MODIFICATION OF A PORTION OF ###
### R's source FUNCTION ###
# To see the current version of the source function source code, say:
# getMethod("source", "ANY")
# I don't know how to see the functions called from source, though.
# An older version of the source code of R's source function can be found here:
# https://github.com/SurajGupta/r-source/blob/master/src/library/base/R/source.R
# Note that R's source function has more parameters than we allow. This could
# conceivably cause problems when we replace calls to source with calls to .ddg.source.
# The additional parameters are:
# exprs
# spaced
# prompt.echo
# width.cutoff
# deparseCtrl
# continue.echo
# skip.echo
# keep.source
# Get the environment under which the script should be executed.
envir <-
if (isTRUE(local)) {
parent.frame()
}
else if (isFALSE(local)) {
.GlobalEnv
}
else if (is.environment(local)) {
local
}
else stop("'local' must be TRUE, FALSE or an environment")
# Parse encoding information.
have_encoding <- !missing(encoding) && encoding != "unknown"
if (!missing(echo))
{
if (!is.logical(echo))
stop ("'echo' must be logical")
if (!echo && verbose)
{
warning("'verbose' is TRUE, 'echo' not; ... coercing 'echo <- TRUE'\n")
echo <- TRUE
}
}
# Print extra information about environment.
if (verbose)
{
cat("'envir' chosen:")
print(envir)
}
# Parse input file and figure out encoding.
ofile <- file
from_file <- FALSE
srcfile <- NULL
if (is.character(file))
{
if (identical(encoding, "unknown"))
{
enc <- utils::localeToCharset()
encoding <- enc[length(enc)]
}
else enc <- encoding
if (length(enc) > 1L)
{
encoding <- NA
owarn <- options(warn = 2)
for (e in enc)
{
if (is.na(e))
next
zz <- file(file, encoding = e)
res <- tryCatch(readLines(zz, warn = FALSE), error = identity)
close(zz)
if (!inherits(res, "error"))
{
encoding <- e
break
}
}
options(owarn)
}
if (is.na(encoding))
stop("unable to find a plausible encoding")
if (verbose)
cat(gettextf("encoding = \"%s\" chosen", encoding), "\n", sep = "")
if (file == "")
{
filename <- "stdin"
file <- stdin()
srcfile <- "<stdin>"
}
else
{
filename <- file
file <- file(filename, "r", encoding = encoding)
on.exit(close(file))
lines <- readLines(file, warn = FALSE)
on.exit()
close(file)
srcfile <- srcfilecopy(filename, lines, file.mtime(filename)[1], isFile = TRUE)
}
loc <- utils::localeToCharset()[1L]
encoding <- if (have_encoding)
switch(loc, `UTF-8` = "UTF-8", `ISO8859-1` = "latin1", "unknown")
else "unknown"
}
else
{
filename <- "Connection"
lines <- readLines(file, warn = FALSE)
srcfile <- srcfilecopy(deparse(substitute(file)), lines)
}
# Parse the expressions from the file.
exprs <- if (!from_file)
{
if (length(lines))
parse(stdin(), n = -1, lines, "?", srcfile, encoding, keep.source=TRUE)
else expression()
}
else {
parse(file, n = -1, NULL, "?", srcfile, encoding, keep.source=TRUE)
}
on.exit()
# Set the working directory for the current script and
# expressions.
if (from_file)
close(file)
if (verbose)
cat("--> parsed", "expressions; now eval(.)ing them:\n")
if (chdir)
{
if (is.character (ofile))
{
if (grepl("^(ftp|http|file)://", ofile))
warning("'chdir = TRUE' makes no sense for a URL")
else if ( (path <- dirname (ofile)) != ".")
{
owd <- getwd()
if (is.null(owd))
{
stop("cannot 'chdir' as current directory is unknown")
on.exit(setwd(owd), add = TRUE)
setwd(path)
}
}
}
else {
warning("'chdir = TRUE' makes no sense for a connection")
}
}
### END OF MODIFIED source CODE SECTION ###
# Calculate the regular expressions for what should be ignored
# and what shouldn't.
# Ignore calculation of certain execution steps.
ignores <- c("^library[(]RDataTracker[)]$",
if (ignore.ddg.calls) "^ddg."
else c("^prov.init", "^prov.run"))
# Now we can parse the commands as we normally would for a DDG.
if (length(exprs) > 0)
{
# Initialize the tables for ddg.capture.
.ddg.set("from.source", TRUE)
# Parse and execute the commands, collecting provenance along the way.
# If called from prov.run, there is no position information. Otherwise,
# record script number and posiiton in the start and finish nodes.
if (is.na(calling.script)) {
.ddg.add.start.node (node.name=sname)
}
else {
.ddg.add.start.node (node.name=paste0 ("source (\"", sname, "\")"),
script.num=calling.script,
startLine=startLine, startCol=startCol,
endLine=endLine, endCol=endCol)
}
.ddg.parse.commands(exprs, sname, snum, environ=envir, ignore.patterns=ignores,
echo = echo, print.eval = print.eval, max.deparse.length = max.deparse.length,
run.commands = TRUE)
if (is.na(calling.script)) {
.ddg.add.finish.node ()
}
else {
.ddg.add.finish.node (script.num=calling.script,
startLine=startLine, startCol=startCol, endLine=endLine, endCol=endCol)
}
}
invisible()
}
#' Provenance Access Functions
#'
#' prov.json returns the current provenance graph as a prov-json string.
#'
#' RDataTracker collects provenance as a script executes. Once collected,
#' prov.json can be called to access the provenance as a JSON string.
#' This is useful for applications that operate on the provenance. The
#' JSON is consistent with the PROV-JSON standard.
#'
#' One such application is a graphic visualizer built into RDataTracker.
#' To view the provenance graphically, call prov.display. In the provenance
#' graph, the nodes
#' are data values and operations, with edges connecting them to show
#' data and control flow dependencies. The visualizer also
#' allows the user to view intermediate
#' values of variables, and to graphically view the lineage of how a value
#' was computed, or to look at how a value is used moving forward in the computation.
#' The user can also search for specific
#' data or operation nodes, files, or error messages in the provenance.
#'
#' @return prov.json returns the current provenance graph as a prov-json
#' string
#' @export
#' @rdname prov.json
#' @seealso \code{\link{prov.init}} and \code{\link{prov.run}} for functions to collect provenance
#' @references PROV-JSON standard: \url{https://www.w3.org/Submission/2013/SUBM-prov-json-20130424/}
#' @references RDataTracker PROV-JSON output: \url{https://github.com/End-to-end-provenance/RDataTracker/blob/export/JSON-format.md}
#' @examples
#' prov.init ()
#' a <- 1
#' b <- 2
#' ab <- a + b
#' prov.quit ()
#' str <- prov.json()
prov.json <- function()
{
# This is a wrapper function.
# Calls and returns the function with the bulk of the code in OutputJSON.R
return( .ddg.json.string() )
}
#' .ddg.start.ddg.explorer starts DDG Explorer
#' @return nothing
.ddg.start.ddg.explorer <- function () {
jar.path<- "/RDataTracker/java/DDGExplorer.jar"
check.library.paths<- file.exists(paste(.libPaths(), jar.path, sep = ""))
index<- min(which(check.library.paths == TRUE))
ddgexplorer_path<- paste(.libPaths()[index], jar.path, sep = "")
ddgjson.path<- paste(.ddg.path(), "prov.json", sep = "/")
# ddgjson.path<- paste(getwd(), .ddg.path() ,"ddg.json",sep = "/")
# -s flag starts DDG Explorer as a server. This allows each new ddg to show
# up in a new tab of an existing running DDG Explorer.
# print("Starting DDG Explorer server")
system2("java",
c("-jar", ddgexplorer_path, ddgjson.path, "-port",
.ddg.get(".ddg.explorer.port")),
wait = FALSE)
}
#' prov.display
#'
#' prov.display displays the current provenance as a graph.
#' @return prov.display loads and displays the current provenance graph
#' in DDG Explorer. The prov.display function does not return a value.
#' @export
#' @rdname prov.json
#' @examples
#' \dontrun{prov.display()}
prov.display <- function () {
# See if the server is already running
# print("Opening socket connection")
tryCatch ({
con <- socketConnection(host= "localhost",
port = .ddg.get(".ddg.explorer.port"),
blocking = FALSE,
server=FALSE, open="w", timeout=1)
ddgjson.path<- paste(.ddg.path(), "prov.json", sep = "/")
# ddgjson.path<- paste(getwd(), .ddg.path() ,"ddg.json",sep = "/")
# print ("Socket open; writing to socket")
writeLines(ddgjson.path, con)
# print ("Wrote to socket")
close(con)
},
warning = function(e) {
# print("Warning!")
.ddg.start.ddg.explorer()
}
)
invisible()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.