# Copyright (C) 2017 Harvard University, Mount Holyoke College
#
# This file is part of ProvR.
#
# ProvR is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# ProvR is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ProvR; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# This package was forked from <https://github.com/End-to-end-provenance/RDataTracker>
#
# Contact: Matthew Lau <matthewklau@fas.harvard.edu>
# ddg.function creates a procedure node of type Operation for procedures
# implemented as functions in the original R script. The function name and input
# parameters are obtained automatically from the calling environment. The outs
# parameters may be used optionally to create output data nodes. These MUST be
# passed as a list of strings, not names, unless the value is a file name. Users
# can right-click on the procedure node in DDG Explorer to see the code for the
# function in the original script. For more details on outs parameters, see
# .ddg.create.output.nodes.
# outs (optional) - a list of names of data nodes to be created as outputs to
# this procedure node. These MUST be passed as a list of strings, not names,
# unless the value is a file name. graphic.fext (optional) - the file extension
# for a graphics file.
ddg.function <- function(outs.graphic = NULL, outs.data = NULL, outs.exception = NULL,
outs.url = NULL, outs.file = NULL, graphic.fext = "jpeg") {
if (!(.ddg.is.set(".ddg.initialized") && .ddg.get(".ddg.initialized")))
return(invisible())
.ddg.inc(".ddg.func.depth")
pname <- NULL
.ddg.lookup.function.name(pname)
if (interactive() && .ddg.get(".ddg.enable.console"))
.ddg.console.node()
# Look up input parameters from calling environment.
call <- sys.call(-1)
# Try to find the full call so that we can bind the parameters by name in the
# DDG. In the case that the function being executed has been passed as a
# parameter to another function and is being called from the context (for
# example, with lapply and other higher-order functions), the match.call will
# fail. In that case, we will use the call as it appears in side the
# higher-order function.
full.call <- tryCatch(match.call(sys.function(-1), call = call), error = function(e) call)
# Create start node for the calling statement if one is not already created.
.ddg.create.start.for.cur.cmd(call, sys.frame(-1))
.ddg.create.function.nodes(pname, call, full.call, outs.graphic, outs.data, outs.exception,
outs.url, outs.file, graphic.fext, env = sys.frame(.ddg.get.frame.number(sys.calls())))
invisible()
}
# ddg.procedure creates a procedure node of type Operation for procedures not
# implemented as functions in the original R script. For more details on outs
# parameters, see .ddg.create.output.nodes.
# pname - the label for the node. Can be passed in as a string or as a name. ins
# (optional) - a list of names of data nodes to be linked as inputs to this
# procedure node. These MUST be passed as as a list of strings, not names, unless
# the value is a file name. outs (optional) - a list of names of data nodes to
# be created as outputs to this procedure node. These MUST be passed as a list of
# strings, not names, unless the value is a file name. graphic.fext (optional) -
# file extension for graphics file. env (optional) - the environment in which
# the procedure occurs. It defaults to the global environment.
ddg.procedure <- function(pname, ins = NULL, outs.graphic = NULL, outs.data = NULL,
outs.exception = NULL, outs.url = NULL, outs.file = NULL, graphic.fext = "jpeg") {
if (!(.ddg.is.set(".ddg.initialized") && .ddg.get(".ddg.initialized")))
return(invisible())
.ddg.lookup.function.name(pname)
.proc.node("Operation", pname, pname)
# Create control flow edge from preceding procedure node.
.ddg.proc2proc()
# Create the input edges if ins list provided.
if (!is.null(ins)) {
stack <- sys.calls()
function.scope <- parent.frame()
lapply(ins, function(param) {
# First see if param is defined in the function where ddg.procedure is called. If
# that does not find it, look in the scope of the function that calls the
# function that calls ddg.procedure. The difference probably depends on whether
# ddg.procedure is used to annotate a chunk of code where param is really in the
# local scope but used in the annotated chunk (the first case) or to annotate an
# entire function and param is an actual funciton parameter (the second case).
scope <- .ddg.get.scope(param, calls = stack)
if (.ddg.is.local(param, function.scope)) {
if (.ddg.data.node.exists(param, scope)) {
.ddg.data2proc(param, scope, pname)
if (.ddg.get("ddg.debug.lib"))
print(paste("param:", param))
} else {
error.msg <- paste("No data node found for local", param)
.ddg.insert.error.message(error.msg)
}
} else if (scope != "undefined" && .ddg.data.node.exists(param, scope)) {
.ddg.data2proc(param, scope, pname)
if (.ddg.get("ddg.debug.lib"))
print(paste("param:", param))
} else {
scope <- .ddg.get.scope(param, for.caller = TRUE, calls = stack)
if (scope != "undefined" && .ddg.data.node.exists(param, scope)) {
.ddg.data2proc(param, scope, pname)
if (.ddg.get("ddg.debug.lib"))
print(paste("param:", param))
} else if (.ddg.data.node.exists(param, "undefined")) {
# This could be the case if the parameter is the name of a file rather than a
# variable in the program.
.ddg.data2proc(param, "undefined", pname)
if (.ddg.get("ddg.debug.lib"))
print(paste("param:", param))
} else {
error.msg <- paste("No data node found for", param)
.ddg.insert.error.message(error.msg)
}
}
})
}
# create output nodes
.ddg.create.output.nodes(fname = "ddg.procedure", pname, outs.graphic, outs.data,
outs.exception, outs.url, outs.file, graphic.fext, parent.frame())
invisible()
}
# ddg.start creates a procedure node of type Start called pname. Users can
# right-click on a start node in DDG Explorer and see the code between start and
# finish nodes in the original script.
# pname (optional) - the label for the node. This can be passed as a string or
# as a name. It can be omitted if ddg.start is called by a function, in which
# case the name of the function will be used.
ddg.start <- function(pname = NULL) {
if (!(.ddg.is.set(".ddg.initialized") && .ddg.get(".ddg.initialized")))
return(invisible())
.ddg.lookup.function.name(pname)
# check for NULL.
if (is.null(pname)) {
msg <- "Cannot call ddg.start with NULL value from top-level."
.ddg.insert.error.message(msg)
return
}
# Create start node for the calling statement if one is not already created.
frame.number <- .ddg.get.frame.number(sys.calls())
env <- sys.frame(frame.number)
call <- sys.call(frame.number)
.ddg.create.start.for.cur.cmd(env)
# Create start non-operational step.
.proc.node("Start", pname, pname)
# Create control flow edge from preceding procedure node.
.ddg.proc2proc()
}
# ddg.finish creates a procedure node of type Finish called pname. Users can
# right-click on a finish node in DDG Explorer and see the code between start and
# finish nodes in the original script.
# pname (optional) - the label for the node. This can be passed as a string or as
# a name. It can be omitted if ddg.finish is called by a function, in which case
# the name of the function will be used.
ddg.finish <- function(pname = NULL) {
if (!(.ddg.is.set(".ddg.initialized") && .ddg.get(".ddg.initialized")))
return(invisible())
.ddg.lookup.function.name(pname)
# check for NULL.
if (is.null(pname)) {
msg <- "Cannot call ddg.finish with NULL value from top-level."
.ddg.insert.error.message(msg)
}
# Create finish non-operational step.
.proc.node("Finish", pname, pname)
# Create control flow edge from preceding procedure node.
.ddg.proc2proc()
# ddg.finish is added to the end of blocks. We want the block to return the
# value of the last R statement.
return(.ddg.get(".ddg.last.R.value"))
}
# ddg.ret.value creates a data node for a function's return value. If the
# function is called from a console command and console mode is enabled, a data
# flow edge will be created linking this node to the console command that uses
# the value. ddg.ret.value returns the same value as the function (expr) and
# can be used in place of the function's normal return statement(s) if it is the
# last statement in the function. Otherwise, it should be a parameter to return,
# as in return(ddg.ret.value(expr)). If expr is an assignment, nodes and edges
# are created for the assignment.
# expr - the value returned by the function.
ddg.ret.value <- function(expr = NULL, cmd.func = NULL) {
if (!(.ddg.is.set(".ddg.initialized") && .ddg.get(".ddg.initialized")))
return(expr)
dev.file <- NULL
parsed.stmt <- NULL
# Capture graphics if dev.off is about to be called.
if (!is.null(cmd.func)) {
parsed.stmt <- cmd.func()
if (parsed.stmt@has.dev.off) {
if (.ddg.is.call.to(parsed.stmt@parsed[[1]], "dev.off") || !.ddg.get("ddg.loop.annotate")) {
dev.file <- .ddg.capture.graphics(NULL)
dev.node.name <- paste0("dev.", dev.cur())
}
}
}
# If expr is an assignment, create nodes and edges for the assignment.
orig.expr <- substitute(expr)
frame.num <- .ddg.get.frame.number(sys.calls())
env <- sys.frame(frame.num)
orig.return <- paste("return(", deparse(orig.expr), ")", sep = "")
pname <- NULL
.ddg.lookup.function.name(pname)
# If this is a recursive call to ddg.ret.value, find the caller of the first
# ddg.ret.value
if (grepl("(^ddg|.ddg)", pname)) {
caller.frame <- .ddg.find.ddg.ret.value.caller.frame.number()
pname <- as.character(sys.call(caller.frame)[[1]])
} else {
caller.frame <- -1
}
# Prints the call & arguments. expr forces evaluation of the function early. I
# think that causes some examples to work with debugging on but not off.
# checking. (6/26/2015 - Barb). Yes, ReturnTest.R fails on the recursive f5
# function
ddg.ret.values <- .ddg.get(".ddg.ret.values")
ddg.num.returns <- .ddg.get(".ddg.num.returns")
if (nrow(ddg.ret.values) == ddg.num.returns) {
size = 100
new.rows <- data.frame(ddg.call = character(size), line = integer(size),
ret.used = logical(size), ret.node.id = integer(size), stringsAsFactors = FALSE)
.ddg.add.rows(".ddg.ret.values", new.rows)
ddg.ret.values <- .ddg.get(".ddg.ret.values")
}
# If this is not a recursive call to ddg.ret.value and ddg.function was not
# called, create the function nodes that it would have created.
call <- sys.call(caller.frame)
if (!.proc.node.exists(pname)) {
full.call <- match.call(sys.function(caller.frame), call = call)
.ddg.create.function.nodes(pname, call, full.call, auto.created = TRUE, env = sys.frame(.ddg.get.frame.number(sys.calls())))
} else {
.ddg.dec(".ddg.func.depth")
}
if (is.null(cmd.func)) {
ret.stmt <- .construct.DDGStatement(parse(text = orig.return), pos = NA,
script.num = NA)
} else {
ret.stmt <- cmd.func()
parsed.statement <- ret.stmt@parsed
}
# Create a data node for the return value. We want the scope of the function that
# called the function that called ddg.ret.
call.text <- gsub(" ", "", deparse(call, nlines = 1))
ret.node.name <- paste(call.text, "return")
ret.node.name <- gsub("\"", "\\\\\"", ret.node.name)
ret.node.scope <- environmentName(if (sys.nframe() == 2)
.GlobalEnv else parent.env(sys.frame(caller.frame)))
.ddg.save.data(ret.node.name, expr, fname = "ddg.return", scope = ret.node.scope)
caller.env = sys.frame(caller.frame)
# check if there is a return call within this call to ddg.ret.
if (.has.call.to(parsed.stmt, "return")) {
.proc.node("Operation", ret.stmt@abbrev, ret.stmt@abbrev, console = TRUE, cmd = ret.stmt)
# Create control flow edge from preceding procedure node.
.ddg.proc2proc()
# Create an edge from the return statement to its return value.
.ddg.proc2data(ret.stmt@abbrev, ret.node.name, ret.node.scope, ret.value = TRUE)
if (!is.null(dev.file)) {
ddg.file.out(dev.file, pname = ret.stmt@abbrev)
# Remove the temporary file
file.remove(dev.file)
# Add an input edge from the current device
.ddg.data2proc(dev.node.name, NULL, ret.stmt@abbrev)
}
} else {
.ddg.lastproc2data(ret.node.name, dscope = ret.node.scope)
}
# Update the table.
ddg.num.returns <- ddg.num.returns + 1
ddg.ret.values$ddg.call[ddg.num.returns] <- call.text
ddg.ret.values$ret.used[ddg.num.returns] <- FALSE
ddg.ret.values$ret.node.id[ddg.num.returns] <- .ddg.get("ddg.dnum")
ddg.cur.cmd.stack <- .ddg.get(".ddg.cur.cmd.stack")
ddg.ret.values$line[ddg.num.returns] <- if (length(ddg.cur.cmd.stack) == 0)
NA else ddg.cur.cmd.stack[length(ddg.cur.cmd.stack) - 1][[1]]@pos@startLine
.ddg.set(".ddg.ret.values", ddg.ret.values)
.ddg.set(".ddg.num.returns", ddg.num.returns)
# If it does not have return, then its parameter was a call to ddg.eval and this
# stuff has been done already.
if (.has.call.to(parsed.stmt, "return")) {
# Create edges from variables used in the return statement
vars.used <- ret.stmt@vars.used
for (var in vars.used) {
# Make sure there is a node we could connect to.
scope <- .ddg.get.scope(var)
if (.ddg.data.node.exists(var, scope)) {
.ddg.data2proc(var, scope, ret.stmt@abbrev)
}
}
for (var in ret.stmt@vars.set) {
if (var != "") {
# Create output data node.
dvalue <- eval(as.symbol(var), envir = env)
# check for non-local assignment
if (.ddg.is.nonlocal.assign(ret.stmt@parsed[[1]])) {
env <- .ddg.where(var, env = parent.env(parent.frame()), warning = FALSE)
if (identical(env, "undefined"))
env <- globalenv()
}
dscope <- .ddg.get.scope(var, env = env)
.ddg.save.data(var, dvalue, scope = dscope)
# Create an edge from procedure node to data node.
.ddg.proc2data(ret.stmt@abbrev, var, dscope = dscope, ret.value = FALSE)
}
}
# Create nodes and edges dealing with reading and writing files
.ddg.create.file.read.nodes.and.edges(ret.stmt, env)
.ddg.create.file.write.nodes.and.edges(ret.stmt, env)
if (ret.stmt@createsGraphics) {
.ddg.set.graphics.files(ret.stmt, env)
}
}
# Create the finish node for the function
if (typeof(call[[1]]) == "closure") {
.add.abstract.node("Finish", node.name = pname, env = caller.env)
} else {
.add.abstract.node("Finish", node.name = paste(deparse(call), collapse = ""),
env = caller.env)
}
return(expr)
}
# ddg.eval evaluates a statement and creates data flow edges from variable and
# function return nodes that are used in the statement. If the statement is an
# assignment statement, it also creates a data node for the variable assigned and
# a corresponding data flow edge. If ddg.eval is called from inside a function,
# cmd.func is a function that returns the corresponding DDGStatement object. If
# ddg.eval is called from inside a control block, cmd.func is an integer that
# points to the corresponding DDGStatement object stored in the list
# .ddg.statements.
# statement - the statement to evaluate.
ddg.eval <- function(statement, cmd.func = NULL) {
# Statement at top level.
if (is.null(cmd.func)) {
parsed.statement <- parse(text = statement)
cmd <- NULL
} else if (is.numeric(cmd.func)) {
# Statement inside control block.
num <- cmd.func
statements <- .ddg.get("ddg.statements")
cmd <- statements[[num]]
parsed.statement <- cmd@parsed
# Statement inside function.
} else {
cmd <- cmd.func()
parsed.statement <- cmd@parsed
}
if (.ddg.get("ddg.debug.lib"))
print(paste("ddg.eval: statement =", statement))
frame.num <- .ddg.get.frame.number(sys.calls())
env <- sys.frame(frame.num)
if (!(.ddg.is.set(".ddg.initialized") && .ddg.get(".ddg.initialized"))) {
return(eval(parsed.statement, env))
}
if (interactive() && .ddg.get(".ddg.enable.console") && !(.ddg.is.set("from.source") && .ddg.get("from.source"))) {
.ddg.console.node()
}
# If break statement, create procedure node and close open start nodes.
if (!is.null(cmd) && cmd@text == "break") {
.ddg.break.statement()
}
# If next statement, create procedure node and close open start nodes.
if (!is.null(cmd) && cmd@text == "next") {
.ddg.next.statement()
}
ret.value <- .ddg.parse.commands(parsed.statement, environ = env, run.commands = TRUE,
node.name = statement, called.from.ddg.eval = TRUE, cmds = list(cmd))
if (.ddg.get(".ddg.func.depth")) {
if (!is.null(cmd)) {
.ddg.link.function.returns(cmd)
}
}
return(ret.value)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.