# Copyright (C) 2017 Harvard University, Mount Holyoke College
#
# This file is part of ProvR.
#
# ProvR is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# ProvR is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ProvR; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# This package was forked from <https://github.com/End-to-end-provenance/RDataTracker>
#
# Contact: Matthew Lau <matthewklau@fas.harvard.edu>
# Needed to work with S4 classes. Normally, this library is automatically
# loaded. However, it is not loaded when running non-interactively, as in our
# test cases or if a user uses RScript to run R files.
library(methods)
# Information about where in the source code this statement appears.
setClass("DDGStatementPos", slots = list(startLine = "numeric", startCol = "numeric",
endLine = "numeric", endCol = "numeric"))
# This is called automatically when there is a call to create a new
# DDGStatementPos object.
setMethod("initialize", "DDGStatementPos", function(.Object, parseData) {
# If the parse data is missing, we set all the fields to -1
if (length(parseData) == 1 && is.na(parseData)) {
.Object@startLine <- -1
.Object@startCol <- -1
.Object@endLine <- -1
.Object@endCol <- -1
} else {
# If we have parseData, we extract the information into oure object.
.Object@startLine <- parseData$line1
.Object@startCol <- parseData$col1
.Object@endLine <- parseData$line2
.Object@endCol <- parseData$col2
}
return(.Object)
})
# This class contains all the information that we need when building a ddg. We
# create this when we parse the statement so that it is only done once and then
# look up the information we need when the statement executes.
## text = 'character', # The original text in the file parsed = 'expression', #
## The parse tree for the statement abbrev = 'character', # A shortened version of
## the text to use in node names annotated = 'expression', # An annotated version
## of the statement. This is # what we actually execute.
## # Note that vars.used through has.dev.off do not apply to a situation where #
## the statement is a function declaration, since declaring the statement # does
## not read from files, etc. That happens when the function is called, # at which
## point we will refer to the information in the contained statements.
## vars.used = 'character', # A list of the variables that are used in the
## statement vars.set = 'character', # If this is an assignment statement, this is
## the variable assigned vars.possibly.set = 'character', # If this contains any
## internal assignment statements, # like an if-statement might, for example,
## these are the # variables assigned within the statement. isDdgFunc =
## 'logical', # True if this is a call to a ddg function readsFile = 'logical', #
## True if this statement contains a call to a function that # reads from a file
## writesFile = 'logical', # True if this statement contains a call to a function
## that # writes to a file createsGraphics = 'logical', # True if this is a
## function that creates a graphics # object, like a call to pdf, for example
## updatesGraphics = 'logical', # True if this is a function that updates a
## graphics # object, like a call to a function in the graphics package, for
## example has.dev.off = 'logical', # True if this statement contains a call to
## dev.off pos = 'DDGStatementPos', # The location of this statement in the source
## code. # Has the value null.pos() if it is not available. script.num =
## 'numeric', # The number for the script this statement comes from. # Has the
## value -1 if it is not available
setClass("DDGStatement", slots = list(text = "character", parsed = "expression",
abbrev = "character", annotated = "expression", vars.used = "character", vars.set = "character",
vars.possibly.set = "character", isDdgFunc = "logical", readsFile = "logical",
writesFile = "logical", createsGraphics = "logical", updatesGraphics = "logical",
has.dev.off = "logical", pos = "DDGStatementPos", script.num = "numeric", contained = "list"))
## This is called when a new DDG Statement is created. It initializes all of the
## slots.
setMethod("initialize", "DDGStatement", function(.Object, parsed, pos, script.name,
script.num, parseData) {
.Object@parsed <- parsed
# deparse can return a vector of strings. We convert that into one long string.
.Object@text <- paste(deparse(.Object@parsed[[1]]), collapse = "")
# If this is a call to ddg.eval, we only want the argument to ddg.eval (which is
# a string) to appear in the node label
.Object@abbrev <- if (grepl("^ddg.eval", .Object@text)) {
.abbrev.cmd(.Object@parsed[[1]][[2]])
} else {
.abbrev.cmd(.Object@text)
}
vars.used <- .find.var.uses(.Object@parsed[[1]])
# Remove index variable in for statement (handled separately in ddg.forloop).
if (length(parsed) > 0 && !is.symbol(parsed[[1]]) && parsed[[1]][[1]] == "for") {
index.var <- c(parsed[[1]][[2]])
vars.used <- vars.used[!vars.used %in% index.var]
}
.Object@vars.used <- vars.used
.Object@vars.set <- .find.simple.assign(.Object@parsed[[1]])
.Object@vars.possibly.set <- .find.assign(.Object@parsed[[1]])
# ddg.eval is treated differently than other calls to ddg functions since we will
# execute the parameter as a command and want a node for it.
.Object@isDdgFunc <- grepl("^ddg.", .Object@text) & !grepl("^ddg.eval", .Object@text)
.Object@readsFile <- .reads.file(.Object@parsed[[1]])
.Object@writesFile <- .writes.file(.Object@parsed[[1]])
.Object@createsGraphics <- .creates.graphics(.Object@parsed[[1]])
.Object@updatesGraphics <- .updates.graphics(.Object@parsed[[1]])
.Object@has.dev.off <- .has.call.to(.Object@parsed[[1]], "dev.off")
.Object@pos <- if (is.object(pos)) {
pos
} else {
null.pos()
}
.Object@script.num <- if (is.na(script.num))
-1 else script.num
# The contained field is a list of DDGStatements for all statements inside the
# function or control statement. If we are collecting provenance inside
# functions or control statements, we will execute annotated versions of these
# statements. If this is a call to ddg.eval, we only want to execute the
# argument to ddg.eval
.Object@contained <- .parse.contained(.Object, script.name, parseData)
.Object@annotated <- if (grepl("^ddg.eval", .Object@text)) {
parse(text = .Object@parsed[[1]][[2]])
} else {
.add.annotations(.Object)
}
return(.Object)
})
# A special null value for when source code position information is missing.
null.pos <- function() {
return(new(Class = "DDGStatementPos", NA))
}
# Create a DDGStatement. expr - the parsed expression pos - the DDGStatementPos
# object for this statement script.name - the name of the script the statement is
# from script.num - the script number used to find the script in the sourced
# the object created by the parser that gives us source position information
.construct.DDGStatement <- function(expr, pos, script.name, script.num, parseData) {
# Surprisingly, if a statement is just a number, like 1 (which could be the last
# statement in a function, for example), the parser returns a number, rather than
# a parse tree!
if (is.numeric(expr))
expr <- parse(text = expr)
return(new(Class = "DDGStatement", parsed = expr, pos, script.name, script.num, parseData))
}
# .abbrev.cmd abbreviates a command to the specified length. Default is 60
# characters.
# cmd - command string. len (optional) - number of characters.
.abbrev.cmd <- function(cmd, len = 60) {
if (length(cmd) > 1) {
cmd <- paste(cmd, collapse = " ")
}
if (file.exists(cmd))
basename(cmd) else if (nchar(cmd) <= len)
cmd else if (substr(cmd, len, len) != "\\")
substr(cmd, 1, len) else if (substr(cmd, len - 1, len) == "\\\\")
substr(cmd, 1, len) else substr(cmd, 1, len - 1)
}
# .find.var.uses returns a vector containing all the variables used in an
# expression. Each value is unique in the returned vector, so that if a variable
# is used more than once, it only appears once.
# main.object - input expression.
.find.var.uses <- function(main.object) {
# Recursive helper function.
.find.var.uses.rec <- function(obj) {
# Base cases.
if (is.atomic(obj)) {
return(character()) # A name is not atomic!
}
if (is.name(obj)) {
if (nchar(obj) == 0)
return(character())
# Operators also pass the is.name test. Make sure that if it is a single
# character, then it is alpha-numeric.
if (nchar(obj) == 1 && !grepl("[[:alpha:]]", obj))
return(character())
return(deparse(obj))
}
if (!is.recursive(obj))
return(character())
if (.is.functiondecl(obj))
return(character())
tryCatch({
if (.is.assign(obj)) {
# If assigning to a simple variable, recurse on the right hand side of the
# assignment.
# covers cases: '=', '<-', '<<-' for simple variable assignments e.g. a <- 2
if (is.symbol(obj[[2]])) {
unique(unlist(.find.var.uses.rec(obj[[3]])))
} else if (is.call(obj[[2]])) {
# If assigning to an expression (like a[b]), recurse on the indexing part of the
# lvalue as well as on the expression. covers cases: storage.mode(z) a[1] <- 2,
# a[b] <- 3
variables <- c(.find.var.uses.rec(obj[[2]][[2]]), unlist(.find.var.uses.rec(obj[[3]])))
# for array index cases like a[b] <- 3, where there could be a variable in the
# brackets
if (obj[[2]][[1]] == "[")
append(variables, .find.var.uses.rec(obj[[2]][[3]]))
unique(variables)
} else if (is.character(obj[[2]])) {
# covers cases where there is a string literal. for assign function
unique(c(unlist(.find.var.uses.rec(parse(text = obj[[2]])[[1]])),
unlist(.find.var.uses.rec(parse(text = obj[[3]])[[1]]))))
} else {
# not entirely sure what this catches
unique(c(.find.var.uses.rec(obj[[2]]), unlist(.find.var.uses.rec(obj[[3]]))))
}
} else {
# Not an assignment. Recurse on all parts of the expression except the operator.
unique(unlist(lapply(obj[1:length(obj)], .find.var.uses.rec)))
}
}, error = function(e) {
print(paste(".find.var.uses.rec: Error analyzing", deparse(obj)))
character()
})
}
return(.find.var.uses.rec(main.object))
}
# .find.simple.assign returns the name of the variable assigned to if the
# object passed in is an expression representing an assignment statement.
# Otherwise, it returns NULL.
# obj - input expression.
.find.simple.assign <- function(obj) {
if (.is.assign(obj)) {
.get.var(obj[[2]])
} else {
""
}
}
# .is.assign returns TRUE if the object passed is an expression object
# containing an assignment statement.
# expr - a parsed expression. This also finds uses of ->. This also finds uses
# of ->>.
.is.assign <- function(expr) {
if (is.call(expr)) {
if (identical(expr[[1]], as.name("<-")))
return(TRUE) else if (identical(expr[[1]], as.name("<<-")))
return(TRUE) else if (identical(expr[[1]], as.name("=")))
return(TRUE) else if (identical(expr[[1]], as.name("assign")))
return(TRUE)
}
return(FALSE)
}
# .get.var returns the variable being referenced in an expression. It should
# be passed an expression object that is either a variable, a vector access (like
# a[1]), a list member (like a[[i]]) or a data frame access (like a$foo[i]). For
# all of these examples, it would return 'a'.
# lvalue - a parsed expression.
# for string literals e.g. when the assign function is used
.get.var <- function(lvalue) {
if (is.symbol(lvalue))
deparse(lvalue) else if (is.character(lvalue))
.get.var(parse(text = lvalue)[[1]]) else .get.var(lvalue[[2]])
}
# .find.assign returns a vector containing the names of all the variables
# assigned in an expression. The parameter should be an expression object. For
# example, if obj represents the expression 'a <- (b <- 2) * 3', the vector
# returned will contain both a and b.
# obj - a parsed expression.
# Assignment statement. Add the variable being assigned to the vector and
# recurse on the expression being assigned.
# Don't look for assignments in the body of a function as those won't happen
# until the function is called. Don't recurse on NULL.
# Not an assignment statement. Recurse on the parts of the expression. Base
# case.
.find.assign <- function(obj) {
if (!is.recursive(obj))
return(character())
if (.is.assign(obj)) {
var <- .get.var(obj[[2]])
if (!(is.null(obj[[3]]))) {
if (.is.functiondecl(obj[[3]]))
var else c(var, unlist(lapply(obj[[3]], .find.assign)))
} else var
} else {
unique(unlist(lapply(obj, .find.assign)))
}
}
# ddg.is.functiondecl tests to see if an expression is a function declaration.
# expr - a parsed expression.
.is.functiondecl <- function(expr) {
if (is.symbol(expr) || !is.language(expr))
return(FALSE)
if (is.null(expr[[1]]) || !is.language(expr[[1]]))
return(FALSE)
return(expr[[1]] == "function")
}
# .get.statement.type returns the control type (if applicable) of a parsed
# statement.
.get.statement.type <- function(parsed.command) {
if (length(parsed.command) > 1)
return(parsed.command[[1]])
return(0)
}
# .add.annotations accepts a DDGStatement and returns an expression. The
# returned expression is annotated as needed. Return if statement is empty.
# Replace source with ddg.source. Annotate user-defined functions. Note that
# this will not annotate anonymous functions, like ones that might be passed to
# lapply, for example Is that what we want?
.add.annotations <- function(command) {
parsed.command <- command@parsed[[1]]
if (length(parsed.command) == 0)
return(command@parsed)
if (is.call(parsed.command) && parsed.command[[1]] == "source") {
return(.replace.source(parsed.command))
}
if (.is.assign(parsed.command) && .is.functiondecl(parsed.command[[3]])) {
return(.add.function.annotations(command))
}
statement.type <- as.character(.get.statement.type(parsed.command))
loop.types <- list("for", "while", "repeat")
if (length(statement.type > 0) && !is.null(statement.type)) {
# Move into funcs below && ddg.max.loops() > 0) { Annotate if statement.
if (statement.type == "if") {
return(.annotate.if.statement(command))
} else if (statement.type %in% loop.types) {
# Annotate for, while, repeat statement.
return(.annotate.loop.statement(command, statement.type))
} else if (statement.type == "{") {
# Annotate simple block.
return(.annotate.simple.block(command))
}
}
# Not a function or control construct. No annotation required.
return(command@parsed)
}
# .parse.contained creates the DDGStatement objects that correspond to
# statements inside a function or control block (or blocks). cmd - the
# DDGStatement being considered script.name - the name of the script the
# statement is from parseData - the data returned by the parser that is used to
# extract source position information Returns a list of DDTStatements or an empty
# list if this is not a function declaration or a control construct.
.parse.contained <- function(cmd, script.name, parseData) {
parsed.cmd <- cmd@parsed[[1]]
# Function declaration
if (.is.assign(parsed.cmd) && .is.functiondecl(parsed.cmd[[3]])) {
# Create the DDGStatement objects for the statements in the function
return(.parse.contained.function(cmd, script.name, parseData, parsed.cmd[[3]][[3]]))
} else if (ddg.max.loops() == 0) {
# Check if we want to go inside loop and if-statements
return(list())
}
# Control statements.
st.type <- as.character(.get.statement.type(parsed.cmd))
# If statement.
if (st.type == "if") {
return(.parse.contained.if(cmd, script.name, parseData, parsed.cmd))
} else {
# Other control statements
control.types <- list("for", "while", "repeat", "{")
if (length(st.type) > 0 && !is.null(st.type) && (st.type %in% control.types)) {
return(.parse.contained.control(cmd, script.name, parseData, parsed.cmd,
st.type))
}
}
# Not a function declaration or control construct.
return(list())
}
.parse.contained.function <- function(cmd, script.name, parseData, func.body) {
if (func.body[[1]] == "{") {
# The function body is a block. Extract the statements inside the block
func.stmts <- func.body[2:length(func.body)]
} else {
# The function body is a single statement.
func.stmts <- list(func.body)
}
# Create the DDGStatement objects for the statements in the function
return(.ddg.create.DDGStatements(func.stmts, script.name, cmd@script.num, parseData,
cmd@pos))
}
.parse.contained.if <- function(cmd, script.name, parseData, parent) {
block.stmts <- list()
# If and else if blocks.
while (!is.symbol(parent) && parent[[1]] == "if") {
# Get block
block <- parent[[3]]
block <- .ensure.in.block(block)
# Get statements for this block.
for (i in 2:(length(block))) {
block.stmts <- c(block.stmts, block[[i]])
}
# Check for possible final else.
if (length(parent) == 4) {
final.else <- TRUE
} else {
final.else <- FALSE
}
# Get next parent
parent <- parent[[(length(parent))]]
}
# Final else block (if any).
if (final.else) {
# Get block.
block <- parent
block <- .ensure.in.block(block)
# Get statements for this block.
for (i in 2:(length(block))) {
block.stmts <- c(block.stmts, block[[i]])
}
}
# Create the DDGStatement objects for statements in block
return(.ddg.create.DDGStatements(block.stmts, script.name, cmd@script.num, parseData,
cmd@pos))
}
# If there is a singleton statement inside a control construct nest it inside a
# block. Return the block.
.ensure.in.block <- function(block) {
if (is.symbol(block) || block[[1]] != "{")
call("{", block) else block
}
.parse.contained.control <- function(cmd, script.name, parseData, parsed.cmd,
st.type) {
block.stmts <- list()
if (st.type == "for")
block <- parsed.cmd[[4]] else if (st.type == "while")
block <- parsed.cmd[[3]] else if (st.type == "repeat")
block <- parsed.cmd[[2]] else if (st.type == "{")
block <- parsed.cmd
block <- .ensure.in.block(block)
for (i in 2:length(block)) {
block.stmts <- c(block.stmts, block[[i]])
}
# Create the DDGStatement objects for statements in block
return(.ddg.create.DDGStatements(block.stmts, script.name, cmd@script.num, parseData,
cmd@pos))
}
# .replace.source replaces source with ddg.source. parsed.command must be a
# parsed expression that is a call to the source function.
.replace.source <- function(parsed.command) {
script.name <- deparse(parsed.command[[2]])
parsed.command.txt <- paste("ddg.source(", script.name, ")", sep = "")
return(parse(text = parsed.command.txt))
}
# .add.function.annotations is passed a command that corresponds to a
# function declaration. It returns a parsed command corresponding to the same
# function declaration but with calls to ddg.function, ddg.eval and
# ddg.ret.value inserted if they are not already present. The functions
# ddg.annotate.on and ddg.annotate.off may be used to provide a list of functions
# to annotate or not to annotate, respectively. function.decl should be a
# command that contains an assignment statement where the value being bound is a
# function declaration
.add.function.annotations <- function(function.decl) {
parsed.function.decl <- function.decl@parsed[[1]]
# Get function name.
func.name <- toString(parsed.function.decl[[2]])
# Get function definition.
func.definition <- parsed.function.decl[[3]]
# Create function block if necessary.
if (func.definition[[3]][[1]] != "{") {
func.definition <- .create.function.block(func.definition)
}
# Create new function body with an if-then statement for annotations.
func.definition <- .add.conditional.statement(func.definition, func.name)
# Insert call to ddg.function if not already added.
if (!.has.call.to(func.definition[[3]], "ddg.function")) {
func.definition <- .insert.ddg.function(func.definition)
}
# Insert calls to ddg.ret.value if not already added.
if (!.has.call.to(func.definition[[3]], "ddg.ret.value")) {
func.definition <- .wrap.all.ret.parameters(func.definition, function.decl@contained)
}
# Wrap last statement with ddg.ret.value if not already added and if last
# statement is not a simple return or a ddg function.
last.statement <- .find.last.statement(func.definition)
if (!.ddg.is.call.to(last.statement, "ddg.ret.value") & !.ddg.is.call.to(last.statement,
"return") & !.is.call.to.ddg.function(last.statement)) {
func.definition <- .wrap.last.line(func.definition, function.decl@contained)
}
# Wrap statements with ddg.eval if not already added and if statements are not
# calls to a ddg function and do not contain ddg.ret.value.
if (!.has.call.to(func.definition, "ddg.eval")) {
func.definition <- .wrap.with.ddg.eval(func.definition, function.decl@contained)
}
# Reassemble parsed.command.
return(as.expression(call("<-", as.name(func.name), func.definition)))
}
# .create.function.block creates a function block. func.definition is a
# parsed expression for a function declaration (not the full assignment statement
# in which it is declared) Returns a parse tree for the same function declaration
# but with the function statements inside a block.
.create.function.block <- function(func.definition) {
# Get the function parameters.
func.params <- func.definition[[2]]
# Get the body of the function.
func.body <- func.definition[[3]]
# Add block and reconstruct the call.
new.func.body <- call("{", func.body)
return(call("function", func.params, as.call(new.func.body)))
}
# .add.conditional.statement creates a new function definition containing an
# if-then statement used to control annotation. func.definition - original
# function definition.
.add.conditional.statement <- function(func.definition, func.name) {
# Get the function parameters.
func.params <- func.definition[[2]]
# Get the body of the function.
func.body <- func.definition[[3]]
pos <- length(func.body)
# Create new function definition containing if-then statement. This will prevent
# us from collecting provenance inside functions that are inside control
# structures when we are not collecting provenance in control structures.
new.func.body.txt <- c(paste("if (ddg.should.run.annotated(\"", func.name, "\")) {",
sep = ""), as.list(func.body[2:pos]), paste("} else {", sep = ""), as.list(func.body[2:pos]),
paste("}", sep = ""))
new.func.expr <- parse(text = new.func.body.txt)
new.func.body <- new.func.expr[[1]]
return(call("function", func.params, call("{", new.func.body)))
}
# .insert.ddg.functioninserts ddg.function before the first line in the
# annotated block of a function body. func.definition is a parsed expression for
# a function declaration (not the full assignment statement in which it is
# declared) Returns a parse tree for the same function declaration but with a
# call to ddg.function() as the first statement.
.insert.ddg.function <- function(func.definition) {
# Get the function parameters.
func.params <- func.definition[[2]]
# Get the body of the function.
func.body <- func.definition[[3]]
# Get annotated block.
block <- func.body[[2]][[3]]
pos <- length(block)
# Insert ddg.function.
inserted.statement <- call("ddg.function")
new.statements.txt <- c(as.list("{"), inserted.statement, as.list(block[2:pos]),
as.list("}"))
block <- parse(text = new.statements.txt)[[1]]
func.body[[2]][[3]] <- block
return(call("function", func.params, as.call(func.body)))
}
# .wrap.ret.parameters wraps parameters of return functions with
# ddg.ret.value in the annotated block of a function body. block is the parse
# tree corresponding to the statements within the annotated block of a function
# parsed.stmts is the list of DDGStatement objects contained in the function
# Returns a parse tree for the same function body but with a call to
# ddg.ret.value wrapped around all expressions that are returned.
.wrap.ret.parameters <- function(block, parsed.stmts) {
# Check each statement in the annotated block to see if it contains a ret.
pos <- length(block)
for (i in 1:pos) {
statement <- block[[i]]
if (.has.call.to(statement, "return")) {
# If statement is a return, wrap parameters with ddg.ret.value.
if (.ddg.is.call.to(statement, "return")) {
# Need to handle empty parameter separately.
if (length(statement) == 1) {
ret.params <- ""
} else {
ret.params <- statement[[2]]
}
if (is.list(parsed.stmts)) {
parsed.stmt <- parsed.stmts[[i - 2]]
} else {
parsed.stmt <- parsed.stmts
}
# If parameters contain a return, recurse on parameters.
if (.has.call.to(ret.params, "return")) {
ret.params <- .wrap.ret.parameters(ret.params, parsed.stmt)
}
new.ret.params <- .create.ddg.ret.call(ret.params, parsed.stmt)
new.statement <- call("return", new.ret.params)
block[[i]] <- new.statement
# If statement contains a return, recurse on statement.
} else {
if (is.list(parsed.stmts)) {
parsed.stmt <- parsed.stmts[[i - 2]]
} else {
parsed.stmt <- parsed.stmts
}
block[[i]] <- .wrap.ret.parameters(statement, parsed.stmt)
}
}
}
return(block)
}
# .wrap.all.ret.parameters wraps parameters of all return functions with
# ddg.ret.value in the annotated block of a function definition.
# func.definition is a parsed expression for a function declaration (not the full
# assignment statement in which it is declared) parsed.stmts is the list of
# DDGStatement objects contained in the function Returns a parse tree for the
# same function declaration but with a call to ddg.ret.value wrapped around
# all expressions that are returned.
.wrap.all.ret.parameters <- function(func.definition, parsed.stmts) {
# Get function parameters.
func.params <- func.definition[[2]]
# Get the body of the function.
func.body <- func.definition[[3]]
# Get annotated block.
block <- func.body[[2]][[3]]
pos <- length(block)
# Wrap individual return functions.
block <- .wrap.ret.parameters(block, parsed.stmts)
# Get new function body
func.body[[2]][[3]] <- block
# Reconstruct function.
return(call("function", func.params, as.call(func.body)))
}
# .find.last.statement finds the last statement in the annotated block of a
# function. func.definition is a parsed expression for a function declaration
# (not the full assignment statement in which it is declared) Returns the parse
# tree corresponding to the last statement in the function definition.
.find.last.statement <- function(func.definition) {
# Get function body.
func.body <- func.definition[[3]]
# Get annotated block.
block <- func.body[[2]][[3]]
pos <- length(block)
# Return final statement in block.
return(block[[pos]])
}
# Creates a call to ddg.eval using a closure so that we will be able to refer to
# the correct DDGStatement object when the return call is executed. statement is
# the parse tree for the expression being returned parsed.stmt is the
# DDGStatement object corresponding to the last statement Returns a parse tree
# with a call to ddg.eval. The arguments to ddg.eval are the original statement
# and the DDGStatement object.
.create.ddg.eval.call <- function(statement, parsed.stmt) {
# We need to force the evaluation of parsed.stmt for the closure to return the
# value that parsed.stmt has at the time the ddg.eval call is created.
force(parsed.stmt)
return(call("ddg.eval", paste(deparse(statement), collapse = ""), function() parsed.stmt))
}
# Creates a call to ddg.ret.value using a closure so that we will be able to
# refer to the correct DDGStatement object when the return call is executed.
# last.statement is the parse tree for the expression being returned parsed.stmt
# is the DDGStatement object corresponding to the last statement Returns a parse
# tree with a call to ddg.ret.value. The arguments to ddg.ret.value are
# the parsed statement and the DDGStatement object.
.create.ddg.ret.call <- function(last.statement, parsed.stmt) {
# We need to force the evaluation of parsed.stmt for the closure to return the
# value that parsed.stmt has at the time the ddg.eval call is created.
force(parsed.stmt)
if (.has.call.to(last.statement, "return")) {
return(call("ddg.ret.value", last.statement, function() parsed.stmt))
} else {
# If there is no return call, we will use ddg.eval to execute the statement and
# then ddg.ret.value to create the necessary return structure. We cannot use
# this technique if there is a return call because we if tried to eval a return
# call, we would end up returning from some code inside RDT, instead of the
# user's function.
eval.cmd <- .construct.DDGStatement(parse(text = deparse(last.statement)),
pos = NA, script.num = NA, parseData = NULL)
new.statement <- .create.ddg.eval.call(last.statement, parsed.stmt)
return(call("ddg.ret.value", new.statement, function() parsed.stmt))
}
}
# .wrap.last.line wraps the last line of the annotated block of a function
# with ddg.ret.value. func.definition is a parsed expression for a function
# declaration (not the full assignment statement in which it is declared)
# parsed.stmts is the list of DDGStatement objects contained in the function
# Returns a parse tree for the same function declaration but with a call to
# ddg.ret.value wrapped around the last line in the body.
.wrap.last.line <- function(func.definition, parsed.stmts) {
# Get function parameters.
func.params <- func.definition[[2]]
# Get the body of the function.
func.body <- func.definition[[3]]
# Get annotated block.
block <- func.body[[2]][[3]]
pos <- length(block)
last.statement <- block[[pos]]
parsed.stmt <- parsed.stmts[[length(parsed.stmts)]]
wrapped.statement <- .create.ddg.ret.call(last.statement, parsed.stmt)
func.body[[2]][[3]][[pos]] <- wrapped.statement
return(call("function", func.params, as.call(func.body)))
}
# .wrap.with.ddg.eval wraps each statement in the annotated block of a
# function body with ddg.eval if the statement is not a call to a ddg function
# and does not contain a call to ddg.ret.value. The statement is enclosed in
# quotation marks. func.definition is a parsed expression for a function
# declaration (not the full assignment statement in which it is declared)
# parsed.stmts is the list of DDGStatement objects contained in the function
# Returns a parse tree for the same function declaration but with the calls to
# ddg.eval inserted.
.wrap.with.ddg.eval <- function(func.definition, parsed.stmts) {
# Get the function parameters.
func.params <- func.definition[[2]]
# Get the body of the function.
func.body <- func.definition[[3]]
# Get annotated block.
block <- func.body[[2]][[3]]
pos <- length(block)
# Process each statement in block.
for (i in 2:pos) {
# Wrap with ddg.eval if statement is not a call to a ddg function and does not
# contain a call to ddg.ret.value. Enclose statement in quotation marks.
statement <- block[[i]]
if (!grepl("^ddg.", statement[1]) & !.has.call.to(statement, "ddg.ret.value")) {
parsed.stmt <- parsed.stmts[[i - 2]]
new.statement <- .create.ddg.eval.call(statement, parsed.stmt)
func.body[[2]][[3]][[i]] <- new.statement
}
}
return(call("function", func.params, as.call(func.body)))
}
# Creates a call to ddg.eval using the number of the DDGStatement stored in the
# list ddg.statements in the ddg environment. statement is the parse tree for
# the expression being returned and parsed.stmt is the corresponding DDGStatement
# object. Returns a parse tree with a call to ddg.eval. The arguments to
# ddg.eval are the original statement and the number of the DDGStatement object.
.create.block.ddg.eval.call <- function(statement, parsed.stmt) {
# Get the next DDGStatement number and store parsed.stmt at this location.
.ddg.inc("ddg.statement.num")
num <- .ddg.get("ddg.statement.num")
ddg.statements <- c(.ddg.get("ddg.statements"), parsed.stmt)
.ddg.set("ddg.statements", ddg.statements)
return(call("ddg.eval", paste(deparse(statement), collapse = ""), num))
}
# .wrap.block.with.ddg.eval wraps each statement in a block with ddg.eval
# unless the statement is a ddg function or contains a call to ddg.ret.value.
.wrap.block.with.ddg.eval <- function(block, parsed.stmts) {
# Ignore initial brace.
for (i in 2:length(block)) {
# Enclose statement in quotation marks and wrap with ddg.eval.
statement <- block[[i]]
if (!grepl("^ddg.", statement) && !.has.call.to(statement, "ddg.ret.value")) {
parsed.stmt <- parsed.stmts[[i - 1]]
# print(statement) print(parsed.stmt@text)
new.statement <- .create.block.ddg.eval.call(statement, parsed.stmt)
block[[i]] <- new.statement
}
}
return(block)
}
# .add.block.start.finish adds start and finish nodes to blocks in control
# statements.
.add.block.start.finish <- function(block, pname) {
# Create ddg.start & ddg.finish statements.
start.statement <- deparse(call("ddg.start", pname))
finish.statement <- deparse(call("ddg.finish", pname))
# Get internal statements.
pos <- length(block)
statements <- deparse(block[[2]])
if (pos > 2) {
for (i in 3:pos) {
statements <- append(statements, deparse(block[[i]]))
}
}
# Create new block.
block.txt <- paste(c("{", start.statement, statements, finish.statement, "}"),
collapse = "\n")
block.parsed <- parse(text = block.txt)
return(block.parsed[[1]])
}
# .insert.ddg.forloop inserts a ddg.forloop statement at the top of a block.
.insert.ddg.forloop <- function(block, index.var) {
pos <- length(block)
inserted.statement <- call("ddg.forloop", index.var)
# Block with single statement.
if (pos == 2) {
new.statements <- c(as.list(block[[1]]), inserted.statement, as.list(block[2]))
return(as.call(new.statements))
} else {
# Block with multiple statements.
new.statements <- c(as.list(block[[1]]), inserted.statement, as.list(block[2:pos]))
return(as.call(new.statements))
}
}
# .insert.ddg.loop.annotate inserts a ddg.loop.annotate.on or
# ddg.loop.annotate.off statement at the beginning of a block.
.insert.ddg.loop.annotate <- function(block, var) {
pos <- length(block)
if (var == "on")
inserted.statement <- call("ddg.loop.annotate.on") else if (var == "off")
inserted.statement <- call("ddg.loop.annotate.off")
# Block with single statement.
if (pos == 2) {
new.statements <- c(as.list(block[[1]]), inserted.statement, as.list(block[2]))
return(as.call(new.statements))
} else {
# Block with multiple statements.
new.statements <- c(as.list(block[[1]]), inserted.statement, as.list(block[2:pos]))
return(as.call(new.statements))
}
}
# .annotate.if.statement adds annotations to if statements.
.annotate.if.statement <- function(command) {
if (ddg.max.loops() == 0) {
parsed.command.txt <- deparse(command@parsed[[1]])
} else {
# Get parsed command & contained statements
parsed.command <- command@parsed[[1]]
parsed.stmts <- command@contained
# Set initial values.
bnum <- 1
ptr <- 0
parent <- parsed.command
parsed.command.txt <- vector()
# If & else if blocks.
while (!is.symbol(parent) && parent[[1]] == "if") {
# Get block
block <- parent[[3]]
block <- .ensure.in.block(block)
# Get statements for this block.
block.stmts <- list()
for (i in 1:(length(block) - 1)) {
block.stmts <- c(block.stmts, parsed.stmts[[i + ptr]])
}
# Advance pointer for next block.
ptr <- ptr + length(block) - 1
# Wrap each statement with ddg.eval.
block <- .wrap.block.with.ddg.eval(block, block.stmts)
# Add start and finish nodes.
block <- .add.block.start.finish(block, "if")
# Reconstruct original statement.
cond <- paste(deparse(parent[[2]]), collapse = "")
if (bnum == 1) {
statement.txt <- paste(c(paste("if (", cond, ")", sep = ""), deparse(block),
collapse = "\n"))
} else {
statement.txt <- paste(c(paste("} else if (", cond, ")", sep = ""),
deparse(block), collapse = "\n"))
}
# Remove final brace & new line.
if (bnum > 1) {
last <- length(parsed.command.txt) - 2
parsed.command.txt <- parsed.command.txt[c(1:last)]
}
parsed.command.txt <- append(parsed.command.txt, statement.txt)
# Check for possible final else.
if (length(parent) == 4) {
final.else <- TRUE
} else {
final.else <- FALSE
}
# Get next parent
bnum <- bnum + 1
parent <- parent[[(length(parent))]]
}
# Final else block (if any).
if (final.else) {
# Get block.
block <- parent
block <- .ensure.in.block(block)
# Get statements for this block
block.stmts <- list()
for (i in 1:(length(block) - 1)) {
block.stmts <- c(block.stmts, parsed.stmts[[i + ptr]])
}
# Wrap each statement with ddg.eval.
block <- .wrap.block.with.ddg.eval(block, block.stmts)
# Add start and finish nodes.
block <- .add.block.start.finish(block, "if")
# Reconstruct original statement
statement.txt <- paste(c(paste("} else", sep = ""), deparse(block), collapse = ""))
# Remove final brace.
last <- length(parsed.command.txt) - 2
parsed.command.txt <- parsed.command.txt[c(1:last)]
parsed.command.txt <- append(parsed.command.txt, statement.txt)
}
}
parsed.command.txt <- append(parsed.command.txt, "ddg.set.inside.loop()", after = 0)
parsed.command.txt <- append(parsed.command.txt, "ddg.not.inside.loop()")
return(parse(text = parsed.command.txt))
}
# .annotate.loop.statement adds annotations to for, while and repeat
# statements. Provenance is collected for the number of iterations specified in
# the parameter max.loops, beginning with the iteration specified in the
# parameter first.loop. A Details Omitted node may be added before and after the
# annotated section, as needed.
.annotate.loop.statement <- function(command, loop.type) {
if (ddg.max.loops() == 0) {
# Note that I can't just use command@text because it does not separate statements
# with newlines
parsed.command.txt <- deparse(command@parsed[[1]])
} else {
# Get parsed command
parsed.command <- command@parsed[[1]]
# Add new loop & get loop number.
ddg.loops <- c(.ddg.get("ddg.loops"), 0)
.ddg.set("ddg.loops", ddg.loops)
.ddg.inc("ddg.loop.num")
ddg.loop.num <- .ddg.get("ddg.loop.num")
# Get statements in block.
if (loop.type == "for") {
block <- parsed.command[[4]]
} else if (loop.type == "while") {
block <- parsed.command[[3]]
} else {
# repeat
block <- parsed.command[[2]]
}
# Add braces if necessary.
block <- .ensure.in.block(block)
# Wrap each statement with ddg.eval.
annotated.block <- .wrap.block.with.ddg.eval(block, command@contained)
# Insert ddg.forloop statement.
if (loop.type == "for") {
index.var <- parsed.command[[2]]
annotated.block <- .insert.ddg.forloop(annotated.block, index.var)
}
# Add start and finish nodes.
annotated.block <- .add.block.start.finish(annotated.block, paste(loop.type,
"loop"))
# Insert ddg.loop.annotate statements.
block <- .insert.ddg.loop.annotate(block, "off")
# Reconstruct for statement.
block.txt <- deparse(block)
annotated.block.txt <- deparse(annotated.block)
# Calculate the control line of the annotated code
if (loop.type == "for") {
firstLine <- paste("for (", deparse(parsed.command[[2]]), " in ", deparse(parsed.command[[3]]),
") {", sep = "")
} else if (loop.type == "while") {
firstLine <- paste("while (", deparse(parsed.command[[2]]), ") {", sep = "")
} else {
# repeat
firstLine <- paste("repeat {", sep = "")
}
# Turn loop annotations back on in case we reached the max.
parsed.command.txt <- paste(c(firstLine, paste("if (ddg.loop.count.inc(",
ddg.loop.num, ") >= ddg.first.loop() && ddg.loop.count(", ddg.loop.num,
") <= ddg.first.loop() + ddg.max.loops() - 1)", sep = ""), annotated.block.txt,
paste("else", sep = ""), block.txt, paste("}", sep = ""), paste("if (ddg.loop.count(",
ddg.loop.num, ") > ddg.first.loop() + ddg.max.loops() - 1) ddg.details.omitted()",
sep = ""), paste("ddg.reset.loop.count(", ddg.loop.num, ")", sep = ""),
paste("if (ddg.max.loops() != 0) ddg.loop.annotate.on()"), collapse = "\n"))
}
parsed.command.txt <- append(parsed.command.txt, "ddg.set.inside.loop()", after = 0)
parsed.command.txt <- append(parsed.command.txt, "ddg.not.inside.loop()")
return(parse(text = parsed.command.txt))
}
# .annotate.simple.block adds annotations to simple blocks.
.annotate.simple.block <- function(command) {
# Get parsed command
parsed.command <- command@parsed[[1]]
# Get statements in block.
block <- parsed.command
# Wrap each statement with ddg.eval.
block <- .wrap.block.with.ddg.eval(block, command@contained)
# Add start and finish nodes.
block <- .add.block.start.finish(block, "block")
# Reconstruct block.
block.txt <- deparse(block)
return(parse(text = block.txt))
}
# .ddg.is.call.to returns TRUE if the parsed expression passed in is a call to
# the specified function. parsed.expr - a parse tree func.name - the name of a
# function
.ddg.is.call.to <- function(parsed.expr, func.name) {
# Check if a function call.
if (is.call(parsed.expr)) {
# Check if the function called is the specified function.
if (parsed.expr[[1]] == func.name) {
return(TRUE)
}
}
return(FALSE)
}
# .has.call.to returns TRUE if the parsed expression passed in contains a
# call to the specified function. parsed.expr - a parse tree func.name - the
# name of a function
.has.call.to <- function(parsed.expr, func.name) {
# Base case.
if (!is.recursive(parsed.expr))
return(FALSE)
# If this is a function declaration, skip it
if (.is.functiondecl(parsed.expr))
return(FALSE)
# A call to the specified function.
if (.ddg.is.call.to(parsed.expr, func.name)) {
return(TRUE)
} else {
# Not a call to the specified function. Recurse on the parts of the expression.
return(any(sapply(parsed.expr, function(parsed.expr) {
return(.has.call.to(parsed.expr, func.name))
})))
}
}
# .is.call.to.ddg.function returns TRUE if the parsed expression passed in is
# a call to a ddg function. parsed.expr - a parse tree
.is.call.to.ddg.function <- function(parsed.expr) {
# Check if a function call.
if (is.call(parsed.expr)) {
# Check if the function called is a ddg function.
if (grepl("^ddg.", parsed.expr[1])) {
return(TRUE)
}
}
return(FALSE)
}
# Returns true if the statement contains a call to a function that read from a
# file parsed.statement - a parse tree
.reads.file <- function(parsed.statement) {
.ddg.file.read.functions.df <- .ddg.get(".ddg.file.read.functions.df")
reading.functions <- .ddg.file.read.functions.df$function.names
return(TRUE %in% (lapply(reading.functions, function(fun.name) {
return(.has.call.to(parsed.statement, fun.name))
})))
}
# Returns true if the statement contains a call to a function that writes to a
# file parsed.statement - a parse tree
.writes.file <- function(parsed.statement) {
.ddg.file.write.functions.df <- .ddg.get(".ddg.file.write.functions.df")
writing.functions <- .ddg.file.write.functions.df$function.names
return(TRUE %in% (lapply(writing.functions, function(fun.name) {
return(.has.call.to(parsed.statement, fun.name))
})))
}
# Returns true if the statement contains a call to a function that creates a
# graphics object parsed.statement - a parse tree
.creates.graphics <- function(parsed.statement) {
.ddg.graphics.functions.df <- .ddg.get(".ddg.graphics.functions.df")
graphics.functions <- .ddg.graphics.functions.df$function.names
if (TRUE %in% (lapply(graphics.functions, function(fun.name) {
return(.has.call.to(parsed.statement, fun.name))
}))) {
return(TRUE)
}
return(FALSE)
}
# Returns true if the statement contains a call to a function that updates a
# graphics object parsed.statement - a parse tree
.updates.graphics <- function(parsed.statement) {
graphics.update.functions <- .ddg.get(".ddg.graphics.update.functions")
if (TRUE %in% (lapply(graphics.update.functions, function(fun.name) {
return(.has.call.to(parsed.statement, fun.name))
}))) {
return(TRUE)
}
return(FALSE)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.