# Copyright (C) 2017 Harvard University, Mount Holyoke College
#
# This file is part of ProvR.
#
# ProvR is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# ProvR is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ProvR; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# This package was forked from <https://github.com/End-to-end-provenance/RDataTracker>
#
# Contact: Matthew Lau <matthewklau@fas.harvard.edu>
# Create DDG environment variable.
.ddg.env <- new.env(parent = emptyenv())
# Set the number of lines the history file keeps (and therefore can be analyzed).
# Note: this setting has no effect on some systems.
ddg.MAX_HIST_LINES <- 2^14
# .ddg.init.environ() sets up the filesystem and R environments for use.
.ddg.init.environ <- function() {
dir.create(.ddg.get("ddg.path"), showWarnings = FALSE)
dir.create(paste(.ddg.get("ddg.path"), "/data", sep = ""), showWarnings = FALSE)
dir.create(paste(.ddg.get("ddg.path"), "/debug", sep = ""), showWarnings = FALSE)
dir.create(paste(.ddg.get("ddg.path"), "/scripts", sep = ""), showWarnings = FALSE)
if (interactive() && .ddg.get(".ddg.enable.console")) {
.ddg.set("ddg.original.hist.size", Sys.getenv("R_HISTSIZE"))
Sys.setenv(R_HISTSIZE = ddg.MAX_HIST_LINES)
}
}
# .ddg.sourced.script.names returns a string containing the names of sourced
# scripts, if any. If no scripts were sourced it returns an empty string.
.ddg.sourced.script.names <- function() {
ss <- .ddg.get(".ddg.sourced.scripts")
# First row is main script.
if (nrow(ss) == 1)
snames <- "" else {
snames <- ss[ss$snum >= 1, "sname"]
snames <- paste0(snames, collapse = ",")
}
return(snames)
}
# ddg.installedpackages() returns information on packages installed at the time
# of execution and their versions.
.ddg.installedpackages <- function() {
packages <- devtools::session_info()
packages <- packages[[2]]
installed <- packages[packages[, 2] == "*", ]
installed <- installed[, c(1, 3)]
return(installed)
}
# .ddg.is.simple returns TRUE if the value passed in is a simple data value which
# should be saved locally as opposed to stored in a separate file. The assumption
# is that the value passed in has already been declared not to be a graphic.
# value - input value.
.ddg.is.simple <- function(value) {
# Note that is.vector returns TRUE for lists, so we need to check lists
# separately. Since every value in a list can have a different type, if it is a
# list, we will assume the value is complex. We consider NULL values to be
# simple.
return((!.ddg.is.graphic(value) && !is.list(value) && is.vector(value) && length(value) ==
1) || is.null(value))
}
# .ddg.is.csv returns TRUE if the value passed in should be saved as a csv file,
# i.e. if it is a vector, matrix, or data frame. Note that is.vector returns
# TRUE for lists.
# value - input value.
.ddg.is.csv <- function(value) {
return(!.ddg.is.simple(value) && ((is.vector(value) && !is.list(value)) || is.matrix(value) ||
is.data.frame(value)))
}
# .ddg.is.object returns TRUE if the value is determined to be an object by our
# standards.
# value - input value.
.ddg.is.object <- function(value) {
return(is.object(value) || is.environment(value))
}
# .ddg.is.function returns TRUE if the value is determined to be a function or we
# want to save it as a function.
# value - input value.
.ddg.is.function <- function(value) {
return(is.function(value))
}
# .ddg.dev.change determines whether or not a new graphic device has become
# active and whether or not we should capture the previous graphic device. It
# returns the device number we should capture (0 means we shouldn't capture any
# device).
.ddg.dev.change <- function() {
prev.device <- .ddg.get("prev.device")
curr.device <- dev.cur()
device.list <- dev.list()
# We've switched devices .
if (prev.device != curr.device) {
# Update device.
.ddg.set("prev.device", curr.device)
# Previous device still accessible.
if (prev.device %in% device.list)
return(prev.device)
}
# No switching, or previous is not accessible (NULL or removed).
return(0)
}
# .ddg.write.csv takes as input a name-value pair for a variable and attempts to
# save the data as a csv file. It does not create any edges but does add the node
# to the DDG. Edge creation should occur from wherever this function is called.
# name - data node name. value - data node value. scope - data node scope.
.ddg.write.csv <- function(name, value, scope = NULL, from.env = FALSE) {
tryCatch({
.snapshot.node(name, "csv", value, dscope = scope, from.env = from.env)
}, error = function(e) {
.snapshot.node(name, "txt", value, save.object = TRUE, dscope = scope,
from.env = from.env)
})
}
# Returns a string representation of the type information of the given value.
.ddg.get.val.type.string <- function(value) {
val.type <- .ddg.get.val.type(value)
if (is.null(val.type))
return("null")
# list, object, environment, function, language
if (length(val.type) == 1)
return(paste("\"", val.type, "\"", sep = ""))
# vector, matrix, array, data frame type information recorded in a list of 3
# vectors (container,dimension,type)
container <- val.type[[1]]
dimension <- val.type[[2]]
type <- val.type[[3]]
# vector: a 1-dimensional array (uniform typing)
if (identical(container, "vector"))
return(paste("{\"container\":\"vector\", \"dimension\":[", dimension, "], \"type\":[\"",
type, "\"]}", sep = ""))
# matrix: a 2-dimensional array (uniform typing)
if (identical(container, "matrix")) {
dimension <- paste(dimension, collapse = ",")
return(paste("{\"container\":\"matrix\", \"dimension\":[", dimension, "], \"type\":[\"",
type, "\"]}", sep = ""))
}
# array: n-dimensional (uniform typing)
if (identical(container, "array")) {
dimension <- paste(dimension, collapse = ",")
return(paste("{\"container\":\"array\", \"dimension\":[", dimension, "], \"type\":[\"",
type, "\"]}", sep = ""))
}
# data frame: is a type of list
dimension <- paste(dimension, collapse = ",")
type <- paste(type, collapse = "\",\"")
return(paste("{\"container\":\"data_frame\", \"dimension\":[", dimension, "], \"type\":[\"",
type, "\"]}", sep = ""))
}
# Returns the type information of the value of the given variable. Does not
# contain information on dimensions.
.ddg.get.val.type.from.var <- function(var) {
val.type <- .ddg.get.val.type(get(var))
# remove dimension information, if any
if (length(val.type) > 1)
val.type[2] <- NULL
return(val.type)
}
# Returns the type information of the given value, broken into its parts and
# returned in a vecctor or a list.
.ddg.get.val.type <- function(value) {
# vector: a 1-dimensional array (uniform typing)
if (is.vector(value))
return(list("vector", length(value), class(value)))
# matrix: a 2-dimensional array (uniform typing)
if (is.matrix(value))
return(list("matrix", dim(value), class(value[1])))
# array: n-dimensional (uniform typing)
if (is.array(value))
return(list("array", dim(value), class(value[1])))
# data frame: is a type of list
if (is.data.frame(value)) {
types <- unname(sapply(value, class))
return(unname(list("data_frame", dim(value), types)))
}
# a list
if (is.list(value))
return("list")
# an object
if (is.object(value))
return("object")
# envrionment, function, language
if (is.environment(value))
return("environment")
if (is.function(value))
return("function")
if (is.language(value))
return("language")
# none of the above - null is a character, not NULL or NA
return(NULL)
}
# .ddg.record.edge records a control flow edge or a data flow edge in the edges
# table.
# etype - type of edge node1 - name of first node node1 - name of second node
.ddg.record.edge <- function(etype, node1, node2) {
# Increment edge counter.
.ddg.inc("ddg.enum")
ddg.enum <- .ddg.get("ddg.enum")
# If the table is full, make it bigger.
ddg.edges <- .ddg.get("ddg.edges")
if (nrow(ddg.edges) < ddg.enum) {
size = 100
new.rows <- data.frame(ddg.num = numeric(size), ddg.type = character(size),
ddg.from = character(size), ddg.to = character(size), stringsAsFactors = FALSE)
.ddg.add.rows("ddg.edges", new.rows)
ddg.edges <- .ddg.get("ddg.edges")
}
ddg.edges$ddg.num[ddg.enum] <- ddg.enum
ddg.edges$ddg.type[ddg.enum] <- etype
ddg.edges$ddg.from[ddg.enum] <- node1
ddg.edges$ddg.to[ddg.enum] <- node2
.ddg.set("ddg.edges", ddg.edges)
# Output control flow or data flow edge.
.ddg.output.edge(etype, node1, node2)
if (.ddg.get("ddg.debug.lib")) {
if (etype == "cf")
etype.long <- "control flow" else if (etype == "df.in")
etype.long <- "data flow in" else etype.long <- "data flow out"
print(paste("Adding", etype.long, "edge", ddg.enum, "for", node1, "to", node2))
}
}
# .ddg.is.nonlocal.assign returns TRUE if the object passed is an expression
# object containing a non-local assignment.
# expr - input expression.
.ddg.is.nonlocal.assign <- function(expr) {
# <<- or ->> means that the assignment is non-local
if (is.call(expr) && identical(expr[[1]], as.name("<<-"))) {
return(TRUE)
}
return(FALSE)
}
# .ddg.create.empty.vars.set creates an empty data frame initialized to contain
# information about variable assignments. The difference between first.writer
# and possible.first.writer is that first.writer is for simple assignments (like
# a <- 1), while possible.first.writer is for situations where the assignment
# might not have occurred, like 'if (foo) a <- 1'.
# The data frame is structured as follows - the variable name. - the position of
# the statement that wrote the variable first. - the position of the statement
# that wrote the variable last. - the position of the first statement that may
# have assigned to a variable . - the position of the last statement that may
# have assigned to a variable.
# var.table.size - desired size of the data frame. Negative values and 0 are
# coerced to 1.
.ddg.create.empty.vars.set <- function(var.table.size = 1) {
if (var.table.size <= 0)
var.table.size <- 1
vars.set <- data.frame(variable = character(var.table.size), first.writer = numeric(var.table.size),
last.writer = numeric(var.table.size), possible.first.writer = numeric(var.table.size),
possible.last.writer = numeric(var.table.size), stringsAsFactors = FALSE)
# Initialize first writer.
vars.set$first.writer <- var.table.size + 1
vars.set$possible.first.writer <- var.table.size + 1
return(vars.set)
}
# .ddg.increase.vars.set simply doubles the size of a variable assignment data
# frame and returns the new one.
# vars.set - data frame containing variable assignments. size (optional) -
# number of rows in data frame.
.ddg.double.vars.set <- function(vars.set, size = nrow(vars.set)) {
# Create the right size data frame from input frame.
new.vars.set <- rbind(vars.set, .ddg.create.empty.vars.set(size))
# Update first/last writer.
new.vars.set$first.writer <- ifelse(new.vars.set$first.writer == size + 1, size *
2 + 1, new.vars.set$first.writer)
new.vars.set$possible.first.writer <- ifelse(new.vars.set$possible.first.writer ==
size + 1, size * 2 + 1, new.vars.set$possible.first.writer)
return(new.vars.set)
}
# .ddg.add.to.vars.set parses a command and adds the new variable information to
# the variable assignment data frame. Note that var.num is a global variable! It
# should be intialized when vars.set is first created.
# vars.set - variable assignment data frame. cmd.expr - command expression. i -
# position of variable in data frame.
.ddg.add.to.vars.set <- function(vars.set, cmd, i) {
# Find out the variable being assigned to by a simple assignment statement.
main.var.assigned <- cmd@vars.set
# Find all the variables that may be assigned in the statement.
vars.assigned <- cmd@vars.possibly.set
for (var in vars.assigned) {
nRow <- which(vars.set$variable == var)
# If the variable is already in the table, update its entry.
if (length(nRow) > 0) {
if (!is.null(main.var.assigned) && var == main.var.assigned) {
vars.set$last.writer[nRow] <- i
} else {
vars.set$possible.last.writer[nRow] <- i
}
} else {
# The variable was not in the table. Add a new line for this variable.
# Find the first empty row
empty.rows <- which(vars.set$variable == "")
if (length(empty.rows) == 0) {
vars.set <- .ddg.double.vars.set(vars.set, nrow(vars.set))
empty.rows <- which(vars.set$variable == "")
}
var.num <- empty.rows[1]
# Set the variable.
vars.set$variable[var.num] <- var
if (!is.null(main.var.assigned) && var == main.var.assigned) {
vars.set$first.writer[var.num] <- i
vars.set$last.writer[var.num] <- i
} else {
vars.set$possible.first.writer[var.num] <- i
vars.set$possible.last.writer[var.num] <- i
}
}
}
return(vars.set)
}
## .ddg.find.var.assigments finds the possible variable assignments for a fixed
## set of parsed commands. See .ddg.create.empty.vars.set for more information on
## the structure of the returned data frame. parsed.commands - a list of parsed
## commands.
.ddg.find.var.assignments <- function(cmds) {
if (length(cmds) == 0)
return(data.frame())
# Make it big so we don't run out of space.
var.table.size <- length(cmds)
vars.set <- .ddg.create.empty.vars.set(var.table.size)
# Build the table recording where variables are assigned to or may be assigned
# to.
for (i in 1:length(cmds)) {
cmd.expr <- cmds[[i]]
vars.set <- .ddg.add.to.vars.set(vars.set, cmd.expr, i)
}
return(vars.set)
}
# .ddg.create.data.use.edges.for.console.cmd creates a data flow edge from the
# node for each variable used in cmd.expr to the procedural node labeled cmd, as
# long as the value would either be one that exists prior to starting the console
# block, or corresponds to the last setting of this variable in the console
# block.
# vars.set - variable assignment data frame. cmd - name of procedure node.
# cmd.expr - command expression. cmd.pos - position of command.
.ddg.create.data.use.edges.for.console.cmd <- function(vars.set, cmd, cmd.pos, for.caller) {
# Find all the variables used in this command.
vars.used <- cmd@vars.used
for (var in vars.used) {
# Make sure there is a node we could connect to.
scope <- .ddg.get.scope(var, for.caller)
if (.ddg.data.node.exists(var, scope)) {
nRow <- which(vars.set$variable == var)
# check if the node is written in the console block.
if (length(nRow) > 0) {
first.writer <- min(vars.set$first.writer[nRow], vars.set$possible.first.writer[nRow])
last.writer <- max(vars.set$last.writer[nRow], vars.set$possible.last.writer[nRow])
# Draw the edge if we will connect to a node that exists before the console block
# or to the last writer of this variable within the console block.
if (cmd.pos <= first.writer || cmd.pos >= last.writer) {
.ddg.data2proc(var, scope, cmd@abbrev)
}
# TODO - add some sort of warning to the user that the node is not being created
} else {
# The variable is not set at all in this console block. Connect to a
# pre-existing data node.
.ddg.data2proc(var, scope, cmd@abbrev)
}
} else {
# TODO - add some sort of warning that the data node was NOT found.
}
}
}
# .ddg.create.data.set.edges.for.cmd creates edges that correspond to a console
# command assigning to a variable.
# vars.set - variable assignment data frame. cmd.abbrev - name of procedure
# node. cmd.expr - command expression. cmd.pos - position of command. env -
# environment to use for evaluating variable. for.finish.node (optional) - if
# TRUE, data edge is for finish node. scope (optional) - scope of variable.
# stack (optional) - stack to use for evaluating variable.
.ddg.create.data.set.edges.for.cmd <- function(vars.set, cmd, cmd.pos, env, for.finish.node = FALSE,
scope = NULL, stack = NULL) {
vars.assigned <- cmd@vars.set
for (var in vars.assigned) {
whichRows <- which(vars.set$variable == var)
# Only create a node edge for the last place that a variable is set within a
# console block.
if ((length(whichRows) > 0 && vars.set$last.writer[whichRows] == cmd.pos &&
vars.set$possible.last.writer[whichRows] <= vars.set$last.writer[whichRows]) ||
for.finish.node) {
if (is.null(env)) {
env <- .ddg.get.env(var, calls = stack)
}
scope <- .ddg.get.scope(var, calls = stack, env = env)
# Special operators are defined by enclosing the name in `. However, the R
# parser drops those characters when we deparse, so when we parse here they are
# missing and we get an error about unexpected SPECIAL characters. The first
# tryCatch, puts the ` back in and parses again. The second tryCatch handles
# errors associated with evaluated the variable.
parsed <- tryCatch(parse(text = var), error = function(e) parse(text = paste("`",
var, "`", sep = "")))
val <- tryCatch(eval(parsed, env), error = function(e) {
eval(parse(text = var), parent.env(env))
})
tryCatch(.ddg.save.data(var, val, fname = ".ddg.create.data.set.edges.for.cmd",
error = TRUE, scope = scope, stack = stack, env = env), error = function(e) {
.ddg.data.node("Data", var, "complex", scope)
})
.ddg.proc2data(cmd@abbrev, var, scope)
}
}
}
# .ddg.create.data.node.for.possible.writes creates a data node for each variable
# that might have been set in something other than a simple assignment. An edge
# is created from the last node in the console block.
# vars.set - variable assignment data frame. last.command - last command in
# console block.
.ddg.create.data.node.for.possible.writes <- function(vars.set, last.command, env = NULL) {
environment <- if (is.environment(env))
env else .GlobalEnv
for (i in 1:nrow(vars.set)) {
if (vars.set$possible.last.writer[i] > vars.set$last.writer[i]) {
value <- tryCatch(eval(parse(text = vars.set$variable[i]), environment),
error = function(e) {
NULL
})
# Only create the node and edge if we were successful in looking up the value.
if (!is.null(value)) {
envName <- environmentName(environment)
if (envName == "")
envName <- .ddg.get.scope(vars.set$variable[i])
.ddg.data.node("Data", vars.set$variable[i], value, envName)
.ddg.proc2data(last.command@abbrev, vars.set$variable[i], envName)
}
}
}
}
# Given a parse tree, this function returns a list containing the expressions
# that correspond to the filename argument of the calls to functions that read or
# write the files. If there are none, it returns NULL. main.object - the parsed
# expression to search through func.df - the data frame describing the functions
# with file arguments
.ddg.find.files <- function(main.object, func.df, env = NULL) {
environment <- if (is.environment(env))
env else .GlobalEnv
# Recursive helper function.
find.files.rec <- function(obj) {
# Base cases.
if (!is.recursive(obj)) {
return(NULL)
}
if (length(obj) == 0) {
return(NULL)
}
## It might be useful to record somehow that this function reads a file, but we
## wouldn't actually do the reading until the function is called, not here where
## it is being declared.
if (.is.functiondecl(obj))
return(NULL)
if (is.call(obj)) {
# Call has no arguments, so it can't be reading a function. Recurse on the first
# part, in case it is more than just a symbol.
if (length(obj) == 1)
return(find.files.rec(obj[[1]])) else if (is.symbol(obj[[1]])) {
# Call with arguments Is this is file reading function?
read.func.pos <- match(as.character(obj[[1]]), func.df$function.names)
if (!is.na(read.func.pos)) {
# Find the file argument.
arg.name <- func.df$param.names[read.func.pos]
# Find a matching parameter passed by name
file.name.arg.matches <- unlist(lapply(names(obj), function(arg) {
return(pmatch(arg, arg.name))
}))
match.pos <- match(1, file.name.arg.matches)
# If no argument qualified by the file parameter name, use the argument in the
# expected position
if (is.na(match.pos)) {
file.name <- tryCatch(eval(obj[[func.df$param.pos[read.func.pos] +
1]], environment), error = function(e) NULL)
} else {
file.name <- tryCatch(eval(obj[[match.pos]], environment), error = function(e) NULL)
}
# Recurse over the arguments to the function. We can't just skip over the 2nd
# element since the filename parameter is not necessarily there if it was passed
# by name.
funcs <- find.files.rec(obj[2:length(obj)])
# Add this file name to the list of files being read. Make sure the file name
# could be evaluated and that it results in a name, not a connection.
if (!is.null(file.name) && is.character(file.name)) {
unique(c(file.name, funcs))
}
} else {
# Not a file reading function. Recurse over the arguments.
find.files.rec(obj[2:length(obj)])
}
} else {
# Function call, but the first list element is not simply a function name.
# Recurse over all the list elements.
unique(append(find.files.rec(obj[[1]]), find.files.rec(obj[2:length(obj)])))
}
} else if (length(obj) == 1) {
# A recursive structure that is not a call. Not sure if there are any, but just
# in case...
unique(find.files.rec(obj[[1]]))
} else {
unique(append(find.files.rec(obj[[1]]), find.files.rec(obj[2:length(obj)])))
}
}
return(find.files.rec(main.object@parsed))
}
# Creates file nodes and data in edges for any files that are read in this cmd
# cmd - text command cmd.expr - parsed command
.ddg.create.file.read.nodes.and.edges <- function(cmd, env) {
# Find all the files potentially read in this command. This may include files
# that are not actually read if the read are within an if-statement, for example.
files.read <- .ddg.find.files(cmd, .ddg.get(".ddg.file.read.functions.df"), env)
for (file in files.read) {
# Only create the node and edge if there actually is a file then if the file
# exists, it is possible that it was not read here
if (file.exists(file)) {
# Create the file node and edge
ddg.file(file)
ddg.data.in(basename(file), pname = cmd@abbrev)
} else if (grepl("^http", file) || grepl("^ftp", file)) {
scope <- environmentName(.GlobalEnv)
.ddg.data.node("URL", file, file, scope)
.ddg.data2proc(file, scope, cmd@abbrev)
}
}
}
# Creates file nodes and data in edges for any files that are written in this cmd
# cmd - text command cmd.expr - parsed command
.ddg.create.file.write.nodes.and.edges <- function(cmd, env) {
# Find all the files potentially written in this command. This may include files
# that are not actually written if the write calls are within an if-statement,
# for example.
files.written <- .ddg.find.files(cmd, .ddg.get(".ddg.file.write.functions.df"), env)
for (file in files.written) {
# check that the file exists. If it does, we will assume that it was created by
# the write call that we just found.
if (file.exists(file)) {
# Create the file node and edge
ddg.file.out(file, pname = cmd@abbrev)
}
}
}
# .ddg.link.function.returns determines if the command calls a function for which
# ddg.return has created a node for the return value. If so, a data flow edge is
# created from the return value data node to the finish node for the command.
# Note that if the assignment is an expression, like 'd <- f(a) + f(b)', there
# may be multiple return value nodes to link to.
# command - input command.
.ddg.link.function.returns <- function(command) {
# Find the functions that have completed but whose returns have not been used
# yet.
returns <- .ddg.get(".ddg.ret.values")
if (!is.na(command@pos@startLine)) {
unused.returns <- returns[!returns$ret.used & returns$ret.node.id >
0 & !is.na(returns$line) & returns$line == command@pos@startLine, ]
} else {
unused.returns <- returns[!returns$ret.used & returns$ret.node.id >
0, ]
}
if (nrow(unused.returns) == 0)
return()
# See which of these are called from the command we are processing now.
unused.calls <- unused.returns$ddg.call
command.text <- gsub(" ", "", command@text)
uses <- sapply(unused.calls, function(call) {
grepl(call, command.text, fixed = TRUE)
})
# The following line is here to get around R CMD check, which otherwise reports:
# no visible binding for global variable. Note that ret.node.id is not a
# variable in the subset call, but the name of a column in the data frame being
# subsetted.
ret.node.id <- NULL
# Extracts for the return value nodes.
new.uses <- subset(unused.returns, uses, ret.node.id)
# Create an edge from each of these to the last procedure node.
lapply(new.uses$ret.node.id, function(data.num) {
proc.num <- .ddg.get("ddg.pnum")
# Record in edges table
etype <- "df.in"
node1 <- paste("d", data.num, sep = "")
node2 <- paste("p", proc.num, sep = "")
.ddg.record.edge(etype, node1, node2)
if (.ddg.get("ddg.debug.lib")) {
print(paste(".ddg.link.function.returns:", command@abbrev))
print(paste("DF ", node1, " ", node2, sep = ""))
}
# Set the return value as being used.
returns$ret.used[returns$ret.node.id == data.num] <- TRUE
.ddg.set(".ddg.ret.values", returns)
})
}
# Create the DDGStatement list for a list of parsed expressions.
# exprs - a list of parsed expressions script.name - the name of the script the
# expressions come from parseData - information provided by the parser that we
# use to find line numbers enclosing.pos - if exprs are statements within a
# function definition, enclosing.pos is the source position information of the
# entire function declaration
# Returns a list of DDGStatement objects
.ddg.create.DDGStatements <- function(exprs, script.name, script.num, parseData = NULL,
enclosing.pos = NULL) {
# The parse data gives us line number information
if (is.null(parseData)) {
parseData <- getParseData(exprs, includeText = TRUE)
if (is.null(parseData)) {
# In this case there is no line number information available
cmds <- vector("list", (length(exprs)))
for (i in 1:length(exprs)) {
expr <- as.expression(exprs[i])
cmds[[i]] <- .construct.DDGStatement(expr, NA, script.name, script.num, parseData)
}
return(cmds)
}
non.comment.parse.data <- parseData[parseData$token != "COMMENT", ]
if (nrow(non.comment.parse.data) == 0) {
return(list())
}
# Start at the first non-comment expression in parseData
next.parseData <- 1
} else {
non.comment.parse.data <- parseData[parseData$token != "COMMENT", ]
# Start at the first entry in parse data that begins after the enclosing function
# begins, ends before the enclosing function ends, and matches the text of the
# first expression.
next.parseData <- which(non.comment.parse.data$line1 >= enclosing.pos@startLine &
non.comment.parse.data$line2 <= enclosing.pos@endLine & non.comment.parse.data$text ==
paste(deparse(exprs[[1]]), collapse = "\n"))[1]
}
# Create the DDGStatements
cmds <- vector("list", (length(exprs)))
next.cmd <- 1
for (i in 1:length(exprs)) {
expr <- as.expression(exprs[i][[1]])
next.expr.pos <- new(Class = "DDGStatementPos", non.comment.parse.data[next.parseData,
])
cmds[[next.cmd]] <- .construct.DDGStatement(expr, next.expr.pos, script.name,
script.num, parseData)
next.cmd <- next.cmd + 1
# If there are more expressions, determine where to look next in the parseData
if (i < length(exprs)) {
last.ending.line <- non.comment.parse.data[next.parseData, ]$line2
last.parent <- non.comment.parse.data[next.parseData, "parent"]
last.id <- non.comment.parse.data[next.parseData, "id"]
# Find the first entry in parseData that has the same parent as the previous
# expression and starts after the previous expression.
next.parseData <- which(non.comment.parse.data$parent == last.parent &
non.comment.parse.data$line1 >= last.ending.line & non.comment.parse.data$id >
last.id)[1]
}
}
return(cmds)
}
# .ddg.save.annotated.script saves a copy of the annotated script to the debug
# directory.
.ddg.save.annotated.script <- function(cmds, script.name) {
for (i in 1:length(cmds)) {
expr <- cmds[[i]]@annotated
for (j in 1:length(expr)) {
line <- deparse(expr[[j]])
if (i == 1 && j == 1)
script <- line else script <- append(script, line)
}
}
fileout <- file(paste(paste(.ddg.get("ddg.path"), "/debug", sep = ""), "/annotated-", script.name, sep = ""))
write(script, fileout)
close(fileout)
}
# .ddg.file.node creates a node of type File. File nodes are used for files
# written to the DDG directory by capturing output from the script or by copying
# a file that is written by the script. Returns the path where the file
# referenced by the node is stored.
# dtype - type of data node. fname - path and name of original file. dname -
# name of data node. dscope (optional) - scope of data node.
.ddg.file.node <- function(dtype, fname, dname, dscope = NULL) {
# Get original file location.
file.name <- basename(fname)
file.loc <- normalizePath(fname, winslash = "/", mustWork = FALSE)
loc.value <- if (dtype == "File")
paste(" Location=\"", file.loc, "\"", sep = "") else ""
# Add number to file name.
dfile <- paste(.ddg.get("ddg.dnum") + 1, "-", file.name, sep = "")
# Calculate the path to the file relative to the ddg directory. This is the
# value stored in the node.
dpfile <- paste("data", dfile, sep = "/")
dtime <- .format.time(Sys.time())
# Set the node label.
if (is.null(dname))
dname <- file.name
# Get scope if necessary.
if (is.null(dscope))
dscope <- .ddg.get.scope(dname)
# Not from environment.
from.env <- FALSE
# Record in data node table
.ddg.record.data(dtype, dname, dpfile, dpfile, dscope, from.env = from.env, dtime,
file.loc)
# Get path plus file name to where the file will be copied
dpath <- paste(paste(.ddg.get("ddg.path"), "/data", sep = ""), "/", dfile, sep = "")
return(dpath)
}
# .ddg.file.copy creates a data node of type File. File nodes are used for files
# written by the main script. A copy of the file is written to the DDG directory.
# dtype - type of data node. fname - path and name of original file. dname -
# name of data node. dscope - scope of data node.
.ddg.file.copy <- function(dtype, fname, dname, dscope) {
# Calculate location of original file.
file.loc <- normalizePath(fname, winslash = "/", mustWork = FALSE)
# Copy file.
if (file.exists(file.loc)) {
# Create file node in DDG.
dpfile.out <- .ddg.file.node(dtype, fname, dname, dscope)
if (.ddg.get("ddg.save.to.disk")) {
file.copy(file.loc, dpfile.out, overwrite = TRUE)
}
} else {
error.msg <- paste("File to copy does not exist:", fname)
.ddg.insert.error.message(error.msg)
return(NULL)
}
if (.ddg.get("ddg.debug.lib"))
print(paste("file.copy: ", dtype, " ", file.loc))
return(dpfile.out)
}
# .ddg.insert.error.message issues a warning and inserts an exception node after
# the last procedure step. The name of the node is 'error.msg' and the value is
# the error message passed to this function.
# msg - error message. msg.type - error or warning scope - scope for evaluating
# any data doWarn - if true, this function displays a warning
.ddg.insert.error.message <- function(msg, msg.type = "error.msg", scope = "ddg.library",
doWarn = TRUE) {
if (doWarn) {
warning(msg)
}
.ddg.data.node("Exception", msg.type, msg, scope)
.ddg.lastproc2data(msg.type, dscope = scope)
}
# .ddg.delete.temp deletes any temporary files created during the processing of a
# script. These include the temporary history file.
.ddg.delete.temp <- function() {
# Delete the temporary history file if we made it.
if (.ddg.is.set("ddg.history.file"))
unlink(.ddg.get("ddg.history.file"))
# Clear the environment.
.ddg.env <- new.env(parent = emptyenv())
}
# .ddg.create.output.nodes creates output nodes for ddg.function and
# ddg.procedure. Outs values must be passed as strings, not names, unless the
# value is a file name.
# fname - the name of the function calling .ddg.create.output.nodes. pname - the
# name of the procedure node. outs.graphic - the name of a snapshot node to be
# used as a file name. A graphical snapshot is simply a captured image of the
# graphic device active at the time of the call to ddg.function or ddg.procedure.
# outs.data - a list of names of data nodes. outs.exception - a list of names of
# exception nodes. outs.url - a list of names of url nodes. outs.file - a list
# of names of file nodes. Supported file extensions include: .csv, .jpg, .jpeg,
# .pdf, and .txt. graphic.fext - the file extension to be used when saving the
# captured graphic. Supported extensions are .jpg, .jpeg, .pdf.
.ddg.create.output.nodes <- function(fname, pname, outs.graphic, outs.data, outs.exception,
outs.url, outs.file, graphic.fext, env) {
# Capture graphics device.
if (is.character(outs.graphic)) {
name <- outs.graphic
gfext <- as.character(graphic.fext)
.ddg.write.graphic(name, "Graphical Plot. Not saved in script.", fext = gfext) # value is ignored
.ddg.proc2data(pname, name)
}
# Create output nodes and edges if outs list provided.
# Exception node.
if (!is.null(outs.exception)) {
stack <- sys.calls()
lapply(outs.exception, function(param) {
# Get value in calling environment.
name <- param
value <- NULL
.ddg.lookup.value(name, value, env, fname, warn = FALSE)
# Exception node.
scope <- .ddg.get.scope(param, calls = stack)
.ddg.data.node("Exception", name, value, scope)
.ddg.proc2data(pname, name, scope)
})
}
# URL node.
if (!is.null(outs.url)) {
stack <- sys.calls()
lapply(outs.url, function(param) {
# Get value in calling environment.
name <- param
value <- NULL
.ddg.lookup.value(name, value, env, fname, warn = FALSE)
# URL node.
scope <- .ddg.get.scope(param, calls = stack)
.ddg.data.node("URL", name, value, scope)
.ddg.proc2data(pname, name, scope)
})
}
# Generalized data node (includes simple data values as well as snapshots)
if (!is.null(outs.data)) {
stack <- sys.calls()
lapply(outs.data, function(param) {
# Get value in calling environment.
name <- param
value <- NULL
.ddg.lookup.value(name, value, env, fname, warn = FALSE)
tryCatch({
if (!is.character(name))
name <- deparse(substitute(name))
envName <- environmentName(env)
scope <- .ddg.get.scope(param, calls = stack)
.ddg.save.data(name, value, fname, error = TRUE, scope = scope)
.ddg.proc2data(pname, name, scope)
}, error = function(e) {
.ddg.insert.error.message(e)
})
})
}
# File node.
if (!is.null(outs.file)) {
stack <- sys.calls()
lapply(outs.file, function(param) {
# Get value in calling environment.
name <- param
value <- NULL
.ddg.lookup.value(name, value, env, fname, warn = FALSE)
scope <- .ddg.get.scope(param, calls = stack)
if (value == "") {
# Filename passed as value.
.ddg.file.copy("File", name, name, scope)
.ddg.proc2data(pname, name, scope)
} else {
# Filename passed as name.
.ddg.file.copy("File", value, name, scope)
.ddg.proc2data(pname, name, scope)
}
})
}
}
# .ddg.create.function.nodes creates the procedure node, input binding nodes, and
# output nodes for the function.
# pname - name of procedure node. full.call - full function call. outs.data,
# etc (optional) - output nodes. auto.created - TRUE if the function node is
# created automatically when a return is found env (optional) - the environment
# local to the function
.ddg.create.function.nodes <- function(pname, call, full.call, outs.graphic = NULL,
outs.data = NULL, outs.exception = NULL, outs.url = NULL, outs.file = NULL, graphic.fext = "jpeg",
auto.created = FALSE, env = NULL) {
# Create the start node
if (typeof(call[[1]]) == "closure") {
.add.abstract.node("Start", node.name = pname, env = env)
} else {
.add.abstract.node("Start", node.name = paste(deparse(call), collapse = ""),
env = env)
}
# Tokens will contain the function name and the argument expressions.
# Get parameters and create edges.
if (length(full.call) > 1) {
# args contains the names of the variable that was passed into the function.
args <- full.call[2:length(full.call)]
# param,names contains the names of the parameters (this is what the variable is
# known as inside the function).
param.names <- names(full.call)
param.names <- param.names[2:length(param.names)]
stack <- sys.calls()
# scope <- .ddg.get.scope(args[[1]], for.caller = TRUE)
bindings <- list()
for (i in 1:length(args)) bindings[[i]] <- list(args[[i]], param.names[[i]])
missing.params <- character()
lapply(bindings, function(binding) {
# Here, arg is the arguments passed IN.
arg <- binding[[1]]
# formal is the paramenter name of the function (what is the variable known as
# inside?).
formal <- binding[[2]][[1]]
if (is.null(formal) || formal == "")
formal <- "..."
# Find all the variables used in this parameter. If the argument is a string
# constant, don't bother looking for variables. Also add quotes around it in the
# node name.
if (is.character(arg)) {
vars.used <- character()
binding.node.name <- paste(formal, " <- \"", arg, "\"", sep = "")
} else {
vars.used <- .find.var.uses(arg)
binding.node.name <- paste(formal, " <- ", paste(deparse(arg), collapse = " "))
}
.proc.node("Binding", binding.node.name)
.ddg.proc2proc()
for (var in vars.used) {
param.scope <- .ddg.get.scope(var, for.caller = TRUE, calls = stack)
if (.ddg.data.node.exists(var, param.scope)) {
.ddg.data2proc(as.character(var), param.scope, binding.node.name)
if (.ddg.get("ddg.debug.lib"))
print(paste("param:", var))
}
}
if (formal != "...") {
formal.scope <- .ddg.get.scope(formal, calls = stack)
formal.env <- .ddg.get.env(formal, calls = stack)
# If we can evaluate the argument without an error, we record the value. If an
# error occurs, we do not record the value as it's possible that the function
# never actually uses it.
tryCatch({
.ddg.save.data(formal, eval(parse(text = formal), formal.env),
fname = ".ddg.save.data", scope = formal.scope, stack = stack)
.ddg.proc2data(binding.node.name, formal, formal.scope)
}, error = function(e) {
})
}
})
}
.proc.node("Operation", pname, pname, auto.created = auto.created)
# Link to the definition of the function if the function is defined in this
# script.
if (.ddg.data.node.exists(pname, environmentName(.GlobalEnv))) {
.ddg.data2proc(pname, environmentName(.GlobalEnv), pname)
}
if (length(full.call) > 1) {
lapply(bindings, function(binding) {
formal <- binding[[2]][[1]]
# Formal will be NULL if declared as ... Don't create the data node in that
# case.
if (!is.null(formal) && formal != "") {
formal.scope <- .ddg.get.scope(formal, calls = stack)
if (.ddg.data.node.exists(formal, formal.scope)) {
.ddg.data2proc(formal, formal.scope, pname)
}
}
})
}
# Create control flow edge from preceding procedure node.
.ddg.proc2proc()
# create output nodes
.ddg.create.output.nodes(fname = "ddg.function", pname, outs.graphic, outs.data,
outs.exception, outs.url, outs.file, graphic.fext, parent.frame(2))
}
# .ddg.get.frame.number gets the frame number of the closest non-library calling
# function.
# calls - system calls. for.caller (optional) - if TRUE, go up one level before
# searching.
.ddg.get.frame.number <- function(calls, for.caller = FALSE) {
if (is.null(calls))
calls <- sys.calls()
script.func.found <- FALSE
nframe <- length(calls)
for (i in nframe:1) {
call <- sys.call(i)[[1]]
# Guess that if we have a closure it is a user-defined function and not a ddg
# function Is this a good assumption ????
if (typeof(call) == "closure") {
if (for.caller && !script.func.found) {
script.func.found <- TRUE
} else {
return(i)
}
} else {
call.func <- as.character(call)
# Ignore calls to ddg functions or to the functions that get called from the
# outermost tryCatch to ddg code.
if (substr(call.func, 1, 4) != ".ddg" && substr(call.func, 1, 3) != "ddg" &&
substr(call.func, 1, 10) != "doTryCatch" && substr(call.func, 1,
11) != "tryCatchOne" && substr(call.func, 1, 12) != "tryCatchList" &&
substr(call.func, 1, 8) != "tryCatch") {
if (for.caller && !script.func.found) {
script.func.found <- TRUE
} else {
return(i)
}
}
}
}
return(0)
}
# .ddg.where looks up the environment for the variable specified by name.
# Adapted from Hadley Wickham, Advanced R programming.
# name - name of variable. env (optional) - environment in which to look for
# variable. warning (optional) - set to TRUE if a warning should be thrown when
# a variable is not found.
.ddg.where <- function(name, env = parent.frame(), warning = TRUE) {
stopifnot(is.character(name), length(name) == 1)
if (identical(env, emptyenv())) {
if (warning)
warning("Can't find ", name)
return("undefined")
}
if (exists(name, env, inherits = FALSE)) {
env
} else {
.ddg.where(name, parent.env(env), warning)
}
}
# .ddg.get.env gets the environment in which name is declared.
# name - variable name. for.caller (optional) - if TRUE, go up one level before
# searching. calls (optional) - system calls.
.ddg.get.env <- function(name, for.caller = FALSE, calls = NULL) {
if (is.null(calls))
calls <- sys.calls()
fnum <- .ddg.get.frame.number(calls, for.caller)
stopifnot(!is.null(fnum))
# This statement was broken into two statements so that we can add print
# statements to .ddg.where or step through it with a debugger without breaking
# it. If we don't do that the print output gets captured by capture.output and
# does not display to the user and also causes the subsequent grepl call in this
# function to fail.
tryCatch(if (!exists(name, sys.frame(fnum), inherits = TRUE))
return(NULL), error = function(e) {
})
env <- .ddg.where(name, sys.frame(fnum))
return(env)
}
# .ddg.get.scope gets the id of the closest non-library environment.
# name - name of variable. for.caller (optional) - if TRUE, go up one level
# before searching. calls (optional) - system calls. env (optional) - the
# environment to get the scope for
.ddg.get.scope <- function(name, for.caller = FALSE, calls = NULL, env = NULL) {
# Get the environment for the variable call.
if (is.null(env)) {
env <- .ddg.get.env(name, for.caller, calls)
}
# If no environment found, name does not exist, so scope is undefined.
if (is.null(env))
return("undefined")
scope <- sub("^<environment: (.*)>$", "\\1", capture.output(env)[1])
if (grepl("undefined", scope))
scope <- "undefined"
return(scope)
}
# .ddg.is.local returns TRUE if the specified name is local in the specified
# scope.
# name of variable. scope of variable.
.ddg.is.local <- function(name, scope) {
return(exists(name, scope, inherits = FALSE))
}
# Creates a start node for the current command if one has not been created
# already.
.ddg.create.start.for.cur.cmd <- function(call, caller.env) {
if (.ddg.is.set(".ddg.cur.cmd")) {
.ddg.cur.cmd <- .ddg.get(".ddg.cur.cmd")
.ddg.cur.cmd.stack <- .ddg.get(".ddg.cur.cmd.stack")
stack.length <- length(.ddg.cur.cmd.stack)
if (stack.length >= 1) {
last.created <- .ddg.cur.cmd.stack[stack.length]
# Only create a start node for the current command if we have not already created
# one and the command is more than just the call to this function
if (last.created[[1]] == "FALSE") {
if (.ddg.cur.cmd@text != paste(deparse(call), collapse = "")) {
cmd.abbrev <- .add.abstract.node("Start", .ddg.cur.cmd, caller.env)
.ddg.cur.expr.stack <- .ddg.get(".ddg.cur.expr.stack")
st.type <- .get.statement.type(.ddg.cur.cmd@parsed[[1]])
loop.statement <- (st.type == "for" || st.type == "while" || st.type ==
"repeat")
control.statement <- loop.statement || st.type == "if"
.ddg.create.data.use.edges.for.console.cmd(vars.set = data.frame(),
.ddg.cur.cmd, 0, for.caller = !control.statement)
# Add Details Omitted node before annotated loops if needed.
if (loop.statement && ddg.first.loop() > 1) {
ddg.details.omitted()
}
# Mark the start node as created on the stack. Mark it even if we did not create
# the abstract node above, because we will create it below.
.ddg.set(".ddg.cur.cmd.stack", c(.ddg.cur.cmd.stack[1:stack.length -
1], TRUE))
} else {
.ddg.set(".ddg.cur.cmd.stack", c(.ddg.cur.cmd.stack[1:stack.length -
1], "MATCHES_CALL"))
}
}
}
}
}
# .ddg.get.last.cmd returns the last command on the stack.
.ddg.get.last.cmd <- function() {
.ddg.cur.cmd.stack <- .ddg.get(".ddg.cur.cmd.stack")
stack.length <- length(.ddg.cur.cmd.stack)
cmd <- .ddg.cur.cmd.stack[stack.length - 1][[1]]
}
# .ddg.remove.last.cmd.start.created removes the last command and start.created
# from the stack.
.ddg.remove.last.cmd.start.created <- function() {
.ddg.cur.cmd.stack <- .ddg.get(".ddg.cur.cmd.stack")
stack.length <- length(.ddg.cur.cmd.stack)
if (stack.length == 2) {
.ddg.set(".ddg.cur.cmd.stack", vector())
} else {
.ddg.set(".ddg.cur.cmd.stack", .ddg.cur.cmd.stack[1:(stack.length - 2)])
}
}
# .ddg.break.statement creates a procedure node for a break statement in a for,
# repeat, or while statement. It also adds a finish node for the if statement (if
# any) where the break occurs, adds a finish node for the for, repeat, or while
# loop where the break occurs, and adds a finish node for the for, repeat, or
# while statement.
.ddg.break.statement <- function() {
# Create procedure node for break statement.
.proc.node("Operation", "break", "break")
.ddg.proc2proc()
# Get last command from stack.
cmd <- .ddg.get.last.cmd()
# Get loop type.
loop.type <- as.character(cmd@parsed[[1]][[1]])
# Create finish nodes if break occurs in if statement.
if (loop.type == "if") {
# Create finish node for if loop.
ddg.finish("if")
# Create finish node for if statement.
.add.abstract.node("Finish", cmd, parent.frame())
# Remove last command & start.created from stack.
.ddg.remove.last.cmd.start.created()
# Get last command from stack.
cmd <- .ddg.get.last.cmd()
# Get loop type.
loop.type <- as.character(cmd@parsed[[1]][[1]])
}
# Create finish node for for, repeat, or while loop.
loop.name <- paste(loop.type, "loop")
ddg.finish(loop.name)
# Create finish node for for, while, or repeat statement.
.add.abstract.node("Finish", cmd, parent.frame())
# Remove last command & start.created from stack.
.ddg.remove.last.cmd.start.created()
}
# .ddg.next.statement creates a procedure node for a next statement in a for,
# repeat, or while statement. It also adds a finish node for the if statement (if
# any) where the next occurs and adds a finish node for the for, while, or repeat
# loop where the next occurs.
.ddg.next.statement <- function() {
# Create procedure node for next statement.
.proc.node("Operation", "next", "next")
.ddg.proc2proc()
# Get last command from stack.
cmd <- .ddg.get.last.cmd()
# Get loop type.
loop.type <- as.character(cmd@parsed[[1]][[1]])
# Create finish nodes if break occurs in if statement.
if (loop.type == "if") {
# Create finish node for if loop.
ddg.finish("if")
# Create finish node for if statement.
.add.abstract.node("Finish", cmd, parent.frame())
# Remove last command & start.created from stack.
.ddg.remove.last.cmd.start.created()
# Get last command from stack.
cmd <- .ddg.get.last.cmd()
# Get loop type.
loop.type <- as.character(cmd@parsed[[1]][[1]])
}
# Create finish node for for, repeat, or while loop.
loop.name <- paste(loop.type, "loop")
ddg.finish(loop.name)
}
# .ddg.markdown takes a Rmd file and extracts the R code and text through the
# purl function in the knitr library. It then annotates the R script to insert
# start and finish nodes based on the chunks the user already created. If eval =
# false, then the chunk will not be added to the DDG. If the user has a name for
# the chunk, then that name will be used, else a chunk name 'ddg.chunk_1' and
# higher numbers will be generated. Important: If in a code chunk, there is an
# empty line followed by '# ----' or '#'', then an extra finish node will be
# inserted, causing an error. r.script.path is the path of the original Rmd file
# output.path is the path of the generated R script
.ddg.markdown <- function(r.script.path = NULL, output.path = NULL) {
# generates R script file from markdown file
knitr::purl(r.script.path, documentation = 2L, quiet = TRUE)
# moves file to ddg directory
file.rename(from = paste(getwd(), "/", basename(tools::file_path_sans_ext(r.script.path)),
".R", sep = ""), to = output.path)
script <- readLines(output.path)
skip <- FALSE
name <- "ddg.chunk"
annotated <- character(0)
index <- 1
# This for loop goes through the script line by line and searches for patterns to
# insert the start and finish nodes
for (i in 1:length(script)) {
# eval = false means we skip this code chunk, therefore skip = TRUE
if (regexpr("eval+(\\s*)+=+(\\s*)+FALSE", script[i]) != -1) {
skip <- TRUE
annotated <- append(annotated, script[i])
} else if (regexpr("## ----", script[i]) != -1) {
# if no options in the line, then generate default name.
if (regexpr("## -----", script[i]) == -1) {
if (regexpr("=", script[i]) == -1) {
end <- regexpr("-----", script[i])
name <- substring(script[i], 8, last = end - 1)
} else if (regexpr(",", script[i]) != -1) {
comma <- regexpr(",", script[i])
name <- substring(script[i], 8, last = comma - 1)
} else {
name <- paste("ddg.chunk_", index, sep = "")
index <- index + 1
}
} else {
name <- paste("ddg.chunk_", index, sep = "")
index <- index + 1
}
name <- stringr::str_trim(name, side = "both")
annotated <- append(annotated, paste("ddg.start(\"", name, "\")", sep = ""))
} else if (nchar(script[i]) == 0 && (regexpr("#'", script[i + 1]) != -1 ||
i == length(script) || regexpr("## ----", script[i + 1]) != -1)) {
if (skip) {
annotated <- append(annotated, script[i])
skip <- FALSE
} else {
annotated <- append(annotated, paste("ddg.finish(\"", name, "\")",
sep = ""))
}
} else {
annotated <- append(annotated, script[i])
}
}
writeLines(annotated, output.path)
r.script.path
}
# .ddg.save.debug.files saves debug files to the debug directory.
.ddg.save.debug.files <- function() {
# Save initial environment table to file.
fileout <- paste(paste(.ddg.get("ddg.path"), "/debug", sep = ""), "/initial-environment.csv", sep = "")
ddg.initial.env <- .ddg.get("ddg.initial.env")
write.csv(ddg.initial.env, fileout, row.names = FALSE)
# Save procedure nodes table to file.
fileout <- paste(paste(.ddg.get("ddg.path"), "/debug", sep = ""), "/procedure-nodes.csv", sep = "")
ddg.proc.nodes <- .ddg.get("ddg.proc.nodes")
ddg.proc.nodes <- ddg.proc.nodes[ddg.proc.nodes$ddg.num > 0, ]
write.csv(ddg.proc.nodes, fileout, row.names = FALSE)
# Save data nodes table to file.
fileout <- paste(paste(.ddg.get("ddg.path"), "/debug", sep = ""), "/data-nodes.csv", sep = "")
ddg.data.nodes <- .ddg.get("ddg.data.nodes")
ddg.data.nodes2 <- ddg.data.nodes[ddg.data.nodes$ddg.num > 0, ]
write.csv(ddg.data.nodes2, fileout, row.names = FALSE)
# Save edges table to file.
fileout <- paste(paste(.ddg.get("ddg.path"), "/debug", sep = ""), "/edges.csv", sep = "")
ddg.edges <- .ddg.get("ddg.edges")
ddg.edges2 <- ddg.edges[ddg.edges$ddg.num > 0, ]
write.csv(ddg.edges2, fileout, row.names = FALSE)
# Save function return table to file.
fileout <- paste(paste(.ddg.get("ddg.path"), "/debug", sep = ""), "/function-returns.csv", sep = "")
ddg.returns <- .ddg.get(".ddg.ret.values")
ddg.returns2 <- ddg.returns[ddg.returns$ret.node.id > 0, ]
write.csv(ddg.returns2, fileout, row.names = FALSE)
# Save if script is sourced.
if (.ddg.get(".ddg.is.sourced")) {
# Save sourced script table to file.
fileout <- paste(paste(.ddg.get("ddg.path"), "/debug", sep = ""), "/sourced-scripts.csv", sep = "")
ddg.sourced.scripts <- .ddg.get(".ddg.sourced.scripts")
ddg.sourced.scripts2 <- ddg.sourced.scripts[ddg.sourced.scripts$snum >= 0,
]
write.csv(ddg.sourced.scripts2, fileout, row.names = FALSE)
# Save data object table to file.
fileout <- paste(paste(.ddg.get("ddg.path"), "/debug", sep = ""), "/data-objects.csv", sep = "")
ddg.data.objects <- .ddg.data.objects()
write.csv(ddg.data.objects, fileout, row.names = FALSE)
}
}
#--------------------USER FUNCTIONS-----------------------#
# .ddg.find.ddg.ret.value.caller.frame.number returns the frame number of the
# first caller to ddg.ret.value. If ddg.ret.value is called recursively,
# this will give us the position of the earliest one called.
.ddg.find.ddg.ret.value.caller.frame.number <- function() {
# Get the stack
calls <- sys.calls()
# Find the calls to ddg.ret.value
ddg.funcs <- unlist(lapply(calls, function(call) return(grepl("^ddg|.ddg", deparse(call)[[1]]))))
calls.to.ddg.ret.value <- unlist(lapply(calls, function(call) return(.ddg.is.call.to(call,
as.name("ddg.ret.value")))))
non.ddg.calls.to.ddg.ret.value <- !(ddg.funcs[1:length(ddg.funcs) - 1]) &
calls.to.ddg.ret.value[2:length(calls.to.ddg.ret.value)]
which.frame <- Position(function(call) return(call), non.ddg.calls.to.ddg.ret.value,
right = TRUE)
# Return the frame number of the caller to ddg.ret.value
return(which.frame)
}
# Returns true if we should run the annotated version of a function and false if
# we should run the unannotated version.
ddg.should.run.annotated <- function(func.name) {
# check if we are in a loop and loop annotations are off
if (!.ddg.get("ddg.loop.annotate") && .ddg.get("ddg.inside.loop") > 0)
return(FALSE)
# Make sure this specific function has not been disabled
if (!is.null(.ddg.get("ddg.annotate.off")) & func.name %in% .ddg.get("ddg.annotate.off"))
return(FALSE)
# Not annotating functions in general check if this specific function should be
# annotated
if (!is.null(.ddg.get("ddg.annotate.on")) & func.name %in% .ddg.get("ddg.annotate.on"))
return(TRUE)
# If we do not know anything specific about this function, follow the general
# rule
return(.ddg.get("ddg.annotate.inside"))
}
# .ddg.save inserts attribute information and the number of procedure steps at the
# top of the DDG. It writes the DDG and the procedure nodes, data nodes, and
# function return tables to the DDG directory.
# r.script.path (optional) - Path to the R script. save.debug (optional) - If
# TRUE, save debug files to debug directory. Used in console mode. quit
# (optional) - If TRUE, remove all DDG files from memory. Unlike ddg.run, this
# is set to false as default since it will generally be called internally and by
# tests, as opposed to by the user.
.ddg.save <- function(r.script.path = NULL, save.debug = FALSE, quit = FALSE) {
if (!(.ddg.is.set(".ddg.initialized") && .ddg.get(".ddg.initialized")))
return(invisible())
if (interactive() && .ddg.get(".ddg.enable.console")) {
# Get the final commands
.ddg.console.node()
}
# If there is a display device open, grab what is on the display
if (length(dev.list()) >= 1) {
tryCatch(.ddg.capture.graphics(basename(.ddg.get("ddg.r.script.path")), called.from.save = TRUE),
error = function(e) print(e))
}
# Save ddg.json to file.
.ddg.json.write()
if (interactive())
print(paste("Saving ddg.json in ", .ddg.get("ddg.path"), sep = ""))
# Save sourced scripts (if any). First row is main script.
ddg.sourced.scripts <- .ddg.get(".ddg.sourced.scripts")
if (!is.null(ddg.sourced.scripts)) {
if (nrow(ddg.sourced.scripts) > 1) {
for (i in 1:nrow(ddg.sourced.scripts)) {
sname <- ddg.sourced.scripts[i, "sname"]
if (.ddg.get("ddg.save.to.disk")) {
file.copy(sname, paste(paste(.ddg.get("ddg.path"), "/scripts", sep = ""), basename(sname), sep = "/"))
}
}
}
}
# Save debug files to debug directory.
if (save.debug | .ddg.get("ddg.save.debug")) {
.ddg.save.debug.files()
}
# Clear DDGStatements from ddg environment.
.ddg.set("ddg.statement.num", 0)
.ddg.set("ddg.statements", list())
# Clear loop information from ddg environment.
.ddg.set("ddg.loop.num", 0)
.ddg.set("ddg.loops", list())
# By convention, this is the final call to ddg.save.
if (quit) {
# Restore history settings.
if (.ddg.is.set("ddg.original.hist.size"))
Sys.setenv(R_HISTSIZE = .ddg.get("ddg.original.hist.size"))
# Delete temporary files.
.ddg.delete.temp()
# Capture current graphics device.
.ddg.auto.graphic.node(dev.to.capture = dev.cur)
# Shut down the DDG.
.ddg.clear()
}
invisible()
}
# .ddg.console.off turns off the console mode of DDG construction.
.ddg.console.off <- function() {
if (!(.ddg.is.set(".ddg.initialized") && .ddg.get(".ddg.initialized")))
return(invisible())
# Capture history if console was on up to this point.
if (interactive() && .ddg.get(".ddg.enable.console")) {
.ddg.console.node()
}
# Set the console to off.
.ddg.set(".ddg.enable.console", FALSE)
}
# .ddg.console.on turns on the console mode of DDG construction.
.ddg.console.on <- function() {
if (!(.ddg.is.set(".ddg.initialized") && .ddg.get(".ddg.initialized")))
return(invisible())
# Write a new timestamp if we're turning on the console so we only capture
# history from this point forward.
if (!.ddg.get(".ddg.enable.console"))
.write.timestamp.to.history()
.ddg.set(".ddg.enable.console", TRUE)
}
# .ddg.delete.save removes all files from the DDG directories unless the the DDG
# directory is the working directory. If no DDG directory is specified, the
# current DDG directory is assumed.
# ddg.path (optional) - path to DDG directory.
.ddg.delete.save <- function(ddg.path = NULL) {
# Use current DDG directories if no directory is specified.
if (is.null(ddg.path)) {
ddg.path <- .ddg.get("ddg.path")
ddg.path.data <- paste(.ddg.get("ddg.path"), "/data", sep = "")
ddg.path.debug <- paste(.ddg.get("ddg.path"), "/debug", sep = "")
ddg.path.scripts <- paste(.ddg.get("ddg.path"), "/scripts", sep = "")
}
# Remove files unless the DDG directory is the working directory.
if (ddg.path != getwd()) {
unlink(paste(ddg.path, "*.*", sep = "/"))
unlink(paste(ddg.path.data, "*.*", sep = "/"))
unlink(paste(ddg.path.data, ".ddghistory", sep = "/"))
unlink(paste(ddg.path.debug, "*.*", sep = "/"))
unlink(paste(ddg.path.scripts, "*.*", sep = "/"))
}
invisible()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.