R/core.r

Defines functions .ddg.init.environ .ddg.sourced.script.names .ddg.installedpackages .ddg.is.simple .ddg.is.csv .ddg.is.object .ddg.is.function .ddg.dev.change .ddg.write.csv .ddg.get.val.type.string .ddg.get.val.type.from.var .ddg.get.val.type .ddg.record.edge .ddg.is.nonlocal.assign .ddg.create.empty.vars.set .ddg.double.vars.set .ddg.add.to.vars.set .ddg.find.var.assignments .ddg.create.data.use.edges.for.console.cmd .ddg.create.data.set.edges.for.cmd .ddg.create.data.node.for.possible.writes .ddg.find.files .ddg.create.file.read.nodes.and.edges .ddg.create.file.write.nodes.and.edges .ddg.link.function.returns .ddg.create.DDGStatements .ddg.save.annotated.script .ddg.file.node .ddg.file.copy .ddg.insert.error.message .ddg.delete.temp .ddg.create.output.nodes .ddg.create.function.nodes .ddg.get.frame.number .ddg.where .ddg.get.env .ddg.get.scope .ddg.is.local .ddg.create.start.for.cur.cmd .ddg.get.last.cmd .ddg.remove.last.cmd.start.created .ddg.break.statement .ddg.next.statement .ddg.markdown .ddg.save.debug.files .ddg.find.ddg.ret.value.caller.frame.number ddg.should.run.annotated .ddg.save .ddg.console.off .ddg.console.on .ddg.delete.save

# Copyright (C) 2017 Harvard University, Mount Holyoke College
#
# This file is part of ProvR.
#
# ProvR is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# ProvR is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ProvR; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#
# This package was forked from <https://github.com/End-to-end-provenance/RDataTracker>
#
# Contact: Matthew Lau <matthewklau@fas.harvard.edu>

# Create DDG environment variable.
.ddg.env <- new.env(parent = emptyenv())

# Set the number of lines the history file keeps (and therefore can be analyzed).
# Note: this setting has no effect on some systems.
ddg.MAX_HIST_LINES <- 2^14

# .ddg.init.environ() sets up the filesystem and R environments for use.
.ddg.init.environ <- function() {
    dir.create(.ddg.get("ddg.path"), showWarnings = FALSE)
    dir.create(paste(.ddg.get("ddg.path"), "/data", sep = ""), showWarnings = FALSE)
    dir.create(paste(.ddg.get("ddg.path"), "/debug", sep = ""), showWarnings = FALSE)
    dir.create(paste(.ddg.get("ddg.path"), "/scripts", sep = ""), showWarnings = FALSE)
    if (interactive() && .ddg.get(".ddg.enable.console")) {
        .ddg.set("ddg.original.hist.size", Sys.getenv("R_HISTSIZE"))
        Sys.setenv(R_HISTSIZE = ddg.MAX_HIST_LINES)
    }
}

# .ddg.sourced.script.names returns a string containing the names of sourced
# scripts, if any. If no scripts were sourced it returns an empty string.

.ddg.sourced.script.names <- function() {
    ss <- .ddg.get(".ddg.sourced.scripts")
    # First row is main script.
    if (nrow(ss) == 1)
        snames <- "" else {
        snames <- ss[ss$snum >= 1, "sname"]
        snames <- paste0(snames, collapse = ",")
    }
    return(snames)
}

# ddg.installedpackages() returns information on packages installed at the time
# of execution and their versions.
.ddg.installedpackages <- function() {
    packages <- devtools::session_info()
    packages <- packages[[2]]
    installed <- packages[packages[, 2] == "*", ]
    installed <- installed[, c(1, 3)]
    return(installed)
}

# .ddg.is.simple returns TRUE if the value passed in is a simple data value which
# should be saved locally as opposed to stored in a separate file. The assumption
# is that the value passed in has already been declared not to be a graphic.

# value - input value.

.ddg.is.simple <- function(value) {
    # Note that is.vector returns TRUE for lists, so we need to check lists
    # separately.  Since every value in a list can have a different type, if it is a
    # list, we will assume the value is complex. We consider NULL values to be
    # simple.
    return((!.ddg.is.graphic(value) && !is.list(value) && is.vector(value) && length(value) ==
        1) || is.null(value))
}

# .ddg.is.csv returns TRUE if the value passed in should be saved as a csv file,
# i.e. if it is a vector, matrix, or data frame.  Note that is.vector returns
# TRUE for lists.

# value - input value.

.ddg.is.csv <- function(value) {
    return(!.ddg.is.simple(value) && ((is.vector(value) && !is.list(value)) || is.matrix(value) ||
        is.data.frame(value)))
}


# .ddg.is.object returns TRUE if the value is determined to be an object by our
# standards.

# value - input value.

.ddg.is.object <- function(value) {
    return(is.object(value) || is.environment(value))
}

# .ddg.is.function returns TRUE if the value is determined to be a function or we
# want to save it as a function.

# value - input value.

.ddg.is.function <- function(value) {
    return(is.function(value))
}

# .ddg.dev.change determines whether or not a new graphic device has become
# active and whether or not we should capture the previous graphic device. It
# returns the device number we should capture (0 means we shouldn't capture any
# device).

.ddg.dev.change <- function() {
    prev.device <- .ddg.get("prev.device")
    curr.device <- dev.cur()
    device.list <- dev.list()
    # We've switched devices .
    if (prev.device != curr.device) {
        # Update device.
        .ddg.set("prev.device", curr.device)
        # Previous device still accessible.
        if (prev.device %in% device.list)
            return(prev.device)
    }
    # No switching, or previous is not accessible (NULL or removed).
    return(0)
}



# .ddg.write.csv takes as input a name-value pair for a variable and attempts to
# save the data as a csv file. It does not create any edges but does add the node
# to the DDG. Edge creation should occur from wherever this function is called.

# name - data node name.  value - data node value.  scope - data node scope.

.ddg.write.csv <- function(name, value, scope = NULL, from.env = FALSE) {
    tryCatch({
        .snapshot.node(name, "csv", value, dscope = scope, from.env = from.env)
    }, error = function(e) {
        .snapshot.node(name, "txt", value, save.object = TRUE, dscope = scope,
            from.env = from.env)
    })
}

# Returns a string representation of the type information of the given value.
.ddg.get.val.type.string <- function(value) {
    val.type <- .ddg.get.val.type(value)
    if (is.null(val.type))
        return("null")
    # list, object, environment, function, language
    if (length(val.type) == 1)
        return(paste("\"", val.type, "\"", sep = ""))
    # vector, matrix, array, data frame type information recorded in a list of 3
    # vectors (container,dimension,type)
    container <- val.type[[1]]
    dimension <- val.type[[2]]
    type <- val.type[[3]]
    # vector: a 1-dimensional array (uniform typing)
    if (identical(container, "vector"))
        return(paste("{\"container\":\"vector\", \"dimension\":[", dimension, "], \"type\":[\"",
            type, "\"]}", sep = ""))
    # matrix: a 2-dimensional array (uniform typing)
    if (identical(container, "matrix")) {
        dimension <- paste(dimension, collapse = ",")
        return(paste("{\"container\":\"matrix\", \"dimension\":[", dimension, "], \"type\":[\"",
            type, "\"]}", sep = ""))
    }
    # array: n-dimensional (uniform typing)
    if (identical(container, "array")) {
        dimension <- paste(dimension, collapse = ",")
        return(paste("{\"container\":\"array\", \"dimension\":[", dimension, "], \"type\":[\"",
            type, "\"]}", sep = ""))
    }
    # data frame: is a type of list
    dimension <- paste(dimension, collapse = ",")
    type <- paste(type, collapse = "\",\"")
    return(paste("{\"container\":\"data_frame\", \"dimension\":[", dimension, "], \"type\":[\"",
        type, "\"]}", sep = ""))
}


# Returns the type information of the value of the given variable.  Does not
# contain information on dimensions.

.ddg.get.val.type.from.var <- function(var) {
    val.type <- .ddg.get.val.type(get(var))
    # remove dimension information, if any
    if (length(val.type) > 1)
        val.type[2] <- NULL
    return(val.type)
}


# Returns the type information of the given value, broken into its parts and
# returned in a vecctor or a list.

.ddg.get.val.type <- function(value) {
    # vector: a 1-dimensional array (uniform typing)
    if (is.vector(value))
        return(list("vector", length(value), class(value)))
    # matrix: a 2-dimensional array (uniform typing)
    if (is.matrix(value))
        return(list("matrix", dim(value), class(value[1])))
    # array: n-dimensional (uniform typing)
    if (is.array(value))
        return(list("array", dim(value), class(value[1])))
    # data frame: is a type of list
    if (is.data.frame(value)) {
        types <- unname(sapply(value, class))
        return(unname(list("data_frame", dim(value), types)))
    }
    # a list
    if (is.list(value))
        return("list")
    # an object
    if (is.object(value))
        return("object")
    # envrionment, function, language
    if (is.environment(value))
        return("environment")
    if (is.function(value))
        return("function")
    if (is.language(value))
        return("language")
    # none of the above - null is a character, not NULL or NA
    return(NULL)
}


# .ddg.record.edge records a control flow edge or a data flow edge in the edges
# table.

# etype - type of edge node1 - name of first node node1 - name of second node

.ddg.record.edge <- function(etype, node1, node2) {
    # Increment edge counter.
    .ddg.inc("ddg.enum")
    ddg.enum <- .ddg.get("ddg.enum")
    # If the table is full, make it bigger.
    ddg.edges <- .ddg.get("ddg.edges")
    if (nrow(ddg.edges) < ddg.enum) {
        size = 100
        new.rows <- data.frame(ddg.num = numeric(size), ddg.type = character(size),
            ddg.from = character(size), ddg.to = character(size), stringsAsFactors = FALSE)
        .ddg.add.rows("ddg.edges", new.rows)
        ddg.edges <- .ddg.get("ddg.edges")
    }
    ddg.edges$ddg.num[ddg.enum] <- ddg.enum
    ddg.edges$ddg.type[ddg.enum] <- etype
    ddg.edges$ddg.from[ddg.enum] <- node1
    ddg.edges$ddg.to[ddg.enum] <- node2
    .ddg.set("ddg.edges", ddg.edges)
    # Output control flow or data flow edge.
    .ddg.output.edge(etype, node1, node2)

    if (.ddg.get("ddg.debug.lib")) {
        if (etype == "cf")
            etype.long <- "control flow" else if (etype == "df.in")
            etype.long <- "data flow in" else etype.long <- "data flow out"
        print(paste("Adding", etype.long, "edge", ddg.enum, "for", node1, "to", node2))
    }
}

# .ddg.is.nonlocal.assign returns TRUE if the object passed is an expression
# object containing a non-local assignment.

# expr - input expression.

.ddg.is.nonlocal.assign <- function(expr) {
    # <<- or ->> means that the assignment is non-local
    if (is.call(expr) && identical(expr[[1]], as.name("<<-"))) {
      return(TRUE)
    }
    return(FALSE)
}


# .ddg.create.empty.vars.set creates an empty data frame initialized to contain
# information about variable assignments.  The difference between first.writer
# and possible.first.writer is that first.writer is for simple assignments (like
# a <- 1), while possible.first.writer is for situations where the assignment
# might not have occurred, like 'if (foo) a <- 1'.

# The data frame is structured as follows - the variable name.  - the position of
# the statement that wrote the variable first.  - the position of the statement
# that wrote the variable last.  - the position of the first statement that may
# have assigned to a variable .  - the position of the last statement that may
# have assigned to a variable.

# var.table.size - desired size of the data frame. Negative values and 0 are
# coerced to 1.

.ddg.create.empty.vars.set <- function(var.table.size = 1) {
    if (var.table.size <= 0)
        var.table.size <- 1
    vars.set <- data.frame(variable = character(var.table.size), first.writer = numeric(var.table.size),
        last.writer = numeric(var.table.size), possible.first.writer = numeric(var.table.size),
        possible.last.writer = numeric(var.table.size), stringsAsFactors = FALSE)
    # Initialize first writer.
    vars.set$first.writer <- var.table.size + 1
    vars.set$possible.first.writer <- var.table.size + 1
    return(vars.set)
}

# .ddg.increase.vars.set simply doubles the size of a variable assignment data
# frame and returns the new one.

# vars.set - data frame containing variable assignments.  size (optional) -
# number of rows in data frame.

.ddg.double.vars.set <- function(vars.set, size = nrow(vars.set)) {
    # Create the right size data frame from input frame.
    new.vars.set <- rbind(vars.set, .ddg.create.empty.vars.set(size))
    # Update first/last writer.
    new.vars.set$first.writer <- ifelse(new.vars.set$first.writer == size + 1, size *
        2 + 1, new.vars.set$first.writer)
    new.vars.set$possible.first.writer <- ifelse(new.vars.set$possible.first.writer ==
        size + 1, size * 2 + 1, new.vars.set$possible.first.writer)
    return(new.vars.set)
}

# .ddg.add.to.vars.set parses a command and adds the new variable information to
# the variable assignment data frame. Note that var.num is a global variable! It
# should be intialized when vars.set is first created.

# vars.set - variable assignment data frame.  cmd.expr - command expression.  i -
# position of variable in data frame.

.ddg.add.to.vars.set <- function(vars.set, cmd, i) {
    # Find out the variable being assigned to by a simple assignment statement.
    main.var.assigned <- cmd@vars.set
    # Find all the variables that may be assigned in the statement.
    vars.assigned <- cmd@vars.possibly.set
    for (var in vars.assigned) {
        nRow <- which(vars.set$variable == var)
        # If the variable is already in the table, update its entry.
        if (length(nRow) > 0) {
            if (!is.null(main.var.assigned) && var == main.var.assigned) {
                vars.set$last.writer[nRow] <- i
            } else {
                vars.set$possible.last.writer[nRow] <- i
            }
        } else {
            # The variable was not in the table. Add a new line for this variable.
            # Find the first empty row
            empty.rows <- which(vars.set$variable == "")
            if (length(empty.rows) == 0) {
                vars.set <- .ddg.double.vars.set(vars.set, nrow(vars.set))
                empty.rows <- which(vars.set$variable == "")
            }
            var.num <- empty.rows[1]
            # Set the variable.
            vars.set$variable[var.num] <- var
            if (!is.null(main.var.assigned) && var == main.var.assigned) {
                vars.set$first.writer[var.num] <- i
                vars.set$last.writer[var.num] <- i
            } else {
                vars.set$possible.first.writer[var.num] <- i
                vars.set$possible.last.writer[var.num] <- i
            }
        }
    }
    return(vars.set)
}


## .ddg.find.var.assigments finds the possible variable assignments for a fixed
## set of parsed commands. See .ddg.create.empty.vars.set for more information on
## the structure of the returned data frame.  parsed.commands - a list of parsed
## commands.
.ddg.find.var.assignments <- function(cmds) {
    if (length(cmds) == 0)
        return(data.frame())
    # Make it big so we don't run out of space.
    var.table.size <- length(cmds)
    vars.set <- .ddg.create.empty.vars.set(var.table.size)
    # Build the table recording where variables are assigned to or may be assigned
    # to.
    for (i in 1:length(cmds)) {
        cmd.expr <- cmds[[i]]
        vars.set <- .ddg.add.to.vars.set(vars.set, cmd.expr, i)
    }
    return(vars.set)
}





# .ddg.create.data.use.edges.for.console.cmd creates a data flow edge from the
# node for each variable used in cmd.expr to the procedural node labeled cmd, as
# long as the value would either be one that exists prior to starting the console
# block, or corresponds to the last setting of this variable in the console
# block.

# vars.set - variable assignment data frame.  cmd - name of procedure node.
# cmd.expr - command expression.  cmd.pos - position of command.

.ddg.create.data.use.edges.for.console.cmd <- function(vars.set, cmd, cmd.pos, for.caller) {
    # Find all the variables used in this command.
    vars.used <- cmd@vars.used
    for (var in vars.used) {
        # Make sure there is a node we could connect to.
        scope <- .ddg.get.scope(var, for.caller)
        if (.ddg.data.node.exists(var, scope)) {
            nRow <- which(vars.set$variable == var)
            # check if the node is written in the console block.
            if (length(nRow) > 0) {
                first.writer <- min(vars.set$first.writer[nRow], vars.set$possible.first.writer[nRow])
                last.writer <- max(vars.set$last.writer[nRow], vars.set$possible.last.writer[nRow])
                # Draw the edge if we will connect to a node that exists before the console block
                # or to the last writer of this variable within the console block.
                if (cmd.pos <= first.writer || cmd.pos >= last.writer) {
                  .ddg.data2proc(var, scope, cmd@abbrev)
                }
                # TODO - add some sort of warning to the user that the node is not being created
            } else {
                # The variable is not set at all in this console block.  Connect to a
                # pre-existing data node.
                .ddg.data2proc(var, scope, cmd@abbrev)
            }
        } else {
            # TODO - add some sort of warning that the data node was NOT found.
        }
    }
}

# .ddg.create.data.set.edges.for.cmd creates edges that correspond to a console
# command assigning to a variable.

# vars.set - variable assignment data frame.  cmd.abbrev - name of procedure
# node.  cmd.expr - command expression.  cmd.pos - position of command.  env -
# environment to use for evaluating variable.  for.finish.node (optional) - if
# TRUE, data edge is for finish node.  scope (optional) - scope of variable.
# stack (optional) - stack to use for evaluating variable.

.ddg.create.data.set.edges.for.cmd <- function(vars.set, cmd, cmd.pos, env, for.finish.node = FALSE,
    scope = NULL, stack = NULL) {
    vars.assigned <- cmd@vars.set
    for (var in vars.assigned) {
        whichRows <- which(vars.set$variable == var)
        # Only create a node edge for the last place that a variable is set within a
        # console block.
        if ((length(whichRows) > 0 && vars.set$last.writer[whichRows] == cmd.pos &&
            vars.set$possible.last.writer[whichRows] <= vars.set$last.writer[whichRows]) ||
            for.finish.node) {
            if (is.null(env)) {
                env <- .ddg.get.env(var, calls = stack)
            }
            scope <- .ddg.get.scope(var, calls = stack, env = env)
            # Special operators are defined by enclosing the name in `.  However, the R
            # parser drops those characters when we deparse, so when we parse here they are
            # missing and we get an error about unexpected SPECIAL characters.  The first
            # tryCatch, puts the ` back in and parses again.  The second tryCatch handles
            # errors associated with evaluated the variable.
            parsed <- tryCatch(parse(text = var), error = function(e) parse(text = paste("`",
                var, "`", sep = "")))
            val <- tryCatch(eval(parsed, env), error = function(e) {
                eval(parse(text = var), parent.env(env))
            })
            tryCatch(.ddg.save.data(var, val, fname = ".ddg.create.data.set.edges.for.cmd",
                error = TRUE, scope = scope, stack = stack, env = env), error = function(e) {
                .ddg.data.node("Data", var, "complex", scope)
            })
            .ddg.proc2data(cmd@abbrev, var, scope)
        }
    }
}


# .ddg.create.data.node.for.possible.writes creates a data node for each variable
# that might have been set in something other than a simple assignment.  An edge
# is created from the last node in the console block.

# vars.set - variable assignment data frame.  last.command - last command in
# console block.

.ddg.create.data.node.for.possible.writes <- function(vars.set, last.command, env = NULL) {
    environment <- if (is.environment(env))
        env else .GlobalEnv
    for (i in 1:nrow(vars.set)) {
        if (vars.set$possible.last.writer[i] > vars.set$last.writer[i]) {
            value <- tryCatch(eval(parse(text = vars.set$variable[i]), environment),
                error = function(e) {
                  NULL
                })
            # Only create the node and edge if we were successful in looking up the value.
            if (!is.null(value)) {
                envName <- environmentName(environment)
                if (envName == "")
                  envName <- .ddg.get.scope(vars.set$variable[i])
                .ddg.data.node("Data", vars.set$variable[i], value, envName)
                .ddg.proc2data(last.command@abbrev, vars.set$variable[i], envName)
            }
        }
    }
}


# Given a parse tree, this function returns a list containing the expressions
# that correspond to the filename argument of the calls to functions that read or
# write the files.  If there are none, it returns NULL.  main.object - the parsed
# expression to search through func.df - the data frame describing the functions
# with file arguments
.ddg.find.files <- function(main.object, func.df, env = NULL) {
    environment <- if (is.environment(env))
        env else .GlobalEnv
    # Recursive helper function.
    find.files.rec <- function(obj) {
        # Base cases.
        if (!is.recursive(obj)) {
            return(NULL)
        }
        if (length(obj) == 0) {
            return(NULL)
        }
        ## It might be useful to record somehow that this function reads a file, but we
        ## wouldn't actually do the reading until the function is called, not here where
        ## it is being declared.
        if (.is.functiondecl(obj))
            return(NULL)
        if (is.call(obj)) {
            # Call has no arguments, so it can't be reading a function.  Recurse on the first
            # part, in case it is more than just a symbol.
            if (length(obj) == 1)
                return(find.files.rec(obj[[1]])) else if (is.symbol(obj[[1]])) {
                # Call with arguments Is this is file reading function?
                read.func.pos <- match(as.character(obj[[1]]), func.df$function.names)
                if (!is.na(read.func.pos)) {
                  # Find the file argument.
                  arg.name <- func.df$param.names[read.func.pos]
                  # Find a matching parameter passed by name
                  file.name.arg.matches <- unlist(lapply(names(obj), function(arg) {
                    return(pmatch(arg, arg.name))
                  }))
                  match.pos <- match(1, file.name.arg.matches)
                  # If no argument qualified by the file parameter name, use the argument in the
                  # expected position
                  if (is.na(match.pos)) {
                    file.name <- tryCatch(eval(obj[[func.df$param.pos[read.func.pos] +
                      1]], environment), error = function(e) NULL)
                  } else {
                    file.name <- tryCatch(eval(obj[[match.pos]], environment), error = function(e) NULL)
                  }
                  # Recurse over the arguments to the function.  We can't just skip over the 2nd
                  # element since the filename parameter is not necessarily there if it was passed
                  # by name.
                  funcs <- find.files.rec(obj[2:length(obj)])
                  # Add this file name to the list of files being read.  Make sure the file name
                  # could be evaluated and that it results in a name, not a connection.
                  if (!is.null(file.name) && is.character(file.name)) {
                    unique(c(file.name, funcs))
                  }
                } else {
                  # Not a file reading function.  Recurse over the arguments.
                  find.files.rec(obj[2:length(obj)])
                }
            } else {
                # Function call, but the first list element is not simply a function name.
                # Recurse over all the list elements.
                unique(append(find.files.rec(obj[[1]]), find.files.rec(obj[2:length(obj)])))
            }
        } else if (length(obj) == 1) {
            # A recursive structure that is not a call.  Not sure if there are any, but just
            # in case...
            unique(find.files.rec(obj[[1]]))
        } else {
            unique(append(find.files.rec(obj[[1]]), find.files.rec(obj[2:length(obj)])))
        }
    }
    return(find.files.rec(main.object@parsed))
}

# Creates file nodes and data in edges for any files that are read in this cmd
# cmd - text command cmd.expr - parsed command
.ddg.create.file.read.nodes.and.edges <- function(cmd, env) {
    # Find all the files potentially read in this command.  This may include files
    # that are not actually read if the read are within an if-statement, for example.
    files.read <- .ddg.find.files(cmd, .ddg.get(".ddg.file.read.functions.df"), env)
    for (file in files.read) {
        # Only create the node and edge if there actually is a file then if the file
        # exists, it is possible that it was not read here
        if (file.exists(file)) {
            # Create the file node and edge
            ddg.file(file)
            ddg.data.in(basename(file), pname = cmd@abbrev)
        } else if (grepl("^http", file) || grepl("^ftp", file)) {
            scope <- environmentName(.GlobalEnv)
            .ddg.data.node("URL", file, file, scope)
            .ddg.data2proc(file, scope, cmd@abbrev)
        }
    }
}

# Creates file nodes and data in edges for any files that are written in this cmd
# cmd - text command cmd.expr - parsed command
.ddg.create.file.write.nodes.and.edges <- function(cmd, env) {
    # Find all the files potentially written in this command.  This may include files
    # that are not actually written if the write calls are within an if-statement,
    # for example.
    files.written <- .ddg.find.files(cmd, .ddg.get(".ddg.file.write.functions.df"), env)
    for (file in files.written) {
        # check that the file exists.  If it does, we will assume that it was created by
        # the write call that we just found.
        if (file.exists(file)) {
            # Create the file node and edge
            ddg.file.out(file, pname = cmd@abbrev)
        }
    }
}

# .ddg.link.function.returns determines if the command calls a function for which
# ddg.return has created a node for the return value.  If so, a data flow edge is
# created from the return value data node to the finish node for the command.
# Note that if the assignment is an expression, like 'd <- f(a) + f(b)', there
# may be multiple return value nodes to link to.

# command - input command.

.ddg.link.function.returns <- function(command) {
    # Find the functions that have completed but whose returns have not been used
    # yet.
    returns <- .ddg.get(".ddg.ret.values")
    if (!is.na(command@pos@startLine)) {
        unused.returns <- returns[!returns$ret.used & returns$ret.node.id >
            0 & !is.na(returns$line) & returns$line == command@pos@startLine, ]
    } else {
        unused.returns <- returns[!returns$ret.used & returns$ret.node.id >
            0, ]
    }
    if (nrow(unused.returns) == 0)
        return()
    # See which of these are called from the command we are processing now.
    unused.calls <- unused.returns$ddg.call
    command.text <- gsub(" ", "", command@text)
    uses <- sapply(unused.calls, function(call) {
        grepl(call, command.text, fixed = TRUE)
    })
    # The following line is here to get around R CMD check, which otherwise reports:
    # no visible binding for global variable.  Note that ret.node.id is not a
    # variable in the subset call, but the name of a column in the data frame being
    # subsetted.
    ret.node.id <- NULL
    # Extracts for the return value nodes.
    new.uses <- subset(unused.returns, uses, ret.node.id)
    # Create an edge from each of these to the last procedure node.
    lapply(new.uses$ret.node.id, function(data.num) {
        proc.num <- .ddg.get("ddg.pnum")
        # Record in edges table
        etype <- "df.in"
        node1 <- paste("d", data.num, sep = "")
        node2 <- paste("p", proc.num, sep = "")
        .ddg.record.edge(etype, node1, node2)

        if (.ddg.get("ddg.debug.lib")) {
            print(paste(".ddg.link.function.returns:", command@abbrev))
            print(paste("DF ", node1, " ", node2, sep = ""))
        }
        # Set the return value as being used.
        returns$ret.used[returns$ret.node.id == data.num] <- TRUE
        .ddg.set(".ddg.ret.values", returns)
    })
}

# Create the DDGStatement list for a list of parsed expressions.

# exprs - a list of parsed expressions script.name - the name of the script the
# expressions come from parseData - information provided by the parser that we
# use to find line numbers enclosing.pos - if exprs are statements within a
# function definition, enclosing.pos is the source position information of the
# entire function declaration

# Returns a list of DDGStatement objects

.ddg.create.DDGStatements <- function(exprs, script.name, script.num, parseData = NULL,
    enclosing.pos = NULL) {
    # The parse data gives us line number information
    if (is.null(parseData)) {
        parseData <- getParseData(exprs, includeText = TRUE)
        if (is.null(parseData)) {
            # In this case there is no line number information available
            cmds <- vector("list", (length(exprs)))
            for (i in 1:length(exprs)) {
                expr <- as.expression(exprs[i])
                cmds[[i]] <- .construct.DDGStatement(expr, NA, script.name, script.num, parseData)
            }
            return(cmds)
        }
        non.comment.parse.data <- parseData[parseData$token != "COMMENT", ]
        if (nrow(non.comment.parse.data) == 0) {
            return(list())
        }
        # Start at the first non-comment expression in parseData
        next.parseData <- 1
    } else {
        non.comment.parse.data <- parseData[parseData$token != "COMMENT", ]
        # Start at the first entry in parse data that begins after the enclosing function
        # begins, ends before the enclosing function ends, and matches the text of the
        # first expression.
        next.parseData <- which(non.comment.parse.data$line1 >= enclosing.pos@startLine &
            non.comment.parse.data$line2 <= enclosing.pos@endLine & non.comment.parse.data$text ==
            paste(deparse(exprs[[1]]), collapse = "\n"))[1]
    }
    # Create the DDGStatements
    cmds <- vector("list", (length(exprs)))
    next.cmd <- 1
    for (i in 1:length(exprs)) {
        expr <- as.expression(exprs[i][[1]])
        next.expr.pos <- new(Class = "DDGStatementPos", non.comment.parse.data[next.parseData,
            ])
        cmds[[next.cmd]] <- .construct.DDGStatement(expr, next.expr.pos, script.name,
            script.num, parseData)
        next.cmd <- next.cmd + 1
        # If there are more expressions, determine where to look next in the parseData
        if (i < length(exprs)) {
            last.ending.line <- non.comment.parse.data[next.parseData, ]$line2
            last.parent <- non.comment.parse.data[next.parseData, "parent"]
            last.id <- non.comment.parse.data[next.parseData, "id"]

            # Find the first entry in parseData that has the same parent as the previous
            # expression and starts after the previous expression.
            next.parseData <- which(non.comment.parse.data$parent == last.parent &
                non.comment.parse.data$line1 >= last.ending.line & non.comment.parse.data$id >
                last.id)[1]
        }
    }
    return(cmds)
}

# .ddg.save.annotated.script saves a copy of the annotated script to the debug
# directory.

.ddg.save.annotated.script <- function(cmds, script.name) {
    for (i in 1:length(cmds)) {
        expr <- cmds[[i]]@annotated
        for (j in 1:length(expr)) {
            line <- deparse(expr[[j]])
            if (i == 1 && j == 1)
                script <- line else script <- append(script, line)
        }
    }
    fileout <- file(paste(paste(.ddg.get("ddg.path"), "/debug", sep = ""), "/annotated-", script.name, sep = ""))
    write(script, fileout)
    close(fileout)
}

# .ddg.file.node creates a node of type File. File nodes are used for files
# written to the DDG directory by capturing output from the script or by copying
# a file that is written by the script.  Returns the path where the file
# referenced by the node is stored.

# dtype - type of data node.  fname - path and name of original file.  dname -
# name of data node.  dscope (optional) - scope of data node.

.ddg.file.node <- function(dtype, fname, dname, dscope = NULL) {
    # Get original file location.
    file.name <- basename(fname)
    file.loc <- normalizePath(fname, winslash = "/", mustWork = FALSE)
    loc.value <- if (dtype == "File")
        paste(" Location=\"", file.loc, "\"", sep = "") else ""
    # Add number to file name.
    dfile <- paste(.ddg.get("ddg.dnum") + 1, "-", file.name, sep = "")
    # Calculate the path to the file relative to the ddg directory.  This is the
    # value stored in the node.
    dpfile <- paste("data", dfile, sep = "/")
    dtime <- .format.time(Sys.time())
    # Set the node label.
    if (is.null(dname))
        dname <- file.name
    # Get scope if necessary.
    if (is.null(dscope))
        dscope <- .ddg.get.scope(dname)
    # Not from environment.
    from.env <- FALSE
    # Record in data node table
    .ddg.record.data(dtype, dname, dpfile, dpfile, dscope, from.env = from.env, dtime,
        file.loc)
    # Get path plus file name to where the file will be copied
    dpath <- paste(paste(.ddg.get("ddg.path"), "/data", sep = ""), "/", dfile, sep = "")
    return(dpath)
}

# .ddg.file.copy creates a data node of type File. File nodes are used for files
# written by the main script. A copy of the file is written to the DDG directory.

# dtype - type of data node.  fname - path and name of original file.  dname -
# name of data node.  dscope - scope of data node.

.ddg.file.copy <- function(dtype, fname, dname, dscope) {
    # Calculate location of original file.
    file.loc <- normalizePath(fname, winslash = "/", mustWork = FALSE)
    # Copy file.
    if (file.exists(file.loc)) {
        # Create file node in DDG.
        dpfile.out <- .ddg.file.node(dtype, fname, dname, dscope)
        if (.ddg.get("ddg.save.to.disk")) {
            file.copy(file.loc, dpfile.out, overwrite = TRUE)
        }
    } else {
        error.msg <- paste("File to copy does not exist:", fname)
        .ddg.insert.error.message(error.msg)
        return(NULL)
    }
    if (.ddg.get("ddg.debug.lib"))
        print(paste("file.copy: ", dtype, " ", file.loc))
    return(dpfile.out)
}

# .ddg.insert.error.message issues a warning and inserts an exception node after
# the last procedure step. The name of the node is 'error.msg' and the value is
# the error message passed to this function.

# msg - error message.  msg.type - error or warning scope - scope for evaluating
# any data doWarn - if true, this function displays a warning

.ddg.insert.error.message <- function(msg, msg.type = "error.msg", scope = "ddg.library",
    doWarn = TRUE) {
    if (doWarn) {
        warning(msg)
    }
    .ddg.data.node("Exception", msg.type, msg, scope)
    .ddg.lastproc2data(msg.type, dscope = scope)
}

# .ddg.delete.temp deletes any temporary files created during the processing of a
# script. These include the temporary history file.

.ddg.delete.temp <- function() {
    # Delete the temporary history file if we made it.
    if (.ddg.is.set("ddg.history.file"))
        unlink(.ddg.get("ddg.history.file"))
    # Clear the environment.
    .ddg.env <- new.env(parent = emptyenv())
}

# .ddg.create.output.nodes creates output nodes for ddg.function and
# ddg.procedure. Outs values must be passed as strings, not names, unless the
# value is a file name.

# fname - the name of the function calling .ddg.create.output.nodes.  pname - the
# name of the procedure node.  outs.graphic - the name of a snapshot node to be
# used as a file name.  A graphical snapshot is simply a captured image of the
# graphic device active at the time of the call to ddg.function or ddg.procedure.
# outs.data - a list of names of data nodes.  outs.exception - a list of names of
# exception nodes.  outs.url - a list of names of url nodes.  outs.file - a list
# of names of file nodes. Supported file extensions include: .csv, .jpg, .jpeg,
# .pdf, and .txt.  graphic.fext - the file extension to be used when saving the
# captured graphic. Supported extensions are .jpg, .jpeg, .pdf.

.ddg.create.output.nodes <- function(fname, pname, outs.graphic, outs.data, outs.exception,
    outs.url, outs.file, graphic.fext, env) {
    # Capture graphics device.
    if (is.character(outs.graphic)) {
        name <- outs.graphic
        gfext <- as.character(graphic.fext)
        .ddg.write.graphic(name, "Graphical Plot. Not saved in script.", fext = gfext)  # value is ignored
        .ddg.proc2data(pname, name)
    }

    # Create output nodes and edges if outs list provided.
    # Exception node.
    if (!is.null(outs.exception)) {
        stack <- sys.calls()
        lapply(outs.exception, function(param) {
            # Get value in calling environment.
            name <- param
            value <- NULL
            .ddg.lookup.value(name, value, env, fname, warn = FALSE)

            # Exception node.
            scope <- .ddg.get.scope(param, calls = stack)
            .ddg.data.node("Exception", name, value, scope)
            .ddg.proc2data(pname, name, scope)
        })
    }

    # URL node.
    if (!is.null(outs.url)) {
        stack <- sys.calls()
        lapply(outs.url, function(param) {
            # Get value in calling environment.
            name <- param
            value <- NULL
            .ddg.lookup.value(name, value, env, fname, warn = FALSE)
            # URL node.
            scope <- .ddg.get.scope(param, calls = stack)
            .ddg.data.node("URL", name, value, scope)
            .ddg.proc2data(pname, name, scope)
        })
    }

    # Generalized data node (includes simple data values as well as snapshots)
    if (!is.null(outs.data)) {
        stack <- sys.calls()
        lapply(outs.data, function(param) {
            # Get value in calling environment.
            name <- param
            value <- NULL
            .ddg.lookup.value(name, value, env, fname, warn = FALSE)

            tryCatch({
                if (!is.character(name))
                  name <- deparse(substitute(name))
                envName <- environmentName(env)
                scope <- .ddg.get.scope(param, calls = stack)
                .ddg.save.data(name, value, fname, error = TRUE, scope = scope)
                .ddg.proc2data(pname, name, scope)
            }, error = function(e) {
                .ddg.insert.error.message(e)
            })
        })
    }
    # File node.
    if (!is.null(outs.file)) {
        stack <- sys.calls()
        lapply(outs.file, function(param) {
            # Get value in calling environment.
            name <- param
            value <- NULL
            .ddg.lookup.value(name, value, env, fname, warn = FALSE)
            scope <- .ddg.get.scope(param, calls = stack)

            if (value == "") {
                # Filename passed as value.
                .ddg.file.copy("File", name, name, scope)
                .ddg.proc2data(pname, name, scope)
            } else {
                # Filename passed as name.
                .ddg.file.copy("File", value, name, scope)
                .ddg.proc2data(pname, name, scope)
            }
        })
    }
}

# .ddg.create.function.nodes creates the procedure node, input binding nodes, and
# output nodes for the function.

# pname - name of procedure node.  full.call - full function call.  outs.data,
# etc (optional) - output nodes.  auto.created - TRUE if the function node is
# created automatically when a return is found env (optional) - the environment
# local to the function

.ddg.create.function.nodes <- function(pname, call, full.call, outs.graphic = NULL,
    outs.data = NULL, outs.exception = NULL, outs.url = NULL, outs.file = NULL, graphic.fext = "jpeg",
    auto.created = FALSE, env = NULL) {
    # Create the start node
    if (typeof(call[[1]]) == "closure") {
        .add.abstract.node("Start", node.name = pname, env = env)
    } else {
        .add.abstract.node("Start", node.name = paste(deparse(call), collapse = ""),
            env = env)
    }
    # Tokens will contain the function name and the argument expressions.
    # Get parameters and create edges.
    if (length(full.call) > 1) {
        # args contains the names of the variable that was passed into the function.
        args <- full.call[2:length(full.call)]
        # param,names contains the names of the parameters (this is what the variable is
        # known as inside the function).
        param.names <- names(full.call)
        param.names <- param.names[2:length(param.names)]
        stack <- sys.calls()
        # scope <- .ddg.get.scope(args[[1]], for.caller = TRUE)
        bindings <- list()
        for (i in 1:length(args)) bindings[[i]] <- list(args[[i]], param.names[[i]])
        missing.params <- character()

        lapply(bindings, function(binding) {
            # Here, arg is the arguments passed IN.
            arg <- binding[[1]]
            # formal is the paramenter name of the function (what is the variable known as
            # inside?).
            formal <- binding[[2]][[1]]
            if (is.null(formal) || formal == "")
                formal <- "..."
            # Find all the variables used in this parameter.  If the argument is a string
            # constant, don't bother looking for variables.  Also add quotes around it in the
            # node name.
            if (is.character(arg)) {
                vars.used <- character()
                binding.node.name <- paste(formal, " <- \"", arg, "\"", sep = "")
            } else {
                vars.used <- .find.var.uses(arg)
                binding.node.name <- paste(formal, " <- ", paste(deparse(arg), collapse = " "))
            }

            .proc.node("Binding", binding.node.name)
            .ddg.proc2proc()
            for (var in vars.used) {
                param.scope <- .ddg.get.scope(var, for.caller = TRUE, calls = stack)
                if (.ddg.data.node.exists(var, param.scope)) {
                  .ddg.data2proc(as.character(var), param.scope, binding.node.name)
                  if (.ddg.get("ddg.debug.lib"))
                    print(paste("param:", var))
                }
            }
            if (formal != "...") {
                formal.scope <- .ddg.get.scope(formal, calls = stack)
                formal.env <- .ddg.get.env(formal, calls = stack)
                # If we can evaluate the argument without an error, we record the value. If an
                # error occurs, we do not record the value as it's possible that the function
                # never actually uses it.
                tryCatch({
                  .ddg.save.data(formal, eval(parse(text = formal), formal.env),
                    fname = ".ddg.save.data", scope = formal.scope, stack = stack)
                  .ddg.proc2data(binding.node.name, formal, formal.scope)
                }, error = function(e) {
                })

            }
        })
    }
    .proc.node("Operation", pname, pname, auto.created = auto.created)
    # Link to the definition of the function if the function is defined in this
    # script.
    if (.ddg.data.node.exists(pname, environmentName(.GlobalEnv))) {
        .ddg.data2proc(pname, environmentName(.GlobalEnv), pname)
    }
    if (length(full.call) > 1) {
        lapply(bindings, function(binding) {
            formal <- binding[[2]][[1]]
            # Formal will be NULL if declared as ...  Don't create the data node in that
            # case.
            if (!is.null(formal) && formal != "") {
                formal.scope <- .ddg.get.scope(formal, calls = stack)
                if (.ddg.data.node.exists(formal, formal.scope)) {
                  .ddg.data2proc(formal, formal.scope, pname)
                }
            }
        })
    }
    # Create control flow edge from preceding procedure node.
    .ddg.proc2proc()
    # create output nodes
    .ddg.create.output.nodes(fname = "ddg.function", pname, outs.graphic, outs.data,
        outs.exception, outs.url, outs.file, graphic.fext, parent.frame(2))

}

# .ddg.get.frame.number gets the frame number of the closest non-library calling
# function.

# calls - system calls.  for.caller (optional) - if TRUE, go up one level before
# searching.

.ddg.get.frame.number <- function(calls, for.caller = FALSE) {
    if (is.null(calls))
        calls <- sys.calls()
    script.func.found <- FALSE
    nframe <- length(calls)
    for (i in nframe:1) {
        call <- sys.call(i)[[1]]
        # Guess that if we have a closure it is a user-defined function and not a ddg
        # function Is this a good assumption ????
        if (typeof(call) == "closure") {
            if (for.caller && !script.func.found) {
                script.func.found <- TRUE
            } else {
                return(i)
            }
        } else {
            call.func <- as.character(call)
            # Ignore calls to ddg functions or to the functions that get called from the
            # outermost tryCatch to ddg code.
            if (substr(call.func, 1, 4) != ".ddg" && substr(call.func, 1, 3) != "ddg" &&
                substr(call.func, 1, 10) != "doTryCatch" && substr(call.func, 1,
                11) != "tryCatchOne" && substr(call.func, 1, 12) != "tryCatchList" &&
                substr(call.func, 1, 8) != "tryCatch") {
                if (for.caller && !script.func.found) {
                  script.func.found <- TRUE
                } else {
                  return(i)
                }
            }
        }
    }
    return(0)
}


# .ddg.where looks up the environment for the variable specified by name.
# Adapted from Hadley Wickham, Advanced R programming.

# name - name of variable.  env (optional) - environment in which to look for
# variable.  warning (optional) - set to TRUE if a warning should be thrown when
# a variable is not found.

.ddg.where <- function(name, env = parent.frame(), warning = TRUE) {
    stopifnot(is.character(name), length(name) == 1)

    if (identical(env, emptyenv())) {
        if (warning)
            warning("Can't find ", name)

        return("undefined")
    }
    if (exists(name, env, inherits = FALSE)) {
        env
    } else {
        .ddg.where(name, parent.env(env), warning)
    }
}


# .ddg.get.env gets the environment in which name is declared.

# name - variable name.  for.caller (optional) - if TRUE, go up one level before
# searching.  calls (optional) - system calls.

.ddg.get.env <- function(name, for.caller = FALSE, calls = NULL) {
    if (is.null(calls))
        calls <- sys.calls()
    fnum <- .ddg.get.frame.number(calls, for.caller)
    stopifnot(!is.null(fnum))
    # This statement was broken into two statements so that we can add print
    # statements to .ddg.where or step through it with a debugger without breaking
    # it.  If we don't do that the print output gets captured by capture.output and
    # does not display to the user and also causes the subsequent grepl call in this
    # function to fail.
    tryCatch(if (!exists(name, sys.frame(fnum), inherits = TRUE))
        return(NULL), error = function(e) {
    })
    env <- .ddg.where(name, sys.frame(fnum))
    return(env)
}

# .ddg.get.scope gets the id of the closest non-library environment.

# name - name of variable.  for.caller (optional) - if TRUE, go up one level
# before searching.  calls (optional) - system calls.  env (optional) - the
# environment to get the scope for

.ddg.get.scope <- function(name, for.caller = FALSE, calls = NULL, env = NULL) {
    # Get the environment for the variable call.
    if (is.null(env)) {
        env <- .ddg.get.env(name, for.caller, calls)
    }
    # If no environment found, name does not exist, so scope is undefined.
    if (is.null(env))
        return("undefined")
    scope <- sub("^<environment: (.*)>$", "\\1", capture.output(env)[1])
    if (grepl("undefined", scope))
        scope <- "undefined"
    return(scope)
}

# .ddg.is.local returns TRUE if the specified name is local in the specified
# scope.

# name of variable.  scope of variable.

.ddg.is.local <- function(name, scope) {
    return(exists(name, scope, inherits = FALSE))
}

# Creates a start node for the current command if one has not been created
# already.
.ddg.create.start.for.cur.cmd <- function(call, caller.env) {
    if (.ddg.is.set(".ddg.cur.cmd")) {
        .ddg.cur.cmd <- .ddg.get(".ddg.cur.cmd")
        .ddg.cur.cmd.stack <- .ddg.get(".ddg.cur.cmd.stack")
        stack.length <- length(.ddg.cur.cmd.stack)
        if (stack.length >= 1) {
            last.created <- .ddg.cur.cmd.stack[stack.length]
            # Only create a start node for the current command if we have not already created
            # one and the command is more than just the call to this function
            if (last.created[[1]] == "FALSE") {
                if (.ddg.cur.cmd@text != paste(deparse(call), collapse = "")) {
                  cmd.abbrev <- .add.abstract.node("Start", .ddg.cur.cmd, caller.env)
                  .ddg.cur.expr.stack <- .ddg.get(".ddg.cur.expr.stack")
                  st.type <- .get.statement.type(.ddg.cur.cmd@parsed[[1]])
                  loop.statement <- (st.type == "for" || st.type == "while" || st.type ==
                    "repeat")
                  control.statement <- loop.statement || st.type == "if"
                  .ddg.create.data.use.edges.for.console.cmd(vars.set = data.frame(),
                    .ddg.cur.cmd, 0, for.caller = !control.statement)
                  # Add Details Omitted node before annotated loops if needed.
                  if (loop.statement && ddg.first.loop() > 1) {
                    ddg.details.omitted()
                  }
                  # Mark the start node as created on the stack.  Mark it even if we did not create
                  # the abstract node above, because we will create it below.
                  .ddg.set(".ddg.cur.cmd.stack", c(.ddg.cur.cmd.stack[1:stack.length -
                    1], TRUE))
                } else {
                  .ddg.set(".ddg.cur.cmd.stack", c(.ddg.cur.cmd.stack[1:stack.length -
                    1], "MATCHES_CALL"))
                }
            }
        }
    }
}

# .ddg.get.last.cmd returns the last command on the stack.

.ddg.get.last.cmd <- function() {
    .ddg.cur.cmd.stack <- .ddg.get(".ddg.cur.cmd.stack")
    stack.length <- length(.ddg.cur.cmd.stack)
    cmd <- .ddg.cur.cmd.stack[stack.length - 1][[1]]
}

# .ddg.remove.last.cmd.start.created removes the last command and start.created
# from the stack.

.ddg.remove.last.cmd.start.created <- function() {
    .ddg.cur.cmd.stack <- .ddg.get(".ddg.cur.cmd.stack")
    stack.length <- length(.ddg.cur.cmd.stack)

    if (stack.length == 2) {
        .ddg.set(".ddg.cur.cmd.stack", vector())
    } else {
        .ddg.set(".ddg.cur.cmd.stack", .ddg.cur.cmd.stack[1:(stack.length - 2)])
    }
}

# .ddg.break.statement creates a procedure node for a break statement in a for,
# repeat, or while statement. It also adds a finish node for the if statement (if
# any) where the break occurs, adds a finish node for the for, repeat, or while
# loop where the break occurs, and adds a finish node for the for, repeat, or
# while statement.

.ddg.break.statement <- function() {
    # Create procedure node for break statement.
    .proc.node("Operation", "break", "break")
    .ddg.proc2proc()
    # Get last command from stack.
    cmd <- .ddg.get.last.cmd()
    # Get loop type.
    loop.type <- as.character(cmd@parsed[[1]][[1]])
    # Create finish nodes if break occurs in if statement.
    if (loop.type == "if") {
        # Create finish node for if loop.
        ddg.finish("if")
        # Create finish node for if statement.
        .add.abstract.node("Finish", cmd, parent.frame())
        # Remove last command & start.created from stack.
        .ddg.remove.last.cmd.start.created()
        # Get last command from stack.
        cmd <- .ddg.get.last.cmd()
        # Get loop type.
        loop.type <- as.character(cmd@parsed[[1]][[1]])
    }
    # Create finish node for for, repeat, or while loop.
    loop.name <- paste(loop.type, "loop")
    ddg.finish(loop.name)
    # Create finish node for for, while, or repeat statement.
    .add.abstract.node("Finish", cmd, parent.frame())
    # Remove last command & start.created from stack.
    .ddg.remove.last.cmd.start.created()
}

# .ddg.next.statement creates a procedure node for a next statement in a for,
# repeat, or while statement. It also adds a finish node for the if statement (if
# any) where the next occurs and adds a finish node for the for, while, or repeat
# loop where the next occurs.

.ddg.next.statement <- function() {
    # Create procedure node for next statement.
    .proc.node("Operation", "next", "next")
    .ddg.proc2proc()
    # Get last command from stack.
    cmd <- .ddg.get.last.cmd()
    # Get loop type.
    loop.type <- as.character(cmd@parsed[[1]][[1]])
    # Create finish nodes if break occurs in if statement.
    if (loop.type == "if") {
        # Create finish node for if loop.
        ddg.finish("if")
        # Create finish node for if statement.
        .add.abstract.node("Finish", cmd, parent.frame())

        # Remove last command & start.created from stack.
        .ddg.remove.last.cmd.start.created()
        # Get last command from stack.
        cmd <- .ddg.get.last.cmd()
        # Get loop type.
        loop.type <- as.character(cmd@parsed[[1]][[1]])
    }
    # Create finish node for for, repeat, or while loop.
    loop.name <- paste(loop.type, "loop")
    ddg.finish(loop.name)
}

# .ddg.markdown takes a Rmd file and extracts the R code and text through the
# purl function in the knitr library. It then annotates the R script to insert
# start and finish nodes based on the chunks the user already created. If eval =
# false, then the chunk will not be added to the DDG. If the user has a name for
# the chunk, then that name will be used, else a chunk name 'ddg.chunk_1' and
# higher numbers will be generated.  Important: If in a code chunk, there is an
# empty line followed by '# ----' or '#'', then an extra finish node will be
# inserted, causing an error.  r.script.path is the path of the original Rmd file
# output.path is the path of the generated R script

.ddg.markdown <- function(r.script.path = NULL, output.path = NULL) {
    # generates R script file from markdown file
    knitr::purl(r.script.path, documentation = 2L, quiet = TRUE)
    # moves file to ddg directory
    file.rename(from = paste(getwd(), "/", basename(tools::file_path_sans_ext(r.script.path)),
        ".R", sep = ""), to = output.path)
    script <- readLines(output.path)
    skip <- FALSE
    name <- "ddg.chunk"
    annotated <- character(0)
    index <- 1
    # This for loop goes through the script line by line and searches for patterns to
    # insert the start and finish nodes
    for (i in 1:length(script)) {
        # eval = false means we skip this code chunk, therefore skip = TRUE
        if (regexpr("eval+(\\s*)+=+(\\s*)+FALSE", script[i]) != -1) {
            skip <- TRUE
            annotated <- append(annotated, script[i])
        } else if (regexpr("## ----", script[i]) != -1) {
            # if no options in the line, then generate default name.
            if (regexpr("## -----", script[i]) == -1) {
                if (regexpr("=", script[i]) == -1) {
                  end <- regexpr("-----", script[i])
                  name <- substring(script[i], 8, last = end - 1)
                } else if (regexpr(",", script[i]) != -1) {
                  comma <- regexpr(",", script[i])
                  name <- substring(script[i], 8, last = comma - 1)
                } else {
                  name <- paste("ddg.chunk_", index, sep = "")
                  index <- index + 1
                }
            } else {
                name <- paste("ddg.chunk_", index, sep = "")
                index <- index + 1
            }
            name <- stringr::str_trim(name, side = "both")
            annotated <- append(annotated, paste("ddg.start(\"", name, "\")", sep = ""))
        } else if (nchar(script[i]) == 0 && (regexpr("#'", script[i + 1]) != -1 ||
            i == length(script) || regexpr("## ----", script[i + 1]) != -1)) {
            if (skip) {
                annotated <- append(annotated, script[i])
                skip <- FALSE
            } else {
                annotated <- append(annotated, paste("ddg.finish(\"", name, "\")",
                  sep = ""))
            }
        } else {
            annotated <- append(annotated, script[i])
        }
    }
    writeLines(annotated, output.path)
    r.script.path
}

# .ddg.save.debug.files saves debug files to the debug directory.

.ddg.save.debug.files <- function() {
    # Save initial environment table to file.
    fileout <- paste(paste(.ddg.get("ddg.path"), "/debug", sep = ""), "/initial-environment.csv", sep = "")
    ddg.initial.env <- .ddg.get("ddg.initial.env")
    write.csv(ddg.initial.env, fileout, row.names = FALSE)
    # Save procedure nodes table to file.
    fileout <- paste(paste(.ddg.get("ddg.path"), "/debug", sep = ""), "/procedure-nodes.csv", sep = "")
    ddg.proc.nodes <- .ddg.get("ddg.proc.nodes")
    ddg.proc.nodes <- ddg.proc.nodes[ddg.proc.nodes$ddg.num > 0, ]
    write.csv(ddg.proc.nodes, fileout, row.names = FALSE)
    # Save data nodes table to file.
    fileout <- paste(paste(.ddg.get("ddg.path"), "/debug", sep = ""), "/data-nodes.csv", sep = "")
    ddg.data.nodes <- .ddg.get("ddg.data.nodes")
    ddg.data.nodes2 <- ddg.data.nodes[ddg.data.nodes$ddg.num > 0, ]
    write.csv(ddg.data.nodes2, fileout, row.names = FALSE)
    # Save edges table to file.
    fileout <- paste(paste(.ddg.get("ddg.path"), "/debug", sep = ""), "/edges.csv", sep = "")
    ddg.edges <- .ddg.get("ddg.edges")
    ddg.edges2 <- ddg.edges[ddg.edges$ddg.num > 0, ]
    write.csv(ddg.edges2, fileout, row.names = FALSE)
    # Save function return table to file.
    fileout <- paste(paste(.ddg.get("ddg.path"), "/debug", sep = ""), "/function-returns.csv", sep = "")
    ddg.returns <- .ddg.get(".ddg.ret.values")
    ddg.returns2 <- ddg.returns[ddg.returns$ret.node.id > 0, ]
    write.csv(ddg.returns2, fileout, row.names = FALSE)
    # Save if script is sourced.
    if (.ddg.get(".ddg.is.sourced")) {
        # Save sourced script table to file.
        fileout <- paste(paste(.ddg.get("ddg.path"), "/debug", sep = ""), "/sourced-scripts.csv", sep = "")
        ddg.sourced.scripts <- .ddg.get(".ddg.sourced.scripts")
        ddg.sourced.scripts2 <- ddg.sourced.scripts[ddg.sourced.scripts$snum >= 0,
            ]
        write.csv(ddg.sourced.scripts2, fileout, row.names = FALSE)
        # Save data object table to file.
        fileout <- paste(paste(.ddg.get("ddg.path"), "/debug", sep = ""), "/data-objects.csv", sep = "")
        ddg.data.objects <- .ddg.data.objects()
        write.csv(ddg.data.objects, fileout, row.names = FALSE)
    }
}

#--------------------USER FUNCTIONS-----------------------#
# .ddg.find.ddg.ret.value.caller.frame.number returns the frame number of the
# first caller to ddg.ret.value.  If ddg.ret.value is called recursively,
# this will give us the position of the earliest one called.

.ddg.find.ddg.ret.value.caller.frame.number <- function() {
    # Get the stack
    calls <- sys.calls()
    # Find the calls to ddg.ret.value
    ddg.funcs <- unlist(lapply(calls, function(call) return(grepl("^ddg|.ddg", deparse(call)[[1]]))))
    calls.to.ddg.ret.value <- unlist(lapply(calls, function(call) return(.ddg.is.call.to(call,
        as.name("ddg.ret.value")))))
    non.ddg.calls.to.ddg.ret.value <- !(ddg.funcs[1:length(ddg.funcs) - 1]) &
        calls.to.ddg.ret.value[2:length(calls.to.ddg.ret.value)]
    which.frame <- Position(function(call) return(call), non.ddg.calls.to.ddg.ret.value,
        right = TRUE)
    # Return the frame number of the caller to ddg.ret.value
    return(which.frame)
}

# Returns true if we should run the annotated version of a function and false if
# we should run the unannotated version.
ddg.should.run.annotated <- function(func.name) {
    # check if we are in a loop and loop annotations are off
    if (!.ddg.get("ddg.loop.annotate") && .ddg.get("ddg.inside.loop") > 0)
        return(FALSE)
    # Make sure this specific function has not been disabled
    if (!is.null(.ddg.get("ddg.annotate.off")) & func.name %in% .ddg.get("ddg.annotate.off"))
        return(FALSE)
    # Not annotating functions in general check if this specific function should be
    # annotated
    if (!is.null(.ddg.get("ddg.annotate.on")) & func.name %in% .ddg.get("ddg.annotate.on"))
        return(TRUE)
    # If we do not know anything specific about this function, follow the general
    # rule
    return(.ddg.get("ddg.annotate.inside"))
}

# .ddg.save inserts attribute information and the number of procedure steps at the
# top of the DDG. It writes the DDG and the procedure nodes, data nodes, and
# function return tables to the DDG directory.

# r.script.path (optional) - Path to the R script.  save.debug (optional) - If
# TRUE, save debug files to debug directory.  Used in console mode.  quit
# (optional) - If TRUE, remove all DDG files from memory.  Unlike ddg.run, this
# is set to false as default since it will generally be called internally and by
# tests, as opposed to by the user.

.ddg.save <- function(r.script.path = NULL, save.debug = FALSE, quit = FALSE) {
    if (!(.ddg.is.set(".ddg.initialized") && .ddg.get(".ddg.initialized")))
        return(invisible())
    if (interactive() && .ddg.get(".ddg.enable.console")) {
        # Get the final commands
        .ddg.console.node()
    }
    # If there is a display device open, grab what is on the display
    if (length(dev.list()) >= 1) {
        tryCatch(.ddg.capture.graphics(basename(.ddg.get("ddg.r.script.path")), called.from.save = TRUE),
            error = function(e) print(e))
    }
    # Save ddg.json to file.
    .ddg.json.write()
    if (interactive())
        print(paste("Saving ddg.json in ", .ddg.get("ddg.path"), sep = ""))
    # Save sourced scripts (if any). First row is main script.
    ddg.sourced.scripts <- .ddg.get(".ddg.sourced.scripts")
    if (!is.null(ddg.sourced.scripts)) {
        if (nrow(ddg.sourced.scripts) > 1) {
            for (i in 1:nrow(ddg.sourced.scripts)) {
                sname <- ddg.sourced.scripts[i, "sname"]
                if (.ddg.get("ddg.save.to.disk")) {
                  file.copy(sname, paste(paste(.ddg.get("ddg.path"), "/scripts", sep = ""), basename(sname), sep = "/"))
                }
            }
        }
    }
    # Save debug files to debug directory.
    if (save.debug | .ddg.get("ddg.save.debug")) {
        .ddg.save.debug.files()
    }
    # Clear DDGStatements from ddg environment.
    .ddg.set("ddg.statement.num", 0)
    .ddg.set("ddg.statements", list())
    # Clear loop information from ddg environment.
    .ddg.set("ddg.loop.num", 0)
    .ddg.set("ddg.loops", list())
    # By convention, this is the final call to ddg.save.
    if (quit) {
        # Restore history settings.
        if (.ddg.is.set("ddg.original.hist.size"))
            Sys.setenv(R_HISTSIZE = .ddg.get("ddg.original.hist.size"))
        # Delete temporary files.
        .ddg.delete.temp()
        # Capture current graphics device.
        .ddg.auto.graphic.node(dev.to.capture = dev.cur)
        # Shut down the DDG.
        .ddg.clear()
    }
    invisible()
}

# .ddg.console.off turns off the console mode of DDG construction.

.ddg.console.off <- function() {
    if (!(.ddg.is.set(".ddg.initialized") && .ddg.get(".ddg.initialized")))
        return(invisible())
    # Capture history if console was on up to this point.
    if (interactive() && .ddg.get(".ddg.enable.console")) {
        .ddg.console.node()
    }
    # Set the console to off.
    .ddg.set(".ddg.enable.console", FALSE)
}

# .ddg.console.on turns on the console mode of DDG construction.

.ddg.console.on <- function() {
    if (!(.ddg.is.set(".ddg.initialized") && .ddg.get(".ddg.initialized")))
        return(invisible())
    # Write a new timestamp if we're turning on the console so we only capture
    # history from this point forward.
    if (!.ddg.get(".ddg.enable.console"))
        .write.timestamp.to.history()
    .ddg.set(".ddg.enable.console", TRUE)
}

# .ddg.delete.save removes all files from the DDG directories unless the the DDG
# directory is the working directory. If no DDG directory is specified, the
# current DDG directory is assumed.

# ddg.path (optional) - path to DDG directory.

.ddg.delete.save <- function(ddg.path = NULL) {
    # Use current DDG directories if no directory is specified.
    if (is.null(ddg.path)) {
        ddg.path <- .ddg.get("ddg.path")
        ddg.path.data <- paste(.ddg.get("ddg.path"), "/data", sep = "")
        ddg.path.debug <- paste(.ddg.get("ddg.path"), "/debug", sep = "")
        ddg.path.scripts <- paste(.ddg.get("ddg.path"), "/scripts", sep = "")
    }
    # Remove files unless the DDG directory is the working directory.
    if (ddg.path != getwd()) {
        unlink(paste(ddg.path, "*.*", sep = "/"))
        unlink(paste(ddg.path.data, "*.*", sep = "/"))
        unlink(paste(ddg.path.data, ".ddghistory", sep = "/"))
        unlink(paste(ddg.path.debug, "*.*", sep = "/"))
        unlink(paste(ddg.path.scripts, "*.*", sep = "/"))
    }
    invisible()
}
ProvTools/provR documentation built on May 6, 2019, 3:27 p.m.