R/statement.r

Defines functions null.pos .construct.DDGStatement .abbrev.cmd .find.var.uses .find.simple.assign .is.assign .get.var .find.assign .is.functiondecl .get.statement.type .add.annotations .parse.contained .parse.contained.function .parse.contained.if .ensure.in.block .parse.contained.control .replace.source .add.function.annotations .create.function.block .add.conditional.statement .insert.ddg.function .wrap.ret.parameters .wrap.all.ret.parameters .find.last.statement .create.ddg.eval.call .create.ddg.ret.call .wrap.last.line .wrap.with.ddg.eval .create.block.ddg.eval.call .wrap.block.with.ddg.eval .add.block.start.finish .insert.ddg.forloop .insert.ddg.loop.annotate .annotate.if.statement .annotate.loop.statement .annotate.simple.block .ddg.is.call.to .has.call.to .is.call.to.ddg.function .reads.file .writes.file .creates.graphics .updates.graphics

# Copyright (C) 2017 Harvard University, Mount Holyoke College
#
# This file is part of ProvR.
#
# ProvR is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# ProvR is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ProvR; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#
# This package was forked from <https://github.com/End-to-end-provenance/RDataTracker>
#
# Contact: Matthew Lau <matthewklau@fas.harvard.edu>

# Needed to work with S4 classes.  Normally, this library is automatically
# loaded.  However, it is not loaded when running non-interactively, as in our
# test cases or if a user uses RScript to run R files.
library(methods)

# Information about where in the source code this statement appears.
setClass("DDGStatementPos", slots = list(startLine = "numeric", startCol = "numeric",
    endLine = "numeric", endCol = "numeric"))

# This is called automatically when there is a call to create a new
# DDGStatementPos object.
setMethod("initialize", "DDGStatementPos", function(.Object, parseData) {
    # If the parse data is missing, we set all the fields to -1
    if (length(parseData) == 1 && is.na(parseData)) {
        .Object@startLine <- -1
        .Object@startCol <- -1
        .Object@endLine <- -1
        .Object@endCol <- -1
    } else {
        # If we have parseData, we extract the information into oure object.
        .Object@startLine <- parseData$line1
        .Object@startCol <- parseData$col1
        .Object@endLine <- parseData$line2
        .Object@endCol <- parseData$col2
    }
    return(.Object)
})

# This class contains all the information that we need when building a ddg.  We
# create this when we parse the statement so that it is only done once and then
# look up the information we need when the statement executes.


## text = 'character', # The original text in the file parsed = 'expression', #
## The parse tree for the statement abbrev = 'character', # A shortened version of
## the text to use in node names annotated = 'expression', # An annotated version
## of the statement.  This is # what we actually execute.

## # Note that vars.used through has.dev.off do not apply to a situation where #
## the statement is a function declaration, since declaring the statement # does
## not read from files, etc.  That happens when the function is called, # at which
## point we will refer to the information in the contained statements.

## vars.used = 'character', # A list of the variables that are used in the
## statement vars.set = 'character', # If this is an assignment statement, this is
## the variable assigned vars.possibly.set = 'character', # If this contains any
## internal assignment statements, # like an if-statement might, for example,
## these are the # variables assigned within the statement.  isDdgFunc =
## 'logical', # True if this is a call to a ddg function readsFile = 'logical', #
## True if this statement contains a call to a function that # reads from a file
## writesFile = 'logical', # True if this statement contains a call to a function
## that # writes to a file createsGraphics = 'logical', # True if this is a
## function that creates a graphics # object, like a call to pdf, for example
## updatesGraphics = 'logical', # True if this is a function that updates a
## graphics # object, like a call to a function in the graphics package, for
## example has.dev.off = 'logical', # True if this statement contains a call to
## dev.off pos = 'DDGStatementPos', # The location of this statement in the source
## code.  # Has the value null.pos() if it is not available.  script.num =
## 'numeric', # The number for the script this statement comes from.  # Has the
## value -1 if it is not available

setClass("DDGStatement", slots = list(text = "character", parsed = "expression",
    abbrev = "character", annotated = "expression", vars.used = "character", vars.set = "character",
    vars.possibly.set = "character", isDdgFunc = "logical", readsFile = "logical",
    writesFile = "logical", createsGraphics = "logical", updatesGraphics = "logical",
    has.dev.off = "logical", pos = "DDGStatementPos", script.num = "numeric", contained = "list"))
## This is called when a new DDG Statement is created.  It initializes all of the
## slots.
setMethod("initialize", "DDGStatement", function(.Object, parsed, pos, script.name,
    script.num, parseData) {
    .Object@parsed <- parsed
    # deparse can return a vector of strings.  We convert that into one long string.
    .Object@text <- paste(deparse(.Object@parsed[[1]]), collapse = "")
    # If this is a call to ddg.eval, we only want the argument to ddg.eval (which is
    # a string) to appear in the node label
    .Object@abbrev <- if (grepl("^ddg.eval", .Object@text)) {
        .abbrev.cmd(.Object@parsed[[1]][[2]])
    } else {
        .abbrev.cmd(.Object@text)
    }
    vars.used <- .find.var.uses(.Object@parsed[[1]])
    # Remove index variable in for statement (handled separately in ddg.forloop).
    if (length(parsed) > 0 && !is.symbol(parsed[[1]]) && parsed[[1]][[1]] == "for") {
        index.var <- c(parsed[[1]][[2]])
        vars.used <- vars.used[!vars.used %in% index.var]
    }
    .Object@vars.used <- vars.used
    .Object@vars.set <- .find.simple.assign(.Object@parsed[[1]])
    .Object@vars.possibly.set <- .find.assign(.Object@parsed[[1]])
    # ddg.eval is treated differently than other calls to ddg functions since we will
    # execute the parameter as a command and want a node for it.
    .Object@isDdgFunc <- grepl("^ddg.", .Object@text) & !grepl("^ddg.eval", .Object@text)
    .Object@readsFile <- .reads.file(.Object@parsed[[1]])
    .Object@writesFile <- .writes.file(.Object@parsed[[1]])
    .Object@createsGraphics <- .creates.graphics(.Object@parsed[[1]])
    .Object@updatesGraphics <- .updates.graphics(.Object@parsed[[1]])
    .Object@has.dev.off <- .has.call.to(.Object@parsed[[1]], "dev.off")
    .Object@pos <- if (is.object(pos)) {
        pos
    } else {
        null.pos()
    }
    .Object@script.num <- if (is.na(script.num))
        -1 else script.num
    # The contained field is a list of DDGStatements for all statements inside the
    # function or control statement.  If we are collecting provenance inside
    # functions or control statements, we will execute annotated versions of these
    # statements.  If this is a call to ddg.eval, we only want to execute the
    # argument to ddg.eval
    .Object@contained <- .parse.contained(.Object, script.name, parseData)
    .Object@annotated <- if (grepl("^ddg.eval", .Object@text)) {
        parse(text = .Object@parsed[[1]][[2]])
    } else {
        .add.annotations(.Object)
    }
    return(.Object)
})

# A special null value for when source code position information is missing.
null.pos <- function() {
    return(new(Class = "DDGStatementPos", NA))
}

# Create a DDGStatement.  expr - the parsed expression pos - the DDGStatementPos
# object for this statement script.name - the name of the script the statement is
# from script.num - the script number used to find the script in the sourced
# the object created by the parser that gives us source position information
.construct.DDGStatement <- function(expr, pos, script.name, script.num, parseData) {
    # Surprisingly, if a statement is just a number, like 1 (which could be the last
    # statement in a function, for example), the parser returns a number, rather than
    # a parse tree!
    if (is.numeric(expr))
        expr <- parse(text = expr)
    return(new(Class = "DDGStatement", parsed = expr, pos, script.name, script.num, parseData))
}

# .abbrev.cmd abbreviates a command to the specified length.  Default is 60
# characters.
# cmd - command string.  len (optional) - number of characters.
.abbrev.cmd <- function(cmd, len = 60) {
    if (length(cmd) > 1) {
        cmd <- paste(cmd, collapse = " ")
    }
    if (file.exists(cmd))
        basename(cmd) else if (nchar(cmd) <= len)
        cmd else if (substr(cmd, len, len) != "\\")
        substr(cmd, 1, len) else if (substr(cmd, len - 1, len) == "\\\\")
        substr(cmd, 1, len) else substr(cmd, 1, len - 1)
}

# .find.var.uses returns a vector containing all the variables used in an
# expression.  Each value is unique in the returned vector, so that if a variable
# is used more than once, it only appears once.
# main.object - input expression.
.find.var.uses <- function(main.object) {
    # Recursive helper function.
    .find.var.uses.rec <- function(obj) {
        # Base cases.
        if (is.atomic(obj)) {
            return(character())  # A name is not atomic!
        }
        if (is.name(obj)) {
            if (nchar(obj) == 0)
                return(character())
            # Operators also pass the is.name test.  Make sure that if it is a single
            # character, then it is alpha-numeric.
            if (nchar(obj) == 1 && !grepl("[[:alpha:]]", obj))
                return(character())
            return(deparse(obj))
        }
        if (!is.recursive(obj))
            return(character())
        if (.is.functiondecl(obj))
            return(character())
        tryCatch({
            if (.is.assign(obj)) {
                # If assigning to a simple variable, recurse on the right hand side of the
                # assignment.
                # covers cases: '=', '<-', '<<-' for simple variable assignments e.g.  a <- 2
                if (is.symbol(obj[[2]])) {
                  unique(unlist(.find.var.uses.rec(obj[[3]])))
                } else if (is.call(obj[[2]])) {
                  # If assigning to an expression (like a[b]), recurse on the indexing part of the
                  # lvalue as well as on the expression.  covers cases: storage.mode(z) a[1] <- 2,
                  # a[b] <- 3
                  variables <- c(.find.var.uses.rec(obj[[2]][[2]]), unlist(.find.var.uses.rec(obj[[3]])))
                  # for array index cases like a[b] <- 3, where there could be a variable in the
                  # brackets
                  if (obj[[2]][[1]] == "[")
                    append(variables, .find.var.uses.rec(obj[[2]][[3]]))
                  unique(variables)
                } else if (is.character(obj[[2]])) {
                  # covers cases where there is a string literal.  for assign function
                  unique(c(unlist(.find.var.uses.rec(parse(text = obj[[2]])[[1]])),
                    unlist(.find.var.uses.rec(parse(text = obj[[3]])[[1]]))))
                } else {
                  # not entirely sure what this catches
                  unique(c(.find.var.uses.rec(obj[[2]]), unlist(.find.var.uses.rec(obj[[3]]))))
                }
            } else {
                # Not an assignment.  Recurse on all parts of the expression except the operator.
                unique(unlist(lapply(obj[1:length(obj)], .find.var.uses.rec)))
            }
        }, error = function(e) {
            print(paste(".find.var.uses.rec:  Error analyzing", deparse(obj)))
            character()
        })
    }

    return(.find.var.uses.rec(main.object))
}


# .find.simple.assign returns the name of the variable assigned to if the
# object passed in is an expression representing an assignment statement.
# Otherwise, it returns NULL.
# obj - input expression.
.find.simple.assign <- function(obj) {
    if (.is.assign(obj)) {
        .get.var(obj[[2]])
    } else {
        ""
    }
}


# .is.assign returns TRUE if the object passed is an expression object
# containing an assignment statement.
# expr - a parsed expression.  This also finds uses of ->.  This also finds uses
# of ->>.
.is.assign <- function(expr) {
    if (is.call(expr)) {
        if (identical(expr[[1]], as.name("<-")))
            return(TRUE) else if (identical(expr[[1]], as.name("<<-")))
            return(TRUE) else if (identical(expr[[1]], as.name("=")))
            return(TRUE) else if (identical(expr[[1]], as.name("assign")))
            return(TRUE)
    }
    return(FALSE)
}


# .get.var returns the variable being referenced in an expression. It should
# be passed an expression object that is either a variable, a vector access (like
# a[1]), a list member (like a[[i]]) or a data frame access (like a$foo[i]).  For
# all of these examples, it would return 'a'.
# lvalue - a parsed expression.
# for string literals e.g. when the assign function is used
.get.var <- function(lvalue) {
    if (is.symbol(lvalue))
        deparse(lvalue) else if (is.character(lvalue))
        .get.var(parse(text = lvalue)[[1]]) else .get.var(lvalue[[2]])
}


# .find.assign returns a vector containing the names of all the variables
# assigned in an expression.  The parameter should be an expression object. For
# example, if obj represents the expression 'a <- (b <- 2) * 3', the vector
# returned will contain both a and b.
# obj - a parsed expression.
# Assignment statement.  Add the variable being assigned to the vector and
# recurse on the expression being assigned.
# Don't look for assignments in the body of a function as those won't happen
# until the function is called.  Don't recurse on NULL.
# Not an assignment statement.  Recurse on the parts of the expression.  Base
# case.
.find.assign <- function(obj) {
    if (!is.recursive(obj))
        return(character())
    if (.is.assign(obj)) {
        var <- .get.var(obj[[2]])
        if (!(is.null(obj[[3]]))) {
            if (.is.functiondecl(obj[[3]]))
                var else c(var, unlist(lapply(obj[[3]], .find.assign)))
        } else var
    } else {
        unique(unlist(lapply(obj, .find.assign)))
    }
}

# ddg.is.functiondecl tests to see if an expression is a function declaration.
# expr - a parsed expression.
.is.functiondecl <- function(expr) {
    if (is.symbol(expr) || !is.language(expr))
        return(FALSE)
    if (is.null(expr[[1]]) || !is.language(expr[[1]]))
        return(FALSE)
    return(expr[[1]] == "function")
}

# .get.statement.type returns the control type (if applicable) of a parsed
# statement.
.get.statement.type <- function(parsed.command) {
    if (length(parsed.command) > 1)
        return(parsed.command[[1]])
    return(0)
}

# .add.annotations accepts a DDGStatement and returns an expression.  The
# returned expression is annotated as needed.  Return if statement is empty.
# Replace source with ddg.source.  Annotate user-defined functions.  Note that
# this will not annotate anonymous functions, like ones that might be passed to
# lapply, for example Is that what we want?
.add.annotations <- function(command) {
    parsed.command <- command@parsed[[1]]
    if (length(parsed.command) == 0)
        return(command@parsed)
    if (is.call(parsed.command) && parsed.command[[1]] == "source") {
        return(.replace.source(parsed.command))
    }
    if (.is.assign(parsed.command) && .is.functiondecl(parsed.command[[3]])) {
        return(.add.function.annotations(command))
    }
    statement.type <- as.character(.get.statement.type(parsed.command))
    loop.types <- list("for", "while", "repeat")
    if (length(statement.type > 0) && !is.null(statement.type)) {
        # Move into funcs below && ddg.max.loops() > 0) { Annotate if statement.
        if (statement.type == "if") {
            return(.annotate.if.statement(command))
        } else if (statement.type %in% loop.types) {
            # Annotate for, while, repeat statement.
            return(.annotate.loop.statement(command, statement.type))
        } else if (statement.type == "{") {
            # Annotate simple block.
            return(.annotate.simple.block(command))
        }
    }
    # Not a function or control construct.  No annotation required.
    return(command@parsed)
}

# .parse.contained creates the DDGStatement objects that correspond to
# statements inside a function or control block (or blocks).  cmd - the
# DDGStatement being considered script.name - the name of the script the
# statement is from parseData - the data returned by the parser that is used to
# extract source position information Returns a list of DDTStatements or an empty
# list if this is not a function declaration or a control construct.
.parse.contained <- function(cmd, script.name, parseData) {
    parsed.cmd <- cmd@parsed[[1]]
    # Function declaration
    if (.is.assign(parsed.cmd) && .is.functiondecl(parsed.cmd[[3]])) {
        # Create the DDGStatement objects for the statements in the function
        return(.parse.contained.function(cmd, script.name, parseData, parsed.cmd[[3]][[3]]))
    } else if (ddg.max.loops() == 0) {
        # Check if we want to go inside loop and if-statements
        return(list())
    }
    # Control statements.
    st.type <- as.character(.get.statement.type(parsed.cmd))
    # If statement.
    if (st.type == "if") {
        return(.parse.contained.if(cmd, script.name, parseData, parsed.cmd))
    } else {
        # Other control statements
        control.types <- list("for", "while", "repeat", "{")
        if (length(st.type) > 0 && !is.null(st.type) && (st.type %in% control.types)) {
            return(.parse.contained.control(cmd, script.name, parseData, parsed.cmd,
                st.type))
        }
    }
    # Not a function declaration or control construct.
    return(list())
}

.parse.contained.function <- function(cmd, script.name, parseData, func.body) {
    if (func.body[[1]] == "{") {
        # The function body is a block.  Extract the statements inside the block
        func.stmts <- func.body[2:length(func.body)]
    } else {
        # The function body is a single statement.
        func.stmts <- list(func.body)
    }
    # Create the DDGStatement objects for the statements in the function
    return(.ddg.create.DDGStatements(func.stmts, script.name, cmd@script.num, parseData,
        cmd@pos))
}

.parse.contained.if <- function(cmd, script.name, parseData, parent) {
    block.stmts <- list()
    # If and else if blocks.
    while (!is.symbol(parent) && parent[[1]] == "if") {
        # Get block
        block <- parent[[3]]
        block <- .ensure.in.block(block)
        # Get statements for this block.
        for (i in 2:(length(block))) {
            block.stmts <- c(block.stmts, block[[i]])
        }
        # Check for possible final else.
        if (length(parent) == 4) {
            final.else <- TRUE
        } else {
            final.else <- FALSE
        }
        # Get next parent
        parent <- parent[[(length(parent))]]
    }
    # Final else block (if any).
    if (final.else) {
        # Get block.
        block <- parent
        block <- .ensure.in.block(block)
        # Get statements for this block.
        for (i in 2:(length(block))) {
            block.stmts <- c(block.stmts, block[[i]])
        }
    }
    # Create the DDGStatement objects for statements in block
    return(.ddg.create.DDGStatements(block.stmts, script.name, cmd@script.num, parseData,
        cmd@pos))
}

# If there is a singleton statement inside a control construct nest it inside a
# block.  Return the block.
.ensure.in.block <- function(block) {
    if (is.symbol(block) || block[[1]] != "{")
        call("{", block) else block
}


.parse.contained.control <- function(cmd, script.name, parseData, parsed.cmd,
    st.type) {
    block.stmts <- list()
    if (st.type == "for")
        block <- parsed.cmd[[4]] else if (st.type == "while")
        block <- parsed.cmd[[3]] else if (st.type == "repeat")
        block <- parsed.cmd[[2]] else if (st.type == "{")
        block <- parsed.cmd

    block <- .ensure.in.block(block)
    for (i in 2:length(block)) {
        block.stmts <- c(block.stmts, block[[i]])
    }
    # Create the DDGStatement objects for statements in block
    return(.ddg.create.DDGStatements(block.stmts, script.name, cmd@script.num, parseData,
        cmd@pos))
}

# .replace.source replaces source with ddg.source.  parsed.command must be a
# parsed expression that is a call to the source function.

.replace.source <- function(parsed.command) {
    script.name <- deparse(parsed.command[[2]])
    parsed.command.txt <- paste("ddg.source(", script.name, ")", sep = "")
    return(parse(text = parsed.command.txt))
}

# .add.function.annotations is passed a command that corresponds to a
# function declaration.  It returns a parsed command corresponding to the same
# function declaration but with calls to ddg.function, ddg.eval and
# ddg.ret.value inserted if they are not already present.  The functions
# ddg.annotate.on and ddg.annotate.off may be used to provide a list of functions
# to annotate or not to annotate, respectively.  function.decl should be a
# command that contains an assignment statement where the value being bound is a
# function declaration

.add.function.annotations <- function(function.decl) {
    parsed.function.decl <- function.decl@parsed[[1]]
    # Get function name.
    func.name <- toString(parsed.function.decl[[2]])
    # Get function definition.
    func.definition <- parsed.function.decl[[3]]
    # Create function block if necessary.
    if (func.definition[[3]][[1]] != "{") {
        func.definition <- .create.function.block(func.definition)
    }
    # Create new function body with an if-then statement for annotations.
    func.definition <- .add.conditional.statement(func.definition, func.name)
    # Insert call to ddg.function if not already added.
    if (!.has.call.to(func.definition[[3]], "ddg.function")) {
        func.definition <- .insert.ddg.function(func.definition)
    }
    # Insert calls to ddg.ret.value if not already added.
    if (!.has.call.to(func.definition[[3]], "ddg.ret.value")) {
        func.definition <- .wrap.all.ret.parameters(func.definition, function.decl@contained)
    }
    # Wrap last statement with ddg.ret.value if not already added and if last
    # statement is not a simple return or a ddg function.
    last.statement <- .find.last.statement(func.definition)

    if (!.ddg.is.call.to(last.statement, "ddg.ret.value") & !.ddg.is.call.to(last.statement,
        "return") & !.is.call.to.ddg.function(last.statement)) {
        func.definition <- .wrap.last.line(func.definition, function.decl@contained)
    }
    # Wrap statements with ddg.eval if not already added and if statements are not
    # calls to a ddg function and do not contain ddg.ret.value.
    if (!.has.call.to(func.definition, "ddg.eval")) {
        func.definition <- .wrap.with.ddg.eval(func.definition, function.decl@contained)
    }
    # Reassemble parsed.command.
    return(as.expression(call("<-", as.name(func.name), func.definition)))
}

# .create.function.block creates a function block.  func.definition is a
# parsed expression for a function declaration (not the full assignment statement
# in which it is declared) Returns a parse tree for the same function declaration
# but with the function statements inside a block.

.create.function.block <- function(func.definition) {
    # Get the function parameters.
    func.params <- func.definition[[2]]
    # Get the body of the function.
    func.body <- func.definition[[3]]
    # Add block and reconstruct the call.
    new.func.body <- call("{", func.body)
    return(call("function", func.params, as.call(new.func.body)))
}

# .add.conditional.statement creates a new function definition containing an
# if-then statement used to control annotation.  func.definition - original
# function definition.

.add.conditional.statement <- function(func.definition, func.name) {
    # Get the function parameters.
    func.params <- func.definition[[2]]
    # Get the body of the function.
    func.body <- func.definition[[3]]
    pos <- length(func.body)
    # Create new function definition containing if-then statement.  This will prevent
    # us from collecting provenance inside functions that are inside control
    # structures when we are not collecting provenance in control structures.
    new.func.body.txt <- c(paste("if (ddg.should.run.annotated(\"", func.name, "\")) {",
        sep = ""), as.list(func.body[2:pos]), paste("} else {", sep = ""), as.list(func.body[2:pos]),
        paste("}", sep = ""))

    new.func.expr <- parse(text = new.func.body.txt)
    new.func.body <- new.func.expr[[1]]

    return(call("function", func.params, call("{", new.func.body)))
}

# .insert.ddg.functioninserts ddg.function before the first line in the
# annotated block of a function body.  func.definition is a parsed expression for
# a function declaration (not the full assignment statement in which it is
# declared) Returns a parse tree for the same function declaration but with a
# call to ddg.function() as the first statement.

.insert.ddg.function <- function(func.definition) {
    # Get the function parameters.
    func.params <- func.definition[[2]]
    # Get the body of the function.
    func.body <- func.definition[[3]]
    # Get annotated block.
    block <- func.body[[2]][[3]]
    pos <- length(block)
    # Insert ddg.function.
    inserted.statement <- call("ddg.function")
    new.statements.txt <- c(as.list("{"), inserted.statement, as.list(block[2:pos]),
        as.list("}"))
    block <- parse(text = new.statements.txt)[[1]]
    func.body[[2]][[3]] <- block

    return(call("function", func.params, as.call(func.body)))
}

# .wrap.ret.parameters wraps parameters of return functions with
# ddg.ret.value in the annotated block of a function body.  block is the parse
# tree corresponding to the statements within the annotated block of a function
# parsed.stmts is the list of DDGStatement objects contained in the function
# Returns a parse tree for the same function body but with a call to
# ddg.ret.value wrapped around all expressions that are returned.

.wrap.ret.parameters <- function(block, parsed.stmts) {
    # Check each statement in the annotated block to see if it contains a ret.
    pos <- length(block)
    for (i in 1:pos) {
        statement <- block[[i]]
        if (.has.call.to(statement, "return")) {
            # If statement is a return, wrap parameters with ddg.ret.value.
            if (.ddg.is.call.to(statement, "return")) {
                # Need to handle empty parameter separately.
                if (length(statement) == 1) {
                  ret.params <- ""
                } else {
                  ret.params <- statement[[2]]
                }
                if (is.list(parsed.stmts)) {
                  parsed.stmt <- parsed.stmts[[i - 2]]
                } else {
                  parsed.stmt <- parsed.stmts
                }
                # If parameters contain a return, recurse on parameters.
                if (.has.call.to(ret.params, "return")) {
                  ret.params <- .wrap.ret.parameters(ret.params, parsed.stmt)
                }
                new.ret.params <- .create.ddg.ret.call(ret.params, parsed.stmt)
                new.statement <- call("return", new.ret.params)
                block[[i]] <- new.statement
                # If statement contains a return, recurse on statement.
            } else {
                if (is.list(parsed.stmts)) {
                  parsed.stmt <- parsed.stmts[[i - 2]]
                } else {
                  parsed.stmt <- parsed.stmts
                }
                block[[i]] <- .wrap.ret.parameters(statement, parsed.stmt)
            }
        }
    }
    return(block)
}

# .wrap.all.ret.parameters wraps parameters of all return functions with
# ddg.ret.value in the annotated block of a function definition.
# func.definition is a parsed expression for a function declaration (not the full
# assignment statement in which it is declared) parsed.stmts is the list of
# DDGStatement objects contained in the function Returns a parse tree for the
# same function declaration but with a call to ddg.ret.value wrapped around
# all expressions that are returned.

.wrap.all.ret.parameters <- function(func.definition, parsed.stmts) {
    # Get function parameters.
    func.params <- func.definition[[2]]
    # Get the body of the function.
    func.body <- func.definition[[3]]
    # Get annotated block.
    block <- func.body[[2]][[3]]
    pos <- length(block)
    # Wrap individual return functions.
    block <- .wrap.ret.parameters(block, parsed.stmts)
    # Get new function body
    func.body[[2]][[3]] <- block
    # Reconstruct function.
    return(call("function", func.params, as.call(func.body)))
}

# .find.last.statement finds the last statement in the annotated block of a
# function.  func.definition is a parsed expression for a function declaration
# (not the full assignment statement in which it is declared) Returns the parse
# tree corresponding to the last statement in the function definition.

.find.last.statement <- function(func.definition) {
    # Get function body.
    func.body <- func.definition[[3]]
    # Get annotated block.
    block <- func.body[[2]][[3]]
    pos <- length(block)
    # Return final statement in block.
    return(block[[pos]])
}

# Creates a call to ddg.eval using a closure so that we will be able to refer to
# the correct DDGStatement object when the return call is executed.  statement is
# the parse tree for the expression being returned parsed.stmt is the
# DDGStatement object corresponding to the last statement Returns a parse tree
# with a call to ddg.eval.  The arguments to ddg.eval are the original statement
# and the DDGStatement object.

.create.ddg.eval.call <- function(statement, parsed.stmt) {
    # We need to force the evaluation of parsed.stmt for the closure to return the
    # value that parsed.stmt has at the time the ddg.eval call is created.
    force(parsed.stmt)
    return(call("ddg.eval", paste(deparse(statement), collapse = ""), function() parsed.stmt))
}

# Creates a call to ddg.ret.value using a closure so that we will be able to
# refer to the correct DDGStatement object when the return call is executed.
# last.statement is the parse tree for the expression being returned parsed.stmt
# is the DDGStatement object corresponding to the last statement Returns a parse
# tree with a call to ddg.ret.value.  The arguments to ddg.ret.value are
# the parsed statement and the DDGStatement object.

.create.ddg.ret.call <- function(last.statement, parsed.stmt) {
    # We need to force the evaluation of parsed.stmt for the closure to return the
    # value that parsed.stmt has at the time the ddg.eval call is created.
    force(parsed.stmt)
    if (.has.call.to(last.statement, "return")) {
        return(call("ddg.ret.value", last.statement, function() parsed.stmt))
    } else {
        # If there is no return call, we will use ddg.eval to execute the statement and
        # then ddg.ret.value to create the necessary return structure.  We cannot use
        # this technique if there is a return call because we if tried to eval a return
        # call, we would end up returning from some code inside RDT, instead of the
        # user's function.
        eval.cmd <- .construct.DDGStatement(parse(text = deparse(last.statement)),
            pos = NA, script.num = NA, parseData = NULL)
        new.statement <- .create.ddg.eval.call(last.statement, parsed.stmt)
        return(call("ddg.ret.value", new.statement, function() parsed.stmt))
    }
}

# .wrap.last.line wraps the last line of the annotated block of a function
# with ddg.ret.value.  func.definition is a parsed expression for a function
# declaration (not the full assignment statement in which it is declared)
# parsed.stmts is the list of DDGStatement objects contained in the function
# Returns a parse tree for the same function declaration but with a call to
# ddg.ret.value wrapped around the last line in the body.

.wrap.last.line <- function(func.definition, parsed.stmts) {
    # Get function parameters.
    func.params <- func.definition[[2]]
    # Get the body of the function.
    func.body <- func.definition[[3]]
    # Get annotated block.
    block <- func.body[[2]][[3]]
    pos <- length(block)

    last.statement <- block[[pos]]
    parsed.stmt <- parsed.stmts[[length(parsed.stmts)]]

    wrapped.statement <- .create.ddg.ret.call(last.statement, parsed.stmt)
    func.body[[2]][[3]][[pos]] <- wrapped.statement

    return(call("function", func.params, as.call(func.body)))
}

# .wrap.with.ddg.eval wraps each statement in the annotated block of a
# function body with ddg.eval if the statement is not a call to a ddg function
# and does not contain a call to ddg.ret.value. The statement is enclosed in
# quotation marks.  func.definition is a parsed expression for a function
# declaration (not the full assignment statement in which it is declared)
# parsed.stmts is the list of DDGStatement objects contained in the function
# Returns a parse tree for the same function declaration but with the calls to
# ddg.eval inserted.
.wrap.with.ddg.eval <- function(func.definition, parsed.stmts) {
    # Get the function parameters.
    func.params <- func.definition[[2]]
    # Get the body of the function.
    func.body <- func.definition[[3]]
    # Get annotated block.
    block <- func.body[[2]][[3]]
    pos <- length(block)
    # Process each statement in block.
    for (i in 2:pos) {
        # Wrap with ddg.eval if statement is not a call to a ddg function and does not
        # contain a call to ddg.ret.value. Enclose statement in quotation marks.
        statement <- block[[i]]
        if (!grepl("^ddg.", statement[1]) & !.has.call.to(statement, "ddg.ret.value")) {
            parsed.stmt <- parsed.stmts[[i - 2]]
            new.statement <- .create.ddg.eval.call(statement, parsed.stmt)
            func.body[[2]][[3]][[i]] <- new.statement
        }
    }
    return(call("function", func.params, as.call(func.body)))
}

# Creates a call to ddg.eval using the number of the DDGStatement stored in the
# list ddg.statements in the ddg environment.  statement is the parse tree for
# the expression being returned and parsed.stmt is the corresponding DDGStatement
# object.  Returns a parse tree with a call to ddg.eval.  The arguments to
# ddg.eval are the original statement and the number of the DDGStatement object.

.create.block.ddg.eval.call <- function(statement, parsed.stmt) {
    # Get the next DDGStatement number and store parsed.stmt at this location.
    .ddg.inc("ddg.statement.num")
    num <- .ddg.get("ddg.statement.num")
    ddg.statements <- c(.ddg.get("ddg.statements"), parsed.stmt)
    .ddg.set("ddg.statements", ddg.statements)
    return(call("ddg.eval", paste(deparse(statement), collapse = ""), num))
}

# .wrap.block.with.ddg.eval wraps each statement in a block with ddg.eval
# unless the statement is a ddg function or contains a call to ddg.ret.value.

.wrap.block.with.ddg.eval <- function(block, parsed.stmts) {
    # Ignore initial brace.
    for (i in 2:length(block)) {
        # Enclose statement in quotation marks and wrap with ddg.eval.
        statement <- block[[i]]
        if (!grepl("^ddg.", statement) && !.has.call.to(statement, "ddg.ret.value")) {
            parsed.stmt <- parsed.stmts[[i - 1]]
            # print(statement) print(parsed.stmt@text)

            new.statement <- .create.block.ddg.eval.call(statement, parsed.stmt)
            block[[i]] <- new.statement
        }
    }
    return(block)
}

# .add.block.start.finish adds start and finish nodes to blocks in control
# statements.

.add.block.start.finish <- function(block, pname) {
    # Create ddg.start & ddg.finish statements.
    start.statement <- deparse(call("ddg.start", pname))
    finish.statement <- deparse(call("ddg.finish", pname))
    # Get internal statements.
    pos <- length(block)
    statements <- deparse(block[[2]])
    if (pos > 2) {
        for (i in 3:pos) {
            statements <- append(statements, deparse(block[[i]]))
        }
    }
    # Create new block.
    block.txt <- paste(c("{", start.statement, statements, finish.statement, "}"),
        collapse = "\n")
    block.parsed <- parse(text = block.txt)
    return(block.parsed[[1]])
}

# .insert.ddg.forloop inserts a ddg.forloop statement at the top of a block.

.insert.ddg.forloop <- function(block, index.var) {
    pos <- length(block)
    inserted.statement <- call("ddg.forloop", index.var)
    # Block with single statement.
    if (pos == 2) {
        new.statements <- c(as.list(block[[1]]), inserted.statement, as.list(block[2]))
        return(as.call(new.statements))
    } else {
        # Block with multiple statements.
        new.statements <- c(as.list(block[[1]]), inserted.statement, as.list(block[2:pos]))
        return(as.call(new.statements))
    }
}

# .insert.ddg.loop.annotate inserts a ddg.loop.annotate.on or
# ddg.loop.annotate.off statement at the beginning of a block.

.insert.ddg.loop.annotate <- function(block, var) {
    pos <- length(block)
    if (var == "on")
        inserted.statement <- call("ddg.loop.annotate.on") else if (var == "off")
        inserted.statement <- call("ddg.loop.annotate.off")
    # Block with single statement.
    if (pos == 2) {
        new.statements <- c(as.list(block[[1]]), inserted.statement, as.list(block[2]))
        return(as.call(new.statements))
    } else {
        # Block with multiple statements.
        new.statements <- c(as.list(block[[1]]), inserted.statement, as.list(block[2:pos]))
        return(as.call(new.statements))
    }
}

# .annotate.if.statement adds annotations to if statements.

.annotate.if.statement <- function(command) {
    if (ddg.max.loops() == 0) {
        parsed.command.txt <- deparse(command@parsed[[1]])
    } else {
        # Get parsed command & contained statements
        parsed.command <- command@parsed[[1]]
        parsed.stmts <- command@contained
        # Set initial values.
        bnum <- 1
        ptr <- 0
        parent <- parsed.command
        parsed.command.txt <- vector()
        # If & else if blocks.
        while (!is.symbol(parent) && parent[[1]] == "if") {
            # Get block
            block <- parent[[3]]
            block <- .ensure.in.block(block)
            # Get statements for this block.
            block.stmts <- list()
            for (i in 1:(length(block) - 1)) {
                block.stmts <- c(block.stmts, parsed.stmts[[i + ptr]])
            }
            # Advance pointer for next block.
            ptr <- ptr + length(block) - 1
            # Wrap each statement with ddg.eval.
            block <- .wrap.block.with.ddg.eval(block, block.stmts)
            # Add start and finish nodes.
            block <- .add.block.start.finish(block, "if")
            # Reconstruct original statement.
            cond <- paste(deparse(parent[[2]]), collapse = "")
            if (bnum == 1) {
                statement.txt <- paste(c(paste("if (", cond, ")", sep = ""), deparse(block),
                  collapse = "\n"))
            } else {
                statement.txt <- paste(c(paste("} else if (", cond, ")", sep = ""),
                  deparse(block), collapse = "\n"))
            }
            # Remove final brace & new line.
            if (bnum > 1) {
                last <- length(parsed.command.txt) - 2
                parsed.command.txt <- parsed.command.txt[c(1:last)]
            }
            parsed.command.txt <- append(parsed.command.txt, statement.txt)
            # Check for possible final else.
            if (length(parent) == 4) {
                final.else <- TRUE
            } else {
                final.else <- FALSE
            }
            # Get next parent
            bnum <- bnum + 1
            parent <- parent[[(length(parent))]]
        }
        # Final else block (if any).
        if (final.else) {
            # Get block.
            block <- parent
            block <- .ensure.in.block(block)
            # Get statements for this block
            block.stmts <- list()
            for (i in 1:(length(block) - 1)) {
                block.stmts <- c(block.stmts, parsed.stmts[[i + ptr]])
            }
            # Wrap each statement with ddg.eval.
            block <- .wrap.block.with.ddg.eval(block, block.stmts)
            # Add start and finish nodes.
            block <- .add.block.start.finish(block, "if")
            # Reconstruct original statement
            statement.txt <- paste(c(paste("} else", sep = ""), deparse(block), collapse = ""))
            # Remove final brace.
            last <- length(parsed.command.txt) - 2
            parsed.command.txt <- parsed.command.txt[c(1:last)]
            parsed.command.txt <- append(parsed.command.txt, statement.txt)
        }
    }
    parsed.command.txt <- append(parsed.command.txt, "ddg.set.inside.loop()", after = 0)
    parsed.command.txt <- append(parsed.command.txt, "ddg.not.inside.loop()")
    return(parse(text = parsed.command.txt))
}

# .annotate.loop.statement adds annotations to for, while and repeat
# statements. Provenance is collected for the number of iterations specified in
# the parameter max.loops, beginning with the iteration specified in the
# parameter first.loop. A Details Omitted node may be added before and after the
# annotated section, as needed.

.annotate.loop.statement <- function(command, loop.type) {
    if (ddg.max.loops() == 0) {
        # Note that I can't just use command@text because it does not separate statements
        # with newlines
        parsed.command.txt <- deparse(command@parsed[[1]])
    } else {
        # Get parsed command
        parsed.command <- command@parsed[[1]]
        # Add new loop & get loop number.
        ddg.loops <- c(.ddg.get("ddg.loops"), 0)
        .ddg.set("ddg.loops", ddg.loops)
        .ddg.inc("ddg.loop.num")
        ddg.loop.num <- .ddg.get("ddg.loop.num")
        # Get statements in block.
        if (loop.type == "for") {
            block <- parsed.command[[4]]
        } else if (loop.type == "while") {
            block <- parsed.command[[3]]
        } else {
            # repeat
            block <- parsed.command[[2]]
        }
        # Add braces if necessary.
        block <- .ensure.in.block(block)
        # Wrap each statement with ddg.eval.
        annotated.block <- .wrap.block.with.ddg.eval(block, command@contained)
        # Insert ddg.forloop statement.
        if (loop.type == "for") {
            index.var <- parsed.command[[2]]
            annotated.block <- .insert.ddg.forloop(annotated.block, index.var)
        }
        # Add start and finish nodes.
        annotated.block <- .add.block.start.finish(annotated.block, paste(loop.type,
            "loop"))
        # Insert ddg.loop.annotate statements.
        block <- .insert.ddg.loop.annotate(block, "off")
        # Reconstruct for statement.
        block.txt <- deparse(block)
        annotated.block.txt <- deparse(annotated.block)
        # Calculate the control line of the annotated code
        if (loop.type == "for") {
            firstLine <- paste("for (", deparse(parsed.command[[2]]), " in ", deparse(parsed.command[[3]]),
                ") {", sep = "")
        } else if (loop.type == "while") {
            firstLine <- paste("while (", deparse(parsed.command[[2]]), ") {", sep = "")
        } else {
            # repeat
            firstLine <- paste("repeat {", sep = "")
        }
        # Turn loop annotations back on in case we reached the max.
        parsed.command.txt <- paste(c(firstLine, paste("if (ddg.loop.count.inc(",
            ddg.loop.num, ") >= ddg.first.loop() && ddg.loop.count(", ddg.loop.num,
            ") <= ddg.first.loop() + ddg.max.loops() - 1)", sep = ""), annotated.block.txt,
            paste("else", sep = ""), block.txt, paste("}", sep = ""), paste("if (ddg.loop.count(",
                ddg.loop.num, ") > ddg.first.loop() + ddg.max.loops() - 1) ddg.details.omitted()",
                sep = ""), paste("ddg.reset.loop.count(", ddg.loop.num, ")", sep = ""),
            paste("if (ddg.max.loops() != 0) ddg.loop.annotate.on()"), collapse = "\n"))
    }
    parsed.command.txt <- append(parsed.command.txt, "ddg.set.inside.loop()", after = 0)
    parsed.command.txt <- append(parsed.command.txt, "ddg.not.inside.loop()")
    return(parse(text = parsed.command.txt))
}

# .annotate.simple.block adds annotations to simple blocks.

.annotate.simple.block <- function(command) {
    # Get parsed command
    parsed.command <- command@parsed[[1]]
    # Get statements in block.
    block <- parsed.command
    # Wrap each statement with ddg.eval.
    block <- .wrap.block.with.ddg.eval(block, command@contained)
    # Add start and finish nodes.
    block <- .add.block.start.finish(block, "block")
    # Reconstruct block.
    block.txt <- deparse(block)
    return(parse(text = block.txt))
}

# .ddg.is.call.to returns TRUE if the parsed expression passed in is a call to
# the specified function.  parsed.expr - a parse tree func.name - the name of a
# function

.ddg.is.call.to <- function(parsed.expr, func.name) {
    # Check if a function call.
    if (is.call(parsed.expr)) {
        # Check if the function called is the specified function.
        if (parsed.expr[[1]] == func.name) {
            return(TRUE)
        }
    }
    return(FALSE)
}

# .has.call.to returns TRUE if the parsed expression passed in contains a
# call to the specified function.  parsed.expr - a parse tree func.name - the
# name of a function

.has.call.to <- function(parsed.expr, func.name) {
    # Base case.
    if (!is.recursive(parsed.expr))
        return(FALSE)
    # If this is a function declaration, skip it
    if (.is.functiondecl(parsed.expr))
        return(FALSE)
    # A call to the specified function.
    if (.ddg.is.call.to(parsed.expr, func.name)) {
        return(TRUE)
    } else {
        # Not a call to the specified function.  Recurse on the parts of the expression.
        return(any(sapply(parsed.expr, function(parsed.expr) {
            return(.has.call.to(parsed.expr, func.name))
        })))
    }
}

# .is.call.to.ddg.function returns TRUE if the parsed expression passed in is
# a call to a ddg function.  parsed.expr - a parse tree

.is.call.to.ddg.function <- function(parsed.expr) {
    # Check if a function call.
    if (is.call(parsed.expr)) {
        # Check if the function called is a ddg function.
        if (grepl("^ddg.", parsed.expr[1])) {
            return(TRUE)
        }
    }
    return(FALSE)
}

# Returns true if the statement contains a call to a function that read from a
# file parsed.statement - a parse tree
.reads.file <- function(parsed.statement) {
    .ddg.file.read.functions.df <- .ddg.get(".ddg.file.read.functions.df")
    reading.functions <- .ddg.file.read.functions.df$function.names
    return(TRUE %in% (lapply(reading.functions, function(fun.name) {
        return(.has.call.to(parsed.statement, fun.name))
    })))
}

# Returns true if the statement contains a call to a function that writes to a
# file parsed.statement - a parse tree
.writes.file <- function(parsed.statement) {
    .ddg.file.write.functions.df <- .ddg.get(".ddg.file.write.functions.df")
    writing.functions <- .ddg.file.write.functions.df$function.names
    return(TRUE %in% (lapply(writing.functions, function(fun.name) {
        return(.has.call.to(parsed.statement, fun.name))
    })))
}

# Returns true if the statement contains a call to a function that creates a
# graphics object parsed.statement - a parse tree
.creates.graphics <- function(parsed.statement) {
    .ddg.graphics.functions.df <- .ddg.get(".ddg.graphics.functions.df")
    graphics.functions <- .ddg.graphics.functions.df$function.names
    if (TRUE %in% (lapply(graphics.functions, function(fun.name) {
        return(.has.call.to(parsed.statement, fun.name))
    }))) {
        return(TRUE)
    }
    return(FALSE)
}

# Returns true if the statement contains a call to a function that updates a
# graphics object parsed.statement - a parse tree
.updates.graphics <- function(parsed.statement) {
    graphics.update.functions <- .ddg.get(".ddg.graphics.update.functions")
    if (TRUE %in% (lapply(graphics.update.functions, function(fun.name) {
        return(.has.call.to(parsed.statement, fun.name))
    }))) {
        return(TRUE)
    }
    return(FALSE)
}
ProvTools/provR documentation built on May 6, 2019, 3:27 p.m.