Nothing
# Copyright (C) President and Fellows of Harvard College and
# Trustees of Mount Holyoke College, 2020, 2021.
# This program is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program. If not, see
# <http://www.gnu.org/licenses/>.
###############################################################################
# The debug environment
.debug.env <- new.env(parent = emptyenv())
# parsed provenance and adjacency graph
.debug.env$prov <- NULL
.debug.env$graph <- NULL
.debug.env$has.graph <- FALSE
# procedure nodes, data nodes
.debug.env$proc.nodes <- NULL
.debug.env$data.nodes <- NULL
# data-to-procedure edges, procedure-to-data edges
.debug.env$data.proc <- NULL
.debug.env$proc.data <- NULL
# environment for loaded variables
.debug.env$var.env <- NULL
# path to provenance directory
.debug.env$prov.dir <- NULL
# === INIT =================================================================== #
#' A Time-Travelling Debugger for R - Debugger Initialization
#'
#' prov.debug uses the provenance from the last execution of prov.run to
#' initialise the debugger.
#'
#' Provenance is a detailed record of the execution of a script which includes
#' information about the steps that were excecuted and the intermediate data values
#' that were used and/or created. After it is collected, it can be used in a
#' variety of ways to better understand the execution.
#'
#' This package, provDebugR, is one such application, using provenance post-execution
#' to help the user understand and debug their script by providing functions to
#' look at intermediate steps and data values, as well as their forwards or backwards
#' lineage. These functions may be used only after provDebugR has been initialised using
#' one of the initialisation functions above.
#'
#' The forwards lineage of a data object is the list of steps showing how the data object
#' was used. The backwards lineage of a data object is the list of steps showing how the
#' data object was produced.
#'
#' provDebugR uses provenance produced by rdtLite (a provenance collection package
#' available on CRAN), stored in PROV-JSON format.
#'
#' @return No return value.
#'
#' @seealso Other provDebugR Functions (non-initialisation):
#' @seealso \code{\link{debug.error}}: Returns the backwards lineage of the error, if any.
#' The error may be queried on StackOverflow.
#' @seealso \code{\link{debug.line}}: Returns all immediate inputs and outputs
#' for the line(s) queried.
#' @seealso \code{\link{debug.lineage}}: Returns the forwards or backwards lineage
#' of the data object(s) queried. The forwards lineage shows how the
#' data object was used, and the backwards lineage shows how it was produced.
#' @seealso \code{\link{debug.state}}: Returns the state at the line(s) queried,
#' after the line had been executed. The state is the list of all
#' variables and their values in the environment at the queried line.
#' @seealso \code{\link{debug.type.changes}}: Returns a data frame for each variable in
#' the execution containing the instances where the data type changed.
#' @seealso \code{\link{debug.variable}}: Returns a data frame showing all instances
#' of the variable(s) queried.
#' @seealso \code{\link{debug.view}}: Opens and displays the contents of each file or variable
#' or variable queried.
#' @seealso \code{\link{debug.warning}}: Returns the backwards lineage of the queried
#' warning(s), if any.
#'
#' @seealso Other tools that use provenance:
#' \url{https://github.com/End-to-end-provenance}
#'
#' @references rdtLite (Provenance Collection Tool):
#' \url{https://CRAN.R-project.org/package=rdtLite}
#' @references PROV-JSON output produced by rdtLite:
#' \url{https://github.com/End-to-end-provenance/ExtendedProvJson/blob/master/JSON-format.md}
#' @references PROV-JSON standard:
#' \url{https://www.w3.org/Submission/2013/SUBM-prov-json-20130424/}
#'
#' @examples
#' \dontrun{
#' rdtLite::prov.run("test.R")
#' prov.debug()}
#'
#' @export
#' @rdname prov.debug
prov.debug <- function()
{
# clear debug environment first!
.clear()
# determine which provenance collection tool to use
tool <- .get.tool()
if(tool == "rdtLite")
prov.json <- rdtLite::prov.json
else
prov.json <- rdt::prov.json
# pass json to helper function to initialise debugger
.debug.init(prov.json(), is.file = FALSE)
}
#' A Time-Travelling Debugger for R - Debugger Initialization
#'
#' prov.debug.file reads a PROV-JSON file to initialise the debugger.
#'
#' @param prov.file Path to a PROV-JSON file.
#'
#' @examples
#' \dontrun{
#' prov.debug.file("prov_test/prov.json")}
#'
#' @export
#' @rdname prov.debug
prov.debug.file <- function(prov.file)
{
# clear debug environment first!
.clear()
.debug.init(prov.file, is.file = TRUE)
}
#' A Time-Travelling Debugger for R - Debugger Initialization
#'
#' prov.debug.run executs a R or Rmd script, collects provenance, and
#' initialises the debugger using the collected provenance.
#'
#' @param script Path to an R script.
#' @param ... extra parameters are passed to the provenance collector. See rdt's prov.run function
#' or rdtLites's prov.run function for details.
#'
#' @examples
#' \dontrun{
#' prov.debug.run("test.R", snapshot.size = 100)}
#'
#' @export
#' @rdname prov.debug
prov.debug.run <- function(script, ...)
{
# clear debug environment first!
.clear()
# determine which provenance collection tool to use
tool <- .get.tool()
if(tool == "rdtLite") {
prov.run <- rdtLite::prov.run
prov.json <- rdtLite::prov.json
}
else {
prov.run <- rdt::prov.run
prov.json <- rdt::prov.json
}
# collect provenance
tryCatch({
prov.run(script, ...)
}, finally = {
cat(paste (tool, "is finished running.\n"))
# initialise debugger
.debug.init(prov.json(), is.file = FALSE)
})
}
# === HELPER FUNCTIONS ======================================================= #
#' Initialises the debugger.
#'
#' @param json Path to a PROV-JSON file, or a PROV-JSON string.
#' @param is.file If TRUE, this indicates that the value given in the parameter
#' 'json' is a file.
#'
#' @return N/A
#' @noRd
.debug.init <- function(json, is.file)
{
# get parsed provenance, adjacency graph
.debug.env$prov <- provParseR::prov.parse(json, isFile = is.file)
.debug.env$graph <- provGraphR::create.graph(json, isFile = is.file)
# procedure nodes, data nodes
.debug.env$proc.nodes <- provParseR::get.proc.nodes(.debug.env$prov)
.debug.env$data.nodes <- provParseR::get.data.nodes(.debug.env$prov)
# data-to-procedure edges, procedure-to-data edges
.debug.env$data.proc <- provParseR::get.data.proc(.debug.env$prov)
.debug.env$proc.data <- provParseR::get.proc.data(.debug.env$prov)
# var.env (for loading variables for viewing)
.debug.env$var.env <- new.env(parent = .debug.env)
# path to provenance directory
environment <- provParseR::get.environment(.debug.env$prov)
.debug.env$prov.dir <- environment$value[environment$label == "provDirectory"]
# empty case
if(is.null(.debug.env$graph)) {
.debug.env$has.graph <- FALSE
stop("There is no provenance.")
}
.debug.env$has.graph <- TRUE
# get full code for each procedure node
.debug.env$proc.nodes$name <- .get.full.code()
# get columns of script names for each procedure node,
# cbind to proc.nodes
scripts <- .get.scripts()
scriptName <- .get.script.names(.debug.env$proc.nodes$scriptNum, scripts)
.debug.env$proc.nodes <- cbind(.debug.env$proc.nodes, scriptName,
stringsAsFactors = FALSE)
.debug.env$proc.nodes <- .debug.env$proc.nodes[ , c("id", "name", "type",
"elapsedTime",
"scriptNum", "scriptName",
"startLine", "startCol",
"endLine", "endCol")]
}
#' .get.tool determines whether to use rdt or rdtLite to get the provenance.
#'
#' If rdtLite is loaded, "rdtLite" is returned. If rdtLite is not loaded, but rdt
#' is, "rdt" is returned. If neither is loaded, it then checks to see if either
#' is installed, favoring "rdtLite" over "rdt".
#'
#' Stops if neither rdt nor rdtLite is available.
#'
#' @return "rdtLite" or "rdt"
#' @noRd
.get.tool <- function()
{
# choose provenance collector from loaded libraries
libs <- loadedNamespaces()
if ("rdtLite" %in% libs)
return("rdtLite")
if ("rdt" %in% libs)
return("rdt")
# neither rdtLite nor rdt are loaded,
# choose provenance collector from installed libraries
if (requireNamespace ("rdtLite"))
return("rdtLite")
if (requireNamespace ("rdt"))
return("rdt")
# nether rdt nor rdt are available: stop
stop("One of rdtLite or rdt must be installed.")
}
#' Returns the full code for each Operation procedure node. A vector of strings.
#' Uses a helper function for testing purposes.
#'
#' @return The full code for each Operation procedure node. A vector of strings.
#' @noRd
.get.full.code <- function()
{
# get list of saved scripts
scripts <- provParseR::get.saved.scripts(.debug.env$prov)$script
# call helper function
return(.get.full.code.helper(.debug.env$proc.nodes, scripts))
}
#' Helper function for .get.full.code
#' Given a table of procedure nodes and saved scripts, returns the full code
#' for each Operation procedure node as a vector of strings.
#' Separated from .get.full.code for testing purposes.
#'
#' @param proc.nodes Table of procedure nodes.
#' @param scripts List of paths to saved scripts.
#'
#' @return The full code for each Operation procedure node. A vector of strings.
#' @noRd
.get.full.code.helper <- function(proc.nodes, scripts)
{
# vector to store script paths that could not be found
inaccessible <- c()
lines <- lapply(scripts, function(script)
{
# check if file exists
if(!file.exists(script)) {
inaccessible <<- append(inaccessible, script)
return(NA)
}
# read file
file <- file(script, "r", encoding = getOption("encoding"))
line.list <- readLines(file, warn = FALSE)
close(file)
return(line.list)
})
# case: throw warning if there are inaccessible files
if(length(inaccessible) > 0)
{
warn.msg <- paste(inaccessible, collapse="\n")
warn.msg <- paste("Unable to access the following files:",
warn.msg, sep="\n")
warning(warn.msg, call. = FALSE)
# case: if all files are inaccessible
# return the name column of proc nodes as is
if(length(inaccessible) == length(scripts)) {
return(proc.nodes$name)
}
}
# get full code for each proc node
codes <- sapply(1:nrow(proc.nodes), function(i)
{
node <- proc.nodes[i, ]
script.num <- node$scriptNum
# case: not an operation
# case: script file was not found
script.lines <- lines[[script.num]]
if(length(script.lines) == 1 && is.na(script.lines)) {
return(node$name)
}
if(node$type != "Operation") {
return(node$name)
}
# get full code
# if procedure has more than 1 line,
# collapse the lines into 1 before returning
if(node$endLine - node$startLine == 0) {
return(script.lines[node$startLine])
}
code <- script.lines[node$startLine:node$endLine]
return(paste(code, sep="", collapse = "\n"))
})
return(unname(codes))
}
#' Get the list of script names, returning only basenames.
#' If the script is "", return "console"
#'
#' @return A list of script basenames.
#' @noRd
.get.scripts <- function()
{
# get table of scripts (sourced & main script), save only basename
scripts <- provParseR::get.scripts(.debug.env$prov)$script
scripts <- unname(sapply(scripts, function(script)
{
if(script == "")
return("console")
return(basename(script))
}))
return(scripts)
}
#' For each of the given script numbers, return its corresponding script names.
#'
#' @param script.nums The list of script numbers
#' @param scripts The list of script names
#'
#' @return A list of script names corresponding to the given script numbers
#' @noRd
.get.script.names <- function(script.nums, scripts)
{
# for each script number, return appropriate script name
scriptName <- unname(sapply(script.nums, function(i)
{
if(is.na(script.nums[i]))
return(scripts[1])
return(scripts[i])
}))
return(scriptName)
}
# === FOR TESTING ONLY ======================================================= #
# This function returns provDebugR's debug environment to its initial state.
#
#' @noRd
.clear <- function()
{
# parsed provenance and adjacency graph
.debug.env$prov <- NULL
.debug.env$graph <- NULL
.debug.env$has.graph <- FALSE
# procedure nodes, data nodes
.debug.env$proc.nodes <- NULL
.debug.env$data.nodes <- NULL
# data-to-procedure edges, procedure-to-data edges
.debug.env$data.proc <- NULL
.debug.env$proc.data <- NULL
# environment for loaded variables
.debug.env$var.env <- NULL
# path to provenance directory
.debug.env$prov.dir <- NULL
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.