Nothing
# Copyright (C) President and Fellows of Harvard College and
# Trustees of Mount Holyoke College, 2020, 2021.
# This program is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program. If not, see
# <http://www.gnu.org/licenses/>.
###############################################################################
# === STATE ================================================================== #
#' debug.state
#'
#' debug.state displays the values of all variables in the global environment
#' after execution of a particular line.
#'
#' For each queried line, debug.state returns a data frame showing the state
#' at that line, after it has been executed.
#' Each data frame contains the following columns:
#' \itemize{
#' \item name: The names of variables in the state.
#' \item value: The value of each variable.
#' \item container: The type of the container of each variable.
#' \item dimension: The size of the container.
#' \item type: The data type(s) contained within the container.
#' \item scriptNum: The script number associated with each variable.
#' \item scriptName: The name of the script the variable is associated with.
#' \item startLine: The line number associated with each variable.
#' }
#' If no paramters are given, debug.state will return the state at the end of
#' execution.
#'
#' @param ... The line numbers to be queried.
#' @param script.num The script number of the queried line numbers. This is ignored
#' if no line numbers are given.
#' If script.num == "all", all possible script numbers will be queried.
#' Defaults to script number 1 (main script).
#' @param showType If TRUE, variable container, dimension, and type are displayed.
#'
#' @return debug.state returns a list of data frames of states for each queried line number, or the state
#' at the end of execution if no parameters are given to the function.
#'
#' @examples
#' \dontrun{
#' prov.debug.run("test.R")
#' debug.state()
#' debug.state(5)
#' debug.state(10, 20, script.num = 2)
#' debug.state(5, script.num = "all")
#' }
#'
#' @export
#' @rdname debug.line
debug.state <- function(..., script.num = 1, showType = FALSE)
{
# CASE: no provenance
if(!.debug.env$has.graph)
stop("There is no provenance.")
# STEP: get all possible options
# columns: p.id, startLine, scriptNum, scriptName, code
pos.nodes <- .get.pos.line(.debug.env$proc.nodes)
if(is.null(pos.nodes)) {
cat("There are no lines.\n")
return(invisible(NULL))
}
# STEP: get user's query
# columns: startLine, scriptNum
query <- .get.query.line(..., script.num = script.num, all = FALSE)
# STEP: get valid queries
# columns: p.id, startLine, scriptNum, scriptName, code
valid.queries <- .get.valid.query.state(pos.nodes, query)
# If valid.queries is null, that means there are no valid queries or the
# query itself is empty. In this case, we want to show the state at the
# end of execution. We want to keep a variable to track that the end of
# execution is automatically shown.
end.of.execution <- FALSE
if(is.null(valid.queries)) {
end.of.execution <- TRUE
#cat("No valid queries.\nState at the end of execution:\n")
valid.queries <- pos.nodes[nrow(pos.nodes), c("startLine","scriptNum")]
}
# STEP: Get state for each query
# Keep a vector to keep track of indicies where there are queries with no state
# There should be much fewer cases of no state than those with state.
no.state <- c()
states <- lapply(c(1:nrow(valid.queries)), function(i)
{
# Get the closest procedure node with line number <= queried line number
# this could be the procedure from a previous script,
# or 'p0' indicating the beginning of the execution.
query.line <- .to.int(valid.queries$startLine[i])
query.script <- .to.int(valid.queries$scriptNum[i])
p.id <- .get.closest.proc(pos.nodes, query.line, query.script)
# loop up proc until get one with output node that is a variable
d.id <- .get.last.var(pos.nodes, p.id)
# get state
d.list <- .get.state(d.id)
# case: no state
if(is.null(d.list)) {
no.state <<- append(no.state, i)
cat("There is no state for line ", query.line, " in script ",
query.script, ".\n", sep='')
return(invisible(NULL))
}
# get output for all variables in the state
return(.get.output.state(pos.nodes, d.list))
})
# Remove, if any, elements with no state.
if(length(no.state) > 0) {
valid.queries <- valid.queries[-no.state, ]
states <- states[-no.state]
}
# CASE: no state to display at all
if(length(states) == 0)
return(invisible(NULL))
# re-number rows of the table of queries
row.names(valid.queries) <- c(1:nrow(valid.queries))
names(states) <- row.names(valid.queries)
# If not directly showing the end of execution, print table of queries with state.
if(!end.of.execution) {
.print.state(states, valid.queries, showType)
}
else {
.print.state(states, showType = showType)
}
# Label output with indices of queries, return.
return(invisible(states))
}
#' Returns a table of valid queries.
#' Unlike .get.valid.query.line, all integer line queries are considered valid.
#' columns: startLine, scriptNum
#'
#' @param pos.nodes The table of possible nodes
#' @param query The table of the user's queries.
#'
#' @return The table of valid queries
#' columns: startLine, scriptNum
#'
#' @noRd
.get.valid.query.state <- function(pos.nodes, query)
{
# Case: no queries
if(is.null(query))
return(NULL)
# for each query, return TRUE if valid, FALSE otherwise
valid.indices <- sapply(c(1:nrow(query)), function(i)
{
# Step: check that both start line and script number are integers
query.line <- .to.int(query$startLine[i])
query.script <- .to.int(query$scriptNum[i])
# Case: line number is NA or not an int, or script number is not an int
if(is.null(query.line) || is.na(query.line))
return(FALSE)
if(is.null(query.script))
return(FALSE)
# Step: check if there are proc nodes in that script
script.nodes <- pos.nodes[pos.nodes$scriptNum == query.script, ]
script.nodes <- .remove.na.rows(script.nodes)
# Case: invalid script number
if(nrow(script.nodes) == 0)
return(FALSE)
# script number is valid, line number is an int
return(TRUE)
})
# extract valid queries, ensure unique entries
valid.queries <- .remove.na.rows(unique(query[valid.indices, ]))
if(nrow(valid.queries) == 0)
return(NULL)
# rename columns, return
rownames(valid.queries) <- c(1:nrow(valid.queries))
return(valid.queries)
}
#' Get closest procedure node id with line number <= queried line number
#' Returns NULL if a procedure node is not found.
#'
#' @param pos.nodes The table of all possible nodes.
#' @param line The queried line number.
#' @param script.num The queried script number. Guarenteed to have associated procedure nodes.
#'
#' @return The procedure node id of the closest procedure node with line number less
#' than or equal to the given line number, or NULL if none are found.
#'
#' @noRd
.get.closest.proc <- function(pos.nodes, line, script.num)
{
# Get list of all possible lines for the specified script
script.proc <- pos.nodes[pos.nodes$scriptNum == script.num, ]
script.proc <- .remove.na.rows(script.proc)
if(nrow(script.proc) == 0)
return(NULL)
# Find the index of where the queried line falls right after in the list of
# line numbers of the script, or 0 if the queried line falls before them.
script.index <- .find.num.loc(script.proc$startLine, line)
# Case: 0 (line falls before the list of numbers in the script)
# Returns the p.id of the node before the first node of the script,
# or NULL if the top of the procedure nodes table has been reached.
if(script.index == 0)
{
proc.index <- c(1:nrow(pos.nodes))[pos.nodes$p.id == script.proc$p.id[1]]
proc.index <- proc.index - 1
if(proc.index == 0)
return(NULL)
return(pos.nodes$p.id[proc.index])
}
# Case: line falls somewhere in the list or after it.
return(script.proc$p.id[script.index])
}
#' From the given procedure node id, loop up the table of procedure nodes until
#' an output variable is found. Returns the data node id of the last variable,
#' or NULL if none are found. Also returns NULL if the procedure node id given is NULL.
#'
#' @param pos.nodes The table of all possible nodes.
#' @param The procedure node id where the search for an variable begins.
#'
#' @return The data node id of the variable found, or NULL if none are found.
#' @noRd
.get.last.var <- function(pos.nodes, p.id)
{
# Case: p.is is NULL
if(is.null(p.id))
return(NULL)
# Starting from the given procedure node, loop up the table of
# operations until an output edge to a variable is found.
start.index <- c(1:nrow(pos.nodes))[pos.nodes$p.id == p.id]
for(i in c(start.index:1))
{
# check if there are output data nodes.
p.id <- pos.nodes$p.id[i]
d.nodes <- .debug.env$proc.data$entity[.debug.env$proc.data$activity == p.id]
# case: no output edges found. check next procedure node
if(length(d.nodes) == 0)
next
# for each data node found, check if it is a variable.
# extract the variables
vars <- sapply(d.nodes, function(d.id)
{
node <- .debug.env$data.nodes[.debug.env$data.nodes$id == d.id, ]
return(node$type == "Data" || node$type == "Snapshot")
})
vars <- d.nodes[vars]
# if there are variables, return the id of the last one found
# (there could be multiple)
if(length(vars) > 0)
return(vars[length(vars)])
}
# reaching here means that no output variables could be found. return NULL
return(NULL)
}
#' Returns the state for a given data node as a vector of data node id.
#' This is done by obtaining all data nodes up to the specified data node id
#' and obtaining the last occurance of each unique variable name.
#'
#' @param d.id The data node id for the data node where the state should be
#' obtained for.
#'
#' @return The state. A vector of data node id.
#' @noRd
.get.state <- function(d.id)
{
# Extract variables from data nodes table.
data.nodes <- .debug.env$data.nodes
data.nodes <- data.nodes[(data.nodes$type == "Data" | data.nodes$type == "Snapshot"), ]
data.nodes <- .remove.na.rows(data.nodes)
if(nrow(data.nodes) == 0)
return(NULL)
# Get the id and name of data nodes where fromEnv is TRUE, if any.
# These are guarenteed to be variables.
vars <- data.nodes[data.nodes$fromEnv, c("id", "name")]
# Then, get the variables up until the specified data node id.
# This is appended to the table of fromEnv nodes before
# obtaining the last occurnce of each unique variable.
# fromEnv nodes will always have unique variable names.
# If d.id is NULL, just the fromEnv variables are returned.
if(!is.null(d.id))
{
# Get all data nodes up to specified d.id
max.index <- c(1:nrow(data.nodes))[data.nodes$id == d.id]
vars <- rbind(vars, data.nodes[c(1:max.index), c("id", "name")], stringsAsFactors = FALSE)
# For each unique variable name, get the last data node id,
# if there are multiple occurrences of it.
unique.names <- unique(vars$name)
if(length(unique.names) < nrow(vars))
{
state <- sapply(unique.names, function(var.name)
{
# get all data nodes for the specified variable name
id.list <- vars$id[vars$name == var.name]
# sort by increasing data node number
id.nums <- as.integer(sub('d', '', id.list))
id.list <- id.list[order(id.nums)]
# return the last node
return(id.list[length(id.list)])
})
return(state)
}
}
# If there are no variables, there is no state. Return NULL.
if(nrow(vars) == 0)
return(NULL)
# Reaching here means all unique variable names have 1 data node associated
# Sort table by increasing d.id, return.
id.nums <- as.integer(sub('d', '', vars$id))
return(vars$id[order(id.nums)])
}
#' From the given list of data node id, form user output.
#' columns: name, value, container, dimension, type, scriptNum, startLine
#'
#' @param pos.proc The table of all possible procedure nodes.
#' @param id.list A vector of data node id which forms the state.
#'
#' @return The state. A data frame.
#' columns: name, value, container, dimension, type,
#' scriptNum, scriptName, startLine
#'
#' @noRd
.get.output.state <- function(pos.proc, id.list)
{
# For each variable in the state, obtain fields:
# var name, value, valType, scriptNum, startLine
rows <- lapply(id.list, function(d.id)
{
d.node <- .debug.env$data.nodes[.debug.env$data.nodes$id == d.id, ]
# Get fields from data node table
# columns: name, value
d.fields <- d.node[ , c("name", "value")]
# Get valType (remove id column)
val.type <- provParseR::get.val.type(.debug.env$prov, d.id)
val.type <- val.type[ , c("container", "dimension", "type")]
# Get start line and script number
# For fromEnv variables, these are NA.
if(d.node$fromEnv)
{
p.fields <- data.frame(scriptNum = NA,
scriptName = NA,
startLine = NA,
stringsAsFactors = FALSE)
}
else
{
# For non-fromEnv variables, find the procedure node associated with it.
# As the variables in the state are either fromEnv nodes or were produced
# by an operation within the script, non-fromEnv variables will always
# have 1 procedure-to-data edge linking it to an Operation procedure node.
p.id <- .get.p.id(d.id)
# Get fields from procedure node (scriptNum, startLine)
p.fields <- pos.proc[pos.proc == p.id, ]
p.fields <- p.fields[ , c("scriptNum", "scriptName", "startLine")]
p.fields <- .remove.na.rows(p.fields)
}
# Combine fields into a row
return(cbind(d.fields, val.type, p.fields, stringsAsFactors = FALSE))
})
# Combine rows into a data frame.
return(.form.df(rows))
}
#' Prints the state of all variables at a particular line or lines.
#'
#' @param states the list of variable states for all lines queried.
#' @param valid.queries the list of all valid lines from the user's query.
#' @param showType if TRUE, then the container, dimension, and type of variables
#' are also printed.
#'
#' @noRd
.print.state <- function(states, valid.queries = NA, showType) {
# print script numbers, if multiple scripts
num.scripts <- .print.script.nums()
# check if this is the state at the end of execution
if(length(valid.queries) == 1 && is.na(valid.queries)) {
cat("No valid queries.\nState at the end of execution:\n")
# print state
lapply(c(1:nrow(states[[1]])), function(j) {
# if only one script, print just line number
if (num.scripts == 1) {
cat(paste("\t", states[[1]]$startLine[j], ": ", sep=""))
}
else {
cat(paste("\t", states[[1]]$scriptNum[j], ", ",
states[[1]]$startLine[j], ": ", sep=""))
}
# print variable name and value
if (nchar(states[[1]]$value[j]) > 50)
cat(paste("\t", states[[1]]$name[j], "\t",
substring(states[[1]]$value[j], 1, 47), " ...\n", sep = ""))
else
cat(paste("\t", states[[1]]$name[j], "\t", states[[1]]$value[j], "\n",
sep = ""))
# print valType info, if desired
if (showType == TRUE) {
print(states[[1]][j, c(3:5)], right = FALSE)
}
})
}
else {
# loop through and print all valid queries before any details
cat('Results for line(s): ')
lapply(c(1:nrow(valid.queries)), function(i) {
if (i == nrow(valid.queries)) {
# on last loop, no comma
cat(paste(valid.queries$startLine[i], "\n\n"))
}
else {
cat(paste(valid.queries$startLine[i], ", ", sep=""))
}
})
# print variable details
lapply(c(1:nrow(valid.queries)), function(i) {
# line information
# if only one script, print just line number
if (num.scripts == 1) {
cat(paste("Line", valid.queries$startLine[i], "\n"))
}
else {
cat(paste("Script ", valid.queries$scriptNum[i], ", line ",
valid.queries$startLine[i], "\n", sep=""))
}
# print state for each valid line
lapply(c(1:nrow(states[[i]])), function(j) {
# if only one script, print just line number
if (num.scripts == 1) {
cat(paste("\t", states[[i]]$startLine[j], ": ", sep=""))
}
else {
cat(paste("\t", states[[i]]$scriptNum[j], ", ",
states[[i]]$startLine[j], ": ", sep=""))
}
# print variable name and value
if (nchar(states[[i]]$value[j]) > 50)
cat(paste("\t",states[[i]]$name[j], "\t",
substring(states[[i]]$value[j], 1, 47), " ...\n", sep = ""))
else
cat(paste("\t", states[[i]]$name[j], "\t", states[[i]]$value[j], "\n",
sep = ""))
# print valType info, if desired
if (showType == TRUE) {
print(states[[i]][j, c(3:5)], right = FALSE)
}
})
cat("\n") # add an extra space between iterations
})
}
if (showType == FALSE)
cat("\nRerun with showType = TRUE to see more detailed variable information.
\n")
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.