# Copyright (C) President and Fellows of Harvard College and
# Trustees of Mount Holyoke College, 2014, 2015, 2016, 2017, 2018.
# This program is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program. If not, see
# <http://www.gnu.org/licenses/>.
########################## DataNodes.R ###########################
#
# This file contains functions involved in managing data nodes.
# Data nodes are stored in a data frame, with one row for each node.
# The columns of the data frame are:
#
# type - the type of node, one of Data, File, Exception, URL, Snapshot
# num - a unique number
# name - the label for the node
# value - often this is the same as the name
# val.type - a description of the type of the data value
# scope - the scope that the data value is defined in
# from.env - if the value was set globally before the script ran
# time - a timestamp
# hash - hash value for files
# rw - either "read" or "write", only used for files
# loc - absolute path to the original file
#
# ddg.pnum is the number associated with the last procedure node created
#
# This file also contains functions that create data nodes of various types.
#' .ddg.init.data.nodes initializes the information needed to manage data nodes.
#' @return nothing
.ddg.init.data.nodes <- function () {
.ddg.set("ddg.data.nodes", .ddg.create.data.node.rows())
.ddg.set("ddg.dnum", 0)
# Name of the last variable that was a plot created by ggplot
.ddg.set (".ddg.last.ggplot", "")
# Set max.snapshot.size. Make sure it is not already set, as
# someone may have called prov.set.detail.
if (!.ddg.is.set("ddg.max.snapshot.size")) {
.ddg.set("ddg.max.snapshot.size", 0)
}
}
#' .ddg.max.snapshot.size returns the current maximum size for snapshots
#' in kilobytes.
#' @return maximum snapshot size in KB
.ddg.max.snapshot.size <- function() {
return(.ddg.get("ddg.max.snapshot.size"))
}
#' .ddg.is.data.type returns TRUE for any type of data node.
#' This is used for type-checking.
#' @param type data node type.
#' @return true for any type of data node
.ddg.is.data.type <- function(type) {
return(type %in% c("Data", "Snapshot", "File", "URL", "Exception"))
}
#' .ddg.dnum returns the counter used to assign data node ids
#' @return the data node id of the last data node created
.ddg.dnum <- function() {
return (.ddg.get("ddg.dnum"))
}
#' .ddg.create.data.node.rows create a data frame of empty rows to put in
#' the data node table.It is faster to add a bunch of empty rows and update them
#' than to add one row at a time
#' @param size the number of rows to add
#' @return a data frame with size rows, with all columns being empty vectors
.ddg.create.data.node.rows <- function (size=100) {
return (
data.frame(
ddg.type = character(size),
ddg.num = numeric(size),
ddg.name = character(size),
ddg.value = character(size),
ddg.val.type = character(size),
ddg.scope = character(size),
ddg.from.env = logical(size),
ddg.time = character(size),
ddg.hash = character(size),
ddg.rw = character(size),
ddg.loc = character(size),
stringsAsFactors=FALSE))
}
#' .ddg.data.node.table returns the data node table
#' @return the data node table
.ddg.data.node.table <- function() {
return (.ddg.get("ddg.data.nodes"))
}
#' .ddg.data.nodes returns the filled rows of the data node table
#' @return the filled rows of the data node table
.ddg.data.nodes <- function() {
ddg.data.nodes <- .ddg.get("ddg.data.nodes")
return (ddg.data.nodes [ddg.data.nodes$ddg.num > 0, ])
}
#' .ddg.set.hash sets the hash and rw fields for a data node
#' @param dnum the id of the node to set
#' @param hash the hash value to use
#' @param rw the rw value to use
#' @return nothing
.ddg.set.hash <- function (dnum, hash, rw) {
ddg.data.nodes <- .ddg.data.node.table()
ddg.data.nodes$ddg.hash[dnum] <- hash
ddg.data.nodes$ddg.rw[dnum] <- rw
.ddg.set("ddg.data.nodes", ddg.data.nodes)
}
#' .ddg.data.node.exists searches the data node table for a matching
#' data node and returns TRUE if a match is found. If no match is found, it
#' checks if a variable with that name exists in the global environment.
#' If a match is found, it creates a data node for the global and returns TRUE.
#' If no node exists and it is not a global, it returns FALSE.
#' @param dname data node name
#' @param dscope data node scope. If NULL, uses the closest scope in which
#' dname is defined
#' @return true if a node with the given name exists
.ddg.data.node.exists <- function(dname, dscope=NULL) {
if (is.null(dscope)) dscope <- .ddg.get.scope(dname)
# Search data nodes table.
#print (paste (".ddg.data.node.exists: Looking for", dname, "in scope", dscope))
ddg.data.nodes <- .ddg.data.node.table()
matching <- ddg.data.nodes [ddg.data.nodes$ddg.name == dname &
(ddg.data.nodes$ddg.scope == "ddg.library" |
ddg.data.nodes$ddg.scope == dscope), ]
if (nrow (matching) > 0) {
return(TRUE)
}
# Search initial environment table.
if (dscope == "R_GlobalEnv") {
#print("Searching global environment")
if (exists(dname, globalenv())) {
dvalue <- get(dname, envir = globalenv())
if (!is.function(dvalue)) {
.ddg.save.data(dname, dvalue, scope=dscope, from.env=TRUE)
return (TRUE)
}
}
}
#print(".ddg.data.node.exists NOT found")
return(FALSE)
}
#' .ddg.data.number retrieves the number of the nearest preceding
#' current matching data node. It returns zero if no match is found.
#' @param dname data node name.
#' @param dscope (optional) data node scope. If not provided, it uses
#' the closest scope in which dname is found
#' @return the id of the matching data node, or 0 if none was found
.ddg.data.number <- function(dname, dscope=NULL) {
if (is.null(dscope)) dscope <- .ddg.get.scope(dname)
ddg.data.nodes <- .ddg.data.node.table()
matching <- ddg.data.nodes [ddg.data.nodes$ddg.name == dname &
(ddg.data.nodes$ddg.scope == "ddg.library" |
ddg.data.nodes$ddg.scope == dscope), ]
if (nrow (matching) > 0) {
return (matching$ddg.num[nrow(matching)])
}
# Error message if no match found.
error.msg <- paste("No data node found for", dname)
.ddg.insert.error.message(error.msg)
return(0)
}
#' .ddg.save.debug.data.nodes writes the data nodes to a csv table. Useful for debugging.
#' The file will be in the debug directory in a file called data-nodes.csv
#' @return nothing
.ddg.save.debug.data.nodes <- function () {
# Save data nodes table to file.
fileout <- paste(.ddg.path.debug(), "/data-nodes.csv", sep="")
ddg.data.nodes <- .ddg.data.nodes()
utils::write.csv(ddg.data.nodes, fileout, row.names=FALSE)
}
#' .ddg.record.data records a data node in the data node table.
#' @param dtype data node type.
#' @param dname data node name.
#' @param dvalue data node value.
#' @param value the value of the data. For some node types, this will be the same as dvalue;
#' sometimes it is different. For example, for snapshot nodes, dvalue is the location
#' of the snapshot file, while value is the data itself.
#' @param dscope data node scope.
#' @param from.env (optional) if object is from initial environment. Default is FALSE.
#' @param dtime (optional) timestamp of original file.
#' @param dloc (optional) path and name of original file.
#' @return nothing
.ddg.record.data <- function(dtype, dname, dvalue, value, dscope, from.env=FALSE,
dtime="", dloc="") {
#print("In .ddg.record.data")
#print(paste("dvalue =", utils::head(dvalue)))
#print(paste("value =", utils::head(value)))
#print (sys.calls())
if (!.ddg.is.data.type (dtype)) {
print (paste (".ddg.record.data: bad value for dtype - ", dtype))
}
# Increment data node counter.
ddg.dnum <- .ddg.inc("ddg.dnum")
# If the table is full, make it bigger.
ddg.data.nodes <- .ddg.data.node.table()
if (nrow(ddg.data.nodes) < ddg.dnum) {
ddg.data.nodes <- .ddg.add.rows("ddg.data.nodes", .ddg.create.data.node.rows ())
}
dvalue2 <-
if (length(dvalue) > 1 || !is.atomic(dvalue)) "complex"
else if (!is.null(dvalue)) dvalue
else ""
# get value type
val.type <- .ddg.get.val.type.string(value)
#print(".ddg.record.data: adding info")
ddg.data.nodes$ddg.type[ddg.dnum] <- dtype
ddg.data.nodes$ddg.num[ddg.dnum] <- ddg.dnum
ddg.data.nodes$ddg.name[ddg.dnum] <- dname
ddg.data.nodes$ddg.value[ddg.dnum] <- dvalue2
ddg.data.nodes$ddg.val.type[ddg.dnum] <- val.type
ddg.data.nodes$ddg.scope[ddg.dnum] <- dscope
ddg.data.nodes$ddg.from.env[ddg.dnum] <- from.env
ddg.data.nodes$ddg.hash[ddg.dnum] <- ""
ddg.data.nodes$ddg.rw[ddg.dnum] <- ""
ddg.data.nodes$ddg.time[ddg.dnum] <- dtime
ddg.data.nodes$ddg.loc[ddg.dnum] <- dloc
.ddg.set("ddg.data.nodes", ddg.data.nodes)
# Output data node.
#print(".ddg.record.data outputting data node")
if (dtype == "File") {
dhash <- .ddg.calculate.hash(dname)
drw <- .ddg.calculate.rw(dname)
.ddg.set.hash (ddg.dnum, dhash, drw)
# .ddg.add.to.hashtable(dname = dname, ddg.dnum = ddg.dnum, dloc = dloc,
# dvalue = dvalue, dtime = dtime)
}
if (.ddg.debug.lib()) {
if (dtype != "File") {
print(paste("Adding data node", ddg.dnum, "named", dname,
"with scope", dscope,
" and value ", ddg.data.nodes$ddg.value[ddg.dnum]))
} else {
# Get the table again because .ddg.add.to.hashtable changed it.
ddg.data.nodes <- .ddg.data.node.table()
print(paste("Adding data node", ddg.dnum, "named", dname,
"with scope", dscope,
" and value ", ddg.data.nodes$ddg.value[ddg.dnum],
" that hashes to ", ddg.data.nodes$ddg.hash[ddg.dnum],
" and performs a file ", ddg.data.nodes$ddg.rw[ddg.dnum]))
}
}
}
#' .ddg.get.val.type.string returns the type information for a given value as a string.
#' "null" for null values. For values of length 1, it is the type of the value.
#' For longer values, the description includes the container (like vector, matrix, ...),
#' the dimensions, and the type of the members of the data structure
#' @param value the value
#' @return the type information as a string
.ddg.get.val.type.string <- function(value)
{
val.type <- .ddg.get.val.type(value)
if( is.null(val.type) )
return( "null" )
# list, object, environment, function, language
if( length(val.type) == 1 )
return( paste('"', val.type, '"', sep="") )
# vector, matrix, array, data frame
# type information recorded in a list of 3 vectors (container,dimension,type)
container <- val.type[[1]]
dimension <- val.type[[2]]
type <- val.type[[3]]
# matrix: a 2-dimensional array (uniform typing)
# array: n-dimensional (uniform typing)
# data frame: list of vectors
if( !identical(container, "vector"))
{
# Record size of each dimension
dimension <- paste( dimension, collapse = "," )
# data frame. Record type of each column
if (identical(container, "data_frame")) {
type <- paste( type, collapse = '","' )
}
}
return( paste('{"container":"', container,
'", "dimension":[', dimension,
'], "type":["', type, '"]}', sep = "") )
}
#' .ddg.get.val.type returns the type information for a given value
#' There are several return types possible:
#' For lists, objects, environments, functions and language values,
#' the return type is string. For vectors, matrices, arrays
#' and data frames, a list is returned. The list contains 3 parts: a
#' string representation of the container, dimension information,
#' and the types of values in the structure. If anything else
#' is found, it returns NULL.
#' @param value the value
#' @return the type information
.ddg.get.val.type <- function(value)
{
# vector: a 1-dimensional array (uniform typing)
if(is.vector(value))
return( list("vector", length(value), .ddg.get.lowest.class(value)) )
# matrix: a 2-dimensional array (uniform typing)
if(is.matrix(value))
return( list("matrix", dim(value), .ddg.get.lowest.class(value[1])) )
# array: n-dimensional (uniform typing)
if(is.array(value))
return( list("array", dim(value), .ddg.get.lowest.class(value[1])) )
# data frame: is a type of list
if(is.data.frame(value))
{
types <- unname(sapply(value, .ddg.get.lowest.class))
return( unname(list("data_frame", dim(value), types)) )
}
# a list
if(is.list(value))
return("list")
# an object
if(is.object(value))
return("object")
# envrionment, function, language
if(is.environment(value))
return("environment")
if(is.function(value))
return("function")
if(is.language(value))
return("language")
# none of the above - null is a character, not NULL or NA
return(NULL)
}
#' .ddg.get.lowest.class returns the first element that the function class returns.
#' When inheritance is used, this is the lowest type.
#' @param obj object
#' @return first element returned by class
.ddg.get.lowest.class <- function( obj )
{
return( class(obj)[1] )
}
#' .ddg.data.node creates a data node of type Data. Data nodes are
#' used for single data values. The value (dvalue) is stored in the
#' DDG.
#' @param dtype type of data node.
#' @param dname name of data node.
#' @param dvalue value of data node.
#' @param dscope scope of data node.
#' @param from.env if object is from initial environment
#' @return nothing
.ddg.data.node <- function(dtype, dname, dvalue, dscope, from.env=FALSE) {
#print ("In .ddg.data.node")
#print(paste(".ddg.data.node: dname =", dname))
#print(paste(".ddg.data.node: typeof(dvalue) =", typeof(dvalue)))
#print(paste(".ddg.data.node: dvalue =", dvalue))
#print(paste(".ddg.data.node: dscope =", dscope))
# If object or a long list, try to create snapshot node.
if (is.object(dvalue)) {
if (.ddg.is.connection(dvalue)) {
val <- showConnections(TRUE)[as.character(dvalue[1]), "description"]
# Record in data node table
.ddg.record.data(dtype, dname, val, val, dscope, from.env=from.env)
if (.ddg.debug.lib()) print(paste("data.node:", dtype, dname))
return()
}
else {
tryCatch(
{
.ddg.snapshot.node (dname, "txt", dvalue, dscope=dscope, from.env=from.env)
return()
},
error = function(e) {
error.msg <- paste("Unable to create snapshot node for", dname, "Details:", e)
.ddg.insert.error.message(error.msg)
.ddg.data.node (dtype, dname, "complex", dscope, from.env=from.env)
return ()
}
)
}
}
else if (is.matrix(dvalue) ||
(is.vector(dvalue) && !is.character(dvalue) && length(dvalue) > 20)) {
.ddg.snapshot.node (dname, "csv", dvalue, dscope=dscope, from.env=from.env)
return ()
}
#print("Converting value to a string")
# Convert value to a string.
val <-
if (is.list(dvalue)) {
tryCatch(
{
dvalue <- .ddg.convert.list.to.string(dvalue)
},
error = function(e) {
error.msg <- paste("Unable to convert value of", dname, "to a string.")
.ddg.insert.error.message(error.msg)
"complex"
}
)
}
else if (typeof(dvalue) == "closure") "#ddg.function"
else if (length(dvalue) > 1 || !is.atomic(dvalue)) {
tryCatch(paste(.ddg.remove.tab.and.eol.chars(dvalue), collapse=","),
error =
function(e) {
"complex"
}
)
}
else if (is.null(dvalue)) "NULL"
else if (length(dvalue) == 0) "Empty"
else if (is.na(dvalue)) "NA"
else if (dvalue == "complex" || dvalue == "#ddg.function") dvalue
else if (is.character(dvalue) && dvalue == "") "NotRecorded"
else {
.ddg.remove.tab.and.eol.chars(dvalue)
}
if (grepl("\n", val)) {
# Create snapshot node.
.ddg.snapshot.node (dname, "txt", val, from.env=from.env)
return ()
}
else {
# Get scope if necessary.
if (is.null(dscope)) dscope <- .ddg.get.scope(dname)
# Record in data node table
.ddg.record.data(dtype, dname, val, val, dscope, from.env=from.env)
if (.ddg.debug.lib()) print(paste("data.node:", dtype, dname))
}
invisible()
}
#' .ddg.snapshot.node creates a data node of type Snapshot. Snapshots
#' are used for complex data values not written to file by the main
#' script. The contents of data are written to the file dname.fext
#' in the DDG directory. Snapshots are also used to capture output
#' plots and other graphics generated by the R script.
#' The user can control the size of the snapshot files by setting the
#' max.snapshot.size parameter when calling prov.init or prov.run. If
#' the user passes in 0, no snapshots are saved.
#' Instead a data node will be created. If the user passes in -1,
#' there is no limit on the snapshot size. If the user passes a value > 0,
#' if the R value is larger than this size, only the head of the data will
#' be saved.
#' @param dname name of data node.
#' @param fext file extension.
#' @param data value of data node.
#' @param save.object (optional) if TRUE, also save as an R object. Default FALSE
#' @param dscope (optional) scope of data node. Default NULL
#' @param from.env (optional) true if a value set outside the script. Default FALSE
#' @return path and name of snapshot file
.ddg.snapshot.node <- function(dname, fext, data, save.object = FALSE,
dscope=NULL, from.env=FALSE) {
orig.data <- data
# Determine if we should save the entire data
max.snapshot.size <- .ddg.max.snapshot.size()
# Don't save the data
if (max.snapshot.size == 0) {
return(.ddg.data.node ("Data", dname, "", dscope, from.env=from.env))
}
# object.size returns bytes, but max.snapshot.size is in kilobytes
if (max.snapshot.size == -1 || utils::object.size(data) < max.snapshot.size * 1024) {
full.snapshot <- TRUE
}
else if (is.vector(data) || is.list(data) || is.data.frame(data) ||
is.matrix(data) || is.array(data)) {
# Decide how much data to save
element.size <- utils::object.size(utils::head(data, 1))
num.elements.to.save <- ceiling(max.snapshot.size * 1024 / element.size)
if (num.elements.to.save < length(data)) {
data <- utils::head(data, num.elements.to.save)
full.snapshot <- FALSE
}
else {
full.snapshot <- TRUE
}
}
else {
full.snapshot <- FALSE
}
snapname <-
if (full.snapshot) dname
else paste(dname, "-PARTIAL", sep="")
# Snapshot type
dtype <- "Snapshot"
# If the object is an environment, update the data to be the environment's
# name followed by a list of the variables bound in the environment.
if (is.environment (data)) {
envHeader <- paste0 ("<environemnt: ", environmentName (data), ">")
data <- c (envHeader, ls(data), recursive=TRUE)
}
else if ("XMLInternalDocument" %in% class(data)) {
fext <- "xml"
}
else if (is.vector(data) || is.data.frame(data) || is.matrix(data) ||
is.array(data) || is.list(data)) {
}
else if (!is.character(data)) {
tryCatch(data <- as.character(data),
error = function(e){
# Not sure if str or summary will give us the most useful
# information.
#data <- utils::capture.output(str(data));
data <- summary(data)
})
}
# Default file extensions.
dfile <-
if (fext == "" || is.null(fext)) paste(.ddg.dnum()+1, "-", snapname, sep="")
else paste(.ddg.dnum()+1, "-", snapname, ".", fext, sep="")
# Get path plus file name.
dpfile <- paste(.ddg.path.data(), "/", dfile, sep="")
if (.ddg.debug.lib()) print(paste("Saving snapshot in ", dpfile))
# Write to file .
if (fext == "csv") utils::write.csv(data, dpfile, row.names=FALSE)
else if (fext == "xml") XML::saveXML (data, dpfile)
# Capture graphic.
else if (.ddg.supported.graphic(fext)) .ddg.graphic.snapshot(fext, dpfile)
# Write out RData (this is old code, not sure if we need it).
else if (fext == "RData") {
file.rename(paste(.ddg.path.data(), "/", dname, sep=""), dpfile)
}
# Write out text file for txt or empty fext.
else if (fext == "txt" || fext == "") {
file.create(dpfile, showWarnings=FALSE)
if (is.list(data) && length(data) > 0) {
list.as.string <- .ddg.convert.list.to.string(data)
write(list.as.string, dpfile)
}
else {
tryCatch(write(as.character(data), dpfile),
error = function(e){
utils::capture.output(data, file=dpfile)
})
}
}
# Write out data node object if the file format is unsupported.
else {
error.msg <- paste("File extension", fext, "not recognized")
.ddg.insert.error.message(error.msg)
return(NULL)
}
# Check to see if we want to save the object.
if (save.object && full.snapshot) {
save(data, file = paste(.ddg.path.data(), "/", .ddg.dnum()+1, "-", snapname,
".RObject", sep=""), ascii = TRUE)
}
dtime <- .ddg.timestamp()
# Get scope if necessary.
if (is.null(dscope)) dscope <- .ddg.get.scope(dname)
# Record in data node table
.ddg.record.data(dtype, dname, paste(.ddg.data.dir(), dfile, sep="/"),
orig.data, dscope, from.env=from.env, dtime)
if (.ddg.debug.lib()) print(paste("snapshot.node: ", dname))
return(dpfile)
}
#' .ddg.supported.graphic - the sole purpose of this function is
#' to verify that the input file extension is a supported graphic
#' type. Currently supported graphics types inlude: jpg, jpeg,
#' bmp, png, tiff.
#' @param ext file extension.
#' @return TRUE if the extension passed in is a known graphics type
.ddg.supported.graphic <- function(ext){
return(ext %in% c("jpeg", "jpg", "tiff", "png", "bmp", "pdf"))
}
#' .ddg.file.copy creates a data node of type File. File nodes are
#' used for files written by the main script. A copy of the file is
#' written to the DDG directory.
#' @param fname path and name of original file.
#' @param dname name of data node.
#' @param dscope scope of data node.
#' @return nothing
.ddg.file.copy <- function(fname, dname=NULL, dscope=NULL) {
# Calculate location of original file.
file.loc <- normalizePath(fname, winslash="/", mustWork = FALSE)
# Copy file.
if (file.exists(file.loc)) {
# Create file node in DDG.
dpfile.out <- .ddg.file.node("File", fname, dname, dscope)
file.copy(file.loc, dpfile.out, overwrite=TRUE)
}
else {
# For zipfiles,
file.loc <- normalizePath(dname, winslash="/", mustWork = FALSE)
if (file.exists(file.loc)) {
# Create file node in DDG.
dpfile.out <- .ddg.file.node("File", fname, dname, dscope)
file.copy(file.loc, dpfile.out, overwrite=TRUE)
}
else {
error.msg <- paste("File to copy does not exist:", fname)
.ddg.insert.error.message(error.msg)
return()
}
}
if (.ddg.debug.lib()) print(paste("file.copy: FILE ", file.loc))
return ()
}
#' .ddg.file.node creates a node of type File. File nodes are used
#' for files written to the DDG directory by capturing output from
#' the script or by copying a file that is written by the script.
#' Returns the path where the file referenced by the node is stored.
#' @param dtype - type of data node.
#' @param fname - path and name of original file.
#' @param dname - name of data node.
#' @param dscope (optional) - scope of data node.
#' @return the full path to the saved file
.ddg.file.node <- function(dtype, fname, dname, dscope=NULL) {
# Get original file location.
file.name <- basename(fname)
file.loc <- normalizePath(fname, winslash="/", mustWork = FALSE)
# Add number to file name.
dfile <- paste(.ddg.dnum()+1, "-", file.name, sep="")
# Calculate the path to the file relative to the ddg directory.
# This is the value stored in the node.
dpfile <- paste(.ddg.data.dir(), dfile, sep="/")
# Set the node label.
if (is.null(dname)) dname <- file.name
# Get scope if necessary.
if (is.null(dscope)) dscope <- .ddg.get.scope(dname)
# Record in data node table
.ddg.record.data(dtype, dname, dpfile, dpfile, dscope, from.env=FALSE,
dtime=.ddg.timestamp(), file.loc)
# Get path plus file name to where the file will be copied
dpath <- paste(.ddg.path.data(), "/", dfile, sep="")
return(dpath)
}
#' .ddg.url.node creates a node of type URL. URL nodes are used
#' for URLs and also for server connections.
#' @param original the actual url or server connection description
#' @param saved the name of the file where a copy has been saved
#' @return nothing
.ddg.url.node <- function(original, saved) {
# Record in data node table
.ddg.record.data("URL", original, saved, saved,
dscope=environmentName(.GlobalEnv), from.env=FALSE, dtime=.ddg.timestamp())
}
#' .ddg.is.graphic tries to decipher if the value snapshot should be
#' written to file directly from the data or if it is a graphic which
#' can be captured from the image device. This function, as written,
#' is basically a hack. There must be a better way to implement it.
#' @param value value to test
#' @return TRUE if the value is in the gg or ggplot class
.ddg.is.graphic <- function(value){
# Matching any of these classes automatically classifies the
# object as a graphic.
graph.classes <- list("gg", "ggplot")
return(is.object(value) && any(class(value) %in% graph.classes))
}
#' .ddg.is.simple returns TRUE if the value passed in is a simple
#' data value which should be saved locally as opposed to stored
#' in a separate file.
#' @param value value to test
#' @return TRUE for NULL and for vectors of length 1
.ddg.is.simple <- function(value) {
# Note that is.vector returns TRUE for lists, so we need to check
# lists separately. Since every value in a list can have a
# different type, if it is a list, we will assume the value is
# complex. We consider NULL values to be simple.
return((!.ddg.is.graphic(value) &&
!is.list(value) &&
is.vector(value) &&
length(value) == 1) ||
is.null(value))
}
#' .ddg.is.csv returns TRUE if the value passed in should be saved
#' as a csv file
#' @param value value to test
#' @return TRUE for vectors longer than 1, and for all matrices and data frames
.ddg.is.csv <- function(value) {
return(!.ddg.is.simple(value) &&
((is.vector(value) && !is.list(value)) || is.matrix(value) ||
is.data.frame(value)))
}
#' .ddg.is.object returns TRUE if the value is determined to be an
#' object by our standards.
#' @param value value to test
#' @return TRUE for objects and environments
.ddg.is.object <- function(value){
return(is.object(value) || is.environment(value))
}
#' .ddg.save.data takes as input the name and value of a data node
#' that needs to be created. It determines how the data should be
#' output (or saved) and saves it in that format.
#' @param name name of created node.
#' @param value value of created node.
#' @param graphic.fext (optional) file extension for graphic file. Default is jpeg
#' @param error (optional) if TRUE, raise an R error rather than a
#' DDG error.
#' @param scope default is NULL
#' @param from.env if node is from initial environment
#' @param stack (optional) stack to use in determing scope.
#' @param env (optional) default is NULL
#' @return nothing
.ddg.save.data <- function(name, value, graphic.fext="jpeg", error=FALSE,
scope=NULL, from.env=FALSE, stack=NULL, env=NULL){
if (is.null(scope)) {
scope <- .ddg.get.scope(name, calls=stack, env=env)
}
# Determine type for value, and save accordingly.
if (.ddg.is.graphic(value)) {
.ddg.write.graphic(name, value, graphic.fext, scope=scope, from.env=from.env)
}
else if (.ddg.is.simple(value)) {
.ddg.save.simple(name, value, scope=scope, from.env=from.env)
}
else if (.ddg.is.csv(value)) .ddg.write.csv(name, value, scope=scope, from.env=from.env)
else if (is.list(value) || is.array(value)) {
.ddg.snapshot.node(name, "txt", value, save.object=TRUE, dscope=scope,
from.env=from.env)
}
else if (.ddg.is.connection(value)) {
.ddg.save.simple(name, value, scope=scope, from.env=from.env)
}
else if (.ddg.is.object(value)) {
.ddg.snapshot.node(name, "txt", value, dscope=scope, from.env=from.env)
}
else if (is.function(value)) {
.ddg.save.simple(name, "#ddg.function", scope=scope, from.env=from.env)
}
else if (error) stop("Unable to create data (snapshot) node.")
else {
error.msg <- paste("Unable to create data (snapshot) node.")
.ddg.insert.error.message(error.msg)
}
invisible()
}
#' .ddg.save.simple takes in a simple name-value pair and creates
#' a data node. Extra long strings are saved as snapshots.
#' @param name data node name.
#' @param value data node value.
#' @param scope data node scope.
#' @return nothing
.ddg.save.simple <- function(name, value, scope=NULL, from.env=FALSE) {
# Save extra long strings as snapshot.
if (is.character(value) && nchar(value) > 200) {
.ddg.snapshot.node(name, "txt", value, dscope=scope, from.env=from.env)
} else {
# Save the true value.
.ddg.data.node("Data", name, value, scope, from.env=from.env)
}
}
#' .ddg.write.graphic takes as input the name of a variable as well
#' as its value and attempts to write it out as a graphics file. If
#' all else fails, it writes out the information as a text file and
#' also writes out an RData Object which can later be read back into
#' the system.
#' @param name data node name.
#' @param value data node value.
#' @param fext file extension.
#' @param scope data node scope.
#' @param from.env If TRUE, means the value was assigned outside the script
#' @return nothing
.ddg.write.graphic <- function(name, value=NULL, fext="jpeg", scope=NULL, from.env=FALSE){
# Remember the name of the variable so that we can link to it if ggsave is
# called later without a plot parameter.
.ddg.set (".ddg.last.ggplot", name)
# Try to output graphic value.
.ddg.snapshot.node(name, "txt", value, save.object = TRUE, dscope=scope,
from.env=from.env)
}
#' .ddg.graphic.snapshot copies a graphics value into a snapshot file
#' @param fext file extension.
#' @param dpfile path and name of file to copy
#' @return nothing
.ddg.graphic.snapshot <-function(fext, dpfile) {
# pdfs require a separate procedure.
if (fext == "pdf") grDevices::dev.copy2pdf(file=dpfile)
# At the moment, all other graphic types can be done by
# constructing a similar function.
else {
# If jpg, we need to change it to jpeg for the function call.
fext <- ifelse(fext == "jpg", "jpeg", fext)
# First, we create a string, then convert it to an actual R
# expression and use that as the function.
strFun <- paste(fext, "(filename=dpfile, width=800, height=500)", sep="")
parseFun <-
function(){
eval(parse(text=strFun))
}
grDevices::dev.copy(parseFun)
# Turn it off (this switches back to prev device).
if (grDevices::dev.cur() != 1) {
grDevices::dev.off()
}
}
}
#' .ddg.write.csv takes as input a name-value pair for a
#' variable and attempts to save the data as a csv file. It does
#' not create any edges but does add the node to the DDG. Edge
#' creation should occur from wherever this function is called.
#' @param name data node name.
#' @param value data node value.
#' @param scope data node scope.
#' @param from.env TRUE if defined outside the script
#' @return nothing
.ddg.write.csv <- function(name, value, scope=NULL, from.env=FALSE) {
tryCatch(
{
.ddg.snapshot.node(name, "csv", value, dscope=scope, from.env=from.env)
}
, error = function(e) {
# warning(paste("Attempted to write", name,
# "as .csv snapshot but failed. Out as RDataObject.", e))
.ddg.snapshot.node(name, "txt", value, save.object = TRUE,
dscope=scope, from.env=from.env)
}
)
}
#' .ddg.convert.list.to.string converts a list of values to a string
#' by calling as.character on each element in the list.
#' @param dvalue a list of values.
#' @return a string showing the position and value of each list member
.ddg.convert.list.to.string <- function (dvalue) {
values <- .ddg.remove.tab.and.eol.chars(lapply(dvalue, .ddg.as.character))
positions <- 1:length(values)
return (paste("[[", positions, "]]", values, collapse="\n"))
}
#' .ddg.as.character wraps an exception handler around as.character
#' The exception handler captures the print output for the value and
#' returns that instead.
#' @param value a value to convert to a string
#' @return the value represented as a string
.ddg.as.character <- function (value) {
tryCatch (as.character(value),
error=function(e) {
utils::capture.output(print(value))
}
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.