R/snapshot.r

Defines functions .as.character.no.exception .convert.list.to.string .graphic.snapshot .snapshot.node

# Copyright (C) 2017 Harvard University, Mount Holyoke College
#
# This file is part of ProvR.
#
# ProvR is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# ProvR is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ProvR; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#
# This package was forked from <https://github.com/End-to-end-provenance/RDataTracker>
#
# Contact: Matthew Lau <matthewklau@fas.harvard.edu>

# .as.character.no.exception wraps an exception handler around as.character The exception
# handler captures the print output for the value and returns that instead.
.as.character.no.exception <- function(value) {
    tryCatch(as.character(value), error = function(e) {
        capture.output(print(value))
    })
}

# .convert.list.to.string converts a list of values to a string by calling
# as.character on each element in the list.
# dvalue - a list of values.
.convert.list.to.string <- function(dvalue) {
    values <- .ddg.replace.quotes(lapply(dvalue, .as.character.no.exception))
    positions <- 1:length(values)
    paste("[[", positions, "]]", values, collapse = "\n")
}

# .graphic.snapshot provides factoring of snapshot code.
# fext - file extension.  dpfile - path and name of file.
.graphic.snapshot <- function(fext, dpfile) {
    # pdfs require a separate procedure.
    if (fext == "pdf")
        dev.copy2pdf(file = dpfile) else {
        # At the moment, all other graphic types can be done by constructing a similar
        # function.
        # If jpg, we need to change it to jpeg for the function call.
        fext = ifelse(fext == "jpg", "jpeg", fext)
        # First, we create a string, then convert it to an actual R expression and use
        # that as the function.
        strFun <- paste(fext, "(filename=dpfile, width=800, height=500)", sep = "")
        parseFun <- function() {
            eval(parse(text = strFun))
        }
        dev.copy(parseFun)
        # Turn it off (this switches back to prev device).
        dev.off()
    }
}

# .snapshot.node creates a data node of type Snapshot. Snapshots are used for
# complex data values not written to file by the main script. The contents of
# data are written to the file dname.fext in the DDG directory. Snapshots are
# also used to capture output plots and other graphics generated by the R script.
# The user can control the size of the snapshot files by setting the
# max.snapshot.size parameter when calling ddg.init or ddg.run.  If the user
# passes in 0, no snapshots are saved.  Instead a data node will be created.  If
# the user passes in -1, there is no limit on the snapshot size.  If the user
# passes a value > 0, if the R value is larger than this size, only the head of
# the data will be saved.
# dname - name of data node.  fext - file extension.  data - value of data node.
# save.object (optional) - if TRUE, also save as an R object.  dscope (optional)
# - scope of data node.
.snapshot.node <- function(dname, fext, data, save.object = FALSE, dscope = NULL,
    from.env = FALSE) {
    orig.data <- data
    # Determine if we should save the entire data
    max.snapshot.size <- .ddg.get("ddg.max.snapshot.size")
    if (max.snapshot.size == 0) {
        return(.ddg.data.node("Data", dname, "", dscope, from.env = from.env))
    }
    # Snapshot name
    snapname <- dname
    # object.size returns bytes, but max.snapshot.size is in kilobytes
    if (max.snapshot.size == -1 || object.size(data) < max.snapshot.size * 1024) {
        full.snapshot <- TRUE

    } else if (is.vector(data) || is.list(data) || is.data.frame(data) || is.matrix(data) ||
        is.array(data)) {
        # Decide how much data to save
        element.size <- object.size(head(data, 1))
        num.elements.to.save <- ceiling(max.snapshot.size * 1024/element.size)
        if (num.elements.to.save < length(data)) {
            data <- head(data, num.elements.to.save)
            snapname <- paste(dname, "-PARTIAL", sep = "")
            full.snapshot <- FALSE
        } else {
            full.snapshot <- TRUE
        }
    } else {
        full.snapshot <- FALSE
        snapname <- paste(dname, "-PARTIAL", sep = "")
    }
    # Snapshot type
    dtype <- "Snapshot"
    # If the object is an environment, update the data to be the environment's name
    # followed by a list of the variables bound in the environment.
    if (is.environment(data)) {
        envHeader <- paste0("<environemnt: ", environmentName(data), ">")
        data <- c(envHeader, ls(data), recursive = TRUE)
    } else if ("XMLInternalDocument" %in% class(data)) {
        fext <- "xml"
    } else if (is.vector(data) || is.data.frame(data) || is.matrix(data) || is.array(data) ||
        is.list(data)) {
    } else if (!is.character(data)) {
        tryCatch(data <- as.character(data), error = function(e) {
            # Not sure if str or summary will give us the most useful information.
            data <- summary(data)
        })
    }
    # Default file extensions.
    dfile <- if (fext == "" || is.null(fext))
        paste(.ddg.get("ddg.dnum") + 1, "-", snapname, sep = "") else paste(.ddg.get("ddg.dnum") + 1, "-", snapname, ".", fext, sep = "")
    # Get path plus file name.
    dpfile <- paste(paste(.ddg.get("ddg.path"), "/data", sep = ""), "/", dfile, sep = "")
    if (.ddg.get("ddg.debug.lib"))
        print(paste("Saving snapshot in ", dpfile))

    if(.ddg.get("ddg.save.to.disk")) {
      # Write to file .
      if (fext == "csv") {
        write.csv(data, dpfile, row.names = FALSE)
      } else if (fext == "xml") {
          saveXML(data, dpfile)
      } else if (.ddg.supported.graphic(fext)) {
          # Capture graphic.  Write out RData (this is old code, not sure if we need it).
          .graphic.snapshot(fext, dpfile)
      } else if (fext == "RData") {
          file.rename(paste(paste(.ddg.get("ddg.path"), "/data", sep = ""), "/", dname, sep = ""), dpfile)
      } else if (fext == "txt" || fext == "") {
          # Write out text file for txt or empty fext.
          file.create(dpfile, showWarnings = FALSE)
          if (is.list(data) && length(data) > 0) {
              list.as.string <- .convert.list.to.string(data)
              write(list.as.string, dpfile)
          } else {
              tryCatch(write(as.character(data), dpfile), error = function(e) {
                  capture.output(data, file = dpfile)
              })
          }
      } else {
          # Write out data node object if the file format is unsupported.
          error.msg <- paste("File extension", fext, "not recognized")
          .ddg.insert.error.message(error.msg)
          return(NULL)
      }
    }
    # check to see if we want to save the object.
    if (save.object && full.snapshot && .ddg.get("ddg.save.to.disk"))
        save(data, file = paste(paste(.ddg.get("ddg.path"), "/data", sep = ""), "/", .ddg.get("ddg.dnum") + 1, "-", snapname,
            ".RObject", sep = ""), ascii = TRUE)
    dtime <- .format.time(Sys.time())
    # Get scope if necessary.
    if (is.null(dscope))
        dscope <- .ddg.get.scope(dname)
    # Record in data node table
    .ddg.record.data(dtype, dname, paste("data", dfile, sep = "/"), orig.data,
        dscope, from.env = from.env, dtime)
    if (.ddg.get("ddg.debug.lib"))
        print(paste("snapshot.node: ", dname))
    return(dpfile)
}
ProvTools/provR documentation built on May 6, 2019, 3:27 p.m.