# This file contains the public functions of the package (i.e. those that can be
# called from a script using this package)
# other files contain private functions that should not be called from outside the
# package and should only be called by these public functions or by each other
# Most of these functions should call LoadConfig at the start or they won't work
#' Read a Dataobject of a Given Name
#'
#' Given the name and optionally the parameters and dependencies, will read the dataobject previously saved in a datatrack project.
#'
#' @param name String; Optional. The dataobject to read, eg "clusters", "features" etc. If ommited, will prompt the user with a choice of available names
#' @param purpose string; Displayed to the user when prompting
#' @param include.meta bool; if true, will wrap the data to return in a list that also contains the metadata
#' @param params list; Optional. If supplied, will only consider returning a dataobject with the matching params
#' @param dependencies list; optional. If supplied, will only consider returning a dataobject with the matching
#' @param false.if.missing bool; if true, will return false if the file is missing
#' @param optional boolean; if true, an option is added to select none and return false
#' @param use.last.accessed boolean; if true (default) will look for the version that was last written or read in this session
#' @param version int; If the required version is known, then it can be supplied. This version will be used if it exists
#'
#' @return if include.meta, will return a list that has a 'data' key, that contains the data read in if include.meta is false, will return the data read in, eg data frame if it's a csv
#'
#' @details
#' 1) if name is not supplied, will ask user,
#' 2) then if version is supplied, find the that version of the dataobject name. If it doesn't exist, will return false or stop (depending on false.if.missing)
#' 3) looks for the last accessed version if it matches the version OR params, dependencies
#' 4) if no matching last accessed verion is found, it will ask the user to select a version
#' This means that for a particular set of params and dependencies, only one version of a particular dataobject name can be used within the same run
#' 5) if the file is found, then it will set the 'last accessed' flag
#' on the chosen dataobject and it dependencies
#'
#' @export
ReadDataobject <- function (name = NULL,
purpose = NA,
include.meta = TRUE,
params = NULL,
dependencies = NULL,
false.if.missing = FALSE,
optional = FALSE,
use.last.accessed = TRUE,
version = NULL) {
.LoadConfig()
if (is.null(name)) {
# if name is ommited from function call, get user input
# this should only happen when calling directly from the commandline
choices = .GetDataobjectNames()
choice = userinput::GetUserChoice(choices, choosing.what = "choose a type of dataobject")
name = choices[choice]
}
if (!is.na(purpose)) {
.Report('Reading dataobject for:', purpose)
}
if (use.last.accessed) {
meta.row <- .GetLastAccessed(name, params, dependencies, version)
} else {
meta.row <- FALSE
}
if (!is.data.frame(meta.row) || optional) {
meta.row <- .ChooseDataobjectVersion(name, params = params, dependencies = dependencies, false.if.missing = false.if.missing, optional = optional, version = version)
if (!is.data.frame(meta.row)) {
return(FALSE)
}
}
.SetLastAccessed.recursive(meta.row$name, meta.row$version)
val <- .ReadDataobjectFile(meta.row)
if (include.meta) {
meta <- .ExpandMeta(meta.row)
meta$indirect.dependencies <- .GetIndirectDependenciesStack(meta.row$name, meta.row$version)
meta$data <- val
return(meta)
} else {
return(val)
}
}
#' Write a Dataobject to the Datatrack Project
#'
#' Writes a dataobject where the meta values for the dataobject are included in the list
#' rather than passed as separate parameters as they would be for WriteDataobject
#'
#' @param x mixed the data to write
#' @param check.before.overwrite boolean whether to prompt for user confirmation before overwriting an existing version
#' @details
#' Simply takes the parameters from the list and uses them for WriteDataobject
#' TODO: before writing, check meta for invalid dependencies (i.e. directly or indirectly depends on different versions of the same name)
#' TODO: check that all dependencies actually exist. Currently, datatrack just works fine and then the viewer will crash.
#' @export
WriteStructuredDataobject <- function (x, check.before.overwrite = TRUE) {
.LoadConfig()
required.params <- c('data', 'name', 'params', 'dependencies')
missing.items <- required.params[!required.params %in% names(x)]
if (length(missing.items) > 0) {
stop(paste('supplied dataobject list is missing some items:', paste(missing.items, sep = ", ")))
}
v.num = WriteDataobject(x = x$data, name = x$name, params = x$params, dependencies = x$dependencies, check.before.overwrite = check.before.overwrite)
return(v.num)
}
#' Write a Dataobject to the Datatrack Project
#'
#' Given any R object, along with parameters and dependencies used when creating the value, will
#' save the object to disk and record the metadata in the datatrack project
#'
#' @param x data.frame or Object
#' @param name string what to call the dataobject. This is used in: the filename and the meta file to keep track of versions and dependencies
#' @param params list The parameters used when generating this dataobject. Saved in the meta file.
#' @param dependencies list The dataobjects created from previous steps that were used as input data for the process that created this dataobject List of name/version pairs
#' @param check.before.overwrite boolean if false, overwrite an existing dataobject if the given parameters and dependencies are the same as the existing one
#' If the dependency version is given as 0, then datatrack will attempt to look up the last accessed version of that name to use. In other words, use 0 to
#' specify that datatrack should use the last accessed version as the dependency
#'
#' @return the version that it gets saved as
#'
#' @details
#' dataobject will be saved with the filename like: name.version.csv The version is detected automatically.
#' If dataobject for this name with the same parameters and dependencies is already saved, then it will be overwritten (after user confirmation).
#' If params or dependencies are different, then a new version is saved. The version number is created automatically.
#'
#' The version for the dependency can be ommited (by putting a value < 1). If this happens, then it will be assumed to be the last accessed version
#' of the dependency dataobject. So, the process which generated the dataobject will have accesed some other dataobject it depends on. Then, when it saves its dataobject
#' it need only pass the name of the input dataobject without the version, and the function will know which version it was.
#' @export
WriteDataobject <- function (x, name, params = list(), dependencies = list(), check.before.overwrite = TRUE, annotations = list()) {
.LoadConfig()
# read the meta for all dataobjects
meta <- ReadMeta()
params <- rjson::toJSON(params)
# if a dependency version is not supplied, then it tries
# to use the version that was last accessed
dependency.names <- names(dependencies)
missing <- c()
for (d.name in dependency.names) {
if (!dependencies[[d.name]] > 0) {
last.accessed <- .GetLastAccessed(d.name)
if (class(last.accessed) == 'data.frame') {
dependencies[[d.name]] <- last.accessed$version
}
}
}
if (any(dependencies < 1)) {
# if any dependency versions are less than 1 (i.e. 0), stop and give the error message
# This will happen if the dependency verion was not supplied AND the last accesed verion is not stored
stop(paste('dependency versions must be supplied for ', paste(names(dependencies[dependencies < 1]), collapse = ",")))
}
dependencies <- rjson::toJSON(dependencies)
matching.name <- meta$name == name
matching.p.and.d<- matching.name & meta$params == params & meta$dependencies == dependencies
# search for a previous version with the same params and same dependencies
# if found, confirm overwrite and update meta with new date
# if not found, create a new meta row
callstack <- as.character(sys.calls())
# remove this function from call stack
length(callstack) <- length(callstack) - 1
if (is.data.frame(x)) {
col.names = colnames(x)
} else {
col.names = NULL
}
# initialise version to zero for now. It will be adjusted once we know whether
# we are overwriting an existing version or adding a new one
new.meta.row <- .MakeMetaRow(name,
v.num = 0,
params = params,
dependencies = dependencies,
col.names = col.names,
callstack = callstack,
csv = .UseCsv(x),
annotations = annotations)
if (any(matching.p.and.d)) {
# todo: check if this file is the dependency of other files. If so, maybe not safe to overwrite?
if (check.before.overwrite) {
msg <- paste0("Overwrite dataobject for version ", meta$version[matching.p.and.d], " of ", name," (", meta$date[matching.p.and.d], ") ?")
overwrite <- userinput::Confirm(msg)
} else {
overwrite <- TRUE
}
if (!overwrite) {
return(FALSE)
} else {
#replace the existing meta row. Will have same name, dependencies, params and version as the old one
# but the callstack, annotations and system info might be different
new.meta.row$version <- new.v.num <- meta$version[matching.p.and.d]
meta[matching.p.and.d, ] <- new.meta.row
}
} else {
if (any(matching.name)) {
new.v.num <- max(meta$version[matching.name]) + 1
} else {
new.v.num <- 1
}
new.meta.row$version <- new.v.num
meta <- rbind(meta, new.meta.row)
}
.SetLastAccessed(name, new.meta.row)
.WriteMeta(meta)
.WriteDataobjectFile(x, name, new.v.num)
return(new.v.num)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.