#' Waypoint Construction
#'
#' These functions are responsible for constructing waypoints, the building
#' blocks for every query.
#'
#' In the GrokIt system, a waypoint is the most basic level of data processing,
#' each of which is a coupling of an operator, inputs, and outputs. Essentially,
#' each waypoint consists of a task to perform, what to perform this task on,
#' and what to produce.
Aggregate <- function(data, gla, inputs = character(), outputs = character(),
                      states = NULL) {
  ## Build a GLA waypoint on top of `data`.
  ##
  ## The output schema pairs every name in `outputs` with the matching element
  ## of convert.outputs(outputs). `gla` is passed through convert.args()
  ## together with that schema, and `inputs` is handed to check.inputs() along
  ## with `data`. When is.data(states) holds, `states` is wrapped in a
  ## one-element list so it is always stored as a list.
  ##
  ## Returns a list with elements data, alias, gla, inputs, schema and states,
  ## classed c("GLA", "data").
  out.schema <- setNames(convert.outputs(outputs), outputs)
  gla <- convert.args(gla, out.schema)
  check.inputs(data, inputs)
  waypoint.alias <- create.alias("gla")
  if (is.data(states)) {
    states <- list(states)
  }
  structure(
    list(data = data, alias = waypoint.alias, gla = gla, inputs = inputs,
         schema = out.schema, states = states),
    class = c("GLA", "data")
  )
}
## Wrap `data` in a cache waypoint.
##
## The alias is requested from create.alias() using "cache_" followed by
## base.name(data$alias); the schema is copied unchanged from `data`. The
## result keeps every class of `data` and gains "Cache" in front of them.
Cache <- function(data) {
  structure(
    list(
      alias = create.alias(paste0("cache", "_", base.name(data$alias))),
      data = data,
      schema = data$schema
    ),
    class = c("Cache", class(data))
  )
}
## Wrap `data` in a compact waypoint.
##
## The alias is requested from create.alias() using "compact_" followed by
## base.name(data$alias); the schema is taken over from `data` as is. The
## returned list has elements alias, data and schema, and its class vector is
## "Compact" followed by the classes of `data`.
Compact <- function(data) {
  compact.alias <- create.alias(paste0("compact", "_", base.name(data$alias)))
  result <- list(alias = compact.alias, data = data, schema = data$schema)
  structure(result, class = c("Compact", class(data)))
}
## Build a GIST waypoint.
##
## `outputs` names the produced attributes; the schema pairs each of those
## names with the matching element of convert.outputs(outputs). When
## is.data(states) holds, `states` is wrapped in a one-element list.
##
## Returns a list with elements alias, gist, schema and states, classed
## c("GIST", "data").
Transition <- function(gist, outputs, states) {
  gist.alias <- create.alias("gist")
  out.schema <- setNames(convert.outputs(outputs), outputs)
  if (is.data(states)) {
    states <- list(states)
  }
  structure(
    list(alias = gist.alias, gist = gist, schema = out.schema, states = states),
    class = c("GIST", "data")
  )
}
## Build a GT waypoint on top of `data`.
##
## `inputs` is handed to check.inputs() along with `data`. Every name in
## `outputs` that already exists in the schema of `data` is a clash: with
## `overwrite = FALSE` a clash is an error, otherwise the existing schema entry
## is replaced. The resulting schema is the schema of `data` with the converted
## outputs added or overwritten, and `gt` is passed through convert.args()
## with that schema. When is.data(states) holds, `states` is wrapped in a
## one-element list.
##
## Returns a list with elements data, alias, gt, inputs, schema, states and
## outputs (the named, converted outputs), classed c("GT", "data").
Transform <- function(data, gt, inputs, outputs, states = NULL, overwrite = TRUE) {
  check.inputs(data, inputs)
  gt.alias <- create.alias("gt")

  clashes <- outputs %in% names(data$schema)
  if (any(clashes) && !overwrite) {
    stop("cannot perform transform due to the following name clashes:\n",
         paste0(outputs[clashes], collapse = ", "))
  }

  new.atts <- setNames(convert.outputs(outputs), outputs)
  full.schema <- data$schema
  full.schema[names(new.atts)] <- new.atts
  gt <- convert.args(gt, full.schema)

  if (is.data(states)) {
    states <- list(states)
  }

  structure(
    list(data = data, alias = gt.alias, gt = gt, inputs = inputs,
         schema = full.schema, states = states, outputs = new.atts),
    class = c("GT", "data")
  )
}
## Add generated attributes to `data`.
##
## Every argument in `...` must be named; the name is the attribute to create
## and the (unevaluated) argument is the expression producing it. A name that
## already exists in the schema of `data` is an error unless `.overwrite` is
## TRUE. The expressions are converted with convert.exprs(), handed to
## check.inputs() along with `data`, and then passed to convert.outputs(); the
## result is written into the schema under the attribute names.
##
## Returns a list with elements data, alias, schema and generated, classed
## c("Generated", "data").
Generate <- function(data, ..., .overwrite = FALSE) {
  ## Capture the arguments in `...` without evaluating them.
  dots <- as.list(substitute(list(...)))[-1]
  atts <- names(dots)
  if (is.null(atts) || any(atts == "")) {
    stop("There are missing names for the generated attributes.")
  }

  clashes <- atts %in% names(data$schema)
  if (any(clashes) && !.overwrite) {
    stop("cannot perform generation due to the following name clashes:\n",
         paste0("\t", atts[clashes], collapse = "\n"))
  }

  converted <- unlist(lapply(dots, convert.exprs, data))
  check.inputs(data, converted)
  new.atts <- convert.outputs(converted)

  full.schema <- data$schema
  full.schema[atts] <- new.atts

  structure(
    list(data = data, alias = create.alias("generate"), schema = full.schema,
         generated = new.atts),
    class = c("Generated", "data")
  )
}
## Build a GI (input) waypoint that reads `files`.
##
## files   - paths to read; each must name an existing regular file (checked
##           with file_test("-f", ...)). The paths are stored after
##           normalizePath().
## gi      - reader description; passed through convert.args() with the schema.
## outputs - either a relation (as recognised by is.relation()), in which case
##           the schema maps each attribute of the relation to
##           "<relation>.<attribute>", or a fully named list, in which case the
##           names are the output attributes and the schema pairs each name
##           with the matching element of convert.outputs(names).
## chunk   - NULL, or a single positive, non-missing number. The value is
##           coerced with as.integer() before being stored.
##
## Returns a list with elements files, alias, gi, schema, outputs and chunk,
## classed c("GI", "data").
Input <- function(files, gi, outputs, chunk = NULL) {
  from.relation <- isTRUE(is.relation(outputs))
  assert(from.relation || is.list(outputs),
         "illegal outputs argument")

  if (from.relation) {
    schema <- get.attributes(outputs)
    schema <- setNames(paste0(outputs, ".", schema), schema)
  } else {
    assert(!is.null(names(outputs)) && all(names(outputs) != ""),
           "outputs has missing names.")
    schema <- names(outputs)
    schema <- setNames(convert.outputs(schema), schema)
  }

  assert(all(good <- file_test("-f", files)),
         "missing files: ", paste(files[!good], collapse = ", "))
  files <- normalizePath(files)

  gi <- convert.args(gi, schema)
  alias <- create.alias("gi")

  ## An NA chunk is rejected explicitly: without the is.na() guard the
  ## comparison `chunk > 0` would yield NA and `if` would fail with an
  ## unrelated "missing value where TRUE/FALSE needed" error.
  chunk.ok <- is.null(chunk) ||
    (is.numeric(chunk) && length(chunk) == 1 && !is.na(chunk) && chunk > 0)
  if (!chunk.ok) {
    stop("chunk size should be a single positive number.")
  }
  chunk <- as.integer(chunk)

  structure(list(files = files, alias = alias, gi = gi,
                 schema = schema, outputs = outputs, chunk = chunk),
            class = c("GI", "data"))
}
## Build a GF waypoint on top of `data`.
##
## `inputs` is handed to check.inputs() along with `data`. The schema is the
## schema of `data`, unchanged, and `gf` is stored exactly as supplied. When
## is.data(states) holds, `states` is wrapped in a one-element list.
##
## Returns a list with elements data, alias, gf, schema, inputs and states,
## classed c("GF", "data").
Filter <- function(data, gf, inputs = character(), states = NULL) {
  check.inputs(data, inputs)
  kept.schema <- data$schema
  if (is.data(states)) {
    states <- list(states)
  }
  gf.alias <- create.alias("gf")
  structure(
    list(data = data, alias = gf.alias, gf = gf, schema = kept.schema,
         inputs = inputs, states = states),
    class = c("GF", "data")
  )
}
#' Load a relation.
#'
#' \code{Load} creates a waypoint that reads a relation from disk.
#'
#' An error is thrown if \code{relation} does not specify a relation that exists
#' and can be read by the user.
#'
#' \code{Read} is simply an alias for \code{Load} that exists for compatibility.
#'
#' @param relation Usually, a name or character string specifying the relation
#'   to load. A character string (enclosed in explicit single or double quotes)
#'   is always taken as the relation name.
#'
#'   If \code{relation} evaluates to a length-one character vector, the name of
#'   the relation is the value of its only element. Otherwise the argument is
#'   taken as written, so it must be a bare name or a character string.
#' @return A \code{waypoint} object whose schema is determined by the
#'   relation being loaded.
Load <- function(relation) {
  ## Does `relation` already evaluate to a single string? Any error raised
  ## while evaluating it (for example an unbound bare name) counts as "no".
  is.string <- tryCatch(is.character(relation) && length(relation) == 1,
                        error = function(e) FALSE)
  if (!is.string) {
    ## Fall back to the argument exactly as the caller wrote it.
    relation <- as.character(substitute(relation))
  }
  assert(is.character(relation) && length(relation) == 1,
         "'relation' should be a name or a length-one character vector")

  catalog <- get.catalog(relation)
  alias <- create.alias(relation)

  ## The schema maps each attribute name to "<alias>.<attribute name>".
  att.names <- unlist(lapply(catalog$attributes, function(att) att[["name"]]))
  schema <- setNames(paste0(alias, ".", att.names), att.names)

  if (is.null(catalog$cluster)) {
    cluster <- NULL
  } else {
    cluster <- paste0(alias, ".", catalog$cluster)
    position <- which(schema == cluster)
    type <- catalog$attributes[[position]]$type$node_data
    if (!is.character(type)) {
      ## dealing with templated type.
      type <- type$name
    }
    ## Record the clustering attribute with an unbounded range.
    grokit$cluster[[cluster]] <- list(lower = -Inf, upper = Inf,
                                      type = convert.typename(type))
  }

  structure(
    list(relation = relation, alias = alias, schema = schema, cluster = cluster),
    class = c("Load", "data")
  )
}
## `Read` is bound to the same function object as `Load`, so both names accept
## the same argument and behave identically.
#' @rdname Load
#' @usage Read(relation)
Read <- Load
#' Basic Filtering of Waypoints
#'
#' Filter a waypoint based on a boolean expression.
#'
#' \code{condition} is evaluated for each tuple in \code{data} independently.
#' Only those tuples for which \code{condition} evaluates to TRUE are passed
#' through the filter.
#'
#' \code{condition} is captured unevaluated. It must be a single expression: a
#' call to \code{c} is rejected, and omitting the condition is an error.
#'
#' @param data A \code{\link{waypoint}}.
#' @param condition A boolean valued \code{\link{expression}}.
#' @return A \code{\link{waypoint}} with a subset of the tuples in \code{data}.
#' @author Jon Claus, <jonterainsights@@gmail.com>, Tera Insights, LLC.
`[.data` <- function(data, condition) {
  if (missing(condition)) {
    stop("A filter condition must be supplied.")
  }
  condition <- substitute(condition)

  ## Only a call has a head to inspect. A bare attribute name (a symbol) or a
  ## constant is a valid single expression and must not be indexed with [[1]],
  ## which fails on symbols and mistakes the string "c" for a call to c().
  if (is.call(condition) && identical(condition[[1]], quote(c))) {
    stop("Condition is not allowed to be a list of expressions.")
  }

  check.exprs(condition)
  condition <- convert.exprs(condition, data)
  check.inputs(data, condition)

  alias <- create.alias("filter")
  schema <- data$schema
  filter <- list(data = data, alias = alias, schema = schema, condition = condition)
  class(filter) <- c("Filter", "data")
  filter
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.