Nothing
formulaToCubeQuery <- function(formula, data) {
query <- formulaToQuery(formula, data)
## The formulaToQuery part is shared with newMultitable.
## What follows is needed to prepare for a cube query
if (length(query$dimensions) > 0) { # leave empty dimensions present in query
query$dimensions <- unlist(query$dimensions, recursive = FALSE)
names(query$dimensions) <- NULL
}
return(query)
}
formulaToQuery <- function(formula, data) {
formula <- try(as.formula(formula), silent = TRUE)
if (is.error(formula)) {
halt(dQuote("formula"), " is not a valid formula")
}
parsed_terms <- parseTerms(formula, data)
vars <- parsed_terms$RHS
measures <- parsed_terms$LHS
## Construct the "measures", either from the formula or default "count"
if (!length(measures)) {
# if measures is an empty list, there are none.
measures <- list(zfunc("cube_count"))
} else {
## Look for multiple measures, as passed by `list(f(x), g(x) ~ a + b)`
if (startsWith(as.character(formula)[2], "list(")) {
measures <- unlist(measures, recursive = FALSE)
}
measures <- lapply(measures, zcl)
}
badmeasures <- vapply(measures, Negate(isCubeAggregation), logical(1))
if (any(badmeasures)) {
halt("Left side of formula must be a valid aggregation")
}
## Name measures based on function (and append to end if already named)
if (is.null(names(measures))) {
names(measures) <- getCubeMeasureNames(measures)
} else {
names(measures) <- paste0(names(measures), "__", getCubeMeasureNames(measures))
}
## Special case for scorecard queries
if (any(is_scorecard(vars))) {
return(scorecardQuery(vars, measures))
}
## Make "dimensions".
dimensions <- lapply(vars, varToDim)
baddimensions <- vapply(
unlist(dimensions, recursive = FALSE),
isCubeAggregation, logical(1)
)
if (any(baddimensions)) {
halt("Right side of formula cannot contain aggregation functions")
}
return(list(dimensions = dimensions, measures = measures))
}
parseTerms <- function(formula, data) {
terms <- terms(formula, allowDotAsName = TRUE)
f.vars <- attr(terms, "variables")
all.f.vars <- all.vars(f.vars)
## More input validation
if ("." %in% all.f.vars) {
halt("Crunch formulae do not support ", dQuote("."), " in formula")
}
if (!length(all.f.vars)) {
halt("Must supply one or more variables")
}
## Find variables either in 'data' or in the calling environment
## Evaluate the formula's terms in order to catch derived expressions
v.call <- do.call(
substitute,
list(expr = f.vars, env = registerCubeFunctions(all.f.vars))
)
if (missing(data)) {
vars <- eval(v.call, NULL, environment(formula))
} else {
vars <- eval(v.call, as.environment(data), environment(formula))
}
## Validate that vars are non-null
nullvars <- vapply(vars, is.null, logical(1))
if (any(nullvars)) {
## Get the NULL expressions.
## Note the off-by-one problem:
## If f.vars == language list(CA$mr_1, CA$NOTAVAR),
## as.character(f.vars) == [1] "list" "CA$mr_1" "CA$NOTAVAR"
varexprs <- as.character(f.vars)[-1]
halt(
"Invalid cube dimension", ifelse(sum(nullvars) > 1, "s: ", ": "),
serialPaste(varexprs[nullvars]), " cannot be NULL"
)
}
resp <- attr(terms, "response")
if (resp > 0) {
# remove response vars only if there are any
RHS <- vars[-resp]
} else {
RHS <- vars
}
LHS <- vars[resp]
return(list(LHS = LHS, RHS = RHS))
}
registerCubeFunctions <- function(varnames = c()) {
## Return a list of "cube functions" to substitute()
## in. A better approach, which would avoid potential name collisions, would
## probably be to have vars be an environment inside of another environment
## that has the cube functions. This version just checks for name collisions
## and errors if there is one.
numfunc <- function(func, ...) {
force(func)
moreArgs <- list(...)
return(function(x, ...) {
if (is.Categorical(x)) {
## "Cast" it on the fly
x <- list(zfunc("cast", x, "numeric"))
}
dots <- list(...)
if ("na.rm" %in% names(dots)) {
# --- Translate R's `na.rm` to zz9's `ignore_missing`
dots[["ignore_missing"]] <- list(value = dots[["na.rm"]])
dots[["na.rm"]] <- NULL
}
do.call("zfunc", c(func, x, moreArgs, dots))
# zfunc(func, x)
})
}
funcs <- list(
mean = numfunc("cube_mean"),
min = numfunc("cube_min"),
max = numfunc("cube_max"),
sd = numfunc("cube_stddev"),
sum = numfunc("cube_sum"),
median = numfunc("cube_quantile", list(value = I(.5))),
as_array = function(x) {
## Kinda hacky way to do a query of an MR as CA
if (!(is.MR(x) | is.CrunchExpr(x))) {
halt(
"Cannot analyze a variable of type ", dQuote(type(x)),
" 'as_array'"
)
}
zfunc("as_array", x)
},
# Preserved for backwards compatibility here, but we don't allow changing
# the default behavior of MRs anymore so this special case isn't needed
# except for the error message
as_selected = function(x) {
if (!(is.MR(x) | is.CrunchExpr(x))) {
halt(
"Cannot analyze a variable of type ", dQuote(type(x)),
" 'as_selected'"
)
}
x
},
subvariables = function(x) {
if (!(is.Array(x) | is.CrunchExpr(x))) {
halt(
"Cannot analyze a variable of type ", dQuote(type(x)),
" using 'subvariables'"
)
}
zfunc("dimension", x, "subvariables")
},
categories = function(x) {
if (!(is.Array(x) | is.CrunchExpr(x))) {
halt(
"Cannot analyze a variable of type ", dQuote(type(x)),
" using 'categories'"
)
}
zcl(x)
},
n = function(...) zfunc("cube_count"),
scorecard = function(..., vars = NULL) {
vars <- c(list(...), vars)
if (!all(vapply(vars, function(x) is.MR(x) | is.CrunchExpr(x), logical(1)))) {
halt("Expected Multiple Response variables or expressions in a scorecard")
}
vars <- lapply(vars, function(x) {
if (is.MR(x)) zfunc("as_selected", x) else zcl(x)
})
do.call(zfunc, c(list(func = "scorecard"), vars))
}
)
overlap <- intersect(varnames, names(funcs))
if (length(overlap)) {
halt(
"Cannot evaluate a cube with reserved ",
pluralize("name", length(overlap)), ": ",
serialPaste(dQuote(overlap))
)
}
return(funcs)
}
getCubeMeasureNames <- function(measures) {
vapply(measures, function(m) {
sub("^cube_", "", m[["function"]])
}, character(1))
}
isCubeAggregation <- function(x) {
# --- Must have function and args and possibly kwargs, but nothing else
allowed <- c("function", "args", "kwargs")
length(setdiff(names(x), allowed)) == 0 && grepl("^cube_", x[["function"]])
}
is.zfunc <- function(x, func) {
out <- is.list(x) && "function" %in% names(x)
if (out && !missing(func)) {
out <- x[["function"]] == func
}
return(out)
}
varToDim <- function(x) {
## Given variable, construct the appropriate ZCL to get a cube with them
## as dimensions
v <- zcl(x)
if (is.MR(x)) {
## Multiple response gets "as_selected" by default and subvars dimension
return(list(
zfunc("dimension", zfunc("as_selected", v), list(value = "subvariables")),
zfunc("as_selected", v)
))
} else if ("function" %in% names(v) && identical(v[["function"]], "as_selected")) {
# If user created an as_selected (eg with `asSelected` or `selectCategories()`) expand
# out the dimensions for them like it's a MR
return(list(
zfunc("dimension", v, list(value = "subvariables")),
v
))
} else if (is.CA(x) | is.NumericArray(x)) {
## Categorical array gets the subvariables dimension first
## and then itself so that the rows, not columns, are subvars
## We treat numeric arrays as categoricals when used bare
## in formulas like this too so that eg `table(ds$numarray)`
## matches expectations.
return(list(
zfunc("dimension", x, list(value = "subvariables")),
v
))
} else if (is.zfunc(x, "as_array")) {
## Pseudo-ZCL from registerCubeFunctions, used to treat an MR like a CA
## x is thus list(`function`="as_array", args=list(list(variable=self)))
## Return instead subvar dimension + variable
return(list(
zfunc("dimension", x$args[[1]]["variable"], list(value = "subvariables")),
x$args[[1]]["variable"]
))
} else if (is.CrunchExpr(x)) {
## Give a name and alias "references"
ref <- formatExpression(x)
v$references <- list(name = ref, alias = ref)
return(list(v))
} else {
## Just the var ref, but nest in a list so we can unlist to flatten
return(list(v))
}
}
evalLHS <- function(formula, data) {
evalSide(formula[[2]], data, environment(formula))
}
evalRHS <- function(formula, data) {
evalSide(formula[[3]], data, environment(formula))
}
evalSide <- function(formula_part, data, eval_env) {
if (missing(data) || is.null(data)) {
return(eval(formula_part, NULL, eval_env))
} else {
return(eval(formula_part, as.environment(data), eval_env))
}
}
RHS_string <- function(f) {
## Return a string representation of the right-hand side of a formula
## (the stuff to the right of the ~)
sub("^ +", "", tail(splitFormula(f), 1))
}
LHS_string <- function(f) {
## Return a string representation of the left-hand side of a formula
## (the stuff to the left of the ~)
sub(" +$", "", head(splitFormula(f), 1))
}
splitFormula <- function(f) {
if (!is.character(f)) f <- paste(deparse(f), collapse = " ")
unlist(strsplit(f, "~"))
}
is_scorecard <- function(vars) {
vapply(
vars,
function(x) is.list(x) && identical(x[["function"]], "scorecard"), logical(1)
)
}
scorecardQuery <- function(vars, measures) {
if (length(vars) > 1) halt("scorecard queries cannot be combined with other dimensions")
if (length(measures) > 1 || !identical(measures[[1]][["function"]], "cube_count")) {
halt("scorecard queries can only have count as the aggregation")
}
return(list(
measures = measures,
with = list(v1 = zfunc("fuse", vars[[1]]$args)),
dimensions = list(list(
list(local = "v1.subvariables"),
list(local = "v1.categories"),
list(local = "v1.variables")
))
))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.