########################
# AFL operator helpers #
########################
#' Update available AFL operators
#'
#' @param .db an \code{afl} object (a SciDB database connection returned from \code{\link{scidbconnect}})
#' @param .new a character vector of operator names
#' @param .ops an optional three-variable data frame with variables name, signature, help, corresponding
#' to the operator names, signatures, and help files (from SciDB Doxygen documentation)
#' @return an updated database \code{afl} object
#' @note Every operator gets a shallow copy of the db argument; that is, \code{attributes(db[i])$conn} should be the same for every operator index i.
#' @keywords internal
#' @importFrom stats runif
#' @importFrom utils head
update.afl = function(.db, .new, .ops)
{
if (missing(.ops))
{
e = new.env()
data("operators", package="scidb", envir=e)
.ops = e$operators
}
afl = function(.db, .name, .help, .signature)
{
.name = paste(.name)
.help = paste(.help)
.signature = paste(.signature)
function(...)
{
.pf = parent.frame()
.depend = new.env()
.args = paste(
lapply(as.list(match.call())[-1],
function(.x) tryCatch({
ex = eval(.x, envir=.pf)
if (class(ex)[1] %in% "scidb")
{
assign(tail(make.names(c(ls(.depend), paste("V", runif(1), sep="")), unique=TRUE), 1), ex, envir=.depend)
ex@name
}
else .x
}, error=function(e) .x)),
collapse=",")
expr = sprintf("%s(%s)", .name, .args)
# handle aliasing
expr = gsub("%as%", " as ", expr)
# handle R scalar variable substitutions
expr = rsub(expr, parent.frame())
is_ddl = any(grepl(.name, getOption("scidb.ddl"), ignore.case=TRUE))
trace <- .TraceEnter(paste0("afl!", .name),
args=args,
.depend=.depend,
expr=expr,
is_ddl=is_ddl)
on.exit(.TraceExit(trace, returnValue()), add=TRUE)
if (getOption("scidb.debug", FALSE)) message("AFL EXPRESSION: ", expr)
# Some special AFL non-operator expressions don't return arrays
if (is_ddl)
{
return(iquery(.db, expr))
}
ans = scidb(.db, expr)
ans@meta$depend = as.list(.depend)
ans
}
}
for (.x in .new)
{
.formals = NULL
.signature = ""
.help = ""
.i = .ops[, 1] == .x
# update formal function arguments for nice tab completion help
if (any(.i))
{
.def = head(.ops[.i, ], 1)
# XXX very ugly...
.formals = strsplit(gsub("[=:].*", "", gsub("\\|.*", "", gsub(" *", "", gsub("\\]", "", gsub("\\[", "", gsub("\\[.*\\|.*\\]", "", gsub("[+*{})]", "", gsub(".*\\(", "", .def[2])))))))), ",")[[1]]
.help = .def[3]
.signature = .def[2]
}
.db[[.x]] = afl(.db, .x, .help, .signature)
if(!is.null(.formals))
formals(.db[[.x]]) = eval(parse(text=sprintf("alist(%s, ...=)", paste(paste(.formals, "="), collapse=", "))))
class(.db[[.x]]) = "operator"
}
.db
}
#' Substitute scalar-valued R expressions into an AFL expression
#' R expressions are marked with R(expression)
#' @param x character valued AFL expression
#' @param env environment in which to evaluate R expressions
#' @return character valued AFL expression
#' @keywords internal function
rsub = function(x, env)
{
if (! grepl("[^[:alnum:]_]R\\(", x)) return(x)
imbalance_paren = function(x) # efficiently find the first imbalanced ")" character position
{
which (cumsum( (as.numeric(charToRaw(x) == charToRaw("("))) - (as.numeric(charToRaw(x) == charToRaw(")"))) ) < 0)[1]
}
y = gsub("([^[:alnum:]_])R\\(", "\\1@R(", x)
y = strsplit(y, "@R\\(")[[1]]
expr = Map(function(x)
{
i = imbalance_paren(x)
rexp = eval(parse(text=substring(x, 1, i - 1)), envir=env)
rmdr = substring(x, i + 1)
paste(rexp, rmdr, sep="")
}, y[-1])
sprintf("%s%s", y[1], paste(expr, collapse=""))
}
#' Display SciDB AFL operator documentation
#' @param topic an afl object from a SciDB database connection, or optionally a character string name
#' @param db optional database connection from \code{\link{scidbconnect}} (only needed when \code{topic} is a character string)
#' @return displays help
#' @examples
#' \dontrun{
#' d <- scidbconnect()
#' aflhelp("list", d) # explicitly look up a character string
#' help(d$list) # same thing via R's \code{help} function
#' }
#' @importFrom utils data
#' @export
aflhelp = function(topic, db)
{
if (is.character(topic))
{
if (missing(db)) stop("character topics require a database connection argument")
topic = db[[topic]]
}
h = sprintf("%s\n\n%s", environment(topic)$.signature, gsub("\\n{2,}", "\n", environment(topic)$.help))
message(h)
}
#########################
# Base expression class #
#########################
#' Virtual base expression class
#' @description
#' Class representing a N-ary predicate, bound with an ordered vector of
#' names for the N (possibly non-distinct) variables
#' @export
scidb.expression <- R6::R6Class(
"scidb.expression",
portable = F, cloneable = T, lock_objects = F, lock_class = F,
public = list(
#' @description Returns the type of the expression as a string
#' @return type: "SYMBOL", "VALUE", "UNARY", "BINARY", or "TERNARY" (unimplemented)
type = function() {
stop("ERROR: base virtual method should be unreachable")
},
#' @description Returns the distinct symbols used in the expression
#' @return vector of symbol names
symbols = function() {
stop("ERROR: base virtual method should be unreachable")
},
#' @description Whether or not the expression is a function of (at most) a single symbol
#' @return a boolean value indicating unary or non-unary
unary = function() {
stop("ERROR: base virtual method should be unreachable")
},
#' @description Whether or not the expression is unary or a boolean AND or OR of
#' two other in-turn unary composite expressions
#' @return a boolean value indicating unary-composite or not
unary_composite = function() {
stop("ERROR: base virtual method should be unreachable")
},
#' @description Renames the symbols in the expression according to replacements in the arguments
#' @param ... optional list of string replacements for each symbol in the expression
substitute = function(...) {
stop("ERROR: base virtual method should be unreachable")
},
#' @description Convert the expression to an R string, with optional symbol substitutions
#' @param ... optional list of string replacements for each symbol in the expression
#' @return evaluation of this predicate as a single R string
to_r = function(...) {
stop("ERROR: base virtual method should be unreachable")
},
#' @description Convert the expression to an AFL string, with optional symbol substitutions
#' @param ... optional list of string replacements for each symbol in the expression
#' @return evaluation of this predicate as a single AFL string
to_afl = function(...) {
stop("ERROR: base virtual method should be unreachable")
}
),
private = list(
)
)
#####################
# Leaf symbol class #
#####################
#' Leaf symbol expression class
#' @description
#' Class representing a leaf symbol in an expression
#' @export
scidb.expression.symbol <- R6::R6Class(
"scidb.expression.symbol",
inherit = scidb.expression,
portable = F, cloneable = T, lock_objects = F, lock_class = F,
public = list(
#' @param input string name of symbol
initialize = function(input) {
if (rlang::is_symbol(input)) {
private$symbol_ <- rlang::as_string(input)
} else if (is.character(input) && length(input) == 1) {
private$symbol_ <- input
} else {
stop("ERROR: argument 'input' must be either a single string or a rlang symbol object")
}
},
#' @description Returns the type of the expression as a string
#' @return "SYMBOL"
type = function() {
"SYMBOL"
},
#' @description Returns the distinct symbols used in the expression
#' @return vector of symbol names
symbols = function() {
private$symbol_
},
#' @description Whether or not the expression is a function of (at most) a single symbol
#' @return a boolean value indicating unary or non-unary
unary = function() {
TRUE
},
#' @description Whether or not the expression is unary or a boolean AND or OR of
#' two other in-turn unary composite expressions
#' @return a boolean value indicating unary-composite or not
unary_composite = function() {
TRUE
},
#' @description Renames the symbols in the expression according to replacements in the arguments
#' @param ... optional list of string replacements for each symbol in the expression
substitute = function(...) {
symbol_map <- list(...)
private$symbol_ <- ifelse (symbol_ %in% names(symbol_map), symbol_map[[symbol_]], symbol_)
},
#' @description Convert the expression to an R string, with optional symbol substitutions
#' @param ... optional list of string replacements for each symbol in the expression
#' @return evaluation of this predicate as a single R string
to_r = function(...) {
symbol_map <- list(...)
variable <- ifelse (symbol_ %in% names(symbol_map), symbol_map[[symbol_]], symbol_)
paste0("`", variable, "`")
},
#' @description Convert the expression to an AFL string, with optional symbol substitutions
#' @param ... optional list of string replacements for each symbol in the expression
#' @return evaluation of this predicate as a single AFL string
to_afl = function(...) {
symbol_map <- list(...)
ifelse (symbol_ %in% names(symbol_map), symbol_map[[symbol_]], symbol_)
}
),
private = list(
symbol_ = NULL
)
)
#####################
# Leaf value class #
#####################
#' Leaf value expression class
#' @description
#' Class representing a leaf value in an expression
#' @export
scidb.expression.value <- R6::R6Class(
"scidb.expression.value",
inherit = scidb.expression,
portable = F, cloneable = T, lock_objects = F, lock_class = F,
public = list(
#' @description Wrap a raw R value in a scidb.expression object
#' @param input raw R value
initialize = function(input) {
if (!(rlang::is_syntactic_literal(input)
|| rlang::is_vector(input)
|| "scidb.predicate" %in% class(input)) ) {
stop("ERROR: argument 'input' must be either a scalar or vector basic type, or scidb.predicate object")
}
private$value_ <- input
},
#' @description Returns the type of the expression as a string
#' @return "VALUE"
type = function() {
"VALUE"
},
#' @description Returns the R value bound by this scidb.expression
#' @return raw R value
raw = function() {
private$value_
},
#' @description Returns the distinct symbols used in the expression
#' @return vector of symbol names
symbols = function() {
character()
},
#' @description Whether or not the expression is a function of (at most) a single symbol
#' @return a boolean value indicating unary or non-unary
unary = function() {
TRUE # ok, so technically no-ary, but this really means "at most one free symbol"
},
#' @description Whether or not the expression is unary or a boolean AND or OR of
#' two other in-turn unary composite expressions
#' @return a boolean value indicating unary-composite or not
unary_composite = function() {
TRUE
},
#' @description Renames the symbols in the expression according to replacements in the arguments
#' @param ... optional list of string replacements for each symbol in the expression
substitute = function(...) {
},
#' @description Convert the expression to an R string, with optional symbol substitutions
#' @param ... optional list of string replacements for each symbol in the expression
#' @return evaluation of this predicate as a single R string
to_r = function(...) {
# note: special casing may be necessary for floating point numbers
if (is.character(private$value_)) {
escaped <- private$value_
escaped <- gsub('"','\\\\"', escaped) # single escape double quote
# no need to escape single quote
paste0('"',escaped,'"')
} else {
as.character(private$value_)
}
},
#' @description Convert the expression to an AFL string, with optional symbol substitutions
#' @param ... optional list of string replacements for each symbol in the expression
#' @return evaluation of this predicate as a single AFL string
to_afl = function(...) {
# note: special casing may be necessary for floating point numbers
if (is.character(private$value_)) {
escaped <- private$value_
# no need to escape double quote
escaped <- gsub("\'","\\\\'", escaped) # double escape single quote
paste0("'",escaped,"'")
} else if (is.logical(private$value_)) {
ifelse(private$value_,"true","false")
} else {
as.character(private$value_)
}
}
),
private = list(
value_ = NULL
)
)
###############
# Unary class #
###############
#' Unary operation expression class
#' @description
#' Class representing a unary operation in an expression
#' @export
scidb.expression.unary <- R6::R6Class(
"scidb.expression.unary",
inherit = scidb.expression,
portable = F, cloneable = T, lock_objects = F, lock_class = F,
public = list(
#' @param op operation string
#' @param input input scidb.epxression object
initialize = function(op, input) {
private$op_ <- op
private$child_ <- NULL
if (!is.null(input)) {
if ('scidb.expression' %in% class(input)) {
private$child_ <- input
} else {
private$child_ <- scidb.expression.symbol$new(input)
}
}
},
#' @description Returns the type of the expression as a string
#' @return "UNARY"
type = function() {
"UNARY"
},
#' @description Returns the unary operation name
#' @return unary op name (length-1 character)
operation = function() {
private$op_
},
#' @description Returns the distinct symbols used in the expression
#' @return vector of symbol names
symbols = function() {
private$child_$symbols()
},
#' @description Whether or not the expression is a function of (at most) a single symbol
#' @return a boolean value indicating unary or non-unary
unary = function() {
private$child_$unary()
},
#' @description Whether or not the expression is unary or a boolean AND or OR of
#' two other in-turn unary composite expressions
#' @return a boolean value indicating unary-composite or not
unary_composite = function() {
# operation on a unary composite is not unary;
# the special case of the unary operator ! can
# be handled by using De Morgan laws to push
# down the negation during expression construction
private$child_$unary()
},
#' @description Renames the symbols in the expression according to replacements in the arguments
#' @param ... optional list of string replacements for each symbol in the expression
substitute = function(...) {
private$child_$substitute(...)
},
#' @description Convert the expression to an R string, with optional symbol substitutions
#' @param ... optional list of string replacements for each symbol in the expression
#' @return evaluation of this predicate as a single R string
to_r = function(...) {
child_r <- ifelse(is.null(private$child_), "", private$child_$to_r(...))
# note: may need to handle special case unary operators
# that are not called in the functional form `op`(...)
# `-` should not need special casing, as -(x) <=> -x
paste0(private$op_,"(",child_r,")")
},
#' @description Convert the expression to an AFL string, with optional symbol substitutions
#' @param ... optional list of string replacements for each symbol in the expression
#' @return evaluation of this predicate as a single AFL string
to_afl = function(...) {
child_afl <- ifelse(is.null(private$child_), "", private$child_$to_afl(...))
remapped_op <- switch(private$op_,
"nchar" = "strlen",
"is.na" = "is_null",
"is.null" = "is_null",
"is.nan" = "is_nan",
"!" = "not",
private$op_)
# note: may need to handle special case unary operators
# that are not called in the functional form `op`(...)
# `-` should not need special casing, as -(x) <=> -x
paste0(remapped_op,"(",child_afl,")") # otherwise assume the function name is the same
}
),
private = list(
op_ = NULL,
child_ = NULL
)
)
################
# Binary class #
################
#' Binary expression class
#' @description
#' Class representing a binary operation in an expression
#' @export
scidb.expression.binary <- R6::R6Class(
"scidb.expression.binary",
inherit = scidb.expression,
portable = F, cloneable = T, lock_objects = F, lock_class = F,
public = list(
#' @param op compound type ("&&", "||", or "!")
#' @param left left predicate (for all op cases)
#' @param right right predicate (for "&&" or "||")
initialize = function(op, left, right) {
private$op_ <- switch(op,
"&" = "&&", # special case synonym
"|" = "||", # special case synonym
op)
private$left_ <- left
private$right_ <- right
# Basic type checking
if (op %in% c(">=",">","<=","<")) {
if ("scidb.expression.value" %in% class(private$left_)
&& (!is.numeric(private$left_$raw()) || length(private$left_$raw()) > 1 ) ) {
stop(paste0("ERROR: cannot apply numeric operation '", op, "'; LHS is not a scalar numeric"))
}
if ("scidb.expression.value" %in% class(private$right_)
&& (!is.numeric(private$right_$raw()) || length(private$right_$raw()) > 1 ) ) {
stop(paste0("ERROR: cannot apply numeric operation '", op, "'; RHS is not a scalar numeric"))
}
} else if (op %in% c("stringr::str_detect","paste0")) {
if ("scidb.expression.value" %in% class(private$left_)
&& (!is.character(private$left_$raw()) || length(private$left_$raw()) > 1 ) ) {
stop(paste0("ERROR: cannot apply character operation '", op, "'; LHS is not a scalar character"))
}
if ("scidb.expression.value" %in% class(private$right_)
&& (!is.character(private$right_$raw()) || length(private$right_$raw()) > 1 ) ) {
stop(paste0("ERROR: cannot apply character operation '", op, "'; RHS is not a scalar character"))
}
}
},
#' @description Returns the type of the expression as a string
#' @return "BINARY"
type = function() {
"BINARY"
},
#' @description Returns the binary operation name
#' @return binary op name (length-1 character)
operation = function() {
private$op_
},
#' @return if type "&&", "||", or "!", left predicate; else NULL
left = function() {
private$left_
},
#' @return if type "&&" or "||", right predicate; else NULL
right = function() {
private$right_
},
#' @description Returns the distinct symbols used in the expression
#' @return vector of symbol names
symbols = function() {
sort(unique(union(private$left_$symbols(), private$right_$symbols())))
},
#' @description Whether or not the expression is a function of (at most) a single symbol
#' @return a boolean value indicating unary or non-unary
unary = function() {
if (private$left_$type() == "VALUE") {
private$right_$unary()
} else if (private$right_$type() == "VALUE") {
private$left_$unary()
} else {
# neither are values
private$left_$unary() && private$right_$unary() && private$left_$symbols() == private$right_$symbols()
}
},
#' @description Whether or not the expression is unary or a boolean AND or OR of
#' two other in-turn unary composite expressions
#' @return a boolean value indicating unary-composite or not
unary_composite = function() {
if (private$left_$type() == "VALUE") {
private$right_$unary_composite()
} else if (private$right_$type() == "VALUE") {
private$left_$unary_composite()
} else {
# neither are values
private$left_$unary_composite() && private$right_$unary_composite() && private$op_ %in% c("&&","||")
}
},
#' @description Renames the symbols in the expression according to replacements in the arguments
#' @param ... optional list of string replacements for each symbol in the expression
substitute = function(...) {
private$left_$substitute(...)
private$right_$substitute(...)
},
#' @description Convert the expression to an R string, with optional symbol substitutions
#' @param ... optional list of string replacements for each symbol in the expression
#' @return evaluation of this predicate as a single R string
to_r = function(...) {
if (private$infix(private$op_)) {
# if left|right is binary, wrap it in parenthesis to be safe
leftR <- ifelse(private$left_$type() != "BINARY",
private$left_$to_r(...),
paste0("(", private$left_$to_r(...), ")")
)
rightR <- ifelse(private$right_$type() != "BINARY",
private$right_$to_r(...),
paste0("(", private$right_$to_r(...), ")")
)
paste0(leftR, " ", private$op_, " ", rightR)
} else {
paste0(private$op_, "(", private$left_$to_r(...), ", ", private$right_$to_r(...), ")")
}
},
#' @description Convert the expression to an AFL string, with optional symbol substitutions
#' @param ... optional list of string replacements for each symbol in the expression
#' @return evaluation of this predicate as a single AFL string
to_afl = function(...) {
remapped_op <- switch(private$op_,
"==" = "=",
"&&" = "and",
"||" = "or",
"stringr::str_detect" = "regex",
"paste0" = "+",
private$op_)
if (private$infix(remapped_op)) {
leftR <- ifelse(private$left_$type() != "BINARY",
private$left_$to_afl(...),
paste0("(", private$left_$to_afl(...), ")")
)
rightR <- ifelse(private$right_$type() != "BINARY",
private$right_$to_afl(...),
paste0("(", private$right_$to_afl(...), ")")
)
paste0(leftR, " ", remapped_op, " ", rightR)
} else {
paste0(remapped_op, "(", private$left_$to_afl(...), ", ", private$right_$to_afl(...), ")")
}
}
),
private = list(
op_ = NULL,
left_ = NULL,
right_ = NULL,
infix = function(op) {
op %in% c("&&","and","||","or","=","==","!=",">=","<=",">","<","=","+","-","*","/")
}
)
)
######################
# Boolean operations #
######################
#' binary conjunction operator for predicates
#' @name and.scidb.expression
#' @param P1 first predicate object for the conjunction
#' @param P2 second predicate object for the conjunction
#' @export
'&.scidb.expression' <- function(P1, P2) {
scidb.expression.binary$new(op = "&&", left = P1, right = P2)
}
#' binary disjunction operator for predicates
#' @name or.scidb.expression
#' @param P1 first predicate object for the disjunction
#' @param P2 second predicate object for the disjunction
#' @export
'|.scidb.expression' <- function(P1, P2) {
scidb.expression.binary$new(op = "||", left = P1, right = P2)
}
#' unary negation operator for a predicate
#' @name not.scidb.expression
#' @param P predicate object to negate
#' @export
'!.scidb.expression' <- function(P) {
# Push down simplifications where possible
if ('scidb.expression.unary' %in% class(P)) {
switch(P$operation(),
"!" = P$child(), # !(!P) <=> P
scidb.expression.unary$new(op = "!", input = P)
)
} else if ('scidb.expression.binary' %in% class(P)) {
switch(P$operation(),
"&&" = !P$left() | !P$right(), # !(P && Q) <=> !P || !Q
"||" = !P$left() & !P$right(), # !(P || Q) <=> !P && !Q
"==" = scidb.expression.binary$new("!=", P$left(), P$right()),
"!=" = scidb.expression.binary$new("==", P$left(), P$right()),
">=" = scidb.expression.binary$new("<", P$left(), P$right()),
">" = scidb.expression.binary$new("<=", P$left(), P$right()),
"<=" = scidb.expression.binary$new(">", P$left(), P$right()),
"<" = scidb.expression.binary$new(">=", P$left(), P$right()),
scidb.expression.unary$new(op = "!", input = P)
)
} else {
scidb.expression.unary$new(op = "!", input = P)
}
}
################################################################
# Walking the AST to parse R expressions into bound predicates #
################################################################
expr_type <- function(x) {
if (rlang::is_vector(x)) {
"constant"
} else if (rlang::is_symbol(x)) {
"symbol"
} else if (rlang::is_call(x)) {
"call"
} else if (rlang::is_pairlist(x)) {
"pairlist"
} else {
typeof(x)
}
}
switch_expr <- function(x, ...) {
switch(expr_type(x),
...,
stop("Don't know how to handle type ", typeof(x), call. = FALSE)
)
}
# Recurse AST to turn expressions into these assignments
expression_parser_helper <- function(e) {
switch_expr(e,
symbol = scidb.expression.symbol$new(e),
constant = scidb.expression.value$new(e),
call = {
L <- length(e)
op <- rlang::as_string(e[[1]])
# Call funcitons taking no arguments
if (L < 2) {
return( scidb.expression.value$new(do.call(op, list())))
}
# Recursively parse contents
values <- lapply(2:L,function(i) { expression_parser_helper(e[[i]]) })
# If all the sub-elements are constants, just evaluate R call
if (all(unlist(sapply(values, function(v) { v$type() == "VALUE" })))) {
raw_values <- lapply(values, function(v) { v$raw() })
return( scidb.expression.value$new(do.call(op, raw_values)))
}
# Otherwise build a compound expression
if (L == 2) {
value <- values[[1]]
# If op is (, just forward contents
if (op == '(') {
return(value)
}
# If op is !, use the !.scidb.expression operator for its pushdown logic
if (op == '!') {
return(!value)
}
# Otherwise, return a unary transformation
return(scidb.expression.unary$new(op, value))
} else if (L == 3) {
lhs <- values[[1]]
rhs <- values[[2]]
# Special operations
# In some of these operations, the RHS is required to be a literal value,
# and in all cases it will be modified as necessary
if (op == "%in%") {
if (rhs$type() != "VALUE") {
stop("ERROR: cannot use special operator %in% with a symbolic RHS")
}
reduced <- Reduce('|.scidb.expression', lapply(rhs$raw(),
function(x) {
scidb.expression.binary$new('==',lhs,scidb.expression.value$new(x))
}))
return(reduced)
} else if (op == "%not_in%") {
if (rhs$type() != "VALUE") {
stop("ERROR: cannot use special operator %not_in% with a symbolic RHS")
}
reduced <- Reduce('&.scidb.expression', lapply(rhs$raw(),
function(x) {
scidb.expression.binary$new('!=',lhs,scidb.expression.value$new(x))
}))
return(reduced)
} else if (op == "str_detect") {
op <- "stringr::str_detect"
} else if (op == "%like%") {
op <- "stringr::str_detect"
rhs <- expression_parser_helper(rlang::expr(paste0("(?i)",!!e[[3]])))
} else if (op == "%contains%") {
op <- "stringr::str_detect"
rhs <- expression_parser_helper(rlang::expr(paste0("(?i).*",paste0(!!e[[3]],".*"))))
} else if (op == "%starts_with%") {
op <- "stringr::str_detect"
rhs <- expression_parser_helper(rlang::expr(paste0("(?i)",paste0(!!e[[3]],".*"))))
} else if (op == "%ends_with%") {
op <- "stringr::str_detect"
rhs <- expression_parser_helper(rlang::expr(paste0("(?i).*",!!e[[3]])))
}
# Return a binary operator
scidb.expression.binary$new(op, lhs, rhs)
}
}
)
}
#' Create a scidb.expression by parsing an R expression
#' @description
#' This recursive methods creates a tree of scidb.expression objects
#' corresponding to the R expression supplied as a parameter.
#'
#' R variables can be interpolated into the expression by prepending them
#' with the double-exclamation !! operator.
#'
#' In addition to base R unary and binary operands, this method
#' treats several operations as special cases, and also defines
#' several custom operations of its own that can be used:
#' * `%in%`: this vector membership operator, e.g. `x %in% c(1,2,...)`, can
#' only be used when the RHS value is a non-symbolic scalar or vector value
#' * `%not_in%`: this custom operand is the negation of `%in%`
#' * `%like%`, `%contains%`, `%starts_with%`, and `%ends_with%`: these custom
#' operands are treated as special cases of the regex function
#' `stringr::str_detect`, and can be applied to symbolic and non-symbolic arguments
#' @param ... R expression to parse
#' @return scidb.expression object
#' @export
expression_parser <- function(...) {
expression_parser_helper(rlang::expr(...))
}
###########################
# Unbound Predicate class #
###########################
#' Anonymous predicate class
#' @description
#' Class representing an expression with a free symbol,
#' similar in intent to an anonymous function
#' @export
scidb.predicate <- R6::R6Class(
"scidb.predicate",
portable = F, cloneable = T, lock_objects = F, lock_class = F,
public = list(
#' @description Create a predicate from a free variable placeholder name and expression
#' @param variable free variable symbol
#' @param expression expression as a function of the free variable
initialize = function(variable, expression) {
v <- rlang::expr(!!variable)
if (!((rlang::is_string(v) && length(v) == 1) || rlang::is_symbol(v))) {
stop("ERROR: input 'variable' must be a symbol or symbol name character")
}
if (!("scidb.expression" %in% class(expression))) {
stop("ERROR: input 'expression' must be a scidb.expression object")
}
private$variable_ <- rlang::as_string(v)
private$expression_ <- expression
},
#' @description Rename the free symbol in the internal expression
#' @param new_variable new string to represent unbound variable
resymbol = function(new_variable) {
v <- rlang::expr(!!new_variable)
if (!((rlang::is_string(v) && length(v) == 1) || rlang::is_symbol(v))) {
stop("ERROR: input 'variable' must be a symbol or symbol name character")
}
symbol_map <- list()
symbol_map[[private$variable_]] <- rlang::as_string(v)
do.call(private$expression_$substitute,symbol_map)
},
#' @description Evaluate predicate with symbol substituted for the free variable, returning a scidb.expression
#' @param symbol symbol name to take place of free variable
evaluate = function(symbol) {
R <- self$clone(deep=TRUE)
R$resymbol(symbol)
R$private$expression_
}
),
private = list(
variable_ = NULL,
expression_ = NULL
)
)
# Boolean operations
#' binary conjunction operator for predicates
#' @name and.scidb.predicate
#' @param P1 first predicate object for the conjunction
#' @param P2 second predicate object for the conjunction
#' @export
'&.scidb.predicate' <- function(P1, P2) {
P1$resymbol("x")
P2$resymbol("x")
scidb.predicate$new("x", P1$private$expression_ & P2$private$expression_)
}
#' binary disjunction operator for predicates
#' @name or.scidb.predicate
#' @param P1 first predicate object for the disjunction
#' @param P2 second predicate object for the disjunction
#' @export
'|.scidb.predicate' <- function(P1, P2) {
P1$resymbol("x")
P2$resymbol("x")
scidb.predicate$new("x", P1$private$expression_ | P2$private$expression_)
}
#' unary negation operator for a predicate
#' @name not.scidb.predicate
#' @param P predicate object to negate
#' @export
'!.scidb.predicate' <- function(P) {
P$resymbol("x")
scidb.predicate$new("x", !P$private$expression_)
}
#' @title scidb predicate expressions
#' @name scidb_predicate_expressions
#' @description
#' These functions create anonymous scidb.predicate expressions objects,
#' which can be chained with boolean operators ([&][&.scidb.predicate],
#' [|][|.scidb.predicate], and [!][!.scidb.predicate]) and supplied as
#' values in arguments to [filter].
#' * [EQUALS]
#' * [NEQ]
#' * [IN]
#' * [NOT_IN]
#' * [IS_NULL]
#' * [NOT_NULL]
#' * [GEQ]
#' * [GT]
#' * [LEQ]
#' * [LT]
#' * [IN_RANGE]
#' * [IN_RANGE_INCL]
#' * [IN_RANGE_EXCL]
#' * [LIKE]
#' * [CONTAINS]
#' * [STARTS_WITH]
#' * [ENDS_WITH]
#' @md
NULL
#> NULL
# Basic equality comparisons
#' equality binary predicate
#' @param value numeric or character value for equality comparison
EQUALS <- function(value) {
scidb.predicate$new("x", expression_parser(x == !!value))
}
#' inequality binary predicate
#' @param value numeric or character value for inequality comparison
NEQ <- function(value) {
scidb.predicate$new("x", expression_parser(x != !!value))
}
# Compound equality comparisons: set membership or absence
#' set membership binary predicate
#' @param values vector of numeric or character values for membership test
IN <- function(values) {
Reduce('|.scidb.predicate', lapply(values, EQUALS))
}
#' set anti-membership binary predicate
#' @param values vector of numeric or character values for membership test
NOT_IN <- function(values) {
Reduce('&.scidb.predicate', lapply(values, NEQ))
}
# Special value checks
#' null test unary predicate
IS_NULL <- function() {
scidb.predicate$new("x", expression_parser(is.null(x)))
}
#' negation of IS_NULL
NOT_NULL <- function() {
!IS_NULL()
}
# Basic inequality comparisons
#' greater-than-or-equal-to binary predicate
#' @param value numeric value for inequality comparison
GEQ <- function(value) {
scidb.predicate$new("x", expression_parser(x >= !!value))
}
#' greater-than binary predicate
#' @param value numeric value for inequality comparison
GT <- function(value) {
scidb.predicate$new("x", expression_parser(x > !!value))
}
#' less-than-or-equal-to binary predicate
#' @param value numeric value for inequality comparison
LEQ <- function(value) {
scidb.predicate$new("x", expression_parser(x <= !!value))
}
#' less-than binary predicate
#' @param value numeric value for inequality comparison
LT <- function(value) {
scidb.predicate$new("x", expression_parser(x < !!value))
}
# Compound arithmetic inequality comparisons
#' inclusive range test
#' @param lower lower bound
#' @param upper upper bound
IN_RANGE <- function(lower,upper) {
GEQ(lower) & LEQ(upper)
}
#' inclusive range test
#' @param lower lower bound
#' @param upper upper bound
IN_RANGE_INCL <- function(lower,upper) {
GEQ(lower) & LEQ(upper)
}
#' exclusive range test
#' @param lower lower bound
#' @param upper upper bound
IN_RANGE_EXCL <- function(lower,upper) {
GT(lower) & LT(upper)
}
# Regular expression comparisons
#' test if this pattern matches the entire string
#' @param pattern regular expression pattern
LIKE <- function(pattern) {
scidb.predicate$new("x", expression_parser(x %like% !!pattern))
}
#' test if this pattern matches a substring
#' @param pattern regular expression pattern
CONTAINS <- function(pattern) {
scidb.predicate$new("x", expression_parser(x %contains% !!pattern))
}
#' test if this pattern matches the start of the string
#' @param pattern regular expression pattern
STARTS_WITH <- function(pattern) {
scidb.predicate$new("x", expression_parser(x %starts_with% !!pattern))
}
#' test if this pattern matches the end of the string
#' @param pattern regular expression pattern
ENDS_WITH <- function(pattern) {
scidb.predicate$new("x", expression_parser(x %ends_with% !!pattern))
}
####################
# Helper functions #
####################
#' Parse multiple R expressions into a list of boolean expressions
#' @description
#' This method takes a variable number of R expressions as arguments, returning a list
#' of scidb.expression objects. There are three formats for constructing a boolean expression
#' * Named value: an expression like `x = 1` is evaluated as the expression x == 1, while
#' one like `x %in% c(1,2,3)` is evaluated as `x %in% c(1,2,3)`
#' * Named scidb predicate expression: an expression like `x = IN_RANGE(4,10)` is evaluated as
#' `x >= 4 && x <= 10`, making use of the anonymous predicate method `scidb::IN_RANGE`.
#' The page [scidb predicate expressions][scidb_predicate_expressions] lists the available expressions
#' * Arbitrary expression evaluating to a boolean; this is parsed with \link{expression_parser}
#' and cannot take the form of a named argument `x = ...`.
#' @examples
#' \dontrun{
#' scidb::filter(x = 1)
#' scidb::filter(x %in% c(1,2,3))
#' scidb::filter(x = IN_RANGE(5,10) | IN_RANGE(15,20))
#' scidb::filter(foo %like% "hello", bar %like% "world")
#' scidb::filter(foo %like% "hello" || bar %like% "world")
#' scidb::filter(nchar(foo) > 5)
#' }
#' @param ... R expressions representing compound predicates to be applied jointly
#' to possibly-multiple bound variables
#' @return a list of scidb.expression objects corresponding to each input expression
#' @export
#' @md
filter <- function(...) {
fs <- lapply(rlang::exprs(...), expression_parser_helper)
# Fix up fs to all be scidb.expression objects
fixup <- function(key, value) {
if ('scidb.expression.value' %in% class(value)) {
value <- value$raw()
if ('scidb.predicate' %in% class(value)) {
# lambda function style; bind with key to make an expression
value$evaluate(key)
} else if (class(value) %in% c("logical","integer","numeric","integer64","character")) {
# biobank style; turn into a set membership expression
IN(value)$evaluate(key)
} else {
# unsupported value type
stop(paste("ERROR: the list of filters includes an item of class", class(raw_value)[1], "which cannot be parsed"))
}
} else if ('scidb.expression' %in% class(value)) {
value
} else {
# unsupported filter class
stop(paste("ERROR: the list of filters includes an item of class", class(value)[1], "which cannot be parsed"))
}
}
lapply(seq_along(fs), function(k, v, i) {
fixup(k[[i]], v[[i]])
},
k = names(fs),
v = fs
)
}
#' Merge list of scidb.expression objects in conjunction
#' @param filter_list a list of scidb.expression objects, such as one created by scidb::filter
#' @return a single scidb.expression object equivalent to the conjunctions of all elements of filter_list
#' @export
merge_filters <- function(filter_list) {
Reduce(function(l,r) l & r, filter_list)
}
#' Parse multiple R expressions into a single AFL string
#' @param ... R expressions representing compound predicates to be applied jointly
#' to possibly-multiple bound variables
#' @return a character vector of the AFL strings corresponding to the expressions
#' @export
filter_to_afl <- function(...) {
merge_filters(filter(...))$to_afl()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.