R/afl.R

Defines functions filter_to_afl merge_filters filter ENDS_WITH STARTS_WITH CONTAINS LIKE IN_RANGE_EXCL IN_RANGE_INCL IN_RANGE LT LEQ GT GEQ NOT_NULL IS_NULL NOT_IN IN NEQ EQUALS expression_parser expression_parser_helper switch_expr expr_type aflhelp rsub update.afl

Documented in aflhelp CONTAINS ENDS_WITH EQUALS expression_parser filter filter_to_afl GEQ GT IN IN_RANGE IN_RANGE_EXCL IN_RANGE_INCL IS_NULL LEQ LIKE LT merge_filters NEQ NOT_IN NOT_NULL rsub STARTS_WITH update.afl

########################
# AFL operator helpers #
########################

#' Update available AFL operators
#'
#' @param .db an \code{afl} object (a SciDB database connection returned from \code{\link{scidbconnect}})
#' @param .new a character vector of operator names
#' @param .ops an optional three-variable data frame with variables name, signature, help, corresponding
#' to the operator names, signatures, and help files (from SciDB Doxygen documentation)
#' @return an updated database \code{afl} object
#' @note Every operator gets a shallow copy of the db argument; that is, \code{attributes(db[i])$conn} should be the same for every operator index i.
#' @keywords internal
#' @importFrom stats runif
#' @importFrom utils head
update.afl = function(.db, .new, .ops)
{
  if (missing(.ops))
  {
    e = new.env()
    data("operators", package="scidb", envir=e)
    .ops = e$operators
  }

  afl = function(.db, .name, .help, .signature)
  {
    .name = paste(.name)
    .help = paste(.help)
    .signature = paste(.signature)
    function(...)
    {
      .pf = parent.frame()
      .depend = new.env()
      .args = paste(
               lapply(as.list(match.call())[-1],
                 function(.x) tryCatch({
                   ex = eval(.x, envir=.pf)
                   if (class(ex)[1] %in% "scidb")
                   {
                     assign(tail(make.names(c(ls(.depend), paste("V", runif(1), sep="")), unique=TRUE), 1), ex, envir=.depend)
                     ex@name
                   }
                   else .x
               }, error=function(e) .x)),
         collapse=",")

      expr = sprintf("%s(%s)", .name, .args)
      # handle aliasing
      expr = gsub("%as%", " as ", expr)
      # handle R scalar variable substitutions
      expr = rsub(expr, parent.frame())

      is_ddl = any(grepl(.name, getOption("scidb.ddl"), ignore.case=TRUE))
      
      trace <- .TraceEnter(paste0("afl!", .name),
                           args=args,
                           .depend=.depend,
                           expr=expr,
                           is_ddl=is_ddl)
      on.exit(.TraceExit(trace, returnValue()), add=TRUE)

      if (getOption("scidb.debug", FALSE)) message("AFL EXPRESSION: ", expr)
      # Some special AFL non-operator expressions don't return arrays
      if (is_ddl)
      {
        return(iquery(.db, expr))
      }
      ans = scidb(.db, expr)
      ans@meta$depend = as.list(.depend)
      ans
    }
  }

  for (.x in .new)
  {
    .formals = NULL
    .signature = ""
    .help = ""
    .i = .ops[, 1] == .x
    # update formal function arguments for nice tab completion help
    if (any(.i))
    {
      .def = head(.ops[.i, ], 1)
      # XXX very ugly...
      .formals = strsplit(gsub("[=:].*", "", gsub("\\|.*", "", gsub(" *", "", gsub("\\]", "", gsub("\\[", "", gsub("\\[.*\\|.*\\]", "", gsub("[+*{})]", "", gsub(".*\\(", "", .def[2])))))))), ",")[[1]]
      .help = .def[3]
      .signature = .def[2]
    }
    .db[[.x]] = afl(.db, .x, .help, .signature)
    if(!is.null(.formals))
      formals(.db[[.x]]) = eval(parse(text=sprintf("alist(%s, ...=)", paste(paste(.formals, "="), collapse=", "))))
    class(.db[[.x]]) = "operator"
  }
  .db
}

#' Substitute scalar-valued R expressions into an AFL expression
#' R expressions are marked with R(expression)
#' @param x character valued AFL expression
#' @param env environment in which to evaluate R expressions
#' @return character valued AFL expression
#' @keywords internal function
rsub = function(x, env)
{
  if (! grepl("[^[:alnum:]_]R\\(", x)) return(x)
  imbalance_paren = function(x) # efficiently find the first imbalanced ")" character position
  {
    which (cumsum( (as.numeric(charToRaw(x) == charToRaw("("))) - (as.numeric(charToRaw(x) == charToRaw(")"))) ) < 0)[1]
  }
  y = gsub("([^[:alnum:]_])R\\(", "\\1@R(", x)
  y = strsplit(y, "@R\\(")[[1]]
  expr = Map(function(x)
  {
    i = imbalance_paren(x)
    rexp = eval(parse(text=substring(x, 1, i - 1)), envir=env)
    rmdr = substring(x, i + 1)
    paste(rexp, rmdr, sep="")
  }, y[-1])
  sprintf("%s%s", y[1], paste(expr, collapse=""))
}

#' Display SciDB AFL operator documentation
#' @param topic an afl object from a SciDB database connection, or optionally a character string name
#' @param db optional database connection from \code{\link{scidbconnect}} (only needed when \code{topic} is a character string)
#' @return displays help
#' @examples
#' \dontrun{
#' d <- scidbconnect()
#' aflhelp("list", d)     # explicitly look up a character string
#' help(d$list)        # same thing via R's \code{help} function
#' }
#' @importFrom  utils data
#' @export
aflhelp = function(topic, db)
{
  if (is.character(topic))
  {
    if (missing(db)) stop("character topics require a database connection argument")
    topic = db[[topic]]
  }
  h = sprintf("%s\n\n%s", environment(topic)$.signature, gsub("\\n{2,}", "\n", environment(topic)$.help))
  message(h)
}


#########################
# Base expression class #
#########################

#' Virtual base expression class
#' @description
#' Class representing a N-ary predicate, bound with an ordered vector of
#' names for the N (possibly non-distinct) variables
#' @export
scidb.expression <- R6::R6Class(
  "scidb.expression",
  portable = F, cloneable = T, lock_objects = F, lock_class = F,
  public = list(
    #' @description Returns the type of the expression as a string
    #' @return type: "SYMBOL", "VALUE", "UNARY", "BINARY", or "TERNARY" (unimplemented)
    type = function() {
      stop("ERROR: base virtual method should be unreachable")
    },
    #' @description Returns the distinct symbols used in the expression
    #' @return vector of symbol names
    symbols = function() {
      stop("ERROR: base virtual method should be unreachable")
    },
    #' @description Whether or not the expression is a function of (at most) a single symbol
    #' @return a boolean value indicating unary or non-unary
    unary = function() {
      stop("ERROR: base virtual method should be unreachable")
    },
    #' @description Whether or not the expression is unary or a boolean AND or OR of
    #' two other in-turn unary composite expressions
    #' @return a boolean value indicating unary-composite or not
    unary_composite = function() {
      stop("ERROR: base virtual method should be unreachable")
    },
    #' @description Renames the symbols in the expression according to replacements in the arguments
    #' @param ... optional list of string replacements for each symbol in the expression
    substitute = function(...) {
      stop("ERROR: base virtual method should be unreachable")
    },
    #' @description Convert the expression to an R string, with optional symbol substitutions
    #' @param ... optional list of string replacements for each symbol in the expression
    #' @return evaluation of this predicate as a single R string
    to_r = function(...) {
      stop("ERROR: base virtual method should be unreachable")
    },
    #' @description Convert the expression to an AFL string, with optional symbol substitutions
    #' @param ... optional list of string replacements for each symbol in the expression
    #' @return evaluation of this predicate as a single AFL string
    to_afl = function(...) {
      stop("ERROR: base virtual method should be unreachable")
    }
  ),
  private = list(
  )
)

#####################
# Leaf symbol class #
#####################

#' Leaf symbol expression class
#' @description
#' Class representing a leaf symbol in an expression
#' @export
scidb.expression.symbol <- R6::R6Class(
  "scidb.expression.symbol",
  inherit = scidb.expression,
  portable = F, cloneable = T, lock_objects = F, lock_class = F,
  public = list(
    #' @param input string name of symbol
    initialize = function(input) {
      if (rlang::is_symbol(input)) {
        private$symbol_ <- rlang::as_string(input)
      } else if (is.character(input) && length(input) == 1) {
        private$symbol_ <- input
      } else {
        stop("ERROR: argument 'input' must be either a single string or a rlang symbol object")
      }
    },
    #' @description Returns the type of the expression as a string
    #' @return "SYMBOL"
    type = function() {
      "SYMBOL"
    },
    #' @description Returns the distinct symbols used in the expression
    #' @return vector of symbol names
    symbols = function() {
      private$symbol_
    },
    #' @description Whether or not the expression is a function of (at most) a single symbol
    #' @return a boolean value indicating unary or non-unary
    unary = function() {
      TRUE
    },
    #' @description Whether or not the expression is unary or a boolean AND or OR of
    #' two other in-turn unary composite expressions
    #' @return a boolean value indicating unary-composite or not
    unary_composite = function() {
      TRUE
    },
    #' @description Renames the symbols in the expression according to replacements in the arguments
    #' @param ... optional list of string replacements for each symbol in the expression
    substitute = function(...) {
      symbol_map <- list(...)
      private$symbol_ <- ifelse (symbol_ %in% names(symbol_map), symbol_map[[symbol_]], symbol_)
    },
    #' @description Convert the expression to an R string, with optional symbol substitutions
    #' @param ... optional list of string replacements for each symbol in the expression
    #' @return evaluation of this predicate as a single R string
    to_r = function(...) {
      symbol_map <- list(...)
      variable <- ifelse (symbol_ %in% names(symbol_map), symbol_map[[symbol_]], symbol_)
      paste0("`",  variable, "`")
    },
    #' @description Convert the expression to an AFL string, with optional symbol substitutions
    #' @param ... optional list of string replacements for each symbol in the expression
    #' @return evaluation of this predicate as a single AFL string
    to_afl = function(...) {
      symbol_map <- list(...)
      ifelse (symbol_ %in% names(symbol_map), symbol_map[[symbol_]], symbol_)
    }
  ),
  private = list(
    symbol_ = NULL
  )
)

#####################
# Leaf value class #
#####################

#' Leaf value expression class
#' @description
#' Class representing a leaf value in an expression
#' @export
scidb.expression.value <- R6::R6Class(
  "scidb.expression.value",
  inherit = scidb.expression,
  portable = F, cloneable = T, lock_objects = F, lock_class = F,
  public = list(
    #' @description Wrap a raw R value in a scidb.expression object
    #' @param input raw R value
    initialize = function(input) {
      if (!(rlang::is_syntactic_literal(input)
            || rlang::is_vector(input)
            || "scidb.predicate" %in% class(input)) ) {
        stop("ERROR: argument 'input' must be either a scalar or vector basic type, or scidb.predicate object")
      }
      private$value_ <- input
    },
    #' @description Returns the type of the expression as a string
    #' @return "VALUE"
    type = function() {
      "VALUE"
    },
    #' @description Returns the R value bound by this scidb.expression
    #' @return raw R value
    raw = function() {
      private$value_
    },
    #' @description Returns the distinct symbols used in the expression
    #' @return vector of symbol names
    symbols = function() {
      character()
    },
    #' @description Whether or not the expression is a function of (at most) a single symbol
    #' @return a boolean value indicating unary or non-unary
    unary = function() {
      TRUE # ok, so technically no-ary, but this really means "at most one free symbol"
    },
    #' @description Whether or not the expression is unary or a boolean AND or OR of
    #' two other in-turn unary composite expressions
    #' @return a boolean value indicating unary-composite or not
    unary_composite = function() {
      TRUE
    },
    #' @description Renames the symbols in the expression according to replacements in the arguments
    #' @param ... optional list of string replacements for each symbol in the expression
    substitute = function(...) {
    },
    #' @description Convert the expression to an R string, with optional symbol substitutions
    #' @param ... optional list of string replacements for each symbol in the expression
    #' @return evaluation of this predicate as a single R string
    to_r = function(...) {
      # note: special casing may be necessary for floating point numbers
      if (is.character(private$value_)) {
        escaped <- private$value_
        escaped <- gsub('"','\\\\"', escaped) # single escape double quote 
        # no need to escape single quote
        paste0('"',escaped,'"')
      } else {
        as.character(private$value_)
      }
    },
    #' @description Convert the expression to an AFL string, with optional symbol substitutions
    #' @param ... optional list of string replacements for each symbol in the expression
    #' @return evaluation of this predicate as a single AFL string
    to_afl = function(...) {
      # note: special casing may be necessary for floating point numbers
      if (is.character(private$value_)) {
        escaped <- private$value_
        # no need to escape double quote
        escaped <- gsub("\'","\\\\'", escaped) # double escape single quote
        paste0("'",escaped,"'")
      } else if (is.logical(private$value_)) {
        ifelse(private$value_,"true","false")
      } else {
        as.character(private$value_)
      }
    }
  ),
  private = list(
    value_ = NULL
  )
)

###############
# Unary class #
###############

#' Unary operation expression class
#' @description
#' Class representing a unary operation in an expression
#' @export
scidb.expression.unary <- R6::R6Class(
  "scidb.expression.unary",
  inherit = scidb.expression,
  portable = F, cloneable = T, lock_objects = F, lock_class = F,
  public = list(
    #' @param op operation string
    #' @param input input scidb.epxression object
    initialize = function(op, input) {
      private$op_ <- op
      private$child_ <- NULL
      if (!is.null(input)) {
        if ('scidb.expression' %in% class(input)) {
          private$child_ <- input
        } else {
          private$child_ <- scidb.expression.symbol$new(input)
        }
      }
    },
    #' @description Returns the type of the expression as a string
    #' @return "UNARY"
    type = function() {
      "UNARY"
    },
    #' @description Returns the unary operation name
    #' @return unary op name (length-1 character)
    operation = function() {
      private$op_
    },
    #' @description Returns the distinct symbols used in the expression
    #' @return vector of symbol names
    symbols = function() {
      private$child_$symbols()
    },
    #' @description Whether or not the expression is a function of (at most) a single symbol
    #' @return a boolean value indicating unary or non-unary
    unary = function() {
      private$child_$unary()
    },
    #' @description Whether or not the expression is unary or a boolean AND or OR of
    #' two other in-turn unary composite expressions
    #' @return a boolean value indicating unary-composite or not
    unary_composite = function() {
      # operation on a unary composite is not unary;
      # the special case of the unary operator ! can
      # be handled by using De Morgan laws to push
      # down the negation during expression construction
      private$child_$unary()
    },
    #' @description Renames the symbols in the expression according to replacements in the arguments
    #' @param ... optional list of string replacements for each symbol in the expression
    substitute = function(...) {
      private$child_$substitute(...)
    },
    #' @description Convert the expression to an R string, with optional symbol substitutions
    #' @param ... optional list of string replacements for each symbol in the expression
    #' @return evaluation of this predicate as a single R string
    to_r = function(...) {
      child_r <- ifelse(is.null(private$child_), "", private$child_$to_r(...))
      # note: may need to handle special case unary operators
      # that are not called in the functional form `op`(...)
      # `-` should not need special casing, as -(x) <=> -x
      paste0(private$op_,"(",child_r,")")
    },
    #' @description Convert the expression to an AFL string, with optional symbol substitutions
    #' @param ... optional list of string replacements for each symbol in the expression
    #' @return evaluation of this predicate as a single AFL string
    to_afl = function(...) {
      child_afl <- ifelse(is.null(private$child_), "", private$child_$to_afl(...))
      remapped_op <- switch(private$op_,
                            "nchar" = "strlen",
                            "is.na" = "is_null",
                            "is.null" = "is_null",
                            "is.nan" = "is_nan",
                            "!" = "not",
                            private$op_)
      # note: may need to handle special case unary operators
      # that are not called in the functional form `op`(...)
      # `-` should not need special casing, as -(x) <=> -x
      paste0(remapped_op,"(",child_afl,")")      # otherwise assume the function name is the same
    }
  ),
  private = list(
    op_ = NULL,
    child_ = NULL
  )
)

################
# Binary class #
################

#' Binary expression class
#' @description
#' Class representing a binary operation in an expression
#' @export
scidb.expression.binary <- R6::R6Class(
  "scidb.expression.binary",
  inherit = scidb.expression,
  portable = F, cloneable = T, lock_objects = F, lock_class = F,
  public = list(
    #' @param op compound type ("&&", "||", or "!")
    #' @param left left predicate (for all op cases)
    #' @param right right predicate (for "&&" or "||")
    initialize = function(op, left, right) {
      private$op_ <- switch(op,
                            "&" = "&&", # special case synonym
                            "|" = "||", # special case synonym
                            op)
      private$left_ <- left
      private$right_ <- right
      # Basic type checking
      if (op %in% c(">=",">","<=","<")) {
        if ("scidb.expression.value" %in% class(private$left_)
            && (!is.numeric(private$left_$raw()) || length(private$left_$raw()) > 1 ) ) {
          stop(paste0("ERROR: cannot apply numeric operation '", op, "'; LHS is not a scalar numeric"))
        }
        if ("scidb.expression.value" %in% class(private$right_)
            && (!is.numeric(private$right_$raw()) || length(private$right_$raw()) > 1 ) ) {
          stop(paste0("ERROR: cannot apply numeric operation '", op, "'; RHS is not a scalar numeric"))
        }
      } else if (op %in% c("stringr::str_detect","paste0")) {
        if ("scidb.expression.value" %in% class(private$left_)
            && (!is.character(private$left_$raw()) || length(private$left_$raw()) > 1 ) ) {
          stop(paste0("ERROR: cannot apply character operation '", op, "'; LHS is not a scalar character"))
        }
        if ("scidb.expression.value" %in% class(private$right_)
            && (!is.character(private$right_$raw()) || length(private$right_$raw()) > 1 ) ) {
          stop(paste0("ERROR: cannot apply character operation '", op, "'; RHS is not a scalar character"))
        }
      }
    },
    #' @description Returns the type of the expression as a string
    #' @return "BINARY"
    type = function() {
      "BINARY"
    },
    #' @description Returns the binary operation name
    #' @return binary op name (length-1 character)
    operation = function() {
      private$op_
    },
    #' @return if type "&&", "||", or "!", left predicate; else NULL
    left = function() {
      private$left_
    },
    #' @return if type "&&" or "||", right predicate; else NULL
    right = function() {
      private$right_
    },
    #' @description Returns the distinct symbols used in the expression
    #' @return vector of symbol names
    symbols = function() {
      sort(unique(union(private$left_$symbols(), private$right_$symbols())))
    },
    #' @description Whether or not the expression is a function of (at most) a single symbol
    #' @return a boolean value indicating unary or non-unary
    unary = function() {
      if (private$left_$type() == "VALUE") {
        private$right_$unary()
      } else if (private$right_$type() == "VALUE") {
        private$left_$unary()
      } else {
        # neither are values
        private$left_$unary() && private$right_$unary() && private$left_$symbols() == private$right_$symbols()
      }
    },
    #' @description Whether or not the expression is unary or a boolean AND or OR of
    #' two other in-turn unary composite expressions
    #' @return a boolean value indicating unary-composite or not
    unary_composite = function() {
      if (private$left_$type() == "VALUE") {
        private$right_$unary_composite()
      } else if (private$right_$type() == "VALUE") {
        private$left_$unary_composite()
      } else {
        # neither are values
        private$left_$unary_composite() && private$right_$unary_composite() && private$op_ %in% c("&&","||")
      }
    },
    #' @description Renames the symbols in the expression according to replacements in the arguments
    #' @param ... optional list of string replacements for each symbol in the expression
    substitute = function(...) {
      private$left_$substitute(...)
      private$right_$substitute(...)
    },
    #' @description Convert the expression to an R string, with optional symbol substitutions
    #' @param ... optional list of string replacements for each symbol in the expression
    #' @return evaluation of this predicate as a single R string
    to_r = function(...) {
      if (private$infix(private$op_)) {
        # if left|right is binary, wrap it in parenthesis to be safe
        leftR <- ifelse(private$left_$type() != "BINARY",
                        private$left_$to_r(...),
                        paste0("(", private$left_$to_r(...), ")")
        )
        rightR <- ifelse(private$right_$type() != "BINARY",
                         private$right_$to_r(...),
                         paste0("(", private$right_$to_r(...), ")")
        )
        paste0(leftR, " ", private$op_, " ", rightR)
      } else {
        paste0(private$op_, "(", private$left_$to_r(...), ", ", private$right_$to_r(...), ")")
      }
    },
    #' @description Convert the expression to an AFL string, with optional symbol substitutions
    #' @param ... optional list of string replacements for each symbol in the expression
    #' @return evaluation of this predicate as a single AFL string
    to_afl = function(...) {
      remapped_op <- switch(private$op_,
                            "==" = "=",
                            "&&" = "and",
                            "||" = "or",
                            "stringr::str_detect" = "regex",
                            "paste0" = "+",
                            private$op_)
      if (private$infix(remapped_op)) {
        leftR <- ifelse(private$left_$type() != "BINARY",
                        private$left_$to_afl(...),
                        paste0("(", private$left_$to_afl(...), ")")
        )
        rightR <- ifelse(private$right_$type() != "BINARY",
                         private$right_$to_afl(...),
                         paste0("(", private$right_$to_afl(...), ")")
        )
        paste0(leftR, " ", remapped_op, " ", rightR)
      } else {
        paste0(remapped_op, "(", private$left_$to_afl(...), ", ", private$right_$to_afl(...), ")")
      }
    }
  ),
  private = list(
    op_ = NULL,
    left_ = NULL,
    right_ = NULL,
    infix = function(op) {
      op %in% c("&&","and","||","or","=","==","!=",">=","<=",">","<","=","+","-","*","/")
    }
  )
)

######################
# Boolean operations #
######################

#' binary conjunction operator for predicates
#' @name and.scidb.expression
#' @param P1 first predicate object for the conjunction
#' @param P2 second predicate object for the conjunction
#' @export
'&.scidb.expression' <- function(P1, P2) {
  scidb.expression.binary$new(op = "&&", left = P1, right = P2)
}

#' binary disjunction operator for predicates
#' @name or.scidb.expression
#' @param P1 first predicate object for the disjunction
#' @param P2 second predicate object for the disjunction
#' @export
'|.scidb.expression' <- function(P1, P2) {
  scidb.expression.binary$new(op = "||", left = P1, right = P2)
}

#' unary negation operator for a predicate
#' @name not.scidb.expression
#' @param P predicate object to negate
#' @export
'!.scidb.expression' <- function(P) {
  # Push down simplifications where possible
  if ('scidb.expression.unary' %in% class(P)) {
    switch(P$operation(),
      "!" = P$child(), # !(!P) <=> P
      scidb.expression.unary$new(op = "!", input = P)
    )
  } else if ('scidb.expression.binary' %in% class(P)) {
    switch(P$operation(),
      "&&" = !P$left() | !P$right(), # !(P && Q) <=> !P || !Q
      "||" = !P$left() & !P$right(), # !(P || Q) <=> !P && !Q
      "==" = scidb.expression.binary$new("!=", P$left(), P$right()),
      "!=" = scidb.expression.binary$new("==", P$left(), P$right()),
      ">=" = scidb.expression.binary$new("<", P$left(), P$right()),
      ">" = scidb.expression.binary$new("<=", P$left(), P$right()),
      "<=" = scidb.expression.binary$new(">", P$left(), P$right()),
      "<" = scidb.expression.binary$new(">=", P$left(), P$right()),
      scidb.expression.unary$new(op = "!", input = P)
    )
  } else {
    scidb.expression.unary$new(op = "!", input = P)
  }
}

################################################################
# Walking the AST to parse R expressions into bound predicates #
################################################################

expr_type <- function(x) {
  if (rlang::is_vector(x)) {
    "constant"
  } else if (rlang::is_symbol(x)) {
    "symbol"
  } else if (rlang::is_call(x)) {
    "call"
  } else if (rlang::is_pairlist(x)) {
    "pairlist"
  } else {
    typeof(x)
  }
}

switch_expr <- function(x, ...) {
  switch(expr_type(x),
         ...,
         stop("Don't know how to handle type ", typeof(x), call. = FALSE)
  )
}

# Recurse AST to turn expressions into these assignments
expression_parser_helper <- function(e) {
  switch_expr(e,
              symbol = scidb.expression.symbol$new(e),
              constant = scidb.expression.value$new(e),
              call = {
                L <- length(e)
                op <- rlang::as_string(e[[1]])
                # Call funcitons taking no arguments
                if (L < 2) {
                  return( scidb.expression.value$new(do.call(op, list())))
                }
                # Recursively parse contents
                values <- lapply(2:L,function(i) { expression_parser_helper(e[[i]]) })
                # If all the sub-elements are constants, just evaluate R call
                if (all(unlist(sapply(values, function(v) { v$type() == "VALUE" })))) {
                  raw_values <- lapply(values, function(v) { v$raw() })
                  return( scidb.expression.value$new(do.call(op, raw_values)))
                }
                # Otherwise build a compound expression
                if (L == 2) {
                  value <- values[[1]]
                  # If op is (, just forward contents
                  if (op == '(') {
                    return(value)
                  }
                  # If op is !, use the !.scidb.expression operator for its pushdown logic
                  if (op == '!') {
                    return(!value)
                  }
                  # Otherwise, return a unary transformation
                  return(scidb.expression.unary$new(op, value))
                } else if (L == 3) {
                  lhs <- values[[1]]
                  rhs <- values[[2]]
                  # Special operations
                  # In some of these operations, the RHS is required to be a literal value,
                  # and in all cases it will be modified as necessary
                  if (op == "%in%") {
                    if (rhs$type() != "VALUE") {
                      stop("ERROR: cannot use special operator %in% with a symbolic RHS")
                    }
                    reduced <- Reduce('|.scidb.expression', lapply(rhs$raw(),
                      function(x) {
                        scidb.expression.binary$new('==',lhs,scidb.expression.value$new(x))
                      }))
                    return(reduced)
                  } else if (op == "%not_in%") {
                    if (rhs$type() != "VALUE") {
                      stop("ERROR: cannot use special operator %not_in% with a symbolic RHS")
                    }
                    reduced <- Reduce('&.scidb.expression', lapply(rhs$raw(),
                      function(x) {
                        scidb.expression.binary$new('!=',lhs,scidb.expression.value$new(x))
                      }))
                    return(reduced)
                  } else if (op == "str_detect") {
                    op <- "stringr::str_detect"
                  } else if (op == "%like%") {
                    op <- "stringr::str_detect"
                    rhs <- expression_parser_helper(rlang::expr(paste0("(?i)",!!e[[3]])))
                  } else if (op == "%contains%") {
                    op <- "stringr::str_detect"
                    rhs <- expression_parser_helper(rlang::expr(paste0("(?i).*",paste0(!!e[[3]],".*"))))
                  } else if (op == "%starts_with%") {
                    op <- "stringr::str_detect"
                    rhs <- expression_parser_helper(rlang::expr(paste0("(?i)",paste0(!!e[[3]],".*"))))
                  } else if (op == "%ends_with%") {
                    op <- "stringr::str_detect"
                    rhs <- expression_parser_helper(rlang::expr(paste0("(?i).*",!!e[[3]])))
                  }
                  # Return a binary operator
                  scidb.expression.binary$new(op, lhs, rhs)
                }
              }
  )
}

#' Create a scidb.expression by parsing an R expression
#' @description 
#' This recursive methods creates a tree of scidb.expression objects
#' corresponding to the R expression supplied as a parameter.
#' 
#' R variables can be interpolated into the expression by prepending them
#' with the double-exclamation !! operator.
#'
#' In addition to base R unary and binary operands, this method
#' treats several operations as special cases, and also defines
#' several custom operations of its own that can be used:
#'  * `%in%`: this vector membership operator, e.g. `x %in% c(1,2,...)`, can
#'    only be used when the RHS value is a non-symbolic scalar or vector value
#'  * `%not_in%`: this custom operand is the negation of `%in%`
#'  * `%like%`, `%contains%`, `%starts_with%`, and `%ends_with%`: these custom
#'    operands are treated as special cases of the regex function
#'    `stringr::str_detect`, and can be applied to symbolic and non-symbolic arguments
#' @param ... R expression to parse
#' @return scidb.expression object
#' @export
expression_parser <- function(...) {
  expression_parser_helper(rlang::expr(...))
}

###########################
# Unbound Predicate class #
###########################

#' Anonymous predicate class
#' @description
#' Class representing an expression with a free symbol,
#' similar in intent to an anonymous function
#' @export
scidb.predicate <- R6::R6Class(
    "scidb.predicate",
    portable = F, cloneable = T, lock_objects = F, lock_class = F,
    public = list(
        #' @description Create a predicate from a free variable placeholder name and expression
        #' @param variable free variable symbol
        #' @param expression expression as a function of the free variable
        initialize = function(variable, expression) {
          v <- rlang::expr(!!variable)
          if (!((rlang::is_string(v) && length(v) == 1) || rlang::is_symbol(v))) {
            stop("ERROR: input 'variable' must be a symbol or symbol name character")
          }
          if (!("scidb.expression" %in% class(expression))) {
            stop("ERROR: input 'expression' must be a scidb.expression object")
          }
          private$variable_ <- rlang::as_string(v)
          private$expression_ <- expression
        },
        #' @description Rename the free symbol in the internal expression
        #' @param new_variable new string to represent unbound variable
        resymbol = function(new_variable) {
            v <- rlang::expr(!!new_variable)
            if (!((rlang::is_string(v) && length(v) == 1) || rlang::is_symbol(v))) {
              stop("ERROR: input 'variable' must be a symbol or symbol name character")
            }
            symbol_map <- list()
            symbol_map[[private$variable_]] <- rlang::as_string(v)
            do.call(private$expression_$substitute,symbol_map)
        },
        #' @description Evaluate predicate with symbol substituted for the free variable, returning a scidb.expression
        #' @param symbol symbol name to take place of free variable
        evaluate = function(symbol) {
            R <- self$clone(deep=TRUE)
            R$resymbol(symbol)
            R$private$expression_
        }
    ),
    private = list(
        variable_ = NULL,
        expression_ = NULL
    )
)

# Boolean operations

#' binary conjunction operator for predicates
#' @name and.scidb.predicate
#' @param P1 first predicate object for the conjunction
#' @param P2 second predicate object for the conjunction
#' @export
'&.scidb.predicate' <- function(P1, P2) {
    P1$resymbol("x")
    P2$resymbol("x")
    scidb.predicate$new("x", P1$private$expression_ & P2$private$expression_)
}

#' binary disjunction operator for predicates
#' @name or.scidb.predicate
#' @param P1 first predicate object for the disjunction
#' @param P2 second predicate object for the disjunction
#' @export
'|.scidb.predicate' <- function(P1, P2) {
    P1$resymbol("x")
    P2$resymbol("x")
    scidb.predicate$new("x", P1$private$expression_ | P2$private$expression_)
}

#' unary negation operator for a predicate
#' @name not.scidb.predicate
#' @param P predicate object to negate
#' @export
'!.scidb.predicate' <- function(P) {
    P$resymbol("x")
    scidb.predicate$new("x", !P$private$expression_)
}

#' @title scidb predicate expressions
#' @name scidb_predicate_expressions
#' @description 
#' These functions create anonymous scidb.predicate expressions objects,
#' which can be chained with boolean operators ([&][&.scidb.predicate],
#' [|][|.scidb.predicate], and [!][!.scidb.predicate]) and supplied as 
#' values in arguments to [filter].
#'  * [EQUALS]
#'  * [NEQ]
#'  * [IN]
#'  * [NOT_IN]
#'  * [IS_NULL]
#'  * [NOT_NULL]
#'  * [GEQ]
#'  * [GT]
#'  * [LEQ]
#'  * [LT]
#'  * [IN_RANGE]
#'  * [IN_RANGE_INCL]
#'  * [IN_RANGE_EXCL]
#'  * [LIKE]
#'  * [CONTAINS]
#'  * [STARTS_WITH]
#'  * [ENDS_WITH]
#' @md
NULL
#> NULL

# Basic equality comparisons

#' equality binary predicate
#' @param value numeric or character value for equality comparison
EQUALS <- function(value) {
    scidb.predicate$new("x", expression_parser(x == !!value))
}

#' inequality binary predicate
#' @param value numeric or character value for inequality comparison
NEQ <- function(value) {
    scidb.predicate$new("x", expression_parser(x != !!value))
}

# Compound equality comparisons: set membership or absence

#' set membership binary predicate
#' @param values vector of numeric or character values for membership test
IN <- function(values) {
    Reduce('|.scidb.predicate', lapply(values, EQUALS))
}

#' set anti-membership binary predicate
#' @param values vector of numeric or character values for membership test
NOT_IN <- function(values) {
    Reduce('&.scidb.predicate', lapply(values, NEQ))
}

# Special value checks

#' null test unary predicate
IS_NULL <- function() {
    scidb.predicate$new("x", expression_parser(is.null(x)))
}

#' negation of IS_NULL
NOT_NULL <- function() {
    !IS_NULL()
}

# Basic inequality comparisons

#' greater-than-or-equal-to binary predicate
#' @param value numeric value for inequality comparison
GEQ <- function(value) {
    scidb.predicate$new("x", expression_parser(x >= !!value))
}

#' greater-than binary predicate
#' @param value numeric value for inequality comparison
GT <- function(value) {
    scidb.predicate$new("x", expression_parser(x > !!value))
}

#' less-than-or-equal-to binary predicate
#' @param value numeric value for inequality comparison
LEQ <- function(value) {
    scidb.predicate$new("x", expression_parser(x <= !!value))
}

#' less-than binary predicate
#' @param value numeric value for inequality comparison
LT <- function(value) {
    scidb.predicate$new("x", expression_parser(x < !!value))
}

# Compound arithmetic inequality comparisons

#' inclusive range test
#' @param lower lower bound
#' @param upper upper bound
IN_RANGE <- function(lower,upper) {
    GEQ(lower) & LEQ(upper)
}

#' inclusive range test
#' @param lower lower bound
#' @param upper upper bound
IN_RANGE_INCL <- function(lower,upper) {
    GEQ(lower) & LEQ(upper)
}

#' exclusive range test
#' @param lower lower bound
#' @param upper upper bound
IN_RANGE_EXCL <- function(lower,upper) {
    GT(lower) & LT(upper)
}

# Regular expression comparisons

#' test if this pattern matches the entire string
#' @param pattern regular expression pattern
LIKE <- function(pattern) {
    scidb.predicate$new("x", expression_parser(x %like% !!pattern))
}

#' test if this pattern matches a substring
#' @param pattern regular expression pattern
CONTAINS <- function(pattern) {
    scidb.predicate$new("x", expression_parser(x %contains% !!pattern))
}

#' test if this pattern matches the start of the string
#' @param pattern regular expression pattern
STARTS_WITH <- function(pattern) {
    scidb.predicate$new("x", expression_parser(x %starts_with% !!pattern))
}

#' test if this pattern matches the end of the string
#' @param pattern regular expression pattern
ENDS_WITH <- function(pattern) {
    scidb.predicate$new("x", expression_parser(x %ends_with% !!pattern))
}

####################
# Helper functions #
####################

#' Parse multiple R expressions into a list of boolean expressions
#' @description
#' This method takes a variable number of R expressions as arguments, returning a list
#' of scidb.expression objects. There are three formats for constructing a boolean expression
#'  * Named value: an expression like `x = 1` is evaluated as the expression x == 1, while
#'    one like `x %in% c(1,2,3)` is evaluated as `x %in% c(1,2,3)`
#'  * Named scidb predicate expression: an expression like `x = IN_RANGE(4,10)` is evaluated as 
#'    `x >= 4 && x <= 10`, making use of the anonymous predicate method `scidb::IN_RANGE`.
#'    The page [scidb predicate expressions][scidb_predicate_expressions] lists the available expressions
#'  * Arbitrary expression evaluating to a boolean; this is parsed with \link{expression_parser}
#'    and cannot take the form of a named argument `x = ...`.
#' @examples
#' \dontrun{
#'  scidb::filter(x = 1)
#'  scidb::filter(x %in% c(1,2,3))
#'  scidb::filter(x = IN_RANGE(5,10) | IN_RANGE(15,20))
#'  scidb::filter(foo %like% "hello", bar %like% "world")
#'  scidb::filter(foo %like% "hello" || bar %like% "world")
#'  scidb::filter(nchar(foo) > 5)
#' }
#' @param ... R expressions representing compound predicates to be applied jointly
#' to possibly-multiple bound variables
#' @return a list of scidb.expression objects corresponding to each input expression
#' @export
#' @md
filter <- function(...) {
    fs <- lapply(rlang::exprs(...), expression_parser_helper)
    # Fix up fs to all be scidb.expression objects
    fixup <- function(key, value) {
        if ('scidb.expression.value' %in% class(value)) {
            value <- value$raw()
            if ('scidb.predicate' %in% class(value)) {
                # lambda function style; bind with key to make an expression
                value$evaluate(key)
            } else if (class(value) %in% c("logical","integer","numeric","integer64","character")) {
                # biobank style; turn into a set membership expression
                IN(value)$evaluate(key)
            } else {
              # unsupported value type
              stop(paste("ERROR: the list of filters includes an item of class", class(raw_value)[1], "which cannot be parsed"))
            }
        } else if ('scidb.expression' %in% class(value)) {
            value
        } else {
            # unsupported filter class
            stop(paste("ERROR: the list of filters includes an item of class", class(value)[1], "which cannot be parsed"))
        }
    }
    lapply(seq_along(fs), function(k, v, i) {
            fixup(k[[i]], v[[i]])
        },
        k = names(fs),
        v = fs
    )
}

#' Merge list of scidb.expression objects in conjunction
#' @param filter_list a list of scidb.expression objects, such as one created by scidb::filter
#' @return a single scidb.expression object equivalent to the conjunctions of all elements of filter_list
#' @export
merge_filters <- function(filter_list) {
  Reduce(function(l,r) l & r, filter_list)
}

#' Parse multiple R expressions into a single AFL string
#' @param ... R expressions representing compound predicates to be applied jointly
#' to possibly-multiple bound variables
#' @return a character vector of the AFL strings corresponding to the expressions
#' @export
filter_to_afl <- function(...) {
  merge_filters(filter(...))$to_afl()
}
Paradigm4/SciDBR documentation built on Nov. 9, 2023, 4:58 a.m.