R/exa.createScript.R

Defines functions exa.createScript

Documented in exa.createScript

#' Deploys an R function as an UDF in the EXASolution database.
#'
#' This function takes an R function and creates a R UDF script on the
#' EXASolution database. A \code{CREATE SCRIPT} call will be used behind the
#' scenes. The return value is a function that, when executed, will execute the
#' script on the database server.
#'
#' We recommend to read the EXASolution manual about UDF scripts for a better
#' understanding.
#'
#' @param channel The RODBC connection channel, typically created via
#'   odbcConnect.
#' @param name The script will be created in the database with this name.
#' @param func The R function to be created as a UDF R script in the database.
#' @param env A list of values which will be available in the UDF function under
#'   the same name.
#'
#'   For example if you pass \code{list(a=2)} for this argument, you can access
#'   the value a in the function via \code{env$a}.
#'
#' @param initCode This code block will be executed once on each parallel
#'   instance of the database running the script, before the first call of the
#'   function.
#'
#' @param cleanCode This code block will be executed once on each parallel
#'   instance of the database running the script, after the function was called
#'   the last time.
#'
#' @param inType The input type of the UDF script, either \code{SET} or
#'   \code{SCALAR}.
#'
#' \code{SET} will call the function once for each group,
#' \code{SCALAR} will call the function once for each record.
#' Please read the EXASolution manual about UDF scripts for details.
#'
#' @param outType The output type of the UDF script, either \code{EMITS} or
#'   \code{RETURNS}. For \code{EMITS}, the function emits any number of values.
#'   For \code{RETURNS}, the function emits just a single value.
#'
#' @param inArgs Vector of strings specifying the names and types of the input
#'   arguments for the UDF script.
#'   Example: \code{inArgs = c("k INT", "v VARCHAR(10)")}
#'
#' @param outArgs Vector of strings specifying the names and types of the output
#'   arguments of the UDF script.
#'   Example: \code{inArgs = c("outputstring VARCHAR(10)")}
#'
#' @param outputAddress This parameters specifies the address and port of the
#'   optional python output service is listening on. For example:
#'   \code{c("192.168.1.10", 3000)}.Please read the README.txt of this R package
#'   for details.
#'
#' @param replaceIfExists Boolean whether the script shall be replaced if it
#'   already exists. Either \code{TRUE} or \code{FALSE}.
#'
#' @param mockOnly Boolean, default FALSE. This parameter is useful for
#'   unit-testing if the ODBC connection is not available. Setting mockOnly=TRUE
#'   will not install the UDF function to the EXASOL database.
#'
#' @return This function returns a function that, when called, will execute the
#'   script on the server. With the call you have to specify to which data it
#'   shall be applied. The returned function generates and executes a
#'   \code{SELECT SQL} statement behind the scenes. It has the following
#'   signature:
#'
#'   \code{function(..., table = NA, where = NA, groupBy = NA, restQuery = "",
#'   returnSQL = FALSE, reader = NA, server = NA)}
#'
#'   \item{...}{The first string parameters define the SQL expressions that will
#'   be used as the input for the UDF script. Typically this is one or more
#'   column names as you see in the example below.}
#'
#'   \item{table}{A string with the table name to which the function shall be
#'   applied to. You can specify quoted names the following:
#'   table='myschema."MyQuotedTable"'}
#'
#'   \item{where}{A string with the where clause (SQL) to filter the records.}
#'
#'   \item{groupBy}{A string with the group-by clause (SQL) that will be used to
#'   group the. This is especially important for SET UDF scripts that will be
#'   called once for each group.}
#'
#'   \item{returnSQL}{Boolean value. For TRUE, the autogenerated SQL statement
#'   will be returned, but NOT executed.}
#'
#'   \item{restQuery}{A string with additional SQL code that will be appended at
#'   the end of the autogenerated query, e.g. ORDER BY or HAVING.}
#'
#'   \item{reader}{For internal usage only.}
#'
#'   \item{server}{For internal usage only.}
#'
#'   \item{RETURN VALUE}{The return value of the function is the result of the
#'   SELECT query. The query will be executed internally with the exa.readData
#'   function.}
#'
#' @author EXASOL AG <opensource@exasol.com>
#' @family None-DBI-API
#' @example examples/createScript.R
#' @export
#' @import assertthat
exa.createScript <- function(channel, name, func = NA,
                             env = list(),
                             initCode = NA,
                             cleanCode = NA,
                             inType = SET,
                             inArgs = list(),
                             outType = EMITS,
                             outArgs = list(),
                             outputAddress = NA,
                             replaceIfExists = TRUE,
                             mockOnly = FALSE
                             ) {
  m <- match.call()
  code <- func
  initCode <- m$initCode
  cleanCode <- m$cleanCode

  inType <- match.arg(inType, ALLOWED_UDF_IN_TYPES)
  outType <- match.arg(outType, ALLOWED_UDF_OUT_TYPES)

  inArgs <- do.call(paste, c(as.list(inArgs), sep = ", "))

  if (outType == EMITS) {
    if (is.null(m$outArgs)) {
      stop("No output arguments given")
    }
    outArgs <- paste("(",
                     do.call(paste, c(as.list(outArgs), sep = ", ")),
                     ")", sep = "")
  } else {
    outType <- RETURNS
    outArgs <- as.character(outArgs)
  }

  sql <- paste("CREATE", if (replaceIfExists) "OR REPLACE" else "", "R",
               inType, "SCRIPT", name,
               "(", inArgs, ")",
               outType, outArgs, "AS")

  if (!is.null(m$outputAddress))
    sql <- paste(sql,
                 "# activate output to external server",
                 paste("output_connection__ <- socketConnection('",
                       outputAddress[[1]], "', ", outputAddress[[2]], ")",
                       sep = ""),
                 "sink(output_connection__)",
                 "sink(output_connection__, type = \"message\")",
                 "# ----------------------------------",
                 sep = "\n")

  if (!is.null(m$env)) {
    sql <- paste(sql, "\nenv <- ",
                 do.call(paste, c(as.list(deparse(env)), sep = "\n")),
                 "\n", sep = "")
  }

  if (!is.null(initCode)) {
    sql <- paste(sql, "\n# code from the init function")
    for (codeLine in deparse(initCode)) {
      sql <- paste(sql, codeLine, sep = "\n")
    }
    sql <- paste(sql, "\n# ---------------------------")
  }

  if (!is.null(cleanCode)) {
    sql <- paste(sql, "cleanup <- function()", sep = "\n")
    for (codeLine in deparse(cleanCode)) {
      sql <- paste(sql, codeLine, sep = "\n")
    }
  }

  sql <- paste(sql, "run <-", sep = "\n")
  for (codeLine in deparse(code)) {
    sql <- paste(sql, codeLine, sep = "\n")
  }
  sql <- paste(sql, "", sep = "\n")

  if (!mockOnly) {
    if (odbcQuery(channel, sql) == -1) {
      stop(odbcGetErrMsg(channel)[[1]])
    }
  }

  # This function will be returned as a proxy to the R script
  function(..., table, where = "", groupBy = "", restQuery = "",
           returnSQL = FALSE, reader = NA, server = NA) {

    # preconditions
    assert_that(is.character(table))
    assert_that(length(where) == 1)

    assert_that(is.character(where))
    assert_that(length(where) == 1)

    assert_that(is.character(groupBy))
    assert_that(length(groupBy) == 1)

    assert_that(is.character(restQuery))
    assert_that(length(restQuery) == 1)
    # end of preconditions

    m <- match.call(expand.dots = FALSE)

    # UDF parameters
    args <- paste(c(...), collapse = ", ")

    # optional WHERE clause
    if (where != "") {
      where <- paste(" WHERE", where, sep = " ")
    }

    # optional GROUP BY clause
    if (groupBy != "") {
      groupBy <- paste(" GROUP BY", groupBy, sep = " ")
    }

    sql <- paste("SELECT ", name, "(", args, ") FROM ",
                 table, where, groupBy, sep = "")
    sql <- paste("SELECT * FROM (", sql, ")", restQuery, sep = "")

    if (returnSQL) {
      paste("(", sql, ")", sep = "")
    } else {
      execArgs <- list(channel, sql)
      if (!is.null(m$reader)) {
        execArgs <- c(execArgs, reader = reader)
      }
      if (!is.null(m$server)) {
        execArgs <- c(execArgs, server = server)
      }
      if (!mockOnly) {
        do.call(exa.readData, execArgs)
      }
    }
  }
}
EXASOL/r-exasol documentation built on Aug. 28, 2023, 2:32 a.m.