R/sql-build.R

Defines functions sql_build sql_build.tbl_sql sql_build.tbl_lazy sql_build.op_base_remote sql_build.op_base_local sql_build.op_select sql_build.op_rename sql_build.op_arrange sql_build.op_summarise sql_build.op_mutate sql_build.op_head sql_build.op_group_by sql_build.op_ungroup sql_build.op_filter sql_build.op_distinct sql_build.op_join sql_build.op_semi_join sql_build.op_set_op

Documented in sql_build

#' Build and render SQL from a sequence of lazy operations
#'
#' \code{sql_build} creates a \code{select_query} S3 object, that is rendered
#' to a SQL string by \code{sql_render}. The output from \code{sql_build} is
#' designed to be easy to test, as it's database diagnostic, and has
#' a hierarchical structure.
#'
#' \code{sql_build} is generic over the lazy operations, \link{lazy_ops},
#' and generates an S3 object that represents the query. \code{sql_render}
#' takes a query object and then calls a function that is generic
#' over the database. For example, \code{sql_build.op_mutate} generates
#' a \code{select_query}, and \code{sql_render.select_query} calls
#' \code{sql_select}, which has different methods for different databases.
#' The default methods should generate ANSI 92 SQL where possible, so you
#' backends only need to override the methods if the backend is not ANSI
#' compliant.
#'
#' @export
#' @keywords internal
#' @param op A sequence of lazy operations
#' @param con A database connection. The default \code{NULL} uses a set of
#'   rules that should be very similar to ANSI 92, and allows for testing
#'   without an active database connection.
#' @param ... Other arguments passed on to the methods. Not currently used.
sql_build <- function(op, con, ...) {
  UseMethod("sql_build")
}

#' @export
sql_build.tbl_sql <- function(op, con, ...) {
  sql_build(op$ops, op$con, ...)
}

#' @export
sql_build.tbl_lazy <- function(op, con = NULL, ...) {
  sql_build(op$ops, con, ...)
}

# Base ops --------------------------------------------------------

#' @export
sql_build.op_base_remote <- function(op, con, ...) {
  op$x
}

#' @export
sql_build.op_base_local <- function(op, con, ...) {
  ident("df")
}

# Single table ops --------------------------------------------------------

#' @export
sql_build.op_select <- function(op, con, ...) {
  vars <- select_vars_(op_vars(op$x), op$dots, include = op_grps(op$x))
  select_query(sql_build(op$x, con), ident(vars))
}

#' @export
sql_build.op_rename <- function(op, con, ...) {
  vars <- rename_vars_(op_vars(op$x), op$dots)
  select_query(sql_build(op$x, con), ident(vars))
}

#' @export
sql_build.op_arrange <- function(op, con, ...) {
  order_vars <- translate_sql_(op$dots, con, op_vars(op$x))
  group_vars <- c.sql(ident(op_grps(op$x)), con = con)

  select_query(sql_build(op$x, con), order_by = order_vars)
}

#' @export
sql_build.op_summarise <- function(op, con, ...) {
  select_vars <- translate_sql_(op$dots, con, op_vars(op$x), window = FALSE)
  group_vars <- c.sql(ident(op_grps(op$x)), con = con)

  select_query(
    sql_build(op$x, con),
    select = c.sql(group_vars, select_vars, con = con),
    group_by = group_vars
  )
}

#' @export
sql_build.op_mutate <- function(op, con, ...) {
  vars <- op_vars(op$x)

  new_vars <- translate_sql_(op$dots, con, vars,
    vars_group = op_grps(op),
    vars_order = op_sort(op)
  )
  old_vars <- ident(setdiff(vars, names(new_vars)))

  select_query(
    sql_build(op$x, con),
    select = c.sql(old_vars, new_vars, con = con)
  )
}

#' @export
sql_build.op_head <- function(op, con, ...) {
  select_query(sql_build(op$x, con), limit = op$args$n)
}

#' @export
sql_build.op_group_by <- function(op, con, ...) {
  sql_build(op$x, con, ...)
}

#' @export
sql_build.op_ungroup <- function(op, con, ...) {
  sql_build(op$x, con, ...)
}

#' @export
sql_build.op_filter <- function(op, con, ...) {
  vars <- op_vars(op$x)

  if (!uses_window_fun(op$dots, con)) {
    where_sql <- translate_sql_(op$dots, con, vars = vars)

    select_query(
      sql_build(op$x, con),
      where = where_sql
    )
  } else {
    # Do partial evaluation, then extract out window functions
    expr <- partial_eval2(op$dots, vars)
    where <- translate_window_where_all(expr, ls(sql_translate_env(con)$window))

    # Convert where$expr back to a lazy dots object, and then
    # create mutate operation
    mutate_dots <- lapply(where$comp, lazyeval::as.lazy)
    mutated <- sql_build(op_single("mutate", op$x, dots = mutate_dots), con)
    where_sql <- translate_sql_(where$expr, con = con, vars = vars)

    select_query(mutated, select = ident(vars), where = where_sql)
  }

}

#' @export
sql_build.op_distinct <- function(op, con, ...) {
  if (length(op$dots) == 0) {
    select_query(
      sql_build(op$x, con),
      distinct = TRUE
    )
  } else {
    if (op$args$.keep_all) {
      stop("Can't calculate distinct only on specified columns with SQL unless .keep_all is FALSE",
           call. = FALSE)
    }

    group_vars <- c.sql(ident(names(op$dots)), con = con)
    select_query(
      sql_build(op$x, con),
      select = group_vars,
      group_by = group_vars
    )
  }
}

# Dual table ops --------------------------------------------------------

#' @export
sql_build.op_join <- function(op, con, ...) {
  # Ensure tables have unique names
  x_names <- op_vars(op$x)
  y_names <- op_vars(op$y)
  by <- op$args$by

  uniques <- unique_names(x_names, y_names, by = by, suffix = op$args$suffix)

  if (is.null(uniques)) {
    x <- op$x
    y <- op$y
  } else {
    # TODO: it would be better to construct an explicit FROM statement
    # that used the table names to disambiguate the fields names: this
    # would remove a layer of subqueries and would make sql_join more
    # flexible.
    x <- select_(op$x, .dots = setNames(x_names, uniques$x))
    y <- select_(op$y, .dots = setNames(y_names, uniques$y))

    by$x <- unname(uniques$x[by$x])
    by$y <- unname(uniques$y[by$y])
  }

  join_query(x, y,
    type = op$args$type,
    by = by
  )
}

#' @export
sql_build.op_semi_join <- function(op, con, ...) {
  semi_join_query(op$x, op$y, anti = op$args$anti, by = op$args$by)
}

#' @export
sql_build.op_set_op <- function(op, con, ...) {
  set_op_query(op$x, op$y, type = op$args$type)
}
sctyner/dplyr050 documentation built on May 17, 2019, 2:22 p.m.