R/sql-build.R

Defines functions sql_optimise flatten_query_2_tables querylist_reuse_query flatten_query get_subquery_name cte_render sql_render.lazy_query sql_render.tbl_lazy sql_render sql_build.tbl_lazy sql_build

Documented in sql_build sql_optimise sql_render

#' Build and render SQL from a sequence of lazy operations
#'
#' `sql_build()` creates a `select_query` S3 object, that is rendered
#' to a SQL string by `sql_render()`. The output from `sql_build()` is
#' designed to be easy to test, as it's database agnostic, and has
#' a hierarchical structure. Outside of testing, however, you should
#' always call `sql_render()`.
#'
#' `sql_build()` is generic over the lazy operations, \link{lazy_ops},
#' and generates an S3 object that represents the query. `sql_render()`
#' takes a query object and then calls a function that is generic
#' over the database. For example, `sql_build.op_mutate()` generates
#' a `select_query`, and `sql_render.select_query()` calls
#' `sql_select()`, which has different methods for different databases.
#' The default methods should generate ANSI 92 SQL where possible, so you
#' backends only need to override the methods if the backend is not ANSI
#' compliant.
#'
#' @export
#' @keywords internal
#' @param op A sequence of lazy operations
#' @param con A database connection. The default `NULL` uses a set of
#'   rules that should be very similar to ANSI 92, and allows for testing
#'   without an active database connection.
#' @param ... Other arguments passed on to the methods. Not currently used.
#' @param sql_options SQL rendering options generated by `sql_options()`.
sql_build <- function(op, con = NULL, ..., sql_options = NULL) {
  if (is.null(sql_options)) {
    sql_options <- as_sql_options(sql_options)
    out <- sql_build(
      op = op,
      con = con,
      ...,
      sql_options = sql_options
    )

    return(out)
  }

  unique_subquery_name_reset()
  unique_column_name_reset()
  check_dots_used()
  UseMethod("sql_build")
}

#' @export
sql_build.tbl_lazy <- function(op, con = op$src$con, ..., sql_options = NULL) {
  con <- con %||% op$src$con
  sql_options <- sql_options %||% sql_options()

  # only used for testing
  qry <- sql_build(
    op$lazy_query,
    con = con,
    ...,
    sql_options = sql_options
  )
  sql_optimise(qry, con = con, ...)
}


# Render ------------------------------------------------------------------

#' @export
#' @rdname sql_build
#' @param subquery Is this SQL going to be used in a subquery?
#'   This is important because you can place a bare table name in a subquery
#'   and  ORDER BY does not work in subqueries.
sql_render <- function(query,
                       con = NULL,
                       ...,
                       sql_options = NULL,
                       subquery = FALSE,
                       lvl = 0) {
  if (is.null(sql_options)) {
    sql_options <- as_sql_options(sql_options)
    out <- sql_render(
      query = query,
      con = con,
      ...,
      sql_options = sql_options,
      subquery = subquery,
      lvl = lvl
    )

    return(out)
  }

  check_dots_used()
  UseMethod("sql_render")
}

#' @export
sql_render.tbl_lazy <- function(query,
                                con = query$src$con,
                                ...,
                                sql_options = NULL,
                                subquery = FALSE,
                                lvl = 0) {
  con <- con %||% query$src$con
  sql_render(
    query$lazy_query,
    con = con,
    ...,
    sql_options = sql_options,
    subquery = subquery,
    lvl = lvl
  )
}

#' @export
sql_render.lazy_query <- function(query,
                                  con = NULL,
                                  ...,
                                  sql_options = NULL,
                                  subquery = FALSE,
                                  lvl = 0) {
  qry <- sql_build(query, con = con, sql_options = sql_options)
  qry <- sql_optimise(qry, con = con, subquery = subquery)

  if (sql_options$cte) {
    query_list <- flatten_query(qry, list(queries = list(), name = NULL))
    queries <- query_list$queries

    rendered_queries <- purrr::map2(
      queries, seq_along(queries) != length(queries),
      function(query, indent) {
        sql_render(query, con = con, ..., subquery = subquery, lvl = as.integer(indent))
      }
    )

    cte_render(rendered_queries, con)
  } else {
    sql_render(qry, con = con, ..., subquery = subquery, lvl = lvl)
  }
}

cte_render <- function(query_list, con) {
  n <- length(query_list)
  if (n == 1) {
    return(query_list[[1]])
  }

  ctes <- purrr::imap(
    query_list[-n],
    function(query, name) {
      glue_sql2(con, "{.name name} {.kw 'AS'} (\n{query}\n)")
    }
  )
  cte_query <- sql_vector(unname(ctes), parens = FALSE, collapse = ",\n", con = con)

  glue_sql2(con, "{.kw 'WITH'} ", cte_query, "\n", query_list[[n]])
}

get_subquery_name <- function(x, query_list) {
  if (inherits(x, "base_query")) return(x)

  base_query(query_list$name)
}

flatten_query <- function(qry, query_list) {
  UseMethod("flatten_query")
}

querylist_reuse_query <- function(qry, query_list) {
  id <- vctrs::vec_match(list(unclass(qry)), purrr::map(query_list$queries, unclass))

  if (!is.na(id)) {
    query_list$name <- names(query_list$queries)[[id]]
  } else {
    name <- unique_subquery_name()
    wrapped_query <- set_names(list(qry), name)
    query_list$queries <- c(query_list$queries, wrapped_query)
    query_list$name <- name
  }

  query_list
}

flatten_query_2_tables <- function(qry, query_list) {
  x <- qry$x
  query_list_x <- flatten_query(x, query_list)
  qry$x <- get_subquery_name(x, query_list_x)

  y <- qry$y
  query_list_y <- flatten_query(y, query_list_x)
  qry$y <- get_subquery_name(y, query_list_y)

  querylist_reuse_query(qry, query_list_y)
}


# Optimise ----------------------------------------------------------------

#' @export
#' @rdname sql_build
sql_optimise <- function(x, con = NULL, ..., subquery = FALSE) {
  UseMethod("sql_optimise")
}

Try the dbplyr package in your browser

Any scripts or data that you put into this service are public.

dbplyr documentation built on Oct. 26, 2023, 9:06 a.m.