R/src_dbi.R

Defines functions db_disconnector src_dbi tbl_src_dbi tbl.src_dbi

Documented in src_dbi tbl.src_dbi

#' Use dplyr verbs with a remote database table
#'
#' All data manipulation on SQL tbls are lazy: they will not actually
#' run the query or retrieve the data unless you ask for it: they all return
#' a new `tbl_dbi` object. Use [compute()] to run the query and save the
#' results in a temporary in the database, or use [collect()] to retrieve the
#' results to R. You can see the query with [show_query()].
#'
#' @details
#' For best performance, the database should have an index on the variables
#' that you are grouping by. Use [explain()] to check that the database is using
#' the indexes that you expect.
#'
#' There is one verb that is not lazy: [do()] is eager because it must pull
#' the data into R.
#'
#' @param src A `DBIConnection` object produced by `DBI::dbConnect()`.
#' @param from Either a string (giving a table name),
#'   a fully qualified table name created by [in_schema()]
#'   or a literal [sql()] string.
#' @param ... Passed on to [tbl_sql()]
#' @export
#' @examples
#' library(dplyr)
#'
#' # Connect to a temporary in-memory SQLite database
#' con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
#'
#' # Add some data
#' copy_to(con, mtcars)
#' DBI::dbListTables(con)
#'
#' # To retrieve a single table from a source, use `tbl()`
#' con %>% tbl("mtcars")
#'
#' # Use `in_schema()` for fully qualified table names
#' con %>% tbl(in_schema("temp", "mtcars")) %>% head(1)
#'
#' # You can also use pass raw SQL if you want a more sophisticated query
#' con %>% tbl(sql("SELECT * FROM mtcars WHERE cyl = 8"))
#'
#' # If you just want a temporary in-memory database, use src_memdb()
#' src2 <- src_memdb()
#'
#' # To show off the full features of dplyr's database integration,
#' # we'll use the Lahman database. lahman_sqlite() takes care of
#' # creating the database.
#'
#' if (requireNamespace("Lahman", quietly = TRUE)) {
#' batting <- copy_to(con, Lahman::Batting)
#' batting
#'
#' # Basic data manipulation verbs work in the same way as with a tibble
#' batting %>% filter(yearID > 2005, G > 130)
#' batting %>% select(playerID:lgID)
#' batting %>% arrange(playerID, desc(yearID))
#' batting %>% summarise(G = mean(G), n = n())
#'
#' # There are a few exceptions. For example, databases give integer results
#' # when dividing one integer by another. Multiply by 1 to fix the problem
#' batting %>%
#'   select(playerID:lgID, AB, R, G) %>%
#'   mutate(
#'    R_per_game1 = R / G,
#'    R_per_game2 = R * 1.0 / G
#'  )
#'
#' # All operations are lazy: they don't do anything until you request the
#' # data, either by `print()`ing it (which shows the first ten rows),
#' # or by `collect()`ing the results locally.
#' system.time(recent <- filter(batting, yearID > 2010))
#' system.time(collect(recent))
#'
#' # You can see the query that dplyr creates with show_query()
#' batting %>%
#'   filter(G > 0) %>%
#'   group_by(playerID) %>%
#'   summarise(n = n()) %>%
#'   show_query()
#' }
#' @importFrom dplyr tbl
#' @aliases tbl_dbi
tbl.src_dbi <- function(src, from, ...) {
  subclass <- class(src$con)[[1]] # prefix added by dplyr::make_tbl
  tbl_sql(c(subclass, "dbi"), src = src, from = from, ...)
}

# Internal calls to `tbl()` should be avoided in favor of tbl_src_dbi().
# The former may query the database for column names if `vars` is omitted,
# the latter always requires `vars`.
tbl_src_dbi <- function(src, from, vars) {
  force(vars)
  tbl(src, from, vars = vars)
}



#' Database src
#'
#' @description
#' `r lifecycle::badge("superseded")`
#'
#' Since can generate a `tbl()` directly from a DBI connection we no longer
#' recommend using `src_dbi()`.
#'
#' @param con An object that inherits from [DBI::DBIConnection-class],
#'   typically generated by [DBI::dbConnect]
#' @param auto_disconnect Should the connection be automatically closed when
#'   the src is deleted? Set to `TRUE` if you initialize the connection
#'   the call to `src_dbi()`. Pass `NA` to auto-disconnect but print a message
#'   when this happens.
#' @return An S3 object with class `src_dbi`, `src_sql`, `src`.
#' @keywords internal
#' @export
src_dbi <- function(con, auto_disconnect = FALSE) {
  # Avoid registering disconnector if con can't be evaluated
  force(con)

  # stopifnot(is(con, "DBIConnection"))
  if (is_false(auto_disconnect)) {
    disco <- NULL
  } else {
    disco <- db_disconnector(con, quiet = is_true(auto_disconnect)) # nocov
  }

  subclass <- paste0("src_", class(con)[[1]])

  structure(
    list(
      con = con,
      disco = disco
    ),
    class = c(subclass, "src_dbi", "src_sql", "src")
  )
}

methods::setOldClass(c("src_dbi", "src_sql", "src"))

# nocov start
# Creates an environment that disconnects the database when it's GC'd
db_disconnector <- function(con, quiet = FALSE) {
  reg.finalizer(environment(), function(...) {
    if (!quiet) {
      message("Auto-disconnecting ", class(con)[[1]])
    }
    dbDisconnect(con)
  })
  environment()
}
# nocov end

Try the dbplyr package in your browser

Any scripts or data that you put into this service are public.

dbplyr documentation built on Oct. 26, 2023, 9:06 a.m.