Nothing
# #' Create an SQL tbl (abstract)
# #'
# #' This method shouldn't be called by users - it should only be used by
# #' backend implementors who are creating backends that extend the basic
# #' sql behaviour.
# #'
# #' @keywords internal
# #' @export
# #' @param subclass name of subclass
# #' @param ... needed for agreement with generic. Not otherwise used.
# #' @param vars If known, the names of the variables in the tbl. This is
# #' relatively expensive to determine automatically, so is cached throughout
# #' dplyr. However, you should usually be able to leave this blank and it
# #' will be determined from the context.
# tbl_sql <- function(subclass, src, from, ..., vars = attr(from, "vars")) {
# if (!is.sql(from)) { # Must be a character string
# assert_that(length(from) == 1)
# if (isFALSE(db_has_table(src$con, from))) {
# stop("Table ", from, " not found in database ", src$path, call. = FALSE)
# }
# from <- ident(from)
# } else if (!is.join(from)) { # Must be arbitrary sql
# # Abitrary sql needs to be wrapped into a named subquery
# from <- sql_subquery(src$con, from, unique_name())
# }
# tbl <- make_tbl(c(subclass, "sql"),
# src = src, # src object
# from = from, # table, join, or raw sql
# select = vars, # SELECT: list of symbols
# summarise = FALSE, # interpret select as aggreagte functions?
# mutate = FALSE, # do select vars include new variables?
# where = NULL, # WHERE: list of calls
# group_by = NULL, # GROUP_BY: list of names
# order_by = NULL # ORDER_BY: list of calls
# )
# update(tbl)
# }
# #' @export
# update.tbl_sql <- function(object, ...) {
# args <- list(...)
# assert_that(only_has_names(args,
# c("select", "where", "group_by", "order_by", "summarise")))
# for (nm in names(args)) {
# object[[nm]] <- args[[nm]]
# }
# # Figure out variables
# if (is.null(object$select)) {
# var_names <- db_query_fields(object$src$con, object$from)
# vars <- lapply(var_names, as.name)
# object$select <- vars
# }
# object$query <- build_query(object)
# object
# }
# #' @export
# same_src.tbl_sql <- function(x, y) {
# if (!inherits(y, "tbl_sql")) return(FALSE)
# same_src(x$src, y$src)
# }
# #' @export
# tbl_vars.tbl_sql <- function(x) {
# x$query$vars()
# }
# #' @export
# groups.tbl_sql <- function(x) {
# x$group_by
# }
# # Grouping methods -------------------------------------------------------------
# #' @export
# ungroup.tbl_sql <- function(x, ...) {
# update(x, group_by = NULL)
# }
# #' @export
# group_size.tbl_sql <- function(x) {
# df <- collect(summarise(x, n = n()))
# df$n
# }
# #' @export
# n_groups.tbl_sql <- function(x) {
# if (is.null(groups(x))) return(1L)
# x <- update(x, select = groups(x))
# nrow(compute(distinct(x)))
# }
# # Standard data frame methods --------------------------------------------------
# #' @export
# as.data.frame.tbl_sql <- function(x, row.names = NULL, optional = NULL,
# ..., n = 1e5L) {
# x$query$fetch(n)
# }
# #' @export
# #' @rdname dplyr-formatting
# print.tbl_sql <- function(x, ..., n = NULL, width = NULL) {
# cat("Source: ", src_desc(x$src), "\n", sep = "")
# if (inherits(x$from, "ident")) {
# cat(wrap("From: ", x$from, " ", dim_desc(x)))
# } else {
# cat(wrap("From: <derived table> ", dim_desc(x)))
# }
# cat("\n")
# if (!is.null(x$where)) {
# cat(wrap("Filter: ", commas(x$where)), "\n")
# }
# if (!is.null(x$order_by)) {
# cat(wrap("Arrange: ", commas(x$order_by)), "\n")
# }
# if (!is.null(x$group_by)) {
# cat(wrap("Grouped by: ", commas(x$group_by)), "\n")
# }
# cat("\n")
# print(trunc_mat(x, n = n, width = width))
# invisible(x)
# }
# #' @export
# dimnames.tbl_sql <- function(x) {
# list(NULL, tbl_vars.tbl_sql(x))
# }
# #' @export
# dim.tbl_sql <- function(x) {
# if (!inherits(x$from, "ident")) {
# n <- NA
# } else {
# n <- x$query$nrow()
# }
# p <- x$query$ncol()
# c(n, p)
# }
# #' @export
# head.tbl_sql <- function(x, n = 6L, ...) {
# assert_that(length(n) == 1, n > 0L)
# build_query(x, limit = as.integer(n))$fetch()
# }
# #' @export
# tail.tbl_sql <- function(x, n = 6L, ...) {
# stop("tail is not supported by sql sources", call. = FALSE)
# }
# # Set operations ---------------------------------------------------------------
# #' @export
# intersect.tbl_sql <- function(x, y, copy = FALSE, ...) {
# y <- auto_copy(x, y, copy)
# sql <- sql_set_op(x$src$con, x, y, "INTERSECT")
# update(tbl(x$src, sql), group_by = groups(x))
# }
# #' @export
# union.tbl_sql <- function(x, y, copy = FALSE, ...) {
# y <- auto_copy(x, y, copy)
# sql <- sql_set_op(x$src$con, x, y, "UNION")
# update(tbl(x$src, sql), group_by = groups(x))
# }
# #' @export
# setdiff.tbl_sql <- function(x, y, copy = FALSE, ...) {
# y <- auto_copy(x, y, copy)
# sql <- sql_set_op(x$src$con, x, y, "EXCEPT")
# update(tbl(x$src, sql), group_by = groups(x))
# }
# # SQL select generation --------------------------------------------------------
# build_query <- function(x, limit = NULL) {
# assert_that(is.null(limit) || (is.numeric(limit) && length(limit) == 1))
# translate <- function(expr, ...) {
# translate_sql_q(expr, tbl = x, env = NULL, ...)
# }
# if (x$summarise) {
# # Summarising, so SELECT needs to contain grouping variables
# select <- c(x$group_by, x$select)
# select <- select[!duplicated(select)]
# select_sql <- translate(select)
# vars <- auto_names(select)
# group_by_sql <- translate(x$group_by)
# order_by_sql <- translate(x$order_by)
# } else {
# # Not in summarise, so assume functions are window functions
# select_sql <- translate(x$select, window = uses_window_fun(x$select, x))
# vars <- auto_names(x$select)
# # Don't use group_by - grouping affects window functions only
# group_by_sql <- NULL
# # If the user requested ordering, ensuring group_by is included
# # Otherwise don't, because that may make queries substantially slower
# if (!is.null(x$order_by) && !is.null(x$group_by)) {
# order_by_sql <- translate(c(x$group_by, x$order_by))
# } else {
# order_by_sql <- translate(x$order_by)
# }
# }
# if (!uses_window_fun(x$where, x)) {
# from_sql <- x$from
# where_sql <- translate(x$where)
# } else {
# # window functions in WHERE need to be performed in subquery
# where <- translate_window_where(x$where, x, con = x$src$con)
# base_query <- update(x,
# group_by = NULL,
# where = NULL,
# select = c(x$select, where$comp))$query
# from_sql <- build_sql("(", base_query$sql, ") AS ", ident(unique_name()),
# con = x$src$con)
# where_sql <- translate(where$expr)
# }
# sql <- sql_select(x$src$con, from = from_sql, select = select_sql,
# where = where_sql, order_by = order_by_sql, group_by = group_by_sql,
# limit = limit)
# query(x$src$con, sql, vars)
# }
# uses_window_fun <- function(x, tbl) {
# if (is.null(x)) return(FALSE)
# if (is.list(x)) {
# calls <- unlist(lapply(x, all_calls))
# } else {
# calls <- all_calls(x)
# }
# win_f <- ls(envir = src_translate_env(tbl)$window)
# any(calls %in% win_f)
# }
# # Verbs ------------------------------------------------------------------------
# #' @export
# filter_.tbl_sql <- function(.data, ..., .dots) {
# dots <- lazyeval::all_dots(.dots, ...)
# input <- partial_eval(dots, .data)
# update(.data, where = c(.data$where, input))
# }
# #' @export
# arrange_.tbl_sql <- function(.data, ..., .dots) {
# dots <- lazyeval::all_dots(.dots, ...)
# input <- partial_eval(dots, .data)
# update(.data, order_by = c(input, .data$order_by))
# }
# #' @export
# select_.tbl_sql <- function(.data, ..., .dots) {
# dots <- lazyeval::all_dots(.dots, ...)
# vars <- select_vars_(tbl_vars(.data), dots,
# include = as.character(groups(.data)))
# # Index into variables so that select can be applied multiple times
# # and after a mutate.
# idx <- match(vars, tbl_vars(.data))
# new_select <- .data$select[idx]
# names(new_select) <- names(vars)
# update(.data, select = new_select)
# }
# #' @export
# rename_.tbl_sql <- function(.data, ..., .dots) {
# dots <- lazyeval::all_dots(.dots, ...)
# vars <- rename_vars_(tbl_vars(.data), dots)
# # Index into variables so that select can be applied multiple times
# # and after a mutate.
# idx <- match(vars, tbl_vars(.data))
# new_select <- .data$select[idx]
# names(new_select) <- names(vars)
# update(.data, select = new_select)
# }
# #' @export
# summarise_.tbl_sql <- function(.data, ..., .dots) {
# dots <- lazyeval::all_dots(.dots, ..., all_named = TRUE)
# input <- partial_eval(dots, .data)
# # Effect of previous operations on summarise:
# # * select: none
# # * filter: none, just modifies WHERE (which is applied before)
# # * mutate: need to be precomputed so new select can use
# # * arrange: intersection with new variables preserved
# if (.data$mutate) {
# .data <- collapse(.data)
# }
# .data$summarise <- TRUE
# .data <- update(.data, select = c(.data$group_by, input))
# # Technically, don't always need to collapse result because summarise + filter
# # could be expressed in SQL using HAVING, but that's the only dplyr operation
# # that can be, so would be a lot of extra work for minimal gain
# update(
# collapse(.data),
# group_by = drop_last(.data$group_by)
# )
# }
# #' @export
# mutate_.tbl_sql <- function(.data, ..., .dots) {
# dots <- lazyeval::all_dots(.dots, ..., all_named = TRUE)
# input <- partial_eval(dots, .data)
# .data$mutate <- TRUE
# new <- update(.data, select = c(.data$select, input))
# # If we're creating a variable that uses a window function, it's
# # safest to turn that into a subquery so that filter etc can use
# # the new variable name
# if (uses_window_fun(input, .data)) {
# collapse(new)
# } else {
# new
# }
# }
# #' @export
# group_by_.tbl_sql <- function(.data, ..., .dots, add = FALSE) {
# groups <- group_by_prepare(.data, ..., .dots = .dots, add = add)
# x <- groups$data
# # Effect of group_by on previous operations:
# # * select: none
# # * filter: changes frame of window functions
# # * mutate: changes frame of window functions
# # * arrange: if present, groups inserted as first ordering
# needed <- (x$mutate && uses_window_fun(x$select, x)) ||
# uses_window_fun(x$filter, x)
# if (!is.null(x$order_by)) {
# arrange <- c(x$group_by, x$order_by)
# } else {
# arrange <- NULL
# }
# if (needed) {
# x <- collapse(update(x, order_by = NULL))
# }
# update(x, group_by = groups$groups, order_by = arrange)
# }
# # Copying ----------------------------------------------------------------------
# #' @export
# auto_copy.tbl_sql <- function(x, y, copy = FALSE, ...) {
# copy_to(x$src, as.data.frame(y), random_table_name(), ...)
# }
# #' Copy a local data frame to a sqlite src.
# #'
# #' This standard method works for all sql sources.
# #'
# #' @export
# #' @param types a character vector giving variable types to use for the columns.
# #' See \url{http://www.sqlite.org/datatype3.html} for available types.
# #' @param temporary if \code{TRUE}, will create a temporary table that is
# #' local to this connection and will be automatically deleted when the
# #' connection expires
# #' @param indexes a list of character vectors. Each element of the list
# #' will create a new index.
# #' @param analyze if \code{TRUE} (the default), will automatically ANALYZE the
# #' new table so that the query optimiser has useful information.
# #' @inheritParams copy_to
# #' @return a sqlite \code{\link{tbl}} object
# #' @examples
# #' if (requireNamespace("RSQLite")) {
# #' db <- src_sqlite(tempfile(), create = TRUE)
# #'
# #' iris2 <- copy_to(db, iris)
# #' mtcars$model <- rownames(mtcars)
# #' mtcars2 <- copy_to(db, mtcars, indexes = list("model"))
# #'
# #' explain(filter(mtcars2, model == "Hornet 4 Drive"))
# #'
# #' # Note that tables are temporary by default, so they're not
# #' # visible from other connections to the same database.
# #' src_tbls(db)
# #' db2 <- src_sqlite(db$path)
# #' src_tbls(db2)
# #' }
# copy_to.src_sql <- function(dest, df, name = deparse(substitute(df)),
# types = NULL, temporary = TRUE, indexes = NULL,
# analyze = TRUE, ...) {
# assert_that(is.data.frame(df), is.string(name), is.flag(temporary))
# class(df) <- "data.frame" # avoid S4 dispatch problem in dbSendPreparedQuery
# if (isTRUE(db_has_table(dest$con, name))) {
# stop("Table ", name, " already exists.", call. = FALSE)
# }
# types <- types %||% db_data_type(dest$con, df)
# names(types) <- names(df)
# con <- dest$con
# db_begin(con)
# on.exit(db_rollback(con))
# db_create_table(con, name, types, temporary = temporary)
# db_insert_into(con, name, df)
# db_create_indexes(con, name, indexes)
# if (analyze) db_analyze(con, name)
# db_commit(con)
# on.exit(NULL)
# tbl(dest, name)
# }
# #' @export
# collapse.tbl_sql <- function(x, vars = NULL, ...) {
# # If you collapse a query, the names of the fields will be the output names
# # of the previous query.
# if (is.null(vars)) {
# nms <- auto_names(x$select)
# vars <- lapply(nms, as.name)
# }
# update(tbl(x$src, x$query$sql, vars = vars, ...), group_by = groups(x))
# }
# #' @export
# #' @rdname compute
# compute.tbl_sql <- function(x, name = random_table_name(), temporary = TRUE, ...) {
# db_save_query(x$src$con, x$query$sql, name = name, temporary = temporary)
# update(tbl(x$src, name), group_by = groups(x))
# }
# #' @export
# collect.tbl_sql <- function(x, ...) {
# grouped_df(x$query$fetch(), groups(x))
# }
# # Do ---------------------------------------------------------------------------
# #' @export
# #' @rdname do
# #' @param .chunk_size The size of each chunk to pull into R. If this number is
# #' too big, the process will be slow because R has to allocate and free a lot
# #' of memory. If it's too small, it will be slow, because of the overhead of
# #' talking to the database.
# do_.tbl_sql <- function(.data, ..., .dots, .chunk_size = 1e4L) {
# group_by <- .data$group_by
# if (is.null(group_by)) stop("No grouping", call. = FALSE)
# args <- lazyeval::all_dots(.dots, ...)
# named <- named_args(args)
# gvars <- seq_along(group_by)
# # Create data frame of labels
# labels_tbl <- update(.data,
# select = group_by,
# order_by = NULL,
# summarise = TRUE)
# labels <- as.data.frame(labels_tbl)
# n <- nrow(labels)
# m <- length(args)
# out <- replicate(m, vector("list", n), simplify = FALSE)
# names(out) <- names(args)
# p <- progress_estimated(n * m, min_time = 2)
# env <- new.env(parent = lazyeval::common_env(args))
# # Create ungrouped data frame suitable for chunked retrieval
# chunky <- update(.data,
# select = unique(c(group_by, .data$select)),
# order_by = c(unname(group_by), .data$order_by),
# group_by = NULL
# )
# # When retrieving in pages, there's no guarantee we'll get a complete group.
# # So we always assume the last group in the chunk is incomplete, and leave
# # it for the next. If the group size is large than chunk size, it may
# # take a couple of iterations to get the entire group, but that should
# # be an unusual situation.
# last_group <- NULL
# i <- 0
# chunky$query$fetch_paged(.chunk_size, function(chunk) {
# if (!is.null(last_group)) chunk <- rbind(last_group, chunk)
# # Create an id for each group
# group_id <- id(chunk[gvars], drop = TRUE)
# n <- attr(group_id, "n")
# index <- split_indices(group_id, n)
# last_group <<- chunk[index[[length(index)]], , drop = FALSE]
# for (j in seq_len(n - 1)) {
# env$. <- chunk[index[[j]], , drop = FALSE]
# for (k in seq_len(m)) {
# out[[k]][i + j] <<- list(eval(args[[k]]$expr, envir = env))
# p$tick()$print()
# }
# }
# i <<- i + (n - 1)
# })
# # Process last group
# if (!is.null(last_group)) {
# env$. <- last_group
# for (k in seq_len(m)) {
# out[[k]][i + 1] <- list(eval(args[[k]]$expr, envir = env))
# p$tick()$print()
# }
# }
# if (!named) {
# label_output_dataframe(labels, out, groups(.data))
# } else {
# label_output_list(labels, out, groups(.data))
# }
# }
# # Joins ------------------------------------------------------------------------
# #' Join sql tbls.
# #'
# #' See \code{\link{join}} for a description of the general purpose of the
# #' functions.
# #'
# #' @section Implementation notes:
# #'
# #' Semi-joins are implemented using \code{WHERE EXISTS}, and anti-joins with
# #' \code{WHERE NOT EXISTS}. Support for semi-joins is somewhat partial: you
# #' can only create semi joins where the \code{x} and \code{y} columns are
# #' compared with \code{=} not with more general operators.
# #'
# #' @inheritParams join
# #' @param x,y tbls to join
# #' @param copy If \code{x} and \code{y} are not from the same data source,
# #' and \code{copy} is \code{TRUE}, then \code{y} will be copied into a
# #' temporary table in same database as \code{x}. \code{join} will automatically
# #' run \code{ANALYZE} on the created table in the hope that this will make
# #' you queries as efficient as possible by giving more data to the query
# #' planner.
# #'
# #' This allows you to join tables across srcs, but it's potentially expensive
# #' operation so you must opt into it.
# #' @param auto_index if \code{copy} is \code{TRUE}, automatically create
# #' indices for the variables in \code{by}. This may speed up the join if
# #' there are matching indexes in \code{x}.
# #' @examples
# #' \dontrun{
# #' if (require("RSQLite") && has_lahman("sqlite")) {
# #'
# #' # Left joins ----------------------------------------------------------------
# #' lahman_s <- lahman_sqlite()
# #' batting <- tbl(lahman_s, "Batting")
# #' team_info <- select(tbl(lahman_s, "Teams"), yearID, lgID, teamID, G, R:H)
# #'
# #' # Combine player and whole team statistics
# #' first_stint <- select(filter(batting, stint == 1), playerID:H)
# #' both <- left_join(first_stint, team_info, type = "inner", by = c("yearID", "teamID", "lgID"))
# #' head(both)
# #' explain(both)
# #'
# #' # Join with a local data frame
# #' grid <- expand.grid(
# #' teamID = c("WAS", "ATL", "PHI", "NYA"),
# #' yearID = 2010:2012)
# #' top4a <- left_join(batting, grid, copy = TRUE)
# #' explain(top4a)
# #'
# #' # Indices don't really help here because there's no matching index on
# #' # batting
# #' top4b <- left_join(batting, grid, copy = TRUE, auto_index = TRUE)
# #' explain(top4b)
# #'
# #' # Semi-joins ----------------------------------------------------------------
# #'
# #' people <- tbl(lahman_s, "Master")
# #'
# #' # All people in half of fame
# #' hof <- tbl(lahman_s, "HallOfFame")
# #' semi_join(people, hof)
# #'
# #' # All people not in the hall of fame
# #' anti_join(people, hof)
# #'
# #' # Find all managers
# #' manager <- tbl(lahman_s, "Managers")
# #' semi_join(people, manager)
# #'
# #' # Find all managers in hall of fame
# #' famous_manager <- semi_join(semi_join(people, manager), hof)
# #' famous_manager
# #' explain(famous_manager)
# #'
# #' # Anti-joins ----------------------------------------------------------------
# #'
# #' # batters without person covariates
# #' anti_join(batting, people)
# #' }
# #' }
# #' @name join.tbl_sql
# NULL
# #' @rdname join.tbl_sql
# #' @export
# inner_join.tbl_sql <- function(x, y, by = NULL, copy = FALSE,
# auto_index = FALSE, ...) {
# by <- common_by(by, x, y)
# y <- auto_copy(x, y, copy, indexes = if (auto_index) list(by$y))
# sql <- sql_join(x$src$con, x, y, type = "inner", by = by)
# update(tbl(x$src, sql), group_by = groups(x))
# }
# #' @rdname join.tbl_sql
# #' @export
# left_join.tbl_sql <- function(x, y, by = NULL, copy = FALSE,
# auto_index = FALSE, ...) {
# by <- common_by(by, x, y)
# y <- auto_copy(x, y, copy, indexes = if (auto_index) list(by$y))
# sql <- sql_join(x$src$con, x, y, type = "left", by = by)
# update(tbl(x$src, sql), group_by = groups(x))
# }
# #' @rdname join.tbl_sql
# #' @export
# semi_join.tbl_sql <- function(x, y, by = NULL, copy = FALSE,
# auto_index = FALSE, ...) {
# by <- common_by(by, x, y)
# y <- auto_copy(x, y, copy, indexes = if (auto_index) list(by$y))
# sql <- sql_semi_join(x$src$con, x, y, anti = FALSE, by = by)
# update(tbl(x$src, sql), group_by = groups(x))
# }
# #' @rdname join.tbl_sql
# #' @export
# anti_join.tbl_sql <- function(x, y, by = NULL, copy = FALSE,
# auto_index = FALSE, ...) {
# by <- common_by(by, x, y)
# y <- auto_copy(x, y, copy, indexes = if (auto_index) list(by$y))
# sql <- sql_semi_join(x$src$con, x, y, anti = TRUE, by = by)
# update(tbl(x$src, sql), group_by = groups(x))
# }
# is.join <- function(x) {
# inherits(x, "join")
# }
# # Set operations ---------------------------------------------------------------
# #' @export
# distinct_.tbl_sql <- function(.data, ..., .dots) {
# dist <- distinct_vars(.data, ..., .dots = .dots)
# if (length(dist$vars) > 0) {
# stop("Can't calculate distinct only on specified columns with SQL",
# call. = FALSE)
# }
# from <- sql_subquery(dist$data$src$con, dist$data$query$sql)
# sql <- build_sql("SELECT DISTINCT * FROM ", from, con = dist$data$src$con)
# update(tbl(dist$data$src, sql, vars = dist$data$select), group_by = groups(.data))
# }
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.