Nothing
# Copyright 2023 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#' @include translations.R process_translations.R
NULL
#' Parse a SQL query
#'
#' @description Parses a SQL \code{SELECT} statement into a list with R
#' expressions
#'
#' @param query a character string containing a SQL \code{SELECT} statement
#' @param tidyverse set to \code{TRUE} to use functions from \pkg{tidyverse}
#' packages including \pkg{dplyr}, \pkg{stringr}, and \pkg{lubridate} in the R
#' expressions
#' @param secure set to \code{FALSE} to allow potentially dangerous functions in
#' the returned R expressions
#' @return A list object with named elements representing the clauses of the
#' query, containing sublists of unevaluated R expressions translated from the
#' SQL expressions in the query.
#'
#' Depending on the arguments, the returned list and its sublists will have
#' attributes named \code{distinct} and \code{aggregate} with logical values
#' that can aid in the evaluation of the R expressions. If \code{query}
#' contains one or more joins, then the sublist named \code{from} will have
#' attributes named \code{join_types} and \code{join_conditions} specifying
#' the types of join and the join conditions.
#' @details See the
#' \href{https://cran.r-project.org/package=queryparser/readme/README.html#current-limitations}{current
#' limitations} section of the \code{README} for information about what types
#' of queries are supported.
#' @examples
#' my_query <- "SELECT origin, dest,
#' COUNT(flight) AS num_flts,
#' round(AVG(distance)) AS dist,
#' round(AVG(arr_delay)) AS avg_delay
#' FROM flights
#' WHERE distance BETWEEN 200 AND 300
#' AND air_time IS NOT NULL
#' GROUP BY origin, dest
#' HAVING num_flts > 3000
#' ORDER BY num_flts DESC, avg_delay DESC
#' LIMIT 100;"
#'
#' parse_query(my_query)
#'
#' parse_query(my_query, tidyverse = TRUE)
#' @seealso \code{\link{parse_expression}}
#' @export
parse_query <- function(query, tidyverse = FALSE, secure = TRUE) {
if (!identical(typeof(query), "character") ||
!identical(length(query), 1L) ||
!identical(typeof(tidyverse), "logical") ||
!identical(length(tidyverse), 1L) ||
!identical(typeof(secure), "logical") ||
!identical(length(secure), 1L)) {
stop("Unexpected input to parse_query()", call. = FALSE)
}
assert_tidyquery_version()
tree <- split_query(query, tidyverse)
is_select_distinct <- isTRUE(attr(tree$select, "distinct"))
is_select_star <- any(tree$select == "*")
has_from <- !is.null(tree$from)
has_where <- !is.null(tree$where)
has_group_by <- !is.null(tree$group_by)
has_having <- !is.null(tree$having)
has_order_by <- !is.null(tree$order_by)
has_limit <- !is.null(tree$limit)
tree$select <- parse_select(tree$select, tidyverse, secure)
tree$from <- parse_from(tree$from, tidyverse, secure)
tree$where <- parse_where(tree$where, tidyverse, secure)
tree$group_by <- parse_group_by(tree$group_by, tidyverse, secure)
tree$having <- parse_having(tree$having, tidyverse, secure)
tree$order_by <- parse_order_by(tree$order_by, tidyverse, secure)
tree$limit <- parse_limit(tree$limit)
bad_aliases <- intersect(c(names(tree$select), names(tree$from)), disallowed_aliases)
if (length(bad_aliases) > 0) {
stop("Query contains disallowed aliases: ", paste(bad_aliases, collapse = ", "), call. = FALSE)
}
is_aggregate_expression_in_select_list <- are_aggregate_expressions(tree$select)
is_aggregate_expression_in_order_by_list <- are_aggregate_expressions(tree$order_by)
has_aggregates_in_select <- any(is_aggregate_expression_in_select_list)
has_aggregates_in_order_by <- any(is_aggregate_expression_in_order_by_list)
is_aggregating_query <- has_group_by || has_having || has_aggregates_in_select || has_aggregates_in_order_by
if (is_aggregating_query) {
if (is_select_distinct) {
stop("SELECT DISTINCT cannot be used together with ",
"aggregate expressions or a GROUP BY clause", call. = FALSE)
}
if (is_select_star) {
stop("SELECT * cannot be used together with ",
"aggregate expressions or a GROUP BY clause", call. = FALSE)
}
group_by_cols <- vapply(tree$group_by, deparse, "")
agg_aliases <- names(tree$select)
if (is.null(agg_aliases)) {
agg_aliases <- rep("", length(tree$select))
}
group_by_refs <- as.character(tree$select[agg_aliases %in% as.character(tree$group_by)])
valid_agg_cols <- setdiff(c(group_by_cols, agg_aliases, group_by_refs), "")
if (has_having && !is_valid_expression_in_aggregation(tree$having[[1]], valid_agg_cols)) {
stop("The expression in the HAVING clause is invalid in an aggregation context ",
"or incompatible with the GROUP BY clause", call. = FALSE)
}
select_cols_to_check <- tree$select[!agg_aliases %in% group_by_cols & !tree$select %in% group_by_cols]
group_by_col_alias_refs <- vapply(tree$select[agg_aliases %in% group_by_cols], deparse, "")
group_by_cols_and_refs <- c(group_by_cols, group_by_col_alias_refs)
if (!all(are_valid_expressions_in_aggregation(select_cols_to_check, group_by_cols_and_refs))) {
stop("The SELECT list includes expressions that are invalid in an aggregation context ",
"or incompatible with the GROUP BY clause", call. = FALSE)
}
agg_expr_aliases <- agg_aliases[is_aggregate_expression_in_select_list]
if (any(agg_expr_aliases %in% group_by_cols)) {
stop("Aliases of aggregate expressions are not allowed in the GROUP BY clause", call. = FALSE)
}
if (tidyverse && length(valid_agg_cols) > 0) {
valid_agg_cols_for_order_by <- c(valid_agg_cols, paste0("dplyr::desc(",valid_agg_cols,")"))
} else {
valid_agg_cols_for_order_by <- valid_agg_cols
}
if (has_order_by && !all(are_valid_expressions_in_aggregation(tree$order_by, valid_agg_cols_for_order_by))) {
stop("The ORDER BY list includes expressions that are invalid in an aggregation context ",
"or incompatible with the GROUP BY clause", call. = FALSE)
}
attr(tree$select, "aggregate") <- is_aggregate_expression_in_select_list
if (has_order_by) {
attr(tree$order_by, "aggregate") <- is_aggregate_expression_in_order_by_list
}
attr(tree, "aggregate") <- TRUE
} else if (is_select_distinct) {
distinct_cols <- vapply(tree$select, deparse, "")
distinct_aliases <- names(tree$select)
if (is.null(distinct_aliases)) {
distinct_aliases <- rep("", length(tree$select))
}
valid_distinct_cols <- setdiff(c(distinct_cols, distinct_aliases), "")
if (tidyverse && length(valid_distinct_cols) > 0) {
valid_distinct_cols_for_order_by <- c(valid_distinct_cols, paste0("dplyr::desc(",valid_distinct_cols,")"))
} else {
valid_distinct_cols_for_order_by <- valid_distinct_cols
}
if (!all(are_valid_expressions_in_distinct(tree$order_by, valid_distinct_cols_for_order_by))) {
stop("The ORDER BY list includes expressions that are invalid ",
"in a SELECT DISTINCT query", call. = FALSE)
}
attr(tree$select, "distinct") <- TRUE
}
if (length(tree$from) == 1) {
table_name <- as.character(tree$from[[1]])
if (!is.null(names(tree$from)) && names(tree$from) != "") {
table_alias <- names(tree$from)
} else {
table_alias <- character(0)
}
table_prefixes <- c(table_name, table_alias)
tree <- unqualify_query(tree, table_prefixes)
}
tree
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.