library(knitr)
library(sqrrl)
opts_chunk$set(echo = TRUE, comment = '', prompt = TRUE)
normal_output <- knit_hooks$get("output")
knit_hooks$set(output = function(x, options) {
  as_sql <- !is.null(options$output.sql) && options$output.sql
  if (as_sql) {
    x <- gsub('^\\[[0-9]+\\] ', '', x)
    x <- gsub('"(.+)"', '\\1', x)
    x <- gsub('\\\\', '', x)
    x <- sqlformat(x, md = FALSE, sqlformat_options = '-k upper')
  }
  normal_output(x, options)
})

🐿 sqrrl

sqrrl is a small collection of utility functions that help build text-based SQL queries in an R-style native-feeling and functional manner.

Unlike other packages that build SQL queries using an object-oriented style, sqrrl provides small functions that produce SQL snippets and can be linked together to compose SQL queries. The result is that the code to produce the SQL statement reads much like the SQL statement iteself. On the other hand, sqrrl doesn't know anything about your database and can't help you out with completions, etc.

Quick Overview

SELECT

SELECT()
SELECT('col1', 'col2', 'col3')
SELECT(newcol = 'col1', avg_col1 = 'mean(col1)')
SELECT(letters[1:3], 't2' = letters[4:6])
SELECT(a = 'apple', b = 'banana', c = 'cherry')
SELECT('t1' = c(a = 'apple', b = 'banana'), c = 'cherry')
SELECT('t1' = c(a = 'apple', b = 'banana'), c = 'cherry', 't2' = c(d = 'dragon_fruit'))

FROM

FROM('table1')
FROM('table1', 'table2')
FROM(t1 = 'table1', t2 = 'table2', 'table3')

WHERE

WHERE('col1 IS NOT NULL')
WHERE(cond = TRUE, 'col1 = 2', 'col2 >= 10')
WHERE(cond = FALSE, 'col1 = 2', 'col2 >= 10')

IN

'id' %IN% 1:4
'id' %IN% letters[1:3]
# Note: if left-hand-side length == 1, then LHS is unquoted (for subqueries)
'id' %IN% paste(SELECT('id'), FROM('other_table'))
'in' %IN% quotes(letters[1])

LIKE

'text_col' %LIKE% 'Prefix%'
'text_col' %LIKE% c('Prefix 1%', 'Prefix 2%')

Comparison Operators

eq(id = 4)
neq(id = 4)
lt(id = 4)
leq(id = 4)
gt(id = 4)
geq(id = 4)

Concatenation Operators

AND(eq(id = 3, class = 'text_value'), geq(date = '2017-06-14'))
OR(eq(id = 9, id = 12), leq(id = 5))
# Concatenate snippets with the %+% infix operator
SELECT() %+% FROM('table') %+% WHERE(eq(id = 10))

Complete Example

(example_query <- 
  SELECT('mpg', weight = 'wt', cylinders = 'cyl') %+%
  FROM('mtcars') %+%
  WHERE(
    lt(gear = 4),
    geq(mpg = 15),
    'cyl' %IN% c(4,6)
  ))

Formatting SQL Queries

sqrrl also provides a simple wrapper for https://github.com/andialbrecht/sqlparse, a Python package for formatting SQL queries. sqlparse can be installed via pip install --upgrade sqlparse, thereafter making available the system command sqlformat.

sqrrl::sqlformat() pretty-prints SQL queries, such as the one above.

example <- sqlformat(example_query, header = 'A Beautifully Formatted Example Query')
cat(example)

More Operators and Examples

Joins

# Using one ID
JOIN(left_ref = 'left_tbl', right_tbls = 'right_tbl', on = 'id')
LEFT_JOIN('l', c('r' = 'right_tbl'), 'id')
RIGHT_JOIN('l', c('r' = 'right_tbl'), 'id', prefer_using = FALSE)
# Join on multiple columns, with different names on left and right
JOIN(type = 'natural right', 'left_tbl', 'right_tbl', c('left.col1' = 'right.col1', 'id2'))
# Join multiple tables on same column
INNER_JOIN('left_tbl', c('right_1', 'right_2'), 'id_col')
# Join multiple tables on different columns
OUTER_JOIN('l', c(r1 = 'right_1', r2 = 'right_2'), list('col1', 'col2'))
# Join multiple tables on different coluns with different column names
JOIN('l', c(r1 = 'right_1', r2 = 'right_2'), list(c(right_1_id = 'id', c(right_2_id = 'id'))))

INSERT INTO VALUES

iris_example <- iris[c(1, 51, 101), ]
# Insert all rows & columns from a data.frame
INSERT_INTO_VALUES('iris', iris_example)
# Insert select columns from a data.frame
INSERT_INTO_VALUES('iris', iris_example, c('Petal.Length', 'Petal.Width', 'Species'))
# Insert named vector
INSERT_INTO_VALUES('iris', c('Petal.Length' = 1.9, 'Petal.Width' = 0.2, 'Species' = 'setosa'))
# Insert subset of named vector
INSERT_INTO_VALUES('iris', c('Petal.Length' = 1.9, 'Petal.Width' = 0.2, 'Species' = 'setosa'),
                   cols = c('Petal.Width', 'Species'))
# Insert just vector of mixed type without column names
INSERT_INTO_VALUES('iris', c(6.5, 3.2, 5.1, 2, 'virginica'))

UPDATE

UPDATE('t1', col1 = 'a')
UPDATE('t1', col1 = ~col2 * 1.25)
UPDATE('t1', col1 = 'a', col2 = 42, .where = 'id' %IN% 1:5)

# Update provides .where argument, but it's easier to add this outside UPDATE()
UPDATE('t1', col1 = 'a', col2 = 42) %+% WHERE('id' %IN% 1:5)
UPDATE('iris', some_column = 1, some_other_col = "high") %+% WHERE(eq(another_col = 2))
UPDATE('t', id = ~id + 1) %+% ORDER_BY(DESC('id'))

# Multiple tables
UPDATE(c('items', 'month'), items.price = ~month.price, .where = eq(items.id = ~month.id))
UPDATE(c('items', 'month'), items.price = ~month.price) %+% WHERE(eq(items.id = ~month.id))


gadenbuie/sqrrrl documentation built on May 24, 2019, 4:04 a.m.