# inst/doc/translation.R

## ---- include = FALSE---------------------------------------------------------
# Vignette-wide chunk options: merge each chunk's source and output into a
# single block, and prefix every line of printed output with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----setup, message = FALSE---------------------------------------------------
library(dtplyr)
library(data.table)
library(dplyr)

## -----------------------------------------------------------------------------
# Example input: a five-row data frame with four identical integer columns,
# wrapped with lazy_dt() so the dplyr verbs below can be shown as data.table
# code via show_query().
df <- data.frame(
  a = 1:5,
  b = 1:5,
  c = 1:5,
  d = 1:5
)
dt <- lazy_dt(df)

## -----------------------------------------------------------------------------
# Print the lazy data table itself.
dt

## -----------------------------------------------------------------------------
# Show the generated query for the untouched input.
dt %>%
  show_query()

## -----------------------------------------------------------------------------
# Row-wise verbs: ordering, then filtering on one and on two conditions.
dt %>%
  arrange(a, b, c) %>%
  show_query()

dt %>%
  filter(b == c) %>%
  show_query()
dt %>%
  filter(b == c, c == d) %>%
  show_query()

## -----------------------------------------------------------------------------
# Column-wise verbs: selecting a range, summarising, and transmuting.
dt %>%
  select(a:b) %>%
  show_query()
dt %>%
  summarise(a = mean(a)) %>%
  show_query()
dt %>%
  transmute(a2 = a * 2) %>%
  show_query()

## -----------------------------------------------------------------------------
# mutate() creating two independent columns.
dt %>%
  mutate(a2 = a * 2, b2 = b * 2) %>%
  show_query()

## -----------------------------------------------------------------------------
# mutate() where a4 is computed from a2, a column created in the same call.
dt %>%
  mutate(a2 = a * 2, b2 = b * 2, a4 = a2 * 2) %>%
  show_query()

## -----------------------------------------------------------------------------
# The same dependent expressions, this time through transmute().
dt %>%
  transmute(a2 = a * 2, b2 = b * 2, a4 = a2 * 2) %>%
  show_query()

## -----------------------------------------------------------------------------
# Rename columns a and b to x and y.
dt %>%
  rename(x = a, y = b) %>%
  show_query()

## -----------------------------------------------------------------------------
# distinct() over all columns, over a subset, and over a subset while keeping
# the remaining columns.
dt %>%
  distinct() %>%
  show_query()
dt %>%
  distinct(a, b) %>%
  show_query()
dt %>%
  distinct(a, b, .keep_all = TRUE) %>%
  show_query()

## -----------------------------------------------------------------------------
# distinct() on a computed expression, without and with .keep_all.
dt %>%
  distinct(c = a + b) %>%
  show_query()
dt %>%
  distinct(c = a + b, .keep_all = TRUE) %>%
  show_query()

## -----------------------------------------------------------------------------
# Second table sharing only column `a` with dt; used as the right-hand side of
# the joins below.
dt2 <- lazy_dt(data.frame(a = 1))

# Inner, right, left and anti joins on `a`.
dt %>%
  inner_join(dt2, by = "a") %>%
  show_query()
dt %>%
  right_join(dt2, by = "a") %>%
  show_query()
dt %>%
  left_join(dt2, by = "a") %>%
  show_query()
dt %>%
  anti_join(dt2, by = "a") %>%
  show_query()

## -----------------------------------------------------------------------------
# Full join on `a`.
dt %>%
  full_join(dt2, by = "a") %>%
  show_query()

## -----------------------------------------------------------------------------
# Third table sharing both `a` and `b` with dt, so each join below leaves one
# shared column that is not a join key.
dt3 <- lazy_dt(data.frame(b = 1, a = 1))

dt %>%
  left_join(dt3, by = "a") %>%
  show_query()
dt %>%
  full_join(dt3, by = "b") %>%
  show_query()

## -----------------------------------------------------------------------------
# Semi join on `a`.
dt %>%
  semi_join(dt2, by = "a") %>%
  show_query()

## -----------------------------------------------------------------------------
# Set operations between dt and dt2: intersection, difference, and union.
dt %>%
  intersect(dt2) %>%
  show_query()
dt %>%
  setdiff(dt2) %>%
  show_query()
dt %>%
  union(dt2) %>%
  show_query()

## -----------------------------------------------------------------------------
# Grouped summary: mean of b within each value of a.
dt %>%
  group_by(a) %>%
  summarise(b = mean(b)) %>%
  show_query()

## -----------------------------------------------------------------------------
# The same grouped summary with arrange = FALSE passed to group_by().
dt %>%
  group_by(a, arrange = FALSE) %>%
  summarise(b = mean(b)) %>%
  show_query()

## -----------------------------------------------------------------------------
# Grouped filter: keep rows whose b is below the mean of b in their group.
dt %>%
  group_by(a) %>%
  filter(b < mean(b)) %>%
  show_query()

## -----------------------------------------------------------------------------
# Filter followed by dropping the column that was filtered on.
dt %>% filter(a == 1) %>% select(-a) %>% show_query()

## -----------------------------------------------------------------------------
# Grouped filter followed by a grouped summary.
dt %>% group_by(a) %>% filter(b < mean(b)) %>%
  summarise(c = max(c)) %>% show_query()

## -----------------------------------------------------------------------------
# Two tables whose only shared column is `x`.
dt3 <- lazy_dt(data.frame(x = 1, y = 2))
dt4 <- lazy_dt(data.frame(x = 1, a = 2, b = 3, c = 4, d = 5, e = 7))

# Left join followed by a column selection. The join key is given explicitly,
# matching every other join in this script, instead of relying on an implicit
# natural join over the shared column names.
dt3 %>% 
  left_join(dt4, by = "x") %>% 
  select(x, a:c) %>% 
  show_query()

## -----------------------------------------------------------------------------
# Select with renaming, then filter on one of the new names.
dt %>% select(X = a, Y = b) %>% filter(X == 1) %>% show_query()

## -----------------------------------------------------------------------------
# Filter, then add a computed column.
dt %>% filter(a == 1) %>% mutate(b2 = b * 2) %>% show_query()

## -----------------------------------------------------------------------------
# mutate() applied directly to the original lazy table.
dt %>%
  mutate(a2 = a * 2, b2 = b * 2) %>%
  show_query()

## -----------------------------------------------------------------------------
# mutate() applied to the result of a preceding filter(). The filter uses `a`,
# a column that exists in dt (columns a, b, c, d), so the pipeline is valid
# both for show_query() and if it is actually collected.
dt %>% 
  filter(a == 1) %>% 
  mutate(a2 = a * 2, b2 = b * 2) %>% 
  show_query()

## -----------------------------------------------------------------------------
# A plain data.table with both columns the mutate() below refers to.
# Note: this rebinds dt2, which earlier held a lazy table.
dt2 <- data.table(a = 1:10, b = 1:10)

# Wrap it with immutable = FALSE and show the query generated for mutate().
# NOTE(review): per the dtplyr docs this lets the input be modified in place;
# confirm against the installed version.
dt_inplace <- lazy_dt(dt2, immutable = FALSE)
dt_inplace %>%
  mutate(a2 = a * 2, b2 = b * 2) %>%
  show_query()

## -----------------------------------------------------------------------------
# Benchmark three pipelines. Every entry ends in show_query() so they all
# measure the same thing, and the mutate chain defines a2 before a4 uses it.
# check = FALSE because the three expressions return different results;
# [1:6] keeps only the first six columns of the benchmark table.
bench::mark(
  filter = dt %>%
    filter(a == b, c == d) %>%
    show_query(),
  mutate = dt %>%
    mutate(a2 = a * 2, a4 = a2 * 2, a8 = a4 * 2) %>%
    show_query(),
  summarise = dt %>%
    group_by(a) %>%
    summarise(b = mean(b)) %>%
    show_query(),
  check = FALSE
)[1:6]

# Try the dtplyr package in your browser

# Any scripts or data that you put into this service are public.

# dtplyr documentation built on March 31, 2023, 9:13 p.m.