Nothing
test_that("mutate computed before summarise", {
mf <- memdb_frame(x = c(1, 2, 3), y = c(9, 8, 7))
out <- mutate(mf, z = x + y) %>%
summarise(sum_z = sum(z, na.rm = TRUE)) %>%
collect()
expect_equal(out$sum_z, 30)
})
test_that("two mutates equivalent to one", {
mf <- memdb_frame(x = c(1, 5, 9), y = c(3, 12, 11))
df1 <- mf %>% mutate(x2 = x * 2, y4 = y * 4) %>% collect()
df2 <- mf %>% collect() %>% mutate(x2 = x * 2, y4 = y * 4)
compare_tbl(df1, df2)
})
test_that("mutate() isn't inlined after distinct() #1119", {
mf <- memdb_frame(x = 1:2)
expect_equal(
mf %>%
distinct(x) %>%
mutate(x = 0) %>%
collect(),
tibble(x = c(0, 0))
)
lf <- lazy_frame(x = 1:2)
expect_snapshot(
lf %>%
distinct(x) %>%
mutate(x = 0)
)
})
test_that("can use window function after summarise and pure projection #1104", {
lf <- lazy_frame(g = 1, x = 1) %>%
group_by(g) %>%
summarise(x = 1) %>%
select(g)
expect_snapshot({
(expect_no_error(lf %>% mutate(r = row_number())))
})
})
test_that("can refer to fresly created values", {
out1 <- memdb_frame(x1 = 1) %>%
mutate(x2 = x1 + 1, x3 = x2 + 1, x4 = x3 + 1) %>%
collect()
expect_equal(out1, tibble(x1 = 1, x2 = 2, x3 = 3, x4 = 4))
out2 <- copy_to_test("sqlite", tibble(x = 1), name = "multi_mutate") %>%
mutate(x = x + 1, x = x + 2, x = x + 4)
expect_equal(collect(out2), tibble(x = 8))
expect_snapshot(show_query(out2))
})
test_that("transmute includes all needed variables", {
lf <- lazy_frame(x = 1, y = 2)
out <- transmute(lf, x = x / 2, x2 = x + y)
expect_equal(op_vars(out$lazy_query$x), c("x", "y"))
expect_snapshot(out)
})
test_that("queries are not nested unnecessarily", {
# Should only be one query deep
sql <- memdb_frame(x = 1) %>%
mutate(y = x + 1, a = y + 1, b = y + 1) %>%
sql_build()
expect_s3_class(sql$from, "select_query")
expect_s3_class(sql$from$from, "base_query")
})
test_that("maintains order of existing columns (#3216, #3223)", {
lazy <- lazy_frame(x = 1, y = 2) %>%
mutate(z = 3, y = 4, y = 5)
expect_equal(op_vars(lazy), c("x", "y", "z"))
})
test_that("supports overwriting variables (#3222)", {
df <- memdb_frame(x = 1, y = 2) %>%
mutate(y = 4, y = 5) %>%
collect()
expect_equal(df, tibble(x = 1, y = 5))
df <- memdb_frame(x = 1, y = 2) %>%
mutate(y = 4, y = y + 1) %>%
collect()
expect_equal(df, tibble(x = 1, y = 5))
df <- memdb_frame(x = 1, y = 2) %>%
mutate(y = 4, y = x + 4) %>%
collect()
expect_equal(df, tibble(x = 1, y = 5))
})
test_that("across() does not select grouping variables", {
df <- lazy_frame(g = 1, x = 1)
# SELECT `g`, 0.0 AS `x`
expect_snapshot(df %>% group_by(g) %>% mutate(across(.fns = ~ 0)))
expect_snapshot(df %>% group_by(g) %>% transmute(across(.fns = ~ 0)))
})
test_that("transmute() keeps grouping variables", {
lf <- lazy_frame(g = 1, x = 1) %>% group_by(g)
expect_equal(lf %>% transmute(x = 1) %>% op_vars(), c("g", "x"))
expect_equal(lf %>% transmute(x = 1, g) %>% op_vars(), c("x", "g"))
})
test_that("across() can access previously created variables", {
out <- memdb_frame(x = 1) %>% mutate(y = 2, across(y, sqrt))
lf <- lazy_frame(x = 1) %>% mutate(y = 2, across(y, sqrt))
expect_equal(
collect(out),
tibble(x = 1, y = sqrt(2))
)
expect_equal(
op_vars(out),
c("x", "y")
)
expect_snapshot(remote_query(lf))
})
test_that("across() uses original column rather than overridden one", {
db <- memdb_frame(x = 2, y = 4, z = 6)
expect_equal(
db %>% mutate(across(everything(), ~ .x / x)) %>% collect(),
tibble(x = 1, y = 2, z = 3)
)
expect_equal(
db %>%
mutate(
x = -x,
across(everything(), ~ .x / x),
y = y + x
) %>%
collect(),
tibble(x = 1, y = -1, z = -3)
)
lf <- lazy_frame(x = 2, y = 4, z = 6)
expect_equal(
lf %>%
mutate(across(everything(), ~ .x / x)) %>%
remote_query(),
sql("SELECT `x` / `x` AS `x`, `y` / `x` AS `y`, `z` / `x` AS `z`\nFROM `df`")
)
expect_snapshot(
lf %>%
mutate(
x = -x,
across(everything(), ~ .x / x),
y = y + x
)
)
})
test_that("new columns take precedence over global variables", {
y <- "global var"
db <- memdb_frame(data.frame(x = 1)) %>% mutate(y = 2, z = y + 1)
lf <- lazy_frame(data.frame(x = 1)) %>% mutate(y = 2, z = y + 1)
expect_equal(
collect(db),
tibble(x = 1, y = 2, z = 3)
)
expect_snapshot(remote_query(lf))
})
test_that("constants do not need a new query", {
expect_equal(
lazy_frame(x = 1, y = 2) %>% mutate(z = 2, z = 3) %>% remote_query(),
sql("SELECT `df`.*, 3.0 AS `z`\nFROM `df`")
)
})
test_that("mutate() produces nice error messages", {
options(lifecycle_verbosity = "quiet")
expect_snapshot(error = TRUE, {
lazy_frame(x = 1) %>% mutate(z = non_existent + 1)
# `...` cannot be evaluated
lazy_frame(x = 1) %>% mutate(across(x, mean, na.rm = z))
# `.fns` cannot be evaluated
lazy_frame(x = 1) %>% mutate(across(x, .fns = "a"))
})
})
test_that("empty mutate returns input", {
df <- lazy_frame(x = 1)
gf <- group_by(df, x)
expect_equal(mutate(df), df)
expect_equal(mutate(df, .by = x), df)
expect_equal(mutate(gf), gf)
expect_equal(mutate(df, !!!list()), df)
expect_equal(mutate(df, !!!list(), .by = x), df)
expect_equal(mutate(gf, !!!list()), gf)
})
# .by -------------------------------------------------------------------------
test_that("can group transiently using `.by`", {
suppressWarnings(check_na_rm(FALSE))
df <- memdb_frame(g = c(1, 1, 2, 1, 2), x = c(5, 2, 1, 2, 3))
out <- mutate(df, x = mean(x), .by = g) %>%
arrange(g) %>%
collect()
expect_identical(out$g, c(1, 1, 1, 2, 2))
expect_identical(out$x, c(3, 3, 3, 2, 2))
expect_equal(group_vars(out), character())
})
test_that("can `NULL` out the `.by` column", {
df <- lazy_frame(x = 1:3, y = 1:3)
out <- mutate(df, x = NULL, .by = x)
expect_identical(op_vars(out), "y")
expect_identical(remote_query(out), sql("SELECT `y`\nFROM `df`"))
})
# SQL generation -----------------------------------------------------------
test_that("mutate generates new variables and replaces existing", {
df1 <- memdb_frame(x = 1)
out <- df1 %>% mutate(x = 2, y = 3) %>% collect()
expect_equal(out, tibble(x = 2, y = 3))
})
test_that("transmute only returns new variables", {
df1 <- memdb_frame(x = 1)
out <- df1 %>% transmute(y = 3) %>% collect()
expect_equal(out, tibble(y = 3))
})
test_that("mutate calls windowed versions of sql functions", {
df1 <- memdb_frame(x = 1:4, g = rep(c(1, 2), each = 2))
out <- df1 %>% group_by(g) %>% mutate(r = row_number(x)) %>% collect()
expect_equal(out$r, c(1, 2, 1, 2))
})
test_that("recycled aggregates generate window function", {
df1 <- memdb_frame(x = as.numeric(1:4), g = rep(c(1, 2), each = 2))
out <- df1 %>%
group_by(g) %>%
mutate(r = x - mean(x, na.rm = TRUE)) %>%
collect()
expect_equal(out$r, c(-0.5, 0.5, -0.5, 0.5))
})
test_that("cumulative aggregates generate window function", {
df1 <- memdb_frame(x = 1:4, g = rep(c(1, 2), each = 2))
out <- df1 %>%
group_by(g) %>%
arrange(x) %>%
mutate(r = cumsum(x)) %>%
collect()
expect_equal(out$r, c(1, 3, 3, 7))
})
test_that("mutate overwrites previous variables", {
df <- memdb_frame(x = 1:5) %>%
mutate(x = x + 1) %>%
mutate(x = x + 1) %>%
collect()
expect_equal(names(df), "x")
expect_equal(df$x, 1:5 + 2)
})
test_that("sequence of operations work", {
out <- memdb_frame(x = c(1, 2, 3, 4)) %>%
select(y = x) %>%
mutate(z = 2 * y) %>%
filter(z == 2) %>%
collect()
expect_equal(out, tibble(y = 1, z = 2))
})
# .before, .after, .keep ------------------------------------------------------
test_that(".keep = 'unused' keeps variables explicitly mentioned", {
df <- lazy_frame(x = 1, y = 2)
out <- mutate(df, x1 = x + 1, y = y, .keep = "unused")
expect_equal(op_vars(out), c("y", "x1"))
})
test_that(".keep = 'used' not affected by across()", {
df <- lazy_frame(x = 1, y = 2, z = 3, a = "a", b = "b", c = "c")
# This must evaluate every column in order to figure out if should
# be included in the set or not, but that shouldn't be counted for
# the purposes of "used" variables
out <- mutate(df, across(c("x", "y", "z"), identity), .keep = "unused")
expect_equal(op_vars(out), op_vars(df))
})
test_that(".keep = 'used' keeps variables used in expressions", {
df <- lazy_frame(a = 1, b = 2, c = 3, x = 1, y = 2)
out <- mutate(df, xy = x + y, .keep = "used")
expect_equal(op_vars(out), c("x", "y", "xy"))
})
test_that(".keep = 'none' only keeps grouping variables", {
df <- lazy_frame(x = 1, y = 2)
gf <- group_by(df, x)
expect_equal(op_vars(mutate(df, z = 1, .keep = "none")), "z")
expect_equal(op_vars(mutate(gf, z = 1, .keep = "none")), c("x", "z"))
})
test_that(".keep = 'none' retains original ordering (#5967)", {
df <- lazy_frame(x = 1, y = 2)
expect_equal(
df %>% mutate(y = 1, x = 2, .keep = "none") %>% op_vars(),
c("x", "y")
)
# even when grouped
gf <- group_by(df, x)
expect_equal(
gf %>% mutate(y = 1, x = 2, .keep = "none") %>% op_vars(),
c("x", "y")
)
})
test_that("can use .before and .after to control column position", {
df <- lazy_frame(x = 1, y = 2)
expect_equal(mutate(df, z = 1) %>% op_vars(), c("x", "y", "z"))
expect_equal(mutate(df, z = 1, .before = 1) %>% op_vars(), c("z", "x", "y"))
expect_equal(mutate(df, z = 1, .after = 1) %>% op_vars(), c("x", "z", "y"))
# but doesn't affect order of existing columns
df <- lazy_frame(x = 1, y = 2)
expect_equal(mutate(df, x = 1, .after = y) %>% op_vars(), c("x", "y"))
})
test_that(".keep and .before/.after interact correctly", {
df <- lazy_frame(x = 1, y = 1, z = 1, a = 1, b = 2, c = 3) %>%
group_by(a, b)
expect_equal(mutate(df, d = 1, x = 2, .keep = "none") %>% op_vars(), c("x", "a", "b", "d"))
expect_equal(mutate(df, d = 1, x = 2, .keep = "none", .before = "a") %>% op_vars(), c("x", "d", "a", "b"))
expect_equal(mutate(df, d = 1, x = 2, .keep = "none", .after = "a") %>% op_vars(), c("x", "a", "d", "b"))
})
test_that("dropping column with `NULL` then readding it retains original location", {
df <- lazy_frame(x = 1, y = 2, z = 3, a = 4)
df <- group_by(df, z)
expect_equal(
mutate(df, y = NULL, y = 3, .keep = "all") %>% op_vars(),
c("x", "y", "z", "a")
)
expect_equal(
mutate(df, b = a, y = NULL, y = 3, .keep = "used") %>% op_vars(),
c("y", "z", "a", "b")
)
expect_equal(
mutate(df, b = a, y = NULL, y = 3, .keep = "unused") %>% op_vars(),
c("x", "y", "z", "b")
)
# It isn't treated as a "new" column
expect_equal(mutate(df, y = NULL, y = 3, .keep = "all", .before = x) %>% op_vars(), c("x", "y", "z", "a"))
})
test_that(".keep= always retains grouping variables (#5582)", {
df <- lazy_frame(x = 1, y = 2, z = 3) %>% group_by(z)
expect_equal(
df %>% mutate(a = x + 1, .keep = "none") %>% op_vars(),
c("z", "a")
)
expect_equal(
df %>% mutate(a = x + 1, .keep = "all") %>% op_vars(),
c("x", "y", "z", "a")
)
expect_equal(
df %>% mutate(a = x + 1, .keep = "used") %>% op_vars(),
c("x", "z", "a")
)
expect_equal(
df %>% mutate(a = x + 1, .keep = "unused") %>% op_vars(),
c("y", "z", "a")
)
})
# sql_render --------------------------------------------------------------
test_that("quoting for rendering mutated grouped table", {
out <- memdb_frame(x = 1, y = 2) %>% mutate(y = x)
expect_match(out %>% sql_render, "^SELECT `x`, `x` AS `y`\nFROM `[^`]*`$")
expect_equal(out %>% collect, tibble(x = 1, y = 1))
})
test_that("mutate generates subqueries as needed", {
lf <- lazy_frame(x = 1, con = simulate_sqlite())
expect_snapshot(lf %>% mutate(x = x + 1, x = x + 1))
expect_snapshot(lf %>% mutate(x1 = x + 1, x2 = x1 + 1))
})
test_that("mutate collapses over nested select", {
lf <- lazy_frame(g = 0, x = 1, y = 2)
expect_snapshot(lf %>% select(x:y) %>% mutate(x = x * 2, y = y * 2))
expect_snapshot(lf %>% select(y:x) %>% mutate(x = x * 2, y = y * 2))
})
test_that("mutate() uses star", {
lf <- lazy_frame(x = 1, y = 1)
expect_equal(
lf %>% mutate(z = 1L) %>% remote_query(),
sql("SELECT `df`.*, 1 AS `z`\nFROM `df`")
)
expect_equal(
lf %>% mutate(a = 1L, .before = 1) %>% remote_query(),
sql("SELECT 1 AS `a`, `df`.*\nFROM `df`")
)
expect_equal(
lf %>% transmute(a = 1L, x, y, z = 2L) %>% remote_query(),
sql("SELECT 1 AS `a`, `df`.*, 2 AS `z`\nFROM `df`")
)
# does not use * if `use_star = FALSE`
expect_equal(
lf %>% mutate(z = 1L) %>% remote_query(sql_options = sql_options(use_star = FALSE)),
sql("SELECT `x`, `y`, 1 AS `z`\nFROM `df`")
)
})
# sql_build ---------------------------------------------------------------
test_that("mutate generates simple expressions", {
out <- lazy_frame(x = 1) %>%
mutate(y = x + 1L) %>%
sql_build()
expect_equal(out$select, sql('`df`.*', y = '`x` + 1'))
})
test_that("mutate can drop variables with NULL", {
out <- lazy_frame(x = 1, y = 1) %>%
mutate(y = NULL)
expect_named(sql_build(out)$select, "x")
expect_equal(op_vars(out), "x")
})
test_that("var = NULL works when var is in original data", {
lf <- lazy_frame(x = 1) %>% mutate(x = 2, z = x*2, x = NULL)
expect_equal(sql_build(lf)$select, sql(z = "`x` * 2.0"))
expect_equal(op_vars(lf), "z")
expect_snapshot(remote_query(lf))
})
test_that("var = NULL when var is in final output", {
lf <- lazy_frame(x = 1) %>% mutate(y = NULL, y = 3)
expect_equal(sql_build(lf)$select, sql("`df`.*", y = "3.0"))
expect_equal(op_vars(lf), c("x", "y"))
expect_snapshot(remote_query(lf))
})
test_that("temp var with nested arguments", {
lf <- lazy_frame(x = 1) %>% mutate(y = 2, z = y*2, y = NULL)
expect_equal(sql_build(lf)$select, sql(x = "`x`", z = "`y` * 2.0"))
expect_equal(op_vars(lf), c("x", "z"))
expect_snapshot(remote_query(lf))
})
test_that("mutate_all generates correct sql", {
out <- lazy_frame(x = 1, y = 1) %>%
dplyr::mutate_all(~ . + 1L) %>%
sql_build()
expect_equal(out$select, sql(x = '`x` + 1', y = '`y` + 1'))
out <- lazy_frame(x = 1) %>%
dplyr::mutate_all(list(one = ~ . + 1L, two = ~ . + 2L)) %>%
sql_build()
expect_equal(out$select, sql('`df`.*', one = '`x` + 1', two = '`x` + 2'))
})
test_that("mutate_all scopes nested quosures correctly", {
num <- 10L
out <- lazy_frame(x = 1, y = 1) %>%
dplyr::mutate_all(~ . + num) %>%
sql_build()
expect_equal(out$select, sql(x = '`x` + 10', y = '`y` + 10'))
})
# ops ---------------------------------------------------------------------
test_that("mutate adds new", {
out <- tibble(x = 1) %>% tbl_lazy() %>% mutate(y = x + 1, z = y + 1)
expect_equal(op_vars(out), c("x", "y", "z"))
})
test_that("mutated vars are always named", {
mf <- dbplyr::memdb_frame(a = 1)
out2 <- mf %>% mutate(1) %>% op_vars()
expect_equal(out2, c("a", "1"))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.