# A step_mutate() built with only a parent should point back at that parent,
# expose the parent's single column, and carry no groups or new variables.
test_that("constructor has sensible defaults", {
  parent_step <- step_first(data.table(x = 1), "DT")
  mutate_step <- step_mutate(parent_step)

  expect_s3_class(mutate_step, "dtplyr_step_mutate")

  expect_equal(mutate_step$parent, parent_step)
  expect_equal(mutate_step$vars, "x")
  expect_equal(mutate_step$groups, character())
  expect_equal(mutate_step$new_vars, list())
})
# copies ------------------------------------------------------------------
# `needs_copy` is expected to be FALSE until a mutate() appears in the
# pipeline, and to stay TRUE for steps stacked on top of that mutate().
test_that("need to copy when there's a mutate", {
  lazy <- lazy_dt(data.table(x = 1))

  # No mutate() anywhere in the chain
  expect_false(lazy$needs_copy)
  expect_false(filter(lazy, x == 1)$needs_copy)
  expect_false(head(lazy)$needs_copy)

  # A mutate() directly, or followed by another verb
  mutated <- mutate(lazy, y = 1)
  expect_true(mutated$needs_copy)
  expect_true(filter(mutated, x == 1)$needs_copy)
  expect_true(head(mutated)$needs_copy)
})
# When the preceding step reports `implicit_copy`, a following mutate() is
# expected not to ask for an explicit copy.
test_that("unless there's already an implicit copy", {
  lazy <- lazy_dt(data.table(x = 1))

  filtered <- filter(lazy, x == 1)
  expect_true(filtered$implicit_copy)
  expect_false(mutate(filtered, y = 1)$needs_copy)

  top_rows <- head(lazy)
  expect_true(top_rows$implicit_copy)
  expect_false(mutate(top_rows, y = 1)$needs_copy)
})
# mutate -> summarize -> mutate: the final step is expected to report both
# an implicit copy and the need for a copy.
test_that("properly copies with chained operations, #210", {
  lazy <- lazy_dt(data.table(x = 1))

  first_mutate <- mutate(lazy, z1 = 1)
  summarised <- summarize(first_mutate, z2 = 2)
  query <- mutate(summarised, z3 = 4)

  expect_true(query$implicit_copy)
  expect_true(query$needs_copy)
})
# dplyr verbs -------------------------------------------------------------
# A mutate() with one independent expression should translate to a single
# `:=` call, with a `by` argument added when the input is grouped.
test_that("generates single calls as expect", {
  dt <- lazy_dt(data.table(x = 1), "DT")

  ungrouped_call <- show_query(mutate(dt, x2 = x * 2))
  expect_equal(
    ungrouped_call,
    expr(copy(DT)[, `:=`(x2 = x * 2)])
  )

  grouped_call <- show_query(mutate(group_by(dt, x), x2 = x * 2))
  expect_equal(
    grouped_call,
    expr(copy(DT)[, `:=`(x2 = x * 2), by = .(x)])
  )
})
# When a new column refers to another column created in the same mutate(),
# the expected translation is one `:=` whose right-hand side is a braced
# block that builds the columns in order and returns them via `.()`.
test_that("mutate generates compound expression if needed", {
  dt <- lazy_dt(data.table(x = 1, y = 2), "DT")

  chained <- mutate(dt, x2 = x * 2, x4 = x2 * 2)
  expected <- expr(
    copy(DT)[, `:=`(c("x2", "x4"), {
      x2 <- x * 2
      x4 <- x2 * 2
      .(x2, x4)
    })]
  )

  expect_equal(show_query(chained), expected)
})
# Assigning the same name twice in one mutate() should produce a braced
# block containing both assignments, whether or not the second assignment
# depends on the first.
test_that("allows multiple assignment to the same variable", {
  dt <- lazy_dt(data.table(x = 1, y = 2), "DT")

  # when nested
  nested <- mutate(dt, x = x * 2, x = x * 2)
  nested_expected <- expr(
    copy(DT)[, `:=`(c("x"), {
      x <- x * 2
      x <- x * 2
      .(x)
    })]
  )
  expect_equal(show_query(nested), nested_expected)

  # when not nested
  independent <- mutate(dt, z = 2, z = 3)
  independent_expected <- expr(
    copy(DT)[, `:=`(c("z"), {
      z <- 2
      z <- 3
      .(z)
    })]
  )
  expect_equal(show_query(independent), independent_expected)
})
# across() with an explicit everything() selection and across() with only
# `.fns` supplied are both expected to expand to the same per-column call.
test_that("can use across", {
  dt <- lazy_dt(data.table(x = 1, y = 2), "DT")
  expected <- expr(copy(DT)[, `:=`(x = x + 1, y = y + 1)])

  explicit_selection <- mutate(dt, across(everything(), ~ . + 1))
  expect_equal(show_query(explicit_selection), expected)

  default_selection <- mutate(dt, across(.fns = ~ . + 1))
  expect_equal(show_query(default_selection), expected)
})
# A column created earlier in the same mutate() must be selectable by a
# later across(); checked on the collected result, the tracked variable
# names, and the generated call.
test_that("across() can access previously created variables", {
  dt <- lazy_dt(data.frame(x = 1), "DT")
  step <- dt %>% mutate(y = 2, across(y, sqrt))

  expect_equal(collect(step), tibble(x = 1, y = sqrt(2)))
  expect_equal(step$vars, c("x", "y"))

  expected <- expr(
    copy(DT)[, c("y") := {
      y <- 2
      y <- sqrt(y)
      .(y)
    }]
  )
  expect_equal(show_query(step), expected)
})
# `vars` should gain newly created columns and lose columns set to NULL.
test_that("vars set correctly", {
  lazy <- lazy_dt(data.frame(x = 1:3, y = 1:3))

  added <- mutate(lazy, z = 1)
  expect_equal(added$vars, c("x", "y", "z"))

  dropped_and_added <- mutate(lazy, x = NULL, z = 1)
  expect_equal(dropped_and_added$vars, c("y", "z"))
})
# A mutate() with no expressions, whether called with no arguments or with
# an empty spliced list, should return its input.
test_that("empty mutate returns input", {
  lazy <- lazy_dt(data.frame(x = 1))

  no_arguments <- lazy %>% mutate()
  expect_equal(no_arguments, lazy)

  empty_splice <- lazy %>% mutate(!!!list())
  expect_equal(empty_splice, lazy)
})
# An unnamed argument that is just an existing column name is a no-op; an
# unnamed argument naming something that is not a column is an error.
test_that("unnamed arguments matching column names are ignored", {
dt <- lazy_dt(data.frame(x = 1), "DT")
# `x` is a column of `dt`, so the result must be identical to the input.
expect_identical(mutate(dt, x), dt)
# `y` is not a column of `dt`; the call is expected to fail and the error
# output is pinned by a snapshot, so keep this call's text unchanged.
expect_snapshot(mutate(dt, y), error = TRUE)
})
# A variable named `y` exists in the calling environment, but `z = y + 1`
# must use the `y` column created earlier in the same mutate().
test_that("new columns take precedence over global variables", {
  dt <- lazy_dt(data.frame(x = 1), "DT")

  # Decoy: would make `y + 1` fail or give a different result if it were used
  y <- "global var"

  step <- dt %>% mutate(y = 2, z = y + 1)

  expect_equal(collect(step), tibble(x = 1, y = 2, z = 3))

  expected <- expr(
    copy(DT)[, c("y", "z") := {
      y <- 2
      z <- y + 1
      .(y, z)
    }]
  )
  expect_equal(show_query(step), expected)
})
# `.by` should group only for the duration of the mutate(): row numbers are
# computed within each `y` group, and the resulting step has no groups.
test_that("works with `.by`", {
  dt <- lazy_dt(data.table(x = 1:3, y = c("a", "a", "b")))
  step <- dt %>%
    mutate(row_num = row_number(), .by = y)

  expect_equal(
    as_tibble(step),
    tibble(x = 1:3, y = c("a", "a", "b"), row_num = c(1, 2, 1))
  )
  # expect_length() reports the actual length on failure, unlike
  # expect_true(length(...) == 0)
  expect_length(step$groups, 0)
})
# Regression test: `.by` in mutate()/filter() must not leak grouping back
# into the select() step that precedes them.
test_that("Using `.by` doesn't group prior step, #439", {
  dt <- lazy_dt(data.table(x = 1:3, y = c(1, 1, 2), z = 1), "DT")

  selected <- select(dt, x, y)
  numbered <- mutate(selected, row_num = row_number(), .by = y)
  kept <- filter(numbered, row_num < 3, .by = y)
  res <- as.data.frame(kept)

  # Why this catches the bug: data.frames/data.tables allow duplicate column
  # names. If `.by` affected the `select` step, data.table would duplicate
  # the "y" column and the result would contain two "y" columns.
  expect_equal(names(res), c("x", "y", "row_num"))
})
# var = NULL -------------------------------------------------------------
# An existing column is overwritten, used to compute `z`, then set to NULL:
# only `z` should remain, and the generated code should remove `x` in a
# second `:=` call.
test_that("var = NULL works when var is in original data", {
  dt <- lazy_dt(data.frame(x = 1), "DT")
  step <- mutate(dt, x = 2, z = x * 2, x = NULL)

  expect_equal(collect(step), tibble(z = 4))
  expect_equal(step$vars, "z")

  expected <- expr(
    copy(DT)[, c("x", "z") := {
      x <- 2
      z <- x * 2
      .(x, z)
    }][, `:=`("x", NULL)]
  )
  expect_equal(show_query(step), expected)
})
# `y = NULL` followed by `y = 3`: the later assignment wins, so `y` is part
# of the output and no separate removal call is expected.
test_that("var = NULL when var is in final output", {
  dt <- lazy_dt(data.frame(x = 1), "DT")
  step <- dt %>% mutate(y = NULL, y = 3)

  expect_equal(collect(step), tibble(x = 1, y = 3))
  expect_equal(step$vars, c("x", "y"))

  expected <- expr(
    copy(DT)[, c("y") := {
      y <- NULL
      y <- 3
      .(y)
    }]
  )
  expect_equal(show_query(step), expected)
})
# `y` is created only to compute `z` and is then set to NULL: it should not
# appear in the result, and the generated code should remove it afterwards.
test_that("temp var with nested arguments", {
  dt <- lazy_dt(data.frame(x = 1), "DT")
  step <- dt %>% mutate(y = 2, z = y * 2, y = NULL)

  expect_equal(collect(step), tibble(x = 1, z = 4))
  expect_equal(step$vars, c("x", "z"))

  expected <- expr(
    copy(DT)[, c("y", "z") := {
      y <- 2
      z <- y * 2
      .(y, z)
    }][, `:=`("y", NULL)]
  )
  expect_equal(show_query(step), expected)
})
# A column that is created and immediately removed, with nothing else
# added: the output should have the same columns as the input.
test_that("temp var with no new vars added", {
  dt <- lazy_dt(data.frame(x = 1), "DT")

  # when no other vars are added
  step <- dt %>% mutate(y = 2, y = NULL)

  expect_equal(collect(step), tibble(x = 1))
  expect_equal(step$vars, "x")

  expected <- expr(
    copy(DT)[, c("y") := {
      y <- 2
      .(y)
    }][, `:=`("y", NULL)]
  )
  expect_equal(show_query(step), expected)
})
# Same scenarios as the ungrouped `var = NULL` tests, but on input grouped
# by `g`: the compound `:=` should carry `by = .(g)` and the removal should
# be a separate call.
test_that("var = NULL works when data is grouped", {
  grouped <- group_by(lazy_dt(data.frame(x = 1, g = 1), "DT"), g)

  # when var is not in original data
  temp_step <- grouped %>% mutate(y = 2, z = y * 2, y = NULL)

  expect_equal(
    collect(temp_step),
    group_by(tibble(x = 1, g = 1, z = 4), g)
  )
  expect_equal(temp_step$vars, c("x", "g", "z"))

  temp_expected <- expr(
    copy(DT)[, c("y", "z") := {
      y <- 2
      z <- y * 2
      .(y, z)
    }, by = .(g)][, `:=`("y", NULL)]
  )
  expect_equal(show_query(temp_step), temp_expected)

  # when var is in original data
  existing_step <- mutate(grouped, x = 2, z = x * 2, x = NULL)

  expect_equal(
    collect(existing_step),
    group_by(tibble(g = 1, z = 4), g)
  )
  expect_equal(existing_step$vars, c("g", "z"))

  existing_expected <- expr(
    copy(DT)[, c("x", "z") := {
      x <- 2
      z <- x * 2
      .(x, z)
    }, by = .(g)][, `:=`("x", NULL)]
  )
  expect_equal(show_query(existing_step), existing_expected)
})
# .before and .after -----------------------------------------------------------
# New columns go last by default and can be positioned with `.before` or
# `.after`; existing columns keep their position.
test_that("can use .before and .after to control column position", {
  lazy <- lazy_dt(data.frame(x = 1, y = 2))

  appended <- as_tibble(mutate(lazy, z = 1))
  expect_named(appended, c("x", "y", "z"))

  placed_before <- as_tibble(mutate(lazy, z = 1, .before = x))
  expect_named(placed_before, c("z", "x", "y"))

  placed_after <- as_tibble(mutate(lazy, z = 1, .after = x))
  expect_named(placed_after, c("x", "z", "y"))

  # but doesn't affect order of existing columns
  overwritten <- as_tibble(mutate(lazy, x = 1, .after = y))
  expect_named(overwritten, c("x", "y"))
})
# .keep ------------------------------------------------------------------------
# `y = y` names `y` as an output, so it is kept under .keep = "unused";
# `x` is only read by `x1 = x + 1`, so it is dropped.
test_that(".keep = 'unused' keeps variables explicitly mentioned", {
  input <- data.table(x = 1, y = 2)

  mutated <- mutate(input, x1 = x + 1, y = y, .keep = "unused")
  out <- as.data.table(mutated)

  expect_named(out, c("y", "x1"))
})
# With across() over x, y, z and .keep = "unused", every input column is
# expected to come back in its original order.
test_that(".keep = 'used' not affected by across()", {
  input <- data.table(x = 1, y = 2, z = 3, a = "a", b = "b", c = "c")

  # across() has to evaluate every column to decide whether it belongs to
  # the selection, but that evaluation must not count towards the set of
  # "used" variables
  mutated <- mutate(input, across(c(x, y, z), identity), .keep = "unused")
  out <- as.data.table(mutated)

  expect_named(out, names(input))
})
# Only the columns read by the expression (`x`, `y`) plus the new column
# should survive .keep = "used".
test_that(".keep = 'used' keeps variables used in expressions", {
  input <- data.table(a = 1, b = 2, c = 3, x = 1, y = 2)

  mutated <- mutate(input, xy = x + y, .keep = "used")
  out <- as.data.table(mutated)

  expect_named(out, c("x", "y", "xy"))
})
# .keep = "none" drops every pre-existing column except grouping columns.
test_that(".keep = 'none' only keeps grouping variables", {
  ungrouped <- data.table(x = 1, y = 2)
  grouped <- group_by(ungrouped, x)

  # Ungrouped input: only the new column remains
  out_ungrouped <- as.data.table(mutate(ungrouped, z = 1, .keep = "none"))
  expect_named(out_ungrouped, "z")

  # Grouped input: the grouping column is retained alongside the new column
  out_grouped <- as.data.table(mutate(grouped, z = 1, .keep = "none"))
  expect_named(out_grouped, c("x", "z"))
})
# Columns are overwritten in the order y, x, but the output should follow
# the input's column order (x, y).
test_that(".keep = 'none' retains original ordering", {
  input <- data.table(x = 1, y = 2)

  out_ungrouped <- as.data.table(
    mutate(input, y = 1, x = 2, .keep = "none")
  )
  expect_named(out_ungrouped, c("x", "y"))

  # even when grouped
  out_grouped <- as.data.table(
    mutate(group_by(input, x), y = 1, x = 2, .keep = "none")
  )
  expect_named(out_grouped, c("x", "y"))
})
# With no expressions nothing is "used", so .keep = "used" should leave no
# columns at all.
test_that("works with empty dots", {
  input <- data.table(x = 1, y = 2)

  out <- as.data.table(mutate(input, .keep = "used"))

  expect_equal(ncol(out), 0)
})
# A bare column name as the only expression counts as using that column,
# so .keep = "used" should keep only `mpg`.
test_that("works with trivial dots", {
  lazy_cars <- lazy_dt(mtcars)

  out <- as.data.table(mutate(lazy_cars, mpg, .keep = "used"))

  expect_named(out, "mpg")
})
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.