Nothing
# Run this file with testthat::test_local(filter = "^relational$")
con <- dbConnect(duckdb())
on.exit(dbDisconnect(con, shutdown = TRUE))
test_that("we can create a relation from a df", {
rel <- rel_from_df(con, mtcars)
expect_type(rel, "externalptr")
expect_s3_class(rel, "duckdb_relation")
})
test_that("we don't add optional quotes to columns", {
df1 <- data.frame(a.b = 1)
rel3 <- rel_from_df(con, df1)
rel4 <- rel_project(rel3, list(expr_reference("a.b")))
rel5 <- rel_to_altrep(rel4)
expect_equal(df1, rel5)
})
test_that("we won't crash when creating a relation from odd things", {
# na seems to be fine, single col data frame with a single row with NA in it
rel_from_df(con, NA)
expect_error(rel_from_df(NULL, NULL))
expect_error(rel_from_df(con, NULL))
expect_true(TRUE)
})
test_that("we can round-trip a data frame", {
expect_equivalent(mtcars, as.data.frame.duckdb_relation(rel_from_df(con, mtcars)))
expect_equivalent(iris, as.data.frame.duckdb_relation(rel_from_df(con, iris)))
})
test_that("we can recognize if a df is materialized", {
expect_error(df_is_materialized(data.frame(a = "x")))
expect_error(df_is_materialized(data.frame(a = 1:2)))
rel <- rel_from_df(con, data.frame(a = 1:3))
df <- rel_to_altrep(rel)
expect_false(df_is_materialized(df))
# Side effect: materialization
expect_equal(nrow(df), 3)
expect_true(df_is_materialized(df))
})
test_that("we can create various expressions and don't crash", {
ref <- expr_reference("asdf")
print(ref)
expect_error(expr_reference(NA))
# expect_error(expr_reference(as.character(NA)))
expect_error(expr_reference(""))
expect_error(expr_reference(NULL))
expr_constant(TRUE)
expr_constant(FALSE)
expr_constant(NA)
expr_constant(42L)
expr_constant(42)
const <- expr_constant("asdf")
print(const)
expect_error(expr_constant(NULL))
expect_error(expr_constant())
expr_function("asdf", list())
expect_error(expr_function("", list()))
# expect_error(expr_function(as.character(NA), list()))
expect_error(expr_function(NULL, list()))
expect_error(expr_function("asdf"))
expect_true(TRUE)
})
# TODO should maybe be a different file, test_enum_strings.R
test_that("we can cast R strings to DuckDB strings", {
chars <- c(letters, LETTERS)
max_len <- 100
n <- 100000
# yay R one-liners
gen_rand_string <- function(x, max_len) paste0(chars[sample(1:length(chars), runif(1) * max_len, replace = TRUE)], collapse = "")
test_string_vec <- c(vapply(1:n, gen_rand_string, "character", max_len), NA, NA, NA, NA, NA, NA, NA, NA) # batman
df <- data.frame(s = test_string_vec, stringsAsFactors = FALSE)
expect_equivalent(df, as.data.frame.duckdb_relation(rel_from_df(con, df)))
res <- rel_sql(
rel_from_df(con, df),
"SELECT s::string FROM _"
)
expect_equivalent(df, res)
res <- rel_sql(
rel_from_df(con, df),
"SELECT COUNT(*) c FROM _"
)
expect_equal(nrow(df), res$c)
# many rounds yay
df2 <- df
for (i in 1:10) {
df2 <- as.data.frame.duckdb_relation(rel_from_df(con, df2))
expect_equivalent(df, df2)
}
df2 <- df
for (i in 1:10) {
df2 <- as.data.frame(
rel_sql(
rel_from_df(con, df2),
"SELECT s::string s FROM _"
)
)
expect_equivalent(df, df2)
}
})
test_that("the altrep-conversion for relations works", {
iris$Species <- as.character(iris$Species)
rel <- rel_from_df(con, iris)
df <- rel_to_altrep(rel)
expect_false(df_is_materialized(df))
inspect_output <- capture.output(.Internal(inspect(df)))
expect_true(any(grepl("DUCKDB_ALTREP_REL_VECTOR", inspect_output, fixed = TRUE)))
expect_true(any(grepl("DUCKDB_ALTREP_REL_ROWNAMES", inspect_output, fixed = TRUE)))
expect_false(df_is_materialized(df))
dim(df)
expect_true(df_is_materialized(df))
expect_equal(iris, df)
})
test_that("the altrep-conversion for relations work for weirdo types", {
test_df <- data.frame(col_date = as.Date("2019-11-26"), col_ts = as.POSIXct("2019-11-26 21:11Z", "UTC"), col_factor = factor(c("a")))
rel <- rel_from_df(con, test_df)
df <- rel_to_altrep(rel)
expect_false(df_is_materialized(df))
expect_equal(test_df, df)
})
test_that("we can get the relation object back from an altrep df", {
iris$Species <- as.character(iris$Species)
rel <- rel_from_df(con, iris)
df <- rel_to_altrep(rel)
rel2 <- rel_from_altrep_df(df)
expect_true(TRUE)
})
test_that("rel_order() sorts NAs last", {
test_df <- rel_from_df(con, data.frame(a = c(NA, 1:3)))
orders <- list(expr_reference("a"))
rel <- rel_order(test_df, orders)
rel_df <- rel_to_altrep(rel)
expect_false(df_is_materialized(rel_df))
expected_result <- data.frame(a = c(1:3, NA))
expect_equal(rel_df, expected_result)
})
test_that("Inner join returns all inner relations", {
dbExecute(con, "CREATE OR REPLACE MACRO eq(a, b) AS a = b")
left <- rel_from_df(con, data.frame(left_a = c(1, 2, 3), left_b = c(1, 1, 2)))
right <- rel_from_df(con, data.frame(right_b = c(1, 3), right_c = c(4, 5)))
cond <- list(expr_function("eq", list(expr_reference("left_b"), expr_reference("right_b"))))
rel2 <- rel_join(left, right, cond, "inner")
rel_df <- rel_to_altrep(rel2)
dim(rel_df)
expected_result <- data.frame(left_a = c(1, 2), left_b = c(1, 1), right_b = c(1, 1), right_c = c(4, 4))
expect_equal(rel_df, expected_result)
})
test_that("Left join returns all left relations", {
dbExecute(con, "CREATE OR REPLACE MACRO eq(a, b) AS a = b")
left <- rel_from_df(con, data.frame(left_a = c(1, 2, 3), left_b = c(1, 1, 2)))
right <- rel_from_df(con, data.frame(right_b = c(1)))
cond <- list(expr_function("eq", list(expr_reference("left_b"), expr_reference("right_b"))))
rel2 <- rel_join(left, right, cond, "left")
rel_df <- rel_to_altrep(rel2)
dim(rel_df)
expected_result <- data.frame(left_a = c(1, 2, 3), left_b = c(1, 1, 2), right_b = c(1, 1, NA))
expect_equal(rel_df, expected_result)
})
test_that("Right join returns all right relations", {
dbExecute(con, "CREATE OR REPLACE MACRO eq(a, b) AS a = b")
left <- rel_from_df(con, data.frame(left_b = c(1)))
right <- rel_from_df(con, data.frame(right_a = c(1, 2, 3), right_b = c(1, 1, 2)))
cond <- list(expr_function("eq", list(expr_reference("left_b"), expr_reference("right_b"))))
rel2 <- rel_join(left, right, cond, "right")
rel_df <- rel_to_altrep(rel2)
dim(rel_df)
expected_result <- data.frame(left_b = c(1, 1, NA), right_a = c(1, 2, 3), right_b = c(1, 1, 2))
expect_equal(rel_df, expected_result)
})
test_that("Full join returns all outer relations", {
dbExecute(con, "CREATE OR REPLACE MACRO eq(a, b) AS a = b")
left <- rel_from_df(con, data.frame(left_a = c(1, 2, 5), left_b = c(4, 5, 6)))
right <- rel_from_df(con, data.frame(right_a = c(1, 2, 3), right_b = c(1, 1, 2)))
cond <- list(expr_function("eq", list(expr_reference("left_a"), expr_reference("right_a"))))
rel2 <- rel_join(left, right, cond, "outer")
rel_df <- rel_to_altrep(rel2)
expected_result <- data.frame(
left_a = c(1, 2, 5, NA),
left_b = c(4, 5, 6, NA),
right_a = c(1, 2, NA, 3),
right_b = c(1, 1, NA, 2)
)
rel_df_sorted <- rel_df[order(rel_df$left_a), ]
rownames(rel_df_sorted) <- NULL
expect_equal(rel_df_sorted, expected_result)
})
test_that("cross join works", {
left <- rel_from_df(con, data.frame(left_a = c(1, 2, 3), left_b = c(1, 1, 2)))
right <- rel_from_df(con, data.frame(right_a = c(1, 4, 5), right_b = c(7, 8, 9)))
cross <- rel_join(left, right, list(), "cross")
order_by <- rel_order(cross, list(expr_reference("right_a"), expr_reference("right_a")))
rel_df <- rel_to_altrep(order_by)
dim(rel_df)
expected_result <- data.frame(
left_a = c(1, 2, 3, 1, 2, 3, 1, 2, 3),
left_b = c(1, 1, 2, 1, 1, 2, 1, 1, 2),
right_a = c(1, 1, 1, 4, 4, 4, 5, 5, 5),
right_b = c(7, 7, 7, 8, 8, 8, 9, 9, 9)
)
expect_equal(rel_df, expected_result)
})
test_that("semi join works", {
left <- rel_from_df(con, data.frame(left_b = c(1, 5, 6)))
right <- rel_from_df(con, data.frame(right_a = c(1, 2, 3), right_b = c(1, 1, 2)))
cond <- list(expr_function("eq", list(expr_reference("left_b"), expr_reference("right_a"))))
# select * from left semi join right on (left_b = right_a)
rel2 <- rel_join(left, right, cond, "semi")
rel_df <- rel_to_altrep(rel2)
dim(rel_df)
expected_result <- data.frame(left_b = c(1))
expect_equal(rel_df, expected_result)
})
test_that("anti join works", {
left <- rel_from_df(con, data.frame(left_b = c(1, 5, 6)))
right <- rel_from_df(con, data.frame(right_a = c(1, 2, 3), right_b = c(1, 1, 2)))
cond <- list(expr_function("eq", list(expr_reference("left_b"), expr_reference("right_a"))))
# select * from left anti join right on (left_b = right_a)
rel2 <- rel_join(left, right, cond, "anti")
rel_df <- rel_to_altrep(rel2)
dim(rel_df)
expected_result <- data.frame(left_b = c(5, 6))
expect_equal(rel_df, expected_result)
})
test_that("Union all does not immediately materialize", {
test_df_a <- rel_from_df(con, data.frame(a = c("1", "2"), b = c("3", "4")))
test_df_b <- rel_from_df(con, data.frame(a = c("5", "6"), b = c("7", "8")))
rel <- rel_union_all(test_df_a, test_df_b)
rel_df <- rel_to_altrep(rel)
expect_false(df_is_materialized(rel_df))
dim(rel_df)
expect_true(df_is_materialized(rel_df))
})
test_that("Union all has the correct values", {
test_df_a <- rel_from_df(con, data.frame(a = c("1", "2"), b = c("3", "4")))
test_df_b <- rel_from_df(con, data.frame(a = c("5", "6"), b = c("7", "8")))
rel <- rel_union_all(test_df_a, test_df_b)
order_by_rel <- rel_order(rel, list(expr_reference("a"), expr_reference("b")))
rel_df <- rel_to_altrep(order_by_rel)
expect_false(df_is_materialized(rel_df))
dim(rel_df)
expect_true(df_is_materialized(rel_df))
expected_result <- data.frame(a = c("1", "2", "5", "6"), b = c("3", "4", "7", "8"))
expect_equal(rel_df, expected_result)
})
test_that("Union all keeps duplicates", {
test_df_a2 <- rel_from_df(con, data.frame(a = c("1", "2"), b = c("3", "4")))
test_df_b2 <- rel_from_df(con, data.frame(a = c("1", "2"), b = c("3", "4")))
rel <- rel_union_all(test_df_a2, test_df_b2)
order_by_rel <- rel_order(rel, list(expr_reference("a"), expr_reference("b")))
rel_df <- rel_to_altrep(order_by_rel)
dim(rel_df)
expect_true(df_is_materialized(rel_df))
expected_result <- data.frame(a = c("1", "1", "2", "2"), b = c("3", "3", "4", "4"))
expect_equal(rel_df, expected_result)
})
test_that("Inner join returns all inner relations", {
dbExecute(con, "CREATE OR REPLACE MACRO eq(a, b) AS a = b")
left <- rel_from_df(con, data.frame(left_a = c(1, 2, 3), left_b = c(1, 1, 2)))
right <- rel_from_df(con, data.frame(right_b = c(1, 3), right_c = c(4, 5)))
cond <- list(expr_function("eq", list(expr_reference("left_b"), expr_reference("right_b"))))
rel2 <- rel_join(left, right, cond, "inner")
rel_df <- rel_to_altrep(rel2)
dim(rel_df)
expected_result <- data.frame(left_a = c(1, 2), left_b = c(1, 1), right_b = c(1, 1), right_c = c(4, 4))
expect_equal(rel_df, expected_result)
})
test_that("ASOF join works", {
dbExecute(con, "CREATE OR REPLACE MACRO gte(a, b) AS a >= b")
test_df1 <- rel_from_df(con, data.frame(ts = c(1, 2, 3, 4, 5, 6, 7, 8, 9)))
test_df2 <- rel_from_df(con, data.frame(event_ts = c(1, 3, 6, 8), event_id = c(0, 1, 2, 3)))
cond <- list(expr_function("gte", list(expr_reference("ts"), expr_reference("event_ts"))))
rel <- rel_join(test_df1, test_df2, cond, join_ref_type = "asof")
rel_proj <- rel_project(rel, list(expr_reference("ts"), expr_reference("event_id")))
rel_df <- rel_to_altrep(rel_proj)
expected_result <- data.frame(ts = c(1, 2, 3, 4, 5, 6, 7, 8, 9), event_id = c(0, 0, 1, 1, 1, 2, 2, 3, 3))
expect_equal(expected_result, rel_df)
})
test_that("LEFT ASOF join works", {
dbExecute(con, "CREATE OR REPLACE MACRO gte(a, b) AS a >= b")
test_df1 <- rel_from_df(con, data.frame(ts = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)))
test_df2 <- rel_from_df(con, data.frame(event_ts = c(2, 4, 6, 8), event_id = c(0, 1, 2, 3)))
cond <- list(expr_function("gte", list(expr_reference("ts"), expr_reference("event_ts"))))
rel <- rel_join(test_df1, test_df2, cond, join = "left", join_ref_type = "asof")
rel_proj <- rel_project(rel, list(expr_reference("ts"), expr_reference("event_ts"), expr_reference("event_id")))
order <- rel_order(rel_proj, list(expr_reference("ts")))
rel_df <- rel_to_altrep(order)
expected_result <- data.frame(ts = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9), event_ts = c(NA, NA, 2, 2, 4, 4, 6, 6, 8, 8), event_id = c(NA, NA, 0, 0, 1, 1, 2, 2, 3, 3))
expect_equal(expected_result, rel_df)
})
test_that("Positional cross join works", {
test_df1 <- rel_from_df(con, data.frame(a = c(11, 12, 13), b = c(1, 2, 3)))
test_df2 <- rel_from_df(con, data.frame(c = c(11, 12), d = c(1, 2)))
rel <- rel_join(test_df1, test_df2, list(), join = "cross", join_ref_type = "positional")
rel_df <- rel_to_altrep(rel)
expected_result <- data.frame(a = c(11, 12, 13), b = c(1, 2, 3), c = c(11, 12, NA), d = c(1, 2, NA))
expect_equal(expected_result, rel_df)
})
test_that("regular positional join works", {
dbExecute(con, "CREATE OR REPLACE MACRO eq(a, b) AS a = b")
test_df1 <- rel_from_df(con, data.frame(a = c(11, 12, 13), b = c(1, 2, 3)))
test_df2 <- rel_from_df(con, data.frame(c = c(11, 12, 14, 11), d = c(4, 5, 6, 8)))
cond <- expr_function("eq", list(expr_reference("a"), expr_reference("c")))
rel <- rel_join(test_df1, test_df2, list(cond), join_ref_type = "positional")
rel_df <- rel_to_altrep(rel)
expected_result <- data.frame(a = c(11, 12), b = c(1, 2), c = c(11, 12), d = c(4, 5))
expect_equal(expected_result, rel_df)
})
test_that("Invalid asof join condition throws error", {
dbExecute(con, "CREATE OR REPLACE MACRO neq(a, b) AS a <> b")
test_df1 <- rel_from_df(con, data.frame(ts = c(1, 2, 3, 4, 5, 6, 7, 8, 9)))
test_df2 <- rel_from_df(con, data.frame(begin = c(1, 3, 6, 8), value = c(0, 1, 2, 3)))
cond <- list(expr_function("neq", list(expr_reference("ts"), expr_reference("begin"))))
expect_error(rel_join(test_df1, test_df2, cond, join_ref_type = "asof"), "Binder")
})
test_that("multiple inequality conditions for asof join throws error", {
dbExecute(con, "CREATE OR REPLACE MACRO gte(a, b) AS a >= b")
test_df1 <- rel_from_df(con, data.frame(ts = c(1, 2, 3, 4, 5, 6, 7, 8, 9)))
test_df2 <- rel_from_df(con, data.frame(begin = c(1, 3, 6, 8), value = c(0, 1, 2, 3)))
cond1 <- expr_function("gte", list(expr_reference("ts"), expr_reference("begin")))
cond2 <- expr_function("gte", list(expr_reference("ts"), expr_reference("value")))
conds <- list(cond1, cond2)
expect_error(rel_join(test_df1, test_df2, conds, join_ref_type = "asof"), "Binder")
})
test_that("Inequality joins work", {
dbExecute(con, "CREATE OR REPLACE MACRO gte(a, b) AS a >= b")
timing_df <- rel_from_df(con, data.frame(ts = c(1, 2, 3, 4, 5, 6)))
events_df <- rel_from_df(con, data.frame(event_ts = c(1, 3, 6, 8), event_id = c(0, 1, 2, 3)))
cond <- list(expr_function("gte", list(expr_reference("ts"), expr_reference("event_ts"))))
rel <- rel_inner_join(timing_df, events_df, cond)
rel_proj <- rel_project(rel, list(expr_reference("ts"), expr_reference("event_ts")))
rel_order <- rel_order(rel_proj, list(expr_reference("ts"), expr_reference("event_ts")))
rel_df <- rel_to_altrep(rel_order)
expected_result <- data.frame(ts = c(1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 6), event_ts = c(1, 1, 1, 3, 1, 3, 1, 3, 1, 3, 6))
expect_equal(expected_result, rel_df)
})
test_that("Inequality join works to perform between operation", {
dbExecute(con, "CREATE OR REPLACE MACRO gt(a, b) AS a > b")
dbExecute(con, "CREATE OR REPLACE MACRO lt(a, b) AS a < b")
timing_df <- rel_from_df(con, data.frame(ts = c(1, 2, 3, 4, 5, 6, 7, 8, 9)))
events_df <- rel_from_df(con, data.frame(event_ts = c(1, 3, 6, 8), event_id = c(0, 1, 2, 3)))
lead <- expr_function("lead", list(expr_reference("event_ts")))
window_lead <- expr_window(lead, offset_expr = expr_constant(1))
expr_set_alias(window_lead, "lead")
proj_window <- rel_project(events_df, list(expr_reference("event_ts"), window_lead, expr_reference("event_id")))
cond1 <- expr_function("gt", list(expr_reference("ts"), expr_reference("event_ts")))
cond2 <- expr_function("lt", list(expr_reference("ts"), expr_reference("lead")))
conds <- list(cond1, cond2)
rel <- rel_inner_join(timing_df, proj_window, conds)
rel_proj <- rel_project(rel, list(expr_reference("ts")))
rel_order <- rel_order(rel_proj, list(expr_reference("ts")))
rel_df <- rel_to_altrep(rel_order)
expected_result <- data.frame(ts = c(2, 4, 5, 7))
expect_equal(expected_result, rel_df)
})
# nobody should do this in reality. It's a pretty dumb idea
test_that("we can union the same relation to itself", {
test_df_a2 <- rel_from_df(con, data.frame(a = c("1", "2"), b = c("3", "4")))
rel <- rel_union_all(test_df_a2, test_df_a2)
rel_df <- rel_to_altrep(rel)
expected_result <- data.frame(a = c("1", "2", "1", "2"), b = c("3", "4", "3", "4"))
expect_equal(rel_df, expected_result)
})
test_that("we throw an error when attempting to union all relations that are not compatible", {
test_df_a2 <- rel_from_df(con, data.frame(a = c("1", "2"), b = c("3", "4")))
test_df_b2 <- rel_from_df(con, data.frame(a = c("1", "2"), b = c("3", "4"), c = c("5", "6")))
# The two data frames have different dimensions, therefore you get a binding error.
expect_error(rel_union_all(test_df_a2, test_df_b2), "Binder")
})
test_that("A union with different column types casts to the richer type", {
test_df_a1 <- rel_from_df(con, data.frame(a = c(1)))
test_df_a2 <- rel_from_df(con, data.frame(a = c("1")))
rel <- rel_union_all(test_df_a1, test_df_a2)
res <- rapi_rel_to_df(rel)
expected <- data.frame(a = c("1.0", "1"))
expect_equal(class(res$a), class(expected$a))
expect_equal(res$a[1], expected$a[1])
expect_equal(res$a[2], expected$a[2])
})
test_that("Set Intersect returns set intersection", {
test_df_a <- rel_from_df(con, data.frame(a = c(1, 2), b = c(3, 4)))
test_df_b <- rel_from_df(con, data.frame(a = c(1, 6), b = c(3, 8)))
rel <- rel_set_intersect(test_df_a, test_df_b)
ordered_rel <- rel_order(rel, list(expr_reference("a")))
rel_df <- rel_to_altrep(rel)
expect_false(df_is_materialized(rel_df))
dim(rel_df)
expected_result <- data.frame(a = c(1), b = c(3))
expect_equal(rel_df, expected_result)
})
test_that("Set intersect casts columns to the richer type", {
# df1 is Integer
df1 <- data.frame(x = 1:4)
# df2 has Double
df2 <- data.frame(x = 4)
expect_equal(class(df1$x), "integer")
expect_equal(class(df2$x), "numeric")
out <- rel_set_intersect(
rel_from_df(con, df1),
rel_from_df(con, df2)
)
out_df <- rapi_rel_to_df(out)
expect_equal(class(out_df$x), "numeric")
})
test_that("Set Diff returns the set difference", {
test_df_a <- rel_from_df(con, data.frame(a = c(1, 2), b = c(3, 4)))
test_df_b <- rel_from_df(con, data.frame(a = c(1, 6), b = c(3, 8)))
rel <- rel_set_diff(test_df_a, test_df_b)
rel_df <- rel_to_altrep(rel)
expect_false(df_is_materialized(rel_df))
dim(rel_df)
expected_result <- data.frame(a = c(2), b = c(4))
expect_equal(rel_df, expected_result)
})
test_that("Symmetric difference returns the symmetric difference", {
test_df_a <- rel_from_df(con, data.frame(a = c(1, 2), b = c(3, 4)))
test_df_b <- rel_from_df(con, data.frame(a = c(1, 6), b = c(3, 8)))
rel <- rel_set_symdiff(test_df_a, test_df_b)
ordered_rel <- rel_order(rel, list(expr_reference("a"), expr_reference("b")))
rel_df <- rel_to_altrep(ordered_rel)
expect_false(df_is_materialized(rel_df))
dim(rel_df)
expected_result <- data.frame(a = c(2, 6), b = c(4, 8))
expect_equal(rel_df, expected_result)
})
test_that("rel aggregate with no groups but a sum over a column, sums the column", {
rel_a <- rel_from_df(con, data.frame(a = c(1, 2), b = c(3, 4)))
aggrs <- list(sum = expr_function("sum", list(expr_reference("a"))))
res <- rel_aggregate(rel_a, list(), aggrs)
rel_df <- rel_to_altrep(res)
expected_result <- data.frame(sum = c(3))
expect_equal(rel_df, expected_result)
})
test_that("rel aggregate with groups and aggregate function works", {
rel_a <- rel_from_df(con, data.frame(a = c(1, 2, 5, 5), b = c(3, 3, 4, 4)))
aggrs <- list(sum = expr_function("sum", list(expr_reference("a"))))
rel_b <- rel_aggregate(rel_a, list(expr_reference("b")), aggrs)
res <- rel_order(rel_b, list(expr_reference("b")))
rel_df <- rel_to_altrep(res)
expected_result <- data.frame(b = c(3, 4), sum = c(3, 10))
expect_equal(rel_df, expected_result)
})
test_that("Window sum expression function test works", {
# select j, i, sum(i) over (partition by j) from a order by 1,2
rel_a <- rel_from_df(con, data.frame(a = c(1:8), b = c(1, 1, 2, 2, 3, 3, 4, 4)))
sum_func <- expr_function("sum", list(expr_reference("a")))
aggrs <- expr_window(sum_func, partitions = list(expr_reference("b")))
expr_set_alias(aggrs, "window_result")
window_proj <- rel_project(rel_a, list(expr_reference("a"), aggrs))
order_over_window <- rel_order(window_proj, list(expr_reference("window_result")))
res <- rel_to_altrep(order_over_window)
expected_result <- data.frame(a = c(1:8), window_result = c(3, 3, 7, 7, 11, 11, 15, 15))
expect_equal(res, expected_result)
})
test_that("Window count function works", {
# select a, b, count(b) over (partition by a) from a order by a
rel_a <- rel_from_df(con, data.frame(a = c(1:8), b = c(1, 1, 2, 2, 3, 3, 4, 4)))
count_func <- expr_function("count", list(expr_reference("a")))
count <- expr_window(count_func, partitions = list(expr_reference("b")))
expr_set_alias(count, "window_result")
window_proj <- rel_project(rel_a, list(count))
res <- rel_to_altrep(window_proj)
expected_result <- data.frame(window_result = c(2, 2, 2, 2, 2, 2, 2, 2))
expect_equal(res, expected_result)
})
test_that("Window avg function works", {
# select a, b, avg(b) over (partition by a) from a order by a
rel_a <- rel_from_df(con, data.frame(a = c(1:8), b = c(1, 1, 2, 2, 3, 3, 4, 4)))
avg_func <- expr_function("avg", list(expr_reference("a")))
avg_window <- expr_window(avg_func, partitions = list(expr_reference("b")))
expr_set_alias(avg_window, "window_result")
window_proj <- rel_project(rel_a, list(avg_window))
ordered <- rel_order(window_proj, list(expr_reference("window_result")))
res <- rel_to_altrep(ordered)
expected_result <- data.frame(window_result = c(1.5, 1.5, 3.5, 3.5, 5.5, 5.5, 7.5, 7.5))
expect_equal(res, expected_result)
})
test_that("Window sum with Partition, order, and window boundaries works", {
# SUM(x) OVER (partition by b ROWS BETWEEN 3 PRECEDING AND CURRENT ROW)
rel_a <- rel_from_df(con, data.frame(a = c(1:8), b = c(1, 1, 1, 1, 2, 2, 2, 2)))
partitions <- list(expr_reference("b"))
order_by_a <- list(rel_order(rel_a, list(expr_reference("a"))))
sum_func <- expr_function("sum", list(expr_reference("a")))
sum_window <- expr_window(sum_func,
partitions = partitions,
order_bys = list(expr_reference("a")),
window_boundary_start = "expr_preceding_rows",
window_boundary_end = "current_row_rows",
start_expr = expr_constant(2)
)
expr_set_alias(sum_window, "window_result")
window_proj <- rel_project(rel_a, list(expr_reference("a"), sum_window))
proj_order <- rel_order(window_proj, list(expr_reference("a")))
res <- rel_to_altrep(proj_order)
expected_result <- data.frame(a = c(1:8), window_result = c(1, 3, 6, 9, 5, 11, 18, 21))
expect_equal(res, expected_result)
})
test_that("Window boundaries boundaries are CaSe INsenSItive", {
# SUM(x) OVER (partition by b ROWS BETWEEN 3 PRECEDING AND CURRENT ROW)
rel_a <- rel_from_df(con, data.frame(a = c(1:8), b = c(1, 1, 1, 1, 2, 2, 2, 2)))
partitions <- list(expr_reference("b"))
order_by_a <- list(rel_order(rel_a, list(expr_reference("a"))))
sum_func <- expr_function("sum", list(expr_reference("a")))
sum_window <- expr_window(sum_func,
partitions = partitions,
order_bys = list(expr_reference("a")),
window_boundary_start = "exPr_PREceding_rOWs",
window_boundary_end = "cURrEnt_rOw_RoWs",
start_expr = expr_constant(2)
)
expr_set_alias(sum_window, "window_result")
window_proj <- rel_project(rel_a, list(expr_reference("a"), sum_window))
proj_order <- rel_order(window_proj, list(expr_reference("a")))
res <- rel_to_altrep(proj_order)
expected_result <- data.frame(a = c(1:8), window_result = c(1, 3, 6, 9, 5, 11, 18, 21))
expect_equal(res, expected_result)
})
test_that("Window avg with a filter expression and partition works", {
# select a, b, avg(a) FILTER (WHERE x % 2 = 0) over (partition by b)
DBI::dbExecute(con, "CREATE OR REPLACE MACRO mod(a, b) as a % b")
DBI::dbExecute(con, "CREATE OR REPLACE MACRO eq(a, b) as a = b")
rel_a <- rel_from_df(con, data.frame(a = c(1:8), b = c(1, 1, 2, 2, 3, 3, 4, 4)))
partitions <- list(expr_reference("b"))
mod_function <- expr_function("mod", list(expr_reference("a"), expr_constant(2)))
zero <- expr_constant(0)
filters <- list(expr_function("eq", list(zero, mod_function)))
avg_func <- expr_function("avg", args = list(expr_reference("a")), filter_bys = filters)
avg_filter_window <- expr_window(avg_func, partitions = partitions)
expr_set_alias(avg_filter_window, "avg_filter")
window_proj <- rel_project(rel_a, list(avg_filter_window))
proj_order <- rel_order(window_proj, list(expr_reference("avg_filter")))
expected_result <- data.frame(avg_filter = c(2, 2, 4, 4, 6, 6, 8, 8))
res <- rel_to_altrep(proj_order)
expect_equal(res, expected_result)
})
test_that("Window lag function works as expected", {
# select a, b, lag(a, 1) OVER () order by a
rel_a <- rel_from_df(con, data.frame(a = c(1:8), b = c(1, 1, 2, 2, 3, 3, 4, 4)))
lag <- expr_function("lag", list(expr_reference("a")))
window_lag <- expr_window(lag, offset_expr = expr_constant(1))
expr_set_alias(window_lag, "lag")
proj_window <- rel_project(rel_a, list(expr_reference("a"), window_lag))
order_over_window <- rel_order(proj_window, list(expr_reference("a")))
expected_result <- data.frame(a = c(1:8), lag = c(NA, 1, 2, 3, 4, 5, 6, 7))
res <- rel_to_altrep(order_over_window)
expect_equal(res, expected_result)
})
test_that("function name for window is case insensitive", {
# select a, b, lag(a, 1) OVER () order by a
rel_a <- rel_from_df(con, data.frame(a = c(1:8), b = c(1, 1, 2, 2, 3, 3, 4, 4)))
lag <- expr_function("LAG", list(expr_reference("a")))
window_lag <- expr_window(lag, offset_expr = expr_constant(1))
expr_set_alias(window_lag, "lag")
proj_window <- rel_project(rel_a, list(expr_reference("a"), window_lag))
order_over_window <- rel_order(proj_window, list(expr_reference("a")))
expected_result <- data.frame(a = c(1:8), lag = c(NA, 1, 2, 3, 4, 5, 6, 7))
res <- rel_to_altrep(order_over_window)
expect_equal(res, expected_result)
})
test_that("Window lead function works as expected", {
# select a, b, lag(a, 1) OVER () order by a
rel_a <- rel_from_df(con, data.frame(a = c(1:8), b = c(1, 1, 2, 2, 3, 3, 4, 4)))
lead <- expr_function("lead", list(expr_reference("a")))
window_lead <- expr_window(lead, offset_expr = expr_constant(1))
expr_set_alias(window_lead, "lead")
proj_window <- rel_project(rel_a, list(expr_reference("a"), window_lead))
order_over_window <- rel_order(proj_window, list(expr_reference("a")))
expected_result <- data.frame(a = c(1:8), lead = c(2, 3, 4, 5, 6, 7, 8, NA))
res <- rel_to_altrep(order_over_window)
expect_equal(res, expected_result)
})
test_that("Window function with string aggregate works", {
# select j, s, string_agg(s, '|') over (partition by b) from a order by j, s;
rel_a <- rel_from_df(con, data.frame(r = c(1, 2, 3, 4), a = c("hello", "Big", "world", "42"), b = c(1, 1, 2, 2)))
str_agg <- expr_function("string_agg", list(expr_reference("a")))
partitions <- list(expr_reference("b"))
window_str_cat <- expr_window(str_agg, partitions = partitions)
expr_set_alias(window_str_cat, "str_agg_res")
proj_window <- rel_project(rel_a, list(expr_reference("r"), window_str_cat))
order_over_window <- rel_order(proj_window, list(expr_reference("r")))
expected_result <- data.frame(r = c(1:4), str_agg_res = c("hello,Big", "hello,Big", "world,42", "world,42"))
res <- rel_to_altrep(order_over_window)
expect_equal(res, expected_result)
})
test_that("You can perform window functions on row_number", {
# select a, b, row_number() OVER () from tmp order by a;
rel_a <- rel_from_df(con, data.frame(a = c(8:1), b = c(1, 1, 2, 2, 3, 3, 4, 4)))
row_number <- expr_function("row_number", list())
window_function <- expr_window(row_number)
expr_set_alias(window_function, "row_number")
proj <- rel_project(rel_a, list(expr_reference("a"), window_function))
order_by_a <- rel_order(proj, list(expr_reference("a")))
expected_result <- data.frame(a = c(1:8), row_number = (8:1))
res <- rel_to_altrep(order_by_a)
expect_equal(res, expected_result)
})
# also tests order by inside the rank function.
# these tests come from https://dplyr.tidyverse.org/articles/window-functions.html
# in dplyr min_rank = rank
test_that("You can perform the window function min_rank", {
# select rank() OVER (order by a) from t1
rel_a <- rel_from_df(con, data.frame(a = c(1, 1, 2, 2, 2)))
rank_func <- expr_function("rank", list())
min_rank_window <- expr_window(rank_func, order_bys = list(expr_reference("a")))
expr_set_alias(min_rank_window, "window_result")
window_proj <- rel_project(rel_a, list(expr_reference("a"), min_rank_window))
res <- rel_to_altrep(window_proj)
expected_result <- data.frame(a = c(1, 1, 2, 2, 2), window_result = c(1, 1, 3, 3, 3))
expect_equal(res, expected_result)
})
test_that("You can perform the window function dense_rank", {
# select dense_rank() OVER (order by a) from t1;
rel_a <- rel_from_df(con, data.frame(a = c(1, 1, 2, 2, 2)))
dense_rank_fun <- expr_function("dense_rank", list())
min_rank_window <- expr_window(dense_rank_fun, order_bys = list(expr_reference("a")))
expr_set_alias(min_rank_window, "window_result")
window_proj <- rel_project(rel_a, list(expr_reference("a"), min_rank_window))
res <- rel_to_altrep(window_proj)
expected_result <- data.frame(a = c(1, 1, 2, 2, 2), window_result = c(1, 1, 2, 2, 2))
expect_equal(res, expected_result)
})
test_that("You can perform the window function cume_dist", {
# select cume_dist() OVER (order by a) from t1;
rel_a <- rel_from_df(con, data.frame(a = c(1, 1, 2, 2, 2)))
cume_dist_func <- expr_function("cume_dist", list())
cume_dist_window <- expr_window(cume_dist_func, order_bys = list(expr_reference("a")))
expr_set_alias(cume_dist_window, "cume_dist")
window_proj <- rel_project(rel_a, list(expr_reference("a"), cume_dist_window))
order_proj <- rel_order(window_proj, list(expr_reference("a")))
res <- rel_to_altrep(order_proj)
expected_result <- data.frame(a = c(1, 1, 2, 2, 2), cume_dist = c(0.4, 0.4, 1.0, 1.0, 1.0))
expect_equal(res, expected_result)
})
test_that("You can perform the window function percent rank", {
# select percent_rank() OVER (order by a) from t1;
rel_a <- rel_from_df(con, data.frame(a = c(5, 1, 3, 2, 2)))
percent_rank_func <- expr_function("percent_rank", list())
percent_rank_wind <- expr_window(percent_rank_func, order_bys = list(expr_reference("a")))
expr_set_alias(percent_rank_wind, "percent_rank")
window_proj <- rel_project(rel_a, list(expr_reference("a"), percent_rank_wind))
order_proj <- rel_order(window_proj, list(expr_reference("a")))
res <- rel_to_altrep(order_proj)
expected_result <- data.frame(a = c(1, 2, 2, 3, 5), percent_rank = c(0.00, 0.25, 0.25, 0.75, 1.00))
expect_equal(res, expected_result)
})
# with and without offsets
test_that("R semantics for adding NaNs is respected", {
dbExecute(con, "CREATE OR REPLACE MACRO eq(a, b) AS a = b")
test_df_a <- rel_from_df(con, data.frame(a = c(1, 2), b = c(3, 4)))
test_df_b <- rel_from_df(con, data.frame(c = c(NaN, 6), d = c(3, 8)))
cond <- list(expr_function("eq", list(expr_reference("b"), expr_reference("d"))))
rel_join <- rel_join(test_df_a, test_df_b, cond, "inner")
addition_expression <- expr_function("+", list(expr_reference("a"), expr_reference("c")))
proj <- rel_project(rel_join, list(addition_expression))
res <- rapi_rel_to_df(proj)
expect_true(is.na(res[[1]]))
})
test_that("R semantics for arithmetics sum function are respected", {
test_df_a <- rel_from_df(con, data.frame(a = c(1:5, NA)))
sum_rel <- expr_function("sum", list(expr_reference("a")))
ans <- rel_aggregate(test_df_a, list(), list(sum_rel))
res <- rel_to_altrep(ans)
expect_equal(res[[1]], 15)
})
test_that("anti joins for eq_na_matches works", {
dbExecute(con, 'CREATE OR REPLACE MACRO "___eq_na_matches_na"(a, b) AS ((a IS NULL AND b IS NULL) OR (a = b))')
rel1 <- rel_from_df(con, data.frame(x = c(1, 1, 2, 3)))
rel2 <- rel_from_df(con, data.frame(y = c(2, 3, 3, 4)))
cond <- list(expr_function("___eq_na_matches_na", list(expr_reference("x"), expr_reference("y"))))
out <- rel_join(rel1, rel2, cond, "anti")
res <- rel_to_altrep(out)
expect_equal(res, data.frame(x = c(1, 1)))
})
test_that("semi joins for eq_na_matches works", {
dbExecute(con, 'CREATE OR REPLACE MACRO "___eq_na_matches_na"(a, b) AS ((a IS NULL AND b IS NULL) OR (a = b))')
rel1 <- rel_from_df(con, data.frame(x = c(1, 1, 2, 2)))
rel2 <- rel_from_df(con, data.frame(y = c(2, 2, 2, 2, 3, 3, 3)))
cond <- list(expr_function("___eq_na_matches_na", list(expr_reference("x"), expr_reference("y"))))
out <- rel_join(rel1, rel2, cond, "semi")
res <- rel_to_altrep(out)
expect_equal(res, data.frame(x = c(2, 2)))
})
test_that("rel_project does not automatically quote upper-case column names", {
df <- data.frame(B = 1)
rel <- rel_from_df(con, df)
ref <- expr_reference(names(df))
exprs <- list(ref)
proj <- rel_project(rel, exprs)
ans <- rapi_rel_to_altrep(proj)
expect_equal(df, ans)
})
test_that("rel_to_sql works for row_number", {
invisible(DBI::dbExecute(con, "CREATE MACRO \"==\"(a, b) AS a = b"))
df1 <- data.frame(a = 1)
rel1 <- rel_from_df(con, df1)
rel2 <- rel_project(
rel1,
list({
tmp_expr <- expr_window(expr_function("row_number", list()), list(), list(), offset_expr = NULL, default_expr = NULL)
expr_set_alias(tmp_expr, "___row_number")
tmp_expr
})
)
sql <- rel_to_sql(rel2)
sub_str_sql <- substr(sql, 0, 44)
expect_equal(sub_str_sql, "SELECT row_number() OVER () AS ___row_number")
})
test_that("rel_from_table_function works", {
rel <- rel_from_table_function(con, "generate_series", list(1L, 10L, 2L))
df <- as.data.frame(rel)
expect_equal(df$generate_series, c(1, 3, 5, 7, 9))
})
test_that("we don't crash with evaluation errors", {
invisible(DBI::dbExecute(con, "CREATE OR REPLACE MACRO \"==\"(x, y) AS x = y"))
df1 <- data.frame(a = "a")
rel1 <- rel_from_df(con, df1)
rel2 <- rel_project(
rel1,
list(
{
tmp_expr <- expr_reference("a")
expr_set_alias(tmp_expr, "a")
tmp_expr
},
{
tmp_expr <- expr_function(
"==",
list(expr_reference("a"), expr_constant(1))
)
expr_set_alias(tmp_expr, "b")
tmp_expr
}
)
)
ans <- rapi_rel_to_altrep(rel2)
# This query is supposed to throw a runtime error.
# If this succeeds, find a new query that throws a runtime error.
expect_error(nrow(ans), "Error evaluating duckdb query")
})
test_that("we don't crash with evaluation errors", {
invisible(DBI::dbExecute(con, "CREATE OR REPLACE MACRO \"==\"(x, y) AS x = y"))
df1 <- data.frame(a = "a")
rel1 <- rel_from_df(con, df1)
rel2 <- rel_project(
rel1,
list(
{
tmp_expr <- expr_reference("a")
expr_set_alias(tmp_expr, "a")
tmp_expr
},
{
tmp_expr <- expr_function(
"==",
list(expr_reference("a"), expr_constant(1))
)
expr_set_alias(tmp_expr, "b")
tmp_expr
}
)
)
ans <- rapi_rel_to_altrep(rel2)
# This query is supposed to throw a runtime error.
# If this succeeds, find a new query that throws a runtime error.
expect_error(nrow(ans), "Error evaluating duckdb query")
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.