tests/testthat/test-translate-sql-window.R

test_that("window functions without group have empty over", {
  local_con(simulate_dbi())
  expect_equal(test_translate_sql(n()), sql("COUNT(*) OVER ()"))
  expect_equal(test_translate_sql(sum(x, na.rm = TRUE)), sql("SUM(`x`) OVER ()"))
})

test_that("aggregating window functions ignore order_by", {
  local_con(simulate_dbi())
  expect_equal(
    test_translate_sql(n(), vars_order = "x"),
    sql("COUNT(*) OVER ()")
  )
  expect_equal(
    test_translate_sql(sum(x, na.rm = TRUE), vars_order = "x"),
    sql("SUM(`x`) OVER ()")
  )
})

test_that("count uses order_by if frame is used", {
  local_con(simulate_dbi())
  expect_equal(
    test_translate_sql(n(), vars_order = "x", vars_frame = c(-2, 1)),
    sql("COUNT(*) OVER (ORDER BY `x` ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING)")
  )
})

test_that("order_by overrides default ordering", {
  local_con(simulate_dbi())
  expect_equal(
    test_translate_sql(order_by(y, cumsum(x)), vars_order = "x"),
    sql("SUM(`x`) OVER (ORDER BY `y` ROWS UNBOUNDED PRECEDING)")
  )
  expect_equal(
    test_translate_sql(order_by(y, cummean(x)), vars_order = "x"),
    sql("AVG(`x`) OVER (ORDER BY `y` ROWS UNBOUNDED PRECEDING)")
  )
  expect_equal(
    test_translate_sql(order_by(y, cummin(x)), vars_order = "x"),
    sql("MIN(`x`) OVER (ORDER BY `y` ROWS UNBOUNDED PRECEDING)")
  )
  expect_equal(
    test_translate_sql(order_by(y, cummax(x)), vars_order = "x"),
    sql("MAX(`x`) OVER (ORDER BY `y` ROWS UNBOUNDED PRECEDING)")
  )
})

test_that("cumulative windows warn if no order", {
  local_con(simulate_dbi())
  expect_warning(test_translate_sql(cumsum(x)), "does not have explicit order")
  expect_warning(test_translate_sql(cumsum(x), vars_order = "x"), NA)
})

test_that("ntile always casts to integer", {
  local_con(simulate_dbi())
  expect_equal(
    test_translate_sql(ntile(x, 10.5)),
    sql("NTILE(10) OVER (ORDER BY `x`)")
  )
})

test_that("first, last, and nth translated to _value", {
  local_con(simulate_dbi())
  expect_equal(
    test_translate_sql(first(x)),
    sql("FIRST_VALUE(`x`) OVER ()")
  )
  expect_equal(
    test_translate_sql(first(x, na_rm = TRUE)),
    sql("FIRST_VALUE(`x` IGNORE NULLS) OVER ()")
  )
  # `last()` must default to unbounded preceding and following
  expect_equal(
    test_translate_sql(last(x), vars_order = "a"),
    sql("LAST_VALUE(`x`) OVER (ORDER BY `a` ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)")
  )
  expect_equal(
    test_translate_sql(last(x), vars_order = "a", vars_frame = c(0, Inf)),
    sql("LAST_VALUE(`x`) OVER (ORDER BY `a` ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)")
  )
  expect_equal(
    test_translate_sql(nth(x, 3), vars_order = "a", vars_frame = c(-Inf, 0)),
    sql("NTH_VALUE(`x`, 3) OVER (ORDER BY `a` ROWS UNBOUNDED PRECEDING)")
  )

  # can also use a column #1236
  expect_equal(
    test_translate_sql(nth(x, n), vars_order = "a", vars_frame = c(-Inf, 0)),
    sql("NTH_VALUE(`x`, `n`) OVER (ORDER BY `a` ROWS UNBOUNDED PRECEDING)")
  )
})

test_that("can override frame of recycled functions", {
  local_con(simulate_dbi())
  expect_equal(
    test_translate_sql(sum(x, na.rm = TRUE), vars_frame = c(-1, 0), vars_order = "y"),
    sql("SUM(`x`) OVER (ORDER BY `y` ROWS 1 PRECEDING)")
  )
})

test_that("frame is checked", {
  local_con(simulate_dbi())
  expect_snapshot(
    error = TRUE,
    test_translate_sql(sum(x, na.rm = TRUE), vars_frame = c(1, 0))
  )
})

test_that("win_rank works", {
  local_con(simulate_dbi())
  sql_row_number <- win_rank("ROW_NUMBER")
  expect_equal(
    test_translate_sql(row_number(x)),
    sql("CASE
WHEN (NOT((`x` IS NULL))) THEN ROW_NUMBER() OVER (PARTITION BY (CASE WHEN ((`x` IS NULL)) THEN 1 ELSE 0 END) ORDER BY `x`)
END")
  )
})

test_that("win_rank(desc(x)) works", {
  local_con(simulate_dbi())
  expect_equal(
    test_translate_sql(row_number(desc(x))),
    sql("CASE
WHEN (NOT((`x` IS NULL))) THEN ROW_NUMBER() OVER (PARTITION BY (CASE WHEN ((`x` IS NULL)) THEN 1 ELSE 0 END) ORDER BY `x` DESC)
END")
  )
})

test_that("win_rank(tibble()) works", {
  local_con(simulate_dbi())

  expect_equal(
    test_translate_sql(row_number(tibble(x))),
    test_translate_sql(row_number(x))
  )
  expect_equal(
    test_translate_sql(row_number(tibble(desc(x)))),
    test_translate_sql(row_number(desc(x)))
  )

  expect_equal(
    test_translate_sql(row_number(tibble(x, desc(y)))),
    sql("CASE
WHEN (NOT((`x` IS NULL)) AND NOT((`y` IS NULL))) THEN ROW_NUMBER() OVER (PARTITION BY (CASE WHEN ((`x` IS NULL) OR (`y` IS NULL)) THEN 1 ELSE 0 END) ORDER BY `x`, `y` DESC)
END")
  )
})

test_that("win_rank(c()) gives an informative error", {
  local_con(simulate_dbi())
  expect_snapshot(error = TRUE, {
    test_translate_sql(row_number(c(x)))
  })
})

test_that("row_number() with and without group_by() and arrange()", {
  mf <- lazy_frame(x = c(1:5), y = c(rep("A", 5)), con = simulate_dbi())
  expect_snapshot(mf %>% mutate(rown = row_number()))
  expect_snapshot(mf %>% group_by(y) %>% mutate(rown = row_number()))
  expect_snapshot(mf %>% group_by(y) %>% arrange(y) %>% mutate(rown = row_number()))
  expect_snapshot(mf %>% arrange(y) %>% mutate(rown = row_number()))
})

test_that("win_cumulative works", {
  local_con(simulate_dbi())
  sql_cumsum <- win_cumulative("SUM")

  expect_equal(
    sql_cumsum(ident("x"), "y"),
    sql("SUM(`x`) OVER (ORDER BY `y` ROWS UNBOUNDED PRECEDING)")
  )

  # NA values results in NA rank
  db <- memdb_frame(x = c(1, 2, NA, 3))
  expect_equal(
    db %>% mutate(rank = dense_rank(x)) %>% collect() %>% arrange(x),
    tibble(x = c(1:3, NA), rank = c(1:3, NA))
  )
})


# win_over ----------------------------------------------------------------

test_that("over() only requires first argument", {
  local_con(simulate_dbi())

  expect_equal(win_over("X"), sql("'X' OVER ()"))
})

test_that("multiple group by or order values don't have parens", {
  local_con(simulate_dbi())

  expect_equal(
    win_over(ident("x"), order = c("x", "y")),
    sql("`x` OVER (ORDER BY `x`, `y`)")
  )
  expect_equal(
    win_over(ident("x"), partition = c("x", "y")),
    sql("`x` OVER (PARTITION BY `x`, `y`)")
  )
})


# window_frame ------------------------------------------------------------

test_that("window_frame()", {
  lf <- lazy_frame(x = runif(10), y = 1:10)

  expect_snapshot(
    lf %>%
      window_frame(-3, 0) %>%
      window_order(x) %>%
      mutate(z = sum(y, na.rm = TRUE)) %>%
      show_query()
  )

  expect_snapshot(
    lf %>%
      window_frame(-3) %>%
      window_order(x) %>%
      mutate(z = sum(y, na.rm = TRUE)) %>%
      show_query()
  )
})

test_that("window_frame() checks arguments", {
  skip_if(getRversion() <= '3.5.0', "R too old")
  lf <- lazy_frame(x = runif(10), y = 1:10)

  expect_snapshot(error = TRUE, window_frame(lf, "a"))
  expect_snapshot(error = TRUE, window_frame(lf, 1:2))

  expect_snapshot(error = TRUE, window_frame(lf, 1, "a"))
  expect_snapshot(error = TRUE, window_frame(lf, 1, 1:2))
})


# named windows -----------------------------------------------------------

test_that("names windows automatically", {
  lf <- lazy_frame(
    col1 = runif(3),
    col2 = runif(3),
    col3 = runif(3),
    col4 = runif(3),
    part = c("a", "a", "b"),
    ord = 3:1,
    con = simulate_sqlite()
  ) %>%
    group_by(part) %>%
    window_order(ord)

  lf1 <- lf %>%
    transmute(
      across(c(col1, col2), ~ sum(.x, na.rm = TRUE)),
      across(c(col3, col4), ~ order_by(desc(ord), cumsum(.x)))
    )

  sql_list <- get_select_sql(
    select = lf1$lazy_query$select,
    select_operation = "mutate",
    in_vars = op_vars(lf),
    table_alias = "df",
    con = simulate_sqlite(),
    use_star = TRUE
  )
  expect_equal(
    sql_list$window_sql,
    sql(
      "`win1` AS (PARTITION BY `part`)",
      "`win2` AS (PARTITION BY `part` ORDER BY `ord` DESC ROWS UNBOUNDED PRECEDING)"
    )
  )
  expect_equal(
    sql_list$select_sql,
    sql(
      part = ident("part"),
      col1 = sql("SUM(`col1`) OVER `win1`"),
      col2 = sql("SUM(`col2`) OVER `win1`"),
      col3 = sql("SUM(`col3`) OVER `win2`"),
      col4 = sql("SUM(`col4`) OVER `win2`")
    )
  )

  # Different order does not confuse naming of windows
  lf2 <- lf %>%
    transmute(
      col1 = sum(col1, na.rm = TRUE),
      col3 = order_by(desc(ord), cumsum(col3)),
      col2 = sum(col2, na.rm = TRUE),
      col4 = order_by(desc(ord), cumsum(col4))
    )

  sql_list <- get_select_sql(
    select = lf2$lazy_query$select,
    select_operation = "mutate",
    in_vars = op_vars(lf),
    table_alias = "df",
    con = simulate_sqlite(),
    use_star = TRUE
  )
  expect_equal(
    sql_list$window_sql,
    sql(
      "`win1` AS (PARTITION BY `part`)",
      "`win2` AS (PARTITION BY `part` ORDER BY `ord` DESC ROWS UNBOUNDED PRECEDING)"
    )
  )
  expect_equal(
    sql_list$select_sql,
    sql(
      part = ident("part"),
      col1 = sql("SUM(`col1`) OVER `win1`"),
      col3 = sql("SUM(`col3`) OVER `win2`"),
      col2 = sql("SUM(`col2`) OVER `win1`"),
      col4 = sql("SUM(`col4`) OVER `win2`")
    )
  )
})

test_that("only name windows if they appear multiple times", {
  lf <- lazy_frame(
    col1 = runif(3),
    col2 = runif(3),
    col3 = runif(3),
    part = c("a", "a", "b"),
    ord = 3:1,
    con = simulate_sqlite()
  ) %>%
    group_by(part) %>%
    window_order(ord) %>%
    transmute(
      across(c(col1, col2), ~ sum(.x, na.rm = TRUE)),
      across(c(col3), ~ order_by(desc(ord), cumsum(.x)))
    )

  sql_list <- get_select_sql(
    select = lf$lazy_query$select,
    select_operation = "mutate",
    in_vars = op_vars(lf),
    table_alias = "df",
    con = simulate_sqlite(),
    use_star = TRUE
  )
  expect_equal(sql_list$window_sql, sql("`win1` AS (PARTITION BY `part`)"))
  expect_equal(
    sql_list$select_sql,
    sql(
      part = ident("part"),
      col1 = sql("SUM(`col1`) OVER `win1`"),
      col2 = sql("SUM(`col2`) OVER `win1`"),
      col3 = sql("SUM(`col3`) OVER (PARTITION BY `part` ORDER BY `ord` DESC ROWS UNBOUNDED PRECEDING)")
    )
  )
})

test_that("name windows only if supported", {
  lf <- lazy_frame(
    col1 = runif(3),
    col2 = runif(3),
    part = c("a", "a", "b"),
    con = simulate_hana()
  ) %>%
    group_by(part) %>%
    transmute(
      across(c(col1, col2), ~ sum(.x, na.rm = TRUE))
    )

  sql_list <- get_select_sql(
    select = lf$lazy_query$select,
    select_operation = "mutate",
    in_vars = op_vars(lf),
    table_alias = "df",
    con = simulate_hana(),
    use_star = TRUE
  )
  expect_equal(sql_list$window_sql, character())
  expect_equal(
    sql_list$select_sql,
    sql(
      part = ident("part"),
      col1 = sql("SUM(`col1`) OVER (PARTITION BY `part`)"),
      col2 = sql("SUM(`col2`) OVER (PARTITION BY `part`)")
    )
  )
})
tidyverse/dbplyr documentation built on April 7, 2024, 1:42 a.m.