tests/testthat/test-over.R

# over ------------------------------------------------------------------
library(dplyr)

# over examples of basic functionality from the example section
test_that("over() works on numeric vectors", {

  df0 <- tibble(x = 1:25)

  df_over <- df0 %>%
    mutate(over(c(1:3),
                ~ lag(x, .x)))

  df_expect <- df0 %>%
    mutate(`1` = lag(x, 1),
           `2` = lag(x, 2),
           `3` = lag(x, 3))

  expect_equal(df_over, df_expect)

})

test_that("over() works on character vectors", {

  df_over <- iris %>%
    mutate(over(unique(Species),
                ~ if_else(Species == .x, 1, 0)),
                .keep = "none")

  df_expect <- iris %>%
    mutate(setosa = if_else(Species == "setosa", 1, 0),
           versicolor = if_else(Species == "versicolor", 1, 0),
           virginica = if_else(Species == "virginica", 1, 0),
           .keep = "none")

  expect_equal(df_over, df_expect)

})

test_that("over() can control names", {

  df_over <- iris %>%
    mutate(over(seq(4, 7, by = 1),
                ~ if_else(Sepal.Length < .x, 1, 0),
               .names = "Sepal.Length_{x}"),
               .keep = "none")

  df_expect <- iris %>%
    mutate(Sepal.Length_4 = if_else(Sepal.Length < 4, 1, 0),
           Sepal.Length_5 = if_else(Sepal.Length < 5, 1, 0),
           Sepal.Length_6 = if_else(Sepal.Length < 6, 1, 0),
           Sepal.Length_7 = if_else(Sepal.Length < 7, 1, 0),
           .keep = "none")

  expect_equal(df_over, df_expect)

})

test_that("over() works with dates & can transform names ", {


  dat_tbl <- tibble::tibble(start = seq.Date(as.Date("2020-01-01"),
                                     as.Date("2020-01-15"),
                                     by = "days"),
                            end = start + 10)

  df_over <- dat_tbl %>%
    mutate(over(seq(as.Date("2020-01-01"),
                    as.Date("2020-01-21"),
                    by = "weeks"),
                ~ .x >= start & .x <= end,
                .names = "day_{x}",
                .names_fn = ~ gsub("-", "", .x)))

  df_expect <- dat_tbl %>%
    mutate(day_20200101 = "2020-01-01" >= start & "2020-01-01" <= end,
           day_20200108 = "2020-01-08" >= start & "2020-01-08" <= end,
           day_20200115 = "2020-01-15" >= start & "2020-01-15" <= end)

  expect_equal(df_over, df_expect)

})


test_that("over() works with summarise", {

df_over <- csatraw %>%
  group_by(type) %>%
  summarise(over(c(1:5),
                 ~ mean(item1 == .x)))

df_expect <- csatraw %>%
  group_by(type) %>%
  summarise(`1` = mean(item1 == 1),
            `2` = mean(item1 == 2),
            `3` = mean(item1 == 3),
            `4` = mean(item1 == 4),
            `5` = mean(item1 == 5))

expect_equal(df_over, df_expect)

})

test_that("over() works with named lists", {

  df_over <- csatraw %>%
    group_by(type) %>%
    summarise(over(list(bot2 = c(1:2),
                        mid  = 3,
                        top2 = c(4:5)),
                   ~ mean(item1 %in% .x)))

  df_expect <- csatraw %>%
    group_by(type) %>%
    summarise(`bot2` = mean(item1 %in% 1:2),
              `mid`  = mean(item1 %in% 3),
              `top2` = mean(item1 %in% 4:5))

  expect_equal(df_over, df_expect)

})

test_that("over() works with a data.frame", {

  recode_df <- data.frame(old  = c(1, 2, 3, 4, 5),
                          top1 = c(0, 0, 0, 0, 1),
                          top2 = c(0, 0, 0, 1, 1),
                          bot1 = c(1, 0, 0, 0, 0),
                          bot2 = c(1, 1, 0, 0, 0))


  df_over <- csatraw %>%
    transmute(over(recode_df[,-1],
                   ~ .x[match(item1, recode_df[, 1])],
                   .names = "item1_{x}"))

  df_expect <- csatraw %>%
    transmute(
      item1_top1 = recode(item1, `1` = 0, `2` = 0, `3` = 0, `4` = 0, `5` = 1),
      item1_top2 = recode(item1, `1` = 0, `2` = 0, `3` = 0, `4` = 1, `5` = 1),
      item1_bot1 = recode(item1, `1` = 1, `2` = 0, `3` = 0, `4` = 0, `5` = 0),
      item1_bot2 = recode(item1, `1` = 1, `2` = 1, `3` = 0, `4` = 0, `5` = 0)
      )

  expect_equal(df_over, df_expect)

})

test_that("over() works with list-columns", {

  df_over <- csat %>%
       mutate(over(dist_values(csat_open, ", "),
                   ~ as.integer(grepl(.x, csat_open)),
                   .names = "rsp_{x}",
                   .names_fn = ~ gsub("\\s", "_", .x)),
                   .keep = "none")


  df_expect <- csat %>%
    mutate(rsp_friendly_staff = as.integer(grepl("friendly staff", csat_open)),
           rsp_good_service = as.integer(grepl("good service", csat_open)),
           rsp_great_product = as.integer(grepl("great product", csat_open)),
           rsp_no_response = as.integer(grepl("no response", csat_open)),
           rsp_too_expensive = as.integer(grepl("too expensive", csat_open)),
           rsp_unfriendly = as.integer(grepl("unfriendly", csat_open)),
           .keep = "none")

  expect_equal(df_over, df_expect)

})


test_that("over() works string evaluation", {

  df_over <- iris %>%
    mutate(over(c("Sepal", "Petal"),
                ~ .("{.x}.Width") + .("{.x}.Length")
                ))

  df_expect <- iris %>%
    mutate(
      Sepal = Sepal.Width + Sepal.Length,
      Petal = Petal.Width + Petal.Length
    )

  expect_equal(df_over, df_expect)

  df_over2 <- iris %>%
    mutate(over(c("Sepal", "Petal"),
                ~ eval(sym(paste0(.x, ".Width"))) +
                  eval(sym(paste0(.x, ".Length")))
    ))

  expect_equal(df_over2, df_expect)

})

test_that("over() works with anonymous functions", {

  df_over <-  iris %>%
    summarise(over(c("Sepal", "Petal"),
                   function(x) mean(.("{x}.Width"))
                   ))

  df_expect <- iris %>%
    summarise(
      Sepal = mean(Sepal.Width),
      Petal = mean(Petal.Width)
    )

  expect_equal(df_over, df_expect)

})

test_that("over() works named lists", {

  df_over <-  iris %>%
    mutate(over(c("Sepal", "Petal"),
                list(product = ~ .("{.x}.Width") * .("{.x}.Length"),
                     sum = ~ .("{.x}.Width") + .("{.x}.Length"))),
           .keep = "none")

  df_expect <- iris %>%
    mutate(
      Sepal_product = Sepal.Width * Sepal.Length,
      Sepal_sum = Sepal.Width + Sepal.Length,
      Petal_product = Petal.Width * Petal.Length,
      Petal_sum = Petal.Width + Petal.Length,
      .keep = "none"
    )

  expect_equal(df_over, df_expect)

})

test_that("over() works with named lists", {

  df_over <-  iris %>%
    mutate(over(c("Sepal", "Petal"),
                list(product = ~ .("{.x}.Width") * .("{.x}.Length"),
                     sum = ~ .("{.x}.Width") + .("{.x}.Length"))),
           .keep = "none")

  df_expect <- iris %>%
    mutate(
      Sepal_product = Sepal.Width * Sepal.Length,
      Sepal_sum = Sepal.Width + Sepal.Length,
      Petal_product = Petal.Width * Petal.Length,
      Petal_sum = Petal.Width + Petal.Length,
      .keep = "none"
    )

  expect_equal(df_over, df_expect)

})

test_that("over() can control names", {

  df_over <-  iris %>%
    mutate(over(c("Sepal", "Petal"),
                list(product = ~ .("{.x}.Width") * .("{.x}.Length"),
                     sum = ~ .("{.x}.Width") + .("{.x}.Length")),
                .names = "{fn}_{x}"),
           .keep = "none")

  df_expect <- iris %>%
    mutate(
      product_Sepal = Sepal.Width * Sepal.Length,
      sum_Sepal = Sepal.Width + Sepal.Length,
      product_Petal = Petal.Width * Petal.Length,
      sum_Petal = Petal.Width + Petal.Length,
      .keep = "none"
    )

  expect_equal(df_over, df_expect)

})

# tests adopted from across
test_that("over() works on one column data.frame", {

  df0 <- data.frame(x = 1)

  df_over <- df0 %>%
    mutate(over(1, ~ x * .x))

  df_exepect <- df0 %>%
    mutate(`1` = x * 1)

  expect_equal(df_over, df_exepect)

})

test_that("over() does not select grouping variables", {

  df0 <- data.frame(g = 1, x = 1)

  df_over <- df0 %>%
    group_by(g) %>%
    summarise(x = over(1, ~ x * .x)) %>%
    pull()

  expect_equal(df_over, tibble(`1` = 1))

})

test_that("over() correctly names output columns", {
  gf <- tibble(x = 1, y = 2, z = 3, s = "") %>% group_by(x)

  expect_named(
    mutate(gf, over(1, ~ x * .x)),
    c("x", "y", "z", "s", "1")
  )
  expect_named(
    mutate(gf, over(1, ~ x * .x, .names = "id_{x}")),
    c("x", "y", "z", "s", "id_1")
  )
  expect_named(
    summarise(gf, over(1, ~ mean(x + .x), .names = "mean_{x}")),
    c("x", "mean_1")
  )
  expect_named(
    summarise(gf, over(1, list(mean = mean, sum = sum))),
    c("x", "1_mean", "1_sum")
  )
  expect_named(
    summarise(gf, over(1, list(mean = mean, sum))),
    c("x", "1_mean", "1_2")
  )
  expect_named(
    summarise(gf, over(1, list(mean, sum = sum))),
    c("x", "1_1", "1_sum")
  )
  expect_named(
    summarise(gf, over(1, list(mean, sum))),
    c("x", "1_1", "1_2")
  )
  expect_named(
    summarise(gf, over(1, list(mean = mean, sum = sum), .names = "{fn}_{x}")),
    c("x", "mean_1", "sum_1")
  )
  # further added over()'s x_val, x_idx, x_nm
  expect_named(
    summarise(gf, over(list(a = 5, b = 6, c = 7),
                       list(mean = mean, sum = sum),
                       .names = "{fn}_{x_val}")),
    c("x", "mean_5", "sum_5",  "mean_6", "sum_6", "mean_7", "sum_7")
  )
  expect_warning(
    summarise(gf, over(list(a = 5:6, b = 6, c = 7),
                       list(mean = mean, sum = sum),
                       .names = "{fn}_{x_val}"))
  )
  expect_warning(
    summarise(gf, over(data.frame(a = 5:6, b = 6:7, c = 7:8),
                       list(mean = mean, sum = sum),
                       .names = "{fn}_{x_val}"))
  )
  expect_named(
    summarise(gf, over(list(a = 5, b = 6, c = 7),
                       list(mean = mean, sum = sum),
                       .names = "{fn}_{x_nm}")),
    c("x", "mean_a", "sum_a",  "mean_b", "sum_b", "mean_c", "sum_c")
  )
  expect_named(
    summarise(gf, over(list(5, 6:8, 7),
                       list(sum = sum))),
    c("x", "1_sum", "2_sum", "3_sum")
  )
  expect_warning(
    summarise(gf, over(list(5, 6, 7),
                       list(mean = mean, sum = sum),
                       .names = "{fn}_{x_nm}"))
  )
  expect_named(
    summarise(gf, over(list(a = 5, b = 6, c = 7),
                       list(mean = mean, sum = sum),
                       .names = "{fn}_{x_idx}")),
    c("x", "mean_1", "sum_1",  "mean_2", "sum_2", "mean_3", "sum_3")
  )
  expect_error(
    summarise(gf, over(list(a = 5, b = 5, c = 7),
                       list(mean = mean, sum = sum),
                       .names = "{fn}_{x_val}"))
  )
  expect_error(
    summarise(gf, over(list(a = 5, a = 6, c = 7),
                       list(mean = mean, sum = sum),
                       .names = "{fn}_{x_nm}"))
  )
  expect_error(
    summarise(gf, over(list(a = 5, a = 6, c = 7),
                       list(mean = mean, sum = sum)))
  )
  # further added external vector
  col_nm_vec <- c("one", "two", "three", "four", "five", "six")
  expect_named(
    summarise(gf, over(list(a = 5, b = 6, c = 7),
                       list(mean = mean, sum = sum),
                       .names = col_nm_vec)),
    c("x", "one", "two", "three", "four", "five", "six")
  )
  # test that external vector throws error when too short
  col_nm_vec2 <- c("one", "two", "three")
  expect_error(
    summarise(gf, over(list(a = 5, b = 6, c = 7),
                       list(mean = mean, sum = sum),
                       .names = col_nm_vec2))
  )
  # test that external vector throws error when too long
  col_nm_vec3 <- c("one", "two", "three", "four", "five", "six", "seven")
  expect_error(
    summarise(gf, over(list(a = 5, b = 6, c = 7),
                       list(mean = mean, sum = sum),
                       .names = col_nm_vec3))
  )
  expect_error(
    summarise(gf, over(list(a = 5, b = 6),
                       list(mean = mean),
                       .names = "new"))
  )
  # test that external vectors throws error when it contains non-unique names
  col_nm_vec4 <- rep(c("one", "two", "three"), 2)
  expect_error(
    summarise(gf, over(list(a = 5, b = 6, c = 7),
                       list(mean = mean, sum = sum),
                       .names = col_nm_vec4))
  )

})

test_that("over() result locations are aligned with .fn list names", {

  df <- tibble(x = 1:2)

  expect <- tibble(`1_cls` = "integer", `1_type` = TRUE,
                   `2_cls` = "integer", `2_type` = TRUE)

  x <- summarise(df, over(1:2, list(cls = ~ class(x + .x),
                                    type = ~ is.numeric(x + .x))))
  expect_identical(x, expect)
})

test_that("over() passes ... to functions", {

  df <- tibble(x = 1)

  expect_equal(
    summarise(df, over(list(a = c(1,NA)), mean, na.rm = TRUE)),
    tibble(a = 1)
  )

  expect_equal(
    summarise(df, over(list(a = c(1,NA)), list(mean = mean, median = median), na.rm = TRUE)),
    tibble(a_mean = 1, a_median = 1)
  )
})

test_that("over() passes unnamed arguments following .fns as ...", {

  df <- tibble(x = 1)

  expect_equal(mutate(df, over(2, `+`, 1)),
               tibble(x = 1, `2` = 3))
})

test_that("over() avoids simple argument name collisions with ... ", {

  df <- tibble(x = c(1, 2))

  expect_equal(summarize(df, over(list(a = c(1:10)), tail, n = 1)),
               tibble(`a` = 10))
})

test_that("over() works sequentially", {

  df <- tibble(a = 1)

  expect_equal(
    mutate(df,
           x = ncol(over(1:3, sum)),
           y = ncol(over(c(.data$a, .data$x), sum))),
    tibble(a = 1, x = 3L, y = 2L)
  )

  expect_equal(
    mutate(df,
           a = 2,
           y = over(.data$a, mean)$`2`),
    tibble(a = 2, y = 2)
  )

  expect_equal(
    mutate(df,
           x = 2,
           y = ncol(over(c(.data$a, .data$x), mean))),
    tibble(a = 1, x = 2, y = 2L)
  )
})

test_that("over() retains original ordering", {
  df <- tibble(a = 1, b = 2)
  expect_named(mutate(df, a = 3, x = over(c(.data$a, .data$b), mean))$x,
               c("3", "2"))
})

test_that("over() gives meaningful messages", {

  # inside dplyr
  expect_snapshot_error(over())

  # .fns must be function
  expect_snapshot_error(
    summarise(tibble(x = 1), over(1, 42))
  )

  # vector to .names too short
  expect_snapshot_error({
    gf <- tibble(x = 1, y = 2, z = 3, s = "") %>% group_by(x)
    col_nm_vec2 <- c("one", "two", "three")
    summarise(gf, over(list(a = 5, b = 6, c = 7),
                       list(mean = mean, sum = sum),
                       .names = col_nm_vec2))
  })

  # vector to .names too long
  expect_snapshot_error({
    gf <- tibble(x = 1, y = 2, z = 3, s = "") %>% group_by(x)
    col_nm_vec3 <- c("one", "two", "three", "four", "five", "six", "seven")
    summarise(gf, over(list(a = 5, b = 6, c = 7),
                       list(mean = mean, sum = sum),
                       .names = col_nm_vec3))
  })

  # vector to .names duplicate names
  expect_snapshot_error({
    gf <- tibble(x = 1, y = 2, z = 3, s = "") %>% group_by(x)
    col_nm_vec4 <- rep(col_nm_vec2 <- c("one", "two", "three"), 2)
    summarise(gf, over(list(a = 5, b = 6, c = 7),
                       list(mean = mean, sum = sum),
                       .names = col_nm_vec4))
  })

})

test_that("monitoring cache - over() can be used twice in the same expression", {
  df <- tibble(a = 1, b = 2)
  expect_equal(
    mutate(df, x = ncol(over(c(1, 2), mean)) + ncol(over(3, mean))),
    tibble(a = 1, b = 2, x = 3)
  )
})

test_that("monitoring cache - over() can be used in separate expressions", {
  df <- tibble(a = 1, b = 2)
  expect_equal(
    mutate(df,
           x = ncol(over(c(1, 2), mean)),
           y = ncol(over(.data$x, mean))),
    tibble(a = 1, b = 2, x = 2, y = 1)
  )
})

test_that("over() rejects non vectors", {
  expect_error(
    data.frame(x = 1) %>% summarise(over(1, ~sym("foo")))
  )
})

test_that("over() uses tidy recycling rules", {
  expect_equal(
    tibble::tibble(x = 1, y = 2) %>% summarise(over(1:2, ~ rep(42, .))),
    tibble::tibble(`1` = rep(42, 2), `2` = rep(42, 2))
  )

  expect_error(
    data.frame(x = 2, y = 3) %>% summarise(over(1:3, ~rep(42, .)))
  )
})

test_that("over(<empty set>, foo) returns a data frame with 1 row", {
  df <- tibble(x = 1:42)
  expect_equal(
    mutate(df, over(c(), mean)),
    df
  )
  expect_equal(
    mutate(df, y = over(c(), mean))$y,
    tibble::new_tibble(list(), nrow = 42)
  )
  mutate(df, {
    res <- over(c(), mean)
    expect_equal(nrow(res), 1L)
    res
  })
})

test_that("monitoring cache - over() usage can depend on the group id", {

  df <- tibble(g = 1:2, a = 1:2, b = 3:4)
  df <- group_by(df, g)

  switcher <- function() {
    if_else(cur_group_id() == 1L,
            over(cur_data()$a, mean, .names = "c")$c,
            over(cur_data()$b, mean, .names = "c")$c)
  }

  expect <- df
  expect$x <- c(1L, 4L)

  expect_equal(
    mutate(df, x = switcher()),
    expect
    )
})


# issues not adapted in over code yet

#
# test_that("monitoring cache - across() internal cache key depends on all inputs", {
#   df <- tibble(g = rep(1:2, each = 2), a = 1:4)
#   df <- group_by(df, g)
#
#   expect_identical(
#     mutate(df, tibble(x = across(where(is.numeric), mean)$a, y = across(where(is.numeric), max)$a)),
#     mutate(df, x = mean(a), y = max(a))
#   )
# })
#
# test_that("over(.names=) can use local variables in addition to {col} and {fn}", {
#   res <- local({
#     prefix <- "MEAN"
#     data.frame(x = 42) %>%
#       summarise(over(0, ~ mean(x + .x), .names = "{prefix}_{x}"))
#   })
#   expect_identical(res, data.frame(MEAN_x = 42))
# })
#
# test_that("over() uses environment from the current quosure (#5460)", {
#   # If the data frame `y` is selected, causes a subscript conversion
#   # error since it is fractional
#   df <- data.frame(x = c(1:2), y = c(1.1, 2.4))
#   y <- "x"
#   expect_equal(df %>% summarise(over(.env$y, mean, .names = "x_idx")), data.frame(x = 1))
#   expect_equal(df %>% mutate(over(all_of(y), mean)), df)
#   expect_equal(df %>% filter(over(all_of(y), ~ .x < 2)), df)
#
#   # Recursive case fails because the `y` column has precedence (#5498)
#   expect_error(df %>% summarise(summarise(across(), across(all_of(y), mean))))
#
#   # Inherited case
#   out <- df %>% summarise(local(across(all_of(y), mean)))
#   expect_equal(out, data.frame(x = 1))
# })

# test_that("across() sees columns in the recursive case (#5498)", {
#   df <- tibble(
#     vars = list("foo"),
#     data = list(data.frame(foo = 1, bar = 2))
#   )
#
#   out <- df %>% mutate(data = purrr::map2(data, vars, ~ {
#     .x %>% mutate(across(all_of(.y), ~ NA))
#   }))
#   exp <- tibble(
#     vars = list("foo"),
#     data = list(data.frame(foo = NA, bar = 2))
#   )
#   expect_identical(out, exp)
#
#   out <- df %>% mutate(data = purrr::map2(data, vars, ~ {
#     local({
#       .y <- "bar"
#       .x %>% mutate(across(all_of(.y), ~ NA))
#     })
#   }))
#   exp <- tibble(
#     vars = list("foo"),
#     data = list(data.frame(foo = 1, bar = NA))
#   )
#   expect_identical(out, exp)
# })


# expected errors

test_that("over() custom errors and warnings", {

  # inside dplyr
  expect_error(over())

  # .fns must be function
    expect_error(
    summarise(tibble(x = 1), over(1, 42))
  )

})

# dplyr compability
test_that("over() can use cur_data()", {

  df_over <- iris %>%
    group_by(Species) %>%
    summarise(over(1,
                   ~ nrow(cur_data())))

  df_expect <-  iris %>%
    group_by(Species) %>%
    summarise(`1` = nrow(cur_data()))

  expect_equal(df_over, df_expect)

})

test_that("over() can be used with mutate's `.keep` argument", {

  df_none <- iris %>%
    mutate(over(1, ~ Sepal.Length + .x), .keep = "none")

  expect_named(df_none, "1")


  df_all <- iris %>%
    mutate(over(1, ~ Sepal.Length + .x), .keep = "all")

  expect_named(df_all, c(names(iris),"1"))


  df_used <- iris %>%
    mutate(over(1, ~ Sepal.Length + .x), .keep = "used")

  expect_named(df_used, c("Sepal.Length","1"))


  df_unused <- iris %>%
    mutate(over(1, ~ Sepal.Length + .x), .keep = "unused")

  expect_named(df_unused, c(names(iris)[-1],"1"))

})

# other edge cases
TimTeaFan/dplyover documentation built on Sept. 27, 2021, 3:14 p.m.