# tests/testthat/test_duplicated.R

# Label passed to testthat's context() for the expectations in this file.
context("Working with Duplicates")

# Base fixture: 12 rows over four integer columns. Every complete row
# (x, y, z, a) is unique, while subsets of the columns repeat.
dups <- data.frame(
  x = rep(seq_len(3), each = 4L),
  y = rep(seq_len(4), each = 3L),
  z = rep(seq_len(2), times = 6L),
  a = rep(seq_len(6), times = 2L)
)
# Zero-row data frame that keeps the columns of `dups`.
empty_df <- dups[0L, , drop = FALSE]
# Expected result: the rows of `dups` whose (y, z) pair occurs more than once.
# Written column-wise; reading across the vectors gives one expected row each.
dups_on_yz <- data.frame(
  x = c(1, 1, 1, 2, 2, 3, 3, 3),
  y = c(1, 1, 2, 2, 3, 3, 4, 4),
  z = c(1, 1, 2, 2, 1, 1, 2, 2),
  a = c(1, 3, 4, 6, 1, 3, 4, 6)
)
# Expected result: the rows of `dups` whose (x, y, z) triple occurs more than
# once. Written column-wise; reading across gives one expected row each.
dups_on_xyz <- data.frame(
  x = c(1, 1, 3, 3),
  y = c(1, 1, 4, 4),
  z = c(1, 1, 2, 2),
  a = c(1, 3, 4, 6)
)

# Fixture in which every row has an exact copy: a 10-row block stacked twice.
all_dups <- data.frame(
  x = rep(1:10, times = 2L),
  y = rep(11:20, times = 2L)
)


# testing view_duplicated
test_that("view_duplicated finds duplicates on specified columns", {
  # Key columns given as bare names.
  found_yz <- view_duplicated(dups, y, z)
  expect_equal(found_yz, dups_on_yz)
  found_xyz <- view_duplicated(dups, x, y, z)
  expect_equal(found_xyz, dups_on_xyz)

  # The same keys expressed by exclusion and through a select helper.
  found_yz_excl <- view_duplicated(dups, -x, -a)
  expect_equal(found_yz_excl, dups_on_yz)
  found_xyz_helper <- view_duplicated(dups, one_of(c("x", "y", "z")))
  expect_equal(found_xyz_helper, dups_on_xyz)
})

test_that("view_duplicated finds duplicates on all columns", {
  # Without `a`, the remaining columns (x, y, z) are the whole data frame, so
  # calling with no column selection is checked against the x/y/z duplicates.
  cols_without_a <- setdiff(names(dups), "a")
  input_xyz <- dups[cols_without_a]
  expected_xyz <- dups_on_xyz[setdiff(names(dups_on_xyz), "a")]

  found <- view_duplicated(input_xyz)
  expect_equal(found, expected_xyz)
})

test_that("view_duplicated when there are no duplicates", {
  # A zero-row input is expected to come back as a zero-row result.
  found_in_empty <- view_duplicated(empty_df)
  expect_equal(found_in_empty, empty_df)

  # No complete row of `dups` repeats, so nothing should be reported.
  found_in_unique <- view_duplicated(dups)
  no_rows <- head(dups, n = 0L)
  expect_equal(found_in_unique, no_rows)
})

test_that("view_duplicated when there are all duplicates", {
  # Every row of `all_dups` has a copy, so the whole input is expected back.
  found <- view_duplicated(all_dups)
  expect_equal(found, all_dups)
})

test_that("view_duplicated handles missing and invalid arguments", {
  # Called with no arguments at all: expected to error.
  expect_error(view_duplicated())
  # An integer vector in place of the data argument: expected to error.
  expect_error(view_duplicated(1:10))
  # An empty list in place of the data argument: expected to error.
  expect_error(view_duplicated(list()))
})

test_that("view_duplicated only accepts columns within data frame", {
  # `q` and `v` are not columns of `dups`.
  expect_error(view_duplicated(dups, q, v))
  # A list passed where a column selection goes: expected to error.
  expect_error(view_duplicated(dups, list()))
  # A select helper whose pattern matches none of the columns of `dups`.
  expect_error(view_duplicated(dups, ends_with("\\dx")))
})


# testing remove_duplicated
test_that("remove_duplicated removes duplicates", {
  # Join on every column explicitly. This selects the same keys as dplyr's
  # default natural join (both tables share x, y, z, a) but avoids the
  # "Joining, by = ..." message that would otherwise be printed six times.
  join_cols <- c("x", "y", "z", "a")

  # Duplicates on (y, z) come in pairs inside `dups_on_yz`: odd positions hold
  # the first occurrence and even positions the second. Expected results are
  # `dups` minus whichever occurrences the option is meant to discard.
  rm_first_on_yz <- dplyr::anti_join(
    dups, dplyr::slice(dups_on_yz, c(2, 4, 6, 8)),
    by = join_cols
  )
  rm_last_on_yz <- dplyr::anti_join(
    dups, dplyr::slice(dups_on_yz, c(1, 3, 5, 7)),
    by = join_cols
  )
  rm_all_on_yz <- dplyr::anti_join(dups, dups_on_yz, by = join_cols)

  expect_equal(remove_duplicated(dups, y, z, opt_keep = "first"), rm_first_on_yz)
  expect_equal(remove_duplicated(dups, y, z, opt_keep = "last"), rm_last_on_yz)
  expect_equal(remove_duplicated(dups, y, z, opt_keep = "none"), rm_all_on_yz)

  # Same construction for duplicates on (x, y, z), which form two pairs.
  rm_first_on_xyz <- dplyr::anti_join(
    dups, dplyr::slice(dups_on_xyz, c(2, 4)),
    by = join_cols
  )
  rm_last_on_xyz <- dplyr::anti_join(
    dups, dplyr::slice(dups_on_xyz, c(1, 3)),
    by = join_cols
  )
  rm_all_on_xyz <- dplyr::anti_join(dups, dups_on_xyz, by = join_cols)

  expect_equal(remove_duplicated(dups, x, y, z, opt_keep = "first"), rm_first_on_xyz)
  expect_equal(remove_duplicated(dups, x, y, z, opt_keep = "last"), rm_last_on_xyz)
  expect_equal(remove_duplicated(dups, x, y, z, opt_keep = "none"), rm_all_on_xyz)
})

test_that("remove_duplicated when there are no duplicates", {
  # A zero-row input is expected to come back unchanged.
  deduped_empty <- remove_duplicated(empty_df)
  expect_equal(deduped_empty, empty_df)

  # No complete row of `dups` repeats, so nothing should be removed.
  deduped_unique <- remove_duplicated(dups)
  expect_equal(deduped_unique, dups)
})

test_that("remove_duplicated when there are all duplicates", {
  # `all_dups` is a 10-row block stacked twice; with default options the
  # result is expected to equal its first ten rows.
  deduped <- remove_duplicated(all_dups)
  first_copy <- head(all_dups, n = 10L)
  expect_equal(deduped, first_copy)
})

test_that("remove_duplicated handles missing data and invalid arguments", {
  # Missing or non-data-frame input: each call is expected to error.
  expect_error(remove_duplicated())
  expect_error(remove_duplicated(1:10))
  expect_error(remove_duplicated(list()))
  # `opt_keep` given an arbitrary string, a number and a logical: each is
  # expected to error.
  expect_error(remove_duplicated(dups, opt_keep = "something"))
  expect_error(remove_duplicated(dups, opt_keep = 1))
  expect_error(remove_duplicated(dups, opt_keep = TRUE))
  # `opt_summary` given a string and a number: each is expected to error.
  # NOTE(review): presumably `opt_summary` takes a logical flag - confirm
  # against the function definition.
  expect_error(remove_duplicated(dups, opt_summary = "something"))
  expect_error(remove_duplicated(dups, opt_summary = 1))
})
# jennguyen1/jn.general documentation built on March 28, 2020, 7:18 p.m.