# tests/testthat/test-get_dummies.R

# Fixture without missing values: a character column, a factor column holding
# the same letters in reverse order, and a numeric column of random draws.
df1 <- data.table(
  col1 = rep(letters[1:3], times = 2),
  col2 = factor(rep(letters[3:1], times = 2)),
  var1 = rnorm(n = 6, mean = 0, sd = 1)
)

# Same layout as df1, but with a missing value inserted as the fourth row of
# both the character and the factor column (seven rows in total).
df2 <- data.table(
  col1 = c("a", "b", "c", NA, "a", "b", "c"),
  col2 = factor(c("c", "b", "a", NA, "c", "b", "a")),
  var1 = rnorm(n = 7, mean = 0, sd = 1)
)

test_that("default uses all character/factor - no NAs & no modify-by-reference", {
  result <- get_dummies(df1)

  # Input columns come first, followed by one indicator per letter for each of
  # the character and factor columns.
  input_cols <- c("col1", "col2", "var1")
  expect_named(
    result,
    c(input_cols, paste0("col1_", letters[1:3]), paste0("col2_", letters[1:3]))
  )

  # Indicator patterns for a value sitting in slot 1, 2 or 3 of each
  # three-row cycle of the fixture.
  slot_1 <- rep(c(1, 0, 0), times = 2)
  slot_2 <- rep(c(0, 1, 0), times = 2)
  slot_3 <- rep(c(0, 0, 1), times = 2)

  # col1 cycles a, b, c.
  expect_equal(result$col1_a, slot_1)
  expect_equal(result$col1_b, slot_2)
  expect_equal(result$col1_c, slot_3)

  # col2 cycles c, b, a, so the slots are mirrored.
  expect_equal(result$col2_a, slot_3)
  expect_equal(result$col2_b, slot_2)
  expect_equal(result$col2_c, slot_1)

  # The fixture itself must still have only its three original columns.
  expect_named(df1, input_cols)
})

test_that("get_dummies. works", {
  # Any warnings from the dotted variant are silenced; only the returned table
  # is inspected here.
  out <- get_dummies.(df1) %>% suppressWarnings()

  original_names <- c("col1", "col2", "var1")
  dummy_names <- c(
    "col1_a", "col1_b", "col1_c",
    "col2_a", "col2_b", "col2_c"
  )
  expect_named(out, c(original_names, dummy_names))

  # The three distinct indicator patterns that occur in a six-row fixture whose
  # values repeat with period three.
  hits_rows_1_4 <- c(1, 0, 0, 1, 0, 0)
  hits_rows_2_5 <- c(0, 1, 0, 0, 1, 0)
  hits_rows_3_6 <- c(0, 0, 1, 0, 0, 1)

  expect_equal(out$col1_a, hits_rows_1_4)
  expect_equal(out$col1_b, hits_rows_2_5)
  expect_equal(out$col1_c, hits_rows_3_6)

  expect_equal(out$col2_a, hits_rows_3_6)
  expect_equal(out$col2_b, hits_rows_2_5)
  expect_equal(out$col2_c, hits_rows_1_4)

  # The input fixture keeps exactly its original columns.
  expect_named(df1, original_names)
})

test_that("works with data.frame input", {
  # Feed a base data.frame copy of the fixture instead of the data.table.
  plain_df <- as.data.frame(df1)
  result <- get_dummies(plain_df)

  expect_named(
    result,
    c(
      "col1", "col2", "var1",
      paste0("col1_", c("a", "b", "c")),
      paste0("col2_", c("a", "b", "c"))
    )
  )

  # Expected indicators, keyed by the position of the value within each
  # three-row cycle of the fixture.
  first <- rep(c(1, 0, 0), times = 2)
  middle <- rep(c(0, 1, 0), times = 2)
  last <- rep(c(0, 0, 1), times = 2)

  expect_equal(result$col1_a, first)
  expect_equal(result$col1_b, middle)
  expect_equal(result$col1_c, last)

  expect_equal(result$col2_a, last)
  expect_equal(result$col2_b, middle)
  expect_equal(result$col2_c, first)
})

test_that("works with NAs", {
  result <- get_dummies(df2)

  # Each dummified column gains an extra "_NA" indicator after its letters.
  suffixes <- c(letters[1:3], "NA")
  expect_named(
    result,
    c("col1", "col2", "var1", paste0("col1_", suffixes), paste0("col2_", suffixes))
  )

  # df2 is two three-row cycles separated by the missing row, so every letter
  # indicator is its three-row pattern, a zero, then the pattern again.
  around_na <- function(pattern) c(pattern, 0, pattern)
  # Only the fourth row is missing.
  na_only <- as.numeric(seq_len(7) == 4)

  expect_equal(result$col1_a, around_na(c(1, 0, 0)))
  expect_equal(result$col1_b, around_na(c(0, 1, 0)))
  expect_equal(result$col1_c, around_na(c(0, 0, 1)))
  expect_equal(result$col1_NA, na_only)

  expect_equal(result$col2_a, around_na(c(0, 0, 1)))
  expect_equal(result$col2_b, around_na(c(0, 1, 0)))
  expect_equal(result$col2_c, around_na(c(1, 0, 0)))
  expect_equal(result$col2_NA, na_only)
})

test_that("no prefix works, even with numeric", {
  # Swap the factor column for a numeric one containing a missing value, then
  # dummify both columns without prefixing the new names.
  numeric_input <- mutate(df1, col2 = c(1, 1, 1, 2, NA, 2))
  result <- get_dummies(numeric_input, cols = c(col1, col2), prefix = FALSE)

  # Without a prefix the new columns are named after the values alone.
  expect_named(
    result,
    c(
      "col1", "col2", "var1",
      letters[1:3],
      "1", "2", "NA"
    )
  )

  # Letter indicators follow the a, b, c cycle of col1.
  expect_equal(result$a, rep(c(1, 0, 0), times = 2))
  expect_equal(result$b, rep(c(0, 1, 0), times = 2))
  expect_equal(result$c, rep(c(0, 0, 1), times = 2))

  # Numeric indicators follow the replaced col2: 1, 1, 1, 2, NA, 2.
  expect_equal(result$`1`, c(1, 1, 1, 0, 0, 0))
  expect_equal(result$`2`, c(0, 0, 0, 1, 0, 1))
  expect_equal(result$`NA`, c(0, 0, 0, 0, 1, 0))
})

test_that("prefix_sep works", {
  sep <- "."
  result <- get_dummies(df1, prefix_sep = sep)

  # New columns join the source column name and the value with the separator.
  expect_named(
    result,
    c(
      "col1", "col2", "var1",
      paste0("col1", sep, letters[1:3]),
      paste0("col2", sep, letters[1:3])
    )
  )

  # Indicator patterns by position within each three-row cycle.
  pos_1 <- rep(c(1, 0, 0), times = 2)
  pos_2 <- rep(c(0, 1, 0), times = 2)
  pos_3 <- rep(c(0, 0, 1), times = 2)

  expect_equal(result$col1.a, pos_1)
  expect_equal(result$col1.b, pos_2)
  expect_equal(result$col1.c, pos_3)

  expect_equal(result$col2.a, pos_3)
  expect_equal(result$col2.b, pos_2)
  expect_equal(result$col2.c, pos_1)
})

test_that("drop_first works", {
  result <- get_dummies(df1, drop_first = TRUE)

  # One indicator per source column is left out: "a" for col1 and "c" for col2.
  kept_dummies <- c("col1_b", "col1_c", "col2_a", "col2_b")
  expect_named(result, c("col1", "col2", "var1", kept_dummies))

  # The remaining indicators carry the same values as without drop_first.
  rows_2_5 <- rep(c(0, 1, 0), times = 2)
  rows_3_6 <- rep(c(0, 0, 1), times = 2)

  expect_equal(result$col1_b, rows_2_5)
  expect_equal(result$col1_c, rows_3_6)

  expect_equal(result$col2_a, rows_3_6)
  expect_equal(result$col2_b, rows_2_5)
})

test_that("dummify_na = FALSE works", {
  result <- get_dummies(df2, dummify_na = FALSE)

  # No "_NA" indicator columns are expected in this mode.
  expect_named(
    result,
    c("col1", "col2", "var1", paste0("col1_", letters[1:3]), paste0("col2_", letters[1:3]))
  )

  # The missing fourth row is zero in every indicator, so each expectation is
  # the six-row pattern with a zero spliced in after row three.
  with_na_row <- function(pattern) append(rep(pattern, times = 2), 0, after = 3)

  expect_equal(result$col1_a, with_na_row(c(1, 0, 0)))
  expect_equal(result$col1_b, with_na_row(c(0, 1, 0)))
  expect_equal(result$col1_c, with_na_row(c(0, 0, 1)))

  expect_equal(result$col2_a, with_na_row(c(0, 0, 1)))
  expect_equal(result$col2_b, with_na_row(c(0, 1, 0)))
  expect_equal(result$col2_c, with_na_row(c(1, 0, 0)))
})

# Try the tidytable package in your browser
#
# Any scripts or data that you put into this service are public.
#
# tidytable documentation built on Oct. 5, 2023, 5:07 p.m.