# tests/testthat/test-sim_df.R

# error messages ----
# Invalid arguments should be rejected with the matching validation message.
test_that("error messages", {
  n_msg <- "n must be an integer > 2"

  # a character scalar is not accepted as data
  expect_error(sim_df("A"), "data must be a data frame or matrix")

  # a non-numeric n and n = 2 are both expected to give the same message
  expect_error(sim_df(iris, "A"), n_msg)
  expect_error(sim_df(iris, 2), n_msg)

  # a logical `between` is rejected
  expect_error(
    sim_df(iris, 10, between = FALSE),
    "between must be a numeric or character vector"
  )
})

# default parameters ----
# With only `data` supplied, the result is expected to have 100 rows and
# 5 columns, the last four named after the first four iris columns.
test_that("default parameters", {
  simulated <- sim_df(iris)
  expected_names <- names(iris)[1:4]

  expect_equal(nrow(simulated), 100)
  expect_equal(ncol(simulated), 5)
  expect_equal(names(simulated)[2:5], expected_names)
})

# specified parameters ----
# With empirical = TRUE, the numeric columns of the simulated data are
# expected to have the same correlations, means and SDs as those of iris.
test_that("specified parameters", {
  n <- 100

  # correlations, means and SDs of the numeric columns of a data frame
  describe <- function(df) {
    numeric_cols <- dplyr::select_if(df, is.numeric)
    list(
      cors = cor(numeric_cols),
      means = as.data.frame(dplyr::summarise_all(numeric_cols, mean)),
      sds = as.data.frame(dplyr::summarise_all(numeric_cols, sd))
    )
  }

  target <- describe(iris)

  # shape and summary-statistic expectations shared by both calls below
  check_against_iris <- function(sim) {
    observed <- describe(sim)

    expect_equal(nrow(sim), n)
    expect_equal(ncol(sim), 5)
    expect_equal(names(sim)[2:5], names(iris)[1:4])

    expect_equal(target$cors, observed$cors)
    expect_equal(target$means, observed$means)
    expect_equal(target$sds, observed$sds)
  }

  # unnamed arguments in order
  check_against_iris(sim_df(iris, n, empirical = TRUE))

  # named arguments out of order
  check_against_iris(
    sim_df(between = c(), empirical = TRUE, data = iris, n = n)
  )
})

# grouping by name ----
# `between` is given as a column name; 60 rows are expected for n = 20
# (presumably 20 per Species level).
test_that("grouping by name", {
  grouped <- sim_df(iris, 20, between = "Species")

  expect_equal(nrow(grouped), 60)
  expect_equal(ncol(grouped), 6)

  # column order is not checked, only the set of names after the first column
  expect_equal(sort(names(grouped)[2:6]), sort(names(iris)))
})

# grouping by col number ----
# `between` is given as a column index (5); the same shape is expected as
# when grouping by name.
test_that("grouping by col number", {
  grouped <- sim_df(iris, 20, between = 5)

  expect_equal(nrow(grouped), 60)
  expect_equal(ncol(grouped), 6)

  # column order is not checked, only the set of names after the first column
  expect_equal(sort(names(grouped)[2:6]), sort(names(iris)))
})

# missing data ----
# Simulating from data that contains NAs: first with default settings
# (shape checks only), then with missing = TRUE, where the simulated data
# is checked for NA rates resembling those of the input.
test_that("missing data", {
  # proportions passed to messy() for columns 1:4
  na_props <- c(.1, .2, .3, .4)
  data <- messy(iris, na_props, 1:4)

  newdf <- sim_df(data)

  expect_equal(nrow(newdf), 100)
  expect_equal(ncol(newdf), 5)
  expect_equal(names(newdf)[2:5], names(iris)[1:4])

  newdf <- sim_df(data, n = 1000, missing = TRUE)

  expect_equal(nrow(newdf), 1000)

  # Previously the NA proportions were computed and discarded, so this
  # branch asserted nothing.
  # NOTE(review): assumes missing = TRUE reproduces the per-column NA rate
  # of the input and that messy() blanks the requested proportions --
  # confirm against sim_df() / messy(). The bound is deliberately loose
  # because the simulation is not seeded here.
  sim_props <- unname(colMeans(is.na(newdf[2:5])))
  expect_lt(max(abs(sim_props - na_props)), 0.1)
})

# within ----
# Data produced by wide2long() is passed to sim_df() with within and
# between factors; the result is checked for wide column names joined
# with "_".
test_that("within", {
  within_factors <- c("feature", "dimension")
  measure_cols <- c(
    "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width"
  )

  long_iris <- wide2long(
    iris,
    within_factors = within_factors,
    within_cols = measure_cols,
    dv = "val",
    id = "sub_id",
    sep = "\\."
  )

  simulated <- sim_df(
    long_iris, 20,
    within = within_factors,
    between = "Species",
    dv = "val",
    id = "sub_id"
  )

  expected_names <- c(
    "sub_id", "Species",
    "Petal_Length", "Petal_Width", "Sepal_Length", "Sepal_Width"
  )
  expect_equal(names(simulated), expected_names)
})

# test_that("mean stats are close over 1000 runs", {
#   skip_on_cran()
#   
#   simiris <- purrr::map_df(1:1000, function(i) {
#     iris %>%
#       sim_df(100) %>%
#       check_sim_stats(digits = 10)
#   })
#   
#   orig_stats <- iris %>%
#     check_sim_stats(digits = 10) %>%
#     dplyr::arrange(var) %>%
#     as.data.frame()
#   
#   sim_stats <- simiris %>% 
#     dplyr::group_by(var) %>%
#     dplyr::summarise_all(mean) %>%
#     dplyr::arrange(var) %>%
#     as.data.frame()
#   
#   expect_equal(orig_stats, sim_stats, tolerance = 0.02)
# })

# seed ----
# Two simulations run after the same external set.seed() call should match.
test_that("seed", {
  # (disabled) setting seed returns same DF, but is reset
  # set.seed(1)
  # rnd0 <- rnorm(1)
  # df1 <- sim_df(iris, seed = 910210)
  # rnd1 <- rnorm(1)
  # df2 <- sim_df(iris, seed = 910210)
  # rnd2 <- rnorm(1)
  # set.seed(1)
  # rnd0b <- rnorm(1)
  # rnd1b <- rnorm(1)
  # rnd2b <- rnorm(1)
  # df3 <- sim_df(iris, seed = 8675309)
  # 
  # expect_equal(df1, df2)
  # expect_false(rnd1 == rnd2)
  # expect_equal(rnd0, rnd0b)
  # expect_equal(rnd1, rnd1b)
  # expect_equal(rnd2, rnd2b)
  # expect_true(!identical(df1, df3))

  # user sets seed externally
  seeded_sim <- function() {
    set.seed(1)
    sim_df(iris, n = 10)
  }

  first_run <- seeded_sim()
  second_run <- seeded_sim()

  expect_equal(first_run, second_run)
})


# long ----
# Checks column names when sim_df() is called with long = TRUE, when its
# long output is fed back in, and that factor level order is kept.
test_that("long", {
  within_factors <- c("type", "dim")

  # wide iris in, long = TRUE
  longdf <- sim_df(
    iris,
    between = "Species",
    within = within_factors,
    sep = ".",
    long = TRUE
  )
  expect_equal(names(longdf), c("id", "Species", "type", "dim", "value"))

  # long data in, no sep given: names are expected to be joined with "_"
  widedf <- sim_df(longdf, between = "Species", within = within_factors)
  expect_equal(
    names(widedf),
    c("id", "Species",
      "Sepal_Length", "Sepal_Width", "Petal_Length", "Petal_Width")
  )

  # long data in, sep = ".": names are expected to be joined with "."
  widedf_dot <- sim_df(
    longdf,
    between = "Species",
    within = within_factors,
    sep = "."
  )
  expect_equal(
    names(widedf_dot),
    c("id", "Species",
      "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
  )

  # data from sim_design() with explicitly ordered levels, re-simulated
  # with long = TRUE
  design_df <- sim_design(
    between = list(B = c("ctl", "app")),
    within = list(W = c("start", "end"))
  )
  resim_long <- sim_df(design_df, between = "B", within = "W", long = TRUE)

  # factors stay in the right order
  expect_equal(levels(resim_long$B), c("ctl", "app"))
  expect_equal(levels(resim_long$W), c("start", "end"))
})

# Try the faux package in your browser
#
# Any scripts or data that you put into this service are public.
#
# faux documentation built on April 20, 2023, 9:13 a.m.