tests/testthat/testB_Squashing.R

context("Data Squashing")
#For testing if the function that squashes data works properly

data.table::setDTthreads(2)  #only needed for CRAN checks

#Squash some processed data
set.seed(483726)
dat <- data.frame(var1 = letters[1:26],
                  var2 = LETTERS[1:26],
                  N = c(rep(0, 11), rep(1, 10), rep(2, 4), rep(3, 1)),
                  E = abs(c(rnorm(11, 0), rnorm(10, 1), rnorm(4, 2),
                            rnorm(1, 3))),
                  stringsAsFactors = FALSE
)

dat_no_N <- dat[, !grepl("N", names(dat))]
dat_missing1 <- dat
dat_missing1[3, "N"] <- NA
dat_missing2 <- dat
dat_missing2[1, "E"] <- ""

zeroes <- squashData(dat, count = 0, bin_size = 3, keep_pts = 3,
                     min_bin = 2, min_pts = 2)
ones   <- squashData(zeroes, count = 1, bin_size = 2, keep_pts = 2,
                     min_bin = 2, min_pts = 2)
twos   <- squashData(ones, count = 2, bin_size = 2, keep_pts = 2,
                     min_bin = 2, min_pts = 2)

test_that("correct number of rows and columns from squashData()", {
  expect_equal(nrow(zeroes), 21)
  expect_equal(nrow(ones), 17)
  expect_equal(nrow(twos), 16)
  expect_equal(ncol(twos), 3)
})

test_that("do the weights make sense?", {
  expect_equal(squashData(dat, count = 0, bin_size = 3, keep_pts = 0,
                          min_bin = 2, min_pts = 2)$weight,
               c(3,3,3,2, rep(1, 15)))
  expect_equal(squashData(dat, count = 0, bin_size = 3, keep_pts = 3,
                          min_bin = 2, min_pts = 2)$weight,
               c(3,3,2,1,1,1, rep(1, 15)))
  expect_equal(squashData(dat, count = 0, bin_size = 3, keep_pts = 6,
                          min_bin = 2, min_pts = 2)$weight,
               c(3,2,1,1,1,1,1,1, rep(1, 15)))
})

test_that("correct column names from squashData()", {
  expect_match(paste0(names(ones), collapse = ""), "NEweight")
})

test_that("values returned from squashData() make sense", {
  expect_true(max(twos$E) < Inf)
  expect_true(min(twos$E) >= 0)
  expect_equal(max(twos$N), 3)
  expect_equal(min(twos$N), 0)
  expect_equal(max(twos$weight), 3)
  expect_equal(min(twos$weight), 1)
})

test_that("do warnings/errors get correctly printed?", {
  expect_error(squashData(as.matrix(dat)),
               "'data' must be a data frame",
               fixed = TRUE)
  expect_error(squashData(dat_no_N),
               "missing the appropriate column names (need 'N' and 'E')",
               fixed = TRUE)
  expect_error(squashData(dat_missing1),
               "missing or infinite values for 'N' and 'E' are not allowed",
               fixed = TRUE)
  expect_error(squashData(dat_missing2),
               "missing values for 'N' or 'E' are not allowed",
               fixed = TRUE)
  expect_error(squashData(dat, count = -1),
               "'count' must be non-negative",
               fixed = TRUE)
  expect_error(squashData(dat, count = -1.4),
               "'count' must be non-negative",
               fixed = TRUE)
  expect_error(squashData(dat, bin_size = 1),
               "'bin_size' must be >= 2",
               fixed = TRUE)
  expect_error(squashData(dat, keep_pts = -1),
               "'keep_pts' must be non-negative",
               fixed = TRUE)
  expect_error(squashData(ones, count = 1),
               "this data set has already been squashed for this count size",
               fixed = TRUE)
  expect_error(squashData(dat, count = 3, min_pts = 2),
               "not enough points for squashing",
               fixed = TRUE)
  expect_error(squashData(dat, count = 0, bin_size = 50, keep_pts = 0,
                          min_bin = 2, min_pts = 2),
               "not enough bins -- reduce 'bin_size'",
               fixed = TRUE)
  expect_error(squashData(dat, count = 0, bin_size = 2, keep_pts = 12,
                          min_bin = 2, min_pts = 2),
               "reduce 'bin_size' or 'keep_pts'",
               fixed = TRUE)
})

dat_E     <- dat[order(dat$N, dat$E), "E"]
dat_E1    <- squashData(dat, count = 0, bin_size = 5, keep_pts = 0,
                        min_bin = 2, min_pts = 2)
dat_E1    <- dat_E1[dat_E1$N == 0, "E"]

dat_E2    <- squashData(dat, count = 0, bin_size = 5, keep_pts = 5,
                        min_bin = 2, min_pts = 2)
dat_E2    <- dat_E2[dat_E2$N == 0, "E"]

dat_E3    <- squashData(dat, count = 1, bin_size = 2, keep_pts = 0,
                        min_bin = 2, min_pts = 2)
dat_E3    <- dat_E3[dat_E3$N == 1, "E"]

test_that("are the Es averaged correctly?", {
  expect_equal(dat_E1,
               c(mean(dat_E[1:5]), mean(dat_E[6:10]), dat_E[11]))
  expect_equal(dat_E2,
               c(mean(dat_E[1:5]), dat_E[6:11]))
  expect_equal(dat_E3,
               c(mean(dat_E[12:13]), mean(dat_E[14:15]), mean(dat_E[16:17]),
                 mean(dat_E[18:19]), mean(dat_E[20:21])))
})


# autoSquash()------------------------------------------------------------------
dat2 <- data.frame(N = c(rep(0, 100), rep(1, 50), rep(2, 5)), E = runif(155))
squash1 <- autoSquash(dat2, keep_pts = 10,
                      cut_offs = c(25, 75), num_super_pts = c(3, 4, 5))

table1 <- as.matrix(ftable(squash1[, c("N", "weight")]))

test_that("values returned from autoSquash() make sense", {
  expect_true(all(table1[1, ] == c(10, 0, 5)))
  expect_true(all(table1[2, ] == c(10, 4, 0)))
  expect_true(all(table1[3, ] == c(5,  0, 0)))
})

test_that("autoSquash - do warnings/errors get correctly printed?", {
  expect_error(autoSquash(squash1),
               "'data' has already been squashed",
               fixed = TRUE)
  expect_error(autoSquash(dat2, keep_pts = c(100, NA, 50)),
               "missing values not allowed in 'keep_pts', 'cut_offs', or 'num_super_pts'",
               fixed = TRUE)
  expect_error(autoSquash(dat2, cut_offs = c()),
               "'keep_pts', 'cut_offs', & 'num_super_pts' must have a length of 1 or more",
               fixed = TRUE)
  expect_error(autoSquash(dat2, cut_offs = c(500, 1000), num_super_pts = c(50, 75)),
               "'num_super_pts' must have length 1 more than length of 'cut_offs'",
               fixed = TRUE)
  expect_error(autoSquash(dat2, cut_offs = c(1000, 500), num_super_pts = c(50, 75, 150)),
               "elements in 'cut_offs' must be in increasing order",
               fixed = TRUE)
})

Try the openEBGM package in your browser

Any scripts or data that you put into this service are public.

openEBGM documentation built on Sept. 15, 2023, 1:08 a.m.