Nothing
context("Data Squashing")
#For testing if the function that squashes data works properly
data.table::setDTthreads(2) #only needed for CRAN checks
#Squash some processed data
set.seed(483726)
dat <- data.frame(var1 = letters[1:26],
var2 = LETTERS[1:26],
N = c(rep(0, 11), rep(1, 10), rep(2, 4), rep(3, 1)),
E = abs(c(rnorm(11, 0), rnorm(10, 1), rnorm(4, 2),
rnorm(1, 3))),
stringsAsFactors = FALSE
)
dat_no_N <- dat[, !grepl("N", names(dat))]
dat_missing1 <- dat
dat_missing1[3, "N"] <- NA
dat_missing2 <- dat
dat_missing2[1, "E"] <- ""
zeroes <- squashData(dat, count = 0, bin_size = 3, keep_pts = 3,
min_bin = 2, min_pts = 2)
ones <- squashData(zeroes, count = 1, bin_size = 2, keep_pts = 2,
min_bin = 2, min_pts = 2)
twos <- squashData(ones, count = 2, bin_size = 2, keep_pts = 2,
min_bin = 2, min_pts = 2)
test_that("correct number of rows and columns from squashData()", {
expect_equal(nrow(zeroes), 21)
expect_equal(nrow(ones), 17)
expect_equal(nrow(twos), 16)
expect_equal(ncol(twos), 3)
})
test_that("do the weights make sense?", {
expect_equal(squashData(dat, count = 0, bin_size = 3, keep_pts = 0,
min_bin = 2, min_pts = 2)$weight,
c(3,3,3,2, rep(1, 15)))
expect_equal(squashData(dat, count = 0, bin_size = 3, keep_pts = 3,
min_bin = 2, min_pts = 2)$weight,
c(3,3,2,1,1,1, rep(1, 15)))
expect_equal(squashData(dat, count = 0, bin_size = 3, keep_pts = 6,
min_bin = 2, min_pts = 2)$weight,
c(3,2,1,1,1,1,1,1, rep(1, 15)))
})
test_that("correct column names from squashData()", {
expect_match(paste0(names(ones), collapse = ""), "NEweight")
})
test_that("values returned from squashData() make sense", {
expect_true(max(twos$E) < Inf)
expect_true(min(twos$E) >= 0)
expect_equal(max(twos$N), 3)
expect_equal(min(twos$N), 0)
expect_equal(max(twos$weight), 3)
expect_equal(min(twos$weight), 1)
})
test_that("do warnings/errors get correctly printed?", {
expect_error(squashData(as.matrix(dat)),
"'data' must be a data frame",
fixed = TRUE)
expect_error(squashData(dat_no_N),
"missing the appropriate column names (need 'N' and 'E')",
fixed = TRUE)
expect_error(squashData(dat_missing1),
"missing or infinite values for 'N' and 'E' are not allowed",
fixed = TRUE)
expect_error(squashData(dat_missing2),
"missing values for 'N' or 'E' are not allowed",
fixed = TRUE)
expect_error(squashData(dat, count = -1),
"'count' must be non-negative",
fixed = TRUE)
expect_error(squashData(dat, count = -1.4),
"'count' must be non-negative",
fixed = TRUE)
expect_error(squashData(dat, bin_size = 1),
"'bin_size' must be >= 2",
fixed = TRUE)
expect_error(squashData(dat, keep_pts = -1),
"'keep_pts' must be non-negative",
fixed = TRUE)
expect_error(squashData(ones, count = 1),
"this data set has already been squashed for this count size",
fixed = TRUE)
expect_error(squashData(dat, count = 3, min_pts = 2),
"not enough points for squashing",
fixed = TRUE)
expect_error(squashData(dat, count = 0, bin_size = 50, keep_pts = 0,
min_bin = 2, min_pts = 2),
"not enough bins -- reduce 'bin_size'",
fixed = TRUE)
expect_error(squashData(dat, count = 0, bin_size = 2, keep_pts = 12,
min_bin = 2, min_pts = 2),
"reduce 'bin_size' or 'keep_pts'",
fixed = TRUE)
})
dat_E <- dat[order(dat$N, dat$E), "E"]
dat_E1 <- squashData(dat, count = 0, bin_size = 5, keep_pts = 0,
min_bin = 2, min_pts = 2)
dat_E1 <- dat_E1[dat_E1$N == 0, "E"]
dat_E2 <- squashData(dat, count = 0, bin_size = 5, keep_pts = 5,
min_bin = 2, min_pts = 2)
dat_E2 <- dat_E2[dat_E2$N == 0, "E"]
dat_E3 <- squashData(dat, count = 1, bin_size = 2, keep_pts = 0,
min_bin = 2, min_pts = 2)
dat_E3 <- dat_E3[dat_E3$N == 1, "E"]
test_that("are the Es averaged correctly?", {
expect_equal(dat_E1,
c(mean(dat_E[1:5]), mean(dat_E[6:10]), dat_E[11]))
expect_equal(dat_E2,
c(mean(dat_E[1:5]), dat_E[6:11]))
expect_equal(dat_E3,
c(mean(dat_E[12:13]), mean(dat_E[14:15]), mean(dat_E[16:17]),
mean(dat_E[18:19]), mean(dat_E[20:21])))
})
# autoSquash()------------------------------------------------------------------
dat2 <- data.frame(N = c(rep(0, 100), rep(1, 50), rep(2, 5)), E = runif(155))
squash1 <- autoSquash(dat2, keep_pts = 10,
cut_offs = c(25, 75), num_super_pts = c(3, 4, 5))
table1 <- as.matrix(ftable(squash1[, c("N", "weight")]))
test_that("values returned from autoSquash() make sense", {
expect_true(all(table1[1, ] == c(10, 0, 5)))
expect_true(all(table1[2, ] == c(10, 4, 0)))
expect_true(all(table1[3, ] == c(5, 0, 0)))
})
test_that("autoSquash - do warnings/errors get correctly printed?", {
expect_error(autoSquash(squash1),
"'data' has already been squashed",
fixed = TRUE)
expect_error(autoSquash(dat2, keep_pts = c(100, NA, 50)),
"missing values not allowed in 'keep_pts', 'cut_offs', or 'num_super_pts'",
fixed = TRUE)
expect_error(autoSquash(dat2, cut_offs = c()),
"'keep_pts', 'cut_offs', & 'num_super_pts' must have a length of 1 or more",
fixed = TRUE)
expect_error(autoSquash(dat2, cut_offs = c(500, 1000), num_super_pts = c(50, 75)),
"'num_super_pts' must have length 1 more than length of 'cut_offs'",
fixed = TRUE)
expect_error(autoSquash(dat2, cut_offs = c(1000, 500), num_super_pts = c(50, 75, 150)),
"elements in 'cut_offs' must be in increasing order",
fixed = TRUE)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.