Nothing
context("test-allocate_wave")
library(dplyr)
library(optimall)
data <- data.frame(
"strata" = c(
rep("a", times = 15),
rep("b", times = 15),
rep("c", times = 12)
),
"y" = c(rnorm(30, sd = 1), rnorm(12, sd = 1.5)),
"key" = rbinom(42, 1, 0.2)
)
data$key[c(1, 16, 31)] <- 1 # To make sure no group gets zero in already_sampled
test_that("the output of allocate_wave is as expected", {
output <- allocate_wave(
data = data, strata = "strata",
already_sampled = "key", y = "y", nsample = 15
)
expect_equal(
output$nsample_actual,
optimum_allocation(
data = data, strata = "strata",
y = "y",
nsample = sum(data$key) + 15
)$stratum_size
)
# Only works if no oversampling in already_sampled
expect_equal(sum(output$n_to_sample), 15)
expect_equal(
sum(output$n_to_sample) + sum(output$nsample_prior),
sum(output$nsample_actual)
)
})
test_that("the output is as expected with allocation_method = Neyman", {
output <- allocate_wave(
data = data, strata = "strata",
already_sampled = "key", y = "y", nsample = 15, allocation_method = "Neyman"
)
expect_equal(
output$nsample_actual,
optimum_allocation(
data = data, strata = "strata",
y = "y", method = "Neyman",
nsample = sum(data$key) + 15
)$stratum_size
)
# Only works if no oversampling in already_sampled
expect_lt(sum(output$n_to_sample), 17)
expect_gt(sum(output$n_to_sample), 13)
expect_equal(
sum(output$n_to_sample) + sum(output$nsample_prior),
sum(output$nsample_actual)
)
})
test_that("If there is oversampling, allocate_wave does keeps strata
at least as large as they were in prior samples and total
sample sizes add up properly", {
data$key <- c(
rep(1, times = 13), 0, 0,
rep(0, times = 10),
rep(1, times = 5),
rep(0, times = 8),
rep(1, times = 4)
)
# total of 42. stratum a has been oversampled.
# Total prior nsample is 22.
output_over <- allocate_wave(
data = data,
strata = "strata", already_sampled = "key",
y = "y", nsample = 8
)
expect_equal(sum(output_over$nsample_actual), 30)
expect_equal(any(output_over$nsample_actual < c(13, 5, 4)), FALSE)
expect_equal(
output_over$n_to_sample + output_over$nsample_prior,
output_over$nsample_actual
)
output_over_simple <- allocate_wave(
data = data, strata = "strata",
already_sampled = "key", y = "y",
nsample = 8, method = "simple"
)
# and for simple method
expect_equal(sum(output_over_simple$nsample_actual), 30)
expect_equal(any(output_over_simple$nsample_actual < c(13, 5, 4)), FALSE)
expect_equal(
output_over_simple$n_to_sample +
output_over_simple$nsample_prior,
output_over_simple$nsample_actual
)
})
test_that("the output of allocate_wave matches the output of
optimum_allocation where it should", {
data2 <- data
data2$key2 <- c(rep(1, times = 10), rbinom(32, 1, 0.2))
output_wave <- allocate_wave(
data = data2, strata = "strata",
already_sampled = "key2", y = "y", nsample = 12,
detailed = TRUE
)
output_opt <- optimum_allocation(
data = data2, strata = "strata",
y = "y",
nsample = sum(data2$key2 == 1) + 12
)
expect_equal(output_wave$nsample_optimal, output_opt$stratum_size)
expect_equal(output_wave$sd, output_opt$sd)
expect_equal(
sum(output_wave$nsample_actual),
sum(output_opt$stratum_size)
)
expect_equal(
all((output_wave$nsample_prior +
output_wave$n_to_sample) ==
output_wave$nsample_actual),
TRUE
)
})
test_that("y must be numeric, as it has to be for optimum_allocation", {
data2 <- data
data2$y <- as.character(data2$y)
expect_error(
allocate_wave(
data = data2, strata = "strata",
already_sampled = "key", y = "y", nsample = 20
),
"'y' must be numeric."
)
})
test_that("nsample cannot be larger than npop - n_sampled_prior", {
expect_error(allocate_wave(
data = data, strata = "strata",
already_sampled = "key", y = "y", nsample = 39
),
"Total sample size across waves, taken as",
fixed = TRUE
)
})
test_that("error if data available for less than two samples", {
data3 <- data %>%
dplyr::mutate(y2 = ifelse(strata == "a", NA, y))
expect_error(allocate_wave(
data = data3, strata = "strata",
already_sampled = "key", y = "y2", nsample = 15
),
"Function requires at least two observations",
fixed = TRUE
)
})
test_that("detailed = TRUE gives all columns of interest", {
output <- allocate_wave(
data = data, strata = "strata",
already_sampled = "key", y = "y", nsample = 20,
detailed = TRUE
)
expect_equal(
names(output),
c(
"strata", "npop", "nsample_optimal", "nsample_actual",
"nsample_prior", "n_to_sample", "sd"
)
)
})
test_that("basic errors work", {
expect_error(
allocate_wave(
data = data, strata = "strata",
already_sampled = "key", y = "y_wrong", nsample = 20
),
"'y' must be a character string"
)
expect_error(
allocate_wave(
data = data, strata = "strata",
already_sampled = "key_wrong", y = "y", nsample = 20
),
"'already_sampled' must be a character string"
)
expect_error(
allocate_wave(
data = data, strata = "strata_wrong",
already_sampled = "key", y = "y", nsample = 20
),
"'strata' must be a character string"
)
bad_data <- data
bad_data$key_wrong <- rep(c(1, 2, 3), times = dim(bad_data)[1] / 3)
expect_error(
allocate_wave(
data = bad_data, strata = "strata",
already_sampled = "key_wrong", y = "y", nsample = 20
),
"has a binary indicator for whether each unit"
)
bad_data$key[3] <- NA
expect_error(
allocate_wave(
data = bad_data, strata = "strata",
already_sampled = "key", y = "y", nsample = 20
),
"cannot contain NAs"
)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.