Nothing
context("test-sample_strata")
library(dplyr)
library(optimall)
set.seed(2343)
data <- data.frame(
"strata" = c(
rep("a", times = 15),
rep("b", times = 15),
rep("c", times = 12)
),
"y" = c(rnorm(30, sd = 1), rnorm(12, sd = 2)),
"key" = rbinom(42, 1, 0.2),
"id" = seq(1:42)
)
data$key[c(1, 16, 31)] <- 1 # To make sure no group gets zero in already_sampled
df <- allocate_wave(
data = data, strata = "strata",
already_sampled = "key", y = "y", nsample = 15
)
set.seed(384)
sampled_data <- sample_strata(
data = data, strata = "strata",
id = "id", design_data = df,
already_sampled = "key", design_strata = "strata",
n_allocated = "n_to_sample"
)
test_that("samples are properly allocated", {
expect_equal(sum(sampled_data$sample_indicator == 1), 15)
expect_equal(
sort(as.vector(
table(sampled_data[sampled_data$sample_indicator == 1, ]$strata)
)),
sort(df$n_to_sample)
)
})
test_that("no samples in already_sampled are again sampled", {
expect_equal(any(sampled_data[sampled_data$sample_indicator == 1, ]$id %in%
data[data$key == 1, ]$id), FALSE)
})
test_that("same thing with new seed yields different sample", {
set.seed(6589)
sampled_design_data <- sample_strata(
data = data, strata = "strata",
id = "id", design_data = df,
already_sampled = "key", design_strata = "strata",
n_allocated = "n_to_sample"
)
expect_equal(
length(setdiff(
sampled_data[sampled_data$sample_indicator == 1, ]$id,
sampled_design_data[sampled_design_data$sample_indicator == 1, ]$id
)) > 0,
TRUE
)
})
test_that("works if already_sampled is NULL", {
design_data <- data
design_data$key <- rep(0, times = 42)
df2 <- optimum_allocation(
data = design_data, strata = "strata",
y = "y", nsample = 15
)
set.seed(384)
sampled_design_data <- sample_strata(
data = design_data, strata = "strata",
id = "id", design_data = df2,
already_sampled = NULL, design_strata = "strata",
n_allocated = "stratum_size"
)
expect_equal(sum(sampled_design_data$sample_indicator == 1), 15)
expect_equal(
sort(as.vector(table(
sampled_design_data[sampled_design_data$sample_indicator == 1, ]$strata
))),
sort(df2$stratum_size)
)
})
test_that("Error messages displayed as necessary", {
x <- c(1, 2, 3, 4, 5)
expect_error(
sampled_data <- sample_strata(
data = x,
strata = "strata",
id = "id", design_data = df,
already_sampled = "key",
design_strata = "strata",
n_allocated = "n_to_sample"
),
"'data' and 'design_data' must be a dataframe"
)
expect_error(
sampled_data <- sample_strata(
data = data,
strata = "strata_wrong",
id = "id", design_data = df,
already_sampled = "key",
design_strata = "strata",
n_allocated = "n_to_sample"
),
"'strata' and 'id' must be strings matching a column"
)
expect_error(
sampled_data <- sample_strata(
data = data,
strata = "strata",
id = "id", design_data = df,
already_sampled = "key",
design_strata = "strata_wrong",
n_allocated = "n_to_sample"
),
"'design_strata' and 'n_allocated' must be strings matching"
)
df_extra_row <- rbind(df, df[3, ])
expect_error(
sampled_data <- sample_strata(
data = data,
strata = "strata",
id = "id",
design_data = df_extra_row,
already_sampled = "key",
design_strata = "strata",
n_allocated = "n_to_sample"
),
"'design_data' may only contain one row per stratum"
)
expect_error(
sampled_data <- sample_strata(
data = data,
strata = "strata",
id = "id",
design_data = df,
already_sampled = "key_wrong",
design_strata = "strata",
n_allocated = "n_to_sample"
),
"If not NULL, 'already_sampled' must be a character string"
)
data$three_key <- rep(c(1, 2, 3), times = dim(data)[1] / 3)
expect_error(
sampled_data <- sample_strata(
data = data,
strata = "strata",
id = "id",
design_data = df,
already_sampled = "three_key",
design_strata = "strata",
n_allocated = "n_to_sample"
),
"has a binary indicator for whether"
)
data$wrong_two_key <- rep(c(2, 3), times = dim(data)[1] / 2)
expect_error(
sampled_data <- sample_strata(
data = data,
strata = "strata",
id = "id",
design_data = df,
already_sampled = "wrong_two_key",
design_strata = "strata",
n_allocated = "n_to_sample"
),
"'already_sampled' column must contain '1'"
)
data$wrong_two_key <- c(
rep(1, times = dim(data)[1] - 6), 0, 0, 0,
0, 0, 0
)
expect_error(
sampled_data <- sample_strata(
data = data,
strata = "strata",
id = "id",
design_data = df,
already_sampled = "wrong_two_key",
design_strata = "strata",
n_allocated = "n_to_sample"
),
"Total sample size across waves, taken as"
)
df_wrong_strata_name <- df %>%
dplyr::mutate(strata_new = case_when(
strata == "a" ~ "a",
strata == "b" ~ "b",
strata == "c" ~ "c_2"
))
expect_error(
sampled_data <- sample_strata(
data = data,
strata = "strata",
id = "id",
design_data = df_wrong_strata_name,
already_sampled = "key",
design_strata = "strata_new",
n_allocated = "n_to_sample"
),
"strata names in 'design_data' must all match strata"
)
})
test_that("works if input is a matrix", {
sampled_data_mat <- sample_strata(
data = data.matrix(data),
strata = "strata",
id = "id",
design_data = data.matrix(df),
already_sampled = "key",
design_strata = "strata",
n_allocated = "n_to_sample"
)
expect_equal(sum(sampled_data_mat$sample_indicator), 15)
})
test_that("returns error if n_allocated is not a whole number", {
df$n_to_sample <- c(1.5, 2, 4)
expect_error(
sample_strata(
data = data, strata = "strata",
id = "id", design_data = df,
already_sampled = "key", design_strata = "strata",
n_allocated = "n_to_sample"
),
"must specify a numeric column"
)
df$n_to_sample <- c("a", "b", "c")
expect_error(
sample_strata(
data = data, strata = "strata",
id = "id", design_data = df,
already_sampled = "key", design_strata = "strata",
n_allocated = "n_to_sample"
),
"must specify a numeric column"
)
})
test_that("wave argument is correctly accepted, and it rejects new name
if column with that name already exists", {
df <- allocate_wave(
data = data, strata = "strata",
already_sampled = "key", y = "y", nsample = 15
)
sampled_data <- sample_strata(
data = data, strata = "strata",
id = "id", design_data = df,
already_sampled = "key", design_strata = "strata", wave = "Wave2",
n_allocated = "n_to_sample")
expect_equal(names(sampled_data)[5], "sample_indicatorWave2")
sampled_data <- sample_strata(
data = sampled_data, strata = "strata",
id = "id", design_data = df,
already_sampled = "key", design_strata = "strata", wave = 2,
n_allocated = "n_to_sample")
expect_equal(names(sampled_data)[6], "sample_indicator2")
sampled_data <- sample_strata(
data = sampled_data, strata = "strata",
id = "id", design_data = df,
already_sampled = "key", design_strata = "strata", wave = "Wave2",
n_allocated = "n_to_sample")
expect_equal(names(sampled_data)[7], "sample_indicator")
})
test_that("probs argument is correctly accepted",{
df <- allocate_wave(
data = data, strata = "strata",
already_sampled = "key", y = "y", nsample = 15,
detailed = TRUE
)
df$sampprob <- df$n_to_sample/(df$npop - df$nsample_prior)
sampled_data <- sample_strata(
data = data, strata = "strata",
id = "id", design_data = df,
already_sampled = "key", design_strata = "strata",
wave = "Wave2", probs = "sampprob",
n_allocated = "n_to_sample")
expect_equal(names(sampled_data)[6], "sampling_prob")
expect_equal(names(table(df$sampprob)),
names(table(sampled_data$sampling_prob)))
expect_equal((sum(df$n_to_sample)),
sum(table(sampled_data$sampling_prob)))
expect_warning(sample_strata(
data = sampled_data, strata = "strata",
id = "id", design_data = df,
already_sampled = "key", design_strata = "strata",
wave = "Wave2", probs = "sampprob",
n_allocated = "n_to_sample"), "Overwriting prior")
# and still works if probs is given as a formula
sampled_data <- sample_strata(
data = data, strata = "strata",
id = "id", design_data = df,
already_sampled = "key", design_strata = "strata",
wave = "Wave2", probs = ~n_to_sample/(npop-nsample_prior),
n_allocated = "n_to_sample")
expect_equal(names(sampled_data)[6], "sampling_prob")
expect_equal(names(table(df$sampprob)),
names(table(sampled_data$sampling_prob)))
expect_equal((sum(df$n_to_sample)),
sum(table(sampled_data$sampling_prob)))
expect_error(sample_strata(
data = data, strata = "strata",
id = "id", design_data = df,
already_sampled = "key", design_strata = "strata",
wave = "Wave2", probs = ~n_to_sample/(npopppp-nsample_prior),
n_allocated = "n_to_sample"), "Variables in probs formula must")
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.