# tests/testthat/test-sample_strata.R In optimall: Allocate Samples Among Strata

```context("test-sample_strata")

library(dplyr)
library(optimall)

set.seed(2343)
data <- data.frame(
"strata" = c(
rep("a", times = 15),
rep("b", times = 15),
rep("c", times = 12)
),
"y" = c(rnorm(30, sd = 1), rnorm(12, sd = 2)),
"key" = rbinom(42, 1, 0.2),
"id" = seq(1:42)
)
data\$key[c(1, 16, 31)] <- 1 # To make sure no group gets zero in already_sampled

df <- allocate_wave(
data = data, strata = "strata",
already_sampled = "key", y = "y", nsample = 15
)

set.seed(384)
sampled_data <- sample_strata(
data = data, strata = "strata",
id = "id", design_data = df,
already_sampled = "key", design_strata = "strata",
n_allocated = "n_to_sample"
)

test_that("samples are properly allocated", {
expect_equal(sum(sampled_data\$sample_indicator == 1), 15)
expect_equal(
sort(as.vector(
table(sampled_data[sampled_data\$sample_indicator == 1, ]\$strata)
)),
sort(df\$n_to_sample)
)
})

test_that("no samples in already_sampled are again sampled", {
expect_equal(any(sampled_data[sampled_data\$sample_indicator == 1, ]\$id %in%
data[data\$key == 1, ]\$id), FALSE)
})

test_that("same thing with new seed yields different sample", {
set.seed(6589)
sampled_design_data <- sample_strata(
data = data, strata = "strata",
id = "id", design_data = df,
already_sampled = "key", design_strata = "strata",
n_allocated = "n_to_sample"
)
expect_equal(
length(setdiff(
sampled_data[sampled_data\$sample_indicator == 1, ]\$id,
sampled_design_data[sampled_design_data\$sample_indicator == 1, ]\$id
)) > 0,
TRUE
)
})

test_that("works if already_sampled is NULL", {
design_data <- data
design_data\$key <- rep(0, times = 42)
df2 <- optimum_allocation(
data = design_data, strata = "strata",
y = "y", nsample = 15
)
set.seed(384)
sampled_design_data <- sample_strata(
data = design_data, strata = "strata",
id = "id", design_data = df2,
already_sampled = NULL, design_strata = "strata",
n_allocated = "stratum_size"
)
expect_equal(sum(sampled_design_data\$sample_indicator == 1), 15)
expect_equal(
sort(as.vector(table(
sampled_design_data[sampled_design_data\$sample_indicator == 1, ]\$strata
))),
sort(df2\$stratum_size)
)
})

test_that("Error messages displayed as necessary", {
x <- c(1, 2, 3, 4, 5)
expect_error(
sampled_data <- sample_strata(
data = x,
strata = "strata",
id = "id", design_data = df,
design_strata = "strata",
n_allocated = "n_to_sample"
),
"'data' and 'design_data' must be a dataframe"
)
expect_error(
sampled_data <- sample_strata(
data = data,
strata = "strata_wrong",
id = "id", design_data = df,
design_strata = "strata",
n_allocated = "n_to_sample"
),
"'strata' and 'id' must be strings matching a column"
)
expect_error(
sampled_data <- sample_strata(
data = data,
strata = "strata",
id = "id", design_data = df,
design_strata = "strata_wrong",
n_allocated = "n_to_sample"
),
"'design_strata' and 'n_allocated' must be strings matching"
)
df_extra_row <- rbind(df, df[3, ])
expect_error(
sampled_data <- sample_strata(
data = data,
strata = "strata",
id = "id",
design_data = df_extra_row,
design_strata = "strata",
n_allocated = "n_to_sample"
),
"'design_data' may only contain one row per stratum"
)
expect_error(
sampled_data <- sample_strata(
data = data,
strata = "strata",
id = "id",
design_data = df,
design_strata = "strata",
n_allocated = "n_to_sample"
),
"If not NULL, 'already_sampled' must be a character string"
)
data\$three_key <- rep(c(1, 2, 3), times = dim(data)[1] / 3)
expect_error(
sampled_data <- sample_strata(
data = data,
strata = "strata",
id = "id",
design_data = df,
design_strata = "strata",
n_allocated = "n_to_sample"
),
"has a binary indicator for whether"
)
data\$wrong_two_key <- rep(c(2, 3), times = dim(data)[1] / 2)
expect_error(
sampled_data <- sample_strata(
data = data,
strata = "strata",
id = "id",
design_data = df,
design_strata = "strata",
n_allocated = "n_to_sample"
),
)
data\$wrong_two_key <- c(
rep(1, times = dim(data)[1] - 6), 0, 0, 0,
0, 0, 0
)
expect_error(
sampled_data <- sample_strata(
data = data,
strata = "strata",
id = "id",
design_data = df,
design_strata = "strata",
n_allocated = "n_to_sample"
),
"Total sample size across waves, taken as"
)

df_wrong_strata_name <- df %>%
dplyr::mutate(strata_new = case_when(
strata == "a" ~ "a",
strata == "b" ~ "b",
strata == "c" ~ "c_2"
))
expect_error(
sampled_data <- sample_strata(
data = data,
strata = "strata",
id = "id",
design_data = df_wrong_strata_name,
design_strata = "strata_new",
n_allocated = "n_to_sample"
),
"strata names in 'design_data' must all match strata"
)
})

test_that("works if input is a matrix", {
sampled_data_mat <- sample_strata(
data = data.matrix(data),
strata = "strata",
id = "id",
design_data = data.matrix(df),
design_strata = "strata",
n_allocated = "n_to_sample"
)
expect_equal(sum(sampled_data_mat\$sample_indicator), 15)
})

test_that("returns error if n_allocated is not a whole number", {
df\$n_to_sample <- c(1.5, 2, 4)
expect_error(
sample_strata(
data = data, strata = "strata",
id = "id", design_data = df,
already_sampled = "key", design_strata = "strata",
n_allocated = "n_to_sample"
),
"must specify a numeric column"
)
df\$n_to_sample <- c("a", "b", "c")
expect_error(
sample_strata(
data = data, strata = "strata",
id = "id", design_data = df,
already_sampled = "key", design_strata = "strata",
n_allocated = "n_to_sample"
),
"must specify a numeric column"
)
})

test_that("wave argument is correctly accepted, and it rejects new name
if column  with that name already exists", {
df <- allocate_wave(
data = data, strata = "strata",
already_sampled = "key", y = "y", nsample = 15
)
sampled_data <- sample_strata(
data = data, strata = "strata",
id = "id", design_data = df,
already_sampled = "key", design_strata = "strata", wave = "Wave2",
n_allocated = "n_to_sample")
expect_equal(names(sampled_data)[5], "sample_indicatorWave2")

sampled_data <- sample_strata(
data = sampled_data, strata = "strata",
id = "id", design_data = df,
already_sampled = "key", design_strata = "strata", wave = 2,
n_allocated = "n_to_sample")
expect_equal(names(sampled_data)[6], "sample_indicator2")

sampled_data <- sample_strata(
data = sampled_data, strata = "strata",
id = "id", design_data = df,
already_sampled = "key", design_strata = "strata", wave = "Wave2",
n_allocated = "n_to_sample")
expect_equal(names(sampled_data)[7], "sample_indicator")

})

test_that("probs argument is correctly accepted",{
df <- allocate_wave(
data = data, strata = "strata",
already_sampled = "key", y = "y", nsample = 15,
detailed = TRUE
)
df\$sampprob <- df\$n_to_sample/(df\$npop - df\$nsample_prior)
sampled_data <- sample_strata(
data = data, strata = "strata",
id = "id", design_data = df,
already_sampled = "key", design_strata = "strata",
wave = "Wave2", probs = "sampprob",
n_allocated = "n_to_sample")
expect_equal(names(sampled_data)[6], "sampling_prob")

expect_equal(names(table(df\$sampprob)),
names(table(sampled_data\$sampling_prob)))
expect_equal((sum(df\$n_to_sample)),
sum(table(sampled_data\$sampling_prob)))

expect_warning(sample_strata(
data = sampled_data, strata = "strata",
id = "id", design_data = df,
already_sampled = "key", design_strata = "strata",
wave = "Wave2", probs = "sampprob",
n_allocated = "n_to_sample"), "Overwriting prior")

# and still works if probs is given as a formula
sampled_data <- sample_strata(
data = data, strata = "strata",
id = "id", design_data = df,
already_sampled = "key", design_strata = "strata",
wave = "Wave2", probs = ~n_to_sample/(npop-nsample_prior),
n_allocated = "n_to_sample")
expect_equal(names(sampled_data)[6], "sampling_prob")

expect_equal(names(table(df\$sampprob)),
names(table(sampled_data\$sampling_prob)))
expect_equal((sum(df\$n_to_sample)),
sum(table(sampled_data\$sampling_prob)))

expect_error(sample_strata(
data = data, strata = "strata",
id = "id", design_data = df,
already_sampled = "key", design_strata = "strata",
wave = "Wave2", probs = ~n_to_sample/(npopppp-nsample_prior),
n_allocated = "n_to_sample"), "Variables in probs formula must")
})
```

## Try the optimall package in your browser

Any scripts or data that you put into this service are public.

optimall documentation built on June 22, 2024, 9:34 a.m.