Nothing
# Tests for experimental period identification strategies
# Tests for pnadc_experimental_periods() - probabilistic and UPA aggregation strategies
# =============================================================================
# PROBABILISTIC STRATEGY TESTS
# =============================================================================
test_that("probabilistic strategy requires store_date_bounds=TRUE", {
# 1. Setup: Create crosswalk WITHOUT date bounds
set.seed(400)
data <- create_realistic_pnadc(n_quarters = 4, n_upas = 15)
crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = FALSE)
# 2. Verify: Should error when date bounds not stored
# Error can be about missing columns like date_min, date_max, or week columns
expect_error(
pnadc_experimental_periods(
crosswalk,
strategy = "probabilistic",
verbose = FALSE
),
label = "Probabilistic strategy requires date bounds"
)
# 3. Context: Probabilistic strategy needs date ranges to calculate confidence
})
test_that("probabilistic strategy improves month determination", {
# 1. Setup: Create data with date bounds
set.seed(401)
data <- create_realistic_pnadc(n_quarters = 4, n_upas = 20)
crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)
# Store strict determination rate
strict_rate <- crosswalk[, mean(determined_month, na.rm = TRUE)]
# 2. Execute: Apply probabilistic strategy
result <- pnadc_experimental_periods(
crosswalk,
strategy = "probabilistic",
confidence_threshold = 0.9,
verbose = FALSE
)
# Calculate experimental rate
exp_rate <- result[, mean(determined_month, na.rm = TRUE)]
# 3. Verify: Experimental rate should be >= strict rate
expect_true(exp_rate >= strict_rate,
label = paste0("Probabilistic should maintain or improve rate: ",
"strict=", round(strict_rate, 3), ", exp=", round(exp_rate, 3)))
# 4. Context: Probabilistic adds determinations, never removes strict ones
})
test_that("confidence_threshold parameter affects determination", {
# 1. Setup: Create data with store_date_bounds
set.seed(402)
data <- create_realistic_pnadc(n_quarters = 4, n_upas = 20)
crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)
# 2. Execute: Test with different confidence thresholds
result_08 <- pnadc_experimental_periods(
crosswalk,
strategy = "probabilistic",
confidence_threshold = 0.8,
verbose = FALSE
)
result_09 <- pnadc_experimental_periods(
crosswalk,
strategy = "probabilistic",
confidence_threshold = 0.9,
verbose = FALSE
)
result_095 <- pnadc_experimental_periods(
crosswalk,
strategy = "probabilistic",
confidence_threshold = 0.95,
verbose = FALSE
)
# 3. Verify: Lower threshold should give >= determinations than higher threshold
rate_08 <- result_08[, mean(determined_month, na.rm = TRUE)]
rate_09 <- result_09[, mean(determined_month, na.rm = TRUE)]
rate_095 <- result_095[, mean(determined_month, na.rm = TRUE)]
expect_true(rate_08 >= rate_09 - 0.01, # Allow tiny tolerance
label = paste0("threshold=0.8 (", round(rate_08, 3),
") should be >= threshold=0.9 (", round(rate_09, 3), ")"))
expect_true(rate_09 >= rate_095 - 0.01,
label = paste0("threshold=0.9 (", round(rate_09, 3),
") should be >= threshold=0.95 (", round(rate_095, 3), ")"))
# 4. Context: More conservative thresholds assign fewer probabilistic months
})
test_that("probabilistic strategy preserves strict determinations", {
# 1. Setup: Create data
set.seed(403)
data <- create_realistic_pnadc(n_quarters = 4, n_upas = 15)
crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)
# Identify strict determinations
strict_determined <- crosswalk[determined_month == TRUE]
# 2. Execute: Apply probabilistic
result <- pnadc_experimental_periods(
crosswalk,
strategy = "probabilistic",
confidence_threshold = 0.9,
verbose = FALSE
)
# 3. Verify: All strict determinations must be preserved
result_determined <- result[determined_month == TRUE]
# All strict IDs should still be determined in result
# Note: Crosswalk is at household level; V2003 is not in the crosswalk output
strict_ids <- paste(strict_determined$Ano, strict_determined$Trimestre,
strict_determined$UPA, strict_determined$V1008,
strict_determined$V1014, sep = "_")
result_ids <- paste(result_determined$Ano, result_determined$Trimestre,
result_determined$UPA, result_determined$V1008,
result_determined$V1014, sep = "_")
all_preserved <- all(strict_ids %in% result_ids)
expect_true(all_preserved,
label = "Probabilistic must preserve all strict determinations")
# 4. Context: Experimental strategies only ADD, never remove determinations
})
test_that("probabilistic assignment only for 2-period ranges", {
# 1. Setup: Create data
set.seed(404)
data <- create_realistic_pnadc(n_quarters = 4, n_upas = 15)
crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)
# 2. Execute: Apply probabilistic
result <- pnadc_experimental_periods(
crosswalk,
strategy = "probabilistic",
confidence_threshold = 0.8,
verbose = FALSE
)
# 3. Verify: Check the experimental assignment pattern
# Probabilistic only assigns when there's ambiguity between 2 periods
# This is verified indirectly - the function should not error
expect_true(is.data.frame(result))
expect_true("determined_month" %in% names(result))
# 4. Context: Probabilistic works on 2-month ranges (implementation detail)
# Single-month or 3-month ranges are not probabilistically assigned
})
# =============================================================================
# UPA AGGREGATION STRATEGY TESTS
# =============================================================================
test_that("UPA aggregation extends determinations via consensus", {
# 1. Setup: Create data
set.seed(500)
data <- create_realistic_pnadc(n_quarters = 4, n_upas = 20)
crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)
# Store strict rate
strict_rate <- crosswalk[, mean(determined_month, na.rm = TRUE)]
# 2. Execute: Apply UPA aggregation
result <- pnadc_experimental_periods(
crosswalk,
strategy = "upa_aggregation",
upa_proportion_threshold = 0.5,
verbose = FALSE
)
# Calculate rate after UPA aggregation
upa_rate <- result[, mean(determined_month, na.rm = TRUE)]
# 3. Verify: UPA aggregation should maintain or improve rate
expect_true(upa_rate >= strict_rate,
label = paste0("UPA aggregation maintains or improves: ",
"strict=", round(strict_rate, 3), ", upa=", round(upa_rate, 3)))
# 4. Context: UPA aggregation extends via UPA-level consensus
})
test_that("upa_proportion_threshold affects determination", {
# 1. Setup: Create data
set.seed(501)
data <- create_realistic_pnadc(n_quarters = 4, n_upas = 20)
crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)
# 2. Execute: Test with different thresholds
result_05 <- pnadc_experimental_periods(
crosswalk,
strategy = "upa_aggregation",
upa_proportion_threshold = 0.5,
verbose = FALSE
)
result_08 <- pnadc_experimental_periods(
crosswalk,
strategy = "upa_aggregation",
upa_proportion_threshold = 0.8,
verbose = FALSE
)
# 3. Verify: Lower threshold should give >= determinations
rate_05 <- result_05[, mean(determined_month, na.rm = TRUE)]
rate_08 <- result_08[, mean(determined_month, na.rm = TRUE)]
expect_true(rate_05 >= rate_08 - 0.01,
label = paste0("threshold=0.5 (", round(rate_05, 3),
") should be >= threshold=0.8 (", round(rate_08, 3), ")"))
# 4. Context: Higher proportion threshold requires stronger UPA consensus
})
# =============================================================================
# "BOTH" STRATEGY TESTS
# =============================================================================
test_that("'both' strategy combines probabilistic and UPA aggregation", {
# 1. Setup: Create data
set.seed(600)
data <- create_realistic_pnadc(n_quarters = 4, n_upas = 20)
crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)
# 2. Execute: Apply each strategy separately and combined
result_prob <- pnadc_experimental_periods(
crosswalk,
strategy = "probabilistic",
confidence_threshold = 0.9,
verbose = FALSE
)
result_upa <- pnadc_experimental_periods(
crosswalk,
strategy = "upa_aggregation",
upa_proportion_threshold = 0.5,
verbose = FALSE
)
result_both <- pnadc_experimental_periods(
crosswalk,
strategy = "both",
confidence_threshold = 0.9,
upa_proportion_threshold = 0.5,
verbose = FALSE
)
# 3. Verify: "both" should be >= max(individual strategies)
rate_prob <- result_prob[, mean(determined_month, na.rm = TRUE)]
rate_upa <- result_upa[, mean(determined_month, na.rm = TRUE)]
rate_both <- result_both[, mean(determined_month, na.rm = TRUE)]
max_individual <- max(rate_prob, rate_upa)
expect_true(rate_both >= max_individual - 0.01, # Allow tiny tolerance
label = paste0("'both' (", round(rate_both, 3),
") should be >= max(individual) (", round(max_individual, 3), ")"))
# 4. Context: Combined strategy should identify at least as many as either alone
})
test_that("'both' strategy preserves strict determinations", {
# 1. Setup: Create data
set.seed(601)
data <- create_realistic_pnadc(n_quarters = 4, n_upas = 15)
crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)
# Count strict determinations
n_strict <- crosswalk[, sum(determined_month, na.rm = TRUE)]
# 2. Execute: Apply "both" strategy
result <- pnadc_experimental_periods(
crosswalk,
strategy = "both",
verbose = FALSE
)
# Count experimental determinations
n_exp <- result[, sum(determined_month, na.rm = TRUE)]
# 3. Verify: Should have at least as many determinations as strict
expect_true(n_exp >= n_strict,
label = "Combined strategy must preserve strict determinations")
# 4. Context: All experimental strategies are additive only
})
# =============================================================================
# NESTING PRESERVATION TESTS
# =============================================================================
test_that("experimental strategies maintain nesting (week requires fortnight)", {
# 1. Setup: Create data
set.seed(700)
data <- create_realistic_pnadc(n_quarters = 4, n_upas = 15)
crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)
# 2. Execute: Apply experimental strategy
result <- pnadc_experimental_periods(
crosswalk,
strategy = "both",
verbose = FALSE
)
# 3. Verify: Nesting violation check
# All week-determined must have fortnight-determined
violation <- result[determined_week == TRUE & (is.na(determined_fortnight) | determined_fortnight == FALSE)]
expect_equal(nrow(violation), 0,
label = "Week determination requires fortnight determination (nesting)")
# 4. Context: Nesting is a fundamental invariant
})
test_that("experimental strategies maintain nesting (fortnight requires month)", {
# 1. Setup: Create data
set.seed(701)
data <- create_realistic_pnadc(n_quarters = 4, n_upas = 15)
crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)
# 2. Execute: Apply experimental strategy
result <- pnadc_experimental_periods(
crosswalk,
strategy = "both",
verbose = FALSE
)
# 3. Verify: Nesting violation check
# All fortnight-determined must have month-determined
violation <- result[determined_fortnight == TRUE & (is.na(determined_month) | determined_month == FALSE)]
expect_equal(nrow(violation), 0,
label = "Fortnight determination requires month determination (nesting)")
})
# =============================================================================
# EDGE CASES
# =============================================================================
test_that("experimental strategies handle small data sets", {
# 1. Setup: Create data with enough observations for experimental strategies
set.seed(800)
data <- create_realistic_pnadc(n_quarters = 2, n_upas = 10)
crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)
# 2. Execute: Should not error with reasonable data size
expect_no_error({
result <- pnadc_experimental_periods(
crosswalk,
strategy = "both",
verbose = FALSE
)
})
# 3. Context: Implementation should handle various data sizes gracefully
})
test_that("experimental strategies work with multi-quarter data", {
# 1. Setup: Use multi-quarter data for experimental strategies
# Single quarter may not have enough variation for experimental strategies
set.seed(801)
data <- create_realistic_pnadc(n_quarters = 3, n_upas = 15)
crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)
# 2. Execute: Should work with sufficient data
expect_no_error({
result <- pnadc_experimental_periods(
crosswalk,
strategy = "both",
verbose = FALSE
)
})
# 3. Verify: Basic structure maintained
expect_true(is.data.frame(result))
expect_equal(nrow(result), nrow(crosswalk))
# 4. Context: Experimental strategies need sufficient data variation
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.