Nothing
# Tests for edge cases and boundary conditions
# Systematic coverage of unusual inputs and boundary transitions
# =============================================================================
# EMPTY AND MINIMAL DATA TESTS
# =============================================================================
test_that("identify_periods rejects empty data", {
# 1. Setup: Create empty data.table with correct structure
empty_data <- data.table::data.table(
Ano = integer(0),
Trimestre = integer(0),
UPA = integer(0),
V1008 = integer(0),
V1014 = integer(0),
V2008 = integer(0),
V20081 = integer(0),
V20082 = integer(0),
V2009 = integer(0)
)
# 2. Verify: Should error on empty data (validation fails)
expect_error(
pnadc_identify_periods(empty_data, verbose = FALSE),
"at least 1 rows",
label = "Empty data should be rejected by validation"
)
# 3. Context: Input validation prevents processing of empty data
})
test_that("identify_periods handles single observation", {
# 1. Setup: Create data with just one person
single_obs <- data.table::data.table(
Ano = 2023L,
Trimestre = 1L,
UPA = 1L,
V1008 = 1L,
V1014 = 1L,
V2008 = 15L,
V20081 = 6L,
V20082 = 1990L,
V2009 = 33L
)
# 2. Execute: Should not error
expect_no_error({
result <- pnadc_identify_periods(single_obs, verbose = FALSE)
})
# 3. Verify: Should return one row
expect_equal(nrow(result), 1)
expect_true("determined_month" %in% names(result))
# 4. Context: Minimal valid input
})
test_that("apply_periods handles all NA weights gracefully", {
# 1. Setup: Create data with NA weights
data <- create_realistic_pnadc(n_quarters = 1, n_upas = 5)
data[, V1028 := NA_real_]
valid_ufs <- c(11:17, 21:29, 31:35, 41:43, 50:53)
data[, `:=`(
UF = sample(valid_ufs, .N, replace = TRUE),
posest = sample(1:500, .N, replace = TRUE),
posest_sxi = sample(100:999, .N, replace = TRUE)
)]
crosswalk <- pnadc_identify_periods(data, verbose = FALSE)
# 2. Execute: Apply without calibration (NA weights)
expect_no_error({
result <- pnadc_apply_periods(
data, crosswalk,
weight_var = "V1028",
anchor = "quarter",
calibrate = FALSE,
verbose = FALSE
)
})
# 3. Context: Missing weight data should not crash
})
# =============================================================================
# YEAR AND QUARTER BOUNDARY TESTS
# =============================================================================
test_that("year boundary transition handled correctly", {
# 1. Setup: Create data spanning year boundary (Q4 2023 + Q1 2024)
# Use 4-quarter data from each year and filter to get correct Q4/Q1 with valid ages
set.seed(100)
data_2023 <- create_realistic_pnadc(n_quarters = 4, n_upas = 10, start_year = 2023)
data_q4 <- data_2023[Trimestre == 4L]
data_2024 <- create_realistic_pnadc(n_quarters = 4, n_upas = 10, start_year = 2024, seed = 99L)
data_q1 <- data_2024[Trimestre == 1L]
# Combine
data <- rbind(data_q4, data_q1)
# 2. Execute: Should handle year transition
expect_no_error({
result <- pnadc_identify_periods(data, verbose = FALSE)
})
# 3. Verify: Both years should be present
expect_true(2023L %in% result$Ano)
expect_true(2024L %in% result$Ano)
expect_true(4L %in% result$Trimestre)
expect_true(1L %in% result$Trimestre)
# 4. Context: Date calculations must handle year boundaries
})
test_that("December 31 to January 1 transition", {
# 1. Setup: Create observation with interview on Dec 31
data <- data.table::data.table(
Ano = 2023L,
Trimestre = 4L,
UPA = 1L,
V1008 = 1L,
V1014 = 1L,
V2008 = 31L, # Day 31
V20081 = 12L, # December
V20082 = 1990L,
V2009 = 33L
)
# 2. Execute: Should not error
expect_no_error({
result <- pnadc_identify_periods(data, verbose = FALSE)
})
# 3. Verify: Should produce valid crosswalk
expect_equal(nrow(result), 1)
# 4. Context: Edge case for last day of year
})
# =============================================================================
# LEAP YEAR TESTS
# =============================================================================
test_that("February 29 birthday in non-leap year", {
# 1. Setup: Person born Feb 29, interviewed in non-leap year (2023)
data <- data.table::data.table(
Ano = 2023L,
Trimestre = 1L,
UPA = 1L,
V1008 = 1L,
V1014 = 1L,
V2008 = 29L, # Day 29
V20081 = 2L, # February
V20082 = 1992L, # Leap year birth
V2009 = 31L # Age
)
# 2. Execute: Should handle gracefully (March 1 used as substitute in non-leap years)
expect_no_error({
result <- pnadc_identify_periods(data, verbose = FALSE)
})
# 3. Verify: Should produce valid crosswalk
expect_equal(nrow(result), 1)
# 4. Context: Implementation substitutes March 1 for Feb 29 in non-leap years
})
test_that("February 29 birthday in leap year", {
# 1. Setup: Person born Feb 29, interviewed in leap year (2024)
data <- data.table::data.table(
Ano = 2024L,
Trimestre = 1L,
UPA = 1L,
V1008 = 1L,
V1014 = 1L,
V2008 = 29L,
V20081 = 2L,
V20082 = 1992L,
V2009 = 32L
)
# 2. Execute: Should handle correctly
expect_no_error({
result <- pnadc_identify_periods(data, verbose = FALSE)
})
# 3. Verify: Valid crosswalk
expect_equal(nrow(result), 1)
# 4. Context: Feb 29 exists in leap years
})
# =============================================================================
# UNKNOWN BIRTHDAY CODES
# =============================================================================
test_that("all unknown birthdays (99/9999) handled", {
# 1. Setup: Create data where all observations have unknown birthdays
data <- create_realistic_pnadc(n_quarters = 2, n_upas = 10)
data[, `:=`(
V2008 = 99L,
V20081 = 99L,
V20082 = 9999L
)]
# 2. Execute: Should not error
expect_no_error({
result <- pnadc_identify_periods(data, verbose = FALSE)
})
# 3. Verify: Determination still works (based on other constraints)
expect_true(nrow(result) > 0)
expect_true("determined_month" %in% names(result))
# 4. Context: Birthday constraints skipped when unknown
})
test_that("mixed known and unknown birthdays", {
# 1. Setup: Half known, half unknown birthdays
set.seed(200)
data <- create_realistic_pnadc(n_quarters = 2, n_upas = 10)
n <- nrow(data)
unknown_idx <- sample(seq_len(n), size = n %/% 2)
data[unknown_idx, `:=`(
V2008 = 99L,
V20082 = 9999L
)]
# 2. Execute: Should not error
expect_no_error({
result <- pnadc_identify_periods(data, verbose = FALSE)
})
# 3. Verify: Both types processed
expect_equal(nrow(result), n)
# 4. Context: Mixed data should work
})
# =============================================================================
# EXTREME AGE VALUES
# =============================================================================
test_that("very young ages handled (age 0)", {
# 1. Setup: Create data with age 0 (infant)
data <- data.table::data.table(
Ano = 2023L,
Trimestre = 1L,
UPA = 1L,
V1008 = 1L,
V1014 = 1L,
V2008 = 1L,
V20081 = 1L,
V20082 = 2023L, # Born same year
V2009 = 0L # Age 0
)
# 2. Execute: Should not error
expect_no_error({
result <- pnadc_identify_periods(data, verbose = FALSE)
})
# 3. Context: PNADC includes all ages
})
test_that("very old ages handled (age 100+)", {
# 1. Setup: Create data with age 100+
data <- data.table::data.table(
Ano = 2023L,
Trimestre = 1L,
UPA = 1L,
V1008 = 1L,
V1014 = 1L,
V2008 = 15L,
V20081 = 6L,
V20082 = 1920L, # Very old
V2009 = 103L # Age 103
)
# 2. Execute: Should not error
expect_no_error({
result <- pnadc_identify_periods(data, verbose = FALSE)
})
# 3. Context: Centenarians exist in data
})
# =============================================================================
# SINGLE VS MULTI-QUARTER BEHAVIOR
# =============================================================================
test_that("single quarter produces lower determination rate", {
# 1. Setup: Same UPAs across 1 quarter vs 4 quarters
set.seed(300)
data_1q <- create_realistic_pnadc(n_quarters = 1, n_upas = 20)
data_4q <- create_realistic_pnadc(n_quarters = 4, n_upas = 20)
# 2. Execute: Identify periods
cw_1q <- pnadc_identify_periods(data_1q, verbose = FALSE)
cw_4q <- pnadc_identify_periods(data_4q, verbose = FALSE)
# 3. Verify: More quarters should give higher determination rate
rate_1q <- cw_1q[, mean(determined_month, na.rm = TRUE)]
rate_4q <- cw_4q[, mean(determined_month, na.rm = TRUE)]
expect_true(rate_4q > rate_1q,
label = paste0("4Q (", round(rate_4q, 3),
") should be > 1Q (", round(rate_1q, 3), ")"))
# 4. Context: Cross-quarter aggregation improves determination
})
# =============================================================================
# ZERO WEIGHT TESTS
# =============================================================================
test_that("zero weights in input handled", {
# 1. Setup: Create data with some zero weights
set.seed(400)
data <- create_realistic_pnadc(n_quarters = 2, n_upas = 10)
valid_ufs <- c(11:17, 21:29, 31:35, 41:43, 50:53)
data[, `:=`(
UF = sample(valid_ufs, .N, replace = TRUE),
V1028 = runif(.N, 500, 2000),
posest = sample(1:500, .N, replace = TRUE),
posest_sxi = sample(100:999, .N, replace = TRUE)
)]
# Set some weights to zero
zero_idx <- sample(1:nrow(data), size = 5)
data[zero_idx, V1028 := 0]
# 2. Execute: Apply with calibration (mock targets, no SIDRA)
crosswalk <- pnadc_identify_periods(data, verbose = FALSE)
result <- NULL
expect_no_error({
result <- pnadc_apply_periods(
data, crosswalk,
weight_var = "V1028",
anchor = "quarter",
calibrate = TRUE,
calibration_unit = "month",
target_totals = create_mock_pop_targets(data),
verbose = FALSE
)
})
# 3. Verify: No negative weights produced
expect_true(all(result$weight_monthly >= 0, na.rm = TRUE))
# 4. Context: Zero weights edge case
})
# =============================================================================
# ALL PERIODS INDETERMINATE TEST
# =============================================================================
test_that("data with no determinations handled", {
# 1. Setup: Create data that's very hard to determine
# Single quarter, wide date ranges, no birthday info
data <- data.table::data.table(
Ano = rep(2023L, 10),
Trimestre = rep(1L, 10),
UPA = 1:10, # All different UPAs
V1008 = 1L,
V1014 = 1:10, # All different V1014
V2008 = 99L, # Unknown birthday
V20081 = 99L,
V20082 = 9999L,
V2009 = 30L
)
# 2. Execute: Identify periods
result <- pnadc_identify_periods(data, verbose = FALSE)
# 3. Verify: Should return crosswalk (even if determination rate is low)
expect_equal(nrow(result), 10)
expect_true("determined_month" %in% names(result))
# 4. Context: Low determination rate is valid outcome
})
# =============================================================================
# PARAMETER VALIDATION TESTS
# =============================================================================
test_that("invalid anchor parameter rejected", {
# 1. Setup: Create data
data <- create_realistic_pnadc(n_quarters = 1, n_upas = 5)
valid_ufs <- c(11:17, 21:29, 31:35, 41:43, 50:53)
data[, `:=`(
UF = sample(valid_ufs, .N, replace = TRUE),
V1028 = runif(.N, 500, 2000),
posest = sample(1:500, .N, replace = TRUE),
posest_sxi = sample(100:999, .N, replace = TRUE)
)]
crosswalk <- pnadc_identify_periods(data, verbose = FALSE)
# 2. Verify: Invalid anchor should error
expect_error(
pnadc_apply_periods(
data, crosswalk,
weight_var = "V1028",
anchor = "invalid_anchor",
calibrate = TRUE,
calibration_unit = "month",
verbose = FALSE
),
"anchor.*quarter.*year",
label = "Invalid anchor parameter should be rejected"
)
})
test_that("invalid calibration_unit rejected", {
# 1. Setup: Create data
data <- create_realistic_pnadc(n_quarters = 1, n_upas = 5)
valid_ufs <- c(11:17, 21:29, 31:35, 41:43, 50:53)
data[, `:=`(
UF = sample(valid_ufs, .N, replace = TRUE),
V1028 = runif(.N, 500, 2000),
posest = sample(1:500, .N, replace = TRUE),
posest_sxi = sample(100:999, .N, replace = TRUE)
)]
crosswalk <- pnadc_identify_periods(data, verbose = FALSE)
# 2. Verify: Invalid calibration_unit should error
# Note: match.arg() throws error before custom message
expect_error(
pnadc_apply_periods(
data, crosswalk,
weight_var = "V1028",
anchor = "quarter",
calibrate = TRUE,
calibration_unit = "invalid_unit",
verbose = FALSE
),
label = "Invalid calibration_unit should be rejected"
)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.