Nothing
# Integration tests for end-to-end pipelines
# These tests verify the complete workflow: identify → apply → calibrate
# =============================================================================
# BASIC END-TO-END PIPELINE TESTS
# =============================================================================
test_that("full pipeline works end-to-end with monthly calibration", {
# 1. Setup: Create realistic multi-quarter data
set.seed(900)
data <- create_realistic_pnadc(n_quarters = 4, n_upas = 30)
# Add calibration columns
valid_ufs <- c(11:17, 21:29, 31:35, 41:43, 50:53)
data[, `:=`(
UF = sample(valid_ufs, .N, replace = TRUE),
V1028 = runif(.N, 500, 2000),
posest = sample(1:500, .N, replace = TRUE),
posest_sxi = sample(100:999, .N, replace = TRUE)
)]
# 2. Execute: Full pipeline
# Step 1: Identify periods
crosswalk <- pnadc_identify_periods(data, verbose = FALSE)
# Step 2: Apply and calibrate (mock targets, no SIDRA)
result <- pnadc_apply_periods(
data, crosswalk,
weight_var = "V1028",
anchor = "quarter",
calibrate = TRUE,
calibration_unit = "month",
target_totals = create_mock_pop_targets(data),
smooth = FALSE,
verbose = FALSE
)
# 3. Verify: Basic structure
expect_true(is.data.frame(result))
expect_equal(nrow(result), nrow(data))
expect_true("weight_monthly" %in% names(result))
expect_true("determined_month" %in% names(result))
# 4. Verify: Weights are valid (not sum preservation)
determined <- result[determined_month == TRUE]
expect_true(all(determined$weight_monthly > 0),
label = "Pipeline produces positive weights")
expect_false(any(is.na(determined$weight_monthly)),
label = "No NA weights for determined observations")
# 5. Verify: Determination rates are reasonable
# With 4 quarters, expect >70% month determination
det_rate <- result[, mean(determined_month, na.rm = TRUE)]
expect_true(det_rate > 0.70,
label = paste0("Month determination rate (", round(det_rate, 3),
") should be >70% with 4 quarters"))
})
test_that("full pipeline works with fortnight calibration", {
# 1. Setup: Create data
set.seed(901)
data <- create_realistic_pnadc(n_quarters = 4, n_upas = 25)
valid_ufs <- c(11:17, 21:29, 31:35, 41:43, 50:53)
data[, `:=`(
UF = sample(valid_ufs, .N, replace = TRUE),
V1028 = runif(.N, 500, 2000),
posest = sample(1:500, .N, replace = TRUE),
posest_sxi = sample(100:999, .N, replace = TRUE)
)]
# 2. Execute: Full pipeline with fortnight (mock targets, no SIDRA)
crosswalk <- pnadc_identify_periods(data, verbose = FALSE)
result <- pnadc_apply_periods(
data, crosswalk,
weight_var = "V1028",
anchor = "quarter",
calibrate = TRUE,
calibration_unit = "fortnight",
target_totals = PNADCperiods:::derive_fortnight_population(
create_mock_pop_targets(data)
),
smooth = FALSE,
verbose = FALSE
)
# 3. Verify: Structure
expect_true("weight_fortnight" %in% names(result))
expect_true("determined_fortnight" %in% names(result))
# 4. Verify: Each fortnight's weights sum to monthly total
# Use custom targets scaled to test data's quarterly V1028 sum so calibration converges
qtr_wsum <- data[, .(qtr_wsum = sum(V1028, na.rm = TRUE)),
by = .(Ano, Trimestre)]
targets <- qtr_wsum[, {
months <- (Trimestre - 1L) * 3L + 1:3
data.table::data.table(
ref_month_yyyymm = Ano * 100L + months,
m_populacao = qtr_wsum / 1000
)
}, by = .(Ano, Trimestre)][, .(ref_month_yyyymm, m_populacao)]
fn_targets <- PNADCperiods:::derive_fortnight_population(targets)
result_fn <- pnadc_apply_periods(
data, crosswalk,
weight_var = "V1028",
anchor = "quarter",
calibrate = TRUE,
calibration_unit = "fortnight",
target_totals = fn_targets,
smooth = FALSE,
verbose = FALSE
)
result_mo <- pnadc_apply_periods(
data, crosswalk,
weight_var = "V1028",
anchor = "quarter",
calibrate = TRUE,
calibration_unit = "month",
target_totals = targets,
smooth = FALSE,
verbose = FALSE
)
t_fortnight <- result_fn[!is.na(ref_fortnight_in_month), .(
N_fn = sum(weight_fortnight, na.rm = TRUE)
), by = .(Ano, ref_month_in_year, ref_fortnight_in_month)]
t_month <- result_mo[!is.na(ref_month_in_quarter), .(
N_mo = sum(weight_monthly, na.rm = TRUE)
), by = .(Ano, ref_month_in_year)]
chk <- merge(t_fortnight, t_month,
by = c("Ano", "ref_month_in_year"))
if (nrow(chk) > 0) {
expect_equal(chk$N_fn, chk$N_mo, tolerance = 0.01,
label = "Fortnight weights sum to monthly total")
}
})
test_that("full pipeline with smoothing produces consistent results", {
# 1. Setup: Create data with enough quarters for smoothing
set.seed(902)
data <- create_realistic_pnadc(n_quarters = 6, n_upas = 25)
valid_ufs <- c(11:17, 21:29, 31:35, 41:43, 50:53)
data[, `:=`(
UF = sample(valid_ufs, .N, replace = TRUE),
V1028 = runif(.N, 500, 2000),
posest = sample(1:500, .N, replace = TRUE),
posest_sxi = sample(100:999, .N, replace = TRUE)
)]
# 2. Execute: Pipeline with smoothing (mock targets, no SIDRA)
crosswalk <- pnadc_identify_periods(data, verbose = FALSE)
result <- pnadc_apply_periods(
data, crosswalk,
weight_var = "V1028",
anchor = "quarter",
calibrate = TRUE,
calibration_unit = "month",
target_totals = create_mock_pop_targets(data),
smooth = TRUE,
verbose = FALSE
)
# 3. Verify: Smoothed weights are valid
determined <- result[determined_month == TRUE]
expect_true(all(determined$weight_monthly > 0),
label = "Smoothed weights are positive")
expect_false(any(is.na(determined$weight_monthly)),
label = "No NA values after smoothing")
})
# =============================================================================
# EXPERIMENTAL STRATEGIES INTEGRATION
# =============================================================================
test_that("pipeline with experimental strategies maintains invariants", {
# 1. Setup: Create data
set.seed(903)
data <- create_realistic_pnadc(n_quarters = 4, n_upas = 25)
valid_ufs <- c(11:17, 21:29, 31:35, 41:43, 50:53)
data[, `:=`(
UF = sample(valid_ufs, .N, replace = TRUE),
V1028 = runif(.N, 500, 2000),
posest = sample(1:500, .N, replace = TRUE),
posest_sxi = sample(100:999, .N, replace = TRUE)
)]
# 2. Execute: Strict identification
crosswalk_strict <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)
# 3. Execute: Experimental strategies
crosswalk_exp <- pnadc_experimental_periods(
crosswalk_strict,
strategy = "both",
confidence_threshold = 0.9,
verbose = FALSE
)
# 4. Execute: Apply calibration with experimental crosswalk
# (mock targets, no SIDRA)
result <- pnadc_apply_periods(
data, crosswalk_exp,
weight_var = "V1028",
anchor = "quarter",
calibrate = TRUE,
calibration_unit = "month",
target_totals = create_mock_pop_targets(data),
verbose = FALSE
)
# 5. Verify: Weights are valid (experimental strategies)
determined <- result[determined_month == TRUE]
expect_true(all(determined$weight_monthly > 0),
label = "Experimental pipeline produces positive weights")
expect_false(any(is.na(determined$weight_monthly)),
label = "No NA weights for determined observations")
# 6. Verify: Experimental improved determination rate
strict_rate <- crosswalk_strict[, mean(determined_month, na.rm = TRUE)]
exp_rate <- crosswalk_exp[, mean(determined_month, na.rm = TRUE)]
expect_true(exp_rate >= strict_rate,
label = paste0("Experimental (", round(exp_rate, 3),
") >= strict (", round(strict_rate, 3), ")"))
})
# =============================================================================
# MULTI-YEAR PIPELINE TESTS
# =============================================================================
test_that("pipeline works with multi-year data (year anchor)", {
# 1. Setup: Create 2-year data (8 quarters)
set.seed(904)
data <- create_realistic_pnadc(n_quarters = 8, n_upas = 25, start_year = 2022)
valid_ufs <- c(11:17, 21:29, 31:35, 41:43, 50:53)
data[, `:=`(
UF = sample(valid_ufs, .N, replace = TRUE),
V1028 = runif(.N, 500, 2000),
posest = sample(1:500, .N, replace = TRUE),
posest_sxi = sample(100:999, .N, replace = TRUE)
)]
# 2. Execute: Pipeline with year anchor (mock targets, no SIDRA)
crosswalk <- pnadc_identify_periods(data, verbose = FALSE)
result <- pnadc_apply_periods(
data, crosswalk,
weight_var = "V1028",
anchor = "year",
calibrate = TRUE,
calibration_unit = "month",
target_totals = create_mock_pop_targets(data),
verbose = FALSE
)
# 3. Verify: Weights are valid
determined <- result[determined_month == TRUE]
expect_true(all(determined$weight_monthly > 0),
label = "Multi-year pipeline produces positive weights")
expect_false(any(is.na(determined$weight_monthly)),
label = "No NA weights for determined observations")
# 4. Verify: High determination rate with 8 quarters
det_rate <- result[, mean(determined_month, na.rm = TRUE)]
expect_true(det_rate > 0.85,
label = paste0("8-quarter determination (", round(det_rate, 3),
") should be >85%"))
})
# =============================================================================
# ERROR HANDLING IN PIPELINE
# =============================================================================
test_that("pipeline handles missing weight columns gracefully", {
# 1. Setup: Create data then remove V1028 to simulate missing weight column
set.seed(905)
data <- create_realistic_pnadc(n_quarters = 2, n_upas = 10)
data[, V1028 := NULL] # Remove weight column to test error handling
crosswalk <- pnadc_identify_periods(data, verbose = FALSE)
# 2. Verify: Should error when requesting calibration without weight columns
expect_error(
pnadc_apply_periods(
data, crosswalk,
weight_var = "V1028", # Column doesn't exist anymore
anchor = "quarter",
calibrate = TRUE,
calibration_unit = "month",
verbose = FALSE
),
"V1028|weight_var|missing",
label = "Missing weight column should error"
)
})
test_that("pipeline works without calibration", {
# 1. Setup: Create data with V1028 column but won't calibrate
set.seed(906)
data <- create_realistic_pnadc(n_quarters = 2, n_upas = 10)
# Add V1028 column (required even when not calibrating)
data[, V1028 := runif(.N, 500, 2000)]
# 2. Execute: Pipeline without calibration
crosswalk <- pnadc_identify_periods(data, verbose = FALSE)
result <- pnadc_apply_periods(
data, crosswalk,
weight_var = "V1028",
anchor = "quarter",
calibrate = FALSE, # No calibration
verbose = FALSE
)
# 3. Verify: Basic structure without calibrated weight columns
expect_true(is.data.frame(result))
expect_false("weight_monthly" %in% names(result))
expect_true("determined_month" %in% names(result))
expect_true("ref_month_in_quarter" %in% names(result))
expect_true("V1028" %in% names(result)) # Original weight column preserved
})
# =============================================================================
# KEEP_ALL PARAMETER INTEGRATION
# =============================================================================
test_that("pipeline with keep_all=FALSE filters correctly", {
# 1. Setup: Create data
set.seed(907)
data <- create_realistic_pnadc(n_quarters = 2, n_upas = 15)
valid_ufs <- c(11:17, 21:29, 31:35, 41:43, 50:53)
data[, `:=`(
UF = sample(valid_ufs, .N, replace = TRUE),
V1028 = runif(.N, 500, 2000),
posest = sample(1:500, .N, replace = TRUE),
posest_sxi = sample(100:999, .N, replace = TRUE)
)]
# 2. Execute: Pipeline with keep_all=TRUE (mock targets, no SIDRA)
crosswalk <- pnadc_identify_periods(data, verbose = FALSE)
mock_targets <- create_mock_pop_targets(data)
result_all <- pnadc_apply_periods(
data, crosswalk,
weight_var = "V1028",
anchor = "quarter",
calibrate = TRUE,
calibration_unit = "month",
target_totals = mock_targets,
keep_all = TRUE,
verbose = FALSE
)
# 3. Execute: Pipeline with keep_all=FALSE
result_filtered <- pnadc_apply_periods(
data, crosswalk,
weight_var = "V1028",
anchor = "quarter",
calibrate = TRUE,
calibration_unit = "month",
target_totals = mock_targets,
keep_all = FALSE,
verbose = FALSE
)
# 4. Verify: keep_all=FALSE returns fewer rows
expect_true(nrow(result_filtered) <= nrow(result_all))
# 5. Verify: All rows in filtered result are determined
expect_true(all(result_filtered$determined_month == TRUE, na.rm = TRUE))
# 6. Verify: No NA weights in filtered result
expect_true(all(!is.na(result_filtered$weight_monthly)))
})
# =============================================================================
# CONSISTENCY CHECKS
# =============================================================================
test_that("pipeline produces consistent results with same data", {
# 1. Setup: Create data
set.seed(908)
data <- create_realistic_pnadc(n_quarters = 3, n_upas = 20)
valid_ufs <- c(11:17, 21:29, 31:35, 41:43, 50:53)
data[, `:=`(
UF = sample(valid_ufs, .N, replace = TRUE),
V1028 = runif(.N, 500, 2000),
posest = sample(1:500, .N, replace = TRUE),
posest_sxi = sample(100:999, .N, replace = TRUE)
)]
# 2. Execute: Pipeline twice with same data (mock targets, no SIDRA)
mock_targets <- create_mock_pop_targets(data)
crosswalk1 <- pnadc_identify_periods(data, verbose = FALSE)
result1 <- pnadc_apply_periods(
data, crosswalk1,
weight_var = "V1028",
anchor = "quarter",
calibrate = TRUE,
calibration_unit = "month",
target_totals = mock_targets,
verbose = FALSE
)
crosswalk2 <- pnadc_identify_periods(data, verbose = FALSE)
result2 <- pnadc_apply_periods(
data, crosswalk2,
weight_var = "V1028",
anchor = "quarter",
calibrate = TRUE,
calibration_unit = "month",
target_totals = mock_targets,
verbose = FALSE
)
# 3. Verify: Results should be identical
expect_equal(result1[determined_month == TRUE]$weight_monthly,
result2[determined_month == TRUE]$weight_monthly,
tolerance = 1e-10,
label = "Pipeline should produce consistent results")
expect_equal(crosswalk1$determined_month,
crosswalk2$determined_month,
label = "Determination should be consistent")
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.