test-calibration-correctness.R
In PNADCperiods: Identify Reference Periods in Brazil's PNADC Survey Data

# Tests for weight calibration correctness
# Core invariant: each sub-monthly period (fortnight, week) is calibrated to the
# MONTHLY population total from SIDRA. So sum(weight_fortnight) per fortnight =
# sum(weight_monthly) per month = SIDRA population. Same for weeks.

# =============================================================================
# WEIGHT SUM PRESERVATION TESTS
# =============================================================================

test_that("all months are calibrated to SIDRA target population", {
  # 1. Setup: Create realistic data with multiple quarters
  set.seed(123)
  data <- create_realistic_pnadc(n_quarters = 4, n_upas = 20)

  crosswalk <- pnadc_identify_periods(data, verbose = FALSE)

  # Create custom target_totals so test is offline and deterministic.
  # Use the quarterly V1028 sum / 1000 as a population target so that
  # target_pop * 1000 gives a value the calibration can converge to.
  qtr_wsum <- data[, .(qtr_wsum = sum(V1028, na.rm = TRUE)), by = .(Ano, Trimestre)]
  targets <- qtr_wsum[, {
    months <- (Trimestre - 1L) * 3L + 1:3
    data.table::data.table(
      ref_month_yyyymm = Ano * 100L + months,
      m_populacao = qtr_wsum / 1000
    )
  }, by = .(Ano, Trimestre)][, .(ref_month_yyyymm, m_populacao)]

  # 2. Execute: Identify periods and apply calibration with custom targets
  result <- pnadc_apply_periods(
    data, crosswalk,
    weight_var = "V1028",
    anchor = "quarter",
    calibrate = TRUE,
    calibration_unit = "month",
    target_totals = targets,
    verbose = FALSE
  )

  # 3. Verify: Weights are valid for determined observations
  determined <- result[determined_month == TRUE]

  # All determined obs have positive weights
  expect_true(all(determined$weight_monthly > 0),
              label = "All determined observations have positive weights")

  # No NA weights for determined obs
  expect_false(any(is.na(determined$weight_monthly)),
               label = "No NA weights for determined observations")

  # 4. Verify: ALL months (1, 2, 3) are calibrated to the same SIDRA target
  # Each month's weight sum should equal target_pop * 1000
  month_sums <- determined[, .(w_sum = sum(weight_monthly)),
                           by = .(Ano, ref_month_in_year)]
  month_sums[, ref_month_yyyymm := Ano * 100L + ref_month_in_year]
  merged <- merge(month_sums, targets, by = "ref_month_yyyymm")
  expect_equal(merged$w_sum, merged$m_populacao * 1000, tolerance = 0.01,
               label = "All months calibrated to SIDRA target population")
})


test_that("each fortnight's weights sum to the monthly population total", {
  # 1. Setup: Create data and identify periods
  set.seed(124)
  data <- create_realistic_pnadc(n_quarters = 4, n_upas = 20)

  crosswalk <- pnadc_identify_periods(data, verbose = FALSE)

  # Create custom target_totals scaled to the test data's weight sums.
  # With simulated data, quarterly V1028 sum differs from real SIDRA population.
  # Setting m_populacao = quarterly_sum / 1000 gives a consistent target value.
  qtr_wsum <- data[, .(qtr_wsum = sum(V1028, na.rm = TRUE)), by = .(Ano, Trimestre)]
  targets <- qtr_wsum[, {
    months <- (Trimestre - 1L) * 3L + 1:3
    data.table::data.table(
      ref_month_yyyymm = Ano * 100L + months,
      m_populacao = qtr_wsum / 1000
    )
  }, by = .(Ano, Trimestre)][, .(ref_month_yyyymm, m_populacao)]

  # 2. Execute: Apply fortnight calibration with custom targets
  fortnight_targets <- PNADCperiods:::derive_fortnight_population(targets)
  result_fortnight <- pnadc_apply_periods(
    data, crosswalk,
    weight_var = "V1028",
    anchor = "quarter",
    calibrate = TRUE,
    calibration_unit = "fortnight",
    target_totals = fortnight_targets,
    verbose = FALSE
  )

  # 3. Execute: Apply month calibration with same base targets
  result_month <- pnadc_apply_periods(
    data, crosswalk,
    weight_var = "V1028",
    anchor = "quarter",
    calibrate = TRUE,
    calibration_unit = "month",
    target_totals = targets,
    verbose = FALSE
  )

  # 4. Each INDIVIDUAL fortnight's weights should sum to the monthly population total
  # This is the key calibration invariant: fortnights and weeks are calibrated
  # to the SIDRA monthly population target, so each sub-period = monthly total
  t_fortnight <- result_fortnight[!is.na(ref_fortnight_in_month), .(
    N_fortnight = sum(weight_fortnight, na.rm = TRUE)
  ), by = .(Ano, ref_month_in_year, ref_fortnight_in_month)][
    order(Ano, ref_month_in_year, ref_fortnight_in_month)]

  t_month <- result_month[!is.na(ref_month_in_quarter), .(
    N_month = sum(weight_monthly, na.rm = TRUE)
  ), by = .(Ano, ref_month_in_year)][
    order(Ano, ref_month_in_year)]

  t <- merge(t_fortnight, t_month, by = c("Ano", "ref_month_in_year"))

  if (nrow(t) > 0) {
    expect_equal(t$N_fortnight, t$N_month, tolerance = 0.01,
                 label = "Each fortnight's weights must sum to monthly population total")
  }
})


test_that("each week's weights sum to the monthly population total", {
  # 1. Setup: Create data and identify periods
  set.seed(125)
  data <- create_realistic_pnadc(n_quarters = 4, n_upas = 20)

  crosswalk <- pnadc_identify_periods(data, verbose = FALSE)

  # Custom targets scaled to test data (same approach as fortnight test)
  qtr_wsum <- data[, .(qtr_wsum = sum(V1028, na.rm = TRUE)),
                  by = .(Ano, Trimestre)]
  targets <- qtr_wsum[, {
    months <- (Trimestre - 1L) * 3L + 1:3
    data.table::data.table(
      ref_month_yyyymm = Ano * 100L + months,
      m_populacao = qtr_wsum / 1000
    )
  }, by = .(Ano, Trimestre)][, .(ref_month_yyyymm, m_populacao)]

  # 2. Execute: Apply week calibration with custom targets
  week_targets <- PNADCperiods:::derive_weekly_population(targets)
  result_week <- pnadc_apply_periods(
    data, crosswalk,
    weight_var = "V1028",
    anchor = "quarter",
    calibrate = TRUE,
    calibration_unit = "week",
    target_totals = week_targets,
    verbose = FALSE
  )

  # 3. Execute: Apply month calibration with same base targets
  result_month <- pnadc_apply_periods(
    data, crosswalk,
    weight_var = "V1028",
    anchor = "quarter",
    calibrate = TRUE,
    calibration_unit = "month",
    target_totals = targets,
    verbose = FALSE
  )

  # 4. Each INDIVIDUAL week's weights should sum to monthly total
  t_week <- result_week[!is.na(ref_week_in_month), .(
    N_week = sum(weight_weekly, na.rm = TRUE)
  ), by = .(Ano, ref_month_in_year, ref_week_in_month)][
    order(Ano, ref_month_in_year, ref_week_in_month)]

  t_month <- result_month[!is.na(ref_month_in_quarter), .(
    N_month = sum(weight_monthly, na.rm = TRUE)
  ), by = .(Ano, ref_month_in_year)][
    order(Ano, ref_month_in_year)]

  t <- merge(t_week, t_month, by = c("Ano", "ref_month_in_year"))

  if (nrow(t) > 0) {
    expect_equal(t$N_week, t$N_month, tolerance = 0.01,
                 label = "Each week's weights must sum to monthly total")
  }
})


test_that("no negative weights are produced in any calibration unit", {
  # 1. Setup: Create data
  set.seed(126)
  data <- create_realistic_pnadc(n_quarters = 4, n_upas = 20)

  valid_ufs <- c(11:17, 21:29, 31:35, 41:43, 50:53)
  data[, `:=`(
    UF = sample(valid_ufs, .N, replace = TRUE),
    V1028 = runif(.N, 500, 2000),
    posest = sample(1:500, .N, replace = TRUE),
    posest_sxi = sample(100:999, .N, replace = TRUE)
  )]

  crosswalk <- pnadc_identify_periods(data, verbose = FALSE)

  # Mock targets derived from V1028 sums (offline; no SIDRA fetch).
  mock_monthly <- create_mock_pop_targets(data)

  # 2. Execute: Test all calibration units
  for (unit in c("month", "fortnight", "week")) {
    unit_targets <- switch(unit,
      "month"     = mock_monthly,
      "fortnight" = PNADCperiods:::derive_fortnight_population(mock_monthly),
      "week"      = PNADCperiods:::derive_weekly_population(mock_monthly)
    )
    result <- pnadc_apply_periods(
      data, crosswalk,
      weight_var = "V1028",
      anchor = "quarter",
      calibrate = TRUE,
      calibration_unit = unit,
      target_totals = unit_targets,
      verbose = FALSE
    )

    # 3. Verify: No negative weights allowed
    weight_col <- paste0("weight_", ifelse(unit == "fortnight", "fortnight",
                                           ifelse(unit == "week", "weekly", "monthly")))

    negative_weights <- result[!is.na(get(weight_col)) & get(weight_col) < 0]

    # 4. Context: Negative weights are methodologically invalid
    expect_equal(nrow(negative_weights), 0,
                 label = paste("No negative weights allowed in", unit, "calibration"))
  }
})


test_that("anchor='year' produces valid calibrated weights", {
  # 1. Setup: Create multi-year data
  set.seed(127)
  data <- create_realistic_pnadc(n_quarters = 8, n_upas = 20, start_year = 2022)

  valid_ufs <- c(11:17, 21:29, 31:35, 41:43, 50:53)
  data[, `:=`(
    UF = sample(valid_ufs, .N, replace = TRUE),
    V1028 = runif(.N, 500, 2000),
    posest = sample(1:500, .N, replace = TRUE),
    posest_sxi = sample(100:999, .N, replace = TRUE)
  )]

  crosswalk <- pnadc_identify_periods(data, verbose = FALSE)

  # 2. Execute: Apply with year anchor (mock targets, no SIDRA)
  result <- pnadc_apply_periods(
    data, crosswalk,
    weight_var = "V1028",
    anchor = "year",
    calibrate = TRUE,
    calibration_unit = "month",
    target_totals = create_mock_pop_targets(data),
    verbose = FALSE
  )

  # 3. Verify: Weights are valid
  determined <- result[determined_month == TRUE]
  expect_true(all(determined$weight_monthly > 0),
              label = "All weights are positive")
  expect_false(any(is.na(determined$weight_monthly)),
               label = "No NA weights for determined observations")

  # 4. Context: Year anchor calibrates at year level
  # Each month is still scaled to external population targets
})


# =============================================================================
# INDETERMINATE OBSERVATIONS
# =============================================================================

test_that("indeterminate observations have NA weights", {
  # 1. Setup: Create data with some indeterminate cases
  set.seed(128)
  data <- create_realistic_pnadc(n_quarters = 2, n_upas = 15)

  valid_ufs <- c(11:17, 21:29, 31:35, 41:43, 50:53)
  data[, `:=`(
    UF = sample(valid_ufs, .N, replace = TRUE),
    V1028 = runif(.N, 500, 2000),
    posest = sample(1:500, .N, replace = TRUE),
    posest_sxi = sample(100:999, .N, replace = TRUE)
  )]

  crosswalk <- pnadc_identify_periods(data, verbose = FALSE)

  # 2. Execute: Apply calibration with keep_all=TRUE (mock targets, no SIDRA)
  result <- pnadc_apply_periods(
    data, crosswalk,
    weight_var = "V1028",
    anchor = "quarter",
    calibrate = TRUE,
    calibration_unit = "month",
    target_totals = create_mock_pop_targets(data),
    keep_all = TRUE,
    verbose = FALSE
  )

  # 3. Verify: Indeterminate observations must have NA weights
  indeterminate <- result[is.na(determined_month) | determined_month == FALSE]

  if (nrow(indeterminate) > 0) {
    expect_true(all(is.na(indeterminate$weight_monthly)),
                label = "Indeterminate observations must have NA monthly weights")
  }

  # 4. Context: Only determined observations can have calibrated weights
})


test_that("keep_all=FALSE excludes indeterminate observations", {
  # 1. Setup: Create data
  set.seed(129)
  data <- create_realistic_pnadc(n_quarters = 2, n_upas = 15)

  valid_ufs <- c(11:17, 21:29, 31:35, 41:43, 50:53)
  data[, `:=`(
    UF = sample(valid_ufs, .N, replace = TRUE),
    V1028 = runif(.N, 500, 2000),
    posest = sample(1:500, .N, replace = TRUE),
    posest_sxi = sample(100:999, .N, replace = TRUE)
  )]

  crosswalk <- pnadc_identify_periods(data, verbose = FALSE)

  # 2. Execute: Apply with keep_all=FALSE (mock targets, no SIDRA)
  result <- pnadc_apply_periods(
    data, crosswalk,
    weight_var = "V1028",
    anchor = "quarter",
    calibrate = TRUE,
    calibration_unit = "month",
    target_totals = create_mock_pop_targets(data),
    keep_all = FALSE,
    verbose = FALSE
  )

  # 3. Verify: All returned observations must be determined
  expect_true(all(result$determined_month == TRUE, na.rm = TRUE),
              label = "keep_all=FALSE returns only determined observations")

  # 4. Verify: No NA weights in result
  expect_true(all(!is.na(result$weight_monthly)),
              label = "No NA weights when keep_all=FALSE")
})


# =============================================================================
# EDGE CASES
# =============================================================================

test_that("calibration works with single quarter data", {
  # 1. Setup: Minimal single-quarter data
  set.seed(130)
  data <- create_realistic_pnadc(n_quarters = 1, n_upas = 10)

  valid_ufs <- c(11:17, 21:29, 31:35, 41:43, 50:53)
  data[, `:=`(
    UF = sample(valid_ufs, .N, replace = TRUE),
    V1028 = runif(.N, 500, 2000),
    posest = sample(1:500, .N, replace = TRUE),
    posest_sxi = sample(100:999, .N, replace = TRUE)
  )]

  crosswalk <- pnadc_identify_periods(data, verbose = FALSE)

  # 2. Execute: Apply calibration (mock targets, no SIDRA)
  result <- pnadc_apply_periods(
    data, crosswalk,
    weight_var = "V1028",
    anchor = "quarter",
    calibrate = TRUE,
    calibration_unit = "month",
    target_totals = create_mock_pop_targets(data),
    verbose = FALSE
  )

  # 3. Verify: Basic structure preserved
  expect_true("weight_monthly" %in% names(result))
  expect_equal(nrow(result), nrow(data))

  # 4. Verify: Weights are valid even with sparse data
  if (any(result$determined_month == TRUE, na.rm = TRUE)) {
    determined <- result[determined_month == TRUE]
    expect_true(all(determined$weight_monthly > 0),
                label = "All determined observations have positive weights")
  }
})


test_that("calibration handles empty periods gracefully", {
  # 1. Setup: Create data where some months may be empty
  set.seed(131)
  data <- create_realistic_pnadc(n_quarters = 2, n_upas = 5)

  valid_ufs <- c(11:17, 21:29, 31:35, 41:43, 50:53)
  data[, `:=`(
    UF = sample(valid_ufs, .N, replace = TRUE),
    V1028 = runif(.N, 500, 2000),
    posest = sample(1:500, .N, replace = TRUE),
    posest_sxi = sample(100:999, .N, replace = TRUE)
  )]

  crosswalk <- pnadc_identify_periods(data, verbose = FALSE)

  # 2. Execute: Should not error with sparse data (mock targets, no SIDRA)
  expect_no_error({
    result <- pnadc_apply_periods(
      data, crosswalk,
      weight_var = "V1028",
      anchor = "quarter",
      calibrate = TRUE,
      calibration_unit = "month",
      target_totals = create_mock_pop_targets(data),
      verbose = FALSE
    )
  })

  # 3. Context: Implementation should handle empty cells gracefully
})


# =============================================================================
# SMOOTHING BEHAVIOR
# =============================================================================

test_that("smooth=TRUE modifies weights differently than smooth=FALSE", {
  # 1. Setup: Create data with enough quarters for smoothing to have effect
  # Smoothing uses 3-period window for months, so need at least 6+ quarters
  set.seed(132)
  data <- create_realistic_pnadc(n_quarters = 8, n_upas = 20)

  crosswalk <- pnadc_identify_periods(data, verbose = FALSE)

  # Mock targets (offline; no SIDRA fetch).
  mock_targets <- create_mock_pop_targets(data)

  # 2. Execute: Apply with and without smoothing
  result_no_smooth <- pnadc_apply_periods(
    data, crosswalk,
    weight_var = "V1028",
    anchor = "quarter",
    calibrate = TRUE,
    calibration_unit = "month",
    target_totals = mock_targets,
    smooth = FALSE,
    verbose = FALSE
  )

  result_smooth <- pnadc_apply_periods(
    data, crosswalk,
    weight_var = "V1028",
    anchor = "quarter",
    calibrate = TRUE,
    calibration_unit = "month",
    target_totals = mock_targets,
    smooth = TRUE,
    verbose = FALSE
  )

  # 3. Verify: Both runs should complete successfully
  expect_true(is.data.frame(result_no_smooth))
  expect_true(is.data.frame(result_smooth))
  expect_true("weight_monthly" %in% names(result_no_smooth))
  expect_true("weight_monthly" %in% names(result_smooth))

  # 4. Context: Smoothing parameter is accepted and processed
  # Note: Whether smoothing produces different weights depends on data characteristics
  # (month distribution, determination patterns, etc.). The key is that the parameter
  # is processed without error and produces valid weights.
})


test_that("smoothing produces valid non-negative weights", {
  # 1. Setup: Create data
  set.seed(133)
  data <- create_realistic_pnadc(n_quarters = 4, n_upas = 20)

  crosswalk <- pnadc_identify_periods(data, verbose = FALSE)

  # 2. Execute: Apply with smoothing (mock targets, no SIDRA)
  result <- pnadc_apply_periods(
    data, crosswalk,
    weight_var = "V1028",
    anchor = "quarter",
    calibrate = TRUE,
    calibration_unit = "month",
    target_totals = create_mock_pop_targets(data),
    smooth = TRUE,
    verbose = FALSE
  )

  # 3. Verify: Smoothed weights are valid
  determined <- result[determined_month == TRUE]
  expect_true(all(determined$weight_monthly > 0),
              label = "Smoothed weights are positive")
  expect_false(any(is.na(determined$weight_monthly)),
               label = "No NA values after smoothing")
})
Any scripts or data that you put into this service are public.
PNADCperiods documentation built on April 28, 2026, 9:07 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
PNADCperiods
Identify Reference Periods in Brazil's PNADC Survey Data

tests/testthat/test-calibration-correctness.R
In PNADCperiods: Identify Reference Periods in Brazil's PNADC Survey Data

Try the PNADCperiods package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

PNADCperiods Identify Reference Periods in Brazil's PNADC Survey Data

tests/testthat/test-calibration-correctness.R In PNADCperiods: Identify Reference Periods in Brazil's PNADC Survey Data

Try the PNADCperiods package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

PNADCperiods
Identify Reference Periods in Brazil's PNADC Survey Data

tests/testthat/test-calibration-correctness.R
In PNADCperiods: Identify Reference Periods in Brazil's PNADC Survey Data