tests/testthat/test-period-invariants.R

# Tests for IBGE Period Calendar Invariants
#
# These tests verify that period identification functions NEVER produce
# invalid IBGE calendar values. All invariants must hold by construction.
#
# IBGE Calendar Invariants:
# - Weeks per month: Always exactly 4
# - Fortnights per month: Always exactly 2
# - Weeks per quarter: Always exactly 12
# - Fortnights per quarter: Always exactly 6
# - Fortnight 1 composition: Weeks 1 + 2 of the month
# - Fortnight 2 composition: Weeks 3 + 4 of the month
# - Week uniqueness: No week belongs to multiple months
# - Fortnight uniqueness: No fortnight spans multiple months
# - Valid ranges: ref_week_in_quarter in [1,12], ref_fortnight_in_quarter in [1,6],
#                 ref_month_in_quarter in [1,3]

# =============================================================================
# HELPER FUNCTIONS
# =============================================================================

#' Create test crosswalk with specific invariant violations for testing
#' @param violation_type Type of violation to introduce
create_crosswalk_with_violation <- function(violation_type = c(
  "invalid_week", "invalid_fortnight", "invalid_month",
  "week_without_fortnight", "fortnight_without_month",
  "week_fortnight_mismatch", "fortnight_month_mismatch",
  "week_month_mismatch"
)) {
  violation_type <- match.arg(violation_type)

  # Create a minimal valid crosswalk
  crosswalk <- data.table::data.table(
    Ano = rep(2023L, 10),
    Trimestre = rep(1L, 10),
    UPA = 1:10,
    V1008 = rep(1L, 10),
    V1014 = rep(1L, 10),
    ref_month_in_quarter = c(1L, 1L, 2L, 2L, 3L, 3L, NA_integer_, NA_integer_, 1L, 2L),
    ref_fortnight_in_quarter = c(1L, 2L, 3L, 4L, 5L, 6L, NA_integer_, NA_integer_, NA_integer_, 4L),
    ref_week_in_quarter = c(1L, 4L, 5L, 8L, 9L, 12L, NA_integer_, NA_integer_, NA_integer_, NA_integer_)
  )

  switch(violation_type,
    "invalid_week" = {
      crosswalk[1, ref_week_in_quarter := 13L]  # Invalid: > 12
    },
    "invalid_fortnight" = {
      crosswalk[1, ref_fortnight_in_quarter := 7L]  # Invalid: > 6
    },
    "invalid_month" = {
      crosswalk[1, ref_month_in_quarter := 4L]  # Invalid: > 3
    },
    "week_without_fortnight" = {
      crosswalk[1, ref_fortnight_in_quarter := NA_integer_]  # Nesting violation
    },
    "fortnight_without_month" = {
      crosswalk[1, ref_month_in_quarter := NA_integer_]  # Nesting violation
    },
    "week_fortnight_mismatch" = {
      # Week 1-2 should be in fortnight 1, but we put week 1 in fortnight 2
      crosswalk[1, `:=`(ref_week_in_quarter = 1L, ref_fortnight_in_quarter = 2L)]
    },
    "fortnight_month_mismatch" = {
      # Fortnight 1-2 should be in month 1, but we put fortnight 1 in month 2
      crosswalk[1, `:=`(ref_fortnight_in_quarter = 1L, ref_month_in_quarter = 2L)]
    },
    "week_month_mismatch" = {
      # Week 1-4 should be in month 1, but we put week 1 in month 2
      crosswalk[1, `:=`(ref_week_in_quarter = 1L, ref_month_in_quarter = 2L)]
    }
  )

  crosswalk
}

# =============================================================================
# VALIDATION FUNCTION TESTS
# =============================================================================

test_that("validate_period_invariants detects invalid week values", {
  crosswalk <- create_crosswalk_with_violation("invalid_week")

  # Should fail with strict=TRUE
  expect_error(
    validate_period_invariants(crosswalk, strict = TRUE),
    regexp = "ref_week_in_quarter must be 1-12"
  )

  # Should return violations with strict=FALSE
  result <- validate_period_invariants(crosswalk, strict = FALSE)
  expect_false(result$valid)
  expect_true("invalid_week_values" %in% names(result$violations))
})

test_that("validate_period_invariants detects invalid fortnight values", {
  crosswalk <- create_crosswalk_with_violation("invalid_fortnight")

  expect_error(
    validate_period_invariants(crosswalk, strict = TRUE),
    regexp = "ref_fortnight_in_quarter must be 1-6"
  )

  result <- validate_period_invariants(crosswalk, strict = FALSE)
  expect_false(result$valid)
  expect_true("invalid_fortnight_values" %in% names(result$violations))
})

test_that("validate_period_invariants detects invalid month values", {
  crosswalk <- create_crosswalk_with_violation("invalid_month")

  expect_error(
    validate_period_invariants(crosswalk, strict = TRUE),
    regexp = "ref_month_in_quarter must be 1-3"
  )

  result <- validate_period_invariants(crosswalk, strict = FALSE)
  expect_false(result$valid)
  expect_true("invalid_month_values" %in% names(result$violations))
})

test_that("validate_period_invariants detects week without fortnight", {
  crosswalk <- create_crosswalk_with_violation("week_without_fortnight")

  expect_error(
    validate_period_invariants(crosswalk, strict = TRUE),
    regexp = "Nesting violation.*week determined but fortnight is NA"
  )

  result <- validate_period_invariants(crosswalk, strict = FALSE)
  expect_false(result$valid)
  expect_true("week_without_fortnight" %in% names(result$violations))
})

test_that("validate_period_invariants detects fortnight without month", {
  crosswalk <- create_crosswalk_with_violation("fortnight_without_month")

  expect_error(
    validate_period_invariants(crosswalk, strict = TRUE),
    regexp = "Nesting violation.*fortnight determined but month is NA"
  )

  result <- validate_period_invariants(crosswalk, strict = FALSE)
  expect_false(result$valid)
  expect_true("fortnight_without_month" %in% names(result$violations))
})

test_that("validate_period_invariants detects week-fortnight mismatch", {
  crosswalk <- create_crosswalk_with_violation("week_fortnight_mismatch")

  expect_error(
    validate_period_invariants(crosswalk, strict = TRUE),
    regexp = "Week-fortnight inconsistency"
  )

  result <- validate_period_invariants(crosswalk, strict = FALSE)
  expect_false(result$valid)
  expect_true("week_fortnight_mismatch" %in% names(result$violations))
})

test_that("validate_period_invariants detects fortnight-month mismatch", {
  crosswalk <- create_crosswalk_with_violation("fortnight_month_mismatch")

  expect_error(
    validate_period_invariants(crosswalk, strict = TRUE),
    regexp = "Fortnight-month inconsistency"
  )

  result <- validate_period_invariants(crosswalk, strict = FALSE)
  expect_false(result$valid)
  expect_true("fortnight_month_mismatch" %in% names(result$violations))
})

test_that("validate_period_invariants detects week-month mismatch", {
  crosswalk <- create_crosswalk_with_violation("week_month_mismatch")

  expect_error(
    validate_period_invariants(crosswalk, strict = TRUE),
    regexp = "Week-month inconsistency"
  )

  result <- validate_period_invariants(crosswalk, strict = FALSE)
  expect_false(result$valid)
  expect_true("week_month_mismatch" %in% names(result$violations))
})

test_that("validate_period_invariants passes for valid crosswalk", {
  # Create a valid crosswalk
  crosswalk <- data.table::data.table(
    Ano = rep(2023L, 6),
    Trimestre = rep(1L, 6),
    UPA = 1:6,
    V1008 = rep(1L, 6),
    V1014 = rep(1L, 6),
    ref_month_in_quarter = c(1L, 1L, 2L, 2L, 3L, NA_integer_),
    ref_fortnight_in_quarter = c(1L, 2L, 3L, 4L, NA_integer_, NA_integer_),
    ref_week_in_quarter = c(1L, 4L, 5L, NA_integer_, NA_integer_, NA_integer_)
  )

  # Should pass without error
  expect_silent(validate_period_invariants(crosswalk, strict = TRUE))

  result <- validate_period_invariants(crosswalk, strict = FALSE)
  expect_true(result$valid)
  expect_length(result$violations, 0)
})

# =============================================================================
# STRICT IDENTIFICATION INVARIANT TESTS
# =============================================================================

test_that("strict identification never produces invalid week values", {
  test_data <- create_stacked_pnadc(n_quarters = 4, n_upas = 15)

  result <- pnadc_identify_periods(test_data, verbose = FALSE)

  # Check ref_week_in_quarter is always 1-12 or NA
  invalid_weeks <- result[!is.na(ref_week_in_quarter) &
                           (ref_week_in_quarter < 1L | ref_week_in_quarter > 12L)]
  expect_equal(nrow(invalid_weeks), 0L,
               info = paste("Found", nrow(invalid_weeks), "invalid week values"))
})

test_that("strict identification never produces invalid fortnight values", {
  test_data <- create_stacked_pnadc(n_quarters = 4, n_upas = 15)

  result <- pnadc_identify_periods(test_data, verbose = FALSE)

  # Check ref_fortnight_in_quarter is always 1-6 or NA
  invalid_fortnights <- result[!is.na(ref_fortnight_in_quarter) &
                                 (ref_fortnight_in_quarter < 1L | ref_fortnight_in_quarter > 6L)]
  expect_equal(nrow(invalid_fortnights), 0L,
               info = paste("Found", nrow(invalid_fortnights), "invalid fortnight values"))
})

test_that("strict identification never produces invalid month values", {
  test_data <- create_stacked_pnadc(n_quarters = 4, n_upas = 15)

  result <- pnadc_identify_periods(test_data, verbose = FALSE)

  # Check ref_month_in_quarter is always 1-3 or NA
  invalid_months <- result[!is.na(ref_month_in_quarter) &
                            (ref_month_in_quarter < 1L | ref_month_in_quarter > 3L)]
  expect_equal(nrow(invalid_months), 0L,
               info = paste("Found", nrow(invalid_months), "invalid month values"))
})

test_that("strict identification maintains week-fortnight consistency", {
  test_data <- create_stacked_pnadc(n_quarters = 4, n_upas = 15)

  result <- pnadc_identify_periods(test_data, verbose = FALSE)

  # Check: week 1-2 -> fortnight 1, week 3-4 -> fortnight 2, etc.
  # Expected fortnight = ((week - 1) %/% 2) + 1
  inconsistent <- result[
    !is.na(ref_week_in_quarter) & !is.na(ref_fortnight_in_quarter) &
      (((ref_week_in_quarter - 1L) %/% 2L) + 1L) != ref_fortnight_in_quarter
  ]
  expect_equal(nrow(inconsistent), 0L,
               info = paste("Found", nrow(inconsistent), "week-fortnight inconsistencies"))
})

test_that("strict identification maintains fortnight-month consistency", {
  test_data <- create_stacked_pnadc(n_quarters = 4, n_upas = 15)

  result <- pnadc_identify_periods(test_data, verbose = FALSE)

  # Check: fortnight 1-2 -> month 1, fortnight 3-4 -> month 2, fortnight 5-6 -> month 3
  # Expected month = ((fortnight - 1) %/% 2) + 1
  inconsistent <- result[
    !is.na(ref_fortnight_in_quarter) & !is.na(ref_month_in_quarter) &
      (((ref_fortnight_in_quarter - 1L) %/% 2L) + 1L) != ref_month_in_quarter
  ]
  expect_equal(nrow(inconsistent), 0L,
               info = paste("Found", nrow(inconsistent), "fortnight-month inconsistencies"))
})

test_that("strict identification maintains week-month consistency", {
  test_data <- create_stacked_pnadc(n_quarters = 4, n_upas = 15)

  result <- pnadc_identify_periods(test_data, verbose = FALSE)

  # Check: weeks 1-4 -> month 1, weeks 5-8 -> month 2, weeks 9-12 -> month 3
  # Expected month = ((week - 1) %/% 4) + 1
  inconsistent <- result[
    !is.na(ref_week_in_quarter) & !is.na(ref_month_in_quarter) &
      (((ref_week_in_quarter - 1L) %/% 4L) + 1L) != ref_month_in_quarter
  ]
  expect_equal(nrow(inconsistent), 0L,
               info = paste("Found", nrow(inconsistent), "week-month inconsistencies"))
})

# =============================================================================
# EXPERIMENTAL STRATEGIES INVARIANT TESTS
# =============================================================================

test_that("experimental strategies never produce invalid week values", {
  test_data <- create_stacked_pnadc(n_quarters = 4, n_upas = 15)
  crosswalk <- pnadc_identify_periods(test_data, verbose = FALSE, store_date_bounds = TRUE)

  # Test probabilistic strategy
  result_prob <- pnadc_experimental_periods(
    crosswalk,
    strategy = "probabilistic",
    confidence_threshold = 0.9,
    verbose = FALSE
  )

  # Check ref_week_in_month is always 1-4 or NA (experimental strategies update main columns)
  invalid_weeks <- result_prob[!is.na(ref_week_in_month) &
                                 (ref_week_in_month < 1L | ref_week_in_month > 4L)]
  expect_equal(nrow(invalid_weeks), 0L,
               info = paste("Probabilistic strategy produced", nrow(invalid_weeks), "invalid week values"))

  # Test UPA aggregation strategy
  result_upa <- pnadc_experimental_periods(
    crosswalk,
    strategy = "upa_aggregation",
    upa_proportion_threshold = 0.5,
    verbose = FALSE
  )

  invalid_weeks_upa <- result_upa[!is.na(ref_week_in_month) &
                                    (ref_week_in_month < 1L | ref_week_in_month > 4L)]
  expect_equal(nrow(invalid_weeks_upa), 0L,
               info = paste("UPA aggregation produced", nrow(invalid_weeks_upa), "invalid week values"))
})

test_that("experimental strategies never produce invalid fortnight values", {
  test_data <- create_stacked_pnadc(n_quarters = 4, n_upas = 15)
  crosswalk <- pnadc_identify_periods(test_data, verbose = FALSE, store_date_bounds = TRUE)

  # Test probabilistic strategy
  result_prob <- pnadc_experimental_periods(
    crosswalk,
    strategy = "probabilistic",
    confidence_threshold = 0.9,
    verbose = FALSE
  )

  # Check ref_fortnight_in_month is always 1-2 or NA (experimental strategies update main columns)
  invalid_fortnights <- result_prob[!is.na(ref_fortnight_in_month) &
                                      (ref_fortnight_in_month < 1L | ref_fortnight_in_month > 2L)]
  expect_equal(nrow(invalid_fortnights), 0L,
               info = paste("Probabilistic strategy produced", nrow(invalid_fortnights),
                           "invalid fortnight values"))

  # Test UPA aggregation strategy
  result_upa <- pnadc_experimental_periods(
    crosswalk,
    strategy = "upa_aggregation",
    upa_proportion_threshold = 0.5,
    verbose = FALSE
  )

  invalid_fortnights_upa <- result_upa[!is.na(ref_fortnight_in_month) &
                                         (ref_fortnight_in_month < 1L | ref_fortnight_in_month > 2L)]
  expect_equal(nrow(invalid_fortnights_upa), 0L,
               info = paste("UPA aggregation produced", nrow(invalid_fortnights_upa),
                           "invalid fortnight values"))
})

test_that("experimental strategies never produce invalid month values", {
  test_data <- create_stacked_pnadc(n_quarters = 4, n_upas = 15)
  crosswalk <- pnadc_identify_periods(test_data, verbose = FALSE, store_date_bounds = TRUE)

  # Test probabilistic strategy
  result_prob <- pnadc_experimental_periods(
    crosswalk,
    strategy = "probabilistic",
    confidence_threshold = 0.9,
    verbose = FALSE
  )

  # Check ref_month_in_quarter is always 1-3 or NA (experimental strategies update main columns)
  invalid_months <- result_prob[!is.na(ref_month_in_quarter) &
                                  (ref_month_in_quarter < 1L | ref_month_in_quarter > 3L)]
  expect_equal(nrow(invalid_months), 0L,
               info = paste("Probabilistic strategy produced", nrow(invalid_months),
                           "invalid month values"))

  # Test UPA aggregation strategy
  result_upa <- pnadc_experimental_periods(
    crosswalk,
    strategy = "upa_aggregation",
    upa_proportion_threshold = 0.5,
    verbose = FALSE
  )

  invalid_months_upa <- result_upa[!is.na(ref_month_in_quarter) &
                                     (ref_month_in_quarter < 1L | ref_month_in_quarter > 3L)]
  expect_equal(nrow(invalid_months_upa), 0L,
               info = paste("UPA aggregation produced", nrow(invalid_months_upa),
                           "invalid month values"))
})

test_that("combined strategy maintains all invariants", {
  test_data <- create_stacked_pnadc(n_quarters = 4, n_upas = 15)
  crosswalk <- pnadc_identify_periods(test_data, verbose = FALSE, store_date_bounds = TRUE)

  # Test combined "both" strategy
  result <- pnadc_experimental_periods(
    crosswalk,
    strategy = "both",
    confidence_threshold = 0.9,
    upa_proportion_threshold = 0.5,
    verbose = FALSE
  )

  # Check all ranges (experimental strategies update main columns)
  expect_equal(
    nrow(result[!is.na(ref_month_in_quarter) & (ref_month_in_quarter < 1L | ref_month_in_quarter > 3L)]),
    0L,
    info = "Invalid month values in combined strategy"
  )
  expect_equal(
    nrow(result[!is.na(ref_fortnight_in_month) & (ref_fortnight_in_month < 1L | ref_fortnight_in_month > 2L)]),
    0L,
    info = "Invalid fortnight values in combined strategy"
  )
  expect_equal(
    nrow(result[!is.na(ref_week_in_month) & (ref_week_in_month < 1L | ref_week_in_month > 4L)]),
    0L,
    info = "Invalid week values in combined strategy"
  )
})

# =============================================================================
# EDGE CASE TESTS
# =============================================================================

test_that("invariants hold for single-quarter data", {
  test_data <- create_stacked_pnadc(n_quarters = 1, n_upas = 10)

  result <- pnadc_identify_periods(test_data, verbose = FALSE)

  # All invariants should still hold
  result_check <- validate_period_invariants(result, strict = FALSE)
  expect_true(result_check$valid,
              info = paste("Violations found:",
                          paste(names(result_check$violations), collapse = ", ")))
})

test_that("invariants hold for many-quarter data", {
  test_data <- create_stacked_pnadc(n_quarters = 12, n_upas = 10)

  result <- pnadc_identify_periods(test_data, verbose = FALSE)

  # All invariants should still hold
  result_check <- validate_period_invariants(result, strict = FALSE)
  expect_true(result_check$valid,
              info = paste("Violations found:",
                          paste(names(result_check$violations), collapse = ", ")))
})

test_that("invariants hold per quarter", {
  test_data <- create_stacked_pnadc(n_quarters = 8, n_upas = 10)

  result <- pnadc_identify_periods(test_data, verbose = FALSE)

  # Check invariants per quarter
  for (q in unique(result$Trimestre)) {
    quarter_data <- result[Trimestre == q]
    result_check <- validate_period_invariants(quarter_data, strict = FALSE)
    expect_true(result_check$valid,
                info = paste("Quarter", q, "violations:",
                            paste(names(result_check$violations), collapse = ", ")))
  }
})

Try the PNADCperiods package in your browser

Any scripts or data that you put into this service are public.

PNADCperiods documentation built on April 28, 2026, 9:07 a.m.