tests/testthat/test-nesting-enforcement.R

# Tests for nesting enforcement in period identification
#
# These tests verify the fundamental guarantee of the nested algorithm:
# - Fortnights can ONLY be determined for observations with determined months
# - Weeks can ONLY be determined for observations with determined fortnights
#
# This nesting is enforced BY CONSTRUCTION in the algorithm, not by post-hoc cleanup.

# =============================================================================
# HELPER FUNCTIONS
# =============================================================================

# Note: Using shared test data generators from helper-test-data.R
# - create_realistic_pnadc() for test data generation

# =============================================================================
# NESTING ENFORCEMENT TESTS
# =============================================================================

test_that("fortnight determination requires month determination", {
  # This is the KEY test: no observation should have determined_fortnight = TRUE

  # while determined_month = FALSE
  test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10)

  result <- pnadc_identify_periods(test_data, verbose = FALSE)

  # Count violations
  n_violations <- sum(result$determined_fortnight & !result$determined_month, na.rm = TRUE)

  expect_equal(
    n_violations, 0L,
    info = paste(
      "Found", n_violations, "observations with determined fortnight but undetermined month.",
      "This violates the nesting requirement."
    )
  )
})

test_that("week determination requires fortnight determination", {
  # No observation should have determined_week = TRUE while determined_fortnight = FALSE
  test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10)

  result <- pnadc_identify_periods(test_data, verbose = FALSE)

  # Count violations
  n_violations <- sum(result$determined_week & !result$determined_fortnight, na.rm = TRUE)

  expect_equal(
    n_violations, 0L,
    info = paste(
      "Found", n_violations, "observations with determined week but undetermined fortnight.",
      "This violates the nesting requirement."
    )
  )
})

test_that("week determination implies month determination (transitive)", {
  # By transitivity: determined_week => determined_fortnight => determined_month
  test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10)

  result <- pnadc_identify_periods(test_data, verbose = FALSE)

  # Count violations
  n_violations <- sum(result$determined_week & !result$determined_month, na.rm = TRUE)

  expect_equal(
    n_violations, 0L,
    info = paste(
      "Found", n_violations, "observations with determined week but undetermined month.",
      "This violates the transitive nesting requirement."
    )
  )
})

# =============================================================================
# NESTING CONSISTENCY TESTS
# =============================================================================

test_that("fortnight value is consistent with month value when both determined", {
  # When fortnight is determined, it should fall within the determined month
  # Month 1 = fortnights 1-2, Month 2 = fortnights 3-4, Month 3 = fortnights 5-6
  test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10)

  result <- pnadc_identify_periods(test_data, verbose = FALSE)

  both_determined <- result[determined_month & determined_fortnight]

  if (nrow(both_determined) > 0) {
    # Calculate expected fortnight range for determined month
    both_determined[, `:=`(
      expected_fortnight_min = (ref_month_in_quarter - 1L) * 2L + 1L,
      expected_fortnight_max = ref_month_in_quarter * 2L
    )]

    # Check that actual fortnight falls within range
    inconsistent <- both_determined[
      ref_fortnight_in_quarter < expected_fortnight_min |
      ref_fortnight_in_quarter > expected_fortnight_max
    ]

    expect_equal(
      nrow(inconsistent), 0L,
      info = paste(
        "Found", nrow(inconsistent), "observations where fortnight is outside determined month range."
      )
    )
  }
})

test_that("week value is consistent with fortnight value when both determined", {
  # When week is determined, it should fall within the determined fortnight
  test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10)

  result <- pnadc_identify_periods(test_data, verbose = FALSE)

  both_determined <- result[determined_fortnight & determined_week]

  if (nrow(both_determined) > 0) {
    # Week should correspond to dates within the fortnight
    # Fortnight 1 = weeks 1-2, Fortnight 2 = weeks 3-4 of each month
    # This is a weaker check - just verify week exists for fortnight-determined obs
    expect_true(all(!is.na(both_determined$ref_week_in_quarter)))
  }
})

# =============================================================================
# NESTING BY CONSTRUCTION TESTS
# =============================================================================

test_that("determination rates follow nesting hierarchy", {
  # By construction: month_rate >= fortnight_rate >= week_rate
  test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10)

  result <- pnadc_identify_periods(test_data, verbose = FALSE)

  # Compute determination rates from result
  n_total <- nrow(result)
  month_rate <- sum(result$determined_month) / n_total
  fortnight_rate <- sum(result$determined_fortnight) / n_total
  week_rate <- sum(result$determined_week) / n_total

  expect_true(
    month_rate >= fortnight_rate,
    info = paste(
      "Month rate", round(month_rate, 4), "should be >= fortnight rate", round(fortnight_rate, 4)
    )
  )

  expect_true(
    fortnight_rate >= week_rate,
    info = paste(
      "Fortnight rate", round(fortnight_rate, 4), "should be >= week rate", round(week_rate, 4)
    )
  )
})

test_that("all determined fortnights have valid month reference", {
  # Every observation with determined fortnight should have valid month columns
  test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10)

  result <- pnadc_identify_periods(test_data, verbose = FALSE)

  fortnight_det <- result[determined_fortnight == TRUE]

  if (nrow(fortnight_det) > 0) {
    # All should have valid month values
    expect_true(all(!is.na(fortnight_det$ref_month_in_quarter)))
    expect_true(all(!is.na(fortnight_det$ref_month_in_year)))
    expect_true(all(!is.na(fortnight_det$ref_month_yyyymm)))
  }
})

test_that("all determined weeks have valid fortnight reference", {
  # Every observation with determined week should have valid fortnight columns
  test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10)

  result <- pnadc_identify_periods(test_data, verbose = FALSE)

  week_det <- result[determined_week == TRUE]

  if (nrow(week_det) > 0) {
    # All should have valid fortnight values
    expect_true(all(!is.na(week_det$ref_fortnight_in_quarter)))
    expect_true(all(!is.na(week_det$ref_fortnight_in_month)))
    expect_true(all(!is.na(week_det$ref_fortnight_yyyyff)))
  }
})

# =============================================================================
# EDGE CASE TESTS
# =============================================================================

test_that("single quarter data respects nesting", {
  # Even with single quarter (lower determination rate), nesting should hold
  test_data <- create_realistic_pnadc(n_quarters = 1, n_upas = 5)

  result <- pnadc_identify_periods(test_data, verbose = FALSE)

  # Fortnight requires month
  n_violations_fm <- sum(result$determined_fortnight & !result$determined_month, na.rm = TRUE)
  expect_equal(n_violations_fm, 0L)

  # Week requires fortnight
  n_violations_wf <- sum(result$determined_week & !result$determined_fortnight, na.rm = TRUE)
  expect_equal(n_violations_wf, 0L)
})

test_that("nesting holds across all quarters when stacked", {
  # Test with more quarters to stress the algorithm
  test_data <- create_realistic_pnadc(n_quarters = 8, n_upas = 5)

  result <- pnadc_identify_periods(test_data, verbose = FALSE)

  # Check nesting per quarter
  for (q in unique(result$Trimestre)) {
    quarter_data <- result[Trimestre == q]

    n_violations_fm <- sum(quarter_data$determined_fortnight & !quarter_data$determined_month, na.rm = TRUE)
    n_violations_wf <- sum(quarter_data$determined_week & !quarter_data$determined_fortnight, na.rm = TRUE)

    expect_equal(
      n_violations_fm, 0L,
      info = paste("Quarter", q, "has fortnight without month violations")
    )
    expect_equal(
      n_violations_wf, 0L,
      info = paste("Quarter", q, "has week without fortnight violations")
    )
  }
})

# =============================================================================
# EXPERIMENTAL STRATEGIES NESTING TESTS
# =============================================================================

test_that("experimental fortnight requires month (strict or experimental)", {
  # When fortnight is assigned experimentally, month must already exist
  test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10)

  crosswalk <- pnadc_identify_periods(test_data, verbose = FALSE)
  # Use upa_aggregation strategy which doesn't require original data
  result <- pnadc_experimental_periods(crosswalk, strategy = "upa_aggregation", verbose = FALSE)

  # Check that fortnight requires month (experimental strategies update main columns)
  has_fortnight <- !is.na(result$ref_fortnight_in_month)
  has_month <- !is.na(result$ref_month_in_quarter)

  n_violations <- sum(has_fortnight & !has_month, na.rm = TRUE)

  expect_equal(
    n_violations, 0L,
    info = paste(
      "Found", n_violations, "observations with fortnight but no month.",
      "This violates nesting."
    )
  )
})

test_that("experimental week requires fortnight (strict or experimental)", {
  # When week is assigned experimentally, fortnight must already exist
  test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10)

  crosswalk <- pnadc_identify_periods(test_data, verbose = FALSE)
  # Use upa_aggregation strategy which doesn't require original data
  result <- pnadc_experimental_periods(crosswalk, strategy = "upa_aggregation", verbose = FALSE)

  # Check that week requires fortnight (experimental strategies update main columns)
  has_week <- !is.na(result$ref_week_in_month)
  has_fortnight <- !is.na(result$ref_fortnight_in_month)

  n_violations <- sum(has_week & !has_fortnight, na.rm = TRUE)

  expect_equal(
    n_violations, 0L,
    info = paste(
      "Found", n_violations, "observations with experimental week but no fortnight (strict or exp).",
      "This violates experimental nesting."
    )
  )
})

test_that("experimental fortnight is consistent with month bounds", {
  # When fortnight is assigned, it should be within valid range (1 or 2 within month)
  test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10)

  crosswalk <- pnadc_identify_periods(test_data, verbose = FALSE)
  # Use upa_aggregation strategy which doesn't require original data
  result <- pnadc_experimental_periods(crosswalk, strategy = "upa_aggregation", verbose = FALSE)

  # Check fortnight values are in valid range
  has_fortnight <- result[!is.na(ref_fortnight_in_month)]

  if (nrow(has_fortnight) > 0) {
    # ref_fortnight_in_month should be 1 or 2
    invalid <- has_fortnight[ref_fortnight_in_month < 1 | ref_fortnight_in_month > 2]

    expect_equal(
      nrow(invalid), 0L,
      info = paste(
        "Found", nrow(invalid), "observations where fortnight is outside valid range (1-2)."
      )
    )
  }
})

test_that("experimental strategies extend but don't contradict strict determination", {
  # Experimental strategies may fill in previously NA values but should not

  # change values that were already determined by strict algorithm
  test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10)

  crosswalk <- pnadc_identify_periods(test_data, verbose = FALSE)

  # Save original strict values (only where determined)
  original_month <- crosswalk$ref_month_in_quarter
  original_fortnight <- crosswalk$ref_fortnight_in_quarter
  original_week <- crosswalk$ref_week_in_quarter

  # Use upa_aggregation strategy which doesn't require original data
  result <- pnadc_experimental_periods(crosswalk, strategy = "upa_aggregation", verbose = FALSE)

  # Where strict algorithm determined values, they should be preserved
  strict_month_mask <- !is.na(original_month)
  strict_fortnight_mask <- !is.na(original_fortnight)
  strict_week_mask <- !is.na(original_week)

  # Strict values should be unchanged where they existed
  expect_equal(result$ref_month_in_quarter[strict_month_mask], original_month[strict_month_mask])
  expect_equal(result$ref_fortnight_in_quarter[strict_fortnight_mask], original_fortnight[strict_fortnight_mask])
  expect_equal(result$ref_week_in_quarter[strict_week_mask], original_week[strict_week_mask])
})

test_that("experimental determination rates follow nesting hierarchy", {
  # After experimental strategies, determination rates should follow: month >= fortnight >= week
  test_data <- create_realistic_pnadc(n_quarters = 4, n_upas = 10)

  crosswalk <- pnadc_identify_periods(test_data, verbose = FALSE)
  # Use upa_aggregation strategy which doesn't require original data
  result <- pnadc_experimental_periods(crosswalk, strategy = "upa_aggregation", verbose = FALSE)

  # Calculate determination rates from main columns (experimental strategies update these directly)
  month_rate <- mean(!is.na(result$ref_month_in_quarter))
  fortnight_rate <- mean(!is.na(result$ref_fortnight_in_quarter))
  week_rate <- mean(!is.na(result$ref_week_in_quarter))

  expect_true(
    month_rate >= fortnight_rate,
    info = paste(
      "Month rate", round(month_rate, 4),
      "should be >= fortnight rate", round(fortnight_rate, 4)
    )
  )

  expect_true(
    fortnight_rate >= week_rate,
    info = paste(
      "Fortnight rate", round(fortnight_rate, 4),
      "should be >= week rate", round(week_rate, 4)
    )
  )
})

Try the PNADCperiods package in your browser

Any scripts or data that you put into this service are public.

PNADCperiods documentation built on April 28, 2026, 9:07 a.m.