test-experimental-strategies.R
In PNADCperiods: Identify Reference Periods in Brazil's PNADC Survey Data

# Tests for experimental period identification strategies
# Tests for pnadc_experimental_periods() - probabilistic and UPA aggregation strategies

# =============================================================================
# PROBABILISTIC STRATEGY TESTS
# =============================================================================

test_that("probabilistic strategy requires store_date_bounds=TRUE", {
  # 1. Setup: Create crosswalk WITHOUT date bounds
  set.seed(400)
  data <- create_realistic_pnadc(n_quarters = 4, n_upas = 15)

  crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = FALSE)

  # 2. Verify: Should error when date bounds not stored
  # Error can be about missing columns like date_min, date_max, or week columns
  expect_error(
    pnadc_experimental_periods(
      crosswalk,
      strategy = "probabilistic",
      verbose = FALSE
    ),
    label = "Probabilistic strategy requires date bounds"
  )

  # 3. Context: Probabilistic strategy needs date ranges to calculate confidence
})


test_that("probabilistic strategy improves month determination", {
  # 1. Setup: Create data with date bounds
  set.seed(401)
  data <- create_realistic_pnadc(n_quarters = 4, n_upas = 20)

  crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)

  # Store strict determination rate
  strict_rate <- crosswalk[, mean(determined_month, na.rm = TRUE)]

  # 2. Execute: Apply probabilistic strategy
  result <- pnadc_experimental_periods(
    crosswalk,
    strategy = "probabilistic",
    confidence_threshold = 0.9,
    verbose = FALSE
  )

  # Calculate experimental rate
  exp_rate <- result[, mean(determined_month, na.rm = TRUE)]

  # 3. Verify: Experimental rate should be >= strict rate
  expect_true(exp_rate >= strict_rate,
              label = paste0("Probabilistic should maintain or improve rate: ",
                             "strict=", round(strict_rate, 3), ", exp=", round(exp_rate, 3)))

  # 4. Context: Probabilistic adds determinations, never removes strict ones
})


test_that("confidence_threshold parameter affects determination", {
  # 1. Setup: Create data with store_date_bounds
  set.seed(402)
  data <- create_realistic_pnadc(n_quarters = 4, n_upas = 20)

  crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)

  # 2. Execute: Test with different confidence thresholds
  result_08 <- pnadc_experimental_periods(
    crosswalk,
    strategy = "probabilistic",
    confidence_threshold = 0.8,
    verbose = FALSE
  )

  result_09 <- pnadc_experimental_periods(
    crosswalk,
    strategy = "probabilistic",
    confidence_threshold = 0.9,
    verbose = FALSE
  )

  result_095 <- pnadc_experimental_periods(
    crosswalk,
    strategy = "probabilistic",
    confidence_threshold = 0.95,
    verbose = FALSE
  )

  # 3. Verify: Lower threshold should give >= determinations than higher threshold
  rate_08 <- result_08[, mean(determined_month, na.rm = TRUE)]
  rate_09 <- result_09[, mean(determined_month, na.rm = TRUE)]
  rate_095 <- result_095[, mean(determined_month, na.rm = TRUE)]

  expect_true(rate_08 >= rate_09 - 0.01,  # Allow tiny tolerance
              label = paste0("threshold=0.8 (", round(rate_08, 3),
                             ") should be >= threshold=0.9 (", round(rate_09, 3), ")"))

  expect_true(rate_09 >= rate_095 - 0.01,
              label = paste0("threshold=0.9 (", round(rate_09, 3),
                             ") should be >= threshold=0.95 (", round(rate_095, 3), ")"))

  # 4. Context: More conservative thresholds assign fewer probabilistic months
})


test_that("probabilistic strategy preserves strict determinations", {
  # 1. Setup: Create data
  set.seed(403)
  data <- create_realistic_pnadc(n_quarters = 4, n_upas = 15)

  crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)

  # Identify strict determinations
  strict_determined <- crosswalk[determined_month == TRUE]

  # 2. Execute: Apply probabilistic
  result <- pnadc_experimental_periods(
    crosswalk,
    strategy = "probabilistic",
    confidence_threshold = 0.9,
    verbose = FALSE
  )

  # 3. Verify: All strict determinations must be preserved
  result_determined <- result[determined_month == TRUE]

  # All strict IDs should still be determined in result
  # Note: Crosswalk is at household level; V2003 is not in the crosswalk output
  strict_ids <- paste(strict_determined$Ano, strict_determined$Trimestre,
                      strict_determined$UPA, strict_determined$V1008,
                      strict_determined$V1014, sep = "_")

  result_ids <- paste(result_determined$Ano, result_determined$Trimestre,
                      result_determined$UPA, result_determined$V1008,
                      result_determined$V1014, sep = "_")

  all_preserved <- all(strict_ids %in% result_ids)

  expect_true(all_preserved,
              label = "Probabilistic must preserve all strict determinations")

  # 4. Context: Experimental strategies only ADD, never remove determinations
})


test_that("probabilistic assignment only for 2-period ranges", {
  # 1. Setup: Create data
  set.seed(404)
  data <- create_realistic_pnadc(n_quarters = 4, n_upas = 15)

  crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)

  # 2. Execute: Apply probabilistic
  result <- pnadc_experimental_periods(
    crosswalk,
    strategy = "probabilistic",
    confidence_threshold = 0.8,
    verbose = FALSE
  )

  # 3. Verify: Check the experimental assignment pattern
  # Probabilistic only assigns when there's ambiguity between 2 periods
  # This is verified indirectly - the function should not error

  expect_true(is.data.frame(result))
  expect_true("determined_month" %in% names(result))

  # 4. Context: Probabilistic works on 2-month ranges (implementation detail)
  # Single-month or 3-month ranges are not probabilistically assigned
})


# =============================================================================
# UPA AGGREGATION STRATEGY TESTS
# =============================================================================

test_that("UPA aggregation extends determinations via consensus", {
  # 1. Setup: Create data
  set.seed(500)
  data <- create_realistic_pnadc(n_quarters = 4, n_upas = 20)

  crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)

  # Store strict rate
  strict_rate <- crosswalk[, mean(determined_month, na.rm = TRUE)]

  # 2. Execute: Apply UPA aggregation
  result <- pnadc_experimental_periods(
    crosswalk,
    strategy = "upa_aggregation",
    upa_proportion_threshold = 0.5,
    verbose = FALSE
  )

  # Calculate rate after UPA aggregation
  upa_rate <- result[, mean(determined_month, na.rm = TRUE)]

  # 3. Verify: UPA aggregation should maintain or improve rate
  expect_true(upa_rate >= strict_rate,
              label = paste0("UPA aggregation maintains or improves: ",
                             "strict=", round(strict_rate, 3), ", upa=", round(upa_rate, 3)))

  # 4. Context: UPA aggregation extends via UPA-level consensus
})


test_that("upa_proportion_threshold affects determination", {
  # 1. Setup: Create data
  set.seed(501)
  data <- create_realistic_pnadc(n_quarters = 4, n_upas = 20)

  crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)

  # 2. Execute: Test with different thresholds
  result_05 <- pnadc_experimental_periods(
    crosswalk,
    strategy = "upa_aggregation",
    upa_proportion_threshold = 0.5,
    verbose = FALSE
  )

  result_08 <- pnadc_experimental_periods(
    crosswalk,
    strategy = "upa_aggregation",
    upa_proportion_threshold = 0.8,
    verbose = FALSE
  )

  # 3. Verify: Lower threshold should give >= determinations
  rate_05 <- result_05[, mean(determined_month, na.rm = TRUE)]
  rate_08 <- result_08[, mean(determined_month, na.rm = TRUE)]

  expect_true(rate_05 >= rate_08 - 0.01,
              label = paste0("threshold=0.5 (", round(rate_05, 3),
                             ") should be >= threshold=0.8 (", round(rate_08, 3), ")"))

  # 4. Context: Higher proportion threshold requires stronger UPA consensus
})


# =============================================================================
# "BOTH" STRATEGY TESTS
# =============================================================================

test_that("'both' strategy combines probabilistic and UPA aggregation", {
  # 1. Setup: Create data
  set.seed(600)
  data <- create_realistic_pnadc(n_quarters = 4, n_upas = 20)

  crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)

  # 2. Execute: Apply each strategy separately and combined
  result_prob <- pnadc_experimental_periods(
    crosswalk,
    strategy = "probabilistic",
    confidence_threshold = 0.9,
    verbose = FALSE
  )

  result_upa <- pnadc_experimental_periods(
    crosswalk,
    strategy = "upa_aggregation",
    upa_proportion_threshold = 0.5,
    verbose = FALSE
  )

  result_both <- pnadc_experimental_periods(
    crosswalk,
    strategy = "both",
    confidence_threshold = 0.9,
    upa_proportion_threshold = 0.5,
    verbose = FALSE
  )

  # 3. Verify: "both" should be >= max(individual strategies)
  rate_prob <- result_prob[, mean(determined_month, na.rm = TRUE)]
  rate_upa <- result_upa[, mean(determined_month, na.rm = TRUE)]
  rate_both <- result_both[, mean(determined_month, na.rm = TRUE)]

  max_individual <- max(rate_prob, rate_upa)

  expect_true(rate_both >= max_individual - 0.01,  # Allow tiny tolerance
              label = paste0("'both' (", round(rate_both, 3),
                             ") should be >= max(individual) (", round(max_individual, 3), ")"))

  # 4. Context: Combined strategy should identify at least as many as either alone
})


test_that("'both' strategy preserves strict determinations", {
  # 1. Setup: Create data
  set.seed(601)
  data <- create_realistic_pnadc(n_quarters = 4, n_upas = 15)

  crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)

  # Count strict determinations
  n_strict <- crosswalk[, sum(determined_month, na.rm = TRUE)]

  # 2. Execute: Apply "both" strategy
  result <- pnadc_experimental_periods(
    crosswalk,
    strategy = "both",
    verbose = FALSE
  )

  # Count experimental determinations
  n_exp <- result[, sum(determined_month, na.rm = TRUE)]

  # 3. Verify: Should have at least as many determinations as strict
  expect_true(n_exp >= n_strict,
              label = "Combined strategy must preserve strict determinations")

  # 4. Context: All experimental strategies are additive only
})


# =============================================================================
# NESTING PRESERVATION TESTS
# =============================================================================

test_that("experimental strategies maintain nesting (week requires fortnight)", {
  # 1. Setup: Create data
  set.seed(700)
  data <- create_realistic_pnadc(n_quarters = 4, n_upas = 15)

  crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)

  # 2. Execute: Apply experimental strategy
  result <- pnadc_experimental_periods(
    crosswalk,
    strategy = "both",
    verbose = FALSE
  )

  # 3. Verify: Nesting violation check
  # All week-determined must have fortnight-determined
  violation <- result[determined_week == TRUE & (is.na(determined_fortnight) | determined_fortnight == FALSE)]

  expect_equal(nrow(violation), 0,
               label = "Week determination requires fortnight determination (nesting)")

  # 4. Context: Nesting is a fundamental invariant
})


test_that("experimental strategies maintain nesting (fortnight requires month)", {
  # 1. Setup: Create data
  set.seed(701)
  data <- create_realistic_pnadc(n_quarters = 4, n_upas = 15)

  crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)

  # 2. Execute: Apply experimental strategy
  result <- pnadc_experimental_periods(
    crosswalk,
    strategy = "both",
    verbose = FALSE
  )

  # 3. Verify: Nesting violation check
  # All fortnight-determined must have month-determined
  violation <- result[determined_fortnight == TRUE & (is.na(determined_month) | determined_month == FALSE)]

  expect_equal(nrow(violation), 0,
               label = "Fortnight determination requires month determination (nesting)")
})


# =============================================================================
# EDGE CASES
# =============================================================================

test_that("experimental strategies handle small data sets", {
  # 1. Setup: Create data with enough observations for experimental strategies
  set.seed(800)
  data <- create_realistic_pnadc(n_quarters = 2, n_upas = 10)

  crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)

  # 2. Execute: Should not error with reasonable data size
  expect_no_error({
    result <- pnadc_experimental_periods(
      crosswalk,
      strategy = "both",
      verbose = FALSE
    )
  })

  # 3. Context: Implementation should handle various data sizes gracefully
})


test_that("experimental strategies work with multi-quarter data", {
  # 1. Setup: Use multi-quarter data for experimental strategies
  # Single quarter may not have enough variation for experimental strategies
  set.seed(801)
  data <- create_realistic_pnadc(n_quarters = 3, n_upas = 15)

  crosswalk <- pnadc_identify_periods(data, verbose = FALSE, store_date_bounds = TRUE)

  # 2. Execute: Should work with sufficient data
  expect_no_error({
    result <- pnadc_experimental_periods(
      crosswalk,
      strategy = "both",
      verbose = FALSE
    )
  })

  # 3. Verify: Basic structure maintained
  expect_true(is.data.frame(result))
  expect_equal(nrow(result), nrow(crosswalk))

  # 4. Context: Experimental strategies need sufficient data variation
})
Any scripts or data that you put into this service are public.
PNADCperiods documentation built on April 28, 2026, 9:07 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
PNADCperiods
Identify Reference Periods in Brazil's PNADC Survey Data

tests/testthat/test-experimental-strategies.R
In PNADCperiods: Identify Reference Periods in Brazil's PNADC Survey Data

Try the PNADCperiods package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

PNADCperiods Identify Reference Periods in Brazil's PNADC Survey Data

tests/testthat/test-experimental-strategies.R In PNADCperiods: Identify Reference Periods in Brazil's PNADC Survey Data

Try the PNADCperiods package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

PNADCperiods
Identify Reference Periods in Brazil's PNADC Survey Data

tests/testthat/test-experimental-strategies.R
In PNADCperiods: Identify Reference Periods in Brazil's PNADC Survey Data