test-derived-series.R
In PNADCperiods: Identify Reference Periods in Brazil's PNADC Survey Data

# Tests for .compute_derived_series() and .DERIVED_SERIES_SPEC
# These tests verify the derived series computation engine (aggregates, average
# income, residuals, rates, deflated) and the structural integrity of the
# declarative specification that drives it.

library(data.table)

# =============================================================================
# PHASE 1: AGGREGATE TESTS
# =============================================================================

test_that("aggregate: popocup + popdesocup = popnaforca", {
  dt <- data.table(
    anomesexato = 202301:202310,
    m_popocup    = c(100, 105, 110, 108, 112, 115, 118, 120, 122, 125),
    m_popdesocup = c(10, 12, 11, 13, 9, 8, 10, 11, 12, 10)
  )
  result <- PNADCperiods:::.compute_derived_series(copy(dt))

  expect_true("m_popnaforca" %in% names(result))
  expect_equal(result$m_popnaforca, dt$m_popocup + dt$m_popdesocup)
})

test_that("aggregate dependency chain: empregpriv built first, then empregado uses it", {
  dt <- data.table(
    anomesexato = 202301:202310,
    # Components for empregpriv
    m_empregprivcomcart = seq(40, 49),
    m_empregprivsemcart = seq(10, 19),
    # Components for domestico
    m_domesticocomcart = seq(5, 14),
    m_domesticosemcart = seq(2, 11),
    # Components for empregpubl
    m_empregpublcomcart = seq(8, 17),
    m_empregpublsemcart = seq(3, 12),
    m_estatutmilitar    = seq(1, 10)
  )
  result <- PNADCperiods:::.compute_derived_series(copy(dt))

  # empregpriv should be computed first
  expect_true("m_empregpriv" %in% names(result))
  expected_empregpriv <- dt$m_empregprivcomcart + dt$m_empregprivsemcart
  expect_equal(result$m_empregpriv, expected_empregpriv)

  # domestico should also be computed
  expect_true("m_domestico" %in% names(result))
  expected_domestico <- dt$m_domesticocomcart + dt$m_domesticosemcart
  expect_equal(result$m_domestico, expected_domestico)

  # empregpubl depends on 3 components

  expect_true("m_empregpubl" %in% names(result))
  expected_empregpubl <- dt$m_empregpublcomcart + dt$m_empregpublsemcart + dt$m_estatutmilitar
  expect_equal(result$m_empregpubl, expected_empregpubl)

  # empregado = empregpriv + domestico + empregpubl (all computed above)
  expect_true("m_empregado" %in% names(result))
  expected_empregado <- expected_empregpriv + expected_domestico + expected_empregpubl
  expect_equal(result$m_empregado, expected_empregado)
})

# =============================================================================
# PHASE 2: AVERAGE INCOME TESTS
# =============================================================================

test_that("average income: rendhabnominaltodos = massahabnominaltodos / comrendtodos * 1000", {
  dt <- data.table(
    anomesexato        = 202301:202310,
    m_massahabnominaltodos = c(250e6, 260e6, 255e6, 265e6, 270e6,
                               275e6, 280e6, 285e6, 290e6, 295e6),
    m_comrendtodos         = c(90e3, 91e3, 92e3, 93e3, 94e3,
                               95e3, 96e3, 97e3, 98e3, 99e3)
  )
  result <- PNADCperiods:::.compute_derived_series(copy(dt))

  expect_true("m_rendhabnominaltodos" %in% names(result))
  expected <- round(dt$m_massahabnominaltodos / dt$m_comrendtodos * 1000, 0)
  expect_equal(result$m_rendhabnominaltodos, expected)
})

# =============================================================================
# PHASE 3: RESIDUAL TESTS
# =============================================================================

test_that("residual: popforadaforca = pop14mais - popocup - popdesocup", {
  dt <- data.table(
    anomesexato  = 202301:202310,
    m_pop14mais  = c(170, 172, 174, 176, 178, 180, 182, 184, 186, 188),
    m_popocup    = c(100, 102, 104, 106, 108, 110, 112, 114, 116, 118),
    m_popdesocup = c(12, 13, 11, 14, 10, 9, 11, 12, 13, 10)
  )
  result <- PNADCperiods:::.compute_derived_series(copy(dt))

  expect_true("m_popforadaforca" %in% names(result))
  expected <- dt$m_pop14mais - dt$m_popocup - dt$m_popdesocup
  expect_equal(result$m_popforadaforca, expected)
})

# =============================================================================
# PHASE 4: RATE TESTS
# =============================================================================

test_that("rate: taxadesocup = popdesocup / popnaforca * 100", {
  dt <- data.table(
    anomesexato  = 202301:202310,
    m_popocup    = c(100, 105, 110, 108, 112, 115, 118, 120, 122, 125),
    m_popdesocup = c(10, 12, 11, 13, 9, 8, 10, 11, 12, 10)
  )
  # Phase 1 computes m_popnaforca = popocup + popdesocup
  # Phase 4 computes taxadesocup = popdesocup / popnaforca * 100
  result <- PNADCperiods:::.compute_derived_series(copy(dt))

  expect_true("m_taxadesocup" %in% names(result))
  popnaforca <- dt$m_popocup + dt$m_popdesocup
  expected <- round(dt$m_popdesocup / popnaforca * 100, 1)
  expect_equal(result$m_taxadesocup, expected)
})

test_that("rate with compound denominator: percdesalento uses (popnaforca + desalentado)", {
  dt <- data.table(
    anomesexato    = 202301:202310,
    m_popocup      = c(100, 105, 110, 108, 112, 115, 118, 120, 122, 125),
    m_popdesocup   = c(10, 12, 11, 13, 9, 8, 10, 11, 12, 10),
    m_desalentado  = c(5, 6, 4, 7, 3, 4, 5, 6, 7, 4),
    m_forcapotencial = c(8, 9, 7, 10, 6, 7, 8, 9, 10, 7)
  )
  result <- PNADCperiods:::.compute_derived_series(copy(dt))

  expect_true("m_percdesalento" %in% names(result))

  # percdesalento spec: numerator = "desalentado", denominator = c("popnaforca", "desalentado")
  # popnaforca is computed in Phase 1 as popocup + popdesocup
  popnaforca <- dt$m_popocup + dt$m_popdesocup
  # Compound denominator: rowSums of m_popnaforca + m_desalentado
  denom <- popnaforca + dt$m_desalentado
  expected <- round(dt$m_desalentado / denom * 100, 1)
  expect_equal(result$m_percdesalento, expected)
})

# =============================================================================
# PHASE 5: DEFLATED SERIES TESTS
# =============================================================================

test_that("deflated series: verify IPCA deflation formula for hab and efet", {
  # 10 months with rising IPCA index
  ipca_values <- seq(5000, 5450, by = 50)
  dt <- data.table(
    anomesexato            = 202301:202310,
    m_massahabnominaltodos = c(250e6, 260e6, 255e6, 265e6, 270e6,
                               275e6, 280e6, 285e6, 290e6, 295e6),
    m_massaefetnominaltodos = c(240e6, 250e6, 245e6, 255e6, 260e6,
                                265e6, 270e6, 275e6, 280e6, 285e6),
    m_comrendtodos         = c(90e3, 91e3, 92e3, 93e3, 94e3,
                               95e3, 96e3, 97e3, 98e3, 99e3),
    ipca100dez1993         = ipca_values
  )
  result <- PNADCperiods:::.compute_derived_series(copy(dt))

  latest_ipca <- ipca_values[10]  # max anomesexato

  # hab series use current IPCA (use_lagged_ipca = FALSE)
  # deflator_hab = latest_ipca / ipca100dez1993
  expect_true("m_massahabtodosipcabr" %in% names(result))
  deflator_hab <- latest_ipca / ipca_values
  expected_hab <- round(dt$m_massahabnominaltodos * deflator_hab, 0)
  expect_equal(result$m_massahabtodosipcabr, expected_hab)

  # efet series use lagged IPCA (use_lagged_ipca = TRUE)
  # deflator_efet = latest_ipca / shift(ipca100dez1993, 1, type="lag")
  expect_true("m_massaefettodosipcabr" %in% names(result))
  ipca_lagged <- c(NA, ipca_values[1:9])
  deflator_efet <- latest_ipca / ipca_lagged
  expected_efet <- round(dt$m_massaefetnominaltodos * deflator_efet, 0)
  # Row 1 is NA because lagged IPCA is NA
  expect_true(is.na(result$m_massaefettodosipcabr[1]))
  expect_equal(result$m_massaefettodosipcabr[2:10], expected_efet[2:10])
})

# =============================================================================
# EDGE CASE TESTS
# =============================================================================

test_that("missing component column: derived series gracefully skipped", {
  # Only provide popocup, not popdesocup -- popnaforca cannot be computed
  dt <- data.table(
    anomesexato = 202301:202310,
    m_popocup   = seq(100, 109)
  )
  # Should not error
  result <- expect_silent(PNADCperiods:::.compute_derived_series(copy(dt)))
  # popnaforca should NOT be created (missing component)
  expect_false("m_popnaforca" %in% names(result))
  # taxadesocup also should NOT be created (missing numerator and denominator)
  expect_false("m_taxadesocup" %in% names(result))
})

test_that("division by zero: denominator=0 does not crash", {
  dt <- data.table(
    anomesexato  = 202301:202310,
    m_popocup    = c(100, 105, 110, 108, 0, 115, 118, 120, 122, 125),
    m_popdesocup = c(10, 12, 11, 13, 0, 8, 10, 11, 12, 10)
  )
  # Row 5: popnaforca = 0, so taxadesocup = 0/0
  # Should not error -- NaN or Inf is acceptable, just no crash
  result <- expect_silent(PNADCperiods:::.compute_derived_series(copy(dt)))
  expect_true("m_taxadesocup" %in% names(result))
  # The function completes without error
  expect_s3_class(result, "data.table")
})

# =============================================================================
# .DERIVED_SERIES_SPEC STRUCTURAL VALIDATION
# =============================================================================

test_that("spec: all aggregate specs have 'name' and non-empty 'components'", {
  spec <- PNADCperiods:::.DERIVED_SERIES_SPEC
  for (s in spec$aggregates) {
    expect_true(!is.null(s$name), info = paste("Aggregate missing 'name'"))
    expect_true(is.character(s$name) && nchar(s$name) > 0,
                info = paste("Aggregate has empty name"))
    expect_true(!is.null(s$components),
                info = paste("Aggregate", s$name, "missing 'components'"))
    expect_true(length(s$components) >= 2,
                info = paste("Aggregate", s$name, "has fewer than 2 components"))
  }
})

test_that("spec: all rate specs have 'name', 'numerator', 'denominator'", {
  spec <- PNADCperiods:::.DERIVED_SERIES_SPEC
  for (s in spec$rates) {
    expect_true(!is.null(s$name),
                info = "Rate missing 'name'")
    expect_true(is.character(s$name) && nchar(s$name) > 0,
                info = "Rate has empty name")
    expect_true(!is.null(s$numerator),
                info = paste("Rate", s$name, "missing 'numerator'"))
    expect_true(!is.null(s$denominator),
                info = paste("Rate", s$name, "missing 'denominator'"))
    expect_true(length(unlist(s$numerator)) >= 1,
                info = paste("Rate", s$name, "has empty numerator"))
    expect_true(length(unlist(s$denominator)) >= 1,
                info = paste("Rate", s$name, "has empty denominator"))
  }
})

test_that("spec: all residual specs have 'name', 'parent', 'subtract'", {
  spec <- PNADCperiods:::.DERIVED_SERIES_SPEC
  for (s in spec$residuals) {
    expect_true(!is.null(s$name),
                info = "Residual missing 'name'")
    expect_true(is.character(s$name) && nchar(s$name) > 0,
                info = "Residual has empty name")
    expect_true(!is.null(s$parent),
                info = paste("Residual", s$name, "missing 'parent'"))
    expect_true(is.character(s$parent) && nchar(s$parent) > 0,
                info = paste("Residual", s$name, "has empty parent"))
    expect_true(!is.null(s$subtract),
                info = paste("Residual", s$name, "missing 'subtract'"))
    expect_true(length(s$subtract) >= 1,
                info = paste("Residual", s$name, "has empty subtract list"))
  }
})

test_that("spec: no duplicate names across all spec sections", {
  spec <- PNADCperiods:::.DERIVED_SERIES_SPEC
  all_names <- c(
    sapply(spec$aggregates, `[[`, "name"),
    sapply(spec$average_income, `[[`, "name"),
    sapply(spec$residuals, `[[`, "name"),
    sapply(spec$rates, `[[`, "name"),
    sapply(spec$deflated, `[[`, "name")
  )
  duplicated_names <- all_names[duplicated(all_names)]
  expect_equal(length(duplicated_names), 0,
               info = paste("Duplicate derived series names:", paste(duplicated_names, collapse = ", ")))
})

test_that("spec: aggregate dependencies in correct order (empregpriv before empregado)", {
  spec <- PNADCperiods:::.DERIVED_SERIES_SPEC
  agg_names <- sapply(spec$aggregates, `[[`, "name")

  # empregpriv must appear before empregado (empregado depends on empregpriv)
  idx_empregpriv <- which(agg_names == "empregpriv")
  idx_empregado  <- which(agg_names == "empregado")
  expect_true(length(idx_empregpriv) == 1, info = "empregpriv not found in aggregates")
  expect_true(length(idx_empregado) == 1, info = "empregado not found in aggregates")
  expect_true(idx_empregpriv < idx_empregado,
              info = "empregpriv must be computed before empregado")

  # popnaforca must appear before forcaampliada (forcaampliada depends on popnaforca)
  idx_popnaforca   <- which(agg_names == "popnaforca")
  idx_forcaampliada <- which(agg_names == "forcaampliada")
  expect_true(length(idx_popnaforca) == 1, info = "popnaforca not found in aggregates")
  expect_true(length(idx_forcaampliada) == 1, info = "forcaampliada not found in aggregates")
  expect_true(idx_popnaforca < idx_forcaampliada,
              info = "popnaforca must be computed before forcaampliada")

  # domestico must appear before empregado
  idx_domestico <- which(agg_names == "domestico")
  expect_true(idx_domestico < idx_empregado,
              info = "domestico must be computed before empregado")

  # empregpubl must appear before empregado
  idx_empregpubl <- which(agg_names == "empregpubl")
  expect_true(idx_empregpubl < idx_empregado,
              info = "empregpubl must be computed before empregado")
})

test_that("spec: all deflated specs have 'source' and 'use_lagged_ipca'", {
  spec <- PNADCperiods:::.DERIVED_SERIES_SPEC
  for (s in spec$deflated) {
    expect_true(!is.null(s$name),
                info = "Deflated missing 'name'")
    expect_true(!is.null(s$source),
                info = paste("Deflated", s$name, "missing 'source'"))
    expect_true(is.character(s$source) && nchar(s$source) > 0,
                info = paste("Deflated", s$name, "has empty source"))
    expect_true(!is.null(s$use_lagged_ipca),
                info = paste("Deflated", s$name, "missing 'use_lagged_ipca'"))
    expect_true(is.logical(s$use_lagged_ipca),
                info = paste("Deflated", s$name, "'use_lagged_ipca' is not logical"))
  }
})

test_that("spec: phase ordering ensures aggregates computed before rates that use them", {
  spec <- PNADCperiods:::.DERIVED_SERIES_SPEC

  # Collect all names produced by aggregates (Phase 1)
  agg_names <- sapply(spec$aggregates, `[[`, "name")

  # Rates that reference aggregate outputs in their numerator or denominator
  # e.g., taxadesocup uses popnaforca (aggregate) as denominator
  for (s in spec$rates) {
    # Check denominator references
    denom_components <- unlist(s$denominator)
    for (d in denom_components) {
      if (d %in% agg_names) {
        # This is fine -- the aggregate is computed in Phase 1, rate in Phase 4
        expect_true(d %in% agg_names,
                    info = paste("Rate", s$name, "references aggregate", d,
                                 "which should be computed before rates"))
      }
    }
    # Check numerator references
    num_components <- unlist(s$numerator)
    for (n in num_components) {
      if (n %in% agg_names) {
        expect_true(n %in% agg_names,
                    info = paste("Rate", s$name, "references aggregate", n,
                                 "which should be computed before rates"))
      }
    }
  }

  # Specifically verify that taxadesocup denominator (popnaforca) is an aggregate
  taxadesocup <- Filter(function(s) s$name == "taxadesocup", spec$rates)
  expect_true(length(taxadesocup) == 1)
  denom <- unlist(taxadesocup[[1]]$denominator)
  expect_true("popnaforca" %in% agg_names,
              info = "popnaforca must be in aggregates since taxadesocup depends on it")
  expect_true(all(denom %in% c(agg_names, "popnaforca")),
              info = "taxadesocup denominator must reference known aggregates or primaries")
})

test_that("spec: all average_income specs have 'multiplier' and 'decimals'", {
  spec <- PNADCperiods:::.DERIVED_SERIES_SPEC
  for (s in spec$average_income) {
    expect_true(!is.null(s$name),
                info = "Average income spec missing 'name'")
    expect_true(!is.null(s$numerator),
                info = paste("Average income", s$name, "missing 'numerator'"))
    expect_true(!is.null(s$denominator),
                info = paste("Average income", s$name, "missing 'denominator'"))
    expect_true(!is.null(s$multiplier),
                info = paste("Average income", s$name, "missing 'multiplier'"))
    expect_true(is.numeric(s$multiplier) && s$multiplier > 0,
                info = paste("Average income", s$name, "'multiplier' must be positive numeric"))
    expect_true(!is.null(s$decimals),
                info = paste("Average income", s$name, "missing 'decimals'"))
    expect_true(is.numeric(s$decimals) && s$decimals >= 0,
                info = paste("Average income", s$name, "'decimals' must be non-negative numeric"))
  }
})
Any scripts or data that you put into this service are public.
PNADCperiods documentation built on April 28, 2026, 9:07 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
PNADCperiods
Identify Reference Periods in Brazil's PNADC Survey Data

tests/testthat/test-derived-series.R
In PNADCperiods: Identify Reference Periods in Brazil's PNADC Survey Data

Try the PNADCperiods package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

PNADCperiods Identify Reference Periods in Brazil's PNADC Survey Data

tests/testthat/test-derived-series.R In PNADCperiods: Identify Reference Periods in Brazil's PNADC Survey Data

Try the PNADCperiods package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

PNADCperiods
Identify Reference Periods in Brazil's PNADC Survey Data

tests/testthat/test-derived-series.R
In PNADCperiods: Identify Reference Periods in Brazil's PNADC Survey Data