# test-sidra-series-metadata.R
# In PNADCperiods: Identify Reference Periods in Brazil's PNADC Survey Data

# Tests for sidra-series-metadata.R
# Note: All tests use the metadata function's own output (no external data needed)

# =============================================================================
# DEFAULT CALL: STRUCTURE AND COMPLETENESS
# =============================================================================

test_that("default call returns 86+ rows data.table with all required columns", {
  # Fetch the catalogue using only default arguments.
  catalogue <- get_sidra_series_metadata()

  # Shape: a data.table holding at least 86 rows.
  expect_s3_class(catalogue, "data.table")
  expect_gte(nrow(catalogue), 86)

  # Every documented column must exist. One expectation is raised per column
  # so that a failure names the exact column that is missing.
  expected_columns <- c(
    "series_name", "api_path", "table_id", "variable_id",
    "classification_id", "classification_value",
    "theme", "theme_category", "subcategory",
    "description_pt", "description_en", "description",
    "unit", "unit_label_pt", "unit_label_en",
    "is_derived", "requires_deflation"
  )
  for (column_name in expected_columns) {
    expect_true(
      column_name %in% names(catalogue),
      label = paste("Column", column_name, "must be present")
    )
  }
})

test_that("no duplicate series_name values", {
  # Pull the name column once and reuse it for both checks.
  catalogue <- get_sidra_series_metadata()
  names_seen <- catalogue$series_name

  # The number of distinct names must equal the number of rows.
  expect_equal(
    length(unique(names_seen)),
    nrow(catalogue),
    label = "series_name values must be unique"
  )

  # Second check lists the offending names in its label, so a failure shows
  # which series would make lookups ambiguous.
  repeated <- names_seen[duplicated(names_seen)]
  expect_equal(
    length(repeated),
    0,
    label = paste("Duplicated names:", paste(repeated, collapse = ", "))
  )
})

test_that("all api_path values start with /t/", {
  # Compute the prefix match once for every path in the catalogue.
  catalogue <- get_sidra_series_metadata()
  paths <- catalogue$api_path
  has_prefix <- grepl("^/t/", paths)

  # Every path must begin with the "/t/" segment.
  expect_true(
    all(has_prefix),
    label = "All api_path values must start with /t/"
  )

  # Repeat the check with the offending paths spelled out in the label.
  offenders <- paths[!has_prefix]
  expect_equal(
    length(offenders),
    0,
    label = paste("Invalid paths:", paste(offenders, collapse = "; "))
  )
})

test_that("all table_id and variable_id are positive integers", {
  # Extract both identifier columns up front.
  catalogue <- get_sidra_series_metadata()
  table_ids <- catalogue$table_id
  variable_ids <- catalogue$variable_id

  # table_id: integer storage and strictly positive.
  expect_type(table_ids, "integer")
  expect_true(
    all(table_ids > 0L),
    label = "All table_id must be positive integers"
  )

  # variable_id: same two requirements.
  expect_type(variable_ids, "integer")
  expect_true(
    all(variable_ids > 0L),
    label = "All variable_id must be positive integers"
  )
})

# =============================================================================
# THEME FILTERING
# =============================================================================

test_that("theme filtering: labor_market returns a subset", {
  # Compare the unfiltered catalogue against the labor_market selection.
  full_catalogue <- get_sidra_series_metadata()
  labor_only <- get_sidra_series_metadata(theme = "labor_market")

  # The filtered result is a non-empty data.table that is strictly smaller
  # than the full catalogue.
  expect_s3_class(labor_only, "data.table")
  n_labor <- nrow(labor_only)
  expect_gt(n_labor, 0)
  expect_lt(n_labor, nrow(full_catalogue))

  # No row from another theme may slip through.
  expect_true(
    all(labor_only$theme == "labor_market"),
    label = "All rows must have theme == 'labor_market'"
  )
})

test_that("invalid theme raises error with informative message", {
  # The error message must mention the rejected theme name.
  expect_error(
    get_sidra_series_metadata(theme = "nonexistent_theme"),
    regexp = "Invalid theme.*nonexistent_theme"
  )

  # The error message must also mention a valid theme ("labor_market").
  expect_error(
    get_sidra_series_metadata(theme = "fake"),
    regexp = "labor_market"
  )
})

test_that("multiple themes can be specified", {
  # Ask for two themes in a single call.
  both <- get_sidra_series_metadata(theme = c("labor_market", "earnings"))
  returned_themes <- both$theme

  # Each requested theme contributes at least one row.
  expect_true("labor_market" %in% returned_themes)
  expect_true("earnings" %in% returned_themes)

  # Nothing outside the two requested themes is returned.
  expect_true(
    all(returned_themes %in% c("labor_market", "earnings")),
    label = "Only requested themes should appear"
  )
})

# =============================================================================
# THEME_CATEGORY FILTERING
# =============================================================================

test_that("theme_category filtering works", {
  # Select only the rows filed under the "unemployment" category.
  unemp <- get_sidra_series_metadata(theme_category = "unemployment")

  # The selection is non-empty and contains no other category.
  expect_gt(nrow(unemp), 0)
  expect_true(all(unemp$theme_category == "unemployment"))

  # taxadesocup is expected to belong to this category.
  expect_true("taxadesocup" %in% unemp$series_name)
})

test_that("theme_category with no matches returns zero rows", {
  # An unknown category is not an error: it yields an empty selection.
  empty_hit <- get_sidra_series_metadata(
    theme_category = "nonexistent_category"
  )

  # The empty selection is still a data.table, with zero rows.
  expect_s3_class(empty_hit, "data.table")
  expect_equal(nrow(empty_hit), 0)
})

# =============================================================================
# SUBCATEGORY FILTERING
# =============================================================================

test_that("subcategory filtering includes NA subcategory rows", {
  # Filtering labor_market by subcategory "levels" should keep rows whose
  # subcategory is "levels" as well as rows whose subcategory is NA
  # (e.g. the economic_sector rows, which carry no subcategory).
  levels_hit <- get_sidra_series_metadata(
    theme = "labor_market",
    subcategory = "levels"
  )
  subcats <- levels_hit$subcategory

  # Rows explicitly tagged "levels" are present.
  expect_true("levels" %in% subcats)

  # Rows without a subcategory are retained too.
  expect_true(
    any(is.na(subcats)),
    label = "NA subcategory rows should be included when filtering by subcategory"
  )
})

test_that("subcategory filtering returns correct subset", {
  # Restrict labor_market to the "rates" subcategory.
  rates_hit <- get_sidra_series_metadata(
    theme = "labor_market",
    subcategory = "rates"
  )

  # Ignoring rows whose subcategory is NA, every remaining label is "rates".
  subcats <- rates_hit$subcategory
  labelled <- subcats[!is.na(subcats)]
  expect_true(
    all(labelled == "rates"),
    label = "Non-NA subcategories should all be 'rates'"
  )
})

# =============================================================================
# SPECIFIC SERIES SELECTION
# =============================================================================

test_that("specific series selection: taxadesocup returns exactly 1 row", {
  # Look up a single series by name.
  hit <- get_sidra_series_metadata(series = "taxadesocup")

  # Exactly one row comes back, and it is the requested series.
  expect_equal(nrow(hit), 1)
  expect_equal(hit$series_name, "taxadesocup")

  # Pin the metadata values recorded for this series.
  expect_equal(hit$table_id, 6381L)
  expect_equal(hit$variable_id, 4099L)
  expect_equal(hit$theme, "labor_market")
  expect_equal(hit$theme_category, "unemployment")
  expect_equal(hit$unit, "percent")
})

test_that("multiple specific series selection works", {
  # Request three series by name in one call.
  hits <- get_sidra_series_metadata(
    series = c("taxadesocup", "popocup", "populacao")
  )

  # One row per requested series, and every requested name is present.
  expect_equal(nrow(hits), 3)
  expect_true(
    all(c("taxadesocup", "popocup", "populacao") %in% hits$series_name)
  )
})

test_that("invalid series name raises error", {
  # A lone unknown name is rejected, and the message names it.
  expect_error(
    get_sidra_series_metadata(series = "this_series_does_not_exist"),
    regexp = "Unknown series.*this_series_does_not_exist"
  )

  # Mixing a valid name with an unknown one is rejected as well; the message
  # names the unknown entry.
  expect_error(
    get_sidra_series_metadata(series = c("taxadesocup", "fake_series")),
    regexp = "Unknown series.*fake_series"
  )
})

# =============================================================================
# COMBINED FILTERS
# =============================================================================

test_that("combined theme + theme_category filters work correctly", {
  # Apply a theme filter and a theme_category filter together.
  participation <- get_sidra_series_metadata(
    theme = "labor_market",
    theme_category = "participation"
  )

  # Each returned row satisfies both filters.
  expect_true(all(participation$theme == "labor_market"))
  expect_true(all(participation$theme_category == "participation"))

  # Three series expected in this selection.
  found <- participation$series_name
  expect_true("taxapartic" %in% found)
  expect_true("popocup" %in% found)
  expect_true("popdesocup" %in% found)
})

test_that("combined theme + subcategory + series filters work", {
  # Three filters at once, chosen so that a single row should survive.
  hit <- get_sidra_series_metadata(
    theme = "labor_market",
    subcategory = "rates",
    series = "taxadesocup"
  )

  # Exactly the requested series is returned.
  expect_equal(nrow(hit), 1)
  expect_equal(hit$series_name, "taxadesocup")
})

# =============================================================================
# UNIT CONSISTENCY
# =============================================================================

test_that("population/level series have unit = thousands", {
  # Purpose: the listed population/level series must all use unit "thousands".

  # 1. Setup
  meta <- get_sidra_series_metadata()

  pop_series <- c("popocup", "popdesocup", "popnaforca", "popforadaforca",
                   "populacao", "pop14mais")
  pop_meta <- meta[series_name %in% pop_series]

  # 2. Guard: all() over zero rows is TRUE, so without this check the unit
  #    assertion below would pass even if none of the listed names matched.
  expect_gt(nrow(pop_meta), 0)

  # 3. Verify: Known population-level series use "thousands"
  expect_true(all(pop_meta$unit == "thousands"),
              label = "All population/level series must use unit = 'thousands'")
})

test_that("rate series have unit = percent", {
  # Purpose: the listed rate series must all use unit "percent".

  # 1. Setup
  meta <- get_sidra_series_metadata()

  rate_series <- c("taxadesocup", "taxapartic", "nivelocup", "niveldesocup",
                    "taxacombdesosub", "taxacompsubutlz", "perccontribprev")
  rate_meta <- meta[series_name %in% rate_series]

  # 2. Guard: all() over zero rows is TRUE, so without this check the unit
  #    assertion below would pass even if none of the listed names matched.
  expect_gt(nrow(rate_meta), 0)

  # 3. Verify: Known rate series use "percent"
  expect_true(all(rate_meta$unit == "percent"),
              label = "All rate series must use unit = 'percent'")
})

test_that("income series have unit = currency or currency_millions", {
  # Purpose: average earnings series use unit "currency"; wage mass series use
  # unit "currency_millions".

  # 1. Setup
  meta <- get_sidra_series_metadata()

  # 2. Verify: Earnings series use "currency"
  earnings_avg <- c("rendhabnominaltodos", "rendhabrealtodos",
                     "rendhabrealprinc", "rendefetrealprinc")
  earn_meta <- meta[series_name %in% earnings_avg]
  # Guard: all() over zero rows is TRUE, so an empty match must fail here
  # instead of letting the unit assertion pass vacuously.
  expect_gt(nrow(earn_meta), 0)
  expect_true(all(earn_meta$unit == "currency"),
              label = "Average earnings series must use unit = 'currency'")

  # 3. Verify: Wage mass series use "currency_millions"
  mass_series <- c("massahabnominaltodos", "massahabrealtodos",
                    "massaefetnominaltodos", "massaefetrealtodos")
  mass_meta <- meta[series_name %in% mass_series]
  # Same guard for the wage mass subset.
  expect_gt(nrow(mass_meta), 0)
  expect_true(all(mass_meta$unit == "currency_millions"),
              label = "Wage mass series must use unit = 'currency_millions'")
})

# =============================================================================
# IS_DERIVED CONSISTENCY
# =============================================================================

test_that("is_derived is TRUE for rate/percentage series", {
  # Purpose: the listed rate/percentage series are flagged is_derived, and the
  # listed population level series are not.

  # 1. Setup
  meta <- get_sidra_series_metadata()

  # 2. Verify: Known rate series are marked is_derived
  derived_series <- c("taxadesocup", "taxapartic", "nivelocup", "niveldesocup",
                       "taxacombdesosub", "taxacombdesopot", "taxacompsubutlz",
                       "taxasubocuphoras", "percdesalento", "perccontribprev")
  derived_meta <- meta[series_name %in% derived_series]
  # Guard: all() over zero rows is TRUE, so an empty match must fail here
  # instead of letting the flag assertion pass vacuously.
  expect_gt(nrow(derived_meta), 0)
  expect_true(all(derived_meta$is_derived),
              label = "Rate/percentage series should have is_derived = TRUE")

  # 3. Verify: Population level series are NOT derived
  level_series <- c("popocup", "popdesocup", "popnaforca", "populacao")
  level_meta <- meta[series_name %in% level_series]
  # Same guard for the level subset.
  expect_gt(nrow(level_meta), 0)
  expect_true(all(!level_meta$is_derived),
              label = "Population level series should have is_derived = FALSE")
})

# =============================================================================
# LANGUAGE SWITCHING
# =============================================================================

test_that("lang = 'en' switches description column to English", {
  # Fetch the same series once per language.
  pt <- get_sidra_series_metadata(series = "taxadesocup", lang = "pt")
  en <- get_sidra_series_metadata(series = "taxadesocup", lang = "en")

  # `description` mirrors the language-specific column that was requested.
  expect_equal(pt$description, pt$description_pt)
  expect_equal(en$description, en$description_en)

  # The two languages must not produce the same text.
  expect_false(
    identical(pt$description, en$description),
    label = "PT and EN descriptions should differ"
  )

  # Both language-specific columns are present whichever language is chosen.
  expect_true("description_pt" %in% names(en))
  expect_true("description_en" %in% names(pt))
})

test_that("lang = 'pt' is the default", {
  # Same series, once without `lang` and once with lang = "pt".
  implicit <- get_sidra_series_metadata(series = "popocup")
  explicit_pt <- get_sidra_series_metadata(series = "popocup", lang = "pt")

  # Omitting `lang` yields the Portuguese description.
  expect_equal(implicit$description, explicit_pt$description_pt)
})

# =============================================================================
# .get_mesnotrim INTERNAL FUNCTION
# =============================================================================

test_that(".get_mesnotrim maps quarter-starting months to position 1", {
  # Alias the internal function to keep the expectations short.
  mesnotrim <- PNADCperiods:::.get_mesnotrim

  # Months 1, 4, 7 and 10 each map to position 1.
  expect_equal(mesnotrim(1), 1)
  expect_equal(mesnotrim(4), 1)
  expect_equal(mesnotrim(7), 1)
  expect_equal(mesnotrim(10), 1)
})

test_that(".get_mesnotrim maps second months to position 2", {
  # Alias the internal function to keep the expectations short.
  mesnotrim <- PNADCperiods:::.get_mesnotrim

  # Months 2, 5, 8 and 11 each map to position 2.
  expect_equal(mesnotrim(2), 2)
  expect_equal(mesnotrim(5), 2)
  expect_equal(mesnotrim(8), 2)
  expect_equal(mesnotrim(11), 2)
})

test_that(".get_mesnotrim maps quarter-ending months to position 3", {
  # Alias the internal function to keep the expectations short.
  mesnotrim <- PNADCperiods:::.get_mesnotrim

  # Months 3, 6, 9 and 12 each map to position 3.
  expect_equal(mesnotrim(3), 3)
  expect_equal(mesnotrim(6), 3)
  expect_equal(mesnotrim(9), 3)
  expect_equal(mesnotrim(12), 3)
})

test_that(".get_mesnotrim is vectorized", {
  # Feed all twelve months in a single call.
  all_months <- seq_len(12L)
  positions <- PNADCperiods:::.get_mesnotrim(all_months)

  # Expected output is the pattern 1, 2, 3 repeated four times, with one
  # element per input month.
  expect_equal(positions, rep(1:3, times = 4))
  expect_length(positions, 12)
})

# =============================================================================
# .PNADC_DATES INTERNAL CONSTANTS
# =============================================================================

test_that(".PNADC_DATES contains expected constants with correct values", {
  # Read the internal constants object.
  consts <- PNADCperiods:::.PNADC_DATES

  # It is a non-empty list.
  expect_type(consts, "list")
  expect_true(length(consts) > 0)

  # Pin each constant to its expected integer value; the label states what
  # the value stands for.
  expect_equal(
    consts$PNADC_START, 201201L,
    label = "PNADC started in January 2012"
  )
  expect_equal(
    consts$VD4004_SPLIT, 201509L,
    label = "VD4004 split occurred in September 2015"
  )
  expect_equal(
    consts$V4019_AVAILABLE, 201510L,
    label = "V4019 available from October 2015"
  )
  expect_equal(
    consts$DEFAULT_CALIB_START, 201301L,
    label = "Default calibration starts January 2013"
  )
  expect_equal(
    consts$DEFAULT_CALIB_END, 201912L,
    label = "Default calibration ends December 2019 (pre-COVID)"
  )
  expect_equal(
    consts$CNPJ_CALIB_START, 201601L,
    label = "CNPJ calibration starts January 2016"
  )
  expect_equal(
    consts$PRESPLIT_CALIB_END, 201412L,
    label = "Pre-split calibration ends December 2014"
  )
})

test_that(".PNADC_DATES values are all integer scalars", {
  # Read the internal constants object.
  consts <- PNADCperiods:::.PNADC_DATES

  # Walk the entries by name; each one must be an integer of length 1.
  for (key in names(consts)) {
    value <- consts[[key]]
    expect_type(value, "integer")
    expect_length(value, 1)
  }
})

# =============================================================================
# KEY SERIES METADATA SPOT-CHECKS
# =============================================================================

test_that("taxadesocup has correct api_path and table_id", {
  # Look up the series by name.
  entry <- get_sidra_series_metadata(series = "taxadesocup")

  # Identifiers and the full request path.
  expect_equal(entry$table_id, 6381L)
  expect_equal(entry$variable_id, 4099L)
  expect_equal(entry$api_path, "/t/6381/n1/all/v/4099/p/all/d/v4099%201")

  # Unit and flags: a derived percentage that needs no deflation.
  expect_equal(entry$unit, "percent")
  expect_true(entry$is_derived)
  expect_false(entry$requires_deflation)
})

test_that("populacao series has correct metadata", {
  # Look up the series by name.
  entry <- get_sidra_series_metadata(series = "populacao")

  # Identifiers and classification.
  expect_equal(entry$table_id, 6022L)
  expect_equal(entry$variable_id, 606L)
  expect_equal(entry$theme, "demographics")

  # Unit and flags: reported in thousands, neither derived nor deflated.
  expect_equal(entry$unit, "thousands")
  expect_false(entry$is_derived)
  expect_false(entry$requires_deflation)
})

test_that("rendhabrealtodos earnings series has correct metadata", {
  # Look up the series by name.
  entry <- get_sidra_series_metadata(series = "rendhabrealtodos")

  # Table and classification.
  expect_equal(entry$table_id, 6390L)
  expect_equal(entry$theme, "earnings")

  # Unit and flags: a currency value that is not derived but needs deflation.
  expect_equal(entry$unit, "currency")
  expect_false(entry$is_derived)
  expect_true(entry$requires_deflation)
})

# =============================================================================
# API_PATH VALIDITY FOR NON-DERIVED SERIES
# =============================================================================

test_that("all non-derived series have valid api_path structure", {
  # Keep only the rows whose is_derived flag is FALSE.
  catalogue <- get_sidra_series_metadata()
  direct <- catalogue[is_derived == FALSE]

  # For each row, the path must open with "/t/<table_id>/". One expectation
  # per row so a failure names the offending series.
  for (k in seq_len(nrow(direct))) {
    wanted_prefix <- paste0("/t/", direct$table_id[k], "/")
    expect_true(
      startsWith(direct$api_path[k], wanted_prefix),
      label = paste(
        direct$series_name[k], "api_path should start with", wanted_prefix
      )
    )
  }
})

test_that("api_path contains the variable_id for all series", {
  # Work over the full catalogue.
  catalogue <- get_sidra_series_metadata()

  # For each row, the literal text "/v/<variable_id>" must occur somewhere in
  # the path (fixed-string match, not a regular expression).
  for (k in seq_len(nrow(catalogue))) {
    needle <- paste0("/v/", catalogue$variable_id[k])
    expect_true(
      grepl(needle, catalogue$api_path[k], fixed = TRUE),
      label = paste(
        catalogue$series_name[k], "api_path should contain", needle
      )
    )
  }
})

# =============================================================================
# REQUIRES_DEFLATION CONSISTENCY
# =============================================================================

test_that("requires_deflation is TRUE only for real earnings series", {
  # Split the catalogue two ways: rows flagged for deflation, and rows
  # outside the earnings theme.
  catalogue <- get_sidra_series_metadata()
  flagged <- catalogue[requires_deflation == TRUE]
  outside_earnings <- catalogue[theme != "earnings"]

  # Every flagged row belongs to the earnings theme.
  expect_true(
    all(flagged$theme == "earnings"),
    label = "Only earnings series should require deflation"
  )

  # No row outside the earnings theme carries the flag.
  expect_true(
    all(outside_earnings$requires_deflation == FALSE),
    label = "Non-earnings series should not require deflation"
  )
})

test_that("nominal earnings series do not require deflation", {
  # Select the four nominal series by name.
  nominal_hits <- get_sidra_series_metadata(
    series = c("rendhabnominaltodos", "rendefetnominaltodos",
               "massahabnominaltodos", "massaefetnominaltodos")
  )

  # None of them is flagged for deflation.
  expect_true(
    all(nominal_hits$requires_deflation == FALSE),
    label = "Nominal earnings series should not require deflation"
  )
})

test_that("real earnings series require deflation", {
  # Select the five real series by name.
  real_hits <- get_sidra_series_metadata(
    series = c("rendhabrealtodos", "rendhabrealprinc",
               "rendefetrealprinc", "massahabrealtodos", "massaefetrealtodos")
  )

  # All of them are flagged for deflation.
  expect_true(
    all(real_hits$requires_deflation == TRUE),
    label = "Real earnings series should require deflation"
  )
})

# =============================================================================
# THEME COVERAGE
# =============================================================================

test_that("all five themes are represented in the metadata", {
  # Collect the distinct themes found in the full catalogue.
  catalogue <- get_sidra_series_metadata()
  themes_found <- unique(catalogue$theme)

  # Each expected theme must appear; one expectation per theme so a failure
  # names the theme that is missing.
  themes_wanted <- c("labor_market", "earnings", "demographics",
                     "social_protection", "prices")
  for (theme_name in themes_wanted) {
    expect_true(
      theme_name %in% themes_found,
      label = paste("Theme", theme_name, "should be represented")
    )
  }

  # Conversely, the catalogue must not contain any other theme.
  expect_true(
    all(themes_found %in% themes_wanted),
    label = "No unexpected themes should exist"
  )
})

test_that("prices theme includes IPCA and INPC series", {
  # Restrict the catalogue to the prices theme.
  price_meta <- get_sidra_series_metadata(theme = "prices")
  price_names <- price_meta$series_name

  # Four price series must be part of the selection.
  expect_true("ipca100dez1993" %in% price_names)
  expect_true("ipcavarmensal" %in% price_names)
  expect_true("inpc100dez1993" %in% price_names)
  expect_true("inpcvarmensal" %in% price_names)

  # Units of the two IPCA series: "index" for ipca100dez1993 and "percent"
  # for ipcavarmensal.
  expect_equal(price_meta$unit[price_names == "ipca100dez1993"], "index")
  expect_equal(price_meta$unit[price_names == "ipcavarmensal"], "percent")
})

# =============================================================================
# CLASSIFICATION COLUMNS
# =============================================================================

test_that("classification_id and classification_value are consistent", {
  # Partition the catalogue by whether classification_id is missing.
  catalogue <- get_sidra_series_metadata()
  without_id <- catalogue[is.na(classification_id)]
  with_id <- catalogue[!is.na(classification_id)]

  # A missing id implies a missing value.
  expect_true(
    all(is.na(without_id$classification_value)),
    label = "classification_value should be NA when classification_id is NA"
  )

  # A present id implies a present value.
  expect_true(
    all(!is.na(with_id$classification_value)),
    label = "classification_value should not be NA when classification_id is set"
  )
})

test_that("api_path contains classification when classification_id is set", {
  # Keep only the rows that carry a classification_id.
  catalogue <- get_sidra_series_metadata()
  with_class <- catalogue[!is.na(classification_id)]

  # For each such row, both the classification id and its value must occur
  # as literal text in the path (fixed-string match).
  for (k in seq_len(nrow(with_class))) {
    series <- with_class$series_name[k]
    path <- with_class$api_path[k]
    class_id <- with_class$classification_id[k]
    class_value <- with_class$classification_value[k]

    expect_true(
      grepl(class_id, path, fixed = TRUE),
      label = paste(series, "api_path should contain", class_id)
    )
    expect_true(
      grepl(class_value, path, fixed = TRUE),
      label = paste(series, "api_path should contain value", class_value)
    )
  }
})
# Any scripts or data that you put into this service are public.
# PNADCperiods documentation built on April 28, 2026, 9:07 a.m.
# rdrr.io home R language documentation Run R code online
# CRAN packages Bioconductor packages R-Forge packages GitHub packages
# Note that we can't provide technical support on individual packages. You should contact the package authors for that.
# PNADCperiods
# Identify Reference Periods in Brazil's PNADC Survey Data

# tests/testthat/test-sidra-series-metadata.R
# In PNADCperiods: Identify Reference Periods in Brazil's PNADC Survey Data

# Try the PNADCperiods package in your browser

# R Package Documentation

# Browse R Packages

# We want your feedback!

# PNADCperiods Identify Reference Periods in Brazil's PNADC Survey Data

# tests/testthat/test-sidra-series-metadata.R In PNADCperiods: Identify Reference Periods in Brazil's PNADC Survey Data

# Try the PNADCperiods package in your browser

# R Package Documentation

# Browse R Packages

# We want your feedback!

# PNADCperiods
# Identify Reference Periods in Brazil's PNADC Survey Data

# tests/testthat/test-sidra-series-metadata.R
# In PNADCperiods: Identify Reference Periods in Brazil's PNADC Survey Data