# tests/testthat/test-sidra-series-metadata.R

# Tests for sidra-series-metadata.R
# Note: All tests use the metadata function's own output (no external data needed)

# =============================================================================
# DEFAULT CALL: STRUCTURE AND COMPLETENESS
# =============================================================================

test_that("default call returns 86+ rows data.table with all required columns", {
  # No arguments: ask for the complete, unfiltered catalogue.
  catalogue <- get_sidra_series_metadata()

  # The result must be a data.table with at least 86 rows.
  expect_s3_class(catalogue, "data.table")
  expect_gte(nrow(catalogue), 86)

  # Columns the catalogue is expected to expose. Each one is checked with its
  # own expectation so that a failure names the column that is missing.
  expected_columns <- c(
    "series_name", "api_path", "table_id", "variable_id",
    "classification_id", "classification_value",
    "theme", "theme_category", "subcategory",
    "description_pt", "description_en", "description",
    "unit", "unit_label_pt", "unit_label_en",
    "is_derived", "requires_deflation"
  )
  available <- names(catalogue)
  for (column_name in expected_columns) {
    expect_true(
      is.element(column_name, available),
      label = paste("Column", column_name, "must be present")
    )
  }
})

test_that("no duplicate series_name values", {
  catalogue <- get_sidra_series_metadata()
  all_names <- catalogue$series_name

  # The number of distinct names must equal the number of rows.
  n_distinct <- length(unique(all_names))
  expect_equal(n_distinct, nrow(catalogue),
               label = "series_name values must be unique")

  # Second check lists the offending names in its label, so a failure shows
  # exactly which series are repeated.
  repeated <- all_names[duplicated(all_names)]
  repeated_text <- paste(repeated, collapse = ", ")
  expect_equal(length(repeated), 0,
               label = paste("Duplicated names:", repeated_text))
})

test_that("all api_path values start with /t/", {
  catalogue <- get_sidra_series_metadata()

  # Compute the prefix match once and reuse it for both expectations.
  has_table_prefix <- grepl("^/t/", catalogue$api_path)

  expect_true(all(has_table_prefix),
              label = "All api_path values must start with /t/")

  # Collect the non-conforming paths so a failure message can list them.
  offenders <- catalogue$api_path[!has_table_prefix]
  offenders_text <- paste(offenders, collapse = "; ")
  expect_equal(length(offenders), 0,
               label = paste("Invalid paths:", offenders_text))
})

test_that("all table_id and variable_id are positive integers", {
  catalogue <- get_sidra_series_metadata()

  # table_id: stored as integer and strictly positive.
  table_ids <- catalogue$table_id
  expect_type(table_ids, "integer")
  expect_true(
    all(table_ids > 0L),
    label = "All table_id must be positive integers"
  )

  # variable_id: same two requirements.
  variable_ids <- catalogue$variable_id
  expect_type(variable_ids, "integer")
  expect_true(
    all(variable_ids > 0L),
    label = "All variable_id must be positive integers"
  )
})

# =============================================================================
# THEME FILTERING
# =============================================================================

test_that("theme filtering: labor_market returns a subset", {
  # Fetch the filtered result and the full catalogue to compare sizes.
  labor_only <- get_sidra_series_metadata(theme = "labor_market")
  everything <- get_sidra_series_metadata()

  # The filtered result is a non-empty data.table, strictly smaller than
  # the unfiltered catalogue.
  expect_s3_class(labor_only, "data.table")
  n_labor <- nrow(labor_only)
  expect_gt(n_labor, 0)
  expect_lt(n_labor, nrow(everything))

  # Every returned row carries the requested theme.
  expect_true(
    all(labor_only$theme == "labor_market"),
    label = "All rows must have theme == 'labor_market'"
  )
})

test_that("invalid theme raises error with informative message", {
  # The error message must mention the rejected theme name.
  expect_error(
    get_sidra_series_metadata(theme = "nonexistent_theme"),
    regexp = "Invalid theme.*nonexistent_theme"
  )

  # The error message must also mention a valid theme ("labor_market").
  expect_error(
    get_sidra_series_metadata(theme = "fake"),
    regexp = "labor_market"
  )
})

test_that("multiple themes can be specified", {
  requested <- c("labor_market", "earnings")
  two_themes <- get_sidra_series_metadata(theme = requested)
  returned_themes <- two_themes$theme

  # Both requested themes contribute at least one row.
  expect_true("labor_market" %in% returned_themes)
  expect_true("earnings" %in% returned_themes)

  # Nothing outside the requested themes leaks into the result.
  expect_true(
    all(returned_themes %in% requested),
    label = "Only requested themes should appear"
  )
})

# =============================================================================
# THEME_CATEGORY FILTERING
# =============================================================================

test_that("theme_category filtering works", {
  unemp_meta <- get_sidra_series_metadata(theme_category = "unemployment")

  # The filter returns rows, and all of them belong to the category.
  expect_gt(nrow(unemp_meta), 0)
  expect_true(all(unemp_meta$theme_category == "unemployment"))

  # Spot-check: the series "taxadesocup" is expected in this category.
  expect_true(is.element("taxadesocup", unemp_meta$series_name))
})

test_that("theme_category with no matches returns zero rows", {
  # An unknown category is not an error: it yields an empty result.
  empty_meta <- get_sidra_series_metadata(
    theme_category = "nonexistent_category"
  )

  # The empty result is still a data.table.
  expect_s3_class(empty_meta, "data.table")
  expect_equal(nrow(empty_meta), 0)
})

# =============================================================================
# SUBCATEGORY FILTERING
# =============================================================================

test_that("subcategory filtering includes NA subcategory rows", {
  # Filtering labor_market by subcategory "levels" is expected to keep rows
  # whose subcategory is "levels" as well as rows whose subcategory is NA.
  levels_meta <- get_sidra_series_metadata(
    theme = "labor_market",
    subcategory = "levels"
  )
  subcats <- levels_meta$subcategory

  # Rows explicitly tagged "levels" are present.
  expect_true("levels" %in% subcats)

  # Rows with a missing subcategory are kept too.
  expect_true(
    any(is.na(subcats)),
    label = "NA subcategory rows should be included when filtering by subcategory"
  )
})

test_that("subcategory filtering returns correct subset", {
  rates_meta <- get_sidra_series_metadata(
    theme = "labor_market",
    subcategory = "rates"
  )

  # Ignore NA subcategories (those rows are kept by the filter); everything
  # that remains must be exactly "rates".
  subcats <- rates_meta$subcategory
  labelled <- subcats[!is.na(subcats)]
  expect_true(
    all(labelled == "rates"),
    label = "Non-NA subcategories should all be 'rates'"
  )
})

# =============================================================================
# SPECIFIC SERIES SELECTION
# =============================================================================

test_that("specific series selection: taxadesocup returns exactly 1 row", {
  one_series <- get_sidra_series_metadata(series = "taxadesocup")

  # Selecting one series by name yields exactly that row.
  expect_equal(nrow(one_series), 1)
  expect_equal(one_series$series_name, "taxadesocup")

  # Pin the metadata values recorded for this series.
  expect_equal(one_series$table_id, 6381L)
  expect_equal(one_series$variable_id, 4099L)
  expect_equal(one_series$theme, "labor_market")
  expect_equal(one_series$theme_category, "unemployment")
  expect_equal(one_series$unit, "percent")
})

test_that("multiple specific series selection works", {
  selected <- get_sidra_series_metadata(
    series = c("taxadesocup", "popocup", "populacao")
  )

  # One row per requested series, and every requested name is returned.
  expect_equal(nrow(selected), 3)
  wanted_names <- c("taxadesocup", "popocup", "populacao")
  expect_true(all(wanted_names %in% selected$series_name))
})

test_that("invalid series name raises error", {
  # A single unknown name is rejected, and the message names it.
  expect_error(
    get_sidra_series_metadata(series = "this_series_does_not_exist"),
    regexp = "Unknown series.*this_series_does_not_exist"
  )

  # Mixing a valid name with an unknown one still errors on the unknown one.
  expect_error(
    get_sidra_series_metadata(series = c("taxadesocup", "fake_series")),
    regexp = "Unknown series.*fake_series"
  )
})

# =============================================================================
# COMBINED FILTERS
# =============================================================================

test_that("combined theme + theme_category filters work correctly", {
  # Apply both filters at once: labor_market theme, participation category.
  participation <- get_sidra_series_metadata(
    theme = "labor_market",
    theme_category = "participation"
  )

  # Every row satisfies both filters.
  expect_true(all(participation$theme == "labor_market"))
  expect_true(all(participation$theme_category == "participation"))

  # Series expected to be part of this selection.
  returned_names <- participation$series_name
  expect_true("taxapartic" %in% returned_names)
  expect_true("popocup" %in% returned_names)
  expect_true("popdesocup" %in% returned_names)
})

test_that("combined theme + subcategory + series filters work", {
  # Three filters together should narrow the catalogue down to one row.
  narrowed <- get_sidra_series_metadata(
    theme = "labor_market",
    subcategory = "rates",
    series = "taxadesocup"
  )

  expect_equal(nrow(narrowed), 1)
  expect_equal(narrowed$series_name, "taxadesocup")
})

# =============================================================================
# UNIT CONSISTENCY
# =============================================================================

test_that("population/level series have unit = thousands", {
  # Full catalogue; the series under test are picked out by name below.
  meta <- get_sidra_series_metadata()

  pop_series <- c("popocup", "popdesocup", "popnaforca", "popforadaforca",
                   "populacao", "pop14mais")

  # Guard against a vacuous pass: `%in%` silently drops names that are not in
  # the catalogue, and `all()` of an empty vector is TRUE. Require every
  # listed series to exist before checking its unit.
  missing_series <- setdiff(pop_series, meta$series_name)
  expect_identical(missing_series, character(0),
                   label = "Population series missing from metadata")

  # All listed population-level series must be expressed in thousands.
  pop_meta <- meta[series_name %in% pop_series]
  expect_true(all(pop_meta$unit == "thousands"),
              label = "All population/level series must use unit = 'thousands'")
})

test_that("rate series have unit = percent", {
  # Full catalogue; the series under test are picked out by name below.
  meta <- get_sidra_series_metadata()

  rate_series <- c("taxadesocup", "taxapartic", "nivelocup", "niveldesocup",
                    "taxacombdesosub", "taxacompsubutlz", "perccontribprev")

  # Guard against a vacuous pass: `%in%` silently drops names that are not in
  # the catalogue, and `all()` of an empty vector is TRUE. Require every
  # listed series to exist before checking its unit.
  missing_series <- setdiff(rate_series, meta$series_name)
  expect_identical(missing_series, character(0),
                   label = "Rate series missing from metadata")

  # All listed rate series must be expressed in percent.
  rate_meta <- meta[series_name %in% rate_series]
  expect_true(all(rate_meta$unit == "percent"),
              label = "All rate series must use unit = 'percent'")
})

test_that("income series have unit = currency or currency_millions", {
  # Full catalogue; the series under test are picked out by name below.
  meta <- get_sidra_series_metadata()

  earnings_avg <- c("rendhabnominaltodos", "rendhabrealtodos",
                     "rendhabrealprinc", "rendefetrealprinc")
  mass_series <- c("massahabnominaltodos", "massahabrealtodos",
                    "massaefetnominaltodos", "massaefetrealtodos")

  # Guard against a vacuous pass: `%in%` silently drops names that are not in
  # the catalogue, and `all()` of an empty vector is TRUE. Require every
  # listed series to exist before checking units.
  missing_series <- setdiff(c(earnings_avg, mass_series), meta$series_name)
  expect_identical(missing_series, character(0),
                   label = "Income series missing from metadata")

  # Average earnings series use unit "currency".
  earn_meta <- meta[series_name %in% earnings_avg]
  expect_true(all(earn_meta$unit == "currency"),
              label = "Average earnings series must use unit = 'currency'")

  # Wage mass series use unit "currency_millions".
  mass_meta <- meta[series_name %in% mass_series]
  expect_true(all(mass_meta$unit == "currency_millions"),
              label = "Wage mass series must use unit = 'currency_millions'")
})

# =============================================================================
# IS_DERIVED CONSISTENCY
# =============================================================================

test_that("is_derived is TRUE for rate/percentage series", {
  # Full catalogue; the series under test are picked out by name below.
  meta <- get_sidra_series_metadata()

  derived_series <- c("taxadesocup", "taxapartic", "nivelocup", "niveldesocup",
                       "taxacombdesosub", "taxacombdesopot", "taxacompsubutlz",
                       "taxasubocuphoras", "percdesalento", "perccontribprev")
  level_series <- c("popocup", "popdesocup", "popnaforca", "populacao")

  # Guard against a vacuous pass: `%in%` silently drops names that are not in
  # the catalogue, and `all()` of an empty vector is TRUE. Require every
  # listed series to exist before checking the flag.
  missing_series <- setdiff(c(derived_series, level_series), meta$series_name)
  expect_identical(missing_series, character(0),
                   label = "Series missing from metadata")

  # Listed rate/percentage series must be flagged as derived.
  derived_meta <- meta[series_name %in% derived_series]
  expect_true(all(derived_meta$is_derived),
              label = "Rate/percentage series should have is_derived = TRUE")

  # Listed population level series must not be flagged as derived.
  level_meta <- meta[series_name %in% level_series]
  expect_true(all(!level_meta$is_derived),
              label = "Population level series should have is_derived = FALSE")
})

# =============================================================================
# LANGUAGE SWITCHING
# =============================================================================

test_that("lang = 'en' switches description column to English", {
  # Same series requested once per language.
  in_english <- get_sidra_series_metadata(series = "taxadesocup", lang = "en")
  in_portuguese <- get_sidra_series_metadata(series = "taxadesocup", lang = "pt")

  # `description` mirrors the language-specific column selected by `lang`.
  expect_equal(in_portuguese$description, in_portuguese$description_pt)
  expect_equal(in_english$description, in_english$description_en)

  # The two languages must not produce the same description text.
  same_text <- identical(in_portuguese$description, in_english$description)
  expect_false(same_text,
               label = "PT and EN descriptions should differ")

  # Both language-specific columns are present whatever `lang` was used.
  expect_true("description_pt" %in% names(in_english))
  expect_true("description_en" %in% names(in_portuguese))
})

test_that("lang = 'pt' is the default", {
  # Call once without `lang` and once with lang = "pt" for the same series.
  implicit_lang <- get_sidra_series_metadata(series = "popocup")
  explicit_pt <- get_sidra_series_metadata(series = "popocup", lang = "pt")

  # Without `lang`, the description is the Portuguese one.
  expect_equal(implicit_lang$description, explicit_pt$description_pt)
})

# =============================================================================
# .get_mesnotrim INTERNAL FUNCTION
# =============================================================================

test_that(".get_mesnotrim maps quarter-starting months to position 1", {
  # Months 1, 4, 7 and 10 open their quarter, so each maps to position 1.
  for (first_month in c(1, 4, 7, 10)) {
    expect_equal(PNADCperiods:::.get_mesnotrim(first_month), 1)
  }
})

test_that(".get_mesnotrim maps second months to position 2", {
  # Months 2, 5, 8 and 11 are the middle month of their quarter.
  for (middle_month in c(2, 5, 8, 11)) {
    expect_equal(PNADCperiods:::.get_mesnotrim(middle_month), 2)
  }
})

test_that(".get_mesnotrim maps quarter-ending months to position 3", {
  # Months 3, 6, 9 and 12 close their quarter, so each maps to position 3.
  for (last_month in c(3, 6, 9, 12)) {
    expect_equal(PNADCperiods:::.get_mesnotrim(last_month), 3)
  }
})

test_that(".get_mesnotrim is vectorized", {
  # Feed the whole year in a single call.
  whole_year <- seq_len(12)
  positions <- PNADCperiods:::.get_mesnotrim(whole_year)

  # Expected output is the 1, 2, 3 cycle repeated once per quarter, with one
  # element per input month.
  expect_equal(positions, rep(1:3, times = 4))
  expect_length(positions, 12)
})

# =============================================================================
# .PNADC_DATES INTERNAL CONSTANTS
# =============================================================================

test_that(".PNADC_DATES contains expected constants with correct values", {
  pnadc_dates <- PNADCperiods:::.PNADC_DATES

  # The constants live in a non-empty list.
  expect_type(pnadc_dates, "list")
  expect_true(length(pnadc_dates) > 0)

  # Pin each constant to its expected integer value; the labels record what
  # each value stands for.
  expect_equal(
    pnadc_dates$PNADC_START, 201201L,
    label = "PNADC started in January 2012"
  )
  expect_equal(
    pnadc_dates$VD4004_SPLIT, 201509L,
    label = "VD4004 split occurred in September 2015"
  )
  expect_equal(
    pnadc_dates$V4019_AVAILABLE, 201510L,
    label = "V4019 available from October 2015"
  )
  expect_equal(
    pnadc_dates$DEFAULT_CALIB_START, 201301L,
    label = "Default calibration starts January 2013"
  )
  expect_equal(
    pnadc_dates$DEFAULT_CALIB_END, 201912L,
    label = "Default calibration ends December 2019 (pre-COVID)"
  )
  expect_equal(
    pnadc_dates$CNPJ_CALIB_START, 201601L,
    label = "CNPJ calibration starts January 2016"
  )
  expect_equal(
    pnadc_dates$PRESPLIT_CALIB_END, 201412L,
    label = "Pre-split calibration ends December 2014"
  )
})

test_that(".PNADC_DATES values are all integer scalars", {
  pnadc_dates <- PNADCperiods:::.PNADC_DATES

  # Walk the list by element name; each entry must be one integer value.
  for (constant_name in names(pnadc_dates)) {
    constant_value <- pnadc_dates[[constant_name]]
    expect_type(constant_value, "integer")
    expect_length(constant_value, 1)
  }
})

# =============================================================================
# KEY SERIES METADATA SPOT-CHECKS
# =============================================================================

test_that("taxadesocup has correct api_path and table_id", {
  unemp_rate <- get_sidra_series_metadata(series = "taxadesocup")

  # Identifiers and the exact request path recorded for this series.
  expect_equal(unemp_rate$table_id, 6381L)
  expect_equal(unemp_rate$variable_id, 4099L)
  expect_equal(
    unemp_rate$api_path,
    "/t/6381/n1/all/v/4099/p/all/d/v4099%201"
  )

  # Unit and flags: a percent series, derived, with no deflation needed.
  expect_equal(unemp_rate$unit, "percent")
  expect_true(unemp_rate$is_derived)
  expect_false(unemp_rate$requires_deflation)
})

test_that("populacao series has correct metadata", {
  population <- get_sidra_series_metadata(series = "populacao")

  # Identifiers and classification recorded for this series.
  expect_equal(population$table_id, 6022L)
  expect_equal(population$variable_id, 606L)
  expect_equal(population$theme, "demographics")
  expect_equal(population$unit, "thousands")

  # Flags: neither derived nor subject to deflation.
  expect_false(population$is_derived)
  expect_false(population$requires_deflation)
})

test_that("rendhabrealtodos earnings series has correct metadata", {
  real_earnings <- get_sidra_series_metadata(series = "rendhabrealtodos")

  # Table, theme and unit recorded for this series.
  expect_equal(real_earnings$table_id, 6390L)
  expect_equal(real_earnings$theme, "earnings")
  expect_equal(real_earnings$unit, "currency")

  # Flags: not derived, but it does require deflation.
  expect_false(real_earnings$is_derived)
  expect_true(real_earnings$requires_deflation)
})

# =============================================================================
# API_PATH VALIDITY FOR NON-DERIVED SERIES
# =============================================================================

test_that("all non-derived series have valid api_path structure", {
  catalogue <- get_sidra_series_metadata()
  direct <- catalogue[is_derived == FALSE]

  # Build the expected "/t/{table_id}/" prefix for every row up front, then
  # check row by row so a failure names the offending series.
  wanted_prefix <- paste0("/t/", direct$table_id, "/")
  for (idx in seq_len(nrow(direct))) {
    expect_true(
      startsWith(direct$api_path[idx], wanted_prefix[idx]),
      label = paste(direct$series_name[idx],
                    "api_path should start with", wanted_prefix[idx])
    )
  }
})

test_that("api_path contains the variable_id for all series", {
  # Full catalogue: the check is applied to every row.
  meta <- get_sidra_series_metadata()

  # Each api_path must reference its own variable_id in the /v/ segment.
  # A plain substring search would accept "/v/6061" for variable 606, so the
  # pattern also requires that the id is not followed by another digit
  # (negative lookahead, hence perl = TRUE).
  for (i in seq_len(nrow(meta))) {
    row <- meta[i]
    var_pattern <- paste0("/v/", row$variable_id)
    var_regex <- paste0(var_pattern, "(?![0-9])")
    expect_true(
      grepl(var_regex, row$api_path, perl = TRUE),
      label = paste(row$series_name, "api_path should contain", var_pattern)
    )
  }
})

# =============================================================================
# REQUIRES_DEFLATION CONSISTENCY
# =============================================================================

test_that("requires_deflation is TRUE only for real earnings series", {
  catalogue <- get_sidra_series_metadata()

  # Direction 1: every series flagged for deflation is in the earnings theme.
  needs_deflation <- catalogue[requires_deflation == TRUE]
  expect_true(
    all(needs_deflation$theme == "earnings"),
    label = "Only earnings series should require deflation"
  )

  # Direction 2: no series outside the earnings theme carries the flag.
  outside_earnings <- catalogue[theme != "earnings"]
  expect_true(
    all(outside_earnings$requires_deflation == FALSE),
    label = "Non-earnings series should not require deflation"
  )
})

test_that("nominal earnings series do not require deflation", {
  # Request the nominal earnings and wage-mass series by name.
  nominal <- get_sidra_series_metadata(
    series = c("rendhabnominaltodos", "rendefetnominaltodos",
               "massahabnominaltodos", "massaefetnominaltodos")
  )

  # None of them may be flagged for deflation.
  expect_true(
    all(nominal$requires_deflation == FALSE),
    label = "Nominal earnings series should not require deflation"
  )
})

test_that("real earnings series require deflation", {
  # Request the real earnings and wage-mass series by name.
  real_valued <- get_sidra_series_metadata(
    series = c("rendhabrealtodos", "rendhabrealprinc",
               "rendefetrealprinc", "massahabrealtodos", "massaefetrealtodos")
  )

  # All of them must be flagged for deflation.
  expect_true(
    all(real_valued$requires_deflation == TRUE),
    label = "Real earnings series should require deflation"
  )
})

# =============================================================================
# THEME COVERAGE
# =============================================================================

test_that("all five themes are represented in the metadata", {
  found_themes <- unique(get_sidra_series_metadata()$theme)

  # The five themes the catalogue is expected to cover.
  known_themes <- c("labor_market", "earnings", "demographics",
                    "social_protection", "prices")

  # Each expected theme occurs at least once; checked one by one so a
  # failure names the missing theme.
  for (theme_name in known_themes) {
    expect_true(
      is.element(theme_name, found_themes),
      label = paste("Theme", theme_name, "should be represented")
    )
  }

  # And the catalogue contains no theme beyond those five.
  expect_true(
    all(found_themes %in% known_themes),
    label = "No unexpected themes should exist"
  )
})

test_that("prices theme includes IPCA and INPC series", {
  price_meta <- get_sidra_series_metadata(theme = "prices")

  # Both the index-level and monthly-variation series of IPCA and INPC are
  # expected under the prices theme.
  price_names <- price_meta$series_name
  expect_true("ipca100dez1993" %in% price_names)
  expect_true("ipcavarmensal" %in% price_names)
  expect_true("inpc100dez1993" %in% price_names)
  expect_true("inpcvarmensal" %in% price_names)

  # Units: the index-level series uses "index", the variation uses "percent".
  index_row <- price_meta[series_name == "ipca100dez1993"]
  expect_equal(index_row$unit, "index")
  variation_row <- price_meta[series_name == "ipcavarmensal"]
  expect_equal(variation_row$unit, "percent")
})

# =============================================================================
# CLASSIFICATION COLUMNS
# =============================================================================

test_that("classification_id and classification_value are consistent", {
  catalogue <- get_sidra_series_metadata()

  # Rows without a classification id must not carry a classification value.
  unclassified <- catalogue[is.na(classification_id)]
  expect_true(
    all(is.na(unclassified$classification_value)),
    label = "classification_value should be NA when classification_id is NA"
  )

  # Rows with a classification id must always carry a value.
  with_class <- catalogue[!is.na(classification_id)]
  expect_true(
    !any(is.na(with_class$classification_value)),
    label = "classification_value should not be NA when classification_id is set"
  )
})

test_that("api_path contains classification when classification_id is set", {
  catalogue <- get_sidra_series_metadata()
  with_class <- catalogue[!is.na(classification_id)]

  # For each classified series, both the classification id and its value must
  # appear literally (fixed-string match) somewhere in the api_path.
  for (idx in seq_len(nrow(with_class))) {
    entry <- with_class[idx]
    path <- entry$api_path

    id_found <- grepl(entry$classification_id, path, fixed = TRUE)
    expect_true(
      id_found,
      label = paste(entry$series_name, "api_path should contain",
                    entry$classification_id)
    )

    value_found <- grepl(entry$classification_value, path, fixed = TRUE)
    expect_true(
      value_found,
      label = paste(entry$series_name, "api_path should contain value",
                    entry$classification_value)
    )
  }
})

# Try the PNADCperiods package in your browser
#
# Any scripts or data that you put into this service are public.
#
# PNADCperiods documentation built on April 28, 2026, 9:07 a.m.