tests/testthat/test-fetch-sidra-population.R

# Tests for fetch-sidra-population.R
# Note: Some tests require internet connection and sidrar package

test_that("fetch_monthly_population requires sidrar package", {
  skip_if(requireNamespace("sidrar", quietly = TRUE),
          "sidrar is installed, skipping missing package test")

  # Mock the requireNamespace to return FALSE
  # This test only runs if sidrar is not installed
  expect_error(fetch_monthly_population(), "Package 'sidrar' is required")
})

test_that("fetch_monthly_population returns expected structure", {
  skip_on_cran()
  skip_if_not(requireNamespace("sidrar", quietly = TRUE),
              "sidrar package not available")
  skip_if_offline()

  result <- fetch_monthly_population(verbose = FALSE)

  # Should be a data.table
  expect_s3_class(result, "data.table")

  # Should have required columns
  expect_true("ref_month_yyyymm" %in% names(result))
  expect_true("m_populacao" %in% names(result))

  # Should have reasonable values
  expect_true(all(result$ref_month_yyyymm >= 201201))
  expect_true(all(result$m_populacao > 0, na.rm = TRUE))
})

test_that("fetch_monthly_population respects date range", {
  skip_on_cran()
  skip_if_not(requireNamespace("sidrar", quietly = TRUE),
              "sidrar package not available")
  skip_if_offline()

  result <- fetch_monthly_population(
    start_yyyymm = 201501,
    end_yyyymm = 201512,
    verbose = FALSE
  )

  # Should only have 2015 months
  expect_true(all(result$ref_month_yyyymm >= 201501))
  expect_true(all(result$ref_month_yyyymm <= 201512))
  expect_equal(nrow(result), 12)
})

test_that("transform_moving_quarter_to_monthly transforms correctly", {
  # Create sample moving quarter data
  dt <- data.table::data.table(
    anomesfinaltrimmovel = c(201203L, 201204L, 201205L, 201206L),
    populacao = c(200000, 200100, 200200, 200300)
  )

  result <- transform_moving_quarter_to_monthly(dt, verbose = FALSE)

  # Should have anomesexato and m_populacao columns
  expect_true("anomesexato" %in% names(result))
  expect_true("m_populacao" %in% names(result))

  # Should have correct number of rows (original + 2 dummy rows)
  expect_equal(nrow(result), 6)

  # The shift logic: month N gets population from row N+1
  # So 201202 (Feb 2012) gets value from 201203 row = 200000
  expect_equal(result[anomesexato == 201202L, m_populacao], 200000)

  # First month (201201) should be NA (needs extrapolation)
  expect_true(is.na(result[anomesexato == 201201L, m_populacao]))
})

test_that("extrapolate_boundary_months fills in boundary values", {
  # Create a series with missing first and last values
  set.seed(42)
  n <- 30
  dt <- data.table::data.table(
    anomesexato = generate_yyyymm_seq(201201L, n),
    m_populacao = c(NA, 200000 + cumsum(rnorm(n-2, 100, 10)), NA)
  )

  result <- extrapolate_boundary_months(dt)

  # Should fill in first and last values
  expect_false(is.na(result[1, m_populacao]))
  expect_false(is.na(result[n, m_populacao]))

  # Values should be reasonable (close to neighbors)
  expect_true(abs(result[1, m_populacao] - result[2, m_populacao]) < 500)
  expect_true(abs(result[n, m_populacao] - result[n-1, m_populacao]) < 500)
})

test_that("extrapolate_boundary_months handles short series", {
  # Series too short for extrapolation (< 26 rows)
  dt <- data.table::data.table(
    anomesexato = generate_yyyymm_seq(201201L, 10L),
    m_populacao = c(NA, 200000:200007, NA)
  )

  result <- extrapolate_boundary_months(dt)

  # With only 10 rows, cannot extrapolate using 26-row regression
  # First and last should still be NA
  expect_true(is.na(result[1, m_populacao]))
  expect_true(is.na(result[10, m_populacao]))
})

test_that("extrapolate_boundary_months preserves middle values", {
  set.seed(42)
  n <- 30
  original_middle <- 200000 + cumsum(rnorm(n-2, 100, 10))

  dt <- data.table::data.table(
    anomesexato = generate_yyyymm_seq(201201L, n),
    m_populacao = c(NA, original_middle, NA)
  )

  result <- extrapolate_boundary_months(dt)

  # Middle values should be unchanged
  expect_equal(result$m_populacao[2:(n-1)], original_middle)
})

test_that("extrapolate_boundary_months removes temporary columns", {
  set.seed(42)
  n <- 30
  dt <- data.table::data.table(
    anomesexato = generate_yyyymm_seq(201201L, n),
    m_populacao = c(NA, 200000 + cumsum(rnorm(n-2, 100, 10)), NA)
  )

  result <- extrapolate_boundary_months(dt)

  # Should not have regression columns
  expect_false("row_num" %in% names(result))
  expect_false("row_num2" %in% names(result))
  expect_false("d_pop" %in% names(result))
})


# =============================================================================
# CACHING BEHAVIOR TESTS
# =============================================================================

test_that("clear_sidra_cache clears the cache", {
  skip_on_cran()
  skip_if_not(requireNamespace("sidrar", quietly = TRUE),
              "sidrar package not available")
  skip_if_offline()

  # 1. Setup: Fetch data to populate cache
  fetch_monthly_population(
    start_yyyymm = 201501,
    end_yyyymm = 201503,
    use_cache = TRUE,
    verbose = FALSE
  )

  # 2. Execute: Clear cache
  clear_sidra_cache()

  # 3. Verify: Cache environment should be empty
  cache_env <- PNADCperiods:::.sidra_cache
  expect_false(exists("population_data", envir = cache_env))
  expect_false(exists("population_time", envir = cache_env))

  # 4. Context: Clearing cache forces fresh API calls
})


test_that("use_cache=FALSE bypasses cache", {
  skip_on_cran()
  skip_if_not(requireNamespace("sidrar", quietly = TRUE),
              "sidrar package not available")
  skip_if_offline()

  # 1. Setup: Clear cache first
  clear_sidra_cache()

  # 2. Execute: Fetch with use_cache=FALSE
  result1 <- fetch_monthly_population(
    start_yyyymm = 201501,
    end_yyyymm = 201503,
    use_cache = FALSE,
    verbose = FALSE
  )

  # 3. Verify: Cache should still be empty
  cache_env <- PNADCperiods:::.sidra_cache
  expect_false(exists("population_data", envir = cache_env))

  # 4. Context: use_cache=FALSE prevents both reading and writing cache
})


test_that("cached data is returned on second call", {
  skip_on_cran()
  skip_if_not(requireNamespace("sidrar", quietly = TRUE),
              "sidrar package not available")
  skip_if_offline()

  # 1. Setup: Clear cache
  clear_sidra_cache()

  # 2. Execute: First call (populates cache)
  result1 <- fetch_monthly_population(
    start_yyyymm = 201501,
    end_yyyymm = 201503,
    use_cache = TRUE,
    verbose = FALSE
  )

  # 3. Execute: Second call (should use cache)
  result2 <- fetch_monthly_population(
    start_yyyymm = 201501,
    end_yyyymm = 201503,
    use_cache = TRUE,
    verbose = FALSE
  )

  # 4. Verify: Results should be identical
  expect_equal(nrow(result1), nrow(result2))
  expect_equal(result1$ref_month_yyyymm, result2$ref_month_yyyymm)
  expect_equal(result1$m_populacao, result2$m_populacao)

  # 5. Context: Cache improves performance on repeated calls
})


test_that("cache respects different date ranges", {
  skip_on_cran()
  skip_if_not(requireNamespace("sidrar", quietly = TRUE),
              "sidrar package not available")
  skip_if_offline()

  # 1. Setup: Clear cache
  clear_sidra_cache()

  # 2. Execute: Cache stores full data, filters are applied after
  result1 <- fetch_monthly_population(
    start_yyyymm = 201501,
    end_yyyymm = 201503,
    use_cache = TRUE,
    verbose = FALSE
  )

  # 3. Execute: Different date range should work correctly
  result2 <- fetch_monthly_population(
    start_yyyymm = 201504,
    end_yyyymm = 201506,
    use_cache = TRUE,
    verbose = FALSE
  )

  # 4. Verify: Should have different months
  expect_false(any(result2$ref_month_yyyymm %in% result1$ref_month_yyyymm))
  expect_true(all(result2$ref_month_yyyymm >= 201504))
  expect_true(all(result2$ref_month_yyyymm <= 201506))

  # 5. Context: This test verifies the Phase 1 bug fix - cache stores full data
})


test_that("cache_max_age_hours parameter accepted", {
  skip_on_cran()
  skip_if_not(requireNamespace("sidrar", quietly = TRUE),
              "sidrar package not available")
  skip_if_offline()

  # 1. Setup: Clear cache
  clear_sidra_cache()

  # 2. Execute: Fetch with cache_max_age_hours parameter
  expect_no_error({
    result <- fetch_monthly_population(
      start_yyyymm = 201501,
      end_yyyymm = 201503,
      use_cache = TRUE,
      cache_max_age_hours = 24,
      verbose = FALSE
    )
  })

  # 3. Context: cache_max_age_hours parameter controls cache expiration
})


# =============================================================================
# CRAN POLICY: Internet resources must fail gracefully
# (no warning, no error). Mocked tests run offline.
# =============================================================================

test_that("fetch_monthly_population fails gracefully when SIDRA is unreachable", {
  skip_if_not_installed("sidrar")
  skip_if_not_installed("testthat", "3.0.0")

  # Simulate API down: any call to sidrar::get_sidra throws.
  testthat::local_mocked_bindings(
    get_sidra = function(...) stop("simulated network error"),
    .package = "sidrar"
  )

  expect_message(
    result <- fetch_monthly_population(verbose = FALSE),
    "failed to fetch from SIDRA API"
  )
  expect_null(result)
})


# =============================================================================
# ERROR HANDLING TESTS
# =============================================================================

test_that("transform_moving_quarter_to_monthly handles edge cases", {
  # 1. Setup: Create data.table with moving quarter data
  dt <- data.table::data.table(
    anomesfinaltrimmovel = c(201203L, 201204L, 201205L),
    populacao = c(200000, 200100, 200200)
  )

  # 2. Execute: Should not error
  expect_no_error({
    result <- transform_moving_quarter_to_monthly(dt, verbose = FALSE)
  })

  # 3. Verify: Should produce output
  expect_true(nrow(result) > 0)

  # 4. Context: Function handles moving quarter transformation
})

Try the PNADCperiods package in your browser

Any scripts or data that you put into this service are public.

PNADCperiods documentation built on April 28, 2026, 9:07 a.m.