tests/testthat/test-mensalize-sidra-series.R

# Tests for mensalize-sidra-series.R
# Covers the exported mensalize_sidra_series() function and its internal helpers:
#   .compute_cumsum_by_mesnotrim(), .extract_y0_vector(),
#   .apply_final_adjustment(), .mensalize_single_series(), .mensalize_split_series()
#
# All tests use synthetic mock data (no API calls).

# =============================================================================
# HELPER: generate proper YYYYMM sequence and mesnotrim
# =============================================================================

make_rolling_quarter_dt <- function(start_yyyymm = 201201L, n_months = 36L,
                                    series_name = "popocup",
                                    base_value = 100000, trend = 100) {

  start_year <- start_yyyymm %/% 100L
  start_month <- start_yyyymm %% 100L

  dt <- data.table::data.table(month_num = seq_len(n_months))
  dt[, `:=`(
    year = start_year + (start_month + month_num - 2L) %/% 12L,
    month = ((start_month + month_num - 2L) %% 12L) + 1L
  )]
  dt[, anomesfinaltrimmovel := year * 100L + month]
  dt[, mesnotrim := ((month - 1L) %% 3L) + 1L]

  # Deterministic series: base + linear trend + small seasonal component
  dt[, (series_name) := base_value + trend * (month_num - 1) +
       500 * sin(2 * pi * month_num / 12)]

  dt[, .SD, .SDcols = c("anomesfinaltrimmovel", "mesnotrim", series_name)]
}


# =============================================================================
# .compute_cumsum_by_mesnotrim() TESTS
# =============================================================================

test_that(".compute_cumsum_by_mesnotrim computes d3 correctly for simple case", {
  # 1. Setup: 6 months of constant rolling quarters (no change expected)
  rq <- c(300, 300, 300, 300, 300, 300)
  mesnotrim <- c(1L, 2L, 3L, 1L, 2L, 3L)

  # 2. Execute
  cum <- PNADCperiods:::.compute_cumsum_by_mesnotrim(rq, mesnotrim)

  # 3. Verify: d3 = 3*(RQ_t - RQ_{t-1}) = 0 for all interior,
  #    first of each mesnotrim position is set to NA (d3=0 for cumsum)

  #    All cumsums should be 0 since no variation exists
  expect_equal(cum[4], 0, label = "Cumsum for second pos1 should be 0 (no variation)")
  expect_equal(cum[5], 0, label = "Cumsum for second pos2 should be 0 (no variation)")
  expect_equal(cum[6], 0, label = "Cumsum for second pos3 should be 0 (no variation)")
})


test_that(".compute_cumsum_by_mesnotrim separates 3 mesnotrim positions", {
  # 1. Setup: Different step sizes at each month position
  #    RQ sequence: 100, 200, 300, 110, 210, 310, 120, 220, 320
  rq <- c(100, 200, 300, 110, 210, 310, 120, 220, 320)
  mesnotrim <- c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L)

  # 2. Execute
  cum <- PNADCperiods:::.compute_cumsum_by_mesnotrim(rq, mesnotrim)

  # 3. Verify: Each position accumulates independently
  #    For pos1: first d3 = NA (forced 0), d3[4] = 3*(110-300) = -570, d3[7] = 3*(120-310) = -570
  #    For pos2: first d3 = NA (forced 0), d3[5] = 3*(210-110) = 300, d3[8] = 3*(220-120) = 300
  #    For pos3: first d3 = NA (forced 0), d3[6] = 3*(310-210) = 300, d3[9] = 3*(320-220) = 300
  # Pos1 cumsums:
  expect_equal(cum[1], 0, label = "First pos1 cumsum is 0")
  # Pos2 cumsums:
  expect_equal(cum[2], 0, label = "First pos2 cumsum is 0")
  # Pos3 cumsums:
  expect_equal(cum[3], 0, label = "First pos3 cumsum is 0")

  # The second occurrence of each position should have the correct d3 accumulated
  # pos1: cum[4] = d3 at index 4 = 3*(110 - 300) = -570
  expect_equal(cum[4], -570, label = "Second pos1 d3 accumulated correctly")
  # pos2: cum[5] = d3 at index 5 = 3*(210 - 110) = 300
  expect_equal(cum[5], 300, label = "Second pos2 d3 accumulated correctly")
  # pos3: cum[6] = d3 at index 6 = 3*(310 - 210) = 300
  expect_equal(cum[6], 300, label = "Second pos3 d3 accumulated correctly")
})


test_that(".compute_cumsum_by_mesnotrim first obs of each position starts at 0", {
  # 1. Setup: 9 months with non-trivial values
  rq <- c(1000, 2000, 3000, 1100, 2100, 3100, 1200, 2200, 3200)
  mesnotrim <- c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L)

  # 2. Execute
  cum <- PNADCperiods:::.compute_cumsum_by_mesnotrim(rq, mesnotrim)

  # 3. Verify: First occurrence of each position = 0
  expect_equal(cum[1], 0, label = "First pos1 starts at 0")
  expect_equal(cum[2], 0, label = "First pos2 starts at 0")
  expect_equal(cum[3], 0, label = "First pos3 starts at 0")
})


test_that(".compute_cumsum_by_mesnotrim respects filter_mask", {
  # 1. Setup: 9 months but mask out the middle 3
  rq <- c(100, 200, 300, 110, 210, 310, 120, 220, 320)
  mesnotrim <- c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L)
  mask <- c(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE)

  # 2. Execute
  cum <- PNADCperiods:::.compute_cumsum_by_mesnotrim(rq, mesnotrim, filter_mask = mask)

  # 3. Verify: d3 at masked positions (4,5,6) treated as 0
  #    pos1: d3[1]=0, d3[4]=0(masked), d3[7] = 3*(120-310) = -570
  #    So cum[7] = -570 (only one unmasked step)
  #    Compare to unmasked: cum[7] would be -570 + (-570) = -1140
  cum_unmasked <- PNADCperiods:::.compute_cumsum_by_mesnotrim(rq, mesnotrim)
  expect_true(cum[7] != cum_unmasked[7],
              label = "Filtered cumsum differs from unfiltered")
})


test_that(".compute_cumsum_by_mesnotrim handles all-NA input", {
  # 1. Setup: All NA rolling quarter values
  rq <- rep(NA_real_, 9)
  mesnotrim <- c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L)

  # 2. Execute
  cum <- PNADCperiods:::.compute_cumsum_by_mesnotrim(rq, mesnotrim)

  # 3. Verify: All cumsums should be NA (no data to cumulate)
  expect_true(all(is.na(cum)),
              label = "All-NA input produces all-NA cumsum")
})


# =============================================================================
# .extract_y0_vector() TESTS
# =============================================================================

test_that(".extract_y0_vector returns 3 values for found series", {
  # 1. Setup: Starting points table with 3 positions for one series
  sp <- data.table::data.table(
    series_name = rep("popocup", 3),
    mesnotrim = 1:3,
    y0 = c(90000, 91000, 92000)
  )

  # 2. Execute
  y0 <- PNADCperiods:::.extract_y0_vector(sp, "popocup")

  # 3. Verify: 3-element vector with correct values

  expect_length(y0, 3)
  expect_equal(y0[1], 90000)
  expect_equal(y0[2], 91000)
  expect_equal(y0[3], 92000)
})


test_that(".extract_y0_vector returns 3 NAs for missing series", {
  # 1. Setup: Starting points table without the target series
  sp <- data.table::data.table(
    series_name = rep("popocup", 3),
    mesnotrim = 1:3,
    y0 = c(90000, 91000, 92000)
  )

  # 2. Execute
  y0 <- PNADCperiods:::.extract_y0_vector(sp, "popdesocup")

  # 3. Verify: 3 NAs when series not found
  expect_length(y0, 3)
  expect_true(all(is.na(y0)),
              label = "Missing series returns c(NA, NA, NA)")
})


test_that(".extract_y0_vector handles partial starting points", {
  # 1. Setup: Starting points with only 2 of 3 positions
  sp <- data.table::data.table(
    series_name = c("popocup", "popocup"),
    mesnotrim = c(1L, 3L),
    y0 = c(90000, 92000)
  )

  # 2. Execute
  y0 <- PNADCperiods:::.extract_y0_vector(sp, "popocup")

  # 3. Verify: Position 2 is NA, others filled
  expect_length(y0, 3)
  expect_equal(y0[1], 90000)
  expect_true(is.na(y0[2]), label = "Missing position 2 is NA")
  expect_equal(y0[3], 92000)
})


# =============================================================================
# .apply_final_adjustment() TESTS
# =============================================================================

test_that(".apply_final_adjustment preserves rolling quarter consistency", {
  # 1. Setup: 12 months where mean(m[t], m[t+1], m[t+2]) should == rq[t+2]
  #    Use known y values and rq values
  n <- 12
  mesnotrim <- rep(1:3, 4)
  # Create y values that differ from rq (final adjustment corrects them)
  y <- c(100, 200, 300, 110, 210, 310, 120, 220, 320, 130, 230, 330)
  # rq values: rolling quarter = average of 3 consecutive months
  # We want rq[3] = mean(m[1],m[2],m[3]), rq[4] = mean(m[2],m[3],m[4]), etc.
  # For testing, set rq = average of y with small shift
  rq <- c(NA, NA, mean(y[1:3]) + 5,
          mean(y[2:4]) + 3, mean(y[3:5]) + 2, mean(y[4:6]) + 1,
          mean(y[5:7]) + 4, mean(y[6:8]) + 2, mean(y[7:9]) + 3,
          mean(y[8:10]) + 1, mean(y[9:11]) + 5, mean(y[10:12]) + 2)

  # 2. Execute
  m <- PNADCperiods:::.apply_final_adjustment(y, rq, mesnotrim)

  # 3. Verify: For mesnotrim==3 positions where all neighbors are available,
  #    mean(m[i-2], m[i-1], m[i]) should equal rq[i]
  for (i in seq(3, 12, by = 3)) {
    if (i >= 3 && !is.na(rq[i])) {
      trio_mean <- mean(m[(i - 2):i])
      expect_equal(trio_mean, rq[i], tolerance = 1e-10,
                   label = paste0("Rolling quarter consistency at position ", i))
    }
  }
})


test_that(".apply_final_adjustment falls back to y at boundaries", {
  # 1. Setup: Short sequence where lead/lag values are missing
  y <- c(100, 200, 300)
  rq <- c(NA, NA, 200)
  mesnotrim <- c(1L, 2L, 3L)

  # 2. Execute
  m <- PNADCperiods:::.apply_final_adjustment(y, rq, mesnotrim)

  # 3. Verify: mesnotrim==1 cannot look ahead 2 positions beyond length
  #    (y_lead1 and y_lead2 will be NA for last elements),
  #    mesnotrim==1 at position 1: needs y_lead1[1], y_lead2[1] which exist
  #    mesnotrim==3 at position 3: needs y_lag1 and y_lag2 which exist
  expect_length(m, 3)
  # For this 3-element case, all trios are complete
  # pos1 (i=1): valid1 = needs y[1], y_lead1=y[2], y_lead2=y[3], rq_lead2=rq[3]
  # All available, so adjustment is applied
  expect_false(is.na(m[1]), label = "Position 1 gets adjusted value (complete trio)")
  expect_false(is.na(m[3]), label = "Position 3 gets adjusted value (complete trio)")
})


test_that(".apply_final_adjustment works with 12-month sequence", {
  # 1. Setup: 12 months with linearly increasing rq
  mesnotrim <- rep(1:3, 4)
  rq <- seq(1000, 1110, by = 10)  # 1000, 1010, ..., 1110
  # Set y to be rq itself (adjustment should be minimal when y is already close)
  y <- rq

  # 2. Execute
  m <- PNADCperiods:::.apply_final_adjustment(y, rq, mesnotrim)

  # 3. Verify: For complete trios at mesnotrim==3 positions,
  #    the rolling quarter relationship holds
  # At i=3: rq[3] should == mean(m[1], m[2], m[3])
  expect_equal(mean(m[1:3]), rq[3], tolerance = 1e-10,
               label = "First complete trio matches rq")
  # At i=6: rq[6] should == mean(m[4], m[5], m[6])
  expect_equal(mean(m[4:6]), rq[6], tolerance = 1e-10,
               label = "Second complete trio matches rq")
  # At i=9: rq[9] should == mean(m[7], m[8], m[9])
  expect_equal(mean(m[7:9]), rq[9], tolerance = 1e-10,
               label = "Third complete trio matches rq")
})


# =============================================================================
# .mensalize_single_series() TESTS
# =============================================================================

test_that(".mensalize_single_series produces standard output", {
  # 1. Setup: Build a data.table with 36 months of rolling quarter data
  dt <- make_rolling_quarter_dt(
    start_yyyymm = 201201L, n_months = 36L,
    series_name = "popocup", base_value = 90000, trend = 100
  )

  # Create starting points for this series
  sp <- data.table::data.table(
    series_name = rep("popocup", 3),
    mesnotrim = 1:3,
    y0 = c(89500, 89800, 90100)
  )

  # 2. Execute
  m_values <- PNADCperiods:::.mensalize_single_series(dt, "popocup", sp)

  # 3. Verify: Output is numeric vector with same length as input
  expect_length(m_values, nrow(dt))
  expect_type(m_values, "double")
  # Non-NA values should exist for most of the series (at least after first few)
  # With 36 months and valid starting points, all values should be non-NA
  # (cumsum produces 0 at first positions, y0 is non-NA, so y is non-NA everywhere)
  expect_true(sum(!is.na(m_values)) > nrow(dt) * 0.5,
              label = "Most values should be non-NA")
})


test_that(".mensalize_single_series returns all NA when series not in starting_points", {
  # 1. Setup: Data has series "popocup" but starting points are for a different series
  dt <- make_rolling_quarter_dt(
    start_yyyymm = 201201L, n_months = 12L,
    series_name = "popocup", base_value = 90000
  )

  sp <- data.table::data.table(
    series_name = rep("popdesocup", 3),
    mesnotrim = 1:3,
    y0 = c(10000, 10500, 11000)
  )

  # 2. Execute
  m_values <- PNADCperiods:::.mensalize_single_series(dt, "popocup", sp)

  # 3. Verify: y0 is c(NA,NA,NA), so y = NA + cum => all NA
  expect_true(all(is.na(m_values)),
              label = "Missing starting points produce all-NA output")
})


# =============================================================================
# .mensalize_split_series() TESTS
# =============================================================================

test_that(".mensalize_split_series handles spanning split correctly", {
  # 1. Setup: Series spanning from 201201 to 201612 (5 years, crosses 201509 split)
  dt <- make_rolling_quarter_dt(
    start_yyyymm = 201201L, n_months = 60L,
    series_name = "subocuphoras", base_value = 5000, trend = 10
  )

  # Starting points for pre-split and post-split
  sp <- data.table::data.table(
    series_name = c(rep("subocuphoras_pre", 3), rep("subocuphoras", 3)),
    mesnotrim = c(1:3, 1:3),
    y0 = c(4800, 4900, 5000, 5400, 5500, 5600)
  )

  # 2. Execute
  m_values <- PNADCperiods:::.mensalize_split_series(
    dt, "subocuphoras", sp, split_month = 201509L
  )

  # 3. Verify: Output covers full range
  expect_length(m_values, nrow(dt))
  expect_type(m_values, "double")

  # Pre-split values (up to and including 201509) should be mostly non-NA
  # With valid starting points, all values in each segment should be non-NA
  pre_idx <- which(dt$anomesfinaltrimmovel <= 201509L)
  expect_true(sum(!is.na(m_values[pre_idx])) > length(pre_idx) * 0.5,
              label = "Pre-split values mostly non-NA")

  # Post-split values (after 201509) should be mostly non-NA
  post_idx <- which(dt$anomesfinaltrimmovel > 201509L)
  expect_true(sum(!is.na(m_values[post_idx])) > length(post_idx) * 0.5,
              label = "Post-split values mostly non-NA")
})


test_that(".mensalize_split_series handles data entirely before split", {
  # 1. Setup: Series ending before the split point
  dt <- make_rolling_quarter_dt(
    start_yyyymm = 201201L, n_months = 36L,  # ends Dec 2014, before split
    series_name = "subocuphoras", base_value = 5000, trend = 10
  )

  sp <- data.table::data.table(
    series_name = c(rep("subocuphoras_pre", 3), rep("subocuphoras", 3)),
    mesnotrim = c(1:3, 1:3),
    y0 = c(4800, 4900, 5000, 5400, 5500, 5600)
  )

  # 2. Execute
  m_values <- PNADCperiods:::.mensalize_split_series(
    dt, "subocuphoras", sp, split_month = 201509L
  )

  # 3. Verify: Only pre-split logic is used
  expect_length(m_values, nrow(dt))
  # Should produce values using pre-split starting points
  expect_true(sum(!is.na(m_values)) > nrow(dt) * 0.5,
              label = "Before-split data fully processed")
})


test_that(".mensalize_split_series handles data entirely after split", {
  # 1. Setup: Series starting after the split point
  dt <- make_rolling_quarter_dt(
    start_yyyymm = 201601L, n_months = 24L,
    series_name = "subocuphoras", base_value = 5500, trend = 10
  )

  sp <- data.table::data.table(
    series_name = c(rep("subocuphoras_pre", 3), rep("subocuphoras", 3)),
    mesnotrim = c(1:3, 1:3),
    y0 = c(4800, 4900, 5000, 5400, 5500, 5600)
  )

  # 2. Execute
  m_values <- PNADCperiods:::.mensalize_split_series(
    dt, "subocuphoras", sp, split_month = 201509L
  )

  # 3. Verify: Only post-split logic is used
  expect_length(m_values, nrow(dt))
  expect_true(sum(!is.na(m_values)) > nrow(dt) * 0.5,
              label = "After-split data fully processed")
})


test_that(".mensalize_split_series resets cumsum at split boundary", {
  # 1. Setup: Data spanning the split with a jump in values
  dt <- make_rolling_quarter_dt(
    start_yyyymm = 201501L, n_months = 24L,  # Jan 2015 - Dec 2016
    series_name = "subocuphoras", base_value = 5000, trend = 10
  )
  # Introduce a level shift after the split to test independence
  post_rows <- dt$anomesfinaltrimmovel > 201509L
  data.table::set(dt, i = which(post_rows), j = "subocuphoras",
                  value = dt[["subocuphoras"]][post_rows] + 2000)

  sp <- data.table::data.table(
    series_name = c(rep("subocuphoras_pre", 3), rep("subocuphoras", 3)),
    mesnotrim = c(1:3, 1:3),
    y0 = c(4800, 4900, 5000, 7400, 7500, 7600)
  )

  # 2. Execute
  m_values <- PNADCperiods:::.mensalize_split_series(
    dt, "subocuphoras", sp, split_month = 201509L
  )

  # 3. Verify: Post-split values should reflect the level shift
  #    Pre-split values should be around 5000 level
  #    Post-split values should be around 7000+ level
  pre_mean <- mean(m_values[!post_rows], na.rm = TRUE)
  post_mean <- mean(m_values[post_rows], na.rm = TRUE)
  expect_true(post_mean > pre_mean + 1000,
              label = "Post-split reflects level shift (cumsums reset)")
})


# =============================================================================
# mensalize_sidra_series() EXPORTED FUNCTION TESTS
# =============================================================================

test_that("mensalize_sidra_series errors on missing anomesfinaltrimmovel", {
  # 1. Setup: data.table without the required column
  dt <- data.table::data.table(
    mesnotrim = c(1L, 2L, 3L),
    popocup = c(100, 200, 300)
  )

  # 2. Verify: Error about missing column
  expect_error(
    mensalize_sidra_series(dt, verbose = FALSE),
    "Missing required columns.*anomesfinaltrimmovel",
    label = "Missing anomesfinaltrimmovel raises error"
  )
})


test_that("mensalize_sidra_series errors on missing mesnotrim", {
  # 1. Setup: data.table without mesnotrim
  dt <- data.table::data.table(
    anomesfinaltrimmovel = c(201203L, 201204L, 201205L),
    popocup = c(100, 200, 300)
  )

  # 2. Verify: Error about missing column
  expect_error(
    mensalize_sidra_series(dt, verbose = FALSE),
    "Missing required columns.*mesnotrim",
    label = "Missing mesnotrim raises error"
  )
})


test_that("mensalize_sidra_series auto-converts data.frame to data.table", {
  # 1. Setup: plain data.frame input with starting points
  n <- 36L
  dt <- make_rolling_quarter_dt(
    start_yyyymm = 201201L, n_months = n, series_name = "popocup"
  )
  df <- as.data.frame(dt)  # convert to plain data.frame

  sp <- data.table::data.table(
    series_name = rep("popocup", 3),
    mesnotrim = 1:3,
    y0 = c(99000, 99500, 100000)
  )

  # 2. Execute: Should not error
  expect_no_error({
    result <- mensalize_sidra_series(
      df,
      starting_points = sp,
      series = "popocup",
      compute_derived = FALSE,
      verbose = FALSE
    )
  })

  # 3. Verify: Result is data.table with expected columns
  result <- mensalize_sidra_series(
    df,
    starting_points = sp,
    series = "popocup",
    compute_derived = FALSE,
    verbose = FALSE
  )
  expect_s3_class(result, "data.table")
  expect_true("m_popocup" %in% names(result))
})


test_that("mensalize_sidra_series respects specific series selection", {
  # 1. Setup: Data with multiple series
  dt <- make_rolling_quarter_dt(
    start_yyyymm = 201201L, n_months = 36L, series_name = "popocup"
  )
  # Add additional series columns using data.table::set to avoid scoping issues
  data.table::set(dt, j = "popdesocup",
                  value = dt[["popocup"]] * 0.1 + rnorm(nrow(dt), 0, 50))
  data.table::set(dt, j = "popnaforca",
                  value = dt[["popocup"]] + dt[["popdesocup"]])

  sp <- data.table::data.table(
    series_name = c(rep("popocup", 3), rep("popdesocup", 3), rep("popnaforca", 3)),
    mesnotrim = rep(1:3, 3),
    y0 = c(99000, 99500, 100000, 9000, 9500, 10000, 108000, 109000, 110000)
  )

  # 2. Execute: Select only one series
  result <- mensalize_sidra_series(
    dt,
    starting_points = sp,
    series = "popocup",
    compute_derived = FALSE,
    verbose = FALSE
  )

  # 3. Verify: Only selected series in output
  expect_true("m_popocup" %in% names(result))
  expect_false("m_popdesocup" %in% names(result),
               label = "Non-selected series excluded")
  expect_false("m_popnaforca" %in% names(result),
               label = "Non-selected series excluded")
})


test_that("mensalize_sidra_series errors on invalid series name", {
  # 1. Setup
  dt <- make_rolling_quarter_dt(
    start_yyyymm = 201201L, n_months = 12L, series_name = "popocup"
  )

  sp <- data.table::data.table(
    series_name = rep("popocup", 3),
    mesnotrim = 1:3,
    y0 = c(99000, 99500, 100000)
  )

  # 2. Verify: Invalid series name triggers error
  expect_error(
    mensalize_sidra_series(
      dt,
      starting_points = sp,
      series = "nonexistent_series",
      verbose = FALSE
    ),
    "Series not found",
    label = "Invalid series name raises error"
  )
})


test_that("mensalize_sidra_series compute_derived=FALSE omits derived columns", {
  # 1. Setup: Provide two component series that normally produce a derived aggregate
  dt <- make_rolling_quarter_dt(
    start_yyyymm = 201201L, n_months = 36L, series_name = "popocup"
  )
  data.table::set(dt, j = "popdesocup", value = dt[["popocup"]] * 0.1)

  sp <- data.table::data.table(
    series_name = c(rep("popocup", 3), rep("popdesocup", 3)),
    mesnotrim = rep(1:3, 2),
    y0 = c(99000, 99500, 100000, 9000, 9500, 10000)
  )

  # 2. Execute: compute_derived = FALSE
  result <- mensalize_sidra_series(
    dt,
    starting_points = sp,
    compute_derived = FALSE,
    verbose = FALSE
  )

  # 3. Verify: No derived series like m_popnaforca (= popocup + popdesocup)
  expect_false("m_popnaforca" %in% names(result),
               label = "Derived aggregate not computed when compute_derived=FALSE")
  # Primary series should still be there
  expect_true("m_popocup" %in% names(result))
  expect_true("m_popdesocup" %in% names(result))
})


test_that("mensalize_sidra_series output has anomesexato as first column", {
  # 1. Setup
  dt <- make_rolling_quarter_dt(
    start_yyyymm = 201201L, n_months = 24L, series_name = "popocup"
  )

  sp <- data.table::data.table(
    series_name = rep("popocup", 3),
    mesnotrim = 1:3,
    y0 = c(99000, 99500, 100000)
  )

  # 2. Execute
  result <- mensalize_sidra_series(
    dt,
    starting_points = sp,
    series = "popocup",
    compute_derived = FALSE,
    verbose = FALSE
  )

  # 3. Verify: anomesexato is the first column
  expect_equal(names(result)[1], "anomesexato",
               label = "anomesexato is first column of output")
})


test_that("mensalize_sidra_series filters pre-PNADC rows", {
  # 1. Setup: Data starting before PNADC era (before 201201)
  # Build manually to include pre-2012 data
  dt_pre <- data.table::data.table(
    anomesfinaltrimmovel = c(201101L, 201102L, 201103L),
    mesnotrim = c(1L, 2L, 3L),
    popocup = c(80000, 80100, 80200)
  )
  dt_post <- make_rolling_quarter_dt(
    start_yyyymm = 201201L, n_months = 12L, series_name = "popocup"
  )
  dt <- data.table::rbindlist(list(dt_pre, dt_post))

  sp <- data.table::data.table(
    series_name = rep("popocup", 3),
    mesnotrim = 1:3,
    y0 = c(99000, 99500, 100000)
  )

  # 2. Execute
  result <- mensalize_sidra_series(
    dt,
    starting_points = sp,
    series = "popocup",
    compute_derived = FALSE,
    verbose = FALSE
  )

  # 3. Verify: No pre-2012 rows in output
  expect_true(min(result$anomesexato) >= 201201L,
              label = "Pre-PNADC rows (before 201201) filtered out")
  expect_equal(nrow(result), 12L,
               label = "Only PNADC-era rows remain")
})


test_that("mensalize_sidra_series accepts custom starting_points", {
  # 1. Setup
  dt <- make_rolling_quarter_dt(
    start_yyyymm = 201201L, n_months = 24L, series_name = "popocup"
  )

  # Custom starting points with specific values
  sp_custom <- data.table::data.table(
    series_name = rep("popocup", 3),
    mesnotrim = 1:3,
    y0 = c(95000, 96000, 97000)
  )

  # 2. Execute: Should use our custom starting points without error
  result <- mensalize_sidra_series(
    dt,
    starting_points = sp_custom,
    series = "popocup",
    compute_derived = FALSE,
    verbose = FALSE
  )

  # 3. Verify: Result produced without trying to load bundled data
  expect_s3_class(result, "data.table")
  expect_true("m_popocup" %in% names(result))
  expect_true(any(!is.na(result$m_popocup)),
              label = "Custom starting points produce non-NA values")
})


test_that("mensalize_sidra_series errors on wrong starting_points format", {
  # 1. Setup: Starting points missing required column
  dt <- make_rolling_quarter_dt(
    start_yyyymm = 201201L, n_months = 12L, series_name = "popocup"
  )

  # Missing 'y0' column
  sp_bad <- data.table::data.table(
    series_name = rep("popocup", 3),
    mesnotrim = 1:3,
    value = c(99000, 99500, 100000)  # wrong column name
  )

  # 2. Verify: Error about missing columns
  expect_error(
    mensalize_sidra_series(dt, starting_points = sp_bad, verbose = FALSE),
    "Starting points missing columns.*y0",
    label = "Malformed starting_points raises error"
  )
})


test_that("mensalize_sidra_series passes through price index columns", {
  # 1. Setup: Data with price index column alongside a regular series
  dt <- make_rolling_quarter_dt(
    start_yyyymm = 201201L, n_months = 24L, series_name = "popocup"
  )
  # Add a price index column (these are not mensalized, just passed through)
  data.table::set(dt, j = "ipca100dez1993",
                  value = seq(100, by = 0.5, length.out = nrow(dt)))

  sp <- data.table::data.table(
    series_name = rep("popocup", 3),
    mesnotrim = 1:3,
    y0 = c(99000, 99500, 100000)
  )

  # 2. Execute
  result <- mensalize_sidra_series(
    dt,
    starting_points = sp,
    series = "popocup",
    compute_derived = FALSE,
    verbose = FALSE
  )

  # 3. Verify: Price index column present (no m_ prefix, no mensalization)
  expect_true("ipca100dez1993" %in% names(result),
              label = "Price index column passed through")
  # Verify values are the same as input (no transformation)
  expect_equal(result$ipca100dez1993, dt$ipca100dez1993,
               label = "Price index values unchanged")
})


# =============================================================================
# ADDITIONAL PROPERTY-BASED AND EDGE CASE TESTS
# =============================================================================

test_that("mensalize_sidra_series excludes rate series from direct mensalization", {
  # 1. Setup: Data containing a rate series (should be excluded from mensalization)
  dt <- make_rolling_quarter_dt(
    start_yyyymm = 201201L, n_months = 24L, series_name = "popocup"
  )
  data.table::set(dt, j = "taxadesocup", value = runif(nrow(dt), 5, 15))

  sp <- data.table::data.table(
    series_name = rep("popocup", 3),
    mesnotrim = 1:3,
    y0 = c(99000, 99500, 100000)
  )

  # 2. Execute: Request "all" series (default)
  result <- mensalize_sidra_series(
    dt,
    starting_points = sp,
    compute_derived = FALSE,
    verbose = FALSE
  )

  # 3. Verify: Rate series is filtered out, not directly mensalized
  expect_false("m_taxadesocup" %in% names(result),
               label = "Rate series not directly mensalized")
  expect_true("m_popocup" %in% names(result),
              label = "Population series is mensalized")
})


test_that("mensalize_sidra_series errors when no valid series columns exist", {
  # 1. Setup: Only metadata columns, no series data
  dt <- data.table::data.table(
    anomesfinaltrimmovel = c(201201L, 201202L, 201203L),
    mesnotrim = c(1L, 2L, 3L)
  )

  # 2. Verify: Error about no series columns
  expect_error(
    mensalize_sidra_series(dt, verbose = FALSE),
    "No series columns found",
    label = "No series columns raises error"
  )
})


test_that(".apply_final_adjustment handles NA values in y gracefully", {
  # 1. Setup: y with some NA values interspersed
  y <- c(100, NA, 300, 110, 210, NA, 120, 220, 320)
  rq <- c(NA, NA, 200, 180, 220, 210, 190, 230, 220)
  mesnotrim <- c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L)

  # 2. Execute
  m <- PNADCperiods:::.apply_final_adjustment(y, rq, mesnotrim)

  # 3. Verify: Where y is NA, m should remain NA (no adjustment possible)
  expect_true(is.na(m[2]), label = "NA in y propagates to m")
  expect_true(is.na(m[6]), label = "NA in y propagates to m")

  # Where y is not NA and trio is incomplete due to NA neighbor, fallback = y
  # Position 1 (mesnotrim=1): needs y[1], y_lead1=y[2]=NA -> invalid, m=y
  expect_equal(m[1], y[1], label = "Boundary fallback to y when neighbor is NA")
})


test_that("mensalize_sidra_series output length matches PNADC-era input rows", {
  # 1. Setup: Exactly 12 months of PNADC-era data
  dt <- make_rolling_quarter_dt(
    start_yyyymm = 201201L, n_months = 12L, series_name = "popocup"
  )

  sp <- data.table::data.table(
    series_name = rep("popocup", 3),
    mesnotrim = 1:3,
    y0 = c(99000, 99500, 100000)
  )

  result <- mensalize_sidra_series(
    dt,
    starting_points = sp,
    series = "popocup",
    compute_derived = FALSE,
    verbose = FALSE
  )

  # 3. Verify: One output row per input row
  expect_equal(nrow(result), 12L,
               label = "Output has same number of rows as PNADC-era input")
})


test_that(".compute_cumsum_by_mesnotrim produces correct d3 for linear rq", {
  # 1. Setup: Linearly increasing RQ
  #    rq = 100, 101, 102, 103, 104, ...
  #    d3 = 3*(rq[t] - rq[t-1]) = 3*1 = 3 for all t > first of position
  rq <- 100 + 0:11
  mesnotrim <- rep(1:3, 4)

  # 2. Execute
  cum <- PNADCperiods:::.compute_cumsum_by_mesnotrim(rq, mesnotrim)

  # 3. Verify: For position 1, second occurrence is index 4
  #    d3[4] = 3*(103-102) = 3, cum[4] = 3
  #    d3[7] = 3*(106-105) = 3, cum[7] = 6
  #    d3[10] = 3*(109-108) = 3, cum[10] = 9
  expect_equal(cum[1], 0, label = "First pos1 cum=0")
  expect_equal(cum[4], 3, label = "Second pos1 d3=3")
  expect_equal(cum[7], 6, label = "Third pos1 cum=6")
  expect_equal(cum[10], 9, label = "Fourth pos1 cum=9")

  # For position 2:
  #    d3[5] = 3*(104-103) = 3, cum[5] = 3
  #    d3[8] = 3*(107-106) = 3, cum[8] = 6
  expect_equal(cum[2], 0, label = "First pos2 cum=0")
  expect_equal(cum[5], 3, label = "Second pos2 d3=3")
  expect_equal(cum[8], 6, label = "Third pos2 cum=6")
})


test_that("mensalize_sidra_series excludes avg income series from direct mensalization", {
  # 1. Setup: Data with an average income column (should be derived, not mensalized)
  dt <- make_rolling_quarter_dt(
    start_yyyymm = 201201L, n_months = 24L, series_name = "popocup"
  )
  data.table::set(dt, j = "rendhabnominaltodos", value = runif(nrow(dt), 2000, 3000))

  sp <- data.table::data.table(
    series_name = rep("popocup", 3),
    mesnotrim = 1:3,
    y0 = c(99000, 99500, 100000)
  )

  # 2. Execute
  result <- mensalize_sidra_series(
    dt,
    starting_points = sp,
    compute_derived = FALSE,
    verbose = FALSE
  )

  # 3. Verify: Average income not directly mensalized
  expect_false("m_rendhabnominaltodos" %in% names(result),
               label = "Average income series not directly mensalized")
})


test_that("mensalize_sidra_series excludes residual series from direct mensalization", {
  # 1. Setup: Data with a residual series column
  dt <- make_rolling_quarter_dt(
    start_yyyymm = 201201L, n_months = 24L, series_name = "popocup"
  )
  data.table::set(dt, j = "popforadaforca", value = runif(nrow(dt), 50000, 60000))

  sp <- data.table::data.table(
    series_name = rep("popocup", 3),
    mesnotrim = 1:3,
    y0 = c(99000, 99500, 100000)
  )

  # 2. Execute
  result <- mensalize_sidra_series(
    dt,
    starting_points = sp,
    compute_derived = FALSE,
    verbose = FALSE
  )

  # 3. Verify: Residual series not directly mensalized
  expect_false("m_popforadaforca" %in% names(result),
               label = "Residual series not directly mensalized")
})


test_that("mensalize_sidra_series does not modify input data", {
  # 1. Setup
  dt <- make_rolling_quarter_dt(
    start_yyyymm = 201201L, n_months = 24L, series_name = "popocup"
  )
  dt_copy <- data.table::copy(dt)

  sp <- data.table::data.table(
    series_name = rep("popocup", 3),
    mesnotrim = 1:3,
    y0 = c(99000, 99500, 100000)
  )

  # 2. Execute
  result <- mensalize_sidra_series(
    dt,
    starting_points = sp,
    series = "popocup",
    compute_derived = FALSE,
    verbose = FALSE
  )

  # 3. Verify: Original input unchanged
  expect_equal(names(dt), names(dt_copy),
               label = "Input columns unchanged")
  expect_equal(nrow(dt), nrow(dt_copy),
               label = "Input row count unchanged")
  expect_equal(dt$popocup, dt_copy$popocup,
               label = "Input values unchanged")
})

Try the PNADCperiods package in your browser

Any scripts or data that you put into this service are public.

PNADCperiods documentation built on April 28, 2026, 9:07 a.m.