Nothing
# Tests for mensalize-sidra-series.R
# Covers the exported mensalize_sidra_series() function and its internal helpers:
# .compute_cumsum_by_mesnotrim(), .extract_y0_vector(),
# .apply_final_adjustment(), .mensalize_single_series(), .mensalize_split_series()
#
# All tests use synthetic mock data (no API calls).
# =============================================================================
# HELPER: generate proper YYYYMM sequence and mesnotrim
# =============================================================================
make_rolling_quarter_dt <- function(start_yyyymm = 201201L, n_months = 36L,
series_name = "popocup",
base_value = 100000, trend = 100) {
start_year <- start_yyyymm %/% 100L
start_month <- start_yyyymm %% 100L
dt <- data.table::data.table(month_num = seq_len(n_months))
dt[, `:=`(
year = start_year + (start_month + month_num - 2L) %/% 12L,
month = ((start_month + month_num - 2L) %% 12L) + 1L
)]
dt[, anomesfinaltrimmovel := year * 100L + month]
dt[, mesnotrim := ((month - 1L) %% 3L) + 1L]
# Deterministic series: base + linear trend + small seasonal component
dt[, (series_name) := base_value + trend * (month_num - 1) +
500 * sin(2 * pi * month_num / 12)]
dt[, .SD, .SDcols = c("anomesfinaltrimmovel", "mesnotrim", series_name)]
}
# =============================================================================
# .compute_cumsum_by_mesnotrim() TESTS
# =============================================================================
test_that(".compute_cumsum_by_mesnotrim computes d3 correctly for simple case", {
# 1. Setup: 6 months of constant rolling quarters (no change expected)
rq <- c(300, 300, 300, 300, 300, 300)
mesnotrim <- c(1L, 2L, 3L, 1L, 2L, 3L)
# 2. Execute
cum <- PNADCperiods:::.compute_cumsum_by_mesnotrim(rq, mesnotrim)
# 3. Verify: d3 = 3*(RQ_t - RQ_{t-1}) = 0 for all interior,
# first of each mesnotrim position is set to NA (d3=0 for cumsum)
# All cumsums should be 0 since no variation exists
expect_equal(cum[4], 0, label = "Cumsum for second pos1 should be 0 (no variation)")
expect_equal(cum[5], 0, label = "Cumsum for second pos2 should be 0 (no variation)")
expect_equal(cum[6], 0, label = "Cumsum for second pos3 should be 0 (no variation)")
})
test_that(".compute_cumsum_by_mesnotrim separates 3 mesnotrim positions", {
# 1. Setup: Different step sizes at each month position
# RQ sequence: 100, 200, 300, 110, 210, 310, 120, 220, 320
rq <- c(100, 200, 300, 110, 210, 310, 120, 220, 320)
mesnotrim <- c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L)
# 2. Execute
cum <- PNADCperiods:::.compute_cumsum_by_mesnotrim(rq, mesnotrim)
# 3. Verify: Each position accumulates independently
# For pos1: first d3 = NA (forced 0), d3[4] = 3*(110-300) = -570, d3[7] = 3*(120-310) = -570
# For pos2: first d3 = NA (forced 0), d3[5] = 3*(210-110) = 300, d3[8] = 3*(220-120) = 300
# For pos3: first d3 = NA (forced 0), d3[6] = 3*(310-210) = 300, d3[9] = 3*(320-220) = 300
# Pos1 cumsums:
expect_equal(cum[1], 0, label = "First pos1 cumsum is 0")
# Pos2 cumsums:
expect_equal(cum[2], 0, label = "First pos2 cumsum is 0")
# Pos3 cumsums:
expect_equal(cum[3], 0, label = "First pos3 cumsum is 0")
# The second occurrence of each position should have the correct d3 accumulated
# pos1: cum[4] = d3 at index 4 = 3*(110 - 300) = -570
expect_equal(cum[4], -570, label = "Second pos1 d3 accumulated correctly")
# pos2: cum[5] = d3 at index 5 = 3*(210 - 110) = 300
expect_equal(cum[5], 300, label = "Second pos2 d3 accumulated correctly")
# pos3: cum[6] = d3 at index 6 = 3*(310 - 210) = 300
expect_equal(cum[6], 300, label = "Second pos3 d3 accumulated correctly")
})
test_that(".compute_cumsum_by_mesnotrim first obs of each position starts at 0", {
# 1. Setup: 9 months with non-trivial values
rq <- c(1000, 2000, 3000, 1100, 2100, 3100, 1200, 2200, 3200)
mesnotrim <- c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L)
# 2. Execute
cum <- PNADCperiods:::.compute_cumsum_by_mesnotrim(rq, mesnotrim)
# 3. Verify: First occurrence of each position = 0
expect_equal(cum[1], 0, label = "First pos1 starts at 0")
expect_equal(cum[2], 0, label = "First pos2 starts at 0")
expect_equal(cum[3], 0, label = "First pos3 starts at 0")
})
test_that(".compute_cumsum_by_mesnotrim respects filter_mask", {
# 1. Setup: 9 months but mask out the middle 3
rq <- c(100, 200, 300, 110, 210, 310, 120, 220, 320)
mesnotrim <- c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L)
mask <- c(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE)
# 2. Execute
cum <- PNADCperiods:::.compute_cumsum_by_mesnotrim(rq, mesnotrim, filter_mask = mask)
# 3. Verify: d3 at masked positions (4,5,6) treated as 0
# pos1: d3[1]=0, d3[4]=0(masked), d3[7] = 3*(120-310) = -570
# So cum[7] = -570 (only one unmasked step)
# Compare to unmasked: cum[7] would be -570 + (-570) = -1140
cum_unmasked <- PNADCperiods:::.compute_cumsum_by_mesnotrim(rq, mesnotrim)
expect_true(cum[7] != cum_unmasked[7],
label = "Filtered cumsum differs from unfiltered")
})
test_that(".compute_cumsum_by_mesnotrim handles all-NA input", {
# 1. Setup: All NA rolling quarter values
rq <- rep(NA_real_, 9)
mesnotrim <- c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L)
# 2. Execute
cum <- PNADCperiods:::.compute_cumsum_by_mesnotrim(rq, mesnotrim)
# 3. Verify: All cumsums should be NA (no data to cumulate)
expect_true(all(is.na(cum)),
label = "All-NA input produces all-NA cumsum")
})
# =============================================================================
# .extract_y0_vector() TESTS
# =============================================================================
test_that(".extract_y0_vector returns 3 values for found series", {
# 1. Setup: Starting points table with 3 positions for one series
sp <- data.table::data.table(
series_name = rep("popocup", 3),
mesnotrim = 1:3,
y0 = c(90000, 91000, 92000)
)
# 2. Execute
y0 <- PNADCperiods:::.extract_y0_vector(sp, "popocup")
# 3. Verify: 3-element vector with correct values
expect_length(y0, 3)
expect_equal(y0[1], 90000)
expect_equal(y0[2], 91000)
expect_equal(y0[3], 92000)
})
test_that(".extract_y0_vector returns 3 NAs for missing series", {
# 1. Setup: Starting points table without the target series
sp <- data.table::data.table(
series_name = rep("popocup", 3),
mesnotrim = 1:3,
y0 = c(90000, 91000, 92000)
)
# 2. Execute
y0 <- PNADCperiods:::.extract_y0_vector(sp, "popdesocup")
# 3. Verify: 3 NAs when series not found
expect_length(y0, 3)
expect_true(all(is.na(y0)),
label = "Missing series returns c(NA, NA, NA)")
})
test_that(".extract_y0_vector handles partial starting points", {
# 1. Setup: Starting points with only 2 of 3 positions
sp <- data.table::data.table(
series_name = c("popocup", "popocup"),
mesnotrim = c(1L, 3L),
y0 = c(90000, 92000)
)
# 2. Execute
y0 <- PNADCperiods:::.extract_y0_vector(sp, "popocup")
# 3. Verify: Position 2 is NA, others filled
expect_length(y0, 3)
expect_equal(y0[1], 90000)
expect_true(is.na(y0[2]), label = "Missing position 2 is NA")
expect_equal(y0[3], 92000)
})
# =============================================================================
# .apply_final_adjustment() TESTS
# =============================================================================
test_that(".apply_final_adjustment preserves rolling quarter consistency", {
# 1. Setup: 12 months where mean(m[t], m[t+1], m[t+2]) should == rq[t+2]
# Use known y values and rq values
n <- 12
mesnotrim <- rep(1:3, 4)
# Create y values that differ from rq (final adjustment corrects them)
y <- c(100, 200, 300, 110, 210, 310, 120, 220, 320, 130, 230, 330)
# rq values: rolling quarter = average of 3 consecutive months
# We want rq[3] = mean(m[1],m[2],m[3]), rq[4] = mean(m[2],m[3],m[4]), etc.
# For testing, set rq = average of y with small shift
rq <- c(NA, NA, mean(y[1:3]) + 5,
mean(y[2:4]) + 3, mean(y[3:5]) + 2, mean(y[4:6]) + 1,
mean(y[5:7]) + 4, mean(y[6:8]) + 2, mean(y[7:9]) + 3,
mean(y[8:10]) + 1, mean(y[9:11]) + 5, mean(y[10:12]) + 2)
# 2. Execute
m <- PNADCperiods:::.apply_final_adjustment(y, rq, mesnotrim)
# 3. Verify: For mesnotrim==3 positions where all neighbors are available,
# mean(m[i-2], m[i-1], m[i]) should equal rq[i]
for (i in seq(3, 12, by = 3)) {
if (i >= 3 && !is.na(rq[i])) {
trio_mean <- mean(m[(i - 2):i])
expect_equal(trio_mean, rq[i], tolerance = 1e-10,
label = paste0("Rolling quarter consistency at position ", i))
}
}
})
test_that(".apply_final_adjustment falls back to y at boundaries", {
# 1. Setup: Short sequence where lead/lag values are missing
y <- c(100, 200, 300)
rq <- c(NA, NA, 200)
mesnotrim <- c(1L, 2L, 3L)
# 2. Execute
m <- PNADCperiods:::.apply_final_adjustment(y, rq, mesnotrim)
# 3. Verify: mesnotrim==1 cannot look ahead 2 positions beyond length
# (y_lead1 and y_lead2 will be NA for last elements),
# mesnotrim==1 at position 1: needs y_lead1[1], y_lead2[1] which exist
# mesnotrim==3 at position 3: needs y_lag1 and y_lag2 which exist
expect_length(m, 3)
# For this 3-element case, all trios are complete
# pos1 (i=1): valid1 = needs y[1], y_lead1=y[2], y_lead2=y[3], rq_lead2=rq[3]
# All available, so adjustment is applied
expect_false(is.na(m[1]), label = "Position 1 gets adjusted value (complete trio)")
expect_false(is.na(m[3]), label = "Position 3 gets adjusted value (complete trio)")
})
test_that(".apply_final_adjustment works with 12-month sequence", {
# 1. Setup: 12 months with linearly increasing rq
mesnotrim <- rep(1:3, 4)
rq <- seq(1000, 1110, by = 10) # 1000, 1010, ..., 1110
# Set y to be rq itself (adjustment should be minimal when y is already close)
y <- rq
# 2. Execute
m <- PNADCperiods:::.apply_final_adjustment(y, rq, mesnotrim)
# 3. Verify: For complete trios at mesnotrim==3 positions,
# the rolling quarter relationship holds
# At i=3: rq[3] should == mean(m[1], m[2], m[3])
expect_equal(mean(m[1:3]), rq[3], tolerance = 1e-10,
label = "First complete trio matches rq")
# At i=6: rq[6] should == mean(m[4], m[5], m[6])
expect_equal(mean(m[4:6]), rq[6], tolerance = 1e-10,
label = "Second complete trio matches rq")
# At i=9: rq[9] should == mean(m[7], m[8], m[9])
expect_equal(mean(m[7:9]), rq[9], tolerance = 1e-10,
label = "Third complete trio matches rq")
})
# =============================================================================
# .mensalize_single_series() TESTS
# =============================================================================
test_that(".mensalize_single_series produces standard output", {
# 1. Setup: Build a data.table with 36 months of rolling quarter data
dt <- make_rolling_quarter_dt(
start_yyyymm = 201201L, n_months = 36L,
series_name = "popocup", base_value = 90000, trend = 100
)
# Create starting points for this series
sp <- data.table::data.table(
series_name = rep("popocup", 3),
mesnotrim = 1:3,
y0 = c(89500, 89800, 90100)
)
# 2. Execute
m_values <- PNADCperiods:::.mensalize_single_series(dt, "popocup", sp)
# 3. Verify: Output is numeric vector with same length as input
expect_length(m_values, nrow(dt))
expect_type(m_values, "double")
# Non-NA values should exist for most of the series (at least after first few)
# With 36 months and valid starting points, all values should be non-NA
# (cumsum produces 0 at first positions, y0 is non-NA, so y is non-NA everywhere)
expect_true(sum(!is.na(m_values)) > nrow(dt) * 0.5,
label = "Most values should be non-NA")
})
test_that(".mensalize_single_series returns all NA when series not in starting_points", {
# 1. Setup: Data has series "popocup" but starting points are for a different series
dt <- make_rolling_quarter_dt(
start_yyyymm = 201201L, n_months = 12L,
series_name = "popocup", base_value = 90000
)
sp <- data.table::data.table(
series_name = rep("popdesocup", 3),
mesnotrim = 1:3,
y0 = c(10000, 10500, 11000)
)
# 2. Execute
m_values <- PNADCperiods:::.mensalize_single_series(dt, "popocup", sp)
# 3. Verify: y0 is c(NA,NA,NA), so y = NA + cum => all NA
expect_true(all(is.na(m_values)),
label = "Missing starting points produce all-NA output")
})
# =============================================================================
# .mensalize_split_series() TESTS
# =============================================================================
test_that(".mensalize_split_series handles spanning split correctly", {
# 1. Setup: Series spanning from 201201 to 201612 (5 years, crosses 201509 split)
dt <- make_rolling_quarter_dt(
start_yyyymm = 201201L, n_months = 60L,
series_name = "subocuphoras", base_value = 5000, trend = 10
)
# Starting points for pre-split and post-split
sp <- data.table::data.table(
series_name = c(rep("subocuphoras_pre", 3), rep("subocuphoras", 3)),
mesnotrim = c(1:3, 1:3),
y0 = c(4800, 4900, 5000, 5400, 5500, 5600)
)
# 2. Execute
m_values <- PNADCperiods:::.mensalize_split_series(
dt, "subocuphoras", sp, split_month = 201509L
)
# 3. Verify: Output covers full range
expect_length(m_values, nrow(dt))
expect_type(m_values, "double")
# Pre-split values (up to and including 201509) should be mostly non-NA
# With valid starting points, all values in each segment should be non-NA
pre_idx <- which(dt$anomesfinaltrimmovel <= 201509L)
expect_true(sum(!is.na(m_values[pre_idx])) > length(pre_idx) * 0.5,
label = "Pre-split values mostly non-NA")
# Post-split values (after 201509) should be mostly non-NA
post_idx <- which(dt$anomesfinaltrimmovel > 201509L)
expect_true(sum(!is.na(m_values[post_idx])) > length(post_idx) * 0.5,
label = "Post-split values mostly non-NA")
})
test_that(".mensalize_split_series handles data entirely before split", {
# 1. Setup: Series ending before the split point
dt <- make_rolling_quarter_dt(
start_yyyymm = 201201L, n_months = 36L, # ends Dec 2014, before split
series_name = "subocuphoras", base_value = 5000, trend = 10
)
sp <- data.table::data.table(
series_name = c(rep("subocuphoras_pre", 3), rep("subocuphoras", 3)),
mesnotrim = c(1:3, 1:3),
y0 = c(4800, 4900, 5000, 5400, 5500, 5600)
)
# 2. Execute
m_values <- PNADCperiods:::.mensalize_split_series(
dt, "subocuphoras", sp, split_month = 201509L
)
# 3. Verify: Only pre-split logic is used
expect_length(m_values, nrow(dt))
# Should produce values using pre-split starting points
expect_true(sum(!is.na(m_values)) > nrow(dt) * 0.5,
label = "Before-split data fully processed")
})
test_that(".mensalize_split_series handles data entirely after split", {
# 1. Setup: Series starting after the split point
dt <- make_rolling_quarter_dt(
start_yyyymm = 201601L, n_months = 24L,
series_name = "subocuphoras", base_value = 5500, trend = 10
)
sp <- data.table::data.table(
series_name = c(rep("subocuphoras_pre", 3), rep("subocuphoras", 3)),
mesnotrim = c(1:3, 1:3),
y0 = c(4800, 4900, 5000, 5400, 5500, 5600)
)
# 2. Execute
m_values <- PNADCperiods:::.mensalize_split_series(
dt, "subocuphoras", sp, split_month = 201509L
)
# 3. Verify: Only post-split logic is used
expect_length(m_values, nrow(dt))
expect_true(sum(!is.na(m_values)) > nrow(dt) * 0.5,
label = "After-split data fully processed")
})
test_that(".mensalize_split_series resets cumsum at split boundary", {
# 1. Setup: Data spanning the split with a jump in values
dt <- make_rolling_quarter_dt(
start_yyyymm = 201501L, n_months = 24L, # Jan 2015 - Dec 2016
series_name = "subocuphoras", base_value = 5000, trend = 10
)
# Introduce a level shift after the split to test independence
post_rows <- dt$anomesfinaltrimmovel > 201509L
data.table::set(dt, i = which(post_rows), j = "subocuphoras",
value = dt[["subocuphoras"]][post_rows] + 2000)
sp <- data.table::data.table(
series_name = c(rep("subocuphoras_pre", 3), rep("subocuphoras", 3)),
mesnotrim = c(1:3, 1:3),
y0 = c(4800, 4900, 5000, 7400, 7500, 7600)
)
# 2. Execute
m_values <- PNADCperiods:::.mensalize_split_series(
dt, "subocuphoras", sp, split_month = 201509L
)
# 3. Verify: Post-split values should reflect the level shift
# Pre-split values should be around 5000 level
# Post-split values should be around 7000+ level
pre_mean <- mean(m_values[!post_rows], na.rm = TRUE)
post_mean <- mean(m_values[post_rows], na.rm = TRUE)
expect_true(post_mean > pre_mean + 1000,
label = "Post-split reflects level shift (cumsums reset)")
})
# =============================================================================
# mensalize_sidra_series() EXPORTED FUNCTION TESTS
# =============================================================================
test_that("mensalize_sidra_series errors on missing anomesfinaltrimmovel", {
# 1. Setup: data.table without the required column
dt <- data.table::data.table(
mesnotrim = c(1L, 2L, 3L),
popocup = c(100, 200, 300)
)
# 2. Verify: Error about missing column
expect_error(
mensalize_sidra_series(dt, verbose = FALSE),
"Missing required columns.*anomesfinaltrimmovel",
label = "Missing anomesfinaltrimmovel raises error"
)
})
test_that("mensalize_sidra_series errors on missing mesnotrim", {
# 1. Setup: data.table without mesnotrim
dt <- data.table::data.table(
anomesfinaltrimmovel = c(201203L, 201204L, 201205L),
popocup = c(100, 200, 300)
)
# 2. Verify: Error about missing column
expect_error(
mensalize_sidra_series(dt, verbose = FALSE),
"Missing required columns.*mesnotrim",
label = "Missing mesnotrim raises error"
)
})
test_that("mensalize_sidra_series auto-converts data.frame to data.table", {
# 1. Setup: plain data.frame input with starting points
n <- 36L
dt <- make_rolling_quarter_dt(
start_yyyymm = 201201L, n_months = n, series_name = "popocup"
)
df <- as.data.frame(dt) # convert to plain data.frame
sp <- data.table::data.table(
series_name = rep("popocup", 3),
mesnotrim = 1:3,
y0 = c(99000, 99500, 100000)
)
# 2. Execute: Should not error
expect_no_error({
result <- mensalize_sidra_series(
df,
starting_points = sp,
series = "popocup",
compute_derived = FALSE,
verbose = FALSE
)
})
# 3. Verify: Result is data.table with expected columns
result <- mensalize_sidra_series(
df,
starting_points = sp,
series = "popocup",
compute_derived = FALSE,
verbose = FALSE
)
expect_s3_class(result, "data.table")
expect_true("m_popocup" %in% names(result))
})
test_that("mensalize_sidra_series respects specific series selection", {
# 1. Setup: Data with multiple series
dt <- make_rolling_quarter_dt(
start_yyyymm = 201201L, n_months = 36L, series_name = "popocup"
)
# Add additional series columns using data.table::set to avoid scoping issues
data.table::set(dt, j = "popdesocup",
value = dt[["popocup"]] * 0.1 + rnorm(nrow(dt), 0, 50))
data.table::set(dt, j = "popnaforca",
value = dt[["popocup"]] + dt[["popdesocup"]])
sp <- data.table::data.table(
series_name = c(rep("popocup", 3), rep("popdesocup", 3), rep("popnaforca", 3)),
mesnotrim = rep(1:3, 3),
y0 = c(99000, 99500, 100000, 9000, 9500, 10000, 108000, 109000, 110000)
)
# 2. Execute: Select only one series
result <- mensalize_sidra_series(
dt,
starting_points = sp,
series = "popocup",
compute_derived = FALSE,
verbose = FALSE
)
# 3. Verify: Only selected series in output
expect_true("m_popocup" %in% names(result))
expect_false("m_popdesocup" %in% names(result),
label = "Non-selected series excluded")
expect_false("m_popnaforca" %in% names(result),
label = "Non-selected series excluded")
})
test_that("mensalize_sidra_series errors on invalid series name", {
# 1. Setup
dt <- make_rolling_quarter_dt(
start_yyyymm = 201201L, n_months = 12L, series_name = "popocup"
)
sp <- data.table::data.table(
series_name = rep("popocup", 3),
mesnotrim = 1:3,
y0 = c(99000, 99500, 100000)
)
# 2. Verify: Invalid series name triggers error
expect_error(
mensalize_sidra_series(
dt,
starting_points = sp,
series = "nonexistent_series",
verbose = FALSE
),
"Series not found",
label = "Invalid series name raises error"
)
})
test_that("mensalize_sidra_series compute_derived=FALSE omits derived columns", {
# 1. Setup: Provide two component series that normally produce a derived aggregate
dt <- make_rolling_quarter_dt(
start_yyyymm = 201201L, n_months = 36L, series_name = "popocup"
)
data.table::set(dt, j = "popdesocup", value = dt[["popocup"]] * 0.1)
sp <- data.table::data.table(
series_name = c(rep("popocup", 3), rep("popdesocup", 3)),
mesnotrim = rep(1:3, 2),
y0 = c(99000, 99500, 100000, 9000, 9500, 10000)
)
# 2. Execute: compute_derived = FALSE
result <- mensalize_sidra_series(
dt,
starting_points = sp,
compute_derived = FALSE,
verbose = FALSE
)
# 3. Verify: No derived series like m_popnaforca (= popocup + popdesocup)
expect_false("m_popnaforca" %in% names(result),
label = "Derived aggregate not computed when compute_derived=FALSE")
# Primary series should still be there
expect_true("m_popocup" %in% names(result))
expect_true("m_popdesocup" %in% names(result))
})
test_that("mensalize_sidra_series output has anomesexato as first column", {
# 1. Setup
dt <- make_rolling_quarter_dt(
start_yyyymm = 201201L, n_months = 24L, series_name = "popocup"
)
sp <- data.table::data.table(
series_name = rep("popocup", 3),
mesnotrim = 1:3,
y0 = c(99000, 99500, 100000)
)
# 2. Execute
result <- mensalize_sidra_series(
dt,
starting_points = sp,
series = "popocup",
compute_derived = FALSE,
verbose = FALSE
)
# 3. Verify: anomesexato is the first column
expect_equal(names(result)[1], "anomesexato",
label = "anomesexato is first column of output")
})
test_that("mensalize_sidra_series filters pre-PNADC rows", {
# 1. Setup: Data starting before PNADC era (before 201201)
# Build manually to include pre-2012 data
dt_pre <- data.table::data.table(
anomesfinaltrimmovel = c(201101L, 201102L, 201103L),
mesnotrim = c(1L, 2L, 3L),
popocup = c(80000, 80100, 80200)
)
dt_post <- make_rolling_quarter_dt(
start_yyyymm = 201201L, n_months = 12L, series_name = "popocup"
)
dt <- data.table::rbindlist(list(dt_pre, dt_post))
sp <- data.table::data.table(
series_name = rep("popocup", 3),
mesnotrim = 1:3,
y0 = c(99000, 99500, 100000)
)
# 2. Execute
result <- mensalize_sidra_series(
dt,
starting_points = sp,
series = "popocup",
compute_derived = FALSE,
verbose = FALSE
)
# 3. Verify: No pre-2012 rows in output
expect_true(min(result$anomesexato) >= 201201L,
label = "Pre-PNADC rows (before 201201) filtered out")
expect_equal(nrow(result), 12L,
label = "Only PNADC-era rows remain")
})
test_that("mensalize_sidra_series accepts custom starting_points", {
# 1. Setup
dt <- make_rolling_quarter_dt(
start_yyyymm = 201201L, n_months = 24L, series_name = "popocup"
)
# Custom starting points with specific values
sp_custom <- data.table::data.table(
series_name = rep("popocup", 3),
mesnotrim = 1:3,
y0 = c(95000, 96000, 97000)
)
# 2. Execute: Should use our custom starting points without error
result <- mensalize_sidra_series(
dt,
starting_points = sp_custom,
series = "popocup",
compute_derived = FALSE,
verbose = FALSE
)
# 3. Verify: Result produced without trying to load bundled data
expect_s3_class(result, "data.table")
expect_true("m_popocup" %in% names(result))
expect_true(any(!is.na(result$m_popocup)),
label = "Custom starting points produce non-NA values")
})
test_that("mensalize_sidra_series errors on wrong starting_points format", {
# 1. Setup: Starting points missing required column
dt <- make_rolling_quarter_dt(
start_yyyymm = 201201L, n_months = 12L, series_name = "popocup"
)
# Missing 'y0' column
sp_bad <- data.table::data.table(
series_name = rep("popocup", 3),
mesnotrim = 1:3,
value = c(99000, 99500, 100000) # wrong column name
)
# 2. Verify: Error about missing columns
expect_error(
mensalize_sidra_series(dt, starting_points = sp_bad, verbose = FALSE),
"Starting points missing columns.*y0",
label = "Malformed starting_points raises error"
)
})
test_that("mensalize_sidra_series passes through price index columns", {
# 1. Setup: Data with price index column alongside a regular series
dt <- make_rolling_quarter_dt(
start_yyyymm = 201201L, n_months = 24L, series_name = "popocup"
)
# Add a price index column (these are not mensalized, just passed through)
data.table::set(dt, j = "ipca100dez1993",
value = seq(100, by = 0.5, length.out = nrow(dt)))
sp <- data.table::data.table(
series_name = rep("popocup", 3),
mesnotrim = 1:3,
y0 = c(99000, 99500, 100000)
)
# 2. Execute
result <- mensalize_sidra_series(
dt,
starting_points = sp,
series = "popocup",
compute_derived = FALSE,
verbose = FALSE
)
# 3. Verify: Price index column present (no m_ prefix, no mensalization)
expect_true("ipca100dez1993" %in% names(result),
label = "Price index column passed through")
# Verify values are the same as input (no transformation)
expect_equal(result$ipca100dez1993, dt$ipca100dez1993,
label = "Price index values unchanged")
})
# =============================================================================
# ADDITIONAL PROPERTY-BASED AND EDGE CASE TESTS
# =============================================================================
test_that("mensalize_sidra_series excludes rate series from direct mensalization", {
# 1. Setup: Data containing a rate series (should be excluded from mensalization)
dt <- make_rolling_quarter_dt(
start_yyyymm = 201201L, n_months = 24L, series_name = "popocup"
)
data.table::set(dt, j = "taxadesocup", value = runif(nrow(dt), 5, 15))
sp <- data.table::data.table(
series_name = rep("popocup", 3),
mesnotrim = 1:3,
y0 = c(99000, 99500, 100000)
)
# 2. Execute: Request "all" series (default)
result <- mensalize_sidra_series(
dt,
starting_points = sp,
compute_derived = FALSE,
verbose = FALSE
)
# 3. Verify: Rate series is filtered out, not directly mensalized
expect_false("m_taxadesocup" %in% names(result),
label = "Rate series not directly mensalized")
expect_true("m_popocup" %in% names(result),
label = "Population series is mensalized")
})
test_that("mensalize_sidra_series errors when no valid series columns exist", {
# 1. Setup: Only metadata columns, no series data
dt <- data.table::data.table(
anomesfinaltrimmovel = c(201201L, 201202L, 201203L),
mesnotrim = c(1L, 2L, 3L)
)
# 2. Verify: Error about no series columns
expect_error(
mensalize_sidra_series(dt, verbose = FALSE),
"No series columns found",
label = "No series columns raises error"
)
})
test_that(".apply_final_adjustment handles NA values in y gracefully", {
# 1. Setup: y with some NA values interspersed
y <- c(100, NA, 300, 110, 210, NA, 120, 220, 320)
rq <- c(NA, NA, 200, 180, 220, 210, 190, 230, 220)
mesnotrim <- c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L)
# 2. Execute
m <- PNADCperiods:::.apply_final_adjustment(y, rq, mesnotrim)
# 3. Verify: Where y is NA, m should remain NA (no adjustment possible)
expect_true(is.na(m[2]), label = "NA in y propagates to m")
expect_true(is.na(m[6]), label = "NA in y propagates to m")
# Where y is not NA and trio is incomplete due to NA neighbor, fallback = y
# Position 1 (mesnotrim=1): needs y[1], y_lead1=y[2]=NA -> invalid, m=y
expect_equal(m[1], y[1], label = "Boundary fallback to y when neighbor is NA")
})
test_that("mensalize_sidra_series output length matches PNADC-era input rows", {
# 1. Setup: Exactly 12 months of PNADC-era data
dt <- make_rolling_quarter_dt(
start_yyyymm = 201201L, n_months = 12L, series_name = "popocup"
)
sp <- data.table::data.table(
series_name = rep("popocup", 3),
mesnotrim = 1:3,
y0 = c(99000, 99500, 100000)
)
result <- mensalize_sidra_series(
dt,
starting_points = sp,
series = "popocup",
compute_derived = FALSE,
verbose = FALSE
)
# 3. Verify: One output row per input row
expect_equal(nrow(result), 12L,
label = "Output has same number of rows as PNADC-era input")
})
test_that(".compute_cumsum_by_mesnotrim produces correct d3 for linear rq", {
# 1. Setup: Linearly increasing RQ
# rq = 100, 101, 102, 103, 104, ...
# d3 = 3*(rq[t] - rq[t-1]) = 3*1 = 3 for all t > first of position
rq <- 100 + 0:11
mesnotrim <- rep(1:3, 4)
# 2. Execute
cum <- PNADCperiods:::.compute_cumsum_by_mesnotrim(rq, mesnotrim)
# 3. Verify: For position 1, second occurrence is index 4
# d3[4] = 3*(103-102) = 3, cum[4] = 3
# d3[7] = 3*(106-105) = 3, cum[7] = 6
# d3[10] = 3*(109-108) = 3, cum[10] = 9
expect_equal(cum[1], 0, label = "First pos1 cum=0")
expect_equal(cum[4], 3, label = "Second pos1 d3=3")
expect_equal(cum[7], 6, label = "Third pos1 cum=6")
expect_equal(cum[10], 9, label = "Fourth pos1 cum=9")
# For position 2:
# d3[5] = 3*(104-103) = 3, cum[5] = 3
# d3[8] = 3*(107-106) = 3, cum[8] = 6
expect_equal(cum[2], 0, label = "First pos2 cum=0")
expect_equal(cum[5], 3, label = "Second pos2 d3=3")
expect_equal(cum[8], 6, label = "Third pos2 cum=6")
})
test_that("mensalize_sidra_series excludes avg income series from direct mensalization", {
# 1. Setup: Data with an average income column (should be derived, not mensalized)
dt <- make_rolling_quarter_dt(
start_yyyymm = 201201L, n_months = 24L, series_name = "popocup"
)
data.table::set(dt, j = "rendhabnominaltodos", value = runif(nrow(dt), 2000, 3000))
sp <- data.table::data.table(
series_name = rep("popocup", 3),
mesnotrim = 1:3,
y0 = c(99000, 99500, 100000)
)
# 2. Execute
result <- mensalize_sidra_series(
dt,
starting_points = sp,
compute_derived = FALSE,
verbose = FALSE
)
# 3. Verify: Average income not directly mensalized
expect_false("m_rendhabnominaltodos" %in% names(result),
label = "Average income series not directly mensalized")
})
test_that("mensalize_sidra_series excludes residual series from direct mensalization", {
# 1. Setup: Data with a residual series column
dt <- make_rolling_quarter_dt(
start_yyyymm = 201201L, n_months = 24L, series_name = "popocup"
)
data.table::set(dt, j = "popforadaforca", value = runif(nrow(dt), 50000, 60000))
sp <- data.table::data.table(
series_name = rep("popocup", 3),
mesnotrim = 1:3,
y0 = c(99000, 99500, 100000)
)
# 2. Execute
result <- mensalize_sidra_series(
dt,
starting_points = sp,
compute_derived = FALSE,
verbose = FALSE
)
# 3. Verify: Residual series not directly mensalized
expect_false("m_popforadaforca" %in% names(result),
label = "Residual series not directly mensalized")
})
test_that("mensalize_sidra_series does not modify input data", {
# 1. Setup
dt <- make_rolling_quarter_dt(
start_yyyymm = 201201L, n_months = 24L, series_name = "popocup"
)
dt_copy <- data.table::copy(dt)
sp <- data.table::data.table(
series_name = rep("popocup", 3),
mesnotrim = 1:3,
y0 = c(99000, 99500, 100000)
)
# 2. Execute
result <- mensalize_sidra_series(
dt,
starting_points = sp,
series = "popocup",
compute_derived = FALSE,
verbose = FALSE
)
# 3. Verify: Original input unchanged
expect_equal(names(dt), names(dt_copy),
label = "Input columns unchanged")
expect_equal(nrow(dt), nrow(dt_copy),
label = "Input row count unchanged")
expect_equal(dt$popocup, dt_copy$popocup,
label = "Input values unchanged")
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.