Nothing
# Tests for starting points functions and bundled dataset
# Covers: pnadc_series_starting_points (bundled data),
# compute_z_aggregates(), compute_series_starting_points()
# =============================================================================
# BUNDLED DATASET TESTS (pnadc_series_starting_points)
# =============================================================================
test_that("bundled dataset loads successfully via data()", {
data("pnadc_series_starting_points", package = "PNADCperiods")
expect_s3_class(pnadc_series_starting_points, "data.table")
expect_true(nrow(pnadc_series_starting_points) > 0,
label = "Dataset has rows")
})
test_that("bundled dataset has exactly 3 columns: series_name, mesnotrim, y0", {
data("pnadc_series_starting_points", package = "PNADCperiods")
expect_equal(ncol(pnadc_series_starting_points), 3)
expect_named(pnadc_series_starting_points,
c("series_name", "mesnotrim", "y0"),
ignore.order = FALSE)
})
test_that("mesnotrim only contains values 1, 2, 3", {
data("pnadc_series_starting_points", package = "PNADCperiods")
expect_true(all(pnadc_series_starting_points$mesnotrim %in% c(1L, 2L, 3L)),
label = "All mesnotrim values are 1, 2, or 3")
# Confirm all three positions are present
expect_setequal(unique(pnadc_series_starting_points$mesnotrim), c(1L, 2L, 3L))
})
test_that("no NA in series_name or mesnotrim", {
data("pnadc_series_starting_points", package = "PNADCperiods")
expect_false(any(is.na(pnadc_series_starting_points$series_name)),
label = "No NA in series_name")
expect_false(any(is.na(pnadc_series_starting_points$mesnotrim)),
label = "No NA in mesnotrim")
})
test_that("all y0 values are finite (no NA, NaN, Inf)", {
data("pnadc_series_starting_points", package = "PNADCperiods")
expect_true(all(is.finite(pnadc_series_starting_points$y0)),
label = "All y0 values are finite")
})
test_that("key labor market series are present in bundled dataset", {
data("pnadc_series_starting_points", package = "PNADCperiods")
available <- unique(pnadc_series_starting_points$series_name)
key_series <- c("popocup", "popdesocup", "popnaforca", "pop14mais")
for (s in key_series) {
expect_true(s %in% available,
label = paste0("Series '", s, "' is present"))
}
})
test_that("each series has exactly 3 rows (one per mesnotrim)", {
data("pnadc_series_starting_points", package = "PNADCperiods")
counts <- pnadc_series_starting_points[, .N, by = series_name]
expect_true(all(counts$N == 3L),
label = "Every series has exactly 3 rows")
# Verify each series has all three positions
for (s in unique(pnadc_series_starting_points$series_name)) {
positions <- pnadc_series_starting_points[series_name == s, mesnotrim]
expect_setequal(positions, c(1L, 2L, 3L))
}
})
# =============================================================================
# compute_z_aggregates() TESTS
# =============================================================================
# Helper: create mock calibrated microdata for compute_z_aggregates()
create_mock_calibrated <- function(n_per_month = 10L,
months = c(202301L, 202302L, 202303L),
seed = 100L) {
set.seed(seed)
n <- n_per_month * length(months)
dt <- data.table::data.table(
weight_monthly = rep(1000, n),
ref_month_yyyymm = rep(months, each = n_per_month),
ref_month_in_quarter = rep(1:3, each = n_per_month),
Ano = 2023L,
Trimestre = 1L,
V2009 = sample(c(5L, 20L, 35L, 50L, 70L), n, replace = TRUE),
VD4001 = sample(c(1L, 2L), n, replace = TRUE),
VD4002 = sample(c(1L, 2L, NA_integer_), n, replace = TRUE, prob = c(0.45, 0.15, 0.40)),
VD4009 = sample(c(1L:10L, NA_integer_), n, replace = TRUE),
VD4010 = sample(c(1L:11L, NA_integer_), n, replace = TRUE),
V4019 = sample(c(1L, 2L, NA_integer_), n, replace = TRUE),
VD4004 = sample(c(1L, 2L, NA_integer_), n, replace = TRUE),
VD4004A = sample(c(1L, 2L, NA_integer_), n, replace = TRUE),
VD4019 = round(runif(n, 1000, 5000), 2),
VD4020 = round(runif(n, 900, 4500), 2),
VD4016 = round(runif(n, 500, 3000), 2),
VD4017 = round(runif(n, 500, 3000), 2),
VD4031 = round(runif(n, 20, 44), 0),
VD4035 = round(runif(n, 20, 50), 0)
)
dt
}
test_that("compute_z_aggregates errors when weight_monthly is missing", {
dt <- data.table::data.table(
ref_month_yyyymm = c(202301L, 202302L),
ref_month_in_quarter = c(1L, 2L),
V2009 = c(30L, 40L)
)
expect_error(
compute_z_aggregates(dt, verbose = FALSE),
"weight_monthly"
)
})
test_that("compute_z_aggregates computes correct z_popocup and z_popdesocup", {
# Construct a deterministic example:
# 4 obs in one month, weights all 1000
# VD4002: 1, 1, 2, 2 -> 2 occupied, 2 unemployed
dt <- data.table::data.table(
weight_monthly = rep(1000, 4),
ref_month_yyyymm = rep(202301L, 4),
ref_month_in_quarter = rep(1L, 4),
Ano = 2023L,
Trimestre = 1L,
V2009 = c(30L, 35L, 25L, 40L),
VD4001 = c(1L, 1L, 1L, 1L),
VD4002 = c(1L, 1L, 2L, 2L)
)
result <- compute_z_aggregates(dt, verbose = FALSE)
# z_popocup = sum(indicator * weight): 2 obs with VD4002==1, weight=1000 each
expect_equal(result$z_popocup, 2 * 1000)
# z_popdesocup = sum(indicator * weight): 2 obs with VD4002==2
expect_equal(result$z_popdesocup, 2 * 1000)
})
test_that("z_populacao equals 1 for every observation (total pop = sum of weights)", {
# 6 obs across 2 months, various weights
dt <- data.table::data.table(
weight_monthly = c(500, 800, 1200, 600, 900, 1100),
ref_month_yyyymm = c(rep(202301L, 3), rep(202302L, 3)),
ref_month_in_quarter = c(rep(1L, 3), rep(2L, 3)),
Ano = 2023L,
Trimestre = 1L,
V2009 = c(30L, 35L, 25L, 40L, 50L, 20L)
)
result <- compute_z_aggregates(dt, verbose = FALSE)
data.table::setorder(result, anomesexato)
# z_populacao should be sum of weights per month
# Month 1: 500 + 800 + 1200 = 2500
# Month 2: 600 + 900 + 1100 = 2600
expect_equal(result[anomesexato == 202301L, z_populacao], 2500)
expect_equal(result[anomesexato == 202302L, z_populacao], 2600)
})
test_that("compute_z_aggregates groups correctly by month across multiple months", {
dt <- create_mock_calibrated(n_per_month = 20L)
result <- compute_z_aggregates(dt, verbose = FALSE)
# Should have one row per month
expect_equal(nrow(result), 3)
expect_true(all(c(202301L, 202302L, 202303L) %in% result$anomesexato))
# All z_ columns should be present and numeric
z_cols <- grep("^z_", names(result), value = TRUE)
expect_true(length(z_cols) > 0, label = "z_ columns exist")
for (col in z_cols) {
expect_true(is.numeric(result[[col]]),
label = paste0(col, " is numeric"))
}
})
test_that("compute_z_aggregates converts character VD variables to integer", {
# VD variables stored as character in some PNADC files ("01", "02")
dt <- data.table::data.table(
weight_monthly = rep(1000, 4),
ref_month_yyyymm = rep(202301L, 4),
ref_month_in_quarter = rep(1L, 4),
Ano = 2023L,
Trimestre = 1L,
V2009 = c(30L, 35L, 25L, 40L),
VD4001 = c("01", "01", "02", "02"),
VD4002 = c("01", "01", "02", "02")
)
# Should not error, and should produce correct results
result <- compute_z_aggregates(dt, verbose = FALSE)
expect_equal(result$z_popocup, 2 * 1000)
expect_equal(result$z_popdesocup, 2 * 1000)
})
# =============================================================================
# compute_series_starting_points() TESTS
# =============================================================================
# Helper: create paired mock data for compute_series_starting_points()
# Returns a list with $monthly_estimates and $rolling_quarters
create_mock_for_starting_points <- function(n_months = 36L,
start_yyyymm = 201301L,
base_rq = 100,
scale_factor = 1000,
seed = 200L) {
set.seed(seed)
# Generate YYYYMM sequence
months_seq <- generate_yyyymm_seq(start_yyyymm, n_months)
mesnotrim_seq <- ((months_seq %% 100L - 1L) %% 3L) + 1L
# Rolling quarters: base value with slight trend
rq_values <- base_rq * (1 + 0.001 * seq_len(n_months))
rolling_quarters <- data.table::data.table(
anomesfinaltrimmovel = months_seq,
mesnotrim = mesnotrim_seq,
popocup = rq_values,
popdesocup = rq_values * 0.1,
pop14mais = rq_values * 1.2
)
# Monthly estimates: z_ columns = scale_factor * rq + noise
# (so that z/scale_factor is close to rq, giving small y0)
monthly_estimates <- data.table::data.table(
anomesexato = months_seq,
z_popocup = rq_values * scale_factor + rnorm(n_months, 0, 50),
z_popdesocup = rq_values * 0.1 * scale_factor + rnorm(n_months, 0, 5),
z_pop14mais = rq_values * 1.2 * scale_factor + rnorm(n_months, 0, 60)
)
list(
monthly_estimates = monthly_estimates,
rolling_quarters = rolling_quarters
)
}
test_that("compute_series_starting_points returns correct columns", {
mock <- create_mock_for_starting_points()
result <- compute_series_starting_points(
mock$monthly_estimates,
mock$rolling_quarters,
calibration_start = 201301L,
calibration_end = 201912L,
scale_factor = 1000,
use_series_specific_periods = FALSE,
verbose = FALSE
)
expect_s3_class(result, "data.table")
expect_named(result, c("series_name", "mesnotrim", "y0"), ignore.order = FALSE)
})
test_that("compute_series_starting_points returns 3 rows per series", {
mock <- create_mock_for_starting_points()
result <- compute_series_starting_points(
mock$monthly_estimates,
mock$rolling_quarters,
calibration_start = 201301L,
calibration_end = 201912L,
scale_factor = 1000,
use_series_specific_periods = FALSE,
verbose = FALSE
)
counts <- result[, .N, by = series_name]
expect_true(all(counts$N == 3L),
label = "Each series has exactly 3 rows")
# Each series should have all 3 mesnotrim values
for (s in unique(result$series_name)) {
positions <- result[series_name == s, mesnotrim]
expect_setequal(positions, c(1L, 2L, 3L))
}
})
test_that("custom calibration_start/end are respected", {
# Create data spanning a wide range
mock <- create_mock_for_starting_points(n_months = 120L, start_yyyymm = 201201L)
# Use narrow calibration period
result_narrow <- compute_series_starting_points(
mock$monthly_estimates,
mock$rolling_quarters,
calibration_start = 201601L,
calibration_end = 201712L,
scale_factor = 1000,
use_series_specific_periods = FALSE,
verbose = FALSE
)
# Use wider calibration period
result_wide <- compute_series_starting_points(
mock$monthly_estimates,
mock$rolling_quarters,
calibration_start = 201301L,
calibration_end = 201912L,
scale_factor = 1000,
use_series_specific_periods = FALSE,
verbose = FALSE
)
# Both should produce valid results
expect_true(all(is.finite(result_narrow$y0)))
expect_true(all(is.finite(result_wide$y0)))
# y0 values should differ because calibration periods differ
merged <- merge(result_narrow, result_wide,
by = c("series_name", "mesnotrim"),
suffixes = c("_narrow", "_wide"))
# Not all y0 pairs should be identical (noise + different averaging window)
expect_false(all(merged$y0_narrow == merged$y0_wide),
label = "Different calibration periods produce different y0")
})
test_that("use_series_specific_periods=FALSE uses uniform calibration for all series", {
# Create data with a "CNPJ-like" series name to verify it does NOT get special treatment
months_seq <- generate_yyyymm_seq(201201L, 120)
mesnotrim_seq <- ((months_seq %% 100L - 1L) %% 3L) + 1L
rq_vals <- 100 * (1 + 0.001 * seq_along(months_seq))
rq <- data.table::data.table(
anomesfinaltrimmovel = months_seq,
mesnotrim = mesnotrim_seq,
popocup = rq_vals,
empregadorcomcnpj = rq_vals * 0.05
)
me <- data.table::data.table(
anomesexato = months_seq,
z_popocup = rq_vals * 1000 + rnorm(120, 0, 10),
z_empregadorcomcnpj = rq_vals * 0.05 * 1000 + rnorm(120, 0, 1)
)
result <- compute_series_starting_points(
me, rq,
calibration_start = 201301L,
calibration_end = 201912L,
scale_factor = 1000,
use_series_specific_periods = FALSE,
verbose = FALSE
)
# Both series should be present with 3 rows each
expect_true("popocup" %in% result$series_name)
expect_true("empregadorcomcnpj" %in% result$series_name)
expect_equal(nrow(result[series_name == "empregadorcomcnpj"]), 3)
})
test_that("scale_factor=1 produces correct y0 (no scaling applied)", {
mock <- create_mock_for_starting_points(scale_factor = 1)
# Rebuild monthly estimates with scale_factor=1 (z_ values close to rq)
months_seq <- mock$rolling_quarters$anomesfinaltrimmovel
rq_popocup <- mock$rolling_quarters$popocup
me <- data.table::data.table(
anomesexato = months_seq,
z_popocup = rq_popocup + rnorm(length(months_seq), 0, 0.1)
)
result <- compute_series_starting_points(
me,
mock$rolling_quarters,
calibration_start = 201301L,
calibration_end = 201912L,
scale_factor = 1,
use_series_specific_periods = FALSE,
verbose = FALSE
)
# With scale_factor=1 and z ~ rq, y0 should be close to the base rq value
# (y0 is the starting level, not zero; ~100 for base_rq=100)
expect_true(all(is.finite(result$y0)))
# The three y0 values per mesnotrim should be consistent with each other
y0_vals <- result[series_name == "popocup", y0]
expect_true(sd(y0_vals) < 5,
label = "y0 values consistent across mesnotrim positions")
})
test_that("mathematical property: backprojection e0 averaged gives y0", {
# The defining formula:
# e0 = z / scale_factor - cumsum_by_mesnotrim(rq)
# y0[pos] = mean(e0[mesnotrim == pos & in_calibration])
#
# We verify this by constructing data where we can compute e0 manually.
set.seed(42)
n <- 12L # 12 months = 4 quarters
months_seq <- generate_yyyymm_seq(201301L, n)
mesnotrim_seq <- ((months_seq %% 100L - 1L) %% 3L) + 1L
rq_vals <- rep(100, n) # Constant rolling quarter
z_vals <- rep(100000, n) # Constant z values (scale 1000)
scale_factor <- 1000
rq <- data.table::data.table(
anomesfinaltrimmovel = months_seq,
mesnotrim = mesnotrim_seq,
popocup = rq_vals
)
me <- data.table::data.table(
anomesexato = months_seq,
z_popocup = z_vals
)
result <- compute_series_starting_points(
me, rq,
calibration_start = 201301L,
calibration_end = 201312L,
scale_factor = scale_factor,
use_series_specific_periods = FALSE,
verbose = FALSE
)
# With constant rq, d3 = 3*(rq - lag(rq)) = 0, so cumsum = 0 for all periods
# e0 = z/scale_factor - cum = 100000/1000 - 0 = 100
# y0 should be 100 for all positions
expect_equal(result[series_name == "popocup", y0],
rep(100, 3),
tolerance = 0.001)
})
test_that("compute_series_starting_points errors when no z_ columns present", {
months_seq <- generate_yyyymm_seq(201301L, 12)
mesnotrim_seq <- ((months_seq %% 100L - 1L) %% 3L) + 1L
rq <- data.table::data.table(
anomesfinaltrimmovel = months_seq,
mesnotrim = mesnotrim_seq,
popocup = rep(100, 12)
)
# monthly_estimates with NO z_ columns
me <- data.table::data.table(
anomesexato = months_seq,
popocup = rep(100000, 12)
)
expect_error(
compute_series_starting_points(me, rq, verbose = FALSE),
"No z_ columns"
)
})
test_that("compute_series_starting_points skips series not in rolling_quarters", {
months_seq <- generate_yyyymm_seq(201301L, 12)
mesnotrim_seq <- ((months_seq %% 100L - 1L) %% 3L) + 1L
rq <- data.table::data.table(
anomesfinaltrimmovel = months_seq,
mesnotrim = mesnotrim_seq,
popocup = rep(100, 12)
# NOTE: no "popdesocup" column here
)
me <- data.table::data.table(
anomesexato = months_seq,
z_popocup = rep(100000, 12),
z_popdesocup = rep(10000, 12) # z_ column exists but series not in rq
)
result <- compute_series_starting_points(
me, rq,
calibration_start = 201301L,
calibration_end = 201312L,
scale_factor = 1000,
use_series_specific_periods = FALSE,
verbose = FALSE
)
# Only popocup should appear (popdesocup skipped because not in rq)
expect_true("popocup" %in% result$series_name)
expect_false("popdesocup" %in% result$series_name)
})
test_that("compute_series_starting_points handles anomesexato column name in monthly_estimates", {
# The function renames "anomesexato" to "anomesfinaltrimmovel" internally
months_seq <- generate_yyyymm_seq(201301L, 12)
mesnotrim_seq <- ((months_seq %% 100L - 1L) %% 3L) + 1L
rq <- data.table::data.table(
anomesfinaltrimmovel = months_seq,
mesnotrim = mesnotrim_seq,
popocup = rep(100, 12)
)
me <- data.table::data.table(
anomesexato = months_seq, # Using anomesexato, not anomesfinaltrimmovel
z_popocup = rep(100000, 12)
)
# Should not error
result <- compute_series_starting_points(
me, rq,
calibration_start = 201301L,
calibration_end = 201312L,
scale_factor = 1000,
verbose = FALSE
)
expect_s3_class(result, "data.table")
expect_true(nrow(result) > 0)
})
test_that("non-finite y0 values are replaced with 0", {
# If all z_ values are NA for a calibration period, e0 would be NaN
# The function should replace non-finite y0 with 0
months_seq <- generate_yyyymm_seq(201301L, 12)
mesnotrim_seq <- ((months_seq %% 100L - 1L) %% 3L) + 1L
rq <- data.table::data.table(
anomesfinaltrimmovel = months_seq,
mesnotrim = mesnotrim_seq,
popocup = rep(100, 12)
)
# z_ values are all NA
me <- data.table::data.table(
anomesexato = months_seq,
z_popocup = rep(NA_real_, 12)
)
result <- compute_series_starting_points(
me, rq,
calibration_start = 201301L,
calibration_end = 201312L,
scale_factor = 1000,
use_series_specific_periods = FALSE,
verbose = FALSE
)
# All y0 should be 0 (non-finite replaced)
expect_true(all(result$y0 == 0),
label = "Non-finite y0 replaced with 0")
})
# =============================================================================
# INTEGRATION: compute_z_aggregates() -> compute_series_starting_points()
# =============================================================================
test_that("z_aggregates output is compatible with compute_series_starting_points input", {
# Build calibrated mock data -> compute z_aggregates -> feed into starting points
dt <- create_mock_calibrated(n_per_month = 30L,
months = generate_yyyymm_seq(201301L, 12))
z_agg <- compute_z_aggregates(dt, verbose = FALSE)
# Verify z_aggregates has anomesexato column
expect_true("anomesexato" %in% names(z_agg))
# Create matching rolling quarter data with same series
z_cols <- grep("^z_", names(z_agg), value = TRUE)
series_names <- sub("^z_", "", z_cols)
rq <- data.table::data.table(
anomesfinaltrimmovel = z_agg$anomesexato,
mesnotrim = ((z_agg$anomesexato %% 100L - 1L) %% 3L) + 1L
)
# Add matching series columns with plausible rolling quarter values
for (s in series_names) {
z_col <- paste0("z_", s)
if (z_col %in% names(z_agg)) {
rq[, (s) := z_agg[[z_col]] / 1000]
}
}
# Should not error
result <- compute_series_starting_points(
z_agg, rq,
calibration_start = 201301L,
calibration_end = 201312L,
scale_factor = 1000,
use_series_specific_periods = FALSE,
verbose = FALSE
)
expect_s3_class(result, "data.table")
expect_true(nrow(result) > 0, label = "Starting points computed from z_aggregates")
expect_true(all(is.finite(result$y0)),
label = "All y0 from pipeline are finite")
})
test_that("compute_z_aggregates filters out zero and NA weights", {
dt <- data.table::data.table(
weight_monthly = c(1000, 0, NA_real_, 1000),
ref_month_yyyymm = rep(202301L, 4),
ref_month_in_quarter = rep(1L, 4),
Ano = 2023L,
Trimestre = 1L,
V2009 = c(30L, 35L, 25L, 40L)
)
result <- compute_z_aggregates(dt, verbose = FALSE)
# z_populacao should only count obs with valid weight (2 obs with weight=1000)
expect_equal(result$z_populacao, 2 * 1000)
})
test_that("compute_z_aggregates errors when all weights are zero/NA", {
dt <- data.table::data.table(
weight_monthly = c(0, NA_real_, 0),
ref_month_yyyymm = rep(202301L, 3),
ref_month_in_quarter = rep(1L, 3),
Ano = 2023L,
Trimestre = 1L,
V2009 = c(30L, 35L, 25L)
)
expect_error(
compute_z_aggregates(dt, verbose = FALSE),
"No observations with valid weight_monthly"
)
})
test_that("compute_z_aggregates produces z_pop14mais only for age >= 14", {
dt <- data.table::data.table(
weight_monthly = rep(1000, 6),
ref_month_yyyymm = rep(202301L, 6),
ref_month_in_quarter = rep(1L, 6),
Ano = 2023L,
Trimestre = 1L,
V2009 = c(5L, 10L, 13L, 14L, 30L, 65L) # 3 under 14, 3 at/above 14
)
result <- compute_z_aggregates(dt, verbose = FALSE)
# z_pop14mais: only ages 14, 30, 65 -> 3 obs * 1000 weight
expect_equal(result$z_pop14mais, 3 * 1000)
# z_populacao: all 6 obs
expect_equal(result$z_populacao, 6 * 1000)
})
# =============================================================================
# INPC LAG PRE-COMPUTE TEST
# =============================================================================
test_that("compute_series_starting_points pre-computes lagged INPC", {
skip("requires proper INPC data sequence with pre-PNADC values")
# This test validates that .inpc100dez1993_lagged is computed BEFORE
# any deflation operations, ensuring that z_massaefetrealtodos can use
# the lagged INPC for the first PNADC month (201201 uses INPC[201112]).
#
# Mock scenario:
# - rolling_quarters: 201112, 201201 (at least)
# - inpc100dez1993: values for both months
# - z_massaefetnominaltodos: values for both months
#
# Expected: z_massaefetrealtodos[201201] uses INPC[201112] as lag
# (not NA from post-filter shift)
})
test_that("compute_series_starting_points produces non-NA y0 with lagged INPC", {
# Lightweight version: validate that the pre-compute doesn't break existing tests
# and that y0 values remain finite when inpc columns are present
months_seq <- generate_yyyymm_seq(201301L, 12)
mesnotrim_seq <- ((months_seq %% 100L - 1L) %% 3L) + 1L
rq <- data.table::data.table(
anomesfinaltrimmovel = months_seq,
mesnotrim = mesnotrim_seq,
popocup = rep(100, 12),
inpc100dez1993 = 100 + seq_len(12) # Simple increasing sequence
)
me <- data.table::data.table(
anomesexato = months_seq,
z_popocup = rep(100000, 12) # Nominal values
)
result <- compute_series_starting_points(
me, rq,
calibration_start = 201301L,
calibration_end = 201312L,
scale_factor = 1000,
use_series_specific_periods = FALSE,
verbose = FALSE
)
# Verify result is valid (covers the pre-compute block)
expect_s3_class(result, "data.table")
expect_true(nrow(result) > 0)
expect_true(all(is.finite(result$y0)))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.