tests/testthat/test-aggregate.R

# Tests for aggregation functions

# ============================================================================
# aggregate_geography() tests
# ============================================================================

test_that("aggregate_geography validates value column exists", {
  df <- data.frame(
    GEOGRAPHY_CODE = "123",
    VALUE = 100
  )
  
  expect_error(
    aggregate_geography(df, "TYPE499", "OBS_VALUE"),
    "Column.*not found"
  )
})

test_that("aggregate_geography aggregates data correctly", {
  df <- data.frame(
    GEOGRAPHY_CODE = c("123", "123", "456"),
    GEOGRAPHY_NAME = c("Area A", "Area A", "Area B"),
    CATEGORY = c("X", "X", "Y"),
    OBS_VALUE = c(10, 20, 30)
  )
  
  result <- aggregate_geography(df, "TYPE499", "OBS_VALUE")
  
  expect_s3_class(result, "data.frame")
  expect_equal(nrow(result), 2)
})

test_that("aggregate_geography uses sum by default", {
  df <- data.frame(
    GEOGRAPHY_CODE = c("123", "123"),
    CATEGORY = c("X", "X"),
    OBS_VALUE = c(10, 20)
  )
  
  result <- aggregate_geography(df, "TYPE499", "OBS_VALUE", sum)
  
  expect_equal(result$OBS_VALUE[1], 30)
})

test_that("aggregate_geography accepts custom aggregation function", {
  df <- data.frame(
    GEOGRAPHY_CODE = c("123", "123"),
    CATEGORY = c("X", "X"),
    OBS_VALUE = c(10, 20)
  )
  
  result <- aggregate_geography(df, "TYPE499", "OBS_VALUE", mean)
  
  expect_equal(result$OBS_VALUE[1], 15)
})

test_that("aggregate_geography handles NA values with na.rm=TRUE", {
  df <- data.frame(
    GEOGRAPHY_CODE = c("123", "123"),
    CATEGORY = c("X", "X"),
    OBS_VALUE = c(10, NA)
  )
  
  result <- aggregate_geography(df, "TYPE499", "OBS_VALUE", sum)
  
  expect_equal(result$OBS_VALUE[1], 10)
})

test_that("aggregate_geography removes geography columns from grouping", {
  df <- data.frame(
    GEOGRAPHY_CODE = c("123", "123"),
    GEOGRAPHY_NAME = c("Area", "Area"),
    CATEGORY = c("X", "X"),
    OBS_VALUE = c(10, 20)
  )
  
  result <- aggregate_geography(df, "TYPE499", "OBS_VALUE")
  
  expect_equal(nrow(result), 1)
})

test_that("aggregate_geography removes RECORD_COUNT from grouping", {
  df <- data.frame(
    GEOGRAPHY_CODE = "123",
    RECORD_COUNT = 1,
    CATEGORY = "X",
    OBS_VALUE = 100
  )
  
  result <- aggregate_geography(df, "TYPE499", "OBS_VALUE")
  
  expect_false("RECORD_COUNT" %in% names(result))
})

test_that("aggregate_geography handles single row", {
  df <- data.frame(
    GEOGRAPHY_CODE = "123",
    OBS_VALUE = 100
  )
  
  result <- aggregate_geography(df, "TYPE499", "OBS_VALUE")
  
  expect_equal(nrow(result), 1)
  expect_equal(result$OBS_VALUE[1], 100)
})

test_that("aggregate_geography handles multiple grouping variables", {
  df <- data.frame(
    GEOGRAPHY_CODE = c("123", "123", "123"),
    CAT1 = c("A", "A", "B"),
    CAT2 = c("X", "Y", "X"),
    OBS_VALUE = c(10, 20, 30)
  )
  
  result <- aggregate_geography(df, "TYPE499", "OBS_VALUE")
  
  expect_equal(nrow(result), 3)
})

test_that("aggregate_geography preserves non-value columns", {
  df <- data.frame(
    GEOGRAPHY_CODE = c("123", "123"),
    CATEGORY = c("X", "X"),
    DATE = c("2020", "2020"),
    OBS_VALUE = c(10, 20)
  )
  
  result <- aggregate_geography(df, "TYPE499", "OBS_VALUE")
  
  expect_true("CATEGORY" %in% names(result))
  expect_true("DATE" %in% names(result))
})

# ============================================================================
# aggregate_time() tests
# ============================================================================

test_that("aggregate_time requires DATE column", {
  df <- data.frame(OBS_VALUE = c(1, 2, 3))
  
  expect_error(
    aggregate_time(df),
    "must contain.*DATE"
  )
})

test_that("aggregate_time aggregates by year", {
  df <- data.frame(
    DATE = c("2020-01", "2020-02", "2021-01"),
    DATE_NAME = c("Jan 2020", "Feb 2020", "Jan 2021"),
    CATEGORY = c("A", "A", "B"),
    OBS_VALUE = c(10, 20, 30)
  )
  
  result <- aggregate_time(df, "year", "OBS_VALUE", mean)
  
  expect_s3_class(result, "data.frame")
  expect_true("PERIOD" %in% names(result))
  expect_equal(length(unique(result$PERIOD)), 2)
})

test_that("aggregate_time uses mean by default", {
  df <- data.frame(
    DATE = c("2020-01", "2020-02"),
    CATEGORY = "A",
    OBS_VALUE = c(10, 20)
  )
  
  result <- aggregate_time(df, "year", "OBS_VALUE", mean)
  
  expect_equal(result$OBS_VALUE[1], 15)
})

test_that("aggregate_time accepts custom aggregation function", {
  df <- data.frame(
    DATE = c("2020-01", "2020-02"),
    CATEGORY = "A",
    OBS_VALUE = c(10, 20)
  )
  
  result <- aggregate_time(df, "year", "OBS_VALUE", sum)
  
  expect_equal(result$OBS_VALUE[1], 30)
})

test_that("aggregate_time extracts year from DATE", {
  df <- data.frame(
    DATE = c("2020-01", "2020-02", "2021-01"),
    OBS_VALUE = c(10, 20, 30)
  )
  
  result <- aggregate_time(df, "year", "OBS_VALUE")
  
  expect_true("2020" %in% result$PERIOD)
  expect_true("2021" %in% result$PERIOD)
})

test_that("aggregate_time removes DATE columns from grouping", {
  df <- data.frame(
    DATE = c("2020-01", "2020-02"),
    DATE_NAME = c("Jan", "Feb"),
    CATEGORY = "A",
    OBS_VALUE = c(10, 20)
  )
  
  result <- aggregate_time(df, "year", "OBS_VALUE")
  
  expect_false("DATE" %in% names(result))
  expect_false("DATE_NAME" %in% names(result))
})

test_that("aggregate_time removes RECORD_COUNT from grouping", {
  df <- data.frame(
    DATE = "2020-01",
    RECORD_COUNT = 1,
    OBS_VALUE = 100
  )
  
  result <- aggregate_time(df, "year", "OBS_VALUE")
  
  expect_false("RECORD_COUNT" %in% names(result))
})

test_that("aggregate_time handles single time period", {
  df <- data.frame(
    DATE = c("2020-01", "2020-02"),
    OBS_VALUE = c(10, 20)
  )
  
  result <- aggregate_time(df, "year", "OBS_VALUE")
  
  expect_equal(nrow(result), 1)
  expect_equal(result$PERIOD[1], "2020")
})

test_that("aggregate_time handles multiple categories", {
  df <- data.frame(
    DATE = c("2020-01", "2020-02", "2020-01", "2020-02"),
    CATEGORY = c("A", "A", "B", "B"),
    OBS_VALUE = c(10, 20, 30, 40)
  )
  
  result <- aggregate_time(df, "year", "OBS_VALUE", mean)
  
  expect_equal(nrow(result), 2)
})

test_that("aggregate_time handles NA values", {
  df <- data.frame(
    DATE = c("2020-01", "2020-02"),
    OBS_VALUE = c(10, NA)
  )
  
  result <- aggregate_time(df, "year", "OBS_VALUE", mean)
  
  expect_equal(result$OBS_VALUE[1], 10)
})

test_that("aggregate_time validates period argument", {
  df <- data.frame(
    DATE = "2020-01",
    OBS_VALUE = 100
  )
  
  expect_error(
    aggregate_time(df, "invalid", "OBS_VALUE"),
    "should be one of"
  )
})

test_that("aggregate_time accepts year period", {
  df <- data.frame(
    DATE = "2020-01",
    OBS_VALUE = 100
  )
  
  expect_error(
    aggregate_time(df, "year", "OBS_VALUE"),
    NA
  )
})

test_that("aggregate_time accepts quarter period", {
  df <- data.frame(
    DATE = "2020-01",
    OBS_VALUE = 100
  )
  
  expect_error(
    aggregate_time(df, "quarter", "OBS_VALUE"),
    NA
  )
})

test_that("aggregate_time accepts month period", {
  df <- data.frame(
    DATE = "2020-01",
    OBS_VALUE = 100
  )
  
  expect_error(
    aggregate_time(df, "month", "OBS_VALUE"),
    NA
  )
})

test_that("aggregate_time preserves grouping variables", {
  df <- data.frame(
    DATE = c("2020-01", "2020-02"),
    CATEGORY = "A",
    REGION = "North",
    OBS_VALUE = c(10, 20)
  )
  
  result <- aggregate_time(df, "year", "OBS_VALUE")
  
  expect_true("CATEGORY" %in% names(result))
  expect_true("REGION" %in% names(result))
})

# ============================================================================
# Integration tests
# ============================================================================

test_that("aggregate_geography and aggregate_time work together", {
  df <- data.frame(
    GEOGRAPHY_CODE = c("123", "123", "456", "456"),
    DATE = c("2020-01", "2020-02", "2020-01", "2020-02"),
    CATEGORY = c("A", "A", "A", "A"),
    OBS_VALUE = c(10, 20, 30, 40)
  )
  
  result1 <- aggregate_geography(df, "TYPE499", "OBS_VALUE")
  result2 <- aggregate_time(result1, "year", "OBS_VALUE")
  
  expect_s3_class(result2, "data.frame")
  expect_true("PERIOD" %in% names(result2))
})

test_that("aggregate_time handles empty data frame", {
  df <- data.frame(
    DATE = character(),
    OBS_VALUE = numeric()
  )
  
  result <- aggregate_time(df, "year", "OBS_VALUE")
  
  expect_equal(nrow(result), 0)
})

Try the nomisdata package in your browser

Any scripts or data that you put into this service are public.

nomisdata documentation built on Feb. 10, 2026, 5:10 p.m.