# tests/testthat/test-transform.R

# Tests for data transformation functions

# ============================================================================
# tidy_names() basic tests
# ============================================================================

test_that("tidy_names converts to snake_case by default", {
  skip_if_not_installed("janitor")

  # Upper-case input names; no style argument is passed to tidy_names().
  raw <- data.frame(GEOGRAPHY_NAME = "UK", OBS_VALUE = 100, DATE_CODE = "2020")
  cleaned_names <- names(tidy_names(raw))

  # Each expected lower-case name must be present in the output.
  for (wanted in c("geography_name", "obs_value", "date_code")) {
    expect_true(wanted %in% cleaned_names)
  }
})

test_that("tidy_names supports camelCase style", {
  skip_if_not_installed("janitor")

  raw <- data.frame(GEOGRAPHY_NAME = "UK", OBS_VALUE = 100)
  cleaned_names <- names(tidy_names(raw, "camelCase"))

  # Either capitalisation is accepted, since janitor might produce
  # "geographyName" or "geographyname".
  accepted <- c("geographyName", "geographyname")
  expect_true(any(accepted %in% cleaned_names))
})

test_that("tidy_names supports period.case style", {
  skip_if_not_installed("janitor")

  raw <- data.frame(GEOGRAPHY_NAME = "UK", OBS_VALUE = 100)
  cleaned_names <- names(tidy_names(raw, "period.case"))

  # Both columns should come back lower-cased with a period separator.
  for (wanted in c("geography.name", "obs.value")) {
    expect_true(wanted %in% cleaned_names)
  }
})

test_that("tidy_names handles missing janitor gracefully", {
  # Only meaningful when janitor is absent. testthat exports no
  # skip_if_installed(), so the condition is spelled out with skip_if().
  skip_if(requireNamespace("janitor", quietly = TRUE), "janitor is installed")

  df <- data.frame(GEOGRAPHY_NAME = "UK", OBS_VALUE = 100)

  # The fallback path must warn; the result is captured for the checks below.
  expect_warning(
    result <- tidy_names(df),
    "janitor.*not available"
  )

  # The input must come back untouched, names included.
  expect_equal(result, df)
  expect_equal(names(result), c("GEOGRAPHY_NAME", "OBS_VALUE"))
})

test_that("tidy_names preserves data", {
  skip_if_not_installed("janitor")

  # Keep the column contents in named vectors so the expectations can compare
  # against exactly what went in.
  countries <- c("UK", "US")
  amounts <- c(100, 200)
  raw <- data.frame(GEOGRAPHY_NAME = countries, OBS_VALUE = amounts)

  cleaned <- tidy_names(raw)

  # Columns are checked by position, so the test does not depend on the
  # cleaned names.
  expect_equal(nrow(cleaned), 2)
  expect_equal(cleaned[[1]], countries)
  expect_equal(cleaned[[2]], amounts)
})

test_that("tidy_names handles single column", {
  skip_if_not_installed("janitor")

  # A data frame with exactly one column.
  one_col <- data.frame(GEOGRAPHY_CODE = "123")
  cleaned_names <- names(tidy_names(one_col))

  expect_true(is.element("geography_code", cleaned_names))
})

test_that("tidy_names handles many columns", {
  skip_if_not_installed("janitor")

  # Six columns sharing the "COL_" prefix.
  wide <- data.frame(
    COL_A = 1,
    COL_B = 2,
    COL_C = 3,
    COL_D = 4,
    COL_E = 5,
    COL_F = 6
  )

  cleaned <- tidy_names(wide)

  # No column may be dropped, and every cleaned name must contain "col_".
  expect_equal(ncol(cleaned), 6)
  has_prefix <- grepl("col_", names(cleaned))
  expect_true(all(has_prefix))
})

test_that("tidy_names handles columns with numbers", {
  skip_if_not_installed("janitor")

  raw <- data.frame(GEOGRAPHY_2020 = "UK", VALUE_123 = 100)
  cleaned_names <- names(tidy_names(raw))

  # Trailing digits must survive the renaming.
  for (wanted in c("geography_2020", "value_123")) {
    expect_true(wanted %in% cleaned_names)
  }
})

test_that("tidy_names handles columns with special characters", {
  skip_if_not_installed("janitor")

  # check.names = FALSE keeps the non-syntactic names intact, so tidy_names()
  # itself receives the hyphen and the percent sign.
  df <- data.frame(
    `GEOGRAPHY-NAME` = "UK",
    `OBS%VALUE` = 100,
    check.names = FALSE
  )

  result <- tidy_names(df)

  # janitor should clean these; expect_length() reports the actual length on
  # failure, unlike expect_true(length(x) == 2).
  expect_length(names(result), 2)
})

test_that("tidy_names handles empty data frame", {
  skip_if_not_installed("janitor")

  # Zero rows and zero columns in, zero rows and zero columns out.
  cleaned <- tidy_names(data.frame())

  expect_equal(nrow(cleaned), 0)
  expect_equal(ncol(cleaned), 0)
})

test_that("tidy_names handles columns that are already clean", {
  skip_if_not_installed("janitor")

  # Names that are already lower-case single words should still be present.
  raw <- data.frame(geography = "UK", value = 100)
  cleaned_names <- names(tidy_names(raw))

  for (wanted in c("geography", "value")) {
    expect_true(wanted %in% cleaned_names)
  }
})

test_that("tidy_names style argument is case-sensitive", {
  skip_if_not_installed("janitor")

  raw <- data.frame(TEST_COL = 1)

  # Each style, spelled with its exact case, must run without raising an error
  # (a regexp of NA asserts that no error occurs).
  for (style in c("snake_case", "camelCase", "period.case")) {
    expect_error(tidy_names(raw, style), NA)
  }
})

test_that("tidy_names handles NA values in data", {
  skip_if_not_installed("janitor")

  # The second row is missing in both the character and the numeric column.
  raw <- data.frame(GEOGRAPHY_NAME = c("UK", NA), OBS_VALUE = c(100, NA))

  cleaned <- tidy_names(raw)
  text_col <- cleaned[[1]]
  number_col <- cleaned[[2]]

  # The missing values must stay missing, with their original types.
  expect_equal(text_col[2], NA_character_)
  expect_equal(number_col[2], NA_real_)
})

test_that("tidy_names returns data frame", {
  skip_if_not_installed("janitor")

  df <- data.frame(TEST = 1)
  result <- tidy_names(df)

  # expect_s3_class() reports the actual class on failure, which
  # expect_true(is.data.frame(x)) cannot do.
  expect_s3_class(result, "data.frame")
})

test_that("tidy_names handles tibbles", {
  skip_if_not_installed("janitor")
  # tibble is called directly below, so gate on it explicitly instead of
  # relying on it being installed as a side effect of another package.
  skip_if_not_installed("tibble")

  df <- tibble::tibble(
    GEOGRAPHY_NAME = "UK",
    OBS_VALUE = 100
  )

  result <- tidy_names(df)

  expect_true("geography_name" %in% names(result))
})

test_that("tidy_names snake_case handles consecutive underscores", {
  skip_if_not_installed("janitor")

  df <- data.frame(
    GEOGRAPHY__NAME = "UK"  # Double underscore
  )

  result <- tidy_names(df)

  # janitor should clean this; expect_length() reports the actual length on
  # failure, unlike expect_true(length(x) == 1).
  expect_length(names(result), 1)
})

test_that("tidy_names period.case replaces underscores with periods", {
  skip_if_not_installed("janitor")

  # The second name has two underscores, so more than one separator per name
  # is exercised.
  raw <- data.frame(GEOGRAPHY_NAME = "UK", OBS_VALUE_TOTAL = 100)
  cleaned_names <- names(tidy_names(raw, "period.case"))

  for (wanted in c("geography.name", "obs.value.total")) {
    expect_true(wanted %in% cleaned_names)
  }

  # No underscore may remain in any output name.
  still_has_underscore <- grepl("_", cleaned_names)
  expect_false(any(still_has_underscore))
})

test_that("tidy_names handles duplicated column names", {
  skip_if_not_installed("janitor")

  # check.names = FALSE is needed to build a data frame whose names really are
  # duplicated; by default data.frame() would rename the second column.
  df <- data.frame(
    VALUE = 1,
    VALUE = 2,
    check.names = FALSE
  )

  result <- tidy_names(df)

  # Should handle duplicates somehow: both columns survive and the cleaned
  # names no longer collide.
  expect_equal(ncol(result), 2)
  expect_identical(anyDuplicated(names(result)), 0L)
})

test_that("tidy_names with janitor unavailable returns warning", {
  # Only meaningful when janitor is absent. testthat exports no
  # skip_if_installed(), so the condition is spelled out with skip_if().
  skip_if(requireNamespace("janitor", quietly = TRUE), "janitor is installed")

  df <- data.frame(TEST = 1)

  expect_warning(
    tidy_names(df),
    "not available"
  )
})

test_that("tidy_names with janitor unavailable returns unchanged df", {
  # Only meaningful when janitor is absent. testthat exports no
  # skip_if_installed(), so the condition is spelled out with skip_if().
  skip_if(requireNamespace("janitor", quietly = TRUE), "janitor is installed")

  df <- data.frame(ORIGINAL_NAME = 1)

  # Warnings are silenced here because this test only checks the returned
  # names.
  result <- suppressWarnings(tidy_names(df))

  expect_identical(names(result), "ORIGINAL_NAME")
})

# Try the nomisdata package in your browser
#
# Any scripts or data that you put into this service are public.
#
# nomisdata documentation built on Feb. 10, 2026, 5:10 p.m.