tests/testthat/test-get-data.R

# Tests for get_fars() and get_gescrss() data retrieval functions
# Note: Many of these tests will be skipped on CRAN to avoid network dependencies

# Internet connection handling ----

test_that("get_fars handles no internet connection gracefully", {
  skip_on_cran()

  # If we can't connect, the function should return invisibly
  # This test is environment-dependent, so we just check it doesn't error
  expect_no_error({
    result <- tryCatch(
      get_fars(years = 2020, source = "zenodo", proceed = TRUE),
      error = function(e) NULL
    )
  })
})

test_that("get_gescrss handles no internet connection gracefully", {
  skip_on_cran()

  expect_no_error({
    result <- tryCatch(
      get_gescrss(years = 2020, source = "zenodo", proceed = TRUE),
      error = function(e) NULL
    )
  })
})


# Zenodo download structure (with mocking) ----

test_that("get_fars with source='zenodo' attempts zenodo download", {
  skip_on_cran()
  skip("Requires network access and may be slow")

  # This would actually download from Zenodo
  # Only run manually when testing network functionality
  result <- get_fars(years = 2020, source = "zenodo", proceed = TRUE)

  if (!is.null(result)) {
    expect_fars_object(result)
  }
})

test_that("get_gescrss with source='zenodo' attempts zenodo download", {
  skip_on_cran()
  skip("Requires network access and may be slow")

  result <- get_gescrss(years = 2020, source = "zenodo", proceed = TRUE)

  if (!is.null(result)) {
    expect_gescrss_object(result)
  }
})


# Object structure validation ----

test_that("FARS object has correct structure", {
  # Use mock data to test structure
  expect_fars_object(mock_fars)
})

test_that("GESCRSS object has correct structure", {
  expect_gescrss_object(mock_gescrss)
})

test_that("FARS flat file has required columns", {
  required_cols <- c("year", "state", "st_case", "id", "veh_no", "per_no")
  expect_true(all(required_cols %in% names(mock_fars$flat)))
})

test_that("GESCRSS flat file has required columns", {
  required_cols <- c("year", "casenum", "id", "veh_no", "per_no", "weight")
  expect_true(all(required_cols %in% names(mock_gescrss$flat)))
})


# Year filtering ----

test_that("get_fars filters to requested years from zenodo data", {
  skip_on_cran()
  skip("Requires network access")

  result <- get_fars(years = c(2020, 2021), source = "zenodo", proceed = TRUE)

  if (!is.null(result)) {
    expect_true(all(unique(result$flat$year) %in% c(2020, 2021)))
  }
})

test_that("multiple year request includes all years", {
  # Test with mock data
  multi_year_fars <- mock_fars

  years_in_flat <- unique(multi_year_fars$flat$year)
  expect_true(all(c(2022, 2023) %in% years_in_flat))
})


# State filtering ----

test_that("get_fars filters by state correctly", {
  skip_on_cran()
  skip("Requires network access")

  result <- get_fars(years = 2020, states = "Virginia", source = "zenodo", proceed = TRUE)

  if (!is.null(result)) {
    expect_true(all(result$flat$state == "Virginia"))
  }
})

test_that("get_fars handles multiple states", {
  skip_on_cran()
  skip("Requires network access")

  result <- get_fars(years = 2020, states = c("Virginia", "Maryland"), source = "zenodo", proceed = TRUE)

  if (!is.null(result)) {
    expect_true(all(result$flat$state %in% c("Virginia", "Maryland")))
  }
})


# Region filtering ----

test_that("get_gescrss filters by region correctly", {
  skip_on_cran()
  skip("Requires network access")

  result <- get_gescrss(years = 2020, regions = "s", source = "zenodo", proceed = TRUE)

  if (!is.null(result)) {
    # Check that region filtering worked
    expect_s3_class(result$flat, "data.frame")
  }
})


# Cache handling ----

test_that("cache parameter creates RDS file when specified with dir", {
  skip("Requires directory setup")

  # This would require setting up a temporary directory structure
  # and testing the caching mechanism
  expect_true(TRUE)  # Placeholder
})

test_that("existing cache file is used when available", {
  skip("Requires directory setup")

  # Would test that if cache exists, it's loaded instead of downloading
  expect_true(TRUE)  # Placeholder
})


# Proceed parameter ----

# test_that("proceed=FALSE would require user input", {
#   # Can't easily test interactive readline, but can verify parameter exists
#   expect_no_error({
#     # This will error on download attempt, but that's after the proceed parameter
#     tryCatch(
#       get_fars(years = 2020, proceed = FALSE),
#       error = function(e) NULL
#     )
#   })
# })
#
# test_that("proceed=TRUE skips user prompts", {
#   skip_on_cran()
#
#   # With proceed=TRUE, should attempt download without prompting
#   expect_no_error({
#     result <- tryCatch(
#       get_fars(years = 2020, source = "zenodo", proceed = TRUE),
#       error = function(e) NULL
#     )
#   })
# })


# Source parameter ----

test_that("source='zenodo' is default", {
  skip_on_cran()

  # Default should use zenodo
  expect_no_error({
    result <- tryCatch(
      get_fars(years = 2020, proceed = TRUE),
      error = function(e) NULL
    )
  })
})

test_that("source='nhtsa' would download from NHTSA", {
  skip("Long-running test that downloads large files")

  # This would actually download from NHTSA FTP
  # Only run manually
  expect_true(TRUE)
})


# Directory handling ----

test_that("dir=NULL uses temp directories", {
  skip("Requires full download")

  # When dir is NULL, should use tempdir()
  expect_true(TRUE)  # Placeholder
})

test_that("specified dir creates FARS data folder", {
  skip("Requires directory setup")

  # Should create 'FARS data' folder in specified directory
  expect_true(TRUE)  # Placeholder
})


# Multi tables filtering ----

test_that("state filtering affects all multi tables", {
  # Using mock data
  filtered_states <- c("Virginia")

  # Simulate filtering
  filtered_flat <- mock_fars$flat %>%
    dplyr::filter(state %in% filtered_states)

  filter_frame <- filtered_flat %>%
    dplyr::distinct(year, state, st_case)

  # Multi tables should only have matching st_case values
  expect_true(nrow(filter_frame) <= nrow(mock_fars$flat))
})


# Error handling ----

test_that("get_fars returns invisible NULL on download failure", {
  skip_on_cran()

  # If download fails (e.g., bad network), should return invisible(NULL)
  # This is hard to test reliably, so we check the mechanism exists
  expect_no_error({
    result <- tryCatch(
      get_fars(years = 2020, source = "zenodo", proceed = TRUE),
      error = function(e) NULL
    )
  })
})

test_that("download failure doesn't crash R session", {
  skip_on_cran()

  expect_no_error({
    # Even with network issues, shouldn't crash
    result <- tryCatch(
      get_gescrss(years = 2020, source = "zenodo", proceed = TRUE),
      error = function(e) NULL
    )
  })
})


# Class assignment ----

test_that("get_fars returns object with FARS class", {
  expect_true("FARS" %in% class(mock_fars))
})

test_that("get_gescrss returns object with GESCRSS class", {
  expect_true("GESCRSS" %in% class(mock_gescrss))
})


# Codebook ----

test_that("codebook is included in FARS object", {
  expect_true("codebook" %in% names(mock_fars))
  expect_s3_class(mock_fars$codebook, "data.frame")
})

test_that("codebook has expected structure", {
  expect_true(all(c("source", "file", "name_ncsa", "name_rfars", "label") %in% names(mock_fars$codebook)))
})


# Data consistency checks ----

test_that("all IDs in multi tables exist in flat table", {
  # Get all IDs from flat
  flat_ids <- unique(paste0(mock_fars$flat$year, mock_fars$flat$st_case))

  # Get IDs from multi_acc
  multi_ids <- unique(paste0(mock_fars$multi_acc$year, mock_fars$multi_acc$st_case))

  # All multi IDs should exist in flat
  expect_true(all(multi_ids %in% flat_ids))
})

test_that("events table links to flat table", {
  flat_ids <- unique(paste0(mock_fars$flat$year, mock_fars$flat$st_case))
  events_ids <- unique(paste0(mock_fars$events$year, mock_fars$events$st_case))

  expect_true(all(events_ids %in% flat_ids))
})

Try the rfars package in your browser

Any scripts or data that you put into this service are public.

rfars documentation built on Nov. 5, 2025, 7:09 p.m.