tests/testthat/test-data.R

test_that("data_save creates data file and updates database", {
  test_dir <- create_test_project()
  old_wd <- getwd()
  on.exit({
    setwd(old_wd)
    cleanup_test_dir(test_dir)
  })

  setwd(test_dir)

  # Create test data
  test_data <- data.frame(x = 1:5, y = letters[1:5])

  # Save as CSV using direct path (force = TRUE to create directory structure)
  suppressMessages(data_save(test_data, "outputs/public/sample.csv", force = TRUE))

  # Check file was created
  expect_true(file.exists("outputs/public/sample.csv"))

  # Check database record was created
  conn <- DBI::dbConnect(RSQLite::SQLite(), "framework.db")
  on.exit(DBI::dbDisconnect(conn), add = TRUE)

  record <- DBI::dbGetQuery(conn, "SELECT * FROM data WHERE name = 'outputs/public/sample.csv'")
  expect_equal(nrow(record), 1)
  expect_equal(record$type, "csv")
  expect_false(is.na(record$hash))
})

test_that("data_save creates RDS files", {
  test_dir <- create_test_project()
  old_wd <- getwd()
  on.exit({
    setwd(old_wd)
    cleanup_test_dir(test_dir)
  })

  setwd(test_dir)

  test_data <- data.frame(x = 1:5, y = letters[1:5])

  # Use direct path for RDS file
  suppressMessages(data_save(test_data, "outputs/private/rds_data.rds", force = TRUE))

  expect_true(file.exists("outputs/private/rds_data.rds"))
})

test_that("data_read reads saved CSV data", {
  test_dir <- create_test_project()
  old_wd <- getwd()
  on.exit({
    setwd(old_wd)
    cleanup_test_dir(test_dir)
  })

  setwd(test_dir)

  # Create directory and save test data using direct path
  dir.create("inputs/raw", recursive = TRUE, showWarnings = FALSE)
  original_data <- data.frame(x = 1:5, y = letters[1:5])
  suppressMessages(data_save(original_data, "inputs/raw/csv_load.csv"))

  # Add to config for catalog lookup
  config <- settings_read()
  config$data$inputs <- list(
    raw = list(
      csv_load = list(
        path = "inputs/raw/csv_load.csv",
        type = "csv",
        delimiter = "comma",
        locked = FALSE,
        encrypted = FALSE
      )
    )
  )
  settings_write(config)

  # Load it back using catalog name
  loaded_data <- suppressWarnings(data_read("inputs.raw.csv_load"))

  expect_equal(nrow(loaded_data), 5)
  expect_equal(loaded_data$x, 1:5)
})

test_that("data_read reads saved RDS data", {
  test_dir <- create_test_project()
  old_wd <- getwd()
  on.exit({
    setwd(old_wd)
    cleanup_test_dir(test_dir)
  })

  setwd(test_dir)

  # Create directory and save test data using direct path
  dir.create("inputs/intermediate", recursive = TRUE, showWarnings = FALSE)
  original_data <- data.frame(x = 1:5, y = letters[1:5])
  suppressMessages(data_save(original_data, "inputs/intermediate/rds_load.rds"))

  # Add to config for catalog lookup
  config <- settings_read()
  config$data$inputs <- list(
    intermediate = list(
      rds_load = list(
        path = "inputs/intermediate/rds_load.rds",
        type = "rds",
        locked = FALSE,
        encrypted = FALSE
      )
    )
  )
  settings_write(config)

  # Load it back using catalog name
  loaded_data <- suppressWarnings(data_read("inputs.intermediate.rds_load"))

  expect_equal(loaded_data, original_data)
})

test_that("data_read reads direct file paths", {
  test_dir <- create_test_project()
  old_wd <- getwd()
  on.exit({
    setwd(old_wd)
    cleanup_test_dir(test_dir)
  })

  setwd(test_dir)

  # Create a CSV file directly
  dir.create("data/direct", recursive = TRUE)
  test_data <- data.frame(a = 1:3, b = 4:6)
  write.csv(test_data, "data/direct/file.csv", row.names = FALSE)

  # Load using direct path
  loaded <- data_read("data/direct/file.csv")

  expect_equal(nrow(loaded), 3)
  expect_true("a" %in% names(loaded))
})

test_that("data_read fails for non-existent file", {
  test_dir <- create_test_project()
  old_wd <- getwd()
  on.exit({
    setwd(old_wd)
    cleanup_test_dir(test_dir)
  })

  setwd(test_dir)

  expect_error(data_read("nonexistent.file.csv"))
})

test_that("data_spec_get retrieves data specification from config", {
  test_dir <- create_test_project()
  old_wd <- getwd()
  on.exit({
    setwd(old_wd)
    cleanup_test_dir(test_dir)
  })

  setwd(test_dir)

  config <- settings_read()
  config$data$inputs <- list(
    raw = list(
      test = list(
        path = "inputs/raw/test.csv",
        type = "csv",
        delimiter = "comma"
      )
    )
  )
  settings_write(config)

  spec <- data_spec_get("inputs.raw.test")

  expect_type(spec, "list")
  # Path is normalized to absolute, so check it ends with the expected relative path
  expect_true(endsWith(spec$path, "inputs/raw/test.csv"))
  expect_equal(spec$type, "csv")
  expect_equal(spec$delimiter, "comma")
})

test_that("data_read reads Excel files directly", {
  skip_if_not_installed("readxl")
  skip_if_not_installed("writexl")
  
  test_dir <- create_test_project()
  old_wd <- getwd()
  on.exit({
    setwd(old_wd)
    cleanup_test_dir(test_dir)
  })

  setwd(test_dir)

  # Create an Excel file
  dir.create("data/excel", recursive = TRUE)
  test_data <- data.frame(x = 1:5, y = letters[1:5], z = 10:14)
  writexl::write_xlsx(test_data, "data/excel/test.xlsx")

  # Load using direct path
  loaded <- data_read("data/excel/test.xlsx")

  expect_equal(nrow(loaded), 5)
  expect_equal(loaded$x, 1:5)
  expect_equal(loaded$y, letters[1:5])
  expect_equal(loaded$z, 10:14)
})

test_that("data_read reads Excel files from config", {
  skip_if_not_installed("readxl")
  skip_if_not_installed("writexl")
  
  test_dir <- create_test_project()
  old_wd <- getwd()
  on.exit({
    setwd(old_wd)
    cleanup_test_dir(test_dir)
  })

  setwd(test_dir)

  # Create an Excel file
  dir.create("inputs/raw", recursive = TRUE)
  test_data <- data.frame(name = c("Alice", "Bob"), age = c(25, 30))
  writexl::write_xlsx(test_data, "inputs/raw/people.xlsx")

  # Add to config
  config <- settings_read()
  config$data$inputs <- list(
     raw = list(
      people = list(
        path = "inputs/raw/people.xlsx",
        type = "excel",
        locked = FALSE,
        encrypted = FALSE
      )
    )
  )
  settings_write(config)

  # Load it back
  loaded <- suppressWarnings(data_read("inputs.raw.people"))

  expect_equal(nrow(loaded), 2)
  expect_equal(loaded$name, c("Alice", "Bob"))
  expect_equal(loaded$age, c(25, 30))
})

test_that("data_spec_get detects Excel file type", {
  test_dir <- create_test_project()
  old_wd <- getwd()
  on.exit({
    setwd(old_wd)
    cleanup_test_dir(test_dir)
  })

  setwd(test_dir)

  # Create a fake Excel file
  dir.create("data/excel", recursive = TRUE)
  file.create("data/excel/test.xlsx")

  # Get spec by direct path
  spec <- data_spec_get("data/excel/test.xlsx")

  expect_equal(spec$type, "excel")
  expect_null(spec$delimiter)
})

# New path resolution tests
test_that("data_save resolves dot notation to configured directories", {
  test_dir <- create_test_project()
  old_wd <- getwd()
  on.exit({
    setwd(old_wd)
    cleanup_test_dir(test_dir)
  })

  setwd(test_dir)

  # Create inputs/intermediate directory
  dir.create("inputs/intermediate", recursive = TRUE, showWarnings = FALSE)

  # Create test data
  test_data <- data.frame(x = 1:5, y = letters[1:5])

  # Save using 3-part dot notation (inputs.intermediate.filename)
  suppressMessages(data_save(test_data, "inputs.intermediate.test_file"))

  # Should resolve to inputs/intermediate/test_file.rds
  expect_true(file.exists("inputs/intermediate/test_file.rds"))
})

test_that("data_save accepts direct file paths", {
  test_dir <- create_test_project()
  old_wd <- getwd()
  on.exit({
    setwd(old_wd)
    cleanup_test_dir(test_dir)
  })

  setwd(test_dir)

  # Create directory
  dir.create("inputs/intermediate", recursive = TRUE, showWarnings = FALSE)

  # Create test data
  test_data <- data.frame(x = 1:5, y = letters[1:5])

  # Save using direct path
  suppressMessages(data_save(test_data, "inputs/intermediate/direct_path.csv"))

  # Should save to exact path
  expect_true(file.exists("inputs/intermediate/direct_path.csv"))
})

test_that("data_save auto-detects type from file extension", {
  test_dir <- create_test_project()
  old_wd <- getwd()
  on.exit({
    setwd(old_wd)
    cleanup_test_dir(test_dir)
  })

  setwd(test_dir)

  dir.create("inputs/intermediate", recursive = TRUE, showWarnings = FALSE)
  test_data <- data.frame(x = 1:5)

  # CSV extension
  suppressMessages(data_save(test_data, "inputs/intermediate/file1.csv"))
  expect_true(file.exists("inputs/intermediate/file1.csv"))

  # RDS extension
  suppressMessages(data_save(test_data, "inputs/intermediate/file2.rds"))
  expect_true(file.exists("inputs/intermediate/file2.rds"))
})

test_that("data_save errors when directory doesn't exist without force", {
  test_dir <- create_test_project()
  old_wd <- getwd()
  on.exit({
    setwd(old_wd)
    cleanup_test_dir(test_dir)
  })

  setwd(test_dir)

  test_data <- data.frame(x = 1:5)

  # Should error because inputs/nonexistent doesn't exist
  expect_error(
    data_save(test_data, "inputs/nonexistent/file.csv"),
    "Directory.*does not exist.*force = TRUE"
  )
})

test_that("data_save creates directory with force = TRUE", {
  test_dir <- create_test_project()
  old_wd <- getwd()
  on.exit({
    setwd(old_wd)
    cleanup_test_dir(test_dir)
  })

  setwd(test_dir)

  test_data <- data.frame(x = 1:5)

  # Should create directory and save
  suppressMessages(
    data_save(test_data, "inputs/new_dir/file.rds", force = TRUE)
  )

  expect_true(dir.exists("inputs/new_dir"))
  expect_true(file.exists("inputs/new_dir/file.rds"))
})

test_that("data_save errors for simple filename without directory", {
  test_dir <- create_test_project()
  old_wd <- getwd()
  on.exit({
    setwd(old_wd)
    cleanup_test_dir(test_dir)
  })

  setwd(test_dir)

  test_data <- data.frame(x = 1:5)

  # Should error - no directory specified
  expect_error(
    data_save(test_data, "just_a_filename"),
    "has no directory.*dot notation.*full path"
  )
})

test_that("data_save updates database with normalized name", {
  test_dir <- create_test_project()
  old_wd <- getwd()
  on.exit({
    setwd(old_wd)
    cleanup_test_dir(test_dir)
  })

  setwd(test_dir)

  dir.create("inputs/intermediate", recursive = TRUE, showWarnings = FALSE)
  test_data <- data.frame(x = 1:5)

  # Save with 3-part dot notation
  suppressMessages(data_save(test_data, "inputs.intermediate.my_data"))

  # Check database record uses original path as name
  conn <- DBI::dbConnect(RSQLite::SQLite(), "framework.db")
  on.exit(DBI::dbDisconnect(conn), add = TRUE)

  record <- DBI::dbGetQuery(conn, "SELECT * FROM data WHERE name = 'inputs.intermediate.my_data'")
  expect_equal(nrow(record), 1)
  # Path may be absolute or relative depending on the platform, and may use backslashes on Windows
  path_normalized <- gsub("\\\\", "/", record$path)
  expect_true(grepl("inputs/intermediate/my_data\\.rds$", path_normalized))
  expect_equal(record$type, "rds")
})

# data_add tests
test_that("data_add registers existing CSV file", {
  test_dir <- create_test_project()
  old_wd <- getwd()
  on.exit({
    setwd(old_wd)
    cleanup_test_dir(test_dir)
  })

  setwd(test_dir)

  # Create a CSV file manually (simulating external data)
  dir.create("inputs/raw", recursive = TRUE, showWarnings = FALSE)
  test_data <- data.frame(x = 1:5, y = letters[1:5])
  write.csv(test_data, "inputs/raw/external_data.csv", row.names = FALSE)

  # Add it to the catalog
  suppressMessages(
    data_add("inputs/raw/external_data.csv", name = "raw.external_data")
  )

  # Check database record was created
  conn <- DBI::dbConnect(RSQLite::SQLite(), "framework.db")
  on.exit(DBI::dbDisconnect(conn), add = TRUE)

  record <- DBI::dbGetQuery(conn, "SELECT * FROM data WHERE name = 'raw.external_data'")
  expect_equal(nrow(record), 1)
  expect_equal(record$type, "csv")
  expect_false(is.na(record$hash))
})

test_that("data_add auto-detects file type from extension", {
  test_dir <- create_test_project()
  old_wd <- getwd()
  on.exit({
    setwd(old_wd)
    cleanup_test_dir(test_dir)
  })

  setwd(test_dir)

  # Create an RDS file manually
  dir.create("inputs/intermediate", recursive = TRUE, showWarnings = FALSE)
  test_data <- data.frame(x = 1:5)
  saveRDS(test_data, "inputs/intermediate/processed.rds")

  # Add without specifying type
  suppressMessages(
    data_add("inputs/intermediate/processed.rds", name = "intermediate.processed")
  )

  # Check type was auto-detected
  conn <- DBI::dbConnect(RSQLite::SQLite(), "framework.db")
  on.exit(DBI::dbDisconnect(conn), add = TRUE)

  record <- DBI::dbGetQuery(conn, "SELECT * FROM data WHERE name = 'intermediate.processed'")
  expect_equal(record$type, "rds")
})

test_that("data_add errors for non-existent file", {
  test_dir <- create_test_project()
  old_wd <- getwd()
  on.exit({
    setwd(old_wd)
    cleanup_test_dir(test_dir)
  })

  setwd(test_dir)

  expect_error(
    data_add("nonexistent/file.csv", name = "test.data"),
    "File not found"
  )
})

test_that("data_add allows reading via data_read after registration", {
  test_dir <- create_test_project()
  old_wd <- getwd()
  on.exit({
    setwd(old_wd)
    cleanup_test_dir(test_dir)
  })

  setwd(test_dir)

  # Create a CSV file
  dir.create("inputs/raw", recursive = TRUE, showWarnings = FALSE)
  original_data <- data.frame(a = 1:3, b = c("x", "y", "z"))
  write.csv(original_data, "inputs/raw/readable.csv", row.names = FALSE)

  # Register with data_add
  suppressMessages(
    data_add("inputs/raw/readable.csv", name = "raw.readable")
  )

  # Now read it using data_read with dot notation
  loaded <- suppressWarnings(data_read("raw.readable"))

  expect_equal(nrow(loaded), 3)
  expect_equal(loaded$a, 1:3)
})

test_that("data_add respects locked parameter", {
  test_dir <- create_test_project()
  old_wd <- getwd()
  on.exit({
    setwd(old_wd)
    cleanup_test_dir(test_dir)
  })

  setwd(test_dir)

  dir.create("inputs/raw", recursive = TRUE, showWarnings = FALSE)
  write.csv(data.frame(x = 1), "inputs/raw/locked.csv", row.names = FALSE)

  # Add with locked = FALSE
  suppressMessages(
    data_add("inputs/raw/locked.csv", name = "raw.locked_file", locked = FALSE)
  )

  conn <- DBI::dbConnect(RSQLite::SQLite(), "framework.db")
  on.exit(DBI::dbDisconnect(conn), add = TRUE)

  record <- DBI::dbGetQuery(conn, "SELECT * FROM data WHERE name = 'raw.locked_file'")
  expect_equal(record$locked, 0)  # SQLite stores FALSE as 0
})

test_that("data_add can skip config update", {
  test_dir <- create_test_project()
  old_wd <- getwd()
  on.exit({
    setwd(old_wd)
    cleanup_test_dir(test_dir)
  })

  setwd(test_dir)

  dir.create("inputs/raw", recursive = TRUE, showWarnings = FALSE)
  write.csv(data.frame(x = 1), "inputs/raw/no_config.csv", row.names = FALSE)

  # Add without updating config
  suppressMessages(
    data_add("inputs/raw/no_config.csv", name = "raw.no_config", update_config = FALSE)
  )

  # Database should have record
  conn <- DBI::dbConnect(RSQLite::SQLite(), "framework.db")
  on.exit(DBI::dbDisconnect(conn), add = TRUE)

  record <- DBI::dbGetQuery(conn, "SELECT * FROM data WHERE name = 'raw.no_config'")
  expect_equal(nrow(record), 1)

  # But config should NOT have the spec
  spec <- tryCatch(data_spec_get("raw.no_config"), error = function(e) NULL)
  expect_null(spec)
})

Try the framework package in your browser

Any scripts or data that you put into this service are public.

framework documentation built on Feb. 18, 2026, 1:07 a.m.