tests/testthat/test-schema.R

# Helper to create a star-schema test setup
make_star_schema <- function() {
  f_obs <- tempfile(fileext = ".vtr")
  f_sp  <- tempfile(fileext = ".vtr")
  f_ct  <- tempfile(fileext = ".vtr")

  write_vtr(data.frame(
    sp_id   = c(1, 2, 3, 4),
    ct_code = c("AT", "DE", "FR", "XX"),
    value   = c(10, 20, 30, 40)
  ), f_obs)

  write_vtr(data.frame(
    sp_id = c(1, 2, 3),
    name  = c("Oak", "Beech", "Pine")
  ), f_sp)

  write_vtr(data.frame(
    ct_code = c("AT", "DE", "FR"),
    gdp     = c(400, 3800, 2700)
  ), f_ct)

  list(f_obs = f_obs, f_sp = f_sp, f_ct = f_ct)
}

# --- link() ---

test_that("link() validates inputs", {
  f <- tempfile(fileext = ".vtr")
  write_vtr(data.frame(x = 1), f)
  on.exit(unlink(f))

  expect_error(link(123, tbl(f)), "key must be")
  expect_error(link(character(0), tbl(f)), "key must be")
  expect_error(link("x", data.frame()), "must be a vectra_node")
})

test_that("link() creates a vectra_link object", {
  f <- tempfile(fileext = ".vtr")
  write_vtr(data.frame(x = 1), f)
  on.exit(unlink(f))

  lnk <- link("x", tbl(f))
  expect_s3_class(lnk, "vectra_link")
  expect_equal(lnk$key, "x")
})

# --- vtr_schema() ---

test_that("vtr_schema() validates inputs", {
  f <- tempfile(fileext = ".vtr")
  write_vtr(data.frame(x = 1), f)
  on.exit(unlink(f))

  expect_error(vtr_schema(fact = data.frame()), "must be a vectra_node")
  expect_error(vtr_schema(fact = tbl(f)), "at least one dimension")
  expect_error(vtr_schema(fact = tbl(f), link("x", tbl(f))),
               "must be named")
  expect_error(vtr_schema(fact = tbl(f), dim1 = "not_a_link"),
               "must be a vectra_link")
})

test_that("vtr_schema() creates a vectra_schema object", {
  ft <- make_star_schema()
  on.exit(unlink(c(ft$f_obs, ft$f_sp, ft$f_ct)))

  s <- vtr_schema(
    fact    = tbl(ft$f_obs),
    species = link("sp_id", tbl(ft$f_sp)),
    country = link("ct_code", tbl(ft$f_ct))
  )
  expect_s3_class(s, "vectra_schema")
  expect_equal(names(s$dims), c("species", "country"))
})

test_that("print.vectra_schema produces output", {
  ft <- make_star_schema()
  on.exit(unlink(c(ft$f_obs, ft$f_sp, ft$f_ct)))

  s <- vtr_schema(
    fact    = tbl(ft$f_obs),
    species = link("sp_id", tbl(ft$f_sp))
  )
  out <- capture.output(print(s))
  expect_true(any(grepl("vectra schema", out)))
  expect_true(any(grepl("species", out)))
})

# --- lookup() ---

test_that("lookup() resolves fact columns by bare name", {
  ft <- make_star_schema()
  on.exit(unlink(c(ft$f_obs, ft$f_sp, ft$f_ct)))

  s <- vtr_schema(
    fact    = tbl(ft$f_obs),
    species = link("sp_id", tbl(ft$f_sp))
  )
  result <- lookup(s, value, .report = FALSE) |> collect()
  expect_equal(result$value, c(10, 20, 30, 40))
})

test_that("lookup() resolves dimension columns via dim$col syntax", {
  ft <- make_star_schema()
  on.exit(unlink(c(ft$f_obs, ft$f_sp, ft$f_ct)))

  s <- vtr_schema(
    fact    = tbl(ft$f_obs),
    species = link("sp_id", tbl(ft$f_sp)),
    country = link("ct_code", tbl(ft$f_ct))
  )
  result <- lookup(s, value, species$name, country$gdp, .report = FALSE) |>
    collect()
  expect_equal(ncol(result), 3)
  expect_equal(names(result), c("value", "name", "gdp"))
  # Row 4 has sp_id=4 which doesn't match species (left join -> NA)
  expect_equal(result$name[1:3], c("Oak", "Beech", "Pine"))
  expect_true(is.na(result$name[4]))
  # Row 4 has ct_code="XX" which doesn't match country
  expect_equal(result$gdp[1:3], c(400, 3800, 2700))
  expect_true(is.na(result$gdp[4]))
})

test_that("lookup() with inner join drops unmatched rows", {
  ft <- make_star_schema()
  on.exit(unlink(c(ft$f_obs, ft$f_sp, ft$f_ct)))

  s <- vtr_schema(
    fact    = tbl(ft$f_obs),
    species = link("sp_id", tbl(ft$f_sp))
  )
  result <- lookup(s, value, species$name, .join = "inner",
                   .report = FALSE) |> collect()
  expect_equal(nrow(result), 3)
  expect_false(any(is.na(result$name)))
})

test_that("lookup() only joins needed dimensions", {
  ft <- make_star_schema()
  on.exit(unlink(c(ft$f_obs, ft$f_sp, ft$f_ct)))

  s <- vtr_schema(
    fact    = tbl(ft$f_obs),
    species = link("sp_id", tbl(ft$f_sp)),
    country = link("ct_code", tbl(ft$f_ct))
  )
  # Only request species columns - country should not be joined
  result <- lookup(s, value, species$name, .report = FALSE) |> collect()
  expect_equal(names(result), c("value", "name"))
})

test_that("lookup() reports unmatched keys", {
  ft <- make_star_schema()
  on.exit(unlink(c(ft$f_obs, ft$f_sp, ft$f_ct)))

  s <- vtr_schema(
    fact    = tbl(ft$f_obs),
    species = link("sp_id", tbl(ft$f_sp)),
    country = link("ct_code", tbl(ft$f_ct))
  )
  msgs <- capture.output(
    result <- lookup(s, value, species$name, country$gdp, .report = TRUE) |>
      collect(),
    type = "message"
  )
  # species: sp_id=4 is unmatched
  expect_true(any(grepl("species.*1/4.*unmatched", msgs)))
  # country: ct_code="XX" is unmatched
  expect_true(any(grepl("country.*1/4.*unmatched", msgs)))
})

test_that("lookup() reports all matched when no mismatches", {
  f_obs <- tempfile(fileext = ".vtr")
  f_sp  <- tempfile(fileext = ".vtr")
  write_vtr(data.frame(sp_id = c(1, 2), value = c(10, 20)), f_obs)
  write_vtr(data.frame(sp_id = c(1, 2), name = c("A", "B")), f_sp)
  on.exit(unlink(c(f_obs, f_sp)))

  s <- vtr_schema(
    fact    = tbl(f_obs),
    species = link("sp_id", tbl(f_sp))
  )
  msgs <- capture.output(
    result <- lookup(s, value, species$name) |> collect(),
    type = "message"
  )
  expect_true(any(grepl("all 2 keys matched", msgs)))
})

test_that("lookup() errors on unknown dimension", {
  ft <- make_star_schema()
  on.exit(unlink(c(ft$f_obs, ft$f_sp, ft$f_ct)))

  s <- vtr_schema(
    fact    = tbl(ft$f_obs),
    species = link("sp_id", tbl(ft$f_sp))
  )
  expect_error(lookup(s, bogus$col, .report = FALSE), "dimension 'bogus'")
})

test_that("lookup() errors with no column references", {
  ft <- make_star_schema()
  on.exit(unlink(c(ft$f_obs, ft$f_sp, ft$f_ct)))

  s <- vtr_schema(
    fact    = tbl(ft$f_obs),
    species = link("sp_id", tbl(ft$f_sp))
  )
  expect_error(lookup(s, .report = FALSE), "at least one column")
})

test_that("lookup() works with named keys (different column names)", {
  f_obs <- tempfile(fileext = ".vtr")
  f_sp  <- tempfile(fileext = ".vtr")
  write_vtr(data.frame(species_id = c(1, 2), value = c(10, 20)), f_obs)
  write_vtr(data.frame(sp_id = c(1, 2), name = c("A", "B")), f_sp)
  on.exit(unlink(c(f_obs, f_sp)))

  s <- vtr_schema(
    fact    = tbl(f_obs),
    species = link(c("species_id" = "sp_id"), tbl(f_sp))
  )
  result <- lookup(s, value, species$name, .report = FALSE) |> collect()
  expect_equal(result$name, c("A", "B"))
})

test_that("schema can be reused for multiple lookups", {
  ft <- make_star_schema()
  on.exit(unlink(c(ft$f_obs, ft$f_sp, ft$f_ct)))

  s <- vtr_schema(
    fact    = tbl(ft$f_obs),
    species = link("sp_id", tbl(ft$f_sp)),
    country = link("ct_code", tbl(ft$f_ct))
  )

  r1 <- lookup(s, value, species$name, .report = FALSE) |> collect()
  r2 <- lookup(s, value, country$gdp, .report = FALSE) |> collect()
  r3 <- lookup(s, species$name, country$gdp, .report = FALSE) |> collect()

  expect_equal(nrow(r1), 4)
  expect_equal(nrow(r2), 4)
  expect_equal(nrow(r3), 4)
  expect_equal(names(r1), c("value", "name"))
  expect_equal(names(r2), c("value", "gdp"))
  expect_equal(names(r3), c("name", "gdp"))
})

Try the vectra package in your browser

Any scripts or data that you put into this service are public.

vectra documentation built on May 8, 2026, 9:06 a.m.