tests/testthat/test-driver-rds.R

context("driver rds details")

## Tests of the implementation details of the rds driver only...
test_that("creation", {
  path <- tempfile()
  expect_false(file.exists(path))
  dr <- driver_rds(path)
  on.exit(dr$destroy())

  expect_true(file.exists(path))
  expect_identical(sort(dir(path)), c("config", "data", "keys", "scratch"))
  expect_identical(dir(file.path(path, "data")), character(0))
  expect_false(dr$mangle_key)
})

test_that("mangling", {
  path <- tempfile()
  dr <- driver_rds(path, mangle_key = TRUE)
  on.exit(dr$destroy())
  st <- storr(dr)

  st$list()
  st$set("foo", 1)

  expect_identical(st$list(), "foo")

  expect_identical(dir(file.path(path, "keys", "objects")),
                   encode64("foo"))

  st3 <- storr_environment()
  x <- st3$import(st)
  expect_identical(unname(x[, "name"]), "foo")
  expect_identical(st3$list(), "foo")

  st4 <- storr_environment()
  st4$set("bar", 2)
  st$import(st4)

  expect_identical(sort(st$list()), c("bar", "foo"))
  expect_identical(sort(dir(file.path(path, "keys", "objects"))),
                   sort(c(encode64("foo"), encode64("bar"))))
})

test_that("mangledless compatibility", {
  path <- tempfile()

  dr1 <- driver_rds(path, mangle_key = TRUE)
  expect_true(file.exists(file.path(path, "config", "mangle_key")))
  expect_equal(readLines(file.path(path, "config", "mangle_key")), "TRUE")
  expect_true(dr1$mangle_key)

  ## Pointing another driver here without mangling is an error:
  expect_error(driver_rds(path, mangle_key = FALSE),
               "Incompatible value for mangle_key",
               class = "ConfigError")

  ## But omitting the argument (NULL mangledness) is OK
  dr2 <- driver_rds(path)
  expect_true(dr2$mangle_key)

  ## In reverse:
  path2 <- tempfile()

  dr3 <- driver_rds(path2)
  expect_true(file.exists(file.path(path2, "config", "mangle_key")))
  expect_equal(readLines(file.path(path2, "config", "mangle_key")), "FALSE")
  expect_false(dr3$mangle_key)

  ## Pointing another driver here without mangling is an error:
  expect_error(driver_rds(path2, mangle_key = TRUE),
               "Incompatible value for mangle_key",
               class = "ConfigError")

  ## But omitting the argument (NULL mangledness) is OK
  dr4 <- driver_rds(path2)
  expect_false(dr4$mangle_key)
})

test_that("mangledness padding compatibility", {
  path <- tempfile()

  dr1 <- driver_rds(path, mangle_key = TRUE)
  expect_true(file.exists(file.path(path, "config", "mangle_key_pad")))
  expect_equal(readLines(file.path(path, "config", "mangle_key_pad")), "FALSE")
  expect_false(dr1$mangle_key_pad)

  ## Pointing another driver here without mangling is an error:
  expect_error(driver_rds(path, mangle_key_pad = TRUE),
               "Incompatible value for mangle_key_pad",
               class = "ConfigError")

  ## But omitting the argument (NULL mangledness) is OK
  dr2 <- driver_rds(path)
  expect_false(dr2$mangle_key_pad)

  ## In reverse:
  path2 <- tempfile()

  dr3 <- driver_rds(path2, mangle_key = TRUE, mangle_key_pad = TRUE)
  expect_true(file.exists(file.path(path2, "config", "mangle_key_pad")))
  expect_equal(readLines(file.path(path2, "config", "mangle_key_pad")), "TRUE")
  expect_true(dr3$mangle_key_pad)

  ## Pointing another driver here without mangling is an error:
  expect_error(driver_rds(path2, mangle_key = TRUE, mangle_key_pad = FALSE),
               "Incompatible value for mangle_key",
               class = "ConfigError")

  ## But omitting the argument (NULL mangledness) is OK
  dr4 <- driver_rds(path2)
  expect_true(dr4$mangle_key_pad)
})

## This test takes a lot of time - about 25s.  This really suggests
## that storing objects of this size is not a sensible idea!
test_that("large vector support", {
  skip_on_cran()
  skip_long_test()

  path <- tempfile()
  dr <- driver_rds(path, compress = FALSE)
  on.exit(dr$destroy())
  st <- storr(dr)

  data <- raw(2195148826)       # ~  1.3s to allocate the data

  x <- st$serialize_object(data)# ~  4.5s
  hash <- st$hash_raw(x)        # ~  7.0s

  dr$set_object(hash, x)        # ~  8.0s
  cmp <- dr$get_object(hash)    # ~  3.4s
  expect_identical(cmp, data)   # ~  0.3s
  ##                            # -------
  ##                            # ~ 24.5s

  ## Check that R still doesn't support this directly; if it does
  ## we'll move straight over and use the native support (once native
  ## support is present, then the set_object phase will save about 4s
  ## total)
  tmp <- tempfile()
  expect_error(writeBin(x, tmp), "long vectors not supported yet")
  file.remove(tmp)
})

test_that("compression support", {
  ## some data that will likely compress very well:
  data <- rep(1:10, each = 500)

  st1 <- storr_rds(tempfile(), TRUE)
  st2 <- storr_rds(tempfile(), FALSE)
  on.exit({
    st1$destroy()
    st2$destroy()
  })

  h1 <- st1$set("data", data)
  h2 <- st2$set("data", data)

  expect_identical(h1, h2)
  expect_gt(file.size(st2$driver$name_hash(h2)),
            file.size(st1$driver$name_hash(h1)))

  expect_identical(st1$get("data"), data)
  expect_identical(st2$get("data"), data)
})

test_that("backward compatibility", {
  ## In version 1.0.1 and earlier, the hash algorithm was not stored
  ## in the database and md5 was *always* used.
  path <- copy_to_tmp("v1.0.1_clear")
  st <- storr_rds(path)
  expect_equal(st$list(), "key")
  expect_equal(st$get("key"), "v1.0.1")
  expect_equal(st$driver$hash_algorithm, "md5")
  expect_false(st$driver$mangle_key)
  st$destroy()

  path <- copy_to_tmp("v1.0.1_clear")
  expect_error(storr_rds(path, hash_algorithm = "sha1"),
               "Incompatible value for hash_algorithm",
               class = "ConfigError")
})

test_that("mangledness padding backward compatibility", {
  ## In version 1.0.1 and earlier, mangling was always padded
  path <- copy_to_tmp("v1.0.1_mangled")
  st <- storr_rds(path)
  expect_true(st$driver$mangle_key)
  expect_true(st$driver$mangle_key_pad)
  expect_equal(st$get("a"), 1)
  expect_equal(st$get("bb"), 2)
  expect_equal(st$get("ccc"), 3)
  expect_equal(st$list(),
               sort(c("a", "bb", "ccc")))
  st$set("a", "x")
  st$set("bb", "y")
  st$set("ccc", "z")
  expect_equal(st$list(),
               sort(c("a", "bb", "ccc")))
  st$destroy()
})

## This is a test for issue 42; check that hard links do not create
## inconsistent storrs.
test_that("copy -lr and consistency", {
  skip_on_cran()
  skip_on_os(c("windows", "mac", "solaris"))

  path1 <- tempfile()
  path2 <- tempfile()
  st1 <- storr_rds(path1)
  h1 <- st1$set("foo", "val1")

  ok <- system2("cp", c("-lr", path1, path2))

  st2 <- storr_rds(path2)
  expect_equal(st2$get("foo"), "val1")

  h2 <- st1$set("foo", "val2")
  expect_equal(st1$get("foo"), "val2")
  expect_equal(st2$get("foo"), "val1")

  expect_equal(st2$list_hashes(), h1)
  expect_equal(sort(st1$list_hashes()), sort(c(h1, h2)))
})

## Prevent a regression
test_that("vectorised exists", {
  st <- storr_rds(tempfile(), mangle_key = TRUE)
  on.exit(st$destroy())

  expect_equal(st$exists(c("x", "xx")), c(FALSE, FALSE))
  st$set("x", 1)
  st$set("xx", 2)
  expect_equal(st$exists(c("x", "xx")), c(TRUE, TRUE))
})

test_that("change directories and access same storr", {
  x <- storr_rds("my_storr")
  x$set("a", 1)
  expect_equal(x$list(), "a")
  subdir <- "subdir"
  dir.create(subdir)
  owd <- setwd(subdir)
  on.exit(setwd(owd))
  expect_equal(x$list(), "a")
  setwd("..")
  unlink(subdir, recursive = TRUE)
  x$destroy()
})

test_that("check empty storr", {
  st <- storr_rds(tempfile())
  expect_true(st$check()$healthy)
  expect_silent(st$check(quiet = TRUE))
})

test_that("recover corrupt storr: corrupted rds", {
  st <- storr_rds(tempfile())

  ## First start with a storr with some data in it:
  for (i in 1:10) {
    st$mset(paste0(letters[[i]], seq_len(i)),
            lapply(seq_len(i), function(.) runif(20)),
            namespace = LETTERS[[i]])
  }

  res <- st$check()
  expect_true(res$healthy)

  ## Then let's truncate some data!
  set.seed(1)
  i <- sample.int(55, 5)
  r <- st$list_hashes()[i]
  for (p in st$driver$name_hash(r)) {
    writeBin(raw(), p)
  }

  res <- st$check()
  expect_is(res, "storr_check")
  expect_false(res$healthy)

  expect_equal(length(res$objects$corrupt), 5L)
  expect_equal(nrow(res$keys$corrupt), 0L)
  expect_equal(nrow(res$keys$dangling), 5L)

  res2 <- st$check(full = FALSE)
  expect_false(res2$healthy)
  expect_equal(res2$objects, res$objects)
  expect_equal(nrow(res2$keys$corrupt), 0L)
  expect_equal(nrow(res2$keys$dangling), 0L)

  st$repair(force = TRUE)
  res <- st$check()
  expect_true(res$healthy)
  expect_false(st$repair(res, force = TRUE))
  expect_silent(st$repair(res, force = TRUE))
  expect_equal(st$check(full = FALSE), res)
})


test_that("don't run automatically", {
  skip_if_interactive()
  st <- storr_rds(tempfile())
  for (i in 1:10) {
    st$mset(paste0(letters[[i]], seq_len(i)),
            lapply(seq_len(i), function(.) runif(20)),
            namespace = LETTERS[[i]])
  }
  set.seed(1)
  i <- sample.int(55, 5)
  r <- st$list_hashes()[i]
  for (p in st$driver$name_hash(r)) {
    writeBin(raw(), p)
  }

  expect_error(st$repair(force = FALSE),
               "Please rerun with force")
})


test_that("automatic is ok if storr is healthy", {
  expect_false(storr_rds(tempfile())$repair(force = FALSE))
})


test_that("corrupted mangled keys", {
  st <- storr_rds(tempfile(), mangle_key = TRUE, mangle_key_pad = TRUE)
  st$mset(month.name,
          lapply(seq_along(month.name), function(.) runif(20)))
  keys <- st$driver$name_key(month.name, "objects")
  file.copy(keys[[3]],
            paste(keys[[3]], "(conflicted copy)"))
  with_options(list("storr.corrupt.notice.period" = NA),
               expect_silent(st$list()))
  expect_message(st$list(),
                 "1 corrupted files have been found in your storr archive:")
  expect_silent(st$list())
  with_options(list("storr.corrupt.notice.period" = 0L),
               expect_message(st$list(), "namespace: 'objects'"))

  expect_message(st$driver$purge_corrupt_keys("objects"),
                 "Removed 1 of 1 corrupt file")
  with_options(list("storr.corrupt.notice.period" = 0L),
               expect_silent(st$list()))
  expect_silent(st$driver$purge_corrupt_keys("objects"))
})


test_that("avoid race condition when writing in parallel", {
  ## This is an integration test modified from
  ## https://github.com/richfitz/storr/issues/80
  ## contributed by @wlandau
  ##
  ## Try and hammer the rds storr with concurrent writes.  The end
  ## result should be all keys (a-z) contain the value "Z"
  skip_on_cran()
  skip_on_os("windows")

  racy_write <- function(...) {
    write <- function(key, path) {
      tryCatch({
        st <- storr_rds(path)
        for (x in c(letters, LETTERS)) {
          st$set(key = key, value = x,
                 use_cache = FALSE)
        }
        NULL
      },
      warning = identity)
    }

    st <- storr_rds(tempfile())
    res <- parallel::mclapply(letters, write, path = st$driver$path, ...)
    values <- st$mget(letters)
    st$destroy()

    all(lengths(res) == 0L) && all(vlapply(values, identical, "Z"))
  }

  ok <- vlapply(1:10, function(i) racy_write())
  expect_true(all(ok))
})
richfitz/storr documentation built on Dec. 6, 2020, 7:35 p.m.