library(tinytest)
library(tiledb)
if (Sys.getenv("CI", "") == "") exit_file("Skip unextended test run")
ctx <- tiledb_ctx(limitTileDBCores())
if (!requireNamespace("arrow", quietly=TRUE)) exit_file("No 'arrow' package.")
suppressMessages(library(arrow))
if (get_return_as_preference() != "asis") set_return_as_preference("asis") # baseline value
## -- RecordBatch test (and TileDB does not yet export to / import from these)
df <- data.frame(one = c(-1, NA, 2.5),
two = c("foo", "bar", "baz"),
three = c(TRUE, FALSE, TRUE))
batch <- record_batch(df)
expect_true(is(batch, "RecordBatch"))
expect_true(is(as.data.frame(batch), "data.frame"))
## allocate two structures (and release at end)
aa <- nanoarrow::nanoarrow_allocate_array()
as <- nanoarrow::nanoarrow_allocate_schema()
arrow:::ExportRecordBatch(batch, aa, as)
newrb <- arrow:::ImportRecordBatch(aa, as)
expect_true(is(newrb, "RecordBatch"))
expect_true(is(as.data.frame(newrb), "data.frame"))
expect_equal(batch, newrb)
## round-turn test 1: write tiledb first, create arrow object via zero-copy
suppressMessages(library(bit64))
n <- 10L
dir.create(tmp <- tempfile())
dim <- tiledb_dim("rows", domain=c(1L,n), type="INT32", tile=1L)
dom <- tiledb_domain(dim)
sch <- tiledb_array_schema(dom,
attrs = c(tiledb_attr("int8", type="INT8"),
tiledb_attr("uint8", type="UINT8"),
tiledb_attr("int16", type="INT16"),
tiledb_attr("uint16", type="UINT16"),
tiledb_attr("int32", type="INT32"),
tiledb_attr("uint32", type="UINT32"),
tiledb_attr("int64", type="INT64"),
tiledb_attr("uint64", type="UINT64"),
tiledb_attr("float64",type="FLOAT64")),
sparse = TRUE)
tiledb_array_create(tmp, sch)
arr <- tiledb_array(tmp)
qry <- tiledb_query(arr, "WRITE")
dimptr <- tiledb_query_create_buffer_ptr(qry, "INT32", seq(1,n))
qry <- tiledb_query_set_buffer_ptr(qry, "rows", dimptr)
attrlst <- list()
for (col in c("int8", "uint8", "int16", "uint16", "int32", "uint32", "int64", "uint64", "float64")) {
if (grepl("int64", col)) {
vals <- as.integer64(seq(1,n))
} else {
vals <- seq(1,n)
}
attrlst[[col]] <- tiledb_query_create_buffer_ptr(qry, toupper(col), vals)
qry <- tiledb_query_set_buffer_ptr(qry, col, attrlst[[col]])
}
tiledb_query_set_layout(qry, "UNORDERED")
tiledb_query_submit(qry)
tiledb_query_finalize(qry)
#arr <- tiledb_array(tmp, return_as="data.frame")
#print(arr[])
arr <- tiledb_array(tmp)
qry <- tiledb_query(arr, "READ")
dimptr <- tiledb_query_buffer_alloc_ptr(qry, "INT32", n)
qry <- tiledb_query_set_buffer_ptr(qry, "rows", dimptr)
tiledb_query_set_subarray(qry, c(4L,7L), "INT32")
attrlst <- list()
for (col in c("int8", "uint8", "int16", "uint16", "int32", "uint32", "int64", "uint64", "float64")) {
attrlst[[col]] <- tiledb_query_buffer_alloc_ptr(qry, toupper(col), 4L)
qry <- tiledb_query_set_buffer_ptr(qry, col, attrlst[[col]])
}
tiledb_query_submit(qry)
tiledb_query_finalize(qry)
res <- tiledb_query_export_buffer(qry, "rows")
#v <- Array$create(arrow:::ImportArray(res[[1]], res[[2]]))
v <- Array$create(res)
#tiledb_arrow_array_del(res[[1]])
#tiledb_arrow_schema_del(res[[2]])
expect_equal(v$as_vector(), 4:7)
for (col in c("int8", "uint8", "int16", "uint16", "int32", "uint32", "int64", "uint64", "float64")) {
qry <- tiledb_query_set_buffer_ptr(qry, col, attrlst[[col]])
res <- tiledb_query_export_buffer(qry, col)
v <- Array$create(res)
#v <- Array$create(arrow:::ImportArray(res[[1]], res[[2]]))
#tiledb_arrow_array_del(res[[1]])
#tiledb_arrow_schema_del(res[[2]])
expect_equal(v$as_vector(), 4:7)
}
## n=15
## round-turn test 2: create arrow object, write tiledb second via zero-copy
dir.create(tmp <- tempfile())
n <- 10L
## create a schema but don't fill it yet
#spdl::log("debug")
dim <- tiledb_dim("rows", domain=c(1L,n), type="INT32", tile=1L)
dom <- tiledb_domain(dim)
sch <- tiledb_array_schema(dom,
attrs = c(tiledb_attr("int8", type="INT8"),
tiledb_attr("uint8", type="UINT8"),
tiledb_attr("int16", type="INT16"),
tiledb_attr("uint16", type="UINT16"),
tiledb_attr("int32", type="INT32"),
tiledb_attr("uint32", type="UINT32"),
tiledb_attr("int64", type="INT64"),
tiledb_attr("uint64", type="UINT64"),
tiledb_attr("float64",type="FLOAT64")),
sparse = TRUE)
tiledb_array_create(tmp, sch)
#exit_file("aa")
## create an arrow 'record batch' with a number of (correcsponding) columns
rb <- record_batch("rows" = Array$create(1:n, int32()),
"int8" = Array$create(1:n, int8()),
"uint8" = Array$create(1:n, uint8()),
"int16" = Array$create(1:n, int16()),
"uint16" = Array$create(1:n, uint16()),
"int32" = Array$create(1:n, int32()),
"uint32" = Array$create(1:n, uint32()),
"int64" = Array$create(1:n, int64()),
"uint64" = Array$create(1:n, uint64()),
"float64"= Array$create(1:n, float64()) )
#print(rb)
#print(as.data.frame(rb))
arr <- tiledb_array(tmp)
qry <- tiledb_query(arr, "WRITE")
#spdl::log("debug")
nms <- rb$names()
lst <- list()
for (nam in nms) {
vec <- rb[[nam]] # can access by name
na <- nanoarrow::as_nanoarrow_array(vec)
#print(na)
#print(class(na))
#aa <- tiledb_arrow_array_ptr()
#as <- tiledb_arrow_schema_ptr()
#arrow:::ExportArray(vec, aa, as)
qry <- tiledb_query_import_buffer(qry, nam, na)
#lst[[nam]] <- list(aa=aa, as=as)
}
tiledb_query_set_layout(qry, "UNORDERED")
tiledb_query_submit(qry)
tiledb_query_finalize(qry)
arr <- tiledb_array(tmp, return_as="data.frame")
df <- arr[]
#print(df)
#q()
#for (i in 1:10) {
# l <- lst[[i]]
# tiledb_arrow_array_del(l[[1]])
# tiledb_arrow_schema_del(l[[2]])
#}
## n=15
expect_true(is(df, "data.frame"))
expect_equal(dim(df), c(n, 10))
for (i in c(1:7,9:10)) {
expect_equivalent(df[,i], 1:10)
}
expect_equivalent(df[,8], as.integer64(1:10))
## n=27
## test support for return_as="arrow"
if (!requireNamespace("palmerpenguins", quietly=TRUE)) exit_file("remainder needs 'palmerpenguins'")
library(palmerpenguins)
uri <- tempfile()
fromDataFrame(penguins, uri, col_index = c("species", "year"))
for (arg in c("arrow", "arrow_table")) {
res <- tiledb_array(uri, return_as=arg)[]
expect_true(inherits(res, "Table"))
expect_true(inherits(res, "ArrowTabular"))
expect_true(inherits(res, "ArrowObject"))
expect_equal(res$num_rows, 344)
expect_equal(res$num_columns, 8)
}
## test support for return as Date (GH Issue 533)
uri <- tempfile()
D <- data.frame(val = 100 + 0:4,
dat = Sys.Date() + seq(-4,0))
fromDataFrame(D, uri, col_index = 1)
at <- tiledb_array(uri, return_as = "arrow")[]
expect_true(inherits(at, "Table"))
chk <- data.frame(at)
expect_equal(D, chk)
## detaching arrow should not be necessary as we generally do not need to unload
## packages but had been seen as beneficial in some instanced so there is now an option
if (Sys.getenv("R_TESTING_CLEANUP", "") == "yes") detach(package:arrow, unload=TRUE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.