# from uploaded data frame ----
test_that("upload data frame: no template", {
skip_if_no_db()
withr::local_options(stringsAsFactors = FALSE)
conn = get_scidb_connection()
df = data.frame(a = letters[1:5], b = 1:5, z = 11:15 * 1.2)
uploaded = conn$upload_df(df)
# uploaded data frame will have an artificial dimension
expect_equal(uploaded$to_df(), df)
})
test_that("various types of template", {
skip_if_no_db()
withr::local_options(stringsAsFactors = FALSE)
conn = get_scidb_connection()
templateStr = "<a:int32, b:string, c:bool> [x;y]"
templateArray = conn$create_array(dbutils$random_array_name(), templateStr)
df = data.frame(x=1:2, y=3:4, a=1:2, b=letters[1:2], c=c(T,F))
verify_template = function(template) {
a1 = conn$upload_df(df, template, .gc = F)
expect_identical(a1$attrs, templateArray$dims_n_attrs)
a1$remove_array()
}
verify_template(templateStr)
verify_template(templateArray)
verify_template(templateArray$to_afl()) # array name as template
# force the array to conform with the template's schema
a2 = conn$array_from_df(df, templateArray, force_template_schema = T)$persist(.gc = F)
expect_identical(a2$attrs, templateArray$attrs)
expect_identical(a2$dims, templateArray$dims)
a2$remove_array()
templateArray$remove_array()
})
test_that("upload data frame: no template, temporary", {
skip_if_no_db()
withr::local_options(stringsAsFactors = FALSE)
conn = get_scidb_connection()
df = data.frame(a = letters[1:5], b = 1:5, z = 11:15 * 1.2)
uploaded = conn$upload_df(df, .gc = F, .temp = T)
expect_identical(uploaded$array_meta_data()$temporary, TRUE)
# uploaded data frame will have an artificial dimension
expect_equal(uploaded$to_df(), df)
uploaded$remove_array()
})
test_that("upload data frame: by a single vector", {
skip_if_no_db()
withr::local_options(stringsAsFactors = FALSE)
conn = get_scidb_connection()
df = data.frame(a = letters[1:5])
uploaded = conn$upload_df(df, upload_by_vector = T, .gc = F)
# uploaded data frame will have an artificial dimension
expect_equal(uploaded$to_df(), df)
uploaded$remove_array()
})
test_that("upload data frame: no template, by vectors", {
skip_if_no_db()
withr::local_options(stringsAsFactors = FALSE)
conn = get_scidb_connection()
df = data.frame(a = letters[1:5], b = 1:5, z = 11:15 * 1.2)
uploaded = conn$upload_df(df, upload_by_vector = T, .gc = T)
gc()
# uploaded data frame will have an artificial dimension
expect_equal(uploaded$to_df(), df)
uploaded$finalize() # this should clean up all uploaded vector arrays
gc()
expect_error(uploaded$cell_count())
# columnArrays = uploaded$.private$get_meta(".refs")
# for(arr in columnArrays){
# arr$remove_array()
# }
})
test_that("upload data frame with a template", {
skip_if_no_db()
withr::local_options(stringsAsFactors = FALSE)
conn = get_scidb_connection()
df = data.frame(a = letters[1:5], b = 1:5, z = 11:15)
uploaded = conn$upload_df(
df,
template = "<a:string compression 'zlib', b:int32, extra:bool> [z]",
)
# expect_match(uploaded$to_schema_str(), "compression 'zlib'", ignore.case = T)
expect_equal(uploaded$to_df(), df)
})
test_that("uploaded data frame by vectors and store the joined vectors", {
skip_if_no_db()
withr::local_options(stringsAsFactors = FALSE)
conn = get_scidb_connection()
df = data.frame(a = letters[1:5], b = 1:5, z = 11:15)
uploaded = conn$upload_df(df, "<a:string, b:int32, extra:bool> [z]", upload_by_vector = T, .temp = T)
stored = uploaded$persist()
expect_equal(uploaded$to_df(), df)
expect_equal(stored$to_df(), df)
})
test_that("upload data frame with special chars", {
skip_if_no_db()
withr::local_options(stringsAsFactors = FALSE)
conn = get_scidb_connection()
df = data.frame(a = c(
"slashes: http://a-b/c d%20%", # double backslackes \\ cause a bug
NA, # NA is ok if we upload df as a whole instead of by vectors
"quotes: 'a' \"|\" ",
"special: [abcd]", # if \t or \n were added to the field, tests would fail
"''"
), b = 1:5, z = 11:15, stringsAsFactors = F)
arr = conn$upload_df(df, template = "<a:string, b:int32, extra:bool> [z]")
# all matched fields are uploaded as attributes (dimensions vary with upload operators)
expect_identical(arr$attrs, c('a', 'b', 'z'))
expect_equal(arr$to_df(), df)
arr$remove_array()
})
test_that("upload data frame by vectors", {
skip_if_no_db()
withr::local_options(stringsAsFactors = FALSE)
conn = get_scidb_connection()
df = data.frame(
a = c(
"slashes: http://a-b/c\\d%20%",
'notNA', # NA cannot be uploaded in non-merge mode
"quotes: 'a' \"|\" ",
"special: \t\n[abcd]", # \t or \n cannot be uploaded in non-merge mode
"''"
),
b = 1:5, z = 11:15)
arr = conn$upload_df(df, "<a:string, b:int32, extra:bool> [z]", upload_by_vector = T)
# all matched fields are uploaded as attributes (dimensions vary with upload operators)
expect_identical(arr$attrs, c('a', 'b', 'z'))
expect_equal(arr$to_df(), df)
})
test_that("upload data frame with GC setting", {
skip_if_no_db()
withr::local_options(stringsAsFactors = FALSE)
conn = get_scidb_connection()
template = conn$array_from_schema(
"<a:string compression 'zlib', b:int32 not null> [z]"
)
df = data.frame(a = letters[1:5], b = 1:5, z = 11:15)
gc_on = function() {
name = dbutils$random_array_name()
name2 = dbutils$random_array_name()
arr = conn$upload_df(df, template, name = name, .gc = TRUE)
arr2 = conn$upload_df(df, template, name = name2, force_template_schema = T, .gc = TRUE)
expect_true(!is.null(
conn$array(name) # array with name must exist
))
# because arr2 involves a redimension using 'template' as schema template
expect_match(arr2$to_afl(), "compression 'zlib'", ignore.case = T)
expect_match(arr2$to_schema_str(), "compression 'zlib'", ignore.case = T)
expect_match(arr2$to_afl(), "not null", ignore.case = T)
expect_match(arr2$to_schema_str(), "not null", ignore.case = T)
expect_silent({
rm(arr)
gc()
})
expect_error(conn$array(name)) # array should be removed during GC
expect_equal(arr2$cell_count(), nrow(df))
}
gc_off = function() {
name = dbutils$random_array_name()
arr = conn$upload_df(df, template, name = name, .gc = F)
expect_true(!is.null(
conn$array(name) # array with name must exist
))
rm(arr)
gc()
retried = conn$array(name) # array should still exists
expect_true(!is.null(retried))
retried$remove_array()
}
gc_on()
gc_off()
})
test_that("upload data frame with other scidbR settings", {
skip_if_no_db()
withr::local_options(stringsAsFactors = FALSE)
conn = get_scidb_connection()
df = data.frame(
f_str = letters[1:5],
f_double = c(3.14, 2.0, NA, 0, -99),
f_bool = c(T,NA,F,NA,F),
f_int64 = 1:5 * 10.0,
f_datetime = c('2020-03-14 01:23:45', '2000-01-01 0:0:0', '1999-12-3 12:34:56', as.character(Sys.time()), "2020-01-01 3:14:15")
)
schema = "<f_str:string COMPRESSION 'zlib', f_int32:int32, f_int64:int64, f_bool: bool, f_double: double, f_datetime: datetime> [da=0:*:0:*]"
template = conn$array_from_schema(sprintf(
"%s %s", dbutils$random_array_name(), schema
))
uploaded = conn$upload_df(df, schema, .temp = F, .use_aio_input = F)
# Template can also be a list of data types
# aio_input mode does not support compound data types, e.g. <a: string CMOPRESSION 'zlib'>
uploaded2 = conn$upload_df(df, template$raw_dtypes, .temp = T, .use_aio_input = T)
expect_identical(uploaded$dtypes[uploaded$attrs], template$dtypes[uploaded$attrs])
expect_identical(uploaded2$dtypes[uploaded2$attrs], template$raw_dtypes[uploaded2$attrs])
# templateMatchedDTypes = template$.private$get_field_types(names(df), .raw = T)
# expect_identical(uploaded2$.private$get_field_types(uploaded2$attrs), templateMatchedDTypes)
# R date time conversion is cubersome. We replace it with the scidb parsed values.
df = dplyr::mutate(df, f_datetime = as.POSIXct(f_datetime, tz = "GMT"))
dbdf1 = uploaded$to_df()
dbdf2 = uploaded2$to_df()
expect_equal(dbdf1, df)
expect_equal(dbdf2, df)
# Validate number of rows
expect_equal(uploaded$cell_count(), nrow(df))
expect_equal(uploaded2$cell_count(), nrow(df))
expect_silent(uploaded$remove_array())
expect_silent(uploaded2$remove_array())
})
test_that("Error cases: upload data frames", {
skip_if_no_db()
withr::local_options(stringsAsFactors = FALSE)
conn = get_scidb_connection()
expect_error(conn$upload_df(list()))
expect_error(conn$upload_df(data.frame()))
expect_error(conn$upload_df(data.frame(a=1:2), template = "<m:string> [z]"))
})
# from build literal ----
test_that("transient array_op from build literal", {
skip_if_no_db()
withr::local_options(stringsAsFactors = FALSE)
conn = get_scidb_connection()
# build literal is generated by ArrayOp class, all special characters are properly escaped
# While the data frame below will work in build literal, it would NOT work in upload mode (because of bugs from SciDBR package)
#template = conn$array_from_schema("new <a:string, b:int32, extra:bool> [z]")
template = "new <a:string compression 'zlib', b:int32 not null, extra:bool> [z]"
df = data.frame(
b = 1:5, z = 11:15,
a = c(
"slashes: http://a-b/c\\d%20%",
NA,
"quotes: 'a' \"|\" ",
"special: \t\n[abcd]",
"''"
)
)
arr = conn$compile_df(df, template, build_dim_spec = "i=1:*:0:*")
arr2 = conn$compile_df(df, build_dim_spec = "i=1:*:0:*")
expect_equal(arr$to_df() %>% dplyr::arrange(z), df)
expect_equal(arr2$to_df() %>% dplyr::arrange(z), df)
expect_match(arr$to_afl(), "compression 'zlib'", ignore.case = T)
expect_match(arr$to_afl(), "not null", ignore.case = T)
})
test_that("build as scidb data frame", {
skip_if_no_db()
withr::local_options(stringsAsFactors = FALSE)
conn = get_scidb_connection()
myDf = data.frame(a = 1:3)
arr = conn$compile_df(myDf, "<a:int32> [i]", as_scidb_data_frame = F)
arrDf = conn$compile_df(myDf, "<a:int32> [i]", as_scidb_data_frame = T)
expect_identical(arr$is_scidb_data_frame, F)
expect_identical(arrDf$is_scidb_data_frame, T)
expect_equal(arr$to_df(), myDf)
expect_equal(arrDf$to_df(), myDf)
})
test_that("Error case: invalid dimension specs in build_literal", {
skip_if_no_db()
withr::local_options(stringsAsFactors = FALSE)
conn = get_scidb_connection()
# should report error if build_dim is invalid, verified by scidb show
expect_error(conn$compile_df(data.frame(a=1:10), build_dim_spec = "i=non-sense"))
})
# From data frame auto mode ----
test_that("auto determine whether to upload or build", {
skip_if_no_db()
withr::local_options(stringsAsFactors = FALSE)
conn = get_scidb_connection()
dataFrame = data.frame(a=1:10, b=1:10*3.14, c=letters[1:10], d=c(T,F), e=as.POSIXct("2020/1/23 3:45", tz = "GMT"))
# ScidbR has a bug for uploading data frames with a datetime column
dataFrame2 = data.frame(a=1:10, b=1:10*3.14, c=letters[1:10], d=c(T,F))
arrBuild = conn$array_from_df(dataFrame, build_or_upload_threshold = 5000)
arrUpload = conn$array_from_df(dataFrame2, build_or_upload_threshold = 10)
expect_true(grepl("build", arrBuild$to_afl()))
expect_true(!grepl("build", arrUpload$to_afl()))
expect_equal(arrBuild$to_df(), dataFrame)
expect_equal(arrUpload$to_df(), dataFrame2)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.