skip_if_no_db()
# Shared connection object
conn = get_scidb_connection()
schemaTemplate = conn$array_from_schema(
"<lower:string COMPRESSION 'zlib', upper:string, f_int32:int32, f_int64:int64, f_bool: bool, f_double: double>
[da=0:*:0:*; db=0:*:0:*]"
)
ArrayContent = data.frame(
#da=1:20, db=101:120,
da = c(1:10, 1:10),
db = c(101:110, 201:210),
lower = letters[1:20],
upper = LETTERS[1:20],
f_int32 = -20:-1,
f_int64 = 1:20 * 10.0,
f_bool = c(T,NA,F,NA,F),
f_double = c(3.14, 2.0, NA, 0, -99),
stringsAsFactors = FALSE
)
RefArray = conn$
array_from_df(ArrayContent, schemaTemplate, force_template_schema = T)$
persist(.gc = FALSE)
assert_df_match = function(result_op, expected_df, afl_patterns) {
resultAfl = result_op$to_afl()
sapply(afl_patterns, function(x) expect_match(resultAfl, x))
expect_identical(result_op$to_schema_str(), RefArray$to_schema_str())
# Scidb doesn't have a deterministic ordering rule, so we need to sort the result data frame and then compare
expect_equal(
dplyr::arrange(data.frame(result_op$to_df_all()), da, db),
dplyr::arrange(expected_df, da, db)
)
}
# Filter mode ----
test_that("filter, 1-dim", {
withr::local_options(stringsAsFactors = FALSE)
df = data.frame(da = c(3,5,8,11))
assert_df_match(
RefArray$semi_join(df, filter_threshold=10, upload_threshold=20),
dplyr::semi_join(ArrayContent, df),
"filter"
)
})
test_that("filter, 1-attr", {
withr::local_options(stringsAsFactors = FALSE)
values = c(-10, -11, -12)
df = data.frame(f_int32 = values)
assert_df_match(
RefArray$semi_join(df, filter_threshold=10, upload_threshold=20),
dplyr::semi_join(ArrayContent, df),
"filter"
)
})
test_that("filter, 2-attrs", {
withr::local_options(stringsAsFactors = FALSE)
values1 = c(-20, -17, -16)
values2 = c("no_match", "d", "e")
df = data.frame(f_int32 = values1, lower = values2)
assert_df_match(
RefArray$semi_join(df, filter_threshold=10, upload_threshold=20),
dplyr::semi_join(ArrayContent, df),
"filter"
)
})
test_that("filter, 1-dim + 1-attr", {
withr::local_options(stringsAsFactors = FALSE)
values1 = c(-1, 4, 5)
values2 = c("no_match", "d", "e")
df = data.frame(da = values1, lower = values2)
assert_df_match(
RefArray$semi_join(df, filter_threshold=10, upload_threshold=20),
dplyr::semi_join(ArrayContent, df),
"filter"
)
})
test_that("filter, lower/upper bounds on same dimension", {
withr::local_options(stringsAsFactors = FALSE)
df = data.frame(da_low = c(1,3), da_hi = c(5, 8))
assert_df_match(
RefArray$semi_join(df,
lower_bound = list('da' = 'da_low'),
upper_bound = list('da' = 'da_hi'),
filter_threshold=10, upload_threshold=20),
dplyr::filter(ArrayContent, (da >= 1 & da <= 5) | (da >=3 & da <=8)),
"filter"
)
})
test_that("filter, lower/upper bounds on different dimensions", {
withr::local_options(stringsAsFactors = FALSE)
df = data.frame(da = c(1,3), db = c(105, 108))
assert_df_match(
RefArray$semi_join(df,
lower_bound = list(da = 'da'),
upper_bound = list(db = 'db'),
filter_threshold=10, upload_threshold=20),
dplyr::filter(ArrayContent, (da >= 1 & db <= 105) | (da >=3 & db <=108)),
"filter"
)
})
test_that("filter mode cannot take array_op as data source", {
withr::local_options(stringsAsFactors = FALSE)
df = data.frame(da = c(1,3), db = c(105, 108))
dataArr = conn$array_from_df(df, RefArray)
expect_error(RefArray$semi_join(dataArr, mode = 'filter'), "data.frame")
})
# Cross_between mode ----
test_that("cross_between, 2 dimension, build", {
withr::local_options(stringsAsFactors = FALSE)
df = data.frame(da = c(1:5, -1), db = c(101:105, -1))
assert_df_match(
RefArray$semi_join(df, filter_threshold=10, upload_threshold=20, mode="cross_between"),
dplyr::semi_join(ArrayContent, df),
c("cross_between", "build")
)
assert_df_match(
RefArray$semi_join(conn$array_from_df(df, RefArray), mode="cross_between"),
dplyr::semi_join(ArrayContent, df),
c("cross_between", "build")
)
})
test_that("cross_between, error, no attribute allowed", {
withr::local_options(stringsAsFactors = FALSE)
df = data.frame(da = c(1:5, -1), db = c(101:105, -1), lower = letters[1:6])
expect_error(
RefArray$semi_join(data.frame(f_int32 = -1, f_int64 = 200), mode = "cross_between"),
"cross_between"
)
})
test_that("cross_between, lower/upper bounds on same dimension", {
withr::local_options(stringsAsFactors = FALSE)
df = data.frame(da_low = c(1,3), da_hi = c(5, 8))
assert_df_match(
RefArray$semi_join(df,
lower_bound = list(da = 'da_low'),
upper_bound = list(da = 'da_hi'),
filter_threshold=2, upload_threshold=20, mode="cross_between"),
dplyr::filter(ArrayContent, (da >= 1 & da <= 5) | (da >=3 & da <=8)),
"cross_between"
)
assert_df_match(
RefArray$semi_join(conn$array_from_df(df, "<da_low:int64, da_hi:int64>[anything]"),
lower_bound = list(da = 'da_low'),
upper_bound = list(da = 'da_hi'),
mode="cross_between"),
dplyr::filter(ArrayContent, (da >= 1 & da <= 5) | (da >=3 & da <=8)),
"cross_between"
)
})
test_that("cross_between, lower/upper bounds on different dimensions", {
withr::local_options(stringsAsFactors = FALSE)
df = data.frame(da = c(1,3), db = c(105, 108))
assert_df_match(
RefArray$semi_join(df,
lower_bound = list(da = 'da'),
upper_bound = list(db = 'db'),
field_mapping = list(),
filter_threshold=2, upload_threshold=20, mode="cross_between"),
dplyr::filter(ArrayContent, (da >= 1 & db <= 105) | (da >=3 & db <=108)),
"cross_between"
)
assert_df_match(
RefArray$semi_join(conn$array_from_df(df, RefArray),
lower_bound = list(da = 'da'),
upper_bound = list(db = 'db'),
field_mapping = list(),
upload_threshold=0, mode="cross_between"),
dplyr::filter(ArrayContent, (da >= 1 & db <= 105) | (da >=3 & db <=108)),
"cross_between"
)
})
# index_lookup mode ----
test_that("index_lookup, 1 dimension, build", {
withr::local_options(stringsAsFactors = FALSE)
df = data.frame(da = 1:15)
assert_df_match(
RefArray$semi_join(df, filter_threshold=5, upload_threshold=20),
dplyr::semi_join(ArrayContent, df),
c("index_lookup", "build")
)
assert_df_match(
RefArray$semi_join(conn$array_from_df(df, RefArray), upload_threshold=0),
dplyr::semi_join(ArrayContent, df),
c("index_lookup", "build")
)
})
test_that("index_lookup, 1 attribute, build", {
withr::local_options(stringsAsFactors = FALSE)
df = data.frame(lower = letters[5:15])
assert_df_match(
RefArray$semi_join(df, filter_threshold=5, upload_threshold=20),
dplyr::semi_join(ArrayContent, df),
c("index_lookup", "build")
)
assert_df_match(
RefArray$semi_join(conn$array_from_df(df, RefArray), upload_threshold=0),
dplyr::semi_join(ArrayContent, df),
c("index_lookup", "build")
)
})
test_that("index_lookup, 1 dimension, upload", {
withr::local_options(stringsAsFactors = FALSE)
df = data.frame(da = 1:15)
assert_df_match(
RefArray$semi_join(df, filter_threshold = 5, upload_threshold = 10),
dplyr::semi_join(ArrayContent, df),
c("index_lookup", "Rarrayop|R_array")
)
assert_df_match(
RefArray$semi_join(conn$array_from_df(df, RefArray), upload_threshold=0),
dplyr::semi_join(ArrayContent, df),
c("index_lookup", "build")
)
})
test_that("index_lookup, 1 attribute, upload", {
withr::local_options(stringsAsFactors = FALSE)
df = data.frame(lower = letters[5:15])
assert_df_match(
RefArray$semi_join(df, filter_threshold = 2, upload_threshold = 5),
dplyr::semi_join(ArrayContent, df),
"index_lookup"
)
assert_df_match(
RefArray$semi_join(conn$array_from_df(df, RefArray), upload_threshold=0),
dplyr::semi_join(ArrayContent, df),
c("index_lookup", "build")
)
})
test_that("index_lookup, error cases", {
withr::local_options(stringsAsFactors = FALSE)
df = data.frame(da = c(1:5, -1), db = c(101:105, -1))
dataArray1 = conn$compile_df(df, RefArray)
dataArray2 = conn$compile_df(df %>% dplyr::select(da), RefArray)
# only 1-dim and 1-attr array is allowed
expect_error(
RefArray$semi_join(dataArray1, mode = "index_lookup"),
"only one dimension and one attribute"
)
# only field_mapping allowed
expect_error(
RefArray$semi_join(dataArray2, mode = "index_lookup", lower_bound = list(da="da")),
"lower_bound"
)
})
test_that("equi_join, 2-attrs", {
withr::local_options(stringsAsFactors = FALSE)
values1 = c(-20, -17, -16)
values2 = c("no_match", "d", "e")
df = data.frame(f_int32 = values1, lower = values2)
assert_df_match(
RefArray$semi_join(df, filter_threshold=10, upload_threshold=20, mode="equi_join"),
dplyr::semi_join(ArrayContent, df),
"equi_join"
)
})
test_that("equi_join, 1-dim + 1-attr", {
withr::local_options(stringsAsFactors = FALSE)
values1 = c(-1, 4, 5)
values2 = c("no_match", "d", "e")
df = data.frame(da = values1, lower = values2)
assert_df_match(
RefArray$semi_join(df, filter_threshold=10, upload_threshold=20, mode="equi_join"),
dplyr::semi_join(ArrayContent, df),
"equi_join"
)
})
test_that("param format error checking", {
withr::local_options(stringsAsFactors = FALSE)
df = data.frame(da = c(1:5, -1), db = c(101:105, -1))
dataArray = conn$compile_df(df, RefArray)
expect_error(
RefArray$semi_join(df, mode = "non-existent"),
"non-existent"
)
expect_error(RefArray$semi_join(df, field_mapping = list(non_field = "da")), "non_field")
expect_error(RefArray$semi_join(list()), "list")
})
test_that("all modes return equal results 1 dimension", {
withr::local_options(stringsAsFactors = FALSE)
filter_df = data.frame(da = c(3, 5, 8, 11))
expected_df = dplyr::semi_join(ArrayContent, filter_df)
semi_join_result = RefArray$semi_join(filter_df, mode = "filter")$to_df_all()
expect_equal(
dplyr::arrange(semi_join_result, da, db),
dplyr::arrange(expected_df, da, db)
)
semi_join_result = RefArray$semi_join(filter_df, mode = "index_lookup")$to_df_all()
expect_equal(
dplyr::arrange(semi_join_result, da, db),
dplyr::arrange(expected_df, da, db)
)
semi_join_result = RefArray$semi_join(filter_df, mode = "cross_between")$to_df_all()
expect_equal(
dplyr::arrange(semi_join_result, da, db),
dplyr::arrange(expected_df, da, db)
)
semi_join_result = RefArray$semi_join(filter_df, mode = "equi_join")$to_df_all()
expect_equal(
dplyr::arrange(semi_join_result, da, db),
dplyr::arrange(expected_df, da, db)
)
})
test_that("all modes return equal results 1 attribute", {
withr::local_options(stringsAsFactors = FALSE)
filter_df = data.frame(f_int32 = c(-10, -11, -12))
expected_df = dplyr::semi_join(ArrayContent, filter_df)
semi_join_result = RefArray$semi_join(filter_df, mode = "filter")$to_df_all()
expect_equal(
dplyr::arrange(semi_join_result, da, db),
dplyr::arrange(expected_df, da, db)
)
semi_join_result = RefArray$semi_join(filter_df, mode = "index_lookup")$to_df_all()
expect_equal(
dplyr::arrange(semi_join_result, da, db),
dplyr::arrange(expected_df, da, db)
)
expect_error(
RefArray$semi_join(filter_df, mode = "cross_between")$to_df_all(),
"Only dimensions are matched in this mode"
)
semi_join_result = RefArray$semi_join(filter_df, mode = "equi_join")$to_df_all()
expect_equal(
dplyr::arrange(semi_join_result, da, db),
dplyr::arrange(expected_df, da, db)
)
})
test_that("all modes return equal results 2 dimensions", {
withr::local_options(stringsAsFactors = FALSE)
filter_df = data.frame(da = c(1:5, -1), db = c(101, 202, 103, 204, 105, 110))
expected_df = dplyr::semi_join(ArrayContent, filter_df)
semi_join_result = RefArray$semi_join(filter_df, mode = "filter")$to_df_all()
expect_equal(
dplyr::arrange(semi_join_result, da),
dplyr::arrange(expected_df, da)
)
semi_join_result = RefArray$semi_join(filter_df, mode = "cross_between")$to_df_all()
expect_equal(
dplyr::arrange(semi_join_result, da),
dplyr::arrange(expected_df, da)
)
semi_join_result = RefArray$semi_join(filter_df, mode = "equi_join")$to_df_all()
expect_equal(
dplyr::arrange(semi_join_result, da),
dplyr::arrange(expected_df, da)
)
})
test_that("all modes return equal results 2 attributes", {
withr::local_options(stringsAsFactors = FALSE)
filter_df = data.frame(f_int32 = c(-20, -17, -5), lower = c("no_match", "d", "p"))
expected_df = dplyr::semi_join(ArrayContent, filter_df)
semi_join_result = RefArray$semi_join(filter_df, mode = "filter")$to_df_all()
expect_equal(
dplyr::arrange(semi_join_result, da, db),
dplyr::arrange(expected_df, da, db)
)
expect_error(
RefArray$semi_join(filter_df, mode = "cross_between")$to_df_all(),
"Only dimensions are matched in this mode"
)
semi_join_result = RefArray$semi_join(filter_df, mode = "equi_join")$to_df_all()
expect_equal(
dplyr::arrange(semi_join_result, da, db),
dplyr::arrange(expected_df, da, db)
)
})
test_that("all modes return equal results 1 dimension, 1 attribute", {
withr::local_options(stringsAsFactors = FALSE)
filter_df = data.frame(da = c(20, 4, 6), lower = c("no_match", "d", "p"))
expected_df = dplyr::semi_join(ArrayContent, filter_df)
semi_join_result = RefArray$semi_join(filter_df, mode = "filter")$to_df_all()
expect_equal(
dplyr::arrange(semi_join_result, da, db),
dplyr::arrange(expected_df, da, db)
)
# Not testing cross_between, results won't be the same as it ignores attributes.
# TODO: put in a error condition if you pass attributes to cross_between mode
semi_join_result = RefArray$semi_join(filter_df, mode = "equi_join")$to_df_all()
expect_equal(
dplyr::arrange(semi_join_result, da, db),
dplyr::arrange(expected_df, da, db)
)
})
RefArray$remove_array()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.