skip_if_no_db()
# Shared connection object
conn = get_scidb_connection()
schemaTemplate = conn$array_from_schema(
"<lower:string COMPRESSION 'zlib', upper:string, f_int32:int32, f_int64:int64, f_bool: bool, f_double: double>
[da=0:*:0:*; db=0:*:0:*]"
)
ArrayContent = data.frame(
da=0:4, db=10:14,
lower = letters[1:5],
upper = LETTERS[1:5],
f_int32 = -5:-1,
f_int64 = 1:5 * 10.0,
f_bool = c(T,NA,F,NA,F),
f_double = c(3.14, 2.0, NA, 0, -99),
stringsAsFactors = FALSE
)
RefArray = conn$
array_from_df(ArrayContent, schemaTemplate, force_template_schema = T)$
persist(.gc = FALSE)
assert_df_match = function(actual_df, expected_df) {
expect_equal(
actual_df %>% dplyr::arrange(!!! sapply(names(actual_df), as.name)),
expected_df
)
}
# Mutate new ----
test_that("Mutate an array by expression", {
withr::local_options(stringsAsFactors = FALSE)
# Mutate existing field
assert_df_match(
RefArray$
mutate(f_bool=iif(f_double > 1, true, false), upper=" \"'mutated'\" ", f_int32 = strlen(lower))$
to_df_all(),
ArrayContent %>%
dplyr::mutate("f_bool" = !is.na(f_double) & f_double > 1, upper = " \"'mutated'\" ", f_int32 = nchar(lower))
)
# R `if` and `nchar` functions are converted to SciDB `iif` and `strlen`, respectively
assert_df_match(
RefArray$
filter(is_null(f_bool))$
mutate(f_bool=if(f_double > 1) !!T else !!F, upper='mutated', f_int32 = nchar(lower),
.sync_schema = F)$ # do not check schema
to_df_all(),
ArrayContent %>%
dplyr::filter(is.na(f_bool)) %>%
dplyr::mutate(f_bool = f_double > 1, upper = "mutated", f_int32 = nchar(lower))
)
# New fields generated
assert_df_match(
RefArray$
filter(is_null(f_bool))$
mutate(lower_len = strlen(lower))$
to_df_all(),
ArrayContent %>%
dplyr::filter(is.na(f_bool)) %>%
dplyr::mutate(lower_len = nchar(lower))
)
# New fields generated and existing fields are ignored if they are not changed
assert_df_match(
RefArray$
filter(is_null(f_bool))$
mutate(lower_len = strlen(lower), f_int32 = f_int32)$ # f_int32 ignored
to_df_all(),
ArrayContent %>%
dplyr::filter(is.na(f_bool)) %>%
dplyr::mutate(lower_len = nchar(lower))
)
# New fields generated and existing fields removed by NULL
assert_df_match(
RefArray$
filter(is_null(f_bool))$
mutate(lower_len = strlen(lower), f_bool = NULL, f_int64 = NULL)$
to_df_all(),
ArrayContent %>%
dplyr::filter(is.na(f_bool)) %>%
dplyr::mutate(f_bool = NULL, f_int64 = NULL, lower_len = nchar(lower))
)
# Remove existing fields
assert_df_match(
RefArray$
mutate(f_bool = NULL, f_int64 = NULL)$
to_df_all(),
ArrayContent %>%
dplyr::mutate(f_bool = NULL, f_int64 = NULL)
)
# Pass field expression by list
assert_df_match(
RefArray$
filter(is_null(f_bool))$
mutate(.dots = list(lower_len = aflutils$e(strlen(lower)), f_bool = NULL, f_int64 = NULL))$
to_df_all(),
ArrayContent %>%
dplyr::filter(is.na(f_bool)) %>%
dplyr::mutate(f_bool = NULL, f_int64 = NULL, lower_len = nchar(lower))
)
})
test_that("Add attributes from dimensions", {
withr::local_options(stringsAsFactors = FALSE)
assert_df_match(
RefArray$
mutate(lower_len = strlen(lower), da = da)$
to_df() %>%
dplyr::select(da, dplyr::everything()),
ArrayContent %>%
dplyr::mutate(db = NULL, lower_len = nchar(lower))
)
})
test_that("At least one field in result", {
withr::local_options(stringsAsFactors = FALSE)
assert_df_match(
RefArray$
mutate(f_bool = NULL, f_double = NULL, f_int32 = NULL, f_int64 = NULL, lower = NULL, upper = NULL, extra = bool(null))$
select("da", "db")$
to_df(),
ArrayContent %>%
dplyr::select(da, db)
)
# all array attrs are removed
expect_error(
RefArray$
mutate(f_bool = NULL, f_double = NULL, f_int32 = NULL, f_int64 = NULL, lower = NULL, upper = NULL),
"no valid field"
)
})
test_that("mutate error cases", {
withr::local_options(stringsAsFactors = FALSE)
expect_error(
RefArray$
filter(is_null(f_bool))$
mutate(upper='mutated', f_int32 = strlen(lower), nonexistent = NULL),
"nonexistent"
)
expect_error(
RefArray$
filter(is_null(f_bool))$
mutate(f_bool, upper='mutated', f_int32 = strlen(lower)),
"f_bool"
)
})
# transmute ----
test_that("transmute does not preserve old fields", {
withr::local_options(stringsAsFactors = FALSE)
# transmute an array will keep the dimensions
# Single and double quotes should be escaped
assert_df_match(
RefArray$
filter(is_null(f_bool))$
transmute(f_double, upper=" \"'mutated'\" ", f_int32 = nchar(lower))$
to_df_all(),
ArrayContent %>%
dplyr::filter(is.na(f_bool)) %>%
dplyr::transmute(da, db, f_double, upper = " \"'mutated'\" ", f_int32 = nchar(lower))
)
assert_df_match(
RefArray$
filter(is_null(f_bool))$
transmute(f_double=f_double, upper="mutated", f_int32 = strlen(lower))$
to_df_all(),
ArrayContent %>%
dplyr::filter(is.na(f_bool)) %>%
dplyr::transmute(da, db, f_double, upper = "mutated", f_int32 = nchar(lower))
)
assert_df_match(
RefArray$
filter(is_null(f_bool))$
transmute(f_bool=iif(f_double > 1, true, false), upper="mutated", f_int32 = strlen(lower))$
to_df_all(),
ArrayContent %>%
dplyr::filter(is.na(f_bool)) %>%
dplyr::transmute(da, db, f_bool = f_double > 1, upper = "mutated", f_int32 = nchar(lower))
)
# New fields generated & no old fields preserved
assert_df_match(
RefArray$
filter(is_null(f_bool))$
transmute(.dots = list(lower_len = aflutils$e(strlen(lower))))$
to_df(),
ArrayContent %>%
dplyr::filter(is.na(f_bool)) %>%
dplyr::transmute(lower_len = nchar(lower))
)
})
test_that("Transmute array dimensions to attributes", {
withr::local_options(stringsAsFactors = FALSE)
assert_df_match(
RefArray$
transmute(da)$
to_df(),
ArrayContent %>%
dplyr::transmute(da)
)
assert_df_match(
RefArray$
transmute(da, lower)$
to_df(),
ArrayContent %>%
dplyr::transmute(da, lower)
)
assert_df_match(
RefArray$
transmute(da = da + 1, lower)$
to_df(),
ArrayContent %>%
dplyr::transmute(da = da + 1, lower)
)
})
test_that("Transmute error cases", {
withr::local_options(stringsAsFactors = FALSE)
expect_error(RefArray$transmute(f_int32 = NULL), "transmute fields")
})
# mutate_by another arrayOp ----
test_that("mutate: key:1-attr, update_fields: 2 attrs", {
withr::local_options(stringsAsFactors = FALSE)
mutateDataSource = ArrayContent %>%
dplyr::filter(da %% 2 == 0) %>%
dplyr::mutate(f_int32 = 1:3, upper = 'changed')
assert_df_match(
RefArray$mutate_by(
conn$array_from_df(mutateDataSource %>% dplyr::select(lower, upper, f_int32)),
keys = c('lower'), updated_fields = c('f_int32', 'upper')
)$to_df_all(),
mutateDataSource
)
})
test_that("mutate: key:2-attrs, update_fields: 1 attr", {
withr::local_options(stringsAsFactors = FALSE)
mutateDataSource = ArrayContent %>%
dplyr::filter(da %% 2 == 0) %>%
dplyr::mutate(f_int32 = 1:3)
dataArray = conn$array_from_df(mutateDataSource %>% dplyr::select(lower, upper, f_int32), schemaTemplate)
assert_df_match(
RefArray$mutate_by(
dataArray, keys = c('lower', 'upper'), updated_fields = c('f_int32')
)$to_df_all(),
mutateDataSource
)
})
test_that("mutate: key:1 dim, update_fields: 1 attr", {
withr::local_options(stringsAsFactors = FALSE)
mutateDataSource = ArrayContent %>%
dplyr::filter(da %% 2 == 0) %>%
dplyr::mutate(f_int32 = 1:3)
dataArray = conn$array_from_df(mutateDataSource %>% dplyr::select(da, f_int32), schemaTemplate)
assert_df_match(
RefArray$mutate_by(
dataArray, keys = c('da'), updated_fields = c('f_int32')
)$to_df_all(),
mutateDataSource
)
})
test_that("mutate: key:2 dims, update_fields: 1 attr", {
withr::local_options(stringsAsFactors = FALSE)
mutateDataSource = ArrayContent %>%
dplyr::filter(da %% 2 == 0) %>%
dplyr::mutate(f_int32 = 1:3)
dataArray = conn$array_from_df(mutateDataSource %>% dplyr::select(da, db, f_int32), schemaTemplate)
assert_df_match(
RefArray$mutate_by(
dataArray, keys = c('da', 'db'), updated_fields = c('f_int32')
)$to_df_all(),
mutateDataSource
)
})
test_that("mutate: key: 1 dim + 1 attr, update_fields: 2 attrs", {
withr::local_options(stringsAsFactors = FALSE)
mutateDataSource = ArrayContent %>%
dplyr::filter(da %% 2 == 0) %>%
dplyr::mutate(f_int32 = 1:3, upper = "changed")
dataArray = conn$array_from_df(mutateDataSource %>% dplyr::select(db, lower, f_int32, upper), schemaTemplate)
assert_df_match(
RefArray$mutate_by(
dataArray, keys = c('lower', 'db'), updated_fields = c('f_int32', 'upper')
)$to_df_all(),
mutateDataSource
)
})
test_that("mutate_by: no reserved fields", {
withr::local_options(stringsAsFactors = FALSE)
mutateDataSource = ArrayContent %>%
dplyr::filter(da %% 2 == 0) %>%
dplyr::mutate(f_int32 = 1:3, upper = "changed")
dataArray = conn$array_from_df(
mutateDataSource %>% dplyr::select(db, lower, upper, f_int32, f_int64, f_bool, f_double),
schemaTemplate
)
assert_df_match(
RefArray$mutate_by(
dataArray, keys = c('lower', 'db'), updated_fields = dataArray$attrs %-% c('lower', 'db')
)$to_df_all(),
mutateDataSource
)
})
test_that("mutate_by: data_array already conforms to target schema", {
withr::local_options(stringsAsFactors = FALSE)
mutateDataSource = ArrayContent %>%
dplyr::filter(da %% 2 == 0) %>%
dplyr::mutate(f_int32 = 1:3, upper = "changed")
dataArray = conn$array_from_df(
mutateDataSource,
schemaTemplate, force_template_schema = T
)
assert_df_match(
RefArray$mutate_by(
dataArray # no keys or updated_fields required. keys = dataArray$dims, updated_fields = dataArray$attrs
)$to_df_all(),
mutateDataSource
)
assert_df_match(
RefArray$mutate_by(
dataArray, updated_fields = c("lower", "upper", "f_int32")
)$to_df_all(),
mutateDataSource
)
expect_warning({
result = RefArray$mutate_by(
dataArray, keys = c("da", "db", "lower"), updated_fields = c("upper", "f_int32")
)$to_df_all()
}, "lower")
assert_df_match(
result,
mutateDataSource
)
})
RefArray$remove_array()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.