context("Deriving array variables")
with_mock_crunch({
ds <- cachedLoadDataset("test ds")
test_that("deriveArray works with MR", {
expect_POST(
ds$derived_mr <- deriveArray(list(ds$gender),
selections = list("Female"),
name = "derivedMR",
numeric = FALSE
),
"https://app.crunch.io/api/datasets/1/variables/",
'{"derivation":{"function":"select_categories","args":',
'[{"function":"array","args":[{"function":"make_frame","args":',
'[{"map":{"0001":{"variable":"https://app.crunch.io/api/datasets',
'/1/variables/gender/"}}},{"value":["0001"]}]}],"kwargs":{"numeric":{"value":false}}},', # nolint
'{"value":["Female"]}]},"name":"derivedMR","alias":"derived_mr"}'
)
})
})
with_test_authentication({
ds <- newDatasetFromFixture("apidocs")
## Reference values:
plh.values <- as.vector(ds$petloc$petloc_home)
plw.values <- as.vector(ds$petloc$petloc_work)
q1.values <- as.vector(ds$q1)
plh.counts <- table(ds$petloc$petloc_home)
# petloc_home
# Cat Dog Bird
# 5 3 3
plw.counts <- table(ds$petloc$petloc_work)
# petloc_work
# Cat Dog Bird
# 6 4 6
vd <- deriveArray(list(ds$q1, ds$petloc$petloc_home), name = "Derived pets")
test_that("deriveArray returns a VarDef", {
expect_is(vd, "VariableDefinition")
})
ds$derivedarray <- vd
test_that("Sending a derived array vardef creates a derived array", {
expect_true(is.CA(ds$derivedarray))
expect_identical(names(subvariables(ds$derivedarray)), c("Pet", "Home"))
expect_identical_temp_nodata(
names(categories(ds$derivedarray)),
c("Cat", "Dog", "Bird", "Skipped", "Not Asked", "No Data")
)
expect_identical(as.vector(ds$derivedarray[[1]]), q1.values)
expect_identical(as.vector(ds$derivedarray[[2]]), as.vector(ds$petloc$petloc_home))
})
ds$derivedmr <- deriveArray(
list(ds$q1, ds$petloc$petloc_home),
selections = "Dog", name = "Derived pets MR"
) # cat dog has id 2
test_that("deriveArray can also derive MRs", {
expect_true(is.MR(ds$derivedmr))
expect_identical(names(subvariables(ds$derivedmr)), c("Pet", "Home"))
expect_identical_temp_nodata(
names(categories(ds$derivedmr)),
c("Cat", "Dog", "Bird", "Skipped", "Not Asked", "No Data")
)
expect_identical(as.vector(ds$derivedmr[[1]]), q1.values)
expect_true(is.selected(categories(ds$derivedmr)[[2]]))
})
test_that("Can edit metadata of the derived array, and parents are unaffected", {
aliases(subvariables(ds$derivedarray)) <- c("dsub1", "dsub2")
expect_identical(
aliases(subvariables(ds$derivedarray)),
c("dsub1", "dsub2")
)
names(categories(ds$derivedarray))[1:2] <- c("one", "two")
expect_identical_temp_nodata(
names(categories(ds$derivedarray)),
c("one", "two", "Bird", "Skipped", "Not Asked", "No Data")
)
ds <- refresh(ds)
expect_identical(
aliases(subvariables(ds$derivedarray)),
c("dsub1", "dsub2")
)
expect_identical_temp_nodata(
names(categories(ds$derivedarray)),
c("one", "two", "Bird", "Skipped", "Not Asked", "No Data")
)
expect_equivalent(
table(ds$derivedarray$dsub2),
structure(c(5, 3, 3),
.Dim = 3L,
.Dimnames = list(dsub2 = c("one", "two", "Bird")),
class = "table"
)
)
## Parent unaffected
expect_true(is.Categorical(ds$q1))
expect_identical_temp_nodata(
names(categories(ds$q1)),
c("Cat", "Dog", "Bird", "Skipped", "Not Asked", "No Data")
)
})
## Deep copy array, rename it, etc.
ds$petloc2 <- copy(ds$petloc, deep = TRUE, name = "Other pet loc")
aliases(subvariables(ds$petloc2)) <- c("pl2_a", "pl2_b")
## Make a "flipped" version of that and the original
ds$petloc_a <- deriveArray(list(ds$petloc2$pl2_a, ds$petloc$petloc_home),
name = "Pet Location: Home", subreferences = list(
list(name = "Copy", alias = "pla_copy"),
list(name = "Original", alias = "pla_orig")
)
)
ds$petloc_b <- deriveArray(list(ds$petloc2$pl2_b, ds$petloc$petloc_work),
name = "Pet Location: Work", subreferences = list(
list(name = "Copy", alias = "plb_copy"),
list(name = "Original", alias = "plb_orig")
)
)
test_that("Deriving arrays with subreferences specified", {
expect_identical(
aliases(subvariables(ds$petloc_a)),
c("pla_copy", "pla_orig")
)
expect_identical(
names(subvariables(ds$petloc_a)),
c("Copy", "Original")
)
})
## Make derived array of derived array
ds$metapetloc <- deriveArray(list(
ds$petloc$petloc_home, ds$petloc_a$pla_copy,
ds$petloc_b$plb_copy, ds$derivedarray$dsub2
),
name = "Derived from derived and real"
)
test_that("Derived arrays of derived array subvariables with different categories", {
expect_identical(
names(subvariables(ds$metapetloc)),
c("Home", "Copy", "Copy", "Home")
)
expect_identical_temp_nodata(
names(categories(ds$metapetloc)),
c("Cat", "Dog", "Bird", "Skipped", "Not Asked", "No Data", "one", "two")
)
t1 <- table(ds$metapetloc[[1]])
expect_identical(names(t1), c("Cat", "Dog", "Bird", "one", "two"))
expect_equal(as.numeric(t1), c(5, 3, 3, 0, 0))
expect_identical(as.numeric(table(ds$metapetloc[[2]])), c(5, 3, 3, 0, 0))
expect_identical(as.numeric(table(ds$metapetloc[[3]])), c(6, 4, 6, 0, 0))
## Note the shift: this variable has categories "one" and "two"
expect_identical(as.numeric(table(ds$metapetloc[[4]])), c(0, 0, 3, 5, 3))
})
ds$metapet_combined <- combine(ds$metapetloc,
name = "Metapet combined",
combinations = list(
list(name = "Cat", categories = c("Cat", "one")),
list(name = "Dog", categories = c("Dog", "two"))
)
)
test_that("Combine categories of derived array", {
expect_identical(
names(subvariables(ds$metapet_combined)),
c("Home", "Copy", "Copy", "Home")
)
expect_identical_temp_nodata(
names(categories(ds$metapet_combined)),
c("Cat", "Dog", "Bird", "Skipped", "Not Asked", "No Data")
)
t1 <- table(ds$metapet_combined[[1]])
expect_identical(names(t1), c("Cat", "Dog", "Bird"))
expect_equal(as.numeric(t1), c(5, 3, 3))
expect_identical(as.numeric(table(ds$metapet_combined[[2]])), c(5, 3, 3))
expect_identical(as.numeric(table(ds$metapet_combined[[3]])), c(6, 4, 6))
## Now these are combined back together
expect_identical(as.numeric(table(ds$metapet_combined[[4]])), c(5, 3, 3))
})
## Append to that dataset, make sure all arrays update appropriately
part2 <- newDatasetFromFixture("apidocs")
ds <- appendDataset(ds, part2)
test_that("When appending, derived arrays get data (or missing, as appropriate)", {
## No Data is added to the categories here
# Replace this `expect_true(identical(new) || identical(old))`
# construction with `expect_identical(new)` once the "default values"
# ticket https://www.pivotaltracker.com/story/show/164939686 is released.
expect_true(
identical(
names(categories(ds$metapetloc)),
c("Cat", "Dog", "Bird", "Skipped", "Not Asked", "one", "two", "No Data")
) || identical(
names(categories(ds$metapetloc)),
c("Cat", "Dog", "Bird", "Skipped", "Not Asked", "No Data", "one", "two")
)
)
## metapetloc: subvar 1 is derived from petloc$petloc_home
expect_identical(
as.numeric(table(ds$metapetloc[[1]])),
2 * c(5, 3, 3, 0, 0)
)
## subvar 2 is from petloc_a$pla_1, which points to petloc2, a deep copy
expect_identical(
as.numeric(table(ds$metapetloc[[2]])),
c(5, 3, 3, 0, 0)
)
## likewise for subvar 3
expect_identical(
as.numeric(table(ds$metapetloc[[3]])),
c(6, 4, 6, 0, 0)
)
## 4 comes from derivedarray$dsub2, which comes from petloc$petloc_home,
## but with different category names
# This test expectation is long-standing but wrong:
# expect_identical(
# as.numeric(table(ds$metapetloc[[4]])),
# c(5, 3, 6, 5, 3)
# )
# It should instead expect these numbers:
# expect_identical(
# as.numeric(table(ds$metapetloc[[4]])),
# c(0, 0, 6, 10, 6)
# )
# TODO: Once the fix (behind the Crunch API) ships,
# delete the wrong expectation and uncomment the right one.
})
test_that("Combined categories on top of derived array also gets the right data", {
expect_identical(
names(categories(ds$metapet_combined)),
c("Cat", "Dog", "Bird", "Skipped", "Not Asked", "No Data")
)
expect_identical(
as.numeric(table(ds$metapet_combined[[1]])),
2 * c(5, 3, 3)
)
expect_identical(
as.numeric(table(ds$metapet_combined[[2]])),
c(5, 3, 3)
)
expect_identical(
as.numeric(table(ds$metapet_combined[[3]])),
c(6, 4, 6)
)
expect_identical(
as.numeric(table(ds$metapet_combined[[4]])),
2 * c(5, 3, 3)
)
})
test_that("The array we edited in the beginning still has its edits after appending", {
expect_identical(
aliases(subvariables(ds$derivedarray)),
c("dsub1", "dsub2")
)
expect_identical_temp_nodata(
names(categories(ds$derivedarray)),
c("one", "two", "Bird", "Skipped", "Not Asked", "No Data")
)
## This is the data we should see in metapetloc[[4]]. It's correct here.
expect_equivalent(
table(ds$derivedarray$dsub2),
structure(c(10, 6, 6),
.Dim = 3L,
.Dimnames = list(dsub2 = c("one", "two", "Bird")),
class = "table"
)
)
})
## Append a dataset where the categories in one of the base variables don't quite match
part3 <- newDatasetFromFixture("apidocs")
names(categories(part3$petloc))[2] <- "Beaver"
aliases(subvariables(part3$petloc))[2] <- "petloc_school"
names(subvariables(part3$petloc))[2] <- "School"
ds <- appendDataset(ds, part3)
test_that("petloc is updated appropriately", {
expect_identical(
aliases(subvariables(ds$petloc)),
c("petloc_home", "petloc_work", "petloc_school")
)
## No Data comes in after appending because petloc_work and petloc_school have data gaps
# Replace this `expect_true(identical(new) || identical(old))`
# construction with `expect_identical(new)` once the "default values"
# ticket https://www.pivotaltracker.com/story/show/164939686 is released.
expect_true(
identical(
names(categories(ds$petloc)),
c("Cat", "Dog", "Bird", "Skipped", "Not Asked", "Beaver", "No Data")
) || identical(
names(categories(ds$petloc)),
c("Cat", "Dog", "Bird", "Skipped", "Not Asked", "No Data", "Beaver")
)
)
})
# skip("Derivations of derivations need some work in zz9")
# test_that("derivedarray gets updated appropriately", {
# expect_identical(aliases(subvariables(ds$derivedarray)),
# c("dsub1", "dsub2"))
# ## No Data and Beaver come in from petloc
# expect_identical(names(categories(ds$derivedarray)),
# c("one", "two", "Bird", "Skipped", "Not Asked", "Beaver", "No Data"))
# # print(table(ds$derivedarray))
# # expect_equivalent(table(ds$derivedarray),
# # structure(c(10, 6, 6), .Dim=3L,
# # .Dimnames=list(dsub2=c("one", "two", "Bird")),
# # class="table"))
# })
# test_that("petloc_a gets updated appropriately", {
# ## petloc_a has Copy and Original, which is petloc_home
# expect_identical(aliases(subvariables(ds$petloc_a)),
# c("pla_copy", "pla_orig"))
# expect_identical(names(subvariables(ds$petloc_a)),
# c("Copy", "Original"))
# expect_identical(names(categories(ds$petloc_a)),
# c("Cat", "Dog", "Bird", "Skipped", "Not Asked", "No Data", "Beaver"))
# ## table(that)
# })
# test_that("petloc_b gets updated appropriately", {
# ## petloc_a has Copy and Original, which is petloc_work. But there is
# ## no petloc_work in the new batch
# expect_identical(aliases(subvariables(ds$petloc_b)),
# c("plb_copy", "plb_orig"))
# expect_identical(names(subvariables(ds$petloc_b)),
# c("Copy", "Original"))
# expect_identical(names(categories(ds$petloc_b)),
# c("Cat", "Dog", "Bird", "Skipped", "Not Asked", "No Data", "Beaver"))
# ## table(that). all NA for the new batch
# })
# test_that("metapetloc gets updated appropriately", {
# # petloc$petloc_home, petloc_a$pla_copy, petloc_b$plb_copy, derivedarray$dsub2
# expect_identical(names(categories(ds$metapetloc)),
# c("Cat", "Dog", "Bird", "Skipped", "Not Asked", "one", "two", "No Data", "Beaver"))
# ## metapetloc: subvar 1 is derived from petloc$petloc_home.
# ## Beaver instead of Dog in new wave
# expect_identical(as.numeric(table(ds$metapetloc[[1]])),
# c(15, 6, 9, 0, 0, 3))
# ## subvar 2 is from petloc_a$pla_1, which points to petloc2, a deep copy
# expect_identical(as.numeric(table(ds$metapetloc[[2]])),
# c(5, 3, 3, 0, 0, 0))
# ## likewise for subvar 3
# expect_identical(as.numeric(table(ds$metapetloc[[3]])),
# c(6, 4, 6, 0, 0, 0))
# ## 4 comes from derivedarray$dsub2, which comes from petloc$petloc_home,
# ## but with different category names
# ## This assertion fails. The new data comes in as Cat Dog and not one two
# # print(table(ds$metapetloc[[4]]))
# expect_identical(as.numeric(table(ds$metapetloc[[4]])),
# c(0, 0, 9, 10, 6, 3))
# })
# test_that("metapet_combined gets updated appropriately", {
# expect_identical(names(categories(ds$metapet_combined)),
# c("Cat", "Dog", "Bird", "Skipped", "Not Asked", "No Data", "Beaver"))
# expect_identical(as.numeric(table(ds$metapet_combined[[1]])),
# c(15, 6, 9, 3))
# expect_identical(as.numeric(table(ds$metapet_combined[[2]])),
# c(5, 3, 3))
# expect_identical(as.numeric(table(ds$metapet_combined[[3]])),
# c(6, 4, 6))
# expect_identical(as.numeric(table(ds$metapet_combined[[4]])),
# c(15, 6, 9, 3))
# })
## TODO: more things (filter entities, drop rows, etc., edit values of base
## variables; edit values of derived array?)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.