context("Merge/extend dataset")
with_mock_crunch({
ds1 <- cachedLoadDataset("test ds")
ds2 <- cachedLoadDataset("ECON.sav")
testPayloadNoFilterArg <- paste0(
'{"function":"adapt",',
'"args":[{"dataset":"https://app.crunch.io/api/datasets/3/"},',
'{"variable":"https://app.crunch.io/api/datasets/3/variables/birthyr/"},',
'{"variable":"https://app.crunch.io/api/datasets/1/variables/birthyr/"}]'
)
testPayload <- paste0(testPayloadNoFilterArg, "}")
genderFilter <- paste0(
'{"function":"==","args":[',
'{"variable":"https://app.crunch.io/api/datasets/3/variables/gender/"},{"value":1}]}'
)
testPayloadWithFilter <- paste0(
testPayloadNoFilterArg, ',"filter":',
genderFilter, "}"
)
testSubsetPayloadPart1 <- paste0(
'{"function":"select","args":[{"map":{',
'"66ae9881e3524f7db84970d556c34552":',
'{"variable":"https://app.crunch.io/api/datasets/3/variables/gender/"},',
'"f78ca47313144b57adfb495893968e70":',
'{"variable":"https://app.crunch.io/api/datasets/3/variables/birthyr/"}}}],',
'"frame":'
)
testSubsetPayload <- paste0(testSubsetPayloadPart1, testPayload, "}")
testSubsetPayloadWithFilter <- paste0(
testSubsetPayloadPart1, testPayload,
',"filter":', genderFilter, "}"
)
test_that("Correct payload without filtering", {
expect_warning(
expect_POST(
merge(ds1, ds2, by.x = ds1$birthyr, ds2$birthyr),
"https://app.crunch.io/api/datasets/1/variables/",
testPayload
),
"Variable birthyr is hidden"
)
})
test_that("Can reference variables by alias", {
expect_warning(
expect_POST(
merge(ds1, ds2, by.x = "birthyr", by.y = "birthyr"),
"https://app.crunch.io/api/datasets/1/variables/",
testPayload
),
"Variable birthyr is hidden"
)
expect_warning(
expect_POST(
merge(ds1, ds2, by = "birthyr"),
"https://app.crunch.io/api/datasets/1/variables/",
testPayload
),
"Variable birthyr is hidden"
)
})
test_that("joinDatasets with default copy=TRUE redirects here", {
expect_warning(
expect_POST(
joinDatasets(ds1, ds2, by.x = ds1$birthyr, ds2$birthyr),
"https://app.crunch.io/api/datasets/1/variables/",
testPayload
),
"Variable birthyr is hidden"
)
})
test_that("joinDatasets with copy=FALSE is (no longer/not yet) valid", {
expect_error(
joinDatasets(ds1, ds2, by.x = ds1$birthyr, ds2$birthyr, copy = FALSE),
"Virtual joins are not yet supported."
)
})
test_that("merge a subset of variables", {
expect_warning(
expect_POST(
merge(ds1, ds2[c("gender", "birthyr")], by = "birthyr"),
"https://app.crunch.io/api/datasets/1/variables/",
testSubsetPayload
),
"Variable birthyr is hidden"
)
})
test_that("filter rows in merge", {
expect_warning(
expect_POST(
merge(ds1, ds2[ds2$gender == "Male", ], by = "birthyr"),
"https://app.crunch.io/api/datasets/1/variables/",
testPayloadWithFilter
),
"Variable birthyr is hidden"
)
})
test_that("filter rows and variables in merge", {
expect_warning(
expect_POST(
merge(ds1, ds2[ds2$gender == "Male", c("gender", "birthyr")],
by = "birthyr"
),
"https://app.crunch.io/api/datasets/1/variables/",
testSubsetPayloadWithFilter
),
"Variable birthyr is hidden"
)
})
test_that("Input validation for merge/extend (plus method dispatch)", {
expect_error(
extendDataset(1),
"x must be a Crunch Dataset"
)
expect_error(
merge(ds1, 1, by.x = ds1[[1]]),
"y must be a Crunch Dataset"
)
expect_error(
merge(ds1, ds2, by.x = 1),
"by.x must be a Crunch Variable"
)
expect_error(
merge(ds1, ds2, by.x = ds2[[1]]),
"by.x must be a variable in x"
)
expect_error(
merge(ds1, ds2, by.x = ds1[[1]], by.y = 1),
"by.y must be a Crunch Variable"
)
expect_error(
merge(ds1, ds2, by.x = ds1[[1]], by.y = ds1[[1]]),
"by.y must be a variable in y"
)
expect_warning(
expect_error(
merge(ds1, ds2, by.x = ds1$birthyr, by.y = ds2$birthyr, all = TRUE),
'Option "all" not supported.'
),
"Variable birthyr is hidden"
) ## In ds2
expect_warning(
expect_error(
merge(ds1, ds2, by.x = ds1$birthyr, by.y = ds2$birthyr, all.x = FALSE),
'Option "all.x=FALSE" not supported.'
),
"Variable birthyr is hidden"
) ## In ds2
expect_warning(
expect_error(
merge(ds1, ds2, by.x = ds1$birthyr, by.y = ds2$birthyr, all.y = TRUE),
'Option "all.y" not supported.'
),
"Variable birthyr is hidden"
) ## In ds2
})
test_that("Categorical and array variables can't be used as keys", {
expect_error(
merge(ds1, ds2, by.x = ds1$gender, by.y = ds2$birthyr),
"by.x must be type numeric or text"
)
expect_error(
merge(ds1, ds2, by.x = ds1$birthyr, by.y = ds2$gender),
"by.y must be type numeric or text"
)
})
test_that("Providing != 1 alias gives useful error message", {
expect_error(
merge(ds1, ds2),
"by.x must reference one and only one variable"
)
## Default "by" is intersection of names
expect_error(
merge(ds1, ds2, by.x = ds1$birthyr),
"by.y must reference one and only one variable"
)
})
test_that("An invalid alias gives a useful error message", {
expect_error(
merge(ds1, ds2, by.x = "NOTAVARIABLE"),
"NOTAVARIABLE does not reference a variable in x"
)
expect_error(
merge(ds1, ds2, by.x = ds1$birthyr, by.y = "NOTAVARIABLE"),
"NOTAVARIABLE does not reference a variable in y"
)
})
})
printed_order_apidocs2 <- c(
"[+] Key Pet Indicators",
" All pets owned",
" Pet",
" Pets by location",
"[+] Dog Metrics",
" Number of dogs",
" [+] Number of dogs by type",
" Number of dogs -- With papers",
" Number of dogs -- Mutts",
"[+] Details",
" Pet name",
"[+] Dimensions",
" Country",
" Wave",
"Person ID",
"[+] __hidden__",
" Weight",
" Case ID"
)
printed_order_apidocs2_merge_by_stringid <- c( # nolint
" [+] Key Pet Indicators",
" All pets owned",
" Pet",
" Pets by location",
" [+] Dog Metrics",
" Number of dogs",
" [+] Number of dogs by type",
" Number of dogs -- With papers",
" Number of dogs -- Mutts",
" [+] Details",
" Pet name",
" [+] Dimensions",
" Country",
" Wave",
"[+] __hidden__",
" [+] Example dataset",
" Weight",
" Case ID"
)
printed_order_apidocs2_merge_by_caseid <- c( # nolint
" [+] Key Pet Indicators",
" All pets owned",
" Pet",
" Pets by location",
" [+] Dog Metrics",
" Number of dogs",
" [+] Number of dogs by type",
" Number of dogs -- With papers",
" Number of dogs -- Mutts",
" [+] Details",
" Pet name",
" [+] Dimensions",
" Country",
" Wave",
" Person ID",
"[+] __hidden__",
" [+] Example dataset",
" Weight"
)
with_test_authentication({
ds1 <- newDatasetFromFixture("join-apidocs2-to-me")
with_consent(ds1$allpets_1 <- NULL)
ds2 <- newDatasetFromFixture("apidocs2")
test_that("Shape of apidocs2", {
expect_prints(ordering(ds2),
paste(printed_order_apidocs2, collapse = "\n"),
fixed = TRUE
)
expect_identical(names(ds2), c(
"allpets", "q1", "petloc", "ndogs",
"ndogs_a", "ndogs_b", "q3", "country", "wave", "stringid"
))
expect_identical(dim(ds2), c(20L, 10L))
})
test_that("Shape of join-to-me", {
expect_identical(names(ds1), c("id", "matches", "other_var"))
expect_identical(dim(ds1), c(14L, 3L))
})
test_that("An uncomplicated merge on a text key", {
ds1 <- merge(ds1, ds2, by.x = ds1$id, by.y = ds2$stringid)
expect_is(ds1, "CrunchDataset")
expect_identical(dim(ds1), c(14L, 12L))
expect_prints(ordering(ds1),
paste(c(
"ID",
"Join matches",
"Another variable",
paste0("[+] ", name(ds2)),
paste0(printed_order_apidocs2_merge_by_stringid) # nolint
),
collapse = "\n"
),
fixed = TRUE
)
expect_identical(
names(ds1),
c(
"id", "matches", "other_var", "allpets", "q1", "petloc", "ndogs",
"ndogs_a", "ndogs_b", "q3", "country", "wave"
)
)
expect_identical((ds1, "name"), c("Case ID", "Weight"))
})
test_that("Similarly uncomplicated merge, but numeric and hidden key", {
# When merging datasets during jenkins run tests, the order of the
# variables is *sometimes* not copied (usually Pet Name/q3 is missing),
# which makes this test fail. The ordering should be fully copied
# during the merge operation. Reproduction of the bug on production or
# alpha did not yield errors with order. Repro. locally is possible
ds1 <- newDatasetFromFixture("join-apidocs2-to-me")
with_consent(ds1$allpets_1 <- NULL)
type(ds1$id) <- "numeric"
expect_warning(
ds1 <- merge(ds1, ds2, by.x = ds1$id, by.y = ds2$caseid),
"Variable caseid is hidden"
)
expect_is(ds1, "CrunchDataset")
expect_identical(dim(ds1), c(14L, 13L))
expect_prints(ordering(ds1),
paste(c(
"ID",
"Join matches",
"Another variable",
paste0("[+] ", name(ds2)),
paste0(printed_order_apidocs2_merge_by_caseid) # nolint
),
collapse = "\n"
),
fixed = TRUE
)
expect_identical(
names(ds1),
c(
"id", "matches", "other_var", "allpets", "q1", "petloc", "ndogs",
"ndogs_a", "ndogs_b", "q3", "country", "wave", "stringid"
)
)
expect_identical((ds1, "name"), "Weight")
})
## More tests to write:
## 1) check for handling of the conflicted alias
## 2) weight_variables?
## 3) apply exclusion filter on either dataset
test_that("Can select variables to join", {
ds1 <- newDatasetFromFixture("join-apidocs2-to-me")
with_consent(ds1$allpets_1 <- NULL)
ds1 <- merge(ds1, ds2[c("stringid", "q1", "petloc")],
by.x = "id", by.y = "stringid"
)
expect_identical(
names(ds1),
c("id", "matches", "other_var", "q1", "petloc")
)
})
test_that("Can select variables and rows to join", {
ds1 <- newDatasetFromFixture("join-apidocs2-to-me")
with_consent(ds1$allpets_1 <- NULL)
ds1 <- merge(ds1, ds2[ds2$stringid == "43805958", c("stringid", "q1", "petloc")],
by.x = "id", by.y = "stringid"
)
expect_prints(ordering(ds1),
paste(c(
"ID",
"Join matches",
"Another variable",
paste0("[+] ", name(ds2)),
paste(" ",
c(
"[+] Key Pet Indicators",
" Pet",
" Pets by location"
),
sep = "", collapse = "\n"
)
),
collapse = "\n"
),
fixed = TRUE
)
expect_identical(
names(ds1),
c("id", "matches", "other_var", "q1", "petloc")
)
expect_equal(sum(table(ds1$q1)), 1)
})
test_that("Can select rows to join", {
# When merging datasets during jenkins run tests, the order of the
# variables is *sometimes* not copied (usually Pet Name/q3 is missing),
# which makes this test fail. The ordering should be fully copied
# during the merge operation. Reproduction of the bug on production or
# alpha did not yield errors with order. Repro. locally is possible
ds1 <- newDatasetFromFixture("join-apidocs2-to-me")
with_consent(ds1$allpets_1 <- NULL)
ds1 <- merge(ds1, ds2[ds2$stringid == "43805958", ],
by.x = "id", by.y = "stringid"
)
expect_prints(ordering(ds1),
paste(c(
"ID",
"Join matches",
"Another variable",
paste0("[+] ", name(ds2)),
paste0(printed_order_apidocs2_merge_by_stringid) # nolint
),
collapse = "\n"
),
fixed = TRUE
)
expect_identical(
names(ds1),
c(
"id", "matches", "other_var", "allpets", "q1", "petloc", "ndogs",
"ndogs_a", "ndogs_b", "q3", "country", "wave"
)
)
expect_equal(sum(table(ds1$q1)), 1)
})
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.