tests/testthat/test-append-debug.R

context("Debugging append")

with_test_authentication({
    with(temp.option(crunch = list(crunch.timeout = 60)), {
        whereas("Appending with an exclusion on the incoming dataset", {
            part0 <- createDataset(name = now())
            part1 <- newDatasetFromFixture("apidocs")
            exclusion(part1) <- part1$q1 == "Dog"
            part2 <- newDatasetFromFixture("apidocs")
            exclusion(part2) <- part2$q1 == "Dog"

            part0 <- appendDataset(part0, part1)
            part0 <- appendDataset(part0, part2)

            test_that("Appending applies the exclusion filter of the incoming", {
                expect_identical(
                    dim(part0),
                    c(nrow(part1) * 2L, ncol(part1))
                )
                expect_equivalent(table(part0$q1)["Dog"], 0)
            })
        })

        whereas("When appending different arrays containing the same subvars", {
            part1 <- mrdf.setup(newDataset(mrdf), name = "CA1")
            part2 <- mrdf.setup(newDataset(mrdf), name = "CA2")

            test_that("The arrays with different aliases have the same subvar aliases", {
                expect_identical(
                    aliases(subvariables(part1$CA1)),
                    c("mr_1", "mr_2", "mr_3")
                )
                expect_identical(
                    aliases(subvariables(part2$CA2)),
                    c("mr_1", "mr_2", "mr_3")
                )
            })
            test_that("compareDatasets catches that parent mismatch", {
                comp <- compareDatasets(part1, part2)
                expect_prints(
                    summary(comp),
                    "Contains subvariables found in other arrays after matching: CA2"
                )
            })
            # Revisit after https://www.pivotaltracker.com/n/projects/2172644/stories/186660623
            # (reusable subvar codes) ships
            # test_that("The append fails", {
            #     expect_error(
            #         appendDataset(part1, part2),
            #         "Subvariable 'mr_1' cannot be bound to both arrays 'CA2' and 'CA1'."
            #     )
            # })
            # part1 <- cleanseBatches(part1)
            #
            # test_that(paste0(
            #     "Can re-alias array variables to make them line up ",
            #     "**and then drop rows** (and old refs don't reappear)"
            # ), {
            #     alias(part2$CA2) <- "CA1"
            #     ## This is the critical piece to trigger the error: delete rows after realiasing
            #     part2 <- dropRows(part2, seq_len(nrow(part2)) == 1)
            #     out <- appendDataset(part1, part2)
            #     expect_equal(dim(out), c(2 * nrow(part2) + 1, ncol(part2)))
            #     expect_identical(
            #         aliases(subvariables(out$CA1)),
            #         c("mr_1", "mr_2", "mr_3")
            #     )
            # })
        })

        whereas("Appending arrays with different subvars and derived vars", {
            ds1 <- newDatasetFromFixture("apidocs")
            ds2 <- newDatasetFromFixture("apidocs")
            with_consent(deleteSubvariable(ds1$petloc, "petloc_work"))
            ds1 <- refresh(ds1)
            ds1$comb <- combine(ds1$petloc,
                                name = "Comb 1",
                                combinations = list(
                                    list(name = "Mammals", categories = c("Cat", "Dog"))
                                )
            )
            ds1$comb2 <- combine(ds1$petloc,
                                 name = "Comb 2",
                                 combinations = list(
                                     list(name = "Mammals", categories = c("Cat", "Dog"))
                                 )
            )
            test_that("The array has one fewer subvars in ds1", {
                expect_identical(aliases(subvariables(ds1$petloc)), "petloc_home")
            })
            test_that("The array has a variable derived from it", {
                expect_length(aliases(subvariables(ds1$comb)), 1)
                expect_length(aliases(subvariables(ds1$comb2)), 1)
                ## Exact alias isn't deterministic
            })
            test_that("In the other dataset, there are both subvars, and no derivation", {
                expect_identical(
                    aliases(subvariables(ds2$petloc)),
                    c("petloc_home", "petloc_work")
                )
                expect_null(ds2$comb)
            })
            test_that("These append successfully", {
                ## This passes on master but fails here with:
                ## Cannot append subvariables [u'000013', u'000012'] to [u'000012'].
                out <- appendDataset(ds1, ds2)
                expect_true(is.dataset(out))
                expect_true(is.CA(out$comb))
                expect_equal(
                    aliases(subvariables(out$petloc)),
                    c("petloc_home", "petloc_work")
                )
                expect_length(aliases(subvariables(out$comb)), 2)
            })
        })

        test_that("Derivations with comparison operators can be appended", {
            df1 <- data.frame(foo = rnorm(100), bar = c(1:100))
            ds1 <- newDataset(df1)

            df2 <- data.frame(bar = c(100:1))
            ds2 <- newDataset(df2)

            ds1$new1 <- makeCaseVariable(
                `less than one` = ds1$foo < 1,
                other = "else", name = "new one"
            )
            ds <- appendDataset(ds1, ds2)

            expect_equal(nrow(ds), 200)
            expect_equal(as.vector(ds$foo), c(df1$foo, rep(NA, 100)))
            expect_equal(as.vector(ds$bar), c(df1$bar, df2$bar))
            expect_equal(
                as.character(as.vector(ds$new1)),
                c(
                    ifelse(df1$foo < 1, "less than one", "other"),
                    rep("other", 100)
                )
            )
        })
    })
})
Crunch-io/rcrunch documentation built on April 1, 2024, 1:14 a.m.