tests/testthat/test-conditional-transform.R

context("Conditional transform")

with_mock_crunch({
    ds <- cachedLoadDataset("test ds")
    ds2 <- cachedLoadDataset("ECON.sav")

    test_that("conditionalTransform input validation", {
        expect_error(
            conditionalTransform("gender", data = ds),
            paste0(
                "Conditions must be supplied: Have you forgotten ",
                "to supply conditions as formulas in either the",
                " .*formulas.* argument, or through .*....*"
            )
        )

        expect_error(
            conditionalTransform(TRUE ~ "foo"),
            paste0(
                "There must be at least one crunch expression in the formulas ",
                "specifying cases or use the data argument to specify a dataset."
            )
        )

        # but sending the dataset alone does work
        expect_silent(new_var <- conditionalTransform(TRUE ~ "foo", data = ds))
        expect_equal(new_var$values, c(
            c("foo", rep(NA, nrow(ds) - 1))
        ))
        expect_equal(new_var$type, "text")

        expect_error(
            conditionalTransform("bar" ~ "foo", data = ds),
            paste0(
                "The left-hand side provided must be a logical or a ",
                "CrunchLogicalExpr: .*bar.*"
            )
        )

        expect_error(
            conditionalTransform(gender ~ "foo", data = ds, type = "unknown"),
            paste0(
                "Type must be either ", dQuote("categorical"), ", ",
                dQuote("text"), ", or ", dQuote("numeric")
            )
        )

        expect_warning(
            conditionalTransform(gender == "Male" ~ "foo",
                data = ds, type = "text",
                categories = c("foo", "bar")
            ),
            paste0(
                "Type is not ", dQuote("categorical"), " ignoring ",
                dQuote("categories")
            )
        )
        # check that we can't reference two different datasets
        expect_error(
            conditionalTransform(ds$gender == "Male" ~ "foo",
                ds2$gender == "Male" ~ "foo",
                name = "new"
            ),
            paste0(
                "There must be only one dataset referenced. Did ",
                "you accidentally supply more than one?"
            )
        )

        # updated to use a categorical variable as the source
        expect_error(
            conditionalTransform(gender == "Male" ~ textVar,
                data = ds,
                type = "categorical",
                categories = c("l", "m", "s", "h", "z")
            ),
            paste0(
                "When specifying categories, all categories in the",
                " results must be included. These categories are ",
                "in the results that were not specified in ",
                "categories: x"
            )
        )

        # we can't provide conditions in both ... and formulas
        expect_error(
            conditionalTransform(gender == "Male" ~ textVar,
                data = ds,
                type = "categorical",
                categories = c("l", "m", "s", "h", "z"),
                formulas = list(
                    gender == "Male" ~ textVar
                )
            ),
            paste0(
                "Must not supply conditions in both the ",
                ".*formulas.* argument and .*....*"
            )
        )
    })

    test_that("conditionalTransform works with categories", {
        expect_silent(new_var <- conditionalTransform(gender == "Male" ~ textVar,
            data = ds,
            type = "categorical"
        ))
        expect_equal(new_var$values, c(
            -1, -1, -1, -1, -1, -1, 2, -1, -1, -1,
            3, -1, 4, -1, -1, -1, -1, -1, -1, -1, 1,
            6, 3, -1, 5
        ))
        expect_equal(new_var$type, "categorical")

        # and we can use the formulas arg
        expect_silent(new_var <- conditionalTransform(
            data = ds,
            type = "categorical",
            formulas = list(
                gender == "Male" ~ textVar
            )
        ))
        expect_equal(new_var$values, c(
            -1, -1, -1, -1, -1, -1, 2, -1, -1, -1,
            3, -1, 4, -1, -1, -1, -1, -1, -1, -1, 1,
            6, 3, -1, 5
        ))
        expect_equal(new_var$type, "categorical")

        expect_silent(new_var <- conditionalTransform(ds$gender == "Male" ~ ds$textVar,
            type = "categorical"
        ))
        expect_equal(new_var$values, c(
            -1, -1, -1, -1, -1, -1, 2, -1, -1, -1,
            3, -1, 4, -1, -1, -1, -1, -1, -1, -1, 1,
            6, 3, -1, 5
        ))
        expect_equal(new_var$type, "categorical")
    })

    test_that("conditionalTransform works when specifying a categories as strings", {
        expect_silent(new_var <-
            conditionalTransform(gender == "Male" ~ textVar,
                data = ds,
                type = "categorical",
                categories = c(
                    "l", "m", "s",
                    "h", "z", "x"
                )
            ))
        expect_equal(new_var$values, c(
            -1, -1, -1, -1, -1, -1, 1, -1, -1, -1,
            2, -1, 3, -1, -1, -1, -1, -1, -1, -1, 4,
            5, 2, -1, 6
        ))
        expect_equal(new_var$type, "categorical")
    })

    test_that("conditionalTransform works when specifying a categories object", {
        # use different numeric values and missingnesses to check that the
        # categories object is being sent
        textVarCats <- Categories(
            list(id = 1L, name = "l", numeric_value = 10L, missing = FALSE),
            list(id = 2L, name = "m", numeric_value = 20L, missing = TRUE),
            list(id = 3L, name = "s", numeric_value = 30L, missing = FALSE),
            list(id = 4L, name = "h", numeric_value = 40L, missing = TRUE),
            list(id = 5L, name = "z", numeric_value = 50L, missing = FALSE),
            list(id = 6L, name = "x", numeric_value = 60L, missing = TRUE)
        )
        no_data_cat <- Categories(
            list(id = -1L, name = "No Data", numeric_value = NULL, missing = TRUE)
        )
        textVarCats <- Categories(data = textVarCats)
        expect_true(is.categories(textVarCats))
        expect_silent(new_var <- conditionalTransform(gender == "Male" ~ textVar,
            data = ds, type = "categorical",
            categories = textVarCats
        ))
        expect_equal(new_var$values, c(
            -1, -1, -1, -1, -1, -1, 1, -1, -1, -1,
            2, -1, 3, -1, -1, -1, -1, -1, -1, -1, 4,
            5, 2, -1, 6
        ))
        expect_equal(new_var$type, "categorical")
        expect_json_equivalent(new_var$categories, c(textVarCats, no_data_cat))

        # reverse the ids to make sure that the ids are not being over-written
        textVarCats <- Categories(
            list(id = 6L, name = "l", numeric_value = 10L, missing = FALSE),
            list(id = 5L, name = "m", numeric_value = 20L, missing = FALSE),
            list(id = 4L, name = "s", numeric_value = 30L, missing = FALSE),
            list(id = 3L, name = "h", numeric_value = 40L, missing = FALSE),
            list(id = 2L, name = "z", numeric_value = 50L, missing = FALSE),
            list(id = 1L, name = "x", numeric_value = 60L, missing = FALSE)
        )
        textVarCats <- Categories(data = textVarCats)
        expect_true(is.categories(textVarCats))
        expect_silent(new_var <- conditionalTransform(gender == "Male" ~ textVar,
            data = ds,
            type = "categorical",
            categories = textVarCats
        ))
        # expect_equal(new_var$values, c(-1, -1, -1, -1, -1, -1, 1, -1, -1, -1,
        #                                2, -1, 3, -1, -1, -1, -1, -1, -1, -1, 4,
        #                                5, 2, -1, 6)) # for standard IDs
        expect_equal(new_var$values, c(
            -1, -1, -1, -1, -1, -1, 6, -1, -1, -1,
            5, -1, 4, -1, -1, -1, -1, -1, -1, -1, 3,
            2, 5, -1, 1
        )) # for reversed IDs
        expect_equal(new_var$type, "categorical")
        expect_json_equivalent(new_var$categories, c(textVarCats, no_data_cat))
    })

    test_that("conditionalTransform works when specifying a categories erroneously", {
        textVarCats <- Categories(
            list(id = 1L, name = "l", numeric_value = 10L, missing = FALSE),
            list(id = 2L, name = "m", numeric_value = 20L, missing = TRUE),
            list(id = 3L, name = "s", numeric_value = 30L, missing = FALSE),
            list(id = 4L, name = "h", numeric_value = 40L, missing = TRUE),
            list(id = 5L, name = "z", numeric_value = 50L, missing = FALSE),
            list(id = 6L, name = "x", numeric_value = 60L, missing = TRUE)
        )
        no_data_cat <- Categories(
            list(id = -1L, name = "No Data", numeric_value = NULL, missing = TRUE)
        )
        textVarCats <- Categories(data = textVarCats)
        expect_true(is.categories(textVarCats))
        expect_warning(
            new_var <- conditionalTransform(gender == "Male" ~ textVar,
                data = ds,
                categories = textVarCats
            ),
            paste0(
                "Type is not ", dQuote("categorical"),
                " ignoring ", dQuote("categories")
            )
        )
        expect_equal(new_var$values, c(
            NA, NA, NA, NA, NA, NA, "l", NA, NA, NA, "m",
            NA, "s", NA, NA, NA, NA, NA, NA, NA, "h", "z",
            "m", NA, "x"
        ))
        expect_equal(new_var$type, "text")
    })

    test_that("conditionalTransform works with other output types (text and numeric)", {
        expect_silent(new_var <- conditionalTransform(gender == "Male" ~ textVar,
            data = ds,
            type = "text"
        ))
        expect_equal(new_var$values, c(
            NA, NA, NA, NA, NA, NA, "l", NA, NA, NA, "m",
            NA, "s", NA, NA, NA, NA, NA, NA, NA, "h", "z",
            "m", NA, "x"
        ))
        expect_equal(new_var$type, "text")

        expect_silent(new_var <- conditionalTransform(gender == "Male" ~ "guy",
            data = ds,
            type = "text"
        ))
        expect_equal(new_var$values, c(
            NA, NA, NA, NA, NA, NA, "guy", NA, NA, NA, "guy",
            NA, "guy", NA, NA, NA, NA, NA, NA, NA, "guy", "guy",
            "guy", NA, "guy"
        ))
        expect_equal(new_var$type, "text")

        expect_silent(new_var <- conditionalTransform(gender == "Male" ~ 1,
            data = ds,
            type = "numeric"
        ))
        expect_equal(new_var$values, c(
            NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, 1,
            NA, 1, NA, NA, NA, NA, NA, NA, NA, 1, 1,
            1, NA, 1
        ))
        expect_equal(new_var$type, "numeric")
    })

    test_that("makeConditionalValues", {
        # need to develop mocks for other conditions and sources to test various
        # permutations.
    })

    #####################
    ### check collation
    #####################
    values_to_fill <- list(
        c("A", "A"), c("B", "B"),
        c("C", "C"), c("D", "D")
    )
    case_indices <- list(c(1, 3), c(2, 4), c(5, 7), c(6, 8))
    else_condition <- NA
    n_rows <- 8

    test_that("collateValues works with all characters", {
        expect_equal(
            collateValues(
                values_to_fill, case_indices, else_condition,
                n_rows
            ),
            c("A", "B", "A", "B", "C", "D", "C", "D")
        )
    })

    test_that("collateValues works with all NAs", {
        values_to_fill <- list(
            c("A", "A"), c("B", "B"),
            c("C", "C"), c("D")
        )
        case_indices <- list(c(1, 3), c(2, 4), c(5, 7), c(8))
        expect_equal(
            collateValues(
                values_to_fill, case_indices, else_condition,
                n_rows
            ),
            c("A", "B", "A", "B", "C", NA, "C", "D")
        )

        values_to_fill <- list(
            c("A", "A"), c("B", "B"),
            c("C", NA), c("D")
        )
        expect_equal(
            collateValues(
                values_to_fill, case_indices, else_condition,
                n_rows
            ),
            c("A", "B", "A", "B", "C", NA, NA, "D")
        )
    })

    test_that("collateValues works with factors", {
        values_to_fill <- list(
            factor(c("A", "A")), factor(c("B", "B")),
            factor(c("C", "C")), factor(c("D", "D"))
        )
        expect_equal(
            collateValues(
                values_to_fill, case_indices, else_condition,
                n_rows
            ),
            c("A", "B", "A", "B", "C", "D", "C", "D")
        )

        values_to_fill[[3]] <- NULL
        case_indices[[3]] <- NULL
        expect_equal(
            collateValues(
                values_to_fill, case_indices, else_condition,
                n_rows
            ),
            c("A", "B", "A", "B", NA, "D", NA, "D")
        )
    })

    test_that("collateValues works with numerics", {
        values_to_fill <- list(
            c(10, 10), c(20, 20),
            c(30, 30), c(40, 40)
        )
        expect_equal(
            collateValues(
                values_to_fill, case_indices, else_condition,
                n_rows
            ),
            c(10, 20, 10, 20, 30, 40, 30, 40)
        )

        values_to_fill[[3]] <- NULL
        case_indices[[3]] <- NULL
        expect_equal(
            collateValues(
                values_to_fill, case_indices, else_condition,
                n_rows
            ),
            c(10, 20, 10, 20, NA, 40, NA, 40)
        )
    })

    test_that("collateValues works with character+numeric", {
        values_to_fill <- list(
            c(10, 10), c("B", "B"),
            c(30, 30), c("D", "D")
        )
        expect_equal(
            collateValues(
                values_to_fill, case_indices, else_condition,
                n_rows
            ),
            c("10", "B", "10", "B", "30", "D", "30", "D")
        )

        values_to_fill[[3]] <- NULL
        case_indices[[3]] <- NULL
        expect_equal(
            collateValues(
                values_to_fill, case_indices, else_condition,
                n_rows
            ),
            c("10", "B", "10", "B", NA, "D", NA, "D")
        )
    })

    test_that("collateValues works with character+numeric+factor", {
        values_to_fill <- list(
            c(10, 10), factor(c("B", "B")),
            c(30, 30), c("D", "D")
        )
        expect_equal(
            collateValues(
                values_to_fill, case_indices, else_condition,
                n_rows
            ),
            c("10", "B", "10", "B", "30", "D", "30", "D")
        )

        values_to_fill[[3]] <- NULL
        case_indices[[3]] <- NULL
        expect_equal(
            collateValues(
                values_to_fill, case_indices, else_condition,
                n_rows
            ),
            c("10", "B", "10", "B", NA, "D", NA, "D")
        )
    })
})

with_test_authentication({
    ds <- newDatasetFromFixture("apidocs")
    test_that("conditionalTransform", {
        ds$new0 <- conditionalTransform(ndogs < 1 ~ country,
            ndogs == 1 ~ q3,
            ndogs > 1 ~ ndogs,
            data = ds
        )
        expect_equal(as.vector(ds$new0), c(
            "Jasmine", NA, "2", "3", "Zeus",
            "2", "2", "3", "2", "2", "2", NA,
            "3", "Belgium", "6", "Fluffy",
            NA, "Austria", NA, "2"
        ))
    })
    test_that("conditionalTransform with else_condition", {
        ds$new1 <- conditionalTransform(ndogs < 1 ~ country,
            ndogs == 1 ~ q3,
            ndogs > 1 ~ ndogs,
            data = ds, else_condition = "other"
        )
        expect_equal(as.vector(ds$new1), c(
            "Jasmine", "other", "2", "3", "Zeus", "2", "2", "3",
            "2", "2", "2", "other", "3", "Belgium", "6", "Fluffy",
            "other", "Austria", "other", "2"
        ))
    })
    test_that("conditionalTransform with else_condition and formula lsit", {
        ds$new1_again <- conditionalTransform(
            data = ds, else_condition = "other",
            formulas = list(
                ndogs < 1 ~ country,
                ndogs == 1 ~ q3,
                ndogs > 1 ~ ndogs
            )
        )
        expect_equal(as.vector(ds$new1_again), c(
            "Jasmine", "other", "2", "3", "Zeus", "2", "2", "3",
            "2", "2", "2", "other", "3", "Belgium", "6", "Fluffy",
            "other", "Austria", "other", "2"
        ))
    })
    test_that("conditionalTransform with text", {
        ds$new2 <- conditionalTransform(ndogs < 1 ~ country,
            ndogs == 1 ~ q3,
            ndogs > 1 ~ ndogs,
            data = ds, type = "categorical"
        )
        expect_equal(as.vector(ds$new2), factor(c(
            "Jasmine", NA, "2", "3", "Zeus", "2", "2", "3",
            "2", "2", "2", NA, "3", "Belgium", "6", "Fluffy",
            NA, "Austria", NA, "2"
        )))
    })
    test_that("conditionalTransform with numeric", {
        ds$new3 <- conditionalTransform(ndogs < 1 ~ 200,
            ndogs == 1 ~ 400,
            ndogs > 1 ~ ndogs,
            data = ds, type = "numeric"
        )
        expect_equal(as.vector(ds$new3), c(
            400, NA, 2, 3, 400, 2, 2, 3,
            2, 2, 2, NA, 3, 200, 6, 400,
            NA, 200, NA, 2
        ))
    })
    test_that("conditionalTransform with a sole string as source", {
        ds$new4 <- conditionalTransform(ndogs < 1 ~ "lonely",
            ndogs == 1 ~ q3,
            ndogs > 1 ~ ndogs,
            data = ds
        )
        expect_equal(as.vector(ds$new4), c(
            "Jasmine", NA, "2", "3", "Zeus", "2", "2", "3",
            "2", "2", "2", NA, "3", "lonely", "6", "Fluffy",
            NA, "lonely", NA, "2"
        ))
    })
    test_that("conditionalTransform with categories", {
        ds$new5 <- conditionalTransform(ndogs < 1 ~ "lonely",
            ndogs == 1 ~ q3,
            ndogs > 1 ~ ndogs,
            data = ds,
            type = "categorical",
            categories = c(
                "lonely", "Zeus",
                "Jasmine", "Fluffy",
                "2", "3", "6"
            )
        )
        expect_equal(as.vector(ds$new5), factor(c(
            "Jasmine", NA, "2", "3", "Zeus", "2", "2", "3",
            "2", "2", "2", NA, "3", "lonely", "6", "Fluffy",
            NA, "lonely", NA, "2"
        ),
        levels = c("lonely", "Zeus", "Jasmine", "Fluffy", "2", "3", "6")
        ))
    })
    test_that("conditionalTransform with NAs", {
        ds$new6 <- conditionalTransform(ndogs < 1 ~ "lonely",
            ndogs == 1 ~ q3,
            ndogs > 1 ~ ndogs,
            is.na(ndogs) ~ "not applicable",
            data = ds
        )
        expect_equal(as.vector(ds$new6), c(
            "Jasmine", "not applicable", "2", "3", "Zeus", "2", "2", "3",
            "2", "2", "2", "not applicable", "3", "lonely", "6", "Fluffy",
            "not applicable", "lonely", "not applicable", "2"
        ))
        ds$new7 <- conditionalTransform(ndogs < 1 ~ NA,
            ndogs == 1 ~ q3,
            ndogs > 1 ~ ndogs,
            data = ds
        )
        expect_equal(as.vector(ds$new7), c(
            "Jasmine", NA, "2", "3", "Zeus", "2", "2", "3",
            "2", "2", "2", NA, "3", NA, "6", "Fluffy",
            NA, NA, NA, "2"
        ))
    })

    test_that("conditionalTransform with an exclusion set with a text varaible", {
        exclusion(ds) <- ds$ndogs > 2
        ds$new8 <- conditionalTransform(ndogs < 1 ~ 0,
            ndogs == 1 ~ 4,
            ndogs > 1 ~ ndogs,
            is.na(ndogs) ~ 5,
            data = ds
        )
        expect_equal(as.vector(ds$new8), c(
            4, 5, 2, 4, 2, 2,
            2, 2, 2, 5, 0, 4,
            5, 0, 5, 2
        ))
        # and after the exclusion is removed, we get NAs.
        exclusion(ds) <- NULL
        expect_equal(as.vector(ds$new8), c(
            4, 5, 2, NA, 4, 2, 2, NA,
            2, 2, 2, 5, NA, 0, NA, 4,
            5, 0, 5, 2
        ))
    })

    test_that("conditionalTransform with an exclusion set with a text varaible", {
        exclusion(ds) <- ds$ndogs > 2
        ds$new9 <- conditionalTransform(ndogs < 1 ~ "lonely",
            ndogs == 1 ~ q3,
            ndogs > 1 ~ ndogs,
            is.na(ndogs) ~ "not applicable",
            type = "categorical",
            data = ds
        )
        expect_equal(as.vector(ds$new9), as.factor(c(
            "Jasmine", "not applicable", "2", "Zeus", "2", "2",
            "2", "2", "2", "not applicable", "lonely", "Fluffy",
            "not applicable", "lonely", "not applicable", "2"
        )))
        # and after the exclusion is removed, we get NAs.
        exclusion(ds) <- NULL
        expect_equal(as.vector(ds$new9), as.factor(c(
            "Jasmine", "not applicable", "2", NA, "Zeus", "2", "2", NA,
            "2", "2", "2", "not applicable", NA, "lonely", NA, "Fluffy",
            "not applicable", "lonely", "not applicable", "2"
        )))
    })

    test_that("conditionalTransform with an exclusion set with a text varaible", {
        exclusion(ds) <- ds$ndogs > 2
        ds$new10 <- conditionalTransform(ndogs < 1 ~ "lonely",
            ndogs == 1 ~ q3,
            ndogs > 1 ~ ndogs,
            is.na(ndogs) ~ "not applicable",
            data = ds
        )
        expect_equal(as.vector(ds$new10), c(
            "Jasmine", "not applicable", "2", "Zeus", "2", "2",
            "2", "2", "2", "not applicable", "lonely", "Fluffy",
            "not applicable", "lonely", "not applicable", "2"
        ))
        # and after the exclusion is removed, we get NAs.
        exclusion(ds) <- NULL
        expect_equal(as.vector(ds$new10), c(
            "Jasmine", "not applicable", "2", NA, "Zeus", "2", "2", NA,
            "2", "2", "2", "not applicable", NA, "lonely", NA, "Fluffy",
            "not applicable", "lonely", "not applicable", "2"
        ))
    })
})

Try the crunch package in your browser

Any scripts or data that you put into this service are public.

crunch documentation built on Aug. 31, 2023, 1:07 a.m.