# tests/testthat/test-summarize.R

# Labels the tests in this file: summarize() applied to a CrunchDataset.
# NOTE(review): context() is deprecated in testthat's 3rd edition -- confirm
# which edition this package targets before removing or keeping it.
context("'summarize' on CrunchDataset")

with_mock_crunch({

    ## Fixture dataset created from mtcars. It is loaded once here and reused
    ## as `ds` by the tests below that query it.
    ## NOTE(review): presumably with_mock_crunch() serves this URL from local
    ## fixtures rather than the network -- confirm against the crunch package.
    ds <- loadDataset("https://app.crunch.io/api/datasets/mtcars/")

    ## Test helper: capture the expressions passed in `...` with
    ## rlang::enquos() and hand the result to dots_to_formula(), so a test can
    ## write the summary expressions unquoted.
    d2f <- function(...) {
        captured <- rlang::enquos(...)
        dots_to_formula(captured)
    }

    test_that("dots_to_formula", {
        # Named summary expressions are expected to become the left-hand side
        # of a formula whose right-hand side is 1.
        expect_equal(
            object = d2f(avg = mean(birthyr), ct = n()),
            expected = list(avg = mean(birthyr), ct = n()) ~ 1
        )
    })

    test_that("summarize makes a cube request", {
        # Ungrouped, single measure: the result should be a 1 x 1 tibble whose
        # only column is named after the measure.
        result <- summarize(ds, hp = mean(hp))

        expect_is(result, "tbl_df")
        expect_identical(dim(result), c(1L, 1L))
        expect_identical(names(result), "hp")

        mean_hp <- as.numeric(result[1, 1])
        expect_equal(mean_hp, 146.6875)
    })

    test_that("summarize can handle multiple measures", {
        # Three measures in one ungrouped call: expect one row, three columns,
        # in the order the measures were requested.
        measures <- summarize(
            ds,
            hp = mean(hp),
            sd_hp = sd(hp),
            count = n()
        )

        expect_is(measures, "tbl_df")
        expect_identical(dim(measures), c(1L, 3L))
        expect_identical(names(measures), c("hp", "sd_hp", "count"))

        # First column is the mean, third column is the count.
        expect_equal(as.numeric(measures[1, 1]), 146.6875)
        expect_equal(as.numeric(measures[1, 3]), 32)
    })

    test_that("summarise_ warning", {
        # Despite the test name, the assertion is that the call fails with an
        # error; any warnings raised along the way are silenced first.
        expect_error(
            object = suppressWarnings(
                summarise_(
                    ds,
                    .dots = c(hp = "mean(hp)", sd_hp = "sd(hp)", count = "n()")
                )
            ),
            regexp = "The summarise_.* function is no longer supported. Please use summarise.* instead"
        )
    })

    test_that("summarize after filter", {
        # Restrict to cyl == 6 first, then summarize without any grouping.
        six_cyl <- ds %>%
            filter(cyl == 6) %>%
            summarize(
                hp = mean(hp),
                sd_hp = sd(hp),
                count = n()
            )

        expect_is(six_cyl, "tbl_df")
        expect_identical(dim(six_cyl), c(1L, 3L))
        expect_identical(names(six_cyl), c("hp", "sd_hp", "count"))

        # Third column is the count of rows that passed the filter.
        expect_equal(as.numeric(six_cyl[1, 3]), 7)
    })

    test_that("group_by and summarize", {
        # One grouping variable: the expected output has the group column and
        # an is_missing column ahead of the three measures.
        by_vs <- ds %>%
            group_by(vs) %>%
            summarize(
                hp = mean(hp),
                sd_hp = sd(hp),
                count = n()
            )

        expect_is(by_vs, "tbl_df")
        expect_identical(dim(by_vs), c(2L, 5L))
        expect_identical(
            names(by_vs),
            c("vs", "is_missing", "hp", "sd_hp", "count")
        )

        vs_is_one <- by_vs$vs == 1
        expect_equal(as.numeric(by_vs[vs_is_one, "count"]), 14)
    })

    test_that("group_by two vars and summarize", {
        # Two grouping variables: both group columns come first, followed by
        # is_missing and then the measures.
        by_vs_gear <- ds %>%
            group_by(vs, gear) %>%
            summarize(
                hp = mean(hp),
                sd_hp = sd(hp),
                count = n()
            )

        expect_is(by_vs_gear, "tbl_df")
        expect_identical(dim(by_vs_gear), c(6L, 6L))
        expect_identical(
            names(by_vs_gear),
            c("vs", "gear", "is_missing", "hp", "sd_hp", "count")
        )

        # Pick out the vs == 1, gear == 4 cell and check its count.
        matching <- filter(by_vs_gear, vs == 1 & gear == 4)
        expect_equal(as.numeric(matching$count), 10)
    })

    test_that("filter, group_by, and summarize", {
        # Filter first, then group: same output shape as the unfiltered
        # single-group case, but with counts limited to cyl == 6.
        six_cyl_by_vs <- ds %>%
            filter(cyl == 6) %>%
            group_by(vs) %>%
            summarize(
                hp = mean(hp),
                sd_hp = sd(hp),
                count = n()
            )

        expect_is(six_cyl_by_vs, "tbl_df")
        expect_identical(dim(six_cyl_by_vs), c(2L, 5L))
        expect_identical(
            names(six_cyl_by_vs),
            c("vs", "is_missing", "hp", "sd_hp", "count")
        )

        vs_is_one <- six_cyl_by_vs$vs == 1
        expect_equal(as.numeric(six_cyl_by_vs[vs_is_one, "count"]), 4)
    })

    test_that("group_by, filter, group_by, and summarize", {
        # Grouping is built up in two steps around a filter; .add=TRUE appends
        # gear to the existing vs grouping instead of replacing it.
        regrouped <- ds %>%
            group_by(vs) %>%
            filter(cyl == 6) %>%
            group_by(gear, .add = TRUE) %>%
            summarize(
                hp = mean(hp),
                sd_hp = sd(hp),
                count = n()
            )

        expect_is(regrouped, "tbl_df")
        expect_identical(dim(regrouped), c(6L, 6L))
        expect_identical(
            names(regrouped),
            c("vs", "gear", "is_missing", "hp", "sd_hp", "count")
        )

        # Pick out the vs == 1, gear == 4 cell and check its count.
        matching <- filter(regrouped, vs == 1 & gear == 4)
        expect_equal(as.numeric(matching$count), 2)
    })

    test_that("unweighted_n as sole summary", {
        # unweighted_n() on its own should still yield a 1 x 1 result.
        only_n <- ds %>% summarize(n = unweighted_n())

        expect_equal(dim(only_n), c(1, 1))
        expect_equal(names(only_n), "n")
    })

    test_that("Two unweighted_n's with nothing else", {
        # Asking for the same unweighted count twice is pathological, but if a
        # user does it nothing should break and both columns should come back.
        both_n <- ds %>%
            summarize(
                n = unweighted_n(),
                another_n = unweighted_n()
            )

        expect_equal(dim(both_n), c(1, 2))
        expect_equal(names(both_n), c("n", "another_n"))

        # The two columns are the same statistic, so they must agree.
        expect_true(both_n$n == both_n$another_n)
    })

    test_that("unweighted_n with other summary statistic", {
        # Mix unweighted_n() with an ordinary measure (note the British
        # spelling summarise() is exercised here).
        mean_and_n <- ds %>%
            summarise(
                mean = mean(hp),
                n = unweighted_n()
            )

        expect_equal(dim(mean_and_n), c(1, 2))
        expect_equal(names(mean_and_n), c("mean", "n"))
    })

    test_that("unweighted_n with groups", {
        # Grouped by two variables: expect the two group columns, is_missing,
        # and then the two requested summaries.
        grouped_n <- ds %>%
            group_by(cyl, gear) %>%
            summarize(
                mean = mean(hp),
                n = unweighted_n()
            )

        expect_equal(dim(grouped_n), c(9, 5))
        expect_equal(
            names(grouped_n),
            c("cyl", "gear", "is_missing", "mean", "n")
        )
    })

    test_that("unweighted_n errors when called directly", {
        # Calling unweighted_n() on its own, with no enclosing summarize(),
        # must fail with this message.
        expect_error(
            object = unweighted_n(),
            regexp = "This function cannot be called outside of a summarize call."
        )
    })
})

# Try the crplyr package in your browser
#
# Any scripts or data that you put into this service are public.
#
# crplyr documentation built on March 31, 2023, 9:30 p.m.