# tests/testthat/test-classify.R

test_that("classify_visits", {
    # Load the example tracking data and the domain list used as the
    # classification scheme, then derive the domain column to match on.
    data("testdt_tracking")
    data("domain_list")
    wt <- as.wt_dt(testdt_tracking)
    wt <- extract_domain(wt)
    wt_classes <- classify_visits(wt, classes = domain_list, match_by = "domain")
    # test existence of columns
    expect_true("type" %in% names(wt_classes))
    # test number of rows (classification must not add or drop visits);
    # expect_equal reports both values if they ever diverge
    expect_equal(nrow(wt_classes), nrow(wt))
    # test number of rows when filtering to class
    return_rows_val_param <- "search"
    wt_classes <- classify_visits(wt,
        classes = domain_list, match_by = "domain",
        return_rows_by = "type", return_rows_val = return_rows_val_param
    )
    expect_lt(nrow(wt_classes), nrow(wt))
    # exactly as many distinct classes remain as were requested ...
    expect_length(table(wt_classes$type), length(return_rows_val_param))
    # ... and every remaining row actually carries a requested class
    expect_true(all(wt_classes$type %in% return_rows_val_param))
})

test_that("classify_visits errors", {
    skip_on_cran()
    data("testdt_tracking")
    wt <- as.wt_dt(testdt_tracking)
    data("domain_list")
    # test error when wrong value given to match_by
    expect_error(classify_visits(wt, classes = domain_list, match_by = "not_a_value"))
    # test errors when wt does not have column to match
    # (wt has not been passed through extract_domain()/extract_host() yet)
    data("domain_list")
    expect_error(classify_visits(wt, classes = domain_list, match_by = "domain"))
    expect_error(classify_visits(wt, classes = domain_list, match_by = "host"))
    expect_error(classify_visits(wt, classes = domain_list, match_by = "regex"))
    expect_error(classify_visits(wt,
        classes = domain_list,
        match_by = "regex", regex_on = "not_a_variable"
    ))
    # test errors when classes does not have column to match
    data("testdt_tracking")
    wt <- as.wt_dt(testdt_tracking)
    wt <- extract_domain(wt)
    wt <- extract_host(wt)
    # rename the match column so that the classes data no longer offers "domain"
    names(domain_list)[names(domain_list) == "domain"] <- "domain_other"
    expect_error(classify_visits(wt, classes = domain_list, match_by = "domain"))
    expect_error(classify_visits(wt, classes = domain_list, match_by = "host"))
    expect_error(classify_visits(wt, classes = domain_list, match_by = "regex", regex_on = "domain"))
    # test error when return_rows_by but not return_rows_val specified
    # Restore the "domain" column first: with it still renamed, this call
    # errors for the same reason as the match_by = "domain" case above, so the
    # missing return_rows_val would never be what triggers the failure.
    names(domain_list)[names(domain_list) == "domain_other"] <- "domain"
    expect_error(classify_visits(wt, classes = domain_list, match_by = "domain", return_rows_by = "type"))
})

test_that("classify_visits testdt_specific", {
    # Pins the exact class counts obtained on the example tracking data.
    # Counts are tabulated once per classification and compared with
    # expect_equal so that a failure reports the actual number found.
    data("testdt_tracking")
    data("domain_list")
    wt <- as.wt_dt(testdt_tracking)
    wt <- extract_domain(wt)
    # test number of cases in each categories when classified via domain
    wt_classes <- classify_visits(wt, classes = domain_list, match_by = "domain")
    type_counts <- table(wt_classes$type)
    expect_equal(type_counts[["ebay"]], 39L)
    expect_equal(type_counts[["facebook"]], 1374L)
    expect_equal(type_counts[["news"]], 300L)
    expect_equal(type_counts[["portal"]], 623L)
    expect_equal(type_counts[["search"]], 2795L)
    expect_equal(type_counts[["twitter"]], 1910L)
    expect_equal(sum(is.na(wt_classes$type)), 42571L)
    # test number of cases in each categories when classified via host
    # (this should give much fewer cases, as list is on domain not host level)
    wt <- extract_host(wt)
    names(domain_list)[1] <- "host"
    wt_classes <- classify_visits(wt, classes = domain_list, match_by = "host")
    type_counts <- table(wt_classes$type)
    expect_equal(type_counts[["news"]], 6L)
    expect_equal(type_counts[["newsportals"]], 214L)
    expect_equal(type_counts[["twitter"]], 1908L)
    expect_equal(sum(is.na(wt_classes$type)), 47484L)
    # test number of cases when classified via regex
    # (the facebook rows of the list are used as patterns against the host)
    regex_list <- domain_list[domain_list$type == "facebook", ]
    wt_classes <- classify_visits(wt, classes = regex_list, match_by = "regex", regex_on = "host")
    type_counts <- table(wt_classes$type)
    expect_equal(type_counts[["facebook"]], 1374L)
    expect_equal(sum(is.na(wt_classes$type)), 48238L)
    # test number of cases when only rows classified as "search" returned
    data("testdt_tracking")
    data("domain_list")
    # make sure the first column is called "domain" again after the rename above
    names(domain_list)[1] <- "domain"
    wt <- as.wt_dt(testdt_tracking)
    wt <- extract_domain(wt)
    wt_search <- classify_visits(wt,
        classes = domain_list, match_by = "domain",
        return_rows_by = "type", return_rows_val = "search"
    )
    expect_equal(nrow(wt_search), 2795L)
})

# Try the webtrackR package in your browser

# Any scripts or data that you put into this service are public.

# webtrackR documentation built on May 29, 2024, 10:02 a.m.