tests/testthat/test-data.R

require(quanteda)
require(stringi)
require(testthat)

check_character <- function(x) {
    x <- unlist(x)
    k <- duplicated(x)
    #if (any(k))
    #    cat("Duplicated:\n", paste0(x[k], collapse = "\n"), "\n\n")

    ## \\u30fb katakana middle dot
    ## \\p{M} combining character
    l <- stri_detect_regex(x, "^[\\p{L}\\p{M}'\"*?\\-\\u30fb ]+$")
    #if (!all(l))
    #    cat("Invalid:\n", paste0(x[!l], collapse = "\n"), "\n\n")
    expect_true(all(l))
}

test_that("test that yaml do not contain illegal letters", {
    check_character(data_dictionary_newsmap_en)
    check_character(data_dictionary_newsmap_de)
    check_character(data_dictionary_newsmap_fr)
    check_character(data_dictionary_newsmap_es)
    check_character(data_dictionary_newsmap_pt)
    check_character(data_dictionary_newsmap_ja)
    check_character(data_dictionary_newsmap_ru)
    check_character(data_dictionary_newsmap_he)
    check_character(data_dictionary_newsmap_ar)
    check_character(data_dictionary_newsmap_zh_cn)
    check_character(data_dictionary_newsmap_zh_tw)
})

test_that("test that data file is created correctly", {

    expect_equal(length(names(data_dictionary_newsmap_en)), 5)
    expect_equal(length(names(data_dictionary_newsmap_de)), 5)
    expect_equal(length(names(data_dictionary_newsmap_fr)), 5)
    expect_equal(length(names(data_dictionary_newsmap_es)), 5)
    expect_equal(length(names(data_dictionary_newsmap_pt)), 5)
    expect_equal(length(names(data_dictionary_newsmap_ja)), 5)
    expect_equal(length(names(data_dictionary_newsmap_ru)), 5)
    expect_equal(length(names(data_dictionary_newsmap_it)), 5)
    expect_equal(length(names(data_dictionary_newsmap_he)), 5)
    expect_equal(length(names(data_dictionary_newsmap_ar)), 5)
    expect_equal(length(names(data_dictionary_newsmap_zh_cn)), 5)
    expect_equal(length(names(data_dictionary_newsmap_zh_tw)), 5)
})


test_that("test that dictionaries have the same countries", {

    en <- names(quanteda:::flatten_dictionary(data_dictionary_newsmap_en))
    de <- names(quanteda:::flatten_dictionary(data_dictionary_newsmap_de))
    fr <- names(quanteda:::flatten_dictionary(data_dictionary_newsmap_fr))
    es <- names(quanteda:::flatten_dictionary(data_dictionary_newsmap_es))
    pt <- names(quanteda:::flatten_dictionary(data_dictionary_newsmap_pt))
    ja <- names(quanteda:::flatten_dictionary(data_dictionary_newsmap_ja))
    ru <- names(quanteda:::flatten_dictionary(data_dictionary_newsmap_ru))
    it <- names(quanteda:::flatten_dictionary(data_dictionary_newsmap_it))
    he <- names(quanteda:::flatten_dictionary(data_dictionary_newsmap_he))
    ar <- names(quanteda:::flatten_dictionary(data_dictionary_newsmap_ar))
    zh_cn <- names(quanteda:::flatten_dictionary(data_dictionary_newsmap_zh_cn))
    zh_tw <- names(quanteda:::flatten_dictionary(data_dictionary_newsmap_zh_tw))

    expect_true(identical(en, de))
    expect_true(identical(en, fr))
    expect_true(identical(en, es))
    expect_true(identical(en, pt))
    expect_true(identical(en, ja))
    expect_true(identical(en, ru))
    expect_true(identical(en, it))
    expect_true(identical(en, he))
    expect_true(identical(en, ar))
    expect_true(identical(en, zh_cn))
    expect_true(identical(en, zh_tw))
})
koheiw/Newsmap documentation built on Feb. 22, 2024, 4:56 p.m.