# tests/testthat/test-utilities.R

test_that("make_or_pattern() works as expected", {
  ab <- c("a", "b")

  # Omitting pattern_type gives the same result as asking for "exact".
  exact_ab <- "^a$|^b$"
  expect_equal(make_or_pattern(ab), exact_ab)
  expect_equal(make_or_pattern(ab, pattern_type = "exact"), exact_ab)

  # Anchors go at the start, at the end, or nowhere.
  expect_equal(make_or_pattern(ab, pattern_type = "leading"), "^a|^b")
  expect_equal(make_or_pattern(ab, pattern_type = "trailing"), "a$|b$")
  expect_equal(make_or_pattern(ab, pattern_type = "anywhere"), "a|b")

  # "literal" hands the strings back as they came in, without joining them.
  expect_equal(make_or_pattern(ab, pattern_type = "literal"), ab)

  # A third string is joined in the same way as the first two.
  expect_equal(make_or_pattern(c("a", "b", "c")), "^a$|^b$|^c$")
})


test_that('prepositions make good "or" patterns', {
  # One regex that finds any of the package's prepositions anywhere in a string.
  any_preposition <- make_or_pattern(RCLabels::prepositions_list,
                                     pattern_type = "anywhere")
  candidates <- c("a [of b]", "cat [from d]", "haunted [-> house]")
  hits <- gregexpr(any_preposition, candidates)

  # Start position of the single match in each string
  # (attributes such as match.length are checked separately below).
  expect_equal(hits, list(4, 6, 10), ignore_attr = TRUE)

  # Match lengths correspond to "of", "from", and "->".
  expect_equal(attr(hits[[1]], which = "match.length"), 2)
  expect_equal(attr(hits[[2]], which = "match.length"), 4)
  expect_equal(attr(hits[[3]], which = "match.length"), 2)
})


test_that("match_by_pattern() works as expected for string matches", {
  rc_labels <- c("Production [of b in c]", "d [of Coal in f]", "g [of h in USA]")
  first_only <- c(TRUE, FALSE, FALSE)

  # Plain substrings: each one occurs in exactly one of the labels.
  expect_equal(match_by_pattern(rc_labels, regex_pattern = "Production"),
               first_only)
  expect_equal(match_by_pattern(rc_labels, regex_pattern = "Coal"),
               c(FALSE, TRUE, FALSE))
  expect_equal(match_by_pattern(rc_labels, regex_pattern = "USA"),
               c(FALSE, FALSE, TRUE))

  # Anchored patterns: "Production" sits at the start of the first label ...
  expect_equal(match_by_pattern(rc_labels, regex_pattern = "^Production"),
               first_only)
  # ... and never at the end of a label, so an end anchor matches nothing.
  expect_equal(match_by_pattern(rc_labels, regex_pattern = "Production$"),
               c(FALSE, FALSE, FALSE))
})


test_that("match_by_pattern() works for prefixes and suffixes", {
  rc_labels <- c("Production [of b in c]", "d [of Coal in f]", "g [of h in USA]")

  # When pieces contains "pref" (or "suff"), it must have length 1.
  # Here "pref" is combined with "to", so an error is expected.
  expect_error(
    match_by_pattern(rc_labels,
                     regex_pattern = "Production",
                     pieces = c("pref", "to")),
    'If pieces contains "pref" or "suff", its length'
  )

  # "Production" is found in the prefix of the first label only.
  in_prefix <- match_by_pattern(rc_labels,
                                regex_pattern = "Production",
                                pieces = "pref")
  expect_equal(in_prefix, c(TRUE, FALSE, FALSE))

  # No suffix contains "Production", so nothing matches.
  in_suffix <- match_by_pattern(rc_labels,
                                regex_pattern = "Production",
                                pieces = "suff")
  expect_equal(in_suffix, c(FALSE, FALSE, FALSE))
})


test_that("match_by_pattern() works for nouns and prepositions", {
  rc_labels <- c("Production [of b in c]", "d [of Coal in f]", "g [of h in USA]")

  # "Production" is the noun of the first label ...
  as_noun <- match_by_pattern(rc_labels,
                              regex_pattern = "Production",
                              pieces = "noun")
  expect_equal(as_noun, c(TRUE, FALSE, FALSE))

  # ... so it is not found when only the "in" phrase is searched.
  as_in_object <- match_by_pattern(rc_labels,
                                   regex_pattern = "Production",
                                   pieces = "in")
  expect_equal(as_in_object, c(FALSE, FALSE, FALSE))

  # "c" and "f" are the objects of "in" in the first two labels.
  c_or_f <- make_or_pattern(c("c", "f"))
  expect_equal(match_by_pattern(rc_labels, regex_pattern = c_or_f, pieces = "in"),
               c(TRUE, TRUE, FALSE))

  b_coal_usa <- make_or_pattern(c("b", "Coal", "USA"))
  # Of these three, only "USA" is the object of "in".
  expect_equal(match_by_pattern(rc_labels, regex_pattern = b_coal_usa, pieces = "in"),
               c(FALSE, FALSE, TRUE))
  # Searching both "of" and "in" phrases finds one of them in every label.
  expect_equal(match_by_pattern(rc_labels,
                                regex_pattern = b_coal_usa,
                                pieces = c("of", "in")),
               c(TRUE, TRUE, TRUE))
})


test_that("match_by_pattern() works for degenerate case", {
  # Only the first label carries a "to" phrase, so the labels do not all
  # break into the same number of pieces (non-square results).
  rc_labels <- c("Production [of b to GBR in c]", "d [of Coal in f]", "g [of h in USA]")
  wanted <- make_or_pattern(c("b", "Coal", "GBR", "USA"))

  matched <- match_by_pattern(rc_labels,
                              regex_pattern = wanted,
                              pieces = c("noun", "of", "in", "to"),
                              prepositions = c("of", "to", "in"))
  expect_equal(matched, c(TRUE, TRUE, TRUE))
})


test_that("replace_by_pattern() works as expected", {
  rc_labels <- c("Production [of b in c]", "d [of Coal in f]", "g [of h in USA]")

  # With no pieces argument, each pattern is replaced wherever it occurs;
  # every pattern below appears in exactly one label.
  new_first <- replace_by_pattern(rc_labels,
                                  regex_pattern = "Production",
                                  replacement = "Manufacture")
  expect_equal(new_first,
               c("Manufacture [of b in c]", "d [of Coal in f]", "g [of h in USA]"))

  new_second <- replace_by_pattern(rc_labels,
                                   regex_pattern = "Coal",
                                   replacement = "Oil")
  expect_equal(new_second,
               c("Production [of b in c]", "d [of Oil in f]", "g [of h in USA]"))

  new_third <- replace_by_pattern(rc_labels,
                                  regex_pattern = "USA",
                                  replacement = "GHA")
  expect_equal(new_third,
               c("Production [of b in c]", "d [of Coal in f]", "g [of h in GHA]"))
})


test_that("replace_by_pattern() works as expected with prefixes and suffixes", {
  rc_labels <- c("Production [of b in c]", "d [of Coal in f]", "g [of h in USA]")

  # Replacement restricted to the prefix.
  expect_equal(
    replace_by_pattern(rc_labels,
                       regex_pattern = "Production",
                       replacement = "Manufacture",
                       pieces = "pref"),
    c("Manufacture [of b in c]", "d [of Coal in f]", "g [of h in USA]")
  )

  # Replacement restricted to the suffix: "Coal" becomes "Bowl".
  expect_equal(
    replace_by_pattern(rc_labels,
                       regex_pattern = "Coa",
                       replacement = "Bow",
                       pieces = "suff"),
    c("Production [of b in c]", "d [of Bowl in f]", "g [of h in USA]")
  )

  # "SA" occurs only in a suffix, so searching prefixes leaves the labels alone.
  expect_equal(
    replace_by_pattern(rc_labels,
                       regex_pattern = "SA",
                       replacement = "SSR",
                       pieces = "pref"),
    rc_labels
  )

  # Searching suffixes instead turns USA into USSR.
  expect_equal(
    replace_by_pattern(rc_labels,
                       regex_pattern = "SA",
                       replacement = "SSR",
                       pieces = "suff"),
    c("Production [of b in c]", "d [of Coal in f]", "g [of h in USSR]")
  )

  # "pref" and "suff" cannot be requested together: pieces has length 2.
  expect_error(
    replace_by_pattern(rc_labels,
                       regex_pattern = "SA",
                       replacement = "SSR",
                       pieces = c("pref", "suff")),
    'If pieces contains "pref" or "suff", its length must be 1. Length was 2.'
  )

  # The same error arises when "pref" is accompanied by other entries;
  # the message reports the actual length of pieces (3).
  expect_error(
    replace_by_pattern(rc_labels,
                       regex_pattern = "SA",
                       replacement = "SSR",
                       pieces = c("pref", "bogus", "42")),
    'If pieces contains "pref" or "suff", its length must be 1. Length was 3.'
  )
})


test_that("replace_by_pattern() works for nouns and prepositions", {
  rc_labels <- c("Production [of b in c]", "d [of Coal in f]", "g [of h in USA]")

  # --- Noun piece ---
  # The whole noun can be replaced.
  expect_equal(
    replace_by_pattern(rc_labels,
                       regex_pattern = "Production",
                       replacement = "Manufacture",
                       pieces = "noun"),
    c("Manufacture [of b in c]", "d [of Coal in f]", "g [of h in USA]")
  )
  # A start anchor matches the beginning of the noun.
  expect_equal(
    replace_by_pattern(rc_labels,
                       regex_pattern = "^Pro",
                       replacement = "Con",
                       pieces = "noun"),
    c("Conduction [of b in c]", "d [of Coal in f]", "g [of h in USA]")
  )
  # An end anchor does not: "Pro" is at the wrong side of the noun.
  expect_equal(
    replace_by_pattern(rc_labels,
                       regex_pattern = "Pro$",
                       replacement = "Con",
                       pieces = "noun"),
    rc_labels
  )

  # --- "of" piece ---
  # "Production" is a noun, not the object of "of", so nothing changes.
  expect_equal(
    replace_by_pattern(rc_labels,
                       regex_pattern = "Production",
                       replacement = "Manufacture",
                       pieces = "of"),
    rc_labels
  )
  # "Coal" is the object of "of" in the second label.
  expect_equal(
    replace_by_pattern(rc_labels,
                       regex_pattern = "Coal",
                       replacement = "Oil",
                       pieces = "of"),
    c("Production [of b in c]", "d [of Oil in f]", "g [of h in USA]")
  )

  # --- "in" piece ---
  # "Coal" is not the object of "in" anywhere, so nothing changes.
  expect_equal(
    replace_by_pattern(rc_labels,
                       regex_pattern = "Coal",
                       replacement = "Oil",
                       pieces = "in"),
    rc_labels
  )
  # "USA" is the object of "in" in the third label.
  expect_equal(
    replace_by_pattern(rc_labels,
                       regex_pattern = "USA",
                       replacement = "GBR",
                       pieces = "in"),
    c("Production [of b in c]", "d [of Coal in f]", "g [of h in GBR]")
  )
  # An end anchor replaces only the final "A" of "USA".
  expect_equal(
    replace_by_pattern(rc_labels,
                       regex_pattern = "A$",
                       replacement = "upercalifragilisticexpialidocious",
                       pieces = "in"),
    c("Production [of b in c]", "d [of Coal in f]", "g [of h in USupercalifragilisticexpialidocious]")
  )
})


test_that("make_list() works as expected", {
  mat <- matrix(1:6, nrow = 3, dimnames = list(c("r1", "r2", "r3"), c("c2", "c1")))

  # A single matrix is repeated n times.
  expect_equal(make_list(mat, n = 1), list(mat))
  expect_equal(make_list(mat, n = 2), list(mat, mat))
  expect_equal(make_list(mat, n = 5), rep(list(mat), 5))

  pairs <- list(c(1, 2), c(3, 4))
  # By default the items of a list are cycled:
  # c(1,2), c(3,4), c(1,2), c(3,4)
  expect_equal(make_list(pairs, n = 4), rep(pairs, 2))
  # With lenx = 1 the list as a whole is the item that gets repeated:
  # [c(1,2), c(3,4)], [c(1,2), c(3,4)], [c(1,2), c(3,4)], [c(1,2), c(3,4)]
  expect_equal(make_list(pairs, n = 4, lenx = 1), rep(list(pairs), 4))
  # n = 3 is not a multiple of the 2 items, which produces a warning.
  expect_warning(make_list(pairs, n = 3), "n not evenly divisible by length\\(x\\)")

  small <- matrix(1:4, nrow = 2)
  # NOTE(review): built from the 3x2 matrix above, not from `small` -- confirm
  # that this is intended. The expectations below hold either way.
  shifted <- mat + 100
  two_mats <- list(small, shifted)
  expect_equal(make_list(two_mats, n = 4), rep(two_mats, 2))
  # Neither 1 nor 5 is a multiple of the 2 items.
  expect_warning(make_list(two_mats, n = 1), "n not evenly divisible by length\\(x\\)")
  expect_warning(make_list(two_mats, n = 5), "n not evenly divisible by length\\(x\\)")

  dim_names <- list(c("r10", "r11"), c("c10", "c11"))
  # Because x is a list, its two items are spread over the two slots ...
  expect_equal(make_list(dim_names, n = 2), dim_names)
  # ... unless lenx = 1 says that x is a single item.
  expect_equal(make_list(dim_names, n = 2, lenx = 1), list(dim_names, dim_names))

  margin <- c(1, 2)
  # Default: 1 and 2 are spread to the two items of the result.
  expect_equal(make_list(margin, n = 2), list(1, 2))
  # lenx = 1: c(1, 2) itself is the item to be repeated.
  expect_equal(make_list(margin, n = 2, lenx = 1), list(margin, margin))
})

# Try the RCLabels package in your browser

# Any scripts or data that you put into this service are public.

# RCLabels documentation built on April 25, 2023, 5:11 p.m.