test_that("make_or_pattern() works as expected", {
  # Two-element input shared by every pattern type checked below.
  ab <- c("a", "b")

  # Omitting pattern_type is expected to give the same result as "exact":
  # every alternative anchored at both ends.
  exact_ab <- "^a$|^b$"
  expect_equal(make_or_pattern(ab), exact_ab)
  expect_equal(make_or_pattern(ab, pattern_type = "exact"), exact_ab)

  # Anchored at the start only, at the end only, or not at all.
  expect_equal(make_or_pattern(ab, pattern_type = "leading"), "^a|^b")
  expect_equal(make_or_pattern(ab, pattern_type = "trailing"), "a$|b$")
  expect_equal(make_or_pattern(ab, pattern_type = "anywhere"), "a|b")

  # "literal" is expected to hand the strings back untouched (no joining).
  expect_equal(make_or_pattern(ab, pattern_type = "literal"), ab)

  # A third alternative is appended in the same fashion.
  expect_equal(make_or_pattern(c("a", "b", "c")), "^a$|^b$|^c$")
})
test_that('prepositions make good "or" patterns', {
  # One candidate string per preposition style being probed.
  candidates <- c("a [of b]", "cat [from d]", "haunted [-> house]")

  # Unanchored alternation built from the package's list of prepositions.
  any_prep <- make_or_pattern(RCLabels::prepositions_list,
                              pattern_type = "anywhere")
  hits <- gregexpr(any_prep, candidates)

  # Expected start position of the match in each string;
  # the attributes attached by gregexpr() are ignored here.
  expect_equal(hits, list(4, 6, 10), ignore_attr = TRUE)

  # Expected length of the matched text, string by string.
  expected_lengths <- c(2, 4, 2)
  for (i in seq_along(expected_lengths)) {
    expect_equal(attr(hits[[i]], which = "match.length"),
                 expected_lengths[[i]])
  }
})
test_that("match_by_pattern() works as expected for string matches", {
  lbls <- c("Production [of b in c]", "d [of Coal in f]", "g [of h in USA]")

  # Plain substrings: each one occurs in exactly one of the labels.
  found <- match_by_pattern(lbls, regex_pattern = "Production")
  expect_equal(found, c(TRUE, FALSE, FALSE))

  found <- match_by_pattern(lbls, regex_pattern = "Coal")
  expect_equal(found, c(FALSE, TRUE, FALSE))

  found <- match_by_pattern(lbls, regex_pattern = "USA")
  expect_equal(found, c(FALSE, FALSE, TRUE))

  # Anchors are honoured: "Production" opens the first label ...
  found <- match_by_pattern(lbls, regex_pattern = "^Production")
  expect_equal(found, c(TRUE, FALSE, FALSE))

  # ... and closes none of them, so an end-anchored pattern matches nothing.
  found <- match_by_pattern(lbls, regex_pattern = "Production$")
  expect_equal(found, c(FALSE, FALSE, FALSE))
})
test_that("match_by_pattern() works for prefixes and suffixes", {
  lbls <- c("Production [of b in c]", "d [of Coal in f]", "g [of h in USA]")

  # pieces contains "pref" but has length 2, so an error is expected.
  expect_error(
    match_by_pattern(lbls,
                     regex_pattern = "Production",
                     pieces = c("pref", "to")),
    'If pieces contains "pref" or "suff", its length'
  )

  # "Production" sits in the prefix of the first label ...
  in_prefix <- match_by_pattern(lbls,
                                regex_pattern = "Production",
                                pieces = "pref")
  expect_equal(in_prefix, c(TRUE, FALSE, FALSE))

  # ... and in no suffix at all.
  in_suffix <- match_by_pattern(lbls,
                                regex_pattern = "Production",
                                pieces = "suff")
  expect_equal(in_suffix, c(FALSE, FALSE, FALSE))
})
test_that("match_by_pattern() works for nouns and prepositions", {
  lbls <- c("Production [of b in c]", "d [of Coal in f]", "g [of h in USA]")

  # "Production" is the noun of the first label, so a noun-only search finds it.
  noun_hits <- match_by_pattern(lbls,
                                regex_pattern = "Production",
                                pieces = "noun")
  expect_equal(noun_hits, c(TRUE, FALSE, FALSE))

  # It is a noun, not part of a prepositional phrase, so looking at "in" fails.
  in_hits <- match_by_pattern(lbls,
                              regex_pattern = "Production",
                              pieces = "in")
  expect_equal(in_hits, c(FALSE, FALSE, FALSE))

  # Objects of "in": "c" and "f" belong to the first two labels.
  c_or_f <- make_or_pattern(c("c", "f"))
  in_hits <- match_by_pattern(lbls, regex_pattern = c_or_f, pieces = "in")
  expect_equal(in_hits, c(TRUE, TRUE, FALSE))

  # Of "b", "Coal" and "USA", only "USA" is the object of an "in" phrase.
  b_coal_usa <- make_or_pattern(c("b", "Coal", "USA"))
  in_hits <- match_by_pattern(lbls, regex_pattern = b_coal_usa, pieces = "in")
  expect_equal(in_hits, c(FALSE, FALSE, TRUE))

  # Searching the "of" and "in" phrases together, every label matches.
  of_in_hits <- match_by_pattern(lbls,
                                 regex_pattern = b_coal_usa,
                                 pieces = c("of", "in"))
  expect_equal(of_in_hits, c(TRUE, TRUE, TRUE))
})
test_that("match_by_pattern() works for degenerate case", {
  # The first label carries three prepositional phrases and the others two,
  # so the per-label pieces are ragged ("non-square").
  lbls <- c("Production [of b to GBR in c]",
            "d [of Coal in f]",
            "g [of h in USA]")
  wanted <- make_or_pattern(c("b", "Coal", "GBR", "USA"))

  found <- match_by_pattern(lbls,
                            regex_pattern = wanted,
                            pieces = c("noun", "of", "in", "to"),
                            prepositions = c("of", "to", "in"))

  # Every label is expected to match.
  expect_equal(found, c(TRUE, TRUE, TRUE))
})
test_that("match_by_pattern() works when specifying a notation", {
  only_b <- "^b$"

  # Baseline with RCLabels::bracket_notation: the labels whose "from" piece
  # is exactly "b" are expected to match.
  bracket_hits <- match_by_pattern(
    labels = c("a [from b]", "a [from c]", "d [from b]", "e"),
    regex_pattern = only_b,
    pieces = "from",
    notation = RCLabels::bracket_notation
  )
  expect_equal(bracket_hits, c(TRUE, FALSE, TRUE, FALSE))

  # Now with the more specific RCLabels::from_notation and
  # inf_notation = FALSE. This works only "sort of": the all-FALSE result
  # is the right answer, though not the one a reader might expect.
  # Specifying the notation extracts the object of the prepositional phrase
  # before the pattern is applied, which yields an empty result.
  from_hits <- match_by_pattern(
    labels = c("a [from b]", "a [from c]", "d [from b]", "e [from x]"),
    regex_pattern = only_b,
    pieces = "from",
    notation = RCLabels::from_notation,
    inf_notation = FALSE
  )
  expect_equal(from_hits, c(FALSE, FALSE, FALSE, FALSE))
})
test_that("replace_by_pattern() works as expected", {
  lbls <- c("Production [of b in c]", "d [of Coal in f]", "g [of h in USA]")

  # Each pattern occurs in one label only; the other two must come back as-is.
  swapped <- replace_by_pattern(lbls,
                                regex_pattern = "Production",
                                replacement = "Manufacture")
  expect_equal(
    swapped,
    c("Manufacture [of b in c]", "d [of Coal in f]", "g [of h in USA]")
  )

  swapped <- replace_by_pattern(lbls,
                                regex_pattern = "Coal",
                                replacement = "Oil")
  expect_equal(
    swapped,
    c("Production [of b in c]", "d [of Oil in f]", "g [of h in USA]")
  )

  swapped <- replace_by_pattern(lbls,
                                regex_pattern = "USA",
                                replacement = "GHA")
  expect_equal(
    swapped,
    c("Production [of b in c]", "d [of Coal in f]", "g [of h in GHA]")
  )
})
test_that("replace_by_pattern() works as expected with prefixes and suffixes", {
  lbls <- c("Production [of b in c]", "d [of Coal in f]", "g [of h in USA]")
  unchanged <- lbls

  # Replacement restricted to the prefix.
  in_prefix <- replace_by_pattern(lbls,
                                  regex_pattern = "Production",
                                  replacement = "Manufacture",
                                  pieces = "pref")
  expect_equal(
    in_prefix,
    c("Manufacture [of b in c]", "d [of Coal in f]", "g [of h in USA]")
  )

  # Replacement restricted to the suffix; a partial word is enough.
  in_suffix <- replace_by_pattern(lbls,
                                  regex_pattern = "Coa",
                                  replacement = "Bow",
                                  pieces = "suff")
  expect_equal(
    in_suffix,
    c("Production [of b in c]", "d [of Bowl in f]", "g [of h in USA]")
  )

  # "USA" lives in the suffix, so a prefix-only replacement changes nothing.
  in_prefix <- replace_by_pattern(lbls,
                                  regex_pattern = "SA",
                                  replacement = "SSR",
                                  pieces = "pref")
  expect_equal(in_prefix, unchanged)

  # The same replacement aimed at the suffix turns USA into USSR.
  in_suffix <- replace_by_pattern(lbls,
                                  regex_pattern = "SA",
                                  replacement = "SSR",
                                  pieces = "suff")
  expect_equal(
    in_suffix,
    c("Production [of b in c]", "d [of Coal in f]", "g [of h in USSR]")
  )

  # "pref" and "suff" cannot be requested together: pieces must have length 1.
  expect_error(
    replace_by_pattern(lbls,
                       regex_pattern = "SA",
                       replacement = "SSR",
                       pieces = c("pref", "suff")),
    'If pieces contains "pref" or "suff", its length must be 1. Length was 2.'
  )

  # The same error is expected when "pref" is accompanied by other entries.
  expect_error(
    replace_by_pattern(lbls,
                       regex_pattern = "SA",
                       replacement = "SSR",
                       pieces = c("pref", "bogus", "42")),
    'If pieces contains "pref" or "suff", its length must be 1. Length was 3.'
  )
})
test_that("replace_by_pattern() works for nouns and prepositions", {
  lbls <- c("Production [of b in c]", "d [of Coal in f]", "g [of h in USA]")
  unchanged <- lbls

  # --- Noun -----------------------------------------------------------------
  # Whole-word replacement of the noun.
  out <- replace_by_pattern(lbls,
                            regex_pattern = "Production",
                            replacement = "Manufacture",
                            pieces = "noun")
  expect_equal(
    out,
    c("Manufacture [of b in c]", "d [of Coal in f]", "g [of h in USA]")
  )

  # Start-anchored replacement inside the noun.
  out <- replace_by_pattern(lbls,
                            regex_pattern = "^Pro",
                            replacement = "Con",
                            pieces = "noun")
  expect_equal(
    out,
    c("Conduction [of b in c]", "d [of Coal in f]", "g [of h in USA]")
  )

  # End-anchored "Pro" cannot match: it is at the wrong side of the string.
  out <- replace_by_pattern(lbls,
                            regex_pattern = "Pro$",
                            replacement = "Con",
                            pieces = "noun")
  expect_equal(out, unchanged)

  # --- "of" -----------------------------------------------------------------
  # "Production" is a noun, so targeting "of" leaves everything alone.
  out <- replace_by_pattern(lbls,
                            regex_pattern = "Production",
                            replacement = "Manufacture",
                            pieces = "of")
  expect_equal(out, unchanged)

  # "Coal" is an object of "of", so this replacement goes through.
  out <- replace_by_pattern(lbls,
                            regex_pattern = "Coal",
                            replacement = "Oil",
                            pieces = "of")
  expect_equal(
    out,
    c("Production [of b in c]", "d [of Oil in f]", "g [of h in USA]")
  )

  # --- "in" -----------------------------------------------------------------
  # "Coal" is not "in" anything, so nothing changes.
  out <- replace_by_pattern(lbls,
                            regex_pattern = "Coal",
                            replacement = "Oil",
                            pieces = "in")
  expect_equal(out, unchanged)

  # "USA" is an object of "in".
  out <- replace_by_pattern(lbls,
                            regex_pattern = "USA",
                            replacement = "GBR",
                            pieces = "in")
  expect_equal(
    out,
    c("Production [of b in c]", "d [of Coal in f]", "g [of h in GBR]")
  )

  # End-anchored replacement within the object of "in".
  out <- replace_by_pattern(lbls,
                            regex_pattern = "A$",
                            replacement = "upercalifragilisticexpialidocious",
                            pieces = "in")
  expect_equal(
    out,
    c("Production [of b in c]",
      "d [of Coal in f]",
      "g [of h in USupercalifragilisticexpialidocious]")
  )
})
test_that("make_list() works as expected", {
  # --- A single matrix is repeated n times ----------------------------------
  m <- matrix(1:6,
              nrow = 3,
              dimnames = list(c("r1", "r2", "r3"), c("c2", "c1")))
  expect_equal(make_list(m, n = 1), list(m))
  expect_equal(make_list(m, n = 2), list(m, m))
  expect_equal(make_list(m, n = 5), rep(list(m), times = 5))

  # Pattern of the warning expected when n is not a multiple of length(x).
  uneven_msg <- "n not evenly divisible by length\\(x\\)"

  # --- A list of vectors ----------------------------------------------------
  pairs <- list(c(1, 2), c(3, 4))
  # The items are cycled: c(1,2), c(3,4), c(1,2), c(3,4)
  expect_equal(make_list(pairs, n = 4), c(pairs, pairs))
  # With lenx = 1 the whole list is the unit that gets repeated:
  # [c(1,2), c(3,4)] four times over.
  expect_equal(make_list(pairs, n = 4, lenx = 1), rep(list(pairs), times = 4))
  # 3 is not a multiple of 2, so a warning is expected.
  expect_warning(make_list(pairs, n = 3), uneven_msg)

  # --- A list of matrices ---------------------------------------------------
  m1 <- matrix(1:4, nrow = 2)
  # NOTE(review): m2 is derived from m (3 x 2), not from m1 -- confirm intent.
  m2 <- m + 100
  mats <- list(m1, m2)
  expect_equal(make_list(mats, n = 4), c(mats, mats))
  expect_warning(make_list(mats, n = 1), uneven_msg)
  expect_warning(make_list(mats, n = 5), uneven_msg)

  # --- A list of name vectors -----------------------------------------------
  dim_names <- list(c("r10", "r11"), c("c10", "c11"))
  # Because x is a list, its two items are spread over the two slots ...
  expect_equal(make_list(dim_names, n = 2), dim_names)
  # ... unless lenx = 1 says that the list itself is the item to repeat.
  expect_equal(make_list(dim_names, n = 2, lenx = 1),
               list(dim_names, dim_names))

  # --- A plain numeric vector -----------------------------------------------
  margin <- c(1, 2)
  # By default 1 and 2 are spread over the two list slots.
  expect_equal(make_list(margin, n = 2), list(1, 2))
  # With lenx = 1, c(1,2) as a whole is the item being repeated.
  expect_equal(make_list(margin, n = 2, lenx = 1), list(margin, margin))
})
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.