# tests/testthat/test-extractors.R

test_that("get_nouns() works as expected", {
  # The same two labels are supplied as a character vector and as a list;
  # both forms are expected to yield the same named vector of nouns.
  two_labels <- c("a [b]", "c [d]")
  two_nouns <- c(noun = "a", noun = "c")

  expect_equal(get_nouns("a [b]"), c(noun = "a"))
  expect_equal(get_nouns(two_labels), two_nouns)
  expect_equal(get_nouns(as.list(two_labels)), two_nouns)

  # Supplying a notation that does not fit the labels:
  # the expectation is that each whole label is returned as the noun.
  with_wrong_notation <- get_nouns(as.list(two_labels),
                                   notation = RCLabels::arrow_notation)
  expect_equal(with_wrong_notation, c(noun = "a [b]", noun = "c [d]"))

  # Column-wise use inside dplyr::mutate()
  label_tbl <- data.frame(labels = c("a [b]", "c [d]", "e [f]", "g [h]"))
  noun_tbl <- dplyr::mutate(label_tbl, nouns = get_nouns(labels))
  expect_equal(noun_tbl$nouns,
               c(noun = "a", noun = "c", noun = "e", noun = "g"))

  # Labels written in different notations, with the notation inferred per label
  mixed_labels <- c("a -> b", "c [of d]")
  expect_equal(get_nouns(mixed_labels), two_nouns)
})


test_that("get_nouns() works for a difficult case", {
  # Kept as a regression guard: this expectation was failing for a while.
  tricky_label <- "a [of b in c]"
  noun <- get_nouns(tricky_label, choose_most_specific = TRUE)
  expect_equal(noun, c(noun = "a"))
})


test_that("get_nouns() works when inf_notation = FALSE", {
  # One label, one explicitly supplied notation.
  single_noun <- get_nouns("a [b]",
                           inf_notation = FALSE,
                           notation = RCLabels::bracket_notation)
  expect_equal(single_noun, c(noun = "a"))

  # Two labels, each paired with its own notation via a list.
  per_label_notations <- list(RCLabels::bracket_notation,
                              RCLabels::arrow_notation)
  paired_nouns <- get_nouns(c("a [b]", "c -> d"),
                            inf_notation = FALSE,
                            notation = per_label_notations)
  expect_equal(paired_nouns, c(noun = "a", noun = "c"))
})


test_that("get_pps() works as expected", {
  # Checks the prepositional-phrase string returned by get_pps() for
  # single labels, vectors, lists, and a data frame column.

  # Try a couple simple ones
  expect_equal(get_pps("a [in b]"), c(pps = "in b"))
  # Not sure why we would ever want to do this.
  expect_equal(get_pps("a [in b]", choose_most_specific = TRUE), c(pps = "b"))
  expect_equal(get_pps("a [in b]", choose_most_specific = FALSE), c(pps = "in b"))
  expect_equal(get_pps(c("a [in b]", "c [of d]"), notation = RCLabels::bracket_notation),
               c(pps = "in b", pps = "of d"))
  # Infer notation (no notation argument supplied).
  expect_equal(get_pps(c("a [in b]", "c [of d]"), choose_most_specific = FALSE),
               c(pps = "in b", pps = "of d"))
  # Try a degenerate case
  expect_equal(get_pps("a [b in c]"), c(pps = "in c"))

  # Try with a single string
  expect_equal(get_pps("a [of b in c]", notation = bracket_notation), c(pps = "of b in c"))

  # The same two labels as a character vector and as a list.
  expect_equal(get_pps(c("a [of b in c]", "d [of e into f]"), notation = bracket_notation),
               c(pps = "of b in c", pps = "of e into f"))

  expect_equal(get_pps(list("a [of b in c]", "d [of e into f]"), notation = bracket_notation),
               c(pps = "of b in c", pps = "of e into f"))

  # A single label given bare, wrapped in c(), and wrapped in list().
  expect_equal(get_pps("a [in b]", notation = RCLabels::bracket_notation), c(pps = "in b"))
  expect_equal(get_pps(c("a [in b]"), notation = RCLabels::bracket_notation), c(pps = "in b"))
  expect_equal(get_pps(list("a [in b]"), notation = RCLabels::bracket_notation), c(pps = "in b"))

  # Now try in a data frame
  df <- data.frame(labels = c("e [of f in g]", "h [-> i in j]", "a [in b]", "c [of d into USA]"))
  with_nouns_pps <- df %>%
    dplyr::mutate(
      nouns = get_nouns(labels),
      pps = get_pps(labels, choose_most_specific = FALSE)
    )
  expect_equal(with_nouns_pps$nouns, c(noun = "e", noun = "h", noun = "a", noun = "c"))
  expect_equal(with_nouns_pps$pps,
               c(pps = "of f in g", pps = "-> i in j", pps = "in b", pps = "of d into USA"))
})


test_that("get_pps() works where there are no prepositions", {
  # Empty brackets: the expected result is an empty string named "pps".
  empty_suffix_label <- "a []"
  no_pps <- c(pps = "")
  expect_equal(get_pps(empty_suffix_label), no_pps)
})


test_that("get_pps() works when inf_notation = FALSE", {
  # The three calls build up step by step (defaults, explicit notation,
  # then inference switched off) and all expect the same answer.
  in_b_label <- "a [in b]"
  in_b <- c(pps = "in b")

  with_defaults <- get_pps(in_b_label, choose_most_specific = FALSE)
  expect_equal(with_defaults, in_b)

  with_notation <- get_pps(in_b_label,
                           notation = RCLabels::bracket_notation,
                           choose_most_specific = FALSE)
  expect_equal(with_notation, in_b)

  without_inference <- get_pps(in_b_label,
                               inf_notation = FALSE,
                               notation = RCLabels::bracket_notation,
                               choose_most_specific = FALSE)
  expect_equal(without_inference, in_b)
})


test_that("get_prepositions() works correctly", {
  labs <- c("a [-> b in c]", "d [from Production]", "Coal [from Imports into US]")
  # All three calls below are expected to give this list:
  # one character vector of prepositions per label.
  found <- list(prepositions = c("->", "in"),
                prepositions = "from",
                prepositions = c("from", "into"))

  # Try with notation inference.
  # This works, because choose_most_specific = FALSE is the default AND
  # bracket_notation is before in_notation, from_notation, and bracket_arrow_notation.
  expect_equal(get_prepositions(labs), found)

  # Try by specifying bracket_notation
  with_brackets <- get_prepositions(labs, notation = RCLabels::bracket_notation)
  expect_equal(with_brackets, found)

  # Try by turning off notation inference
  no_inference <- get_prepositions(labs,
                                   inf_notation = FALSE,
                                   notation = RCLabels::bracket_notation)
  expect_equal(no_inference, found)
})


test_that("get_objects() works correctly", {
  # The same labels and the same expected objects are used for all three calls;
  # only the way the notation is determined varies.
  trade_labels <- c("a [of b in USA]", "d [of e into GBR]")
  trade_objects <- list(objects = c(of = "b", `in` = "USA"),
                        objects = c(of = "e", into = "GBR"))

  # Specify the notation being used
  given_notation <- get_objects(trade_labels,
                                notation = RCLabels::bracket_notation)
  expect_equal(given_notation, trade_objects)

  # Specify the notation being used and don't infer.
  given_no_inference <- get_objects(trade_labels,
                                    inf_notation = FALSE,
                                    notation = RCLabels::bracket_notation)
  expect_equal(given_no_inference, trade_objects)

  # Infer notation
  expect_equal(get_objects(trade_labels), trade_objects)
})


test_that("get_objects() sets preposition with arrow notation", {
  # Inside brackets, "->" is expected to be reported as the preposition.
  bracketed_arrow <- "a [-> b]"
  bracketed_objects <- get_objects(bracketed_arrow,
                                   notation = RCLabels::bracket_notation,
                                   inf_notation = FALSE)
  expect_equal(bracketed_objects, list(objects = c(`->` = "b")))

  # This one is weird, but the expected behavior.
  # Note that get_objects() cannot pick up a preposition,
  # because -> is the delimiter between prefix and suffix.
  # The suffix is only "b", with no preposition present.
  # Hence the expected object carries an empty-string name.
  bare_arrow <- "a -> b"
  nameless_object <- "b"
  names(nameless_object) <- ""
  expect_equal(get_objects(bare_arrow), list(objects = nameless_object))
})


test_that("split_noun_pp() works as expected", {
  # Expected pieces, named once and reused below.
  a_of_b <- c(noun = "a", of = "b")
  a_pieces <- c(noun = "a", of = "b", `in` = "c")
  d_pieces <- c(noun = "d", of = "e", into = "f")
  production_pieces <- c(noun = "Production", of = "Coal", `in` = "GB")

  # Try with specific notation
  expect_equal(split_noun_pp("a [of b]", notation = bracket_notation),
               list(a_of_b))
  # Try without inference
  not_inferred <- split_noun_pp("a [of b]",
                                inf_notation = FALSE,
                                notation = bracket_notation)
  expect_equal(not_inferred, list(a_of_b))
  # Try without notation inference and no notation. This will fail.
  expect_error(split_noun_pp("a [of b]", inf_notation = FALSE))

  # Try with notation inference
  expect_equal(split_noun_pp("a [of b]"), list(a_of_b))
  expect_equal(split_noun_pp("a [of b in c]"), list(a_pieces))
  expect_equal(split_noun_pp(c("a [of b in c]", "d [of e into f]")),
               list(a_pieces, d_pieces))

  # Try in a list.
  listed_labels <- list("a [of b in c]",
                        "d [of e into f]",
                        "Production [of Coal in GB]")
  expect_equal(split_noun_pp(listed_labels),
               list(a_pieces, d_pieces, production_pieces))

  # Now try in a data frame.
  label_tbl <- data.frame(labels = I(list("a [in b]", "c [of d into USA]",
                                          "e [of f in g]", "h [-> i in j]")))
  split_tbl <- dplyr::mutate(label_tbl, splits = split_noun_pp(labels))
  expect_equal(split_tbl$splits[[1]], c(noun = "a", `in` = "b"))
  expect_equal(split_tbl$splits[[2]], c(noun = "c", of = "d", into = "USA"))
  expect_equal(split_tbl$splits[[3]], c(noun = "e", of = "f", `in` = "g"))
  expect_equal(split_tbl$splits[[4]], c(noun = "h", `->` = "i", `in` = "j"))
})


test_that("paste_noun_pp() works as expected", {
  # Try with a single label
  one_label <- "a [of b in c]"
  one_split <- split_noun_pp(one_label)
  expect_equal(one_split, list(c(noun = "a", of = "b", `in` = "c")))

  # Try with a vector of labels: splitting then pasting should round-trip.
  labs <- c("a [of b in c]", "d [from Coal mines in USA]")
  pieces <- split_noun_pp(labs)
  expect_equal(paste_noun_pp(pieces), labs)

  # Try with different notations for each paste
  two_notations <- list(RCLabels::bracket_notation, RCLabels::arrow_notation)
  expect_equal(paste_noun_pp(pieces, notation = two_notations),
               c("a [of b in c]", "d -> from Coal mines in USA"))

  # Try with a weird notation vector
  paren_notation <- notation_vec(pref_start = "(", pref_end = ")",
                                 suff_start = "(", suff_end = ")")
  paren_labels <- c("(Production)(of Coal in USA)",
                    "(Manufacture)(of Oil in Canada)")
  paren_pieces <- split_noun_pp(paren_labels, notation = paren_notation)
  expect_equal(paste_noun_pp(paren_pieces, notation = paren_notation),
               paren_labels)

  # Try in a data frame
  label_tbl <- tibble::tibble(labels = c("a [in b]", "c [of d into USA]",
                                         "e [of f in g]", "h [-> i in j]"))
  round_trip <- dplyr::mutate(
    label_tbl,
    splits = split_noun_pp(labels),
    recombined = paste_noun_pp(splits)
  )
  expect_equal(round_trip$recombined, round_trip$labels)

  # Try with different notations for each row of the data frame.
  per_row <- dplyr::mutate(
    round_trip,
    notn = list(arrow_notation, in_notation, bracket_notation, bracket_arrow_notation),
    recombined2 = paste_noun_pp(splits, notn)
  )
  expect_equal(per_row$recombined2,
               c("a -> in b", "c [in of d into USA]", "e [of f in g]", "h [-> -> i in j]"))
})


test_that("get_piece() works as expected", {
  # Exercises get_piece() for each kind of piece on three bracket-notation labels.
  labs <- c("a [from b in c]", "d [of e in f]", "Export [of Coal from USA to MEX]")

  # Must have length 1
  expect_error(get_piece(labs, piece = c("from", "to")),
               "piece must be a character vector of length 1 in RCLabels::get")

  # Returns labs unchanged, because default value for piece is "all".
  expect_equal(get_piece(labs), labs)

  # Prefix
  expect_equal(get_piece(labs, "pref"),
               c(pref = "a", pref = "d", pref = "Export"))
  # Try without inference. This should fail, because no notation is given.
  # (The "legth" spelling is part of the pattern being matched; leave it as is.)
  expect_error(get_piece(labs, piece = "pref", inf_notation = FALSE),
               regexp = "length\\(x\\) and legth\\(notation\\) must be same length in strip_label_part\\(\\)")
  # Default is choose_most_specific = FALSE, which works in this case.
  expect_equal(get_piece(labs, "suff"),
               c(suff = "from b in c", suff = "of e in f", suff = "of Coal from USA to MEX"))
  # With the most specific match, things look weird.
  expect_equal(get_piece(labs, "suff", choose_most_specific = TRUE),
               c(suff = "b in c", suff = "e in f", suff = "Coal from USA to MEX"))
  # Noun
  expect_equal(get_piece(labs, piece = "noun"),
               c(noun = "a", noun = "d", noun = "Export"))
  # Prepositions: a label lacking the requested preposition is expected
  # to yield an empty string carrying that preposition's name.
  expect_equal(get_piece(labs, "from"),
               list(c(from = "b"), c(from = ""), c(from = "USA")))
  expect_equal(get_piece(labs, "in"),
               list(c(`in` = "c"), c(`in` = "f"), c(`in` = "")))
  expect_equal(get_piece(labs, "of"),
               list(c(of = ""), c(of = "e"), c(of = "Coal")))
  # Unknown preposition
  expect_equal(get_piece(labs, "bogus"),
               list(c(bogus = ""), c(bogus = ""), c(bogus = "")))
})


test_that("get_piece() works with 'pps'", {
  # piece = "pps" should give the full prepositional-phrase string per label.
  labs <- c("a [from b in c]", "d [of e in f]", "Export [of Coal from USA to MEX]")
  phrases <- get_piece(labs, piece = "pps")
  expect_equal(phrases,
               c(pps = "from b in c",
                 pps = "of e in f",
                 pps = "of Coal from USA to MEX"))
})


test_that("get_piece() works with 'prepositions'", {
  # piece = "prepositions" should give one character vector per label.
  labs <- c("a [from b in c]", "d [of e in f]", "Export [of Coal from USA to MEX]")
  preps <- get_piece(labs, piece = "prepositions")
  expect_equal(preps,
               list(prepositions = c("from", "in"),
                    prepositions = c("of", "in"),
                    prepositions = c("of", "from", "to")))
})


test_that("get_piece() works with 'objects'", {
  # piece = "objects" should give, per label, the objects named by their prepositions.
  labs <- c("a [from b in c]", "d [of e in f]", "Export [of Coal from USA to MEX]")
  per_label_objects <- list(
    objects = c(from = "b", `in` = "c"),
    objects = c(of = "e", `in` = "f"),
    objects = c(of = "Coal", from = "USA", to = "MEX")
  )
  expect_equal(get_piece(labs, piece = "objects"), per_label_objects)
})


test_that("get_piece() does the right thing when it can't infer notation", {
  # It should return the whole label in the prefix when notation can't be inferred.
  plain_label <- "Crude"
  all_in_prefix <- list(pref = "Crude", suff = "")

  expect_equal(split_pref_suff(plain_label), all_in_prefix)
  expect_equal(split_pref_suff(c(plain_label, "a -> b")),
               list(pref = c("Crude", "a"), suff = c("", "b")))

  # Same outcome when inference is off and a non-matching notation is supplied.
  with_dash <- split_pref_suff(plain_label,
                               inf_notation = FALSE,
                               notation = RCLabels::dash_notation)
  expect_equal(with_dash, all_in_prefix)
  with_arrow <- split_pref_suff(plain_label,
                                inf_notation = FALSE,
                                notation = RCLabels::arrow_notation)
  expect_equal(with_arrow, all_in_prefix)

  # The other extractors are expected to agree.
  expect_equal(get_pref_suff(plain_label, which = "pref"), c(pref = "Crude"))
  expect_equal(get_piece(plain_label, piece = "noun"), c(noun = "Crude"))
})


test_that("get_piece() works with 2 different notations", {
  # Labels mix bracket and arrow notation; nouns should be found either way.
  labs <- c("a [from b]", "c -> d", "e -> f")
  nouns <- c(noun = "a", noun = "c", noun = "e")

  # Infer from the package's full list of notations.
  from_all_notations <- get_piece(labs,
                                  piece = "noun",
                                  inf_notation = TRUE,
                                  notation = RCLabels::notations_list)
  expect_equal(from_all_notations, nouns)

  # Infer from only the two notations actually present.
  candidate_notations <- list(RCLabels::bracket_notation, RCLabels::arrow_notation)
  from_two_notations <- get_piece(labs,
                                  piece = "noun",
                                  inf_notation = TRUE,
                                  notation = candidate_notations)
  expect_equal(from_two_notations, nouns)
})


test_that("passing NULL to extractors returns NULL", {
  # Each function below is expected to pass a NULL input straight through as NULL.
  no_labels <- NULL

  # Prefix/suffix and noun/preposition extractors
  expect_null(get_pref_suff(no_labels))
  expect_null(get_nouns(no_labels))
  expect_null(get_pps(no_labels))
  expect_null(get_prepositions(no_labels))
  expect_null(get_objects(no_labels))

  # Split and paste helpers
  expect_null(split_noun_pp(no_labels))
  expect_null(paste_noun_pp(no_labels))
})


test_that("get_piece(NULL) returns NULL", {
  # A NULL input is expected to give NULL for every named piece.
  # `info` records which piece was requested so that a failure is easy to locate.
  pieces <- c("pref", "suff", "noun", "pps", "prepositions", "objects")
  for (piece in pieces) {
    expect_null(get_piece(NULL, piece = piece),
                info = paste0("piece = \"", piece, "\""))
  }
})

# Try the RCLabels package in your browser
#
# Any scripts or data that you put into this service are public.
#
# RCLabels documentation built on April 25, 2023, 5:11 p.m.