# tests/testthat/test-CRAN-alternatives.R

library(nc)
library(data.table)
library(testthat)
# Name the group of tests defined in this file.
# NOTE(review): context() is deprecated in testthat's 3rd edition -- confirm
# which edition this package targets before removing the call.
context("alternatives")

# alternatives() needs something to choose between: zero arguments must fail.
test_that("error for no arguments to alternatives", {
  expect_error(
    alternatives(),
    "alternatives should have at least two arguments")
})

# A single pattern is not a choice either, so the same error is expected.
test_that("error for one argument to alternatives", {
  expect_error(
    alternatives("pattern"),
    "alternatives should have at least two arguments")
})

# Un-named alternatives contribute no output columns: the result is expected
# to contain only the explicitly named `number` group.
test_that("two un-named alternatives => two non-capturing groups", {
  word.digit <- c("foo1", "bar2")
  either.word <- alternatives("foo", "bar")
  expect_identical(
    nc::capture_first_vec(word.digit, either.word, number="[12]"),
    data.table(number=c("1", "2")))
})

# Fixtures read by the two single-named-alternative tests that follow:
# the `foo` column is expected to be empty for the subject matched by "bar".
subject <- c("foooo1", "bar2")
exp.dt <- data.table(
  foo=c("foooo", ""),
  number=c("1", "2"))
# Naming the first alternative is expected to add a `foo` column.
test_that("first named alternative => one more capturing group", {
  foo.first <- alternatives(foo="fo+", "bar")
  expect_identical(
    nc::capture_first_vec(subject, foo.first, number="[12]"),
    exp.dt)
})

# Same expectation as above with the named alternative listed second.
test_that("second named alternative => one more capturing group", {
  foo.second <- alternatives("bar", foo="fo+")
  expect_identical(
    nc::capture_first_vec(subject, foo.second, number="[12]"),
    exp.dt)
})

# With both alternatives named, each gets its own column; the column of the
# alternative that did not match is expected to hold the empty string.
test_that("both named alternatives => two more capturing groups", {
  input.vec <- c("foooo1", "barrr2")
  both.named <- alternatives(bar="bar+", foo="fo+")
  result.dt <- nc::capture_first_vec(input.vec, both.named, number="[12]")
  wanted.dt <- data.table(
    bar=c("", "barrr"),
    foo=c("foooo", ""),
    number=c("1", "2"))
  expect_identical(result.dt, wanted.dt)
})

# Fixtures read by the two duplicate-name tests that follow: the same three
# named sub-patterns arranged in two different orders.
pat.list <- nc::altlist(
  month="[a-z]{3}",
  day="[0-9]{2}",
  year="[0-9]{4}")
pattern <- nc::alternatives(
  list(pat.list[["month"]], " ", pat.list[["day"]], ", ", pat.list[["year"]]),
  list(pat.list[["day"]], " ", pat.list[["month"]], " ", pat.list[["year"]]))
# Groups with the same name in different alternatives are expected to be
# merged into a single output column per name.
test_that("alternatives with dup names ok", {
  date.strings <- c("mar 17, 1983", "26 sep 2017")
  wanted.dt <- data.table(
    month=c("mar", "sep"),
    day=c("17", "26"),
    year=c("1983", "2017"))
  expect_identical(nc::capture_first_vec(date.strings, pattern), wanted.dt)
})
# With nomatch.error=FALSE a non-matching subject is expected to yield a row
# of missing values instead of an error.
test_that("alternatives with dup names and one subject with no match", {
  date.strings <- c("mar 17, 1983", "this will not match", "26 sep 2017")
  wanted.dt <- data.table(
    month=c("mar", NA, "sep"),
    day=c("17", NA, "26"),
    year=c("1983", NA, "2017"))
  result.dt <- nc::capture_first_vec(
    date.strings, pattern, nomatch.error=FALSE)
  expect_identical(result.dt, wanted.dt)
})

# Only the first alternative converts `year` with as.integer, so the two
# same-named groups disagree on type and matching is expected to fail.
test_that("alternatives with dup names error for diff types", {
  date.strings <- c("mar 17, 1983", "26 sep 2017")
  parts <- nc::altlist(
    month="[a-z]{3}",
    day="[0-9]{2}",
    year="[0-9]{4}")
  mixed.type.pattern <- with(parts, nc::alternatives(
    list(month, " ", day, ", ", year, as.integer),
    list(day, " ", month, " ", year)))
  expect_error(
    nc::capture_first_vec(date.strings, mixed.type.pattern),
    "capture groups with identical names should have conversion functions that all return the same type; problem group name=year has types character,integer")
})

# The same conversion functions are attached to `amount` and `date` in both
# orderings, so the typed columns (numeric and IDate) are expected to combine.
test_that("IDate conversion OK", {
  receipts <- c(
    "02/07/2020 EUR68.50",
    "EUR45.00 29/10/2020")
  # Parse day/month/year text into a data.table IDate.
  parse.dmy <- function(date.str) {
    data.table::as.IDate(date.str, format="%d/%m/%Y")
  }
  parts <- nc::altlist(
    money=list("EUR", amount="[0-9.]+", as.numeric),
    date=list("[0-9]{2}/[0-9]{2}/[0-9]{4}", parse.dmy))
  either.order <- nc::alternatives(
    list(parts[["money"]], " ", parts[["date"]]),
    list(parts[["date"]], " ", parts[["money"]]))
  wanted.dt <- data.table(
    money=c("EUR68.50", "EUR45.00"),
    amount=c(68.5, 45),
    date=data.table::as.IDate(c("2020-07-02", "2020-10-29")))
  expect_identical(nc::capture_first_vec(receipts, either.order), wanted.dt)
})

# altlist() with a single un-named argument is expected to be rejected.
test_that("altlist fails for no names", {
  expect_error(
    nc::altlist("foo"),
    "all arguments to altlist must be named")
})

# altlist() with no arguments is expected to raise the same naming error.
test_that("altlist fails for no args", {
  expect_error(
    nc::altlist(),
    "all arguments to altlist must be named")
})

# One un-named argument among named ones is still expected to be rejected.
test_that("altlist fails for one un-named arg", {
  expect_error(
    nc::altlist(foo="bar", "baz"),
    "all arguments to altlist must be named")
})

# Only an un-named argument is supplied, so no sub-pattern/group is defined.
test_that("alternatives_with_shared_groups errors for no named args", {
  expect_error(
    nc::alternatives_with_shared_groups("bar"),
    "alternatives_with_shared_groups must have at least one named argument; each named argument should define a sub-pattern/group")
})

# Only a named argument is supplied, so no alternative pattern is defined.
test_that("alternatives_with_shared_groups errors for no un-named args", {
  expect_error(
    nc::alternatives_with_shared_groups(foo="bar"),
    "alternatives_with_shared_groups must have at least one un-named argument; each un-named argument should define an alternative pattern")
})

# Naming the alternatives themselves is expected to add one column per
# alternative (empty when that alternative did not match) alongside the
# shared month/day/year columns.
test_that("altlist + alternatives with names ok", {
  date.strings <- c("mar 17, 1983", "26 sep 2017", "17 mar 1984")
  parts <- nc::altlist(
    month="[a-z]{3}",
    day="[0-9]{2}",
    year="[0-9]{4}")
  named.formats <- nc::alternatives(
    american=list(
      parts[["month"]], " ", parts[["day"]], ", ", parts[["year"]]),
    european=list(
      parts[["day"]], " ", parts[["month"]], " ", parts[["year"]]))
  result.dt <- nc::capture_first_vec(date.strings, named.formats)
  expect_identical(
    sort(names(result.dt)),
    c("american", "day", "european", "month", "year"))
  # Expected contents of each column, checked one column at a time.
  wanted.cols <- list(
    american=c(date.strings[1], "", ""),
    european=c("", date.strings[2:3]),
    month=c("mar", "sep", "mar"),
    day=c("17", "26", "17"),
    year=c("1983", "2017", "1984"))
  for(col.name in names(wanted.cols)){
    expect_identical(result.dt[[col.name]], wanted.cols[[col.name]])
  }
})

# Named arguments define the sub-patterns; the un-named lists refer to them
# by name. Only the month/day/year columns are expected in the result.
test_that("alternatives_with_shared_groups ok", {
  date.strings <- c("mar 17, 1983", "26 sep 2017", "17 mar 1984")
  shared.pattern <- nc::alternatives_with_shared_groups(
    month="[a-z]{3}", day="[0-9]{2}", year="[0-9]{4}",
    list(month, " ", day, ", ", year),
    list(day, " ", month, " ", year))
  result.dt <- nc::capture_first_vec(date.strings, shared.pattern)
  expect_identical(sort(names(result.dt)), c("day", "month", "year"))
  # Expected contents of each column, checked one column at a time.
  wanted.cols <- list(
    month=c("mar", "sep", "mar"),
    day=c("17", "26", "17"),
    year=c("1983", "2017", "1984"))
  for(col.name in names(wanted.cols)){
    expect_identical(result.dt[[col.name]], wanted.cols[[col.name]])
  }
})

# Try the nc package in your browser
#
# Any scripts or data that you put into this service are public.
#
# nc documentation built on Sept. 1, 2023, 1:07 a.m.