Nothing
context("rex")
test_that("start works", {
r <- rex(
start, letter
)
expect_true(grepl(r, "abcdef"))
expect_false(grepl(r, "123456", perl = TRUE))
})
test_that("end works", {
r <- rex(
"Z", end
)
expect_true(grepl(r, "abcZ"))
expect_false(grepl(r, "abc"))
})
test_that("version parsing works", {
r <- rex(
start,
capture(numbers),
any,
capture(numbers),
any,
capture(numbers),
any,
capture(numbers),
end
)
expect_identical(
gsub(r, "\\1 \\2 \\3 \\4", "3.1.1-1", perl = TRUE),
"3 1 1 1"
)
})
test_that("verbs in rex work", {
r <- rex(
start, "foo", zero_or_more(any), "bar", end
)
expect_true(grepl(r, "fooABCbar", perl = TRUE))
expect_true(grepl(r, "foo123\tbar", perl = TRUE))
})
test_that("Simple URL parsing works", {
# TODO: get these working better again
## Decompose a URL into its components.
## Example by LT (http://www.cs.uiowa.edu/~luke/R/regexp.html).
x <- "http://stat.umn.edu:80/xyz"
re <- "^(?:(((?:(?:(?!:).)*)+)://))?((?:(?:(?!:/).)*)+)(?:(:((?:[[:digit:]]+)+)))?(?:(/(?:.)*))?$"
#m <- regexec(re, x)
#m
#regmatches(x, m)
## Element 3 is the protocol, 4 is the host, 6 is the port, and 7
## is the path. We can use this to make a function for extracting the
## parts of a URL:
URL_parts <- function(x) {
m <- regexec(re, x)
parts <- do.call(rbind,
lapply(regmatches(x, m), `[`, c(3L, 4L, 6L, 7L)))
colnames(parts) <- c("protocol","host","port","path")
parts
}
#URL_parts(x)
r <- rex(
start,
## match the protocol -- may exist or may not
maybe(capture(
capture(except_some_of(":")),
"://"
)),
## match the path
capture(one_or_more(not(":/"))),
## get the port
maybe(capture(":", capture(numbers))),
## and the rest
maybe(capture("/", anything)),
end
)
rbind(r = r, m = re)
n <- gregexpr(r, x, perl = TRUE)[[1]]
split_matches <- function(string, matches) {
starts <- attr(matches, "capture.start")
lengths <- attr(matches, "capture.length")
ends <- starts + lengths - 1
c(string, substring(string, starts, ends))
}
#split_matches(x, n)
#regmatches(x, m)[[1]]
#expect_equal(regmatches(x, m)[[1]], split_matches(x, n))
})
context("URL Validation")
test_that("URL Validation works", {
valid_chars <- rex(except_some_of(".", "/", " ", "-"))
`%>%` <- magrittr::`%>%`
re <- rex(
start,
# protocol identifier (optional) + //
group(list("http", maybe("s")) %or% "ftp", "://"),
# user:pass authentication (optional)
maybe(non_spaces,
maybe(":", zero_or_more(non_space)),
"@"),
#host name
group(zero_or_more(valid_chars, zero_or_more("-")), one_or_more(valid_chars)),
#domain name
zero_or_more(".", zero_or_more(valid_chars, zero_or_more("-")), one_or_more(valid_chars)),
#TLD identifier
group(".", valid_chars %>% at_least(2)),
# server port number (optional)
maybe(":", digit %>% between(2, 5)),
# resource path (optional)
maybe("/", non_space %>% zero_or_more()),
end
)
good <- c("http://foo.com/blah_blah",
"http://foo.com/blah_blah/",
"http://foo.com/blah_blah_(wikipedia)",
"http://foo.com/blah_blah_(wikipedia)_(again)",
"http://www.example.com/wpstyle/?p=364",
"https://www.example.com/foo/?bar=baz&inga=42&quux",
"http://✪df.ws/123",
"http://userid:password@example.com:8080",
"http://userid:password@example.com:8080/",
"http://userid@example.com",
"http://userid@example.com/",
"http://userid@example.com:8080",
"http://userid@example.com:8080/",
"http://userid:password@example.com",
"http://userid:password@example.com/",
"http://➡.ws/䨹",
"http://⌘.ws",
"http://⌘.ws/",
"http://foo.com/blah_(wikipedia)#cite-1",
"http://foo.com/blah_(wikipedia)_blah#cite-1",
"http://foo.com/unicode_(✪)_in_parens",
"http://foo.com/(something)?after=parens",
"http://☺.damowmow.com/",
"http://code.google.com/events/#&product=browser",
"http://j.mp",
"ftp://foo.bar/baz",
"http://foo.bar/?q=Test%20URL-encoded%20stuff",
"http://مثال.إختبار",
"http://例子.测试",
"http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com",
"http://1337.net",
"http://a.b-c.de",
"http://223.255.255.254")
bad <- c(
"http://",
"http://.",
"http://..",
"http://../",
"http://?",
"http://??",
"http://??/",
"http://#",
"http://##",
"http://##/",
"http://foo.bar?q=Spaces should be encoded",
"//",
"//a",
"///a",
"///",
"http:///a",
"foo.com",
"rdar://1234",
"h://test", # nolint
"http:// shouldfail.com",
":// should fail",
"http://foo.bar/foo(bar)baz quux",
"ftps://foo.bar/",
"http://-error-.invalid/",
"http://-a.b.co",
"http://a.b-.co",
"http://0.0.0.0",
"http://3628126748",
"http://.www.foo.bar/",
"http://www.foo.bar./",
"http://.www.foo.bar./")
lapply(good, function(x) {
expect_true(grepl(re, x, perl = TRUE), info=x)
})
lapply(bad, function(x) {
expect_false(grepl(re, x, perl = TRUE), info=x)
})
})
context("start")
test_that("matches basic characters", {
expect_equal(rex(start, "f"), regex("^f"))
})
test_that("escapes special characters", {
expect_equal(rex(start, "."), regex("^\\."))
})
test_that("matches basic characters", {
expect_equal(rex(start, "x" %>% n_times(3)), regex("^(?:x){3}"))
})
test_that("matches special identifiers", {
expect_equal(rex(start, number %>% n_times(2)), regex("^(?:[[:digit:]]){2}"))
})
context("appending expressions")
test_that("adds basic characters", {
expect_equal(rex("x", "y", "z"),
regex("xyz"))
expect_equal(rex("x", maybe("y"), "z"),
regex("x(?:y)?z"))
})
test_that("escapes special characters", {
expect_equal(rex(numbers %>% between(0, 2), ".", "$", end),
regex("(?:[[:digit:]]+){0,2}\\.\\$$"))
})
context("shortcuts - end")
test_that("matches basic characters", {
expect_equal(rex("x", "y", end),
regex("xy$"))
})
test_that("escapes special characters", {
expect_equal(rex("x", "$", end),
regex("x\\$$"))
})
context("general regex")
test_that("returns a well-formed regex", {
expect_equal(rex(start, "w", "x" %or% "y", "z", end),
regex("^w(?:x|y)z$"))
})
context("rex examples")
re <-
rex(start,
number %>% n_times(3),
"-",
letter %>% n_times(2),
maybe("#"),
"a" %or% "b",
"c" %>% between(2, 4),
"$",
end
)
expect_true(grepl(re, "123-xy#accc$", perl=TRUE))
expect_true(grepl(re, "999-dfbcc$"))
expect_false(grepl(re, "000-df#baccccccccc$"))
expect_false(grepl(re, "444-dd3ac$"))
context("issues")
test_that("#11 Modifiers and named character classes", {
p <- rex(none_of(alpha))
expect_true(grepl(p, "6", perl = TRUE))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.