Nothing
### 'R CMD check' does not Reliably compare the output of examples with
### Rout.save file.
library(re2)
############################################################
### count
color <- c("yellowgreen", "steelblue", "goldenrod", "forestgreen")
stopifnot(re2_count(color, "e") == c(3, 3, 1, 3))
stopifnot(re2_count(color, "r") == c(1, 0, 1, 2))
# Regular expression vs literal string)
stopifnot(re2_count(c("..", "a...", "foo.b"), ".") == c(2, 4, 5))
stopifnot(re2_count(c("..", "a...", "foo.b"), re2_regexp(".", literal = TRUE))
== c(2, 3, 1))
############################################################
### detect
## Character vector input
s <- c("barbazbla", "foobar", "not present here ")
pat <- "(foo)|(bar)baz"
stopifnot(re2_detect(s, pat) == c(TRUE, TRUE, FALSE))
## Use precompiled regexp
re <- re2_regexp("(foo)|(bAR)baz", case_sensitive = FALSE)
stopifnot(re2_detect(s, re) == c(TRUE, TRUE, FALSE))
############################################################
### extract_replace
# Returns extracted string with substitutions
stopifnot(re2_extract_replace(
"bunny@wunnies.pl",
"(.*)@([^.]*)",
"\\2!\\1"
) == c("wunnies!bunny"))
# Case insensitive
stopifnot(re2_extract_replace(
"BUNNY@wunnies.pl",
re2_regexp("(b.*)@([^.]*)", case_sensitive = FALSE),
"\\2!\\1"
) == c("wunnies!BUNNY"))
# Max submatch too large (1 match group, 2 submatches needed)
stopifnot(re2_extract_replace("foo", "f(o+)", "\\1\\2") == c(""))
############################################################
### locate
r1 <- c(
12, 11,
10, 9,
10, 9,
12, 11
)
r2 <- c(
3, 3,
5, 5,
3, 3,
NA, NA
)
r3 <- c(
2, 2,
3, 3,
5, 5,
4, 4
)
r4 <- c(
3, 4,
1, 2,
3, 4,
5, 6
)
color <- c("yellowgreen", "steelblue", "goldenrod", "forestgreen")
stopifnot(all(c(t(re2_locate(color, "$"))) == r1))
stopifnot(all(na.omit(c(t(re2_locate(color, "l")))) == na.omit(r2)))
stopifnot(all(c(t(re2_locate(color, "e"))) == r3))
# String length can be a multiple of pattern length
stopifnot(all(c(t(re2_locate(color, c("l(l|d)?", "st")))) == r4))
# Locate all occurrences
r <- re2_locate_all(color, "l")
stopifnot(all(c(t(r[[1]])) == c(3, 3, 4, 4)))
stopifnot(all(c(t(r[[2]])) == c(5, 5, 7, 7)))
stopifnot(all(c(t(r[[3]])) == c(3, 3)))
r <- re2_locate_all(color, "e")
stopifnot(all(c(t(r[[1]])) == c(2, 2, 9, 9, 10, 10)))
stopifnot(all(c(t(r[[2]])) == c(3, 3, 4, 4, 9, 9)))
stopifnot(all(c(t(r[[3]])) == c(5, 5)))
stopifnot(all(c(t(r[[4]])) == c(4, 4, 9, 9, 10, 10)))
r <- re2_locate_all(color, ".")
stopifnot(all(c(t(r[[1]]))
== c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7,
7, 8, 8, 9, 9, 10, 10, 11, 11)))
############################################################
### which
color <- c("yellowgreen", "steelblue", "GOLDENROD", "forestgreen")
stopifnot(all(re2_which(color, "o") == c(1, 4)))
stopifnot(all(re2_subset(color, "o") == c("yellowgreen", "forestgreen")))
stopifnot(all(re2_which(c("x", "y", NA, "foo", ""), ".") == c(1, 2, 4)))
stopifnot(all(re2_subset(c("x", "y", NA, "foo", ""), ".")
== c("x", "y", "foo")))
# Use precompiled regexp
re <- re2_regexp("[a-z]")
stopifnot(all(re2_which(color, re) == c(1, 2, 4)))
stopifnot(all(re2_subset(color, re)
== c("yellowgreen", "steelblue", "forestgreen")))
re <- re2_regexp("[a-z]", case_sensitive = FALSE)
stopifnot(all(re2_which(color, re) == c(1, 2, 3, 4)))
stopifnot(all(re2_subset(color, re)
== c("yellowgreen", "steelblue", "GOLDENROD", "forestgreen")))
# Vector of patterns
stopifnot(all(re2_which(color, c("^o", "bl.e$", re, "$")) == c(2, 3, 4)))
############################################################
### replace
string <- c("yabba dabba doo", "famabbb sb")
stopifnot(all(re2_replace(string, "b+", "d")
== c("yada dabba doo", "famad sb")))
stopifnot(all(re2_replace_all(string, "b+", "d")
== c("yada dada doo", "famad sd")))
# Rearrange matching groups in replaced string
stopifnot(all(re2_replace(
"boris@kremvax.ru",
"(.*)@([^.]*)", "\\2!\\1"
) == c("kremvax!boris.ru")))
# Use complied pattern
string <- "the quick brown fox jumps over the lazy dogs."
re <- re2_regexp("(qu|[b-df-hj-np-tv-z]*)([a-z]+)")
rewrite <- "\\2\\1ay"
stopifnot(all(re2_replace(string, re, rewrite)
== c("ethay quick brown fox jumps over the lazy dogs.")))
stopifnot(all(re2_replace_all(string, re, rewrite)
== c("ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.")))
string <- "abcd.efghi@google.com"
re <- re2_regexp("\\w+")
rewrite <- "\\0-NOSPAM"
stopifnot(all(re2_replace(string, re, rewrite)
== c("abcd-NOSPAM.efghi@google.com")))
stopifnot(all(re2_replace_all(string, re, rewrite)
== c("abcd-NOSPAM.efghi-NOSPAM@google-NOSPAM.com-NOSPAM")))
string <- "aba\naba"
re <- re2_regexp("a.*a")
rewrite <- "(\\0)"
stopifnot(all(re2_replace(string, re, rewrite) == c("(aba)\naba")))
stopifnot(all(re2_replace_all(string, re, rewrite) == c("(aba)\n(aba)")))
# Vectorize string and pattern
string <- c("ababababab", "bbbbbb", "bbbbbb", "aaaaa")
pattern <- c("b", "b+", "b*", "b*")
rewrite <- "bb"
stopifnot(all(re2_replace(string, pattern, rewrite)
== c("abbabababab", "bb", "bb", "bbaaaaa")))
stopifnot(all(re2_replace_all(string, pattern, rewrite)
== c("abbabbabbabbabb", "bb", "bb", "bbabbabbabbabbabb")))
############################################################
### regexp
re2p <- re2_regexp("hello world")
stopifnot(mode(re2p) == "externalptr")
## UTF-8 and matching interface
# By default, pattern and input text are interpreted as UTF-8.
# The Latin1 option causes them to be interpreted as Latin-1.
x <- "fa\xE7ile"
Encoding(x) <- "latin1"
stopifnot(re2_detect(x, re2_regexp("fa\xE7", encoding = "Latin1")) == TRUE)
## Case insensitive
stopifnot(re2_detect("fOobar ", re2_regexp("Foo", case_sensitive = FALSE))
== TRUE)
## Literal string (as opposed to regular expression)
## Matches only when 'literal' option is TRUE
stopifnot(re2_detect("foo\\$bar", re2_regexp("foo\\$b", literal = TRUE))
== TRUE)
stopifnot(re2_detect("foo\\$bar", re2_regexp("foo\\$b", literal = FALSE))
== FALSE)
## Use of never_nl
re <- re2_regexp("(abc(.|\n)*def)", never_nl = FALSE)
stopifnot(re2_match("abc\ndef\n", re)[1, 1:3] == c("abc\ndef", "abc\ndef", "\n"))
re <- re2_regexp("(abc(.|\n)*def)", never_nl = TRUE)
stopifnot(all(is.na(re2_match("abc\ndef\n", re)[1, 1:3])))
############################################################
### split
panagram <- c(
"The quick brown fox jumps over the lazy dog",
"How vexingly quick daft zebras jump!"
)
r <- re2_split(panagram, " quick | over | zebras ")
list1 <- list(
c("The", "brown fox jumps", "the lazy dog"),
c("How vexingly", "daft", "jump!")
)
stopifnot(identical(r, list1))
r <- re2_split(panagram, " quick | over | zebras ", simplify = TRUE)
m1 <- rbind(
c("The", "brown fox jumps", "the lazy dog"),
c("How vexingly", "daft", "jump!")
)
stopifnot(r == m1)
# Use compiled regexp
re <- re2_regexp("quick | over |how ", case_sensitive = FALSE)
r <- re2_split(panagram, re)
list2 <- list(
c("The ", "brown fox jumps", "the lazy dog"),
c("", "vexingly ", "daft zebras jump!")
)
stopifnot(identical(r, list2))
r <- re2_split(panagram, re, simplify = TRUE)
m2 <- rbind(
c("The ", "brown fox jumps", "the lazy dog"),
c("", "vexingly ", "daft zebras jump!")
)
stopifnot(r == m2)
# Restrict number of matches
r <- re2_split(panagram, " quick | over | zebras ", n = 2)
list3 <- list(
c("The", "brown fox jumps over the lazy dog"),
c("How vexingly", "daft zebras jump!")
)
stopifnot(identical(r, list3))
############################################################
### match
## Substring extraction
strings <- c("barbazbla", "foobar")
pattern <- "(foo)|(?P<TestGroup>bar)baz"
result <- re2_match(strings, pattern)
m <- rbind(
c("barbaz", NA, "bar"),
c("foo", "foo", NA)
)
lvec <- function(lst) {
na.omit(unlist(lst))
}
mvec <- function(mat) {
na.omit(c(mat))
}
stopifnot(mvec(result) == mvec(m))
stopifnot(is.matrix(result))
stopifnot(names(result) == c(".0", ".1", "TestGroup"))
result <- re2_match(strings, pattern, simplify = FALSE)
lst <- list(
rbind(
c("barbaz", NA, "bar")
),
rbind(
c("foo", "foo", NA)
)
)
stopifnot(lvec(result) == lvec(lst))
stopifnot(is.list(result))
stopifnot(is.character(result[[1]]))
## Compile regexp
re <- re2_regexp("(foo)|(BaR)baz", case_sensitive = FALSE)
r <- re2_match(strings, re)
m <- rbind(
c("barbaz", NA, "bar"),
c("foo", "foo", NA)
)
stopifnot(mvec(r) == mvec(m))
strings <- c(
"Home: 743 733 5365", "373-733-5753 ", "foobar",
"733.335.3457 and Work: 573-433-7577 "
)
re <- re2_regexp("([0-9]{3})[- .]([0-9]{3})[- .]([0-9]{4})")
r <- re2_match(strings, re)
m <- rbind(
c("743 733 5365", "743", "733", "5365"),
c("373-733-5753", "373", "733", "5753"),
c(NA, NA, NA, NA),
c("733.335.3457", "733", "335", "3457")
)
stopifnot(mvec(r) == mvec(m))
## Vectorized over patterns
r <- re2_match(strings, c(re, "53 $", "^foo", re))
m <- rbind(
c("743 733 5365", "743", "733", "5365"),
c("53 ", NA, NA, NA),
c("foo", NA, NA, NA),
c("733.335.3457", "733", "335", "3457")
)
stopifnot(mvec(r) == mvec(m))
## Match all occurances, not just the first
r <- re2_match_all(strings, re)
lst <- list(
rbind(
c("743 733 5365", "743", "733", "5365")
),
rbind(
c("373-733-5753", "373", "733", "5753")
),
rbind(
c("733.335.3457", "733", "335", "3457"),
c("573-433-7577", "573", "433", "7577")
)
)
stopifnot(unlist(r) == unlist(lst))
r <- re2_match_all("ruby:1234 68 red:92 blue:", "(\\w+):(\\d+)")
m <- rbind(
c("ruby:1234", "ruby", "1234"),
c("red:92", "red", "92")
)
stopifnot(r[[1]] == m)
## Vectorized over patterns (matching all occurances)
r <- re2_match_all(strings, c(re, "53 $", "^foo", re))
lst <- list(
rbind(
c("743 733 5365", "743", "733", "5365")
),
rbind(
c("53 ")
),
rbind(
c("foo")
),
rbind(
c("733.335.3457", "733", "335", "3457"),
c("573-433-7577", "573", "433", "7577")
)
)
stopifnot(unlist(r) == unlist(lst))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.