# Nothing
# Set the context label under which the tests in this file are reported.
context("detect pattern")
# Exercises re2_detect() against a large table of pattern/text pairs, followed
# by checks of vectorisation and recycling, case-insensitive matching, and
# character classes that can never match.
test_that("detect pattern", {
  # Assert that `pattern` is detected in `text` on the default path, on the
  # parallel path, and on the parallel path over 10000 copies of `text` with
  # grain_size = 1.
  tt <- function(pattern, text) {
    expect_true(re2_detect(text, pattern))
    expect_true(re2_detect(text, pattern, parallel = TRUE))
    expect_true(all(
      re2_detect(rep(text, 10000), pattern, parallel = TRUE, grain_size = 1)
    ))
  }

  # ASCII "abc12323" followed by the bytes c2bc, c2bd, c2be and e28280
  # (U+00BC, U+00BD, U+00BE and U+2080 when read as UTF-8).
  numbers <- rawToChar(as.raw(c(
    0x61, 0x62, 0x63, 0x31, 0x32, 0x33, 0x32, 0x33,
    0xc2, 0xbc, 0xc2, 0xbd, 0xc2, 0xbe, 0xe2, 0x82, 0x80
  )))
  # "a", U+03B1, U+03B2, "b": Latin letters surrounding two Greek letters.
  latin_greek <- stringi::stri_enc_fromutf32(c(97L, 945L, 946L, 98L))

  tt("a", "a")
  tt("a", "zyzzyva")
  tt("a+", "aa")
  tt("(a+|b)+", "ab")
  tt("ab|cd", "xabcdx")
  tt("h.*od?", "hello\ngoodbye\n")
  tt("h.*o", "hello\ngoodbye\n")
  tt("h.*o", "goodbye\nhello\n")
  tt("h.*o", "hello world")
  tt("h.*o", "othello, world")
  tt("[^\\s]", "aaaaaaa")
  tt("a", "aaaaaaa")
  tt("a*", "aaaaaaa")
  tt("a*", "")
  tt("ab|cd", "xabcdx")
  tt("a", "cab")
  tt("a*b", "cab")
  tt("((((((((((((((((((((x))))))))))))))))))))", "x")
  tt("[abcd]", "xxxabcdxxx")
  tt("[^x]", "xxxabcdxxx")
  tt("[abcd]+", "xxxabcdxxx")
  tt("[^x]+", "xxxabcdxxx")
  tt("(fo|foo)", "fo")
  tt("(foo|fo)", "foo")

  # Make sure ^ and $ work.
  # The pathological cases didn't work
  # in the original grep code.
  tt("foo|bar|[A-Z]", "foo")
  tt("^(foo|bar|[A-Z])", "foo")
  tt("(foo|bar|[A-Z])$", "foo")
  tt("^(foo|bar|[A-Z])$", "foo")
  tt("^(foo|bar|[A-Z])$", "bar")
  tt("^(foo|bar|[A-Z])$", "X")
  # tt( "^(foo|bar|[A-Z])$", "XY" )
  tt("^(fo|foo)$", "fo")
  tt("^(fo|foo)$", "foo")
  tt("^^(fo|foo)$", "fo")
  tt("^^(fo|foo)$", "foo")
  tt("^$", "")
  tt("^^$", "")
  tt("^$$", "")
  tt("^^$$", "")
  tt("^^^^^^^^$$$$$$$$", "")
  tt("^", "x")
  tt("$", "x")

  # Word boundaries.
  tt("\\bfoo\\b", "nofoo foo that")
  tt("a\\b", "faoa x")
  tt("\\bbar", "bar x")
  tt("bar\\b", "foobar")
  tt("bar\\b", "foobar\nxxx")
  tt("(foo|bar|[A-Z])\\b", "foo")
  tt("(foo|bar|[A-Z])\\b", "foo\n")
  tt("\\b", "x")
  tt("\\b(foo|bar|[A-Z])", "foo")
  tt("\\b(foo|bar|[A-Z])\\b", "X")
  tt("\\b(foo|bar|[A-Z])\\b", "ffoo bbar N x")
  tt("\\b(fo|foo)\\b", "fo")
  tt("\\b(fo|foo)\\b", "foo")
  tt("\\b\\b", "x")
  tt("\\b$", "x")
  tt("\\b$", "y x")
  tt("\\b.$", "x")
  tt("^\\b(fo|foo)\\b", "fo")
  tt("^\\b(fo|foo)\\b", "foo")
  tt("^\\b", "x")
  tt("^\\b\\b", "x")
  tt("^\\b.$", "x")
  tt("^\\b.\\b$", "x")
  tt("^^^^^^^^\\b.$$$$$$", "x")

  # Non-word boundaries.
  tt("\\Bfoo\\B", "n foo xfoox that")
  tt("a\\B", "faoa x")
  tt("(foo|bar|[A-Z])\\B", "foox")
  tt("\\B", "")
  tt("\\B(foo|bar|[A-Z])\\B", "xXy")
  tt("\\B(foo|bar|[A-Z])\\B", "XYZ")
  tt("\\B(foo|bar|[A-Z])\\B", "abara")
  tt("\\B(foo|bar|[A-Z])\\B", "xfoo_")
  tt("\\B(foo|bar|[A-Z])\\B", "foo bar vNx")
  tt("\\B(fo|foo)\\B", "xfoo")
  tt("\\B(foo|fo)\\B", "xfooo")
  tt("\\B\\B", "")
  tt("\\B$", "")
  tt("^\\B", "")
  tt("^\\B\\B", "")
  tt("^\\B$", "")
  tt("^^^^^^^^\\B$$$$$$$", "")

  # PCRE uses only ASCII for \b computation.
  # All non-ASCII are *not* word characters.
  tt("\\bx\\b", "x")
  tt("\\bx\\b", "x>")
  tt("\\bx\\b", "<x")
  tt("\\bx\\b", "<x>")
  tt("\\bx\\b", rawToChar(as.raw(c(0xc2, 0xab, 0x78))))
  tt("\\bx\\b", rawToChar(as.raw(c(0x78, 0xc2, 0xbb))))
  tt("\\bx\\b", rawToChar(as.raw(c(0xc2, 0xab, 0x78, 0xc2, 0xbb))))

  # Weird boundary cases.
  tt("^$^$", "")
  tt("^$^", "")
  tt("$^$", "")
  tt("(foo\\$)", "foo$bar")
  tt("^...$", "abc")

  # UTF-8
  tt("^\xe6\x9c\xac$", "\xe6\x9c\xac")
  tt("...", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e")
  tt("...", ".\xe6\x9c\xac.")
  tt("\\C\\C\\C", "\xe6\x9c\xac")
  tt("\\C", "\xe6\x9c\xac")
  tt("\\C\\C\\C", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e")

  # Latin1
  tt("...", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e")
  tt("...", ".\xe6\x9c\xac.")

  # Perl v Posix
  tt("\\B(fo|foo)\\B", "xfooo")
  tt("(fo|foo)", "foo")

  # Octal escapes.
  tt("\\141", "a")
  tt("\\060", "0")
  tt("\\0600", "00")
  tt("\\608", "08")
  tt("\\01", "\01")
  tt("\\018", "\018")

  # Hexadecimal escapes
  tt("\\x{61}", "a")
  tt("\\x61", "a")
  tt("\\x{00000061}", "a")

  # Unicode scripts.
  tt("\\p{Greek}+", latin_greek)
  tt("\\P{Greek}+", latin_greek)
  tt("\\p{^Greek}+", latin_greek)
  tt("\\P{^Greek}+", latin_greek)

  # Unicode properties. Nd is decimal number. N is any number.
  tt("[^0-9]+", "abc123")
  tt("\\p{Nd}+", numbers)
  tt("\\p{^Nd}+", numbers)
  tt("\\P{Nd}+", numbers)
  tt("\\P{^Nd}+", numbers)
  tt("\\pN+", numbers)
  tt("\\p{N}+", numbers)
  tt("\\p{^N}+", numbers)
  tt("\\p{Any}+", "abc123")

  # Character classes & case folding.
  tt("(?i)[@-A]+", rawToChar(as.raw(c(0x40, 0x41, 0x61, 0x42)))) # matches @Aa but not B
  tt("(?i)[A-Z]+", "aAzZ")
  tt("(?i)[^\\\\]+", "Aa\\") # \\ is between A-Z and a-z -
  # splits the ranges in an interesting way.
  # would like to use, but PCRE mishandles in full-match, non-greedy mode
  # tt( "(?i)[\\\\]+", "Aa" )
  tt("(?i)[acegikmoqsuwy]+", "acegikmoqsuwyACEGIKMOQSUWY")

  # Character classes & case folding.
  tt("[@-A]+", rawToChar(as.raw(c(0x40, 0x41, 0x61, 0x42))))
  tt("[A-Z]+", "aAzZ")
  tt("[^\\\\]+", "Aa\\")
  tt("[acegikmoqsuwy]+", "acegikmoqsuwyACEGIKMOQSUWY")

  # Anchoring. (^abc in aabcdef was a former bug)
  # The tester checks for a match in the text and
  # subpieces of the text with a byte removed on either side.
  tt("^abc", "abcdef")
  tt("abc", "aabcdef")
  tt("^[ay]*[bx]+c", "abcdef")
  tt("^[ay]*[bx]+c", "aabcdef")
  tt("def$", "abcdef")
  tt("def", "abcdeff")
  tt("d[ex][fy]$", "abcdef")
  tt("d[ex][fy]", "abcdeff")
  tt("[dz][ex][fy]$", "abcdef")
  tt("[dz][ex][fy]", "abcdeff")
  tt("(?m)^abc", "abcdef")
  tt("(?m)^[ay]*[bx]+c", "abcdef")
  tt("(?m)def$", "abcdef")
  tt("(?m)def", "abcdeff")
  tt("(?m)d[ex][fy]$", "abcdef")
  tt("(?m)d[ex][fy]", "abcdeff")
  tt("(?m)[dz][ex][fy]$", "abcdef")
  tt("(?m)[dz][ex][fy]", "abcdeff")
  tt("^", "a")
  tt("^^", "a")

  # Context.
  # The tester checks for a match in the text and
  # subpieces of the text with a byte removed on either side.
  tt("a", "a")
  tt("ab*", "a")
  tt("a\\C*", "a")
  tt("a\\C?", "a")
  tt("a\\C*?", "a")
  tt("a\\C??", "a")

  # Former bugs.
  tt("a\\C*|ba\\C", "baba")

  # stringi
  expect_identical(re2_detect(NA, NA), NA)
  # not working
  # expect_identical(re2_detect(character(0), character(0)), logical(0))
  # suppressWarnings(expect_identical(re2_detect("", ""), NA))
  # suppressWarnings(expect_identical(re2_detect("a", ""), NA))
  suppressWarnings(expect_identical(re2_detect("", "a"), FALSE))
  expect_identical(re2_detect(c("", "ala", "ola"), "ala"), c(FALSE, TRUE, FALSE))
  expect_identical(
    re2_detect(c("", "ala", "ala", "bbb"), c("ala", "bbb")),
    c(FALSE, FALSE, TRUE, TRUE)
  )
  expect_identical(
    re2_detect(c("ala", "", "", "bbb"), c("ala", "bbb")),
    c(TRUE, FALSE, FALSE, TRUE)
  )
  expect_identical(re2_detect('', c('.*', '.+', '^.*$')), c(TRUE, FALSE, TRUE))
  expect_identical(re2_detect('a', c('a', 'b', 'c')), c(TRUE, FALSE, FALSE))
  expect_identical(re2_detect(c('a', 'b', 'c'), 'a'), c(TRUE, FALSE, FALSE))
  # Input and pattern lengths differ below; any recycling warning is silenced
  # so that only the returned values are checked.
  suppressWarnings(expect_identical(
    re2_detect(LETTERS[1:2], LETTERS[1:3]),
    c(TRUE, TRUE, FALSE)
  ))
  suppressWarnings(expect_identical(
    re2_detect(LETTERS[1:3], LETTERS[1:5]),
    c(TRUE, TRUE, TRUE, FALSE, FALSE)
  ))
  suppressWarnings(expect_identical(
    re2_detect(LETTERS[1:2], LETTERS[1:5]),
    c(TRUE, TRUE, FALSE, FALSE, FALSE)
  ))
  suppressWarnings(expect_identical(
    re2_detect(LETTERS[1:4], LETTERS[1:5]),
    c(TRUE, TRUE, TRUE, TRUE, FALSE)
  ))
  expect_identical(
    re2_detect(c("\u0105\u0106\u0107", "\u0105\u0107"), "\u0106*"),
    c(TRUE, TRUE)
  ) # match of zero length
  # expect_identical(re2_detect(c("\u0105\u0106\u0107", "\u0105\u0107"), "(?<=\u0106)"), c(TRUE, FALSE)) # match of zero length:
  s <- c("Lorem", "123", " ", " ", "kota", "4\t\u0105")
  p <- c("[[:alpha:]]+", "[[:blank:]]+")
  expect_identical(re2_detect(s, p), c(TRUE, FALSE, FALSE, TRUE, TRUE, TRUE))
  expect_identical(
    re2_detect("Lo123\trem", c("[[:alpha:]]", "[4-9]+")),
    c(TRUE, FALSE)
  )
  expect_warning(re2_detect(rep("asd", 5), rep("[A-z]", 2)))
  expect_false(re2_detect("Ala", "{}"))
  expect_identical(re2_detect("aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"), TRUE)
  expect_identical(re2_detect("aaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaa"), TRUE)
  expect_identical(re2_detect("aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaa"), TRUE)
  expect_equivalent(
    re2_detect("\u0105\u0105\u0105\u0105\u0105\u0105\u0105b", "\u0105+b$"),
    TRUE
  )
  expect_equivalent(re2_detect("aaaab", "ab"), TRUE)
  expect_equivalent(re2_detect("bababababaab", "aab"), TRUE)
  expect_equivalent(re2_detect("caabaab", "(a+b)+"), TRUE)
  expect_equivalent(re2_detect("caacbaab", "(a+b)+"), TRUE)
  expect_equivalent(re2_detect("caacbacab", "(a+b)+"), TRUE)
  expect_equivalent(re2_detect("caacbacacb", "(a+b)+"), FALSE)

  # Each entry is c(pattern, text); the pattern must be found in the text
  # when case_sensitive = FALSE.
  case_list <- list(
    c("aa", "aA"),
    c("a", "Aa"),
    c("a", "A"),
    c("ABC", "abc"),
    c("abc", "XABCY"),
    c("ABC", "xabcy")
  )
  for (x in case_list) {
    # print(x)
    expect_true(re2_detect(x[2], x[1], case_sensitive = FALSE))
  }

  # Character classes that exclude every character must not match "abc".
  empty_charset <- c(
    "[^\\S\\s]",
    "[^\\S[:space:]]",
    "[^\\D\\d]",
    "[^\\D[:digit:]]"
  )
  for (x in empty_charset) {
    expect_false(re2_detect("abc", x))
  }

  # Patterns combining empty groups with an optional never-matching class
  # must still match the empty string.
  bitstate_assumptions <- c(
    "((((()))))[^\\S\\s]?",
    "((((()))))([^\\S\\s])?",
    "((((()))))([^\\S\\s]|[^\\S\\s])?",
    "((((()))))(([^\\S\\s]|[^\\S\\s])|)"
  )
  for (x in bitstate_assumptions) {
    expect_true(re2_detect("", x))
  }
})
# Checks that re2_detect() still matches on very long inputs and on a large
# bounded repetition.
test_that("bad regex", {
  # "x*" + 131072 copies of "a" + "*x". stri_dup is namespace-qualified so the
  # test does not depend on stringi being attached.
  strlong <- paste0("x*", stringi::stri_dup("a", 131072), "*x")
  expect_true(re2_detect(strlong, "((?:\\s|xx.*\n|x[*](?:\n|.)*?[*]x)*)"))

  # 515 copies of "c" followed by "x" must satisfy ".{512}x".
  strlong_2 <- paste0(stringi::stri_dup("c", 515), 'x')
  expect_true(re2_detect(strlong_2, ".{512}x"))
})
# Checks patterns compiled with never_nl = TRUE against texts that contain
# newlines.
test_that("never \n", {
  # Each case is c(pattern, text, expected). `expected` is the first element
  # of the re2_match() result, or NA when the pattern must not be detected.
  never_cases <- list(
    c("(.*)", "abc\ndef\nghi\n", "abc"),
    c("(?s)(abc.*def)", "abc\ndef\n", NA),
    c("(abc(.|\n)*def)", "abc\ndef\n", NA),
    c("(abc[^x]*def)", "abc\ndef\n", NA),
    c("(abc[^x]*def)", "abczzzdef\ndef\n", "abczzzdef")
  )
  for (x in never_cases) {
    # Compile once; both branches use the same never_nl regex.
    regex <- re2(x[1], never_nl = TRUE)
    if (is.na(x[3])) {
      expect_false(re2_detect(x[2], regex))
    } else {
      expect_identical(re2_match(x[2], regex)[[1]], x[3])
    }
  }
})
# Checks how the dot_nl option interacts with an inline (?-s) flag and with
# never_nl when the text is a single newline.
test_that("dot nl", {
  # With dot_nl, "." matches a newline.
  expect_true(re2_detect("\n", re2(".", dot_nl = TRUE)))
  # The inline (?-s) flag turns that off again.
  expect_false(re2_detect("\n", re2("(?-s).", dot_nl = TRUE)))
  # Adding never_nl also prevents the match.
  expect_false(re2_detect("\n", re2(".", dot_nl = TRUE, never_nl = TRUE)))
})
# With never_capture = TRUE a pattern containing two parenthesised groups must
# report zero groups.
test_that("never capture", {
  # Actual value first, expected second, so failure messages read correctly.
  expect_identical(
    get_number_of_groups(re2("(A)(v)", never_capture = TRUE)),
    0L
  )
})
# Checks that longest_match changes which alternative of "(a|aaa)" is
# returned first by re2_match() for the text "aaabaaaa".
test_that("longest match", {
  text <- "aaabaaaa"
  expect_identical(
    re2_match(text, re2("(a|aaa)", longest_match = TRUE))[[1]],
    "aaa"
  )
  expect_identical(
    re2_match(text, re2("(a|aaa)", longest_match = FALSE))[[1]],
    "a"
  )
})
# A non-ASCII string must still be detected by a pattern built from the same
# string when the regex is compiled with utf_8 = FALSE.
test_that("utf_8 option", {
  # Code points 20013 and 25991 (U+4E2D, U+6587). Built once and
  # namespace-qualified so the test does not depend on stringi being attached.
  two_chars <- stringi::stri_enc_fromutf32(c(20013L, 25991L))
  expect_true(re2_detect(two_chars, re2(two_chars, utf_8 = FALSE)))
})
# Checks get_simplify() for "[a-z]" compiled normally and with literal = TRUE.
test_that("literal option", {
  # Compiled as a regex, the character class is reported unchanged.
  expect_identical(get_simplify(re2("[a-z]")), "[a-z]")
  # Compiled as a literal, the bracket and dash characters come back escaped.
  expect_identical(
    get_simplify(re2("[a-z]", literal = TRUE)),
    "\\[a\\-z\\]"
  )
})
# Compiling with max_mem = 3 is expected to fail with an error.
test_that("max mem", {
  tiny_budget <- 3
  expect_error(
    re2("asdddd*", max_mem = tiny_budget)
  )
})
# Checks which escapes compile under posix_syntax and how "^"/"$" behave on a
# two-line text with and without one_line.
test_that("posix_syntax option", {
  # "\b" is rejected under posix_syntax unless word_boundary is also set.
  # expect_error(..., NA) asserts that no error is raised.
  expect_error(re2("\\b", posix_syntax = TRUE))
  expect_error(re2("\\b"), NA)
  expect_error(re2("\\b", posix_syntax = TRUE, word_boundary = TRUE), NA)

  # "\d" is rejected under posix_syntax unless perl_classes is also set.
  expect_error(re2("\\d"), NA)
  expect_error(re2("\\d", posix_syntax = TRUE, perl_classes = TRUE), NA)
  expect_error(re2("\\d", posix_syntax = TRUE))

  # Two-line text, written with an escape so re-indenting this file cannot
  # change the string.
  two_lines <- "s\nsdsd"
  expect_false(re2_detect(two_lines, re2("^sdsd$")))
  expect_true(re2_detect(two_lines, re2("^sdsd$", posix_syntax = TRUE)))
  expect_false(re2_detect(
    two_lines,
    re2("^sdsd$", posix_syntax = TRUE, one_line = TRUE)
  ))
})
# re2_detect() must return the same element-wise result on the default path,
# the parallel path, and the parallel path with grain_size = 1.
test_that("vectorize detect", {
  # Each case holds the input strings, the patterns, and the expected result.
  cases <- list(
    list(
      strings = c("a", "b"),
      patterns = c("a", "b"),
      expected = c(TRUE, TRUE)
    )
  )
  for (case in cases) {
    strings <- case$strings
    patterns <- case$patterns
    expected <- case$expected
    expect_identical(re2_detect(strings, patterns), expected)
    expect_identical(re2_detect(strings, patterns, parallel = TRUE), expected)
    expect_identical(
      re2_detect(strings, patterns, parallel = TRUE, grain_size = 1),
      expected
    )
  }
})
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.