knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
(subject.size.vec <- unique(as.integer(10^seq(0,3.5,l=100)))) (backtrackers <- c( if(requireNamespace("stringi"))atime::atime_grid( ICU=stringi::stri_match(subject, regex=pattern)), atime::atime_grid( PCRE=regexpr(pattern, subject, perl=TRUE), TRE=regexpr(pattern, subject, perl=FALSE)))) backtrackers.result <- atime::atime( N=subject.size.vec, setup={ subject <- paste(rep("a", N), collapse="") pattern <- paste(rep(c("(a)?", "\\1"), each=N), collapse="") }, expr.list=backtrackers) backtrackers.best <- atime::references_best(backtrackers.result) plot(backtrackers.best)
The plot above shows that ICU/PCRE/TRE are all exponential in N (subject/pattern size) when the pattern contains backreferences.
all.exprs <- c( if(requireNamespace("re2"))atime::atime_grid( RE2=re2::re2_match(subject, pattern)), backtrackers) all.result <- atime::atime( N=subject.size.vec, setup={ subject <- paste(rep("a", N), collapse="") pattern <- paste(rep(c("a?", "a"), each=N), collapse="") }, expr.list=all.exprs) all.best <- atime::references_best(all.result) plot(all.best)
The plot above shows that ICU/PCRE are exponential time whereas
RE2/TRE are polynomial time. Exercise for the reader: modify the above
code to use the seconds.limit
argument so that you can see what
happens to ICU/PCRE for larger N (hint: you should see a difference at
larger sizes).
(all.pred <- predict(all.best)) summary(all.pred)
The predict
method above returns a list with a new element named
prediction
, which shows the data sizes that can be computed with a
given time budget. The plot
method is used below,
plot(all.pred)
atime_grid
to compare different enginesIn the nc
package there is an engine
argument which controls which
C regex library is used:
nc.exprs <- atime::atime_grid( list(ENGINE=c( if(requireNamespace("re2"))"RE2", "PCRE", if(requireNamespace("stringi"))"ICU")), nc=nc::capture_first_vec(subject, pattern, engine=ENGINE)) nc.result <- atime::atime( N=subject.size.vec, setup={ rep.collapse <- function(chr)paste(rep(chr, N), collapse="") subject <- rep.collapse("a") pattern <- list(maybe=rep.collapse("a?"), rep.collapse("a")) }, expr.list=nc.exprs) nc.best <- atime::references_best(nc.result) plot(nc.best)
The result/plot above is consistent with the previous result.
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.