R/RcppExports.R

Defines functions re2_split .re2_replace_all_cpp re2_replace_all .re2_replace_cpp re2_replace re2_regexp .re2_quote_meta .re2_possible_match_range .re2_check_rewrite_string .re2_max_submatch .re2_match_cpp re2_count re2_match_all re2_match re2_locate_all re2_locate re2_get_options .re2_extract_replace_cpp re2_extract_replace re2_subset re2_which re2_detect .re2_number_of_capturing_groups .re2_named_capturing_groups .re2_capturing_group_names

Documented in re2_count re2_detect re2_extract_replace re2_get_options re2_locate re2_locate_all re2_match re2_match_all re2_regexp re2_replace re2_replace_all re2_split re2_subset re2_which

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

# Internal wrapper: hands `pattern` to the native routine
# `_re2_re2_capturing_group_names` through .Call() and returns whatever
# that routine produces.
.re2_capturing_group_names <- function(pattern) {
  group_names <- .Call(`_re2_re2_capturing_group_names`, pattern)
  group_names
}

# Internal wrapper: hands `pattern` to the native routine
# `_re2_re2_named_capturing_groups` through .Call() and returns whatever
# that routine produces.
.re2_named_capturing_groups <- function(pattern) {
  named_groups <- .Call(`_re2_re2_named_capturing_groups`, pattern)
  named_groups
}

# Internal wrapper: hands `pattern` to the native routine
# `_re2_re2_number_of_capturing_groups` through .Call() and returns
# whatever that routine produces.
.re2_number_of_capturing_groups <- function(pattern) {
  n_groups <- .Call(`_re2_re2_number_of_capturing_groups`, pattern)
  n_groups
}

#' Test whether string(s) contain a match for a pattern
#'
#' @description
#' The counterpart of grepl(pattern, x). Vectorized over
#'   string and pattern. See \code{\link{re2_which}} for the
#'   counterpart of grep(pattern, x).
#'
#' @inheritParams re2_match
#'
#' @return A logical vector: TRUE where a match is found, FALSE where not.
#'
#' @example inst/examples/detect.R
#'
#' @seealso
#'   \code{\link{re2_regexp}} for options to regular expression,
#'   \link{re2_syntax} for regular expression syntax, and
#'   \code{\link{re2_match}} to extract matched groups.
#'
re2_detect <- function(string, pattern) {
  found <- .Call(`_re2_re2_detect`, string, pattern)
  found
}

#' Keep the strings that match, or report their positions
#'
#' @description
#' \code{re2_subset} returns the strings that match a pattern.
#' \code{re2_which} is the counterpart of grep(pattern, x): it returns
#'   the positions of the strings that match a pattern. Vectorized over
#'   string and pattern. See \code{\link{re2_detect}} for the
#'   counterpart of grepl(pattern, x).
#'
#' @inheritParams re2_match
#'
#' @return \code{re2_subset} returns a character vector, and \code{re2_which}
#'   returns an integer vector.
#'
#' @example inst/examples/which.R
#'
#' @seealso
#'   \code{\link{re2_regexp}} for options to regular expression,
#'   \link{re2_syntax} for regular expression syntax, and
#'   \code{\link{re2_detect}} to find presence of a pattern (grepl).
#'
re2_which <- function(string, pattern) {
  positions <- .Call(`_re2_re2_which`, string, pattern)
  positions
}

#' @rdname re2_which
re2_subset <- function(string, pattern) {
  matching <- .Call(`_re2_re2_subset`, string, pattern)
  matching
}

#' Extract with substitutions
#'
#' @description
#' Works like \code{\link{re2_replace}}, except that when the pattern
#'   matches, only the "rewrite" string (with substitutions applied) is
#'   returned. The non-matching portions of "string" are discarded.
#'
#' How \code{re2_extract_replace} differs from \code{\link{re2_replace}}:
#' \preformatted{
#' > re2_extract_replace("bunny@wunnies.pl", "(.*)@([^.]*)", "\\2!\\1")
#' [1] "wunnies!bunny"
#'
#' > re2_replace("bunny@wunnies.pl", "(.*)@([^.]*)", "\\2!\\1")
#' [1] "wunnies!bunny.pl"
#' }
#' "\\1" and "\\2" refer to the capturing subgroups of the pattern.
#'
#' Vectorized over string and pattern.
#'
#' @inheritParams re2_replace
#'
#' @return A character vector with extractions.
#'
#' @example inst/examples/extract_replace.R
#'
#' @seealso
#'   \code{\link{re2_regexp}} for options to regular expression,
#'   \link{re2_syntax} for regular expression syntax. See
#'   \code{\link{re2_replace}} and \code{\link{re2_replace_all}} to replace
#'   pattern in place.
re2_extract_replace <- function(string, pattern, rewrite) {
  extracted <- .Call(`_re2_re2_extract_replace`, string, pattern, rewrite)
  extracted
}

# Internal wrapper around the native routine `_re2_re2_extract_replace_cpp`.
# All four arguments are forwarded unchanged, in order; the meaning of
# `logical` (default FALSE) is defined by the native routine.
.re2_extract_replace_cpp <- function(string, pattern, rewrite, logical = FALSE) {
  outcome <- .Call(
    `_re2_re2_extract_replace_cpp`,
    string, pattern, rewrite, logical
  )
  outcome
}

#' Retrieve options
#'
#' \code{re2_get_options} lists every option stored in an
#'   RE2 object (the internal representation of a compiled regexp).
#'
#' @param re2ptr The value returned by a call to \code{\link{re2_regexp}}.
#' @return A list of options and their values.
#'
#' @seealso \code{\link{re2_regexp}}.
#'
re2_get_options <- function(re2ptr) {
  options_list <- .Call(`_re2_re2_get_options`, re2ptr)
  options_list
}

#' Locate the start and end of pattern in a string
#'
#' @description
#' Vectorized over string and pattern. For zero-length matches (e.g.
#'   special patterns like "$") end will be one character greater than
#'   beginning.
#'
#' @inheritParams re2_match
#'
#' @return \code{re2_locate} returns an integer matrix, and
#'   \code{re2_locate_all} returns a list of integer matrices.
#'
#' @example inst/examples/locate.R
#'
#' @seealso
#'   \code{\link{re2_regexp}} for options to regular expression,
#'   \link{re2_syntax} for regular expression syntax.
re2_locate <- function(string, pattern) {
  bounds <- .Call(`_re2_re2_locate`, string, pattern)
  bounds
}

#' @rdname re2_locate
re2_locate_all <- function(string, pattern) {
  all_bounds <- .Call(`_re2_re2_locate_all`, string, pattern)
  all_bounds
}

#' Extract matched groups from a string
#'
#' @description
#' Vectorized over string and pattern. Matches a regular expression
#'    against a string and extracts the matched substrings.
#'    \code{re2_match} extracts the first matched substring, and
#'    \code{re2_match_all} extracts all matches.
#'
#' Matching regexp "(foo)|(bar)baz" on "barbazbla" will return
#'   submatches '.0' = "barbaz", '.1' = NA, and '.2' = "bar". '.0' is
#'   the entire matching text, '.1' is the first group,
#'   and so on. Groups can also be named.
#'
#' @param string A character vector, or an object which can be coerced to one.
#' @param pattern Character string containing a regular expression,
#'    or a pre-compiled regular expression (or a vector of character
#'    strings and pre-compiled regular expressions). \cr
#'   See \code{\link{re2_regexp}} for available options. \cr
#'   See \link{re2_syntax} for regular expression syntax. \cr
#' @param simplify If TRUE, the default, returns a character matrix. If FALSE,
#'   returns a list. Not applicable to \code{re2_match_all}.
#'
#' @return For \code{re2_match}, a character matrix. The first column is the
#'    entire matching text, followed by one column per capture group. If
#'    simplify is FALSE, a list of named character vectors is returned. \cr
#'    For \code{re2_match_all}, a list of character matrices.
#'
#' @example inst/examples/match.R
#'
#' @seealso
#'   \code{\link{re2_regexp}} for options to regular expression,
#'   \link{re2_syntax} for regular expression syntax.
re2_match <- function(string, pattern, simplify = TRUE) {
  first_match <- .Call(`_re2_re2_match`, string, pattern, simplify)
  first_match
}

#' @rdname re2_match
re2_match_all <- function(string, pattern) {
  every_match <- .Call(`_re2_re2_match_all`, string, pattern)
  every_match
}

#' Count the number of matches in a string
#'
#' @description
#' Vectorized over string and pattern. Matches a regular expression
#'    against a string and returns how many matches were found.
#'
#' @inheritParams re2_match
#'
#' @return An integer vector.
#'
#' @example inst/examples/count.R
#'
#' @seealso
#'   \code{\link{re2_regexp}} for options to regular expression,
#'   \link{re2_syntax} for regular expression syntax.
#'
re2_count <- function(string, pattern) {
  n_matches <- .Call(`_re2_re2_count`, string, pattern)
  n_matches
}

# Internal wrapper around the native routine `_re2_re2_match_cpp`.
# Anything supplied through `...` is collected into a single list and
# passed as the third argument after `text` and `pattern`.
.re2_match_cpp <- function(text, pattern, ...) {
  # Collect the dots first so they are evaluated before the native call.
  extra_args <- list(...)
  matched <- .Call(`_re2_re2_match_cpp`, text, pattern, extra_args)
  matched
}

# Internal wrapper: hands `rewrite` to the native routine
# `_re2_re2_max_submatch` through .Call() and returns whatever that
# routine produces.
.re2_max_submatch <- function(rewrite) {
  max_group <- .Call(`_re2_re2_max_submatch`, rewrite)
  max_group
}

# Internal wrapper: hands `pattern` and `rewrite`, in that order, to the
# native routine `_re2_re2_check_rewrite_string` through .Call() and
# returns whatever that routine produces.
.re2_check_rewrite_string <- function(pattern, rewrite) {
  check_result <- .Call(`_re2_re2_check_rewrite_string`, pattern, rewrite)
  check_result
}

# Internal wrapper around the native routine `_re2_re2_possible_match_range`.
# `pattern`, `maxlen` and `logical` are forwarded unchanged, in order; the
# meaning of `logical` (default FALSE) is defined by the native routine.
.re2_possible_match_range <- function(pattern, maxlen, logical = FALSE) {
  match_range <- .Call(
    `_re2_re2_possible_match_range`,
    pattern, maxlen, logical
  )
  match_range
}

# Internal wrapper: hands `unquoted` to the native routine
# `_re2_re2_quote_meta` through .Call() and returns whatever that
# routine produces.
.re2_quote_meta <- function(unquoted) {
  quoted <- .Call(`_re2_re2_quote_meta`, unquoted)
  quoted
}

#' Compile regular expression pattern
#'
#' \code{re2_regexp} takes a character string holding a regular
#'   expression, compiles it, and returns a pointer to the compiled object.
#'
#' @section Regexp Syntax:
#'
#' The regular expression syntax of RE2 resembles Perl's, minus some of
#'   the more complicated features. Notably, backreferences and
#'   generalized assertions are not available, and neither
#'   is \verb{\Z}.
#'
#' \link{re2_syntax} describes the syntax
#'   supported by RE2, and compares it with PCRE and PERL regexps.
#'
#' For readers not familiar with Perl's regular expressions, these are
#'   some examples of the most commonly used extensions:
#' \tabular{lll}{
#'   \verb{"hello (\\w+) world"} \tab -- \tab \\w matches a "word" character. \cr
#'   \verb{"version (\\d+)"}     \tab -- \tab \\d matches a digit. \cr
#'   \verb{"hello\\s+world"}     \tab -- \tab \\s matches any whitespace character. \cr
#'   \verb{"\\b(\\w+)\\b"}       \tab -- \tab \\b matches non-empty string at word boundary. \cr
#'   \verb{"(?i)hello"}        \tab -- \tab (?i) turns on case-insensitive matching. \cr
#'   \verb{"/\\*(.*?)\\*/"}      \tab -- \tab \verb{.*?} matches . minimum no. of times possible.
#' }
#' The double backslashes are required in ordinary R string literals.
#' They must NOT be used in raw string literals:
#' \tabular{lll}{
#'   \verb{r"(hello (\w+) world)"} \tab -- \tab \\w matches a "word" character. \cr
#'   \verb{r"(version (\d+))"}     \tab -- \tab \\d matches a digit. \cr
#'   \verb{r"(hello\s+world)"}     \tab -- \tab \\s matches any whitespace character. \cr
#'   \verb{r"(\b(\w+)\b)"}       \tab -- \tab \\b matches non-empty string at word boundary. \cr
#'   \verb{r"((?i)hello)"}        \tab -- \tab (?i) turns on case-insensitive matching. \cr
#'   \verb{r"(/\*(.*?)\*/)"}      \tab -- \tab \verb{.*?} matches . minimum no. of times possible.
#' }
#' With UTF-8 encoding, case-insensitive matching performs
#' simple case folding, not full case folding.
#'
#' @param pattern Character string containing a
#' regular expression.
#'
#' @param \dots Options, which are (defaults in parentheses):
#'
#' \tabular{ll}{
#'   \verb{encoding} \tab (\verb{"UTF8"}) String and pattern are UTF-8; Otherwise \verb{"Latin1"}.\cr
#'   \verb{posix_syntax} \tab (\verb{FALSE}) Restrict regexps to POSIX egrep syntax.\cr
#'   \verb{longest_match} \tab (\verb{FALSE}) Search for longest match, not first match.\cr
#'   \verb{max_mem} \tab (see below) Approx. max memory footprint of RE2 C++ object.\cr
#'   \verb{literal} \tab (\verb{FALSE}) Interpret pattern as literal, not regexp.\cr
#'   \verb{never_nl} \tab (\verb{FALSE}) Never match \\n, even if it is in regexp.\cr
#'   \verb{dot_nl} \tab (\verb{FALSE}) Dot matches everything including new line.\cr
#'   \verb{never_capture} \tab (\verb{FALSE}) Parse all parens as non-capturing.\cr
#'   \verb{case_sensitive} \tab (\verb{TRUE}) Match is case-sensitive (regexp can
#'                                      override with (?i) unless in posix_syntax mode).\cr
#' }
#' The options below are consulted only when \verb{posix_syntax=TRUE}.
#' With \verb{posix_syntax=FALSE}, these features are always enabled and
#' cannot be turned off; to perform multi-line matching in that case,
#' begin the regexp with (?m).
#' \tabular{ll}{
#'   \verb{perl_classes} \tab (\verb{FALSE}) Allow Perl's \verb{\\d \\s \\w \\D \\S \\W}.\cr
#'   \verb{word_boundary} \tab (\verb{FALSE}) Allow Perl's \verb{\\b \\B} (word boundary and not).\cr
#'   \verb{one_line} \tab (\verb{FALSE}) \verb{^} and \verb{$} only match beginning and end of text.\cr
#' }
#'
#' The \verb{max_mem} option limits how much memory may be used to
#' hold the compiled form of the regexp and its cached DFA
#' graphs (DFA: The execution engine that implements Deterministic
#' Finite Automaton search). Default is 8MB.
#'
#' @return Compiled regular expression.
#'
#' @example inst/examples/regexp.R
#'
#' @usage re2_regexp(pattern, \dots)
#'
#' @seealso \link{re2_syntax} has regular expression syntax.
#'
re2_regexp <- function(pattern, ...) {
  # Collect the dots first so they are evaluated before the native call.
  compile_options <- list(...)
  compiled <- .Call(`_re2_re2_regexp`, pattern, compile_options)
  compiled
}

#' Replace matched pattern in string
#'
#' @description
#' \code{re2_replace} substitutes the "rewrite" string for the first match
#'   of "pattern" in "string".
#' \preformatted{
#'   re2_replace("yabba dabba doo", "b+", "d")
#' }
#' will result in "yada dabba doo". \cr
#'
#' \code{re2_replace_all} substitutes the "rewrite" string for successive
#'   non-overlapping occurrences of "pattern" in "string".
#' \preformatted{
#'   re2_replace_all("yabba dabba doo", "b+", "d")
#' }
#' will result in "yada dada doo". \cr
#' Replacements are not subject to re-matching.
#' Since \verb{re2_replace_all} replaces only non-overlapping matches,
#'   replacing "ana" within "banana" makes only one replacement, not
#'   two.
#'
#' Vectorized over string and pattern.
#'
#' @inheritParams re2_match
#'
#' @param rewrite Rewrite string. Backslash-escaped
#'   digits (\\1 to \\9) insert the text matched by the
#'   corresponding parenthesized group of the pattern. \\0
#'   refers to the entire matching text.
#'
#' @return A character vector with replacements.
#' @example inst/examples/replace.R
#'
#' @seealso
#'   \code{\link{re2_regexp}} for options to regular expression,
#'   \link{re2_syntax} for regular expression syntax.
re2_replace <- function(string, pattern, rewrite) {
  replaced <- .Call(`_re2_re2_replace`, string, pattern, rewrite)
  replaced
}

# Internal wrapper around the native routine `_re2_re2_replace_cpp`.
# All four arguments are forwarded unchanged, in order; the meaning of
# `logical` (default FALSE) is defined by the native routine.
.re2_replace_cpp <- function(string, pattern, rewrite, logical = FALSE) {
  outcome <- .Call(
    `_re2_re2_replace_cpp`,
    string, pattern, rewrite, logical
  )
  outcome
}

#' @rdname re2_replace
re2_replace_all <- function(string, pattern, rewrite) {
  replaced <- .Call(`_re2_re2_replace_all`, string, pattern, rewrite)
  replaced
}

# Internal wrapper around the native routine `_re2_re2_replace_all_cpp`.
# All four arguments are forwarded unchanged, in order; the meaning of
# `count` (default FALSE) is defined by the native routine.
.re2_replace_all_cpp <- function(string, pattern, rewrite, count = FALSE) {
  outcome <- .Call(
    `_re2_re2_replace_all_cpp`,
    string, pattern, rewrite, count
  )
  outcome
}

#' Split string based on pattern
#'
#' @description
#' Vectorized over string and pattern.
#'
#' @inheritParams re2_match
#' @param simplify If FALSE, the default, return a list of string vectors.
#'   If TRUE, return a string matrix.
#' @param n Number of string pieces to return. Default (Inf) returns all.
#'
#' @return A list of string vectors when simplify is FALSE, or a string
#'   matrix when simplify is TRUE.
#'
#' @example inst/examples/split.R
#' @usage re2_split(string, pattern, simplify = FALSE, n = Inf)
#'
#' @seealso
#'   \code{\link{re2_regexp}} for options to regular expression,
#'   \link{re2_syntax} for regular expression syntax, and
#'   \code{\link{re2_match}} to extract matched groups.
#'
re2_split <- function(string, pattern, simplify = FALSE, n = Inf) {
  pieces <- .Call(`_re2_re2_split`, string, pattern, simplify, n)
  pieces
}

Try the re2 package in your browser

Any scripts or data that you put into this service are public.

re2 documentation built on March 29, 2022, 5:05 p.m.