Nothing
#' Character classes
#'
#' Match character classes.
#' @param lo A non-negative integer. Minimum number of repeats, when grouped.
#' @param hi positive integer. Maximum number of repeats, when grouped.
#' @param char_class A logical value. Should \code{x} be wrapped in a character
#' class? If \code{NA}, the function guesses whether that's a good idea.
#' @return A character vector representing part or all of a regular expression.
#' @note R has many built-in locale-dependent character classes, like
#' \code{[:alnum:]} (representing alphanumeric characters, that is lower or
#' upper case letters or numbers). Some of these behave in unexpected ways
#' when using the ICU engine (that is, when using \code{stringi} or
#' \code{stringr}). See the punctuation example. For these engines, using
#' Unicode properties (\code{\link[rebus.unicode]{UnicodeProperty}}) may give
#' you a more reliable match.
#' There are also some generic character classes like \code{\\w} (representing
#' lower or upper case letters or numbers or underscores). Since version 0.0-3,
#' these use the default \code{char_class = FALSE}, since they already act as
#' character classes.
#' Finally, there are ASCII-only ways of specifying letters like \code{a-zA-Z}.
#' Which version you want depends upon how you want to deal with international
#' characters, and the vagaries of the underlying regular expression engine.
#' I suggest reading the \code{\link[base]{regex}} help page and doing lots of
#' testing.
#' @references \url{http://www.regular-expressions.info/shorthand.html} and
#' \url{http://www.rexegg.com/regex-quickstart.html#posix}
#' @seealso \code{\link[base]{regex}}, \code{\link[rebus.unicode]{Unicode}}
#' @examples
#' # R character classes
#' alnum()
#' alpha()
#' blank()
#' cntrl()
#' digit()
#' graph()
#' lower()
#' printable()
#' punct()
#' space()
#' upper()
#' hex_digit()
#'
#' # Special chars
#' any_char()
#' grapheme()
#' newline()
#'
#' # Generic classes
#' dgt()
#' wrd()
#' spc()
#'
#' # Generic negated classes
#' not_dgt()
#' not_wrd()
#' not_spc()
#'
#' # Non-locale-specific classes
#' ascii_digit()
#' ascii_lower()
#' ascii_upper()
#'
#' # Don't provide a class wrapper
#' digit(char_class = FALSE) # same as DIGIT
#'
#' # Match repeated values
#' digit(3)
#' digit(3, 5)
#' digit(0)
#' digit(1)
#' digit(0, 1)
#'
#' # Ranges of characters
#' char_range(0, 7) # octal number
#'
#' # Usage
#' (rx <- digit(3))
#' stringi::stri_detect_regex(c("123", "one23"), rx)
#'
#' # Some classes behave differently under different engines
#' # In particular PRCE and Perl recognise all these characters
#' # as punctuation but ICU does not
#' p <- c(
#' "!", "@", "#", "$", "%", "^", "&", "*", "(", ")", "[", "]", "{", "}", ";",
#' ":", "'", '"', ",", "<", ">", ".", "/", "?", "\\", "|", "`", "~"
#' )
#' icu_matched <- stringi::stri_detect_regex(p, punct())
#' p[icu_matched]
#' p[!icu_matched]
#' pcre_matched <- grepl(punct(), p)
#' p[pcre_matched]
#' p[!pcre_matched]
#'
#' # A grapheme is a character that can be defined by more than one code point
#' # PCRE does not recognise the concept.
#' x <- c("Chloe", "Chlo\u00e9", "Chlo\u0065\u0301")
#' stringi::stri_match_first_regex(x, "Chlo" %R% capture(grapheme()))
#'
#' # newline() matches three types of line ending: \r, \n, \r\n.
#' # You can standardize line endings using
#' stringi::stri_replace_all_regex("foo\nbar\r\nbaz\rquux", NEWLINE, "\n")
#' @include constants.R
#' @include grouping-and-repetition.R
#' @name ClassGroups
NULL
#' @rdname ClassGroups
#' @export
alnum <- function(lo, hi, char_class = TRUE)
{
repeated(ALNUM, lo, hi, char_class = char_class)
}
#' @rdname ClassGroups
#' @export
alpha <- function(lo, hi, char_class = TRUE)
{
repeated(ALPHA, lo, hi, char_class = char_class)
}
#' @rdname ClassGroups
#' @export
blank <- function(lo, hi, char_class = TRUE)
{
repeated(BLANK, lo, hi, char_class = char_class)
}
#' @rdname ClassGroups
#' @export
cntrl <- function(lo, hi, char_class = TRUE)
{
repeated(CNTRL, lo, hi, char_class = char_class)
}
#' @rdname ClassGroups
#' @export
digit <- function(lo, hi, char_class = TRUE)
{
repeated(DIGIT, lo, hi, char_class = char_class)
}
#' @rdname ClassGroups
#' @export
graph <- function(lo, hi, char_class = TRUE)
{
repeated(GRAPH, lo, hi, char_class = char_class)
}
#' @rdname ClassGroups
#' @export
lower <- function(lo, hi, char_class = TRUE)
{
repeated(LOWER, lo, hi, char_class = char_class)
}
#' @rdname ClassGroups
#' @export
printable <- function(lo, hi, char_class = TRUE)
{
repeated(PRINT, lo, hi, char_class = char_class)
}
#' @rdname ClassGroups
#' @export
punct <- function(lo, hi, char_class = TRUE)
{
repeated(PUNCT, lo, hi, char_class = char_class)
}
#' @rdname ClassGroups
#' @export
space <- function(lo, hi, char_class = TRUE)
{
repeated(SPACE, lo, hi, char_class = char_class)
}
#' @rdname ClassGroups
#' @export
upper <- function(lo, hi, char_class = TRUE)
{
repeated(UPPER, lo, hi, char_class = char_class)
}
#' @rdname ClassGroups
#' @export
hex_digit <- function(lo, hi, char_class = TRUE)
{
repeated(HEX_DIGIT, lo, hi, char_class = char_class)
}
#' @rdname ClassGroups
#' @export
any_char <- function(lo, hi)
{
repeated(ANY_CHAR, lo, hi, char_class = FALSE)
}
#' @rdname ClassGroups
#' @export
grapheme <- function(lo, hi)
{
repeated(GRAPHEME, lo, hi, char_class = FALSE)
}
#' @rdname ClassGroups
#' @export
newline <- function(lo, hi)
{
repeated(NEWLINE, lo, hi, char_class = FALSE)
}
#' @rdname ClassGroups
#' @export
dgt <- function(lo, hi, char_class = FALSE)
{
repeated(DGT, lo, hi, char_class = char_class)
}
#' @rdname ClassGroups
#' @export
wrd <- function(lo, hi, char_class = FALSE)
{
repeated(WRD, lo, hi, char_class = char_class)
}
#' @rdname ClassGroups
#' @export
spc <- function(lo, hi, char_class = FALSE)
{
repeated(SPC, lo, hi, char_class = char_class)
}
#' @rdname ClassGroups
#' @export
not_dgt <- function(lo, hi, char_class = FALSE)
{
repeated(NOT_DGT, lo, hi, char_class = char_class)
}
#' @rdname ClassGroups
#' @export
not_wrd <- function(lo, hi, char_class = FALSE)
{
repeated(NOT_WRD, lo, hi, char_class = char_class)
}
#' @rdname ClassGroups
#' @export
not_spc <- function(lo, hi, char_class = FALSE)
{
repeated(NOT_SPC, lo, hi, char_class = char_class)
}
#' @rdname ClassGroups
#' @export
ascii_digit <- function(lo, hi, char_class = TRUE)
{
repeated(ASCII_DIGIT, lo, hi, char_class = char_class)
}
#' @rdname ClassGroups
#' @export
ascii_lower <- function(lo, hi, char_class = TRUE)
{
repeated(ASCII_LOWER, lo, hi, char_class = char_class)
}
#' @rdname ClassGroups
#' @export
ascii_upper <- function(lo, hi, char_class = TRUE)
{
repeated(ASCII_UPPER, lo, hi, char_class = char_class)
}
#' @rdname ClassGroups
#' @export
ascii_alpha <- function(lo, hi, char_class = TRUE)
{
repeated(ASCII_LOWER %R% ASCII_UPPER, lo, hi, char_class = char_class)
}
#' @rdname ClassGroups
#' @export
ascii_alnum <- function(lo, hi, char_class = TRUE)
{
repeated(ASCII_ALPHA %R% ASCII_DIGIT, lo, hi, char_class = char_class)
}
#' @rdname ClassGroups
#' @export
char_range <- function(lo, hi, char_class = lo < hi)
{
lo <- get_first_char(lo)
hi <- get_first_char(hi)
x <- if(lo < hi)
{
regex(lo, "-", hi)
} else if(lo == hi)
{
warning("'lo' and 'hi' are the same value. Return 'lo'.")
as.regex(lo)
} else # lo > hi
{
stop("'hi' is less than 'lo'.")
}
if(char_class)
{
x <- char_class(x)
}
x
}
get_first_char <- function(x)
{
x <- as.character(x)
if(any(is.na(x)))
{
stop("Missing values are not allowed.")
}
if(nchar(x) > 1)
{
warning("Returning only the first character from x.")
return(substring(x, 1, 1))
}
x
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.