# Global knitr chunk options applied to every code chunk in this README.
knitr::opts_chunk$set(
  collapse = TRUE,                   # merge source and printed output into one block
  comment = "#>",                    # prefix put in front of printed output lines
  fig.path = "man/figures/README-",  # path prefix for generated figure files
  out.width = "100%"                 # display width of figures in the output
)

rr4r: Rust regex for R

R-CMD-check

rr4r is my practice project for using Rust in an R package with the power of extendr. My current goal is to provide the same interface as stringr's functions, using Rust's regex crate.

Installation

remotes::install_github("yutannihilation/rr4r")

Example

rr4r_detect()

library(rr4r)

# Sample input; note that it includes a missing value (NA).
fruit <- c("apple", "banana", "pear", "pinapple", NA)

# Patterns, in order: a literal "a", "a" anchored at the start, "a" anchored
# at the end, a literal "b", and a character class of lowercase vowels.
rr4r_detect(fruit, "a")
rr4r_detect(fruit, "^a")
rr4r_detect(fruit, "a$")
rr4r_detect(fruit, "b")
rr4r_detect(fruit, "[aeiou]")

rr4r_extract()

# Sample input mixing words and digits; reused by the later examples.
shopping_list <- c("apples x4", "bag of flour", "bag of sugar", "milk x2")

# Patterns, in order: a single digit, a run of lowercase letters, one to four
# lowercase letters, and one to four lowercase letters between word boundaries.
rr4r_extract(shopping_list, "\\d")
rr4r_extract(shopping_list, "[a-z]+")
rr4r_extract(shopping_list, "[a-z]{1,4}")
rr4r_extract(shopping_list, "\\b[a-z]{1,4}\\b")

rr4r_extract() accepts an optional argument i to choose which capture group to extract. This is useful when you don't need the whole match (with stringi/stringr, you can use lookahead or lookbehind for this purpose, but Rust's regex crate doesn't support them for performance reasons).

# Without index: no capture group is selected
rr4r_extract(c("<p>foo</p>", "<p>bar</p>"), "<p>(.*)</p>")

# With index: 1 selects the first capture group, i.e. (.*)
rr4r_extract(c("<p>foo</p>", "<p>bar</p>"), "<p>(.*)</p>", 1)

# stringr's equivalent using positive lookahead/lookbehind
# ((?<=<p>) and (?=</p>) assert the surrounding tags without matching them)
stringr::str_extract(c("<p>foo</p>", "<p>bar</p>"), "(?<=<p>).*(?=</p>)")

rr4r_extract_all()

# Extract all matches
# Patterns, in order: runs of lowercase letters, runs of lowercase letters
# between word boundaries, and single digits.
# (shopping_list is the vector defined in the rr4r_extract() example.)
rr4r_extract_all(shopping_list, "[a-z]+")
rr4r_extract_all(shopping_list, "\\b[a-z]+\\b")
rr4r_extract_all(shopping_list, "\\d")

rr4r_extract_groups()

library(dplyr, warn.conflicts = FALSE)

# Build a one-column tibble of date strings and pass the column to
# rr4r_extract_groups() inside mutate().
tibble(
  x = c("2020-01-02", "2021-12-31", "2022-08-09")
) %>% 
  mutate(
    # The pattern declares three named capture groups: year, month and day.
    # The result is passed to mutate() unnamed -- presumably a data frame with
    # one column per group; confirm against rr4r_extract_groups()'s docs.
    rr4r_extract_groups(x, 
      pattern = "(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})"
    )
  )

rr4r_replace()

# Sample input.
fruits <- c("one apple", "two pears", "three bananas")
# Use a fixed string ("-") as the replacement for the lowercase-vowel pattern.
rr4r_replace(fruits, "[aeiou]", "-")

# The replacement argument can also be a function (here toupper);
# "\\D" matches a non-digit character.
rr4r_replace("a1", "\\D", toupper)
library(lubridate)

# Sample strings that each contain a date in YYYY-MM-DD form.
x <- c("Today is 2020-02-02", "2021-03-30")
# Date pattern with three named capture groups: year, month and day.
ptn <- "(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})"

# Build a date from its year, month and day parts, move it back by one
# month with lubridate's %m-% operator, and return it as "YYYY-MM-DD" text.
one_month_ago <- function(year, month, day) {
  current <- make_date(year, month, day)
  previous <- current %m-% months(1)
  format(previous, "%Y-%m-%d")
}

# Use one_month_ago as the replacement function for dates matched by ptn.
# Its parameters (year, month, day) carry the same names as ptn's capture
# groups -- presumably the groups are passed as those arguments; confirm
# against rr4r_replace()'s documentation.
rr4r_replace(x, ptn, one_month_ago)


yutannihilation/rr4r documentation built on April 16, 2021, 8:40 a.m.