R/pngroup.R

#' Parallel n-th Capture Group
#'
#' Extract and return the n-th element of the regex match of `pattern`
#' against `string`. Arguments are vectorized in the `mapply` way: `string`,
#' `pattern` and `n` are silently recycled to the length of the longest one.
#'
#' @param string character vector
#' @param pattern vector of regular expressions
#' @param n index into the match result: `1` is the whole match, `2` is the
#'   first capture group, `3` is the second one, and so on (see examples)
#' @param perl logical; use Perl-compatible regular expressions?
#'
#' @return `NULL` if any of `string`, `pattern` or `n` has length zero.
#'   Otherwise the result of `mapply`, which for positive `n` is a character
#'   vector with one element per (recycled) input; elements are `NA` where
#'   the pattern is `NA`, the pattern does not match, or the match has fewer
#'   than `n` elements.
#'
#' @examples
#' pngroup("one two three", "(.+)\\s(.+)\\s(.+)", 4) # "three"
#' pngroup(c("one two three", "four five six"), "(.+)\\s(.+)\\s(.+)", 2) # "one"  "four"
#' pngroup(c("one two three", "four five six"), "(.+)\\s(.+)\\s(.+)", 2:4) # "one"   "five"  "three"
#'
#' @export
pngroup <- function(string, pattern, n = 1, perl = FALSE) {
  # Check every length on its own. Multiplying the three lengths overflows
  # integer arithmetic for large inputs (e.g. 1300 * 1300 * 1300), which
  # yields NA and makes the `if` fail instead of detecting empty input.
  if (length(string) == 0 || length(pattern) == 0 || length(n) == 0) {
    return(NULL)
  }

  # Recycle explicitly so that lengths which are not multiples of each other
  # are accepted without mapply's recycling warning.
  size <- max(length(string), length(pattern), length(n))
  string <- rep(string, length.out = size)
  pattern <- rep(pattern, length.out = size)
  n <- rep(n, length.out = size)

  # `perl` is passed through `...`, so mapply recycles it like the others.
  mapply(
    ngroup,
    string = string, pattern = pattern, n = n, perl = perl,
    USE.NAMES = FALSE
  )
}

# Scalar worker behind pngroup(): looks only at the first element of each
# argument. Returns element `n` of the regexec() match of `pattern` against
# `string` (element 1 is the whole match, element 2 the first capture group).
# Yields NA_character_ when `pattern` is empty or NA.
ngroup <- function(string, pattern, n, perl = FALSE) {
  if (length(pattern) == 0 || is.na(pattern[[1]])) {
    return(NA_character_)
  }
  subject <- string[[1]]
  hit <- regexec(pattern[[1]], subject, perl = perl[[1]])
  # regmatches() returns a one-element list here; unwrap it before indexing.
  pieces <- regmatches(subject, hit)[[1]]
  pieces[(n[[1]])]
}
avidclam/amxtra documentation built on May 17, 2019, 12:01 p.m.