#' Which regex pattern first matches string
#'
#' For each element in the `string` vector, this function finds which `pattern`
#' is the first to match it using `grepl`. Thus pattern order influences the results.
#'
#' @param string vector of strings to match against patterns
#' @param pattern vector of patterns to match strings
#'
#' @return Numeric index of the pattern to first match each string or NA if no match found.
#'
#' @examples
#' which_pattern(month.name, c("^J", "[A,a]", "[O,o]"))
#' # [1] 1 2 2 2 2 1 1 2 NA 3 3 NA
#'
#' which_pattern(month.name, c("[A,a]", "^J", "[O,o]"))
#' #[1] 1 1 1 1 1 2 2 1 NA 3 3 NA
#'
#' @export
which_pattern <- function(string, pattern) {
if (length(string) == 0 | length(pattern) == 0) return(NULL)
l <- sapply(pattern, function(p) if (is.na(p)) rep(FALSE, length(string)) else grepl(p, string),
simplify = "matrix", USE.NAMES = FALSE)
m <- matrix(unlist(l), nrow = length(string), ncol = length(pattern))
apply(as.matrix(m), 1, function(x) c(which(x), NA_integer_)[[1]])
}
#' Get first matching pattern band
#'
#' For each element in the `string` vector, this function finds the name (or number)
#' of a collection of regex patterns --- "band" --- one of which was the first to match
#' the string element. Order of bands in the `bandlist` influences the results.
#' The function is useful when classifying character variables based on content.
#'
#' @param string vector of strings to match against patterns
#' @param bandlist list of pattern bands: <name> = <vector of applicable patterns>
#' @param as.factor if FALSE, return band names, not factors; otherwise if TRUE
#'
#' @return Vector of strings (or factors) of pattern "bands" that matched each string first,
#' numeric pattern index if no band names given.
#'
#' @examples
#' # In the following examples patterns should match as narrow as possible
#' # because the first match wins.
#' seasons <- list(
#' Wi = c("De", "Ja", "Fe"),
#' Sp = c("Ma", "Ap"),
#' Su = c("Ju", "Au"),
#' Au = c("Se", "Oc", "No")
#' )
#' patband(month.name, seasons, as.factor = TRUE)
#' # [1] Wi Wi Sp Sp Sp Su Su Su Au Au Au Wi
#' # Levels: Wi Sp Su Au
#'
#' quarters <- list(
#' c("Ja", "Fe", "Mar"),
#' c("Ap", "May", "Jun"),
#' c("Jul", "Au", "Se"),
#' c("Oc", "No", "De")
#' )
#' patband(month.name, quarters)
#' # [1] 1 1 1 2 2 2 3 3 3 4 4 4
#'
#' @seealso [which_pattern()] for the underlying pattern matching behaviour.
#' @export
patband <- function(string, bandlist, as.factor = FALSE) {
bandlist <- as.list(bandlist)
if (is.null(names(bandlist))) {
num <- seq(1, length.out = length(bandlist))
out <- rep(num, sapply(bandlist, length))
if (as.factor) out <- factor(out, levels = num)
} else {
out <- rep(names(bandlist), sapply(bandlist, length))
if (as.factor) out <- factor(out, levels = names(bandlist))
}
out[which_pattern(string, Reduce(c, bandlist))]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.