#' Define a search pattern for use with the find method on a director.
#'
#' A search pattern is one of the following:
#'
#' \describe{
#' \item{exact}{ match. The strings must match exactly this value.}
#' \item{partial}{ match. The strings which contain this string as
#' a substring will be matched.}
#' \item{wildcard}{ match. Fuzzy matching like in the ctrl+p plugin
#' for vim. If the pattern is "abc", it will be translated to the
#' regular expression ".*a.*b.*c.*", that is, any characters followed
#' by an 'a' followed by any characters followed by a 'b' followed by
#' any characters followed by a 'c' followed by any characters (e.g.,
#' "fabulous cake"). Note that wildcard match is case insensitive.}
#' \item{regex}{ match. Apply a regular expression filter to the
#' set of strings.}
#' }
#'
#' @param pattern character. The pattern to search for.
#' @param method character. The search pattern method, one of "exact",
#' "partial", "wildcard", or "regex".
#' @note Patterns can be combined using the \code{|} and \code{&} operators.
#' @examples
#' \dontrun{
#' d$find(search_pattern("this/file", "exact"))
#' # If d is a director object, the above will find exactly the resource
#' # "this/file".
#'
#' d$find(search_pattern("this", "partial"))
#' # The above will find any resource containing "this" as a substring.
#'
#' d$find(search_pattern("this", "wildcard"))
#' # The above will find any resource containing the consecutive letters
#' # "this" separated by arbitrary strings.
#'
#' d$find(search_pattern("foobar", "partial") | search_pattern("root", "exact"))
#' # The above will find any resource with the substring "foobar" or having
#' # exactly the name "root".
#' }
search_pattern <- function(pattern, method) {
msg <- function(x) {
stop("Search ", deparse(substitute(x)) ," must be of type character; ",
"instead I got a ", class(x)[1])
}
if (!is.character(method)) { msg(method) }
if (!is.character(pattern)) { msg(pattern) }
## A search pattern is a method for filtering a set of strings that is highly
## composable. For example, if we have `c("foobar", "barbaz", "bazbux")`,
## we can use the pattern `search_pattern("bar", "partial")` to select the
## first two, since they have the substring "bar".
##
## We can apply `and` and `or` operations to search patterns to mix and match
## them. For example,
## `search_pattern("bar", "partial") & search_pattern("baz", "wildcard")`
## will match strings that contain the substring "bar", as well as the
## characters "b", "a", and "z" separated by arbitrary strings (e.g.,
## "BAzaR").
search_pattern_(pattern, tolower(method))
}
search_pattern_ <- function(pattern, method) {
## We use a recursive solution: the pattern can be
## a "search pattern join" (the `&` and `|` operation described above).
## In this case, we just return the join.
if (is.search_pattern_join(pattern)) { pattern }
else if (length(pattern) > 1) {
## If there is more than one pattern specified, we treat this as an OR
## condition: either pattern 1, or pattern 2, etc.
Reduce(function(x, y) {
search_pattern_(x, method) | search_pattern_(y, method)
}, pattern)
} else if (length(method) > 1) {
## If there is more than one method specified, this is also an OR condition.
## This situation is rare, since we don't often want to say "match this
## as a wildcard or as a regex".
Reduce(function(x, y) {
search_pattern_(pattern, x) | search_pattern_(pattern, y)
}, method)
} else {
`verify_search_pattern_method!`(method)
## We use an S3 class to track information about the pattern (the string
## to match and the method).
as.search_pattern(list(pattern = pattern, method = method))
}
}
`verify_search_pattern_method!` <- function(method) {
## `getNamespace` is a base R function that allows us to grab the namespace
## of the director package. To understand the difference between a package
## environment and package namespace, see Suraj Gupta's wonderful guide
## on [how R searches and finds stuff](http://blog.obeautifulcode.com/R/How-R-Searches-And-Finds-Stuff/).
##
## Instead of hardcoding all the pattern methods we support like "exact"
## and "wildcard", we look into this package's namespace and see if there
## is an "apply_pattern.exact" or "apply_pattern.wildcard" function. If
## someone wants to implement a new pattern method, they only need to
## define an "apply_pattern.new_method" function below, which is cleaner.
ok <- exists(paste0("apply_pattern.", method), envir = getNamespace("director"))
if (!ok) { stop("Invalid search pattern.") }
}
search_pattern_join <- function(pattern1, pattern2, type) {
stopifnot(identical(type, "and") || identical(type, "or"))
## An S3 object that tracks an `&` or `|` condition on patterns.
as.search_pattern(structure(list(pattern1, pattern2, type = type),
class = c("search_pattern_join")))
}
as.search_pattern <- function(x) {
## Remember that when changing classes, the class should be prepended
## rather than appended, since R's S3 mechanism looks left-to-right for
## S3 methods.
class(x) <- c("search_pattern", class(x))
x
}
is.search_pattern <- function(x) { is(x, "search_pattern") }
is.atomic_search_pattern <- function(x) {
## An atomic search pattern is one that has not been joined using
## the `&` or `|` operators.
is.search_pattern(x) && !is.search_pattern_join(x)
}
is.search_pattern_join <- function(x) { is(x, "search_pattern_join") }
## This funky looking notation says "implement the `|` operator for the
## "search_pattern" S3 class.
`|.search_pattern` <- function(e1, e2) {
stopifnot(is(e2, "search_pattern"))
search_pattern_join(e1, e2, type = "or")
}
`&.search_pattern` <- function(e1, e2) {
stopifnot(is(e2, "search_pattern"))
search_pattern_join(e1, e2, type = "and")
}
#' Apply a pattern filter to a character vector.
#'
#' @param pattern search_pattern.
#' @param strings character. The strings to filter down.
apply_pattern <- function(pattern, strings) {
if (is.atomic_search_pattern(pattern)) {
## First we apply the pattern's method as an S3 class. For example,
## a wildcard pattern would get the "wildcard" class.
class(pattern) <- c(pattern$method, class(pattern))
## R's `UseMethod` function *dispatches* an S3 generic. This means
## that we will call `apply_pattern.wildcard` on the `pattern`
## object without having to figure out that is the appropriate method.
UseMethod("apply_pattern", object = pattern)
} else if (is.search_pattern_join(pattern)) {
operand <- if (pattern$type == "and") { intersect } else { union }
## `Recall` is an R shortcut for "recursively call this function", i.e.,
## `apply_pattern(...)`.
operand(Recall(pattern[[1]], strings), Recall(pattern[[2]], strings))
} else { stop("Invalid pattern") }
}
apply_pattern.exact <- function(pattern, strings) {
## An exact match is just a single string that matches on the nose.
if (any(pattern$pattern == strings)) { pattern$pattern }
else { character(0) }
}
apply_pattern.wildcard <- function(pattern, strings) {
## First, replace all regex special characters with the correct backslashed
## version. I wish I could say I knew how many backslashes are necessary
## but it was trial and error. ;)
pattern <- gsub("([]./\\*+()])", "\\\\\\1", pattern$pattern)
## The only regex special characters we allow in wildcards are `^` and `$`
## to mark beginning and ends of strings. The rest gets replaced with a
## `.*` prefix. For example, "^abc" would be come "^.*a.*b.*c".
pattern <- gsub("([^\\$^])", ".*\\1", pattern) # turn this into ctrl+p
## But of course "^.*a" is just "a"! So we turn that special sequence into
## just "^".
pattern <- gsub("^.*", "^", pattern, fixed = TRUE)
## By default, wildcards matching is case insensitive, since it will be used
## to filter on file names, and we rarely have file collisions based on case
## (and when you do you should think of a better file name instead!).
grep(pattern, strings, value = TRUE, ignore.case = TRUE)
}
apply_pattern.partial <- function(pattern, strings) {
## Just a plain substring match.
grep(pattern$pattern, strings, fixed = TRUE, value = TRUE)
}
apply_pattern.regex <- function(pattern, strings) {
## Just a plain regex match.
grep(pattern$pattern, strings, value = TRUE)
}
apply_pattern.idempotence <- function(pattern, strings) {
# TODO: (RK) Consider the string "."
## For an overview of idempotence, see the documentation on the director
## exists method.
##
## The idempotent pattern finds the helpers in a set of filenames and
## strips them. For example, `c("foo.R", "bar/bar.R", "bar/baz.R")` would
## be reduced to just `c("foo.R", "bar/bar.R")` (note that this pattern
## is not just a filter and has side effects).
##
## Grab the indices of those files whose base name is the same as their
## enclosing directory name (for example, "foo/bar/bar.R").
idempotent <- vapply(strings, function(x) basename(x) == basename(dirname(x)), logical(1))
## What are the actual directory names? (for example, "foo/bar")
idem_dirs <- dirname(strings[idempotent])
## Helper files are the files in the `idem_dirs` computed above who do not
## share their name with the parent directory. We need to find the indices
## of these files in our strings.
helpers <- vapply(strings, function(x) dirname(x) %in% idem_dirs, logical(1))
## Now replace the idempotent files with their directory names. In director,
## the name of an idempotent resource is the filename sans the basename
## (for example, "foo/bar" rather than "foo/bar/bar.R").
strings[idempotent] <- idem_dirs
## Strip the helper files but keep the idempotent resources. Note that
## since the idempotent files, like "foo/bar/bar.R", are within an
## idempotent directory, like "foo/bar", they will be marked as TRUE
## in the `helpers` vector.
strings[!helpers | idempotent]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.