# Internal (un-memoised) worker behind `recog_match()`.
#
# Selects the `recog_db` entries whose `protocol` matches the `protocols`
# regex OR whose `matches` value matches the `matches` regex, runs every
# fingerprint of the selected entries against `source`, and row-binds the
# hits.
#
# Arguments:
#   recog_db   list of fingerprint groups; each entry is read for `protocol`,
#              `matches`, `preference_value` and `fingerpints` (sic -- that
#              is the field name used by the data structure).
#   source     string handed to `ore::ore_search()` for every fingerprint.
#   protocols  regex applied to each entry's `protocol`; `NULL` disables it.
#   matches    regex applied to each entry's `matches`; `NULL` disables it.
#
# Returns an empty `list()` when no entry is selected; otherwise a data frame
# classed `tbl_df`/`tbl`/`data.frame` built from the matching fingerprints
# (extracted params plus `preference`, `description`, `pattern`, `orig`).
#
# Raises an error when the first compiled pattern carries a nil external
# pointer (e.g. the database was restored from a serialized copy).
.recog_match <- function(recog_db, source, protocols = ".*", matches = NULL) {

  # Only the first fingerprint is probed for a stale pointer. The probe is
  # guarded so that an empty database falls through to the normal
  # "nothing selected" path instead of failing on `recog_db[[1]]`.
  first_prints <- if (length(recog_db) > 0) recog_db[[1]]$fingerpints else NULL
  if (length(first_prints) > 0) {
    first_pattern <- first_prints[[1]]$compiled_pattern
    if (identical(attr(first_pattern, ".compiled"), new("externalptr"))) {
      stop(
        "The external pointers for the compiled patterns are not valid. ",
        "Please re-load the data you are supplying to the `recog_db` parameter."
      )
    }
  }

  protocol_matchers <- if (is.null(protocols)) {
    integer()
  } else {
    grep(protocols, map_chr(recog_db, "protocol"))
  }

  matches_matchers <- if (is.null(matches)) {
    integer()
  } else {
    grep(matches, map_chr(recog_db, "matches"))
  }

  # The two filters are OR'd: an entry selected by either one is searched.
  matchers <- unique(c(protocol_matchers, matches_matchers))
  if (length(matchers) == 0) return(list())

  # Run one fingerprint against `source`; NULL when the pattern does not hit.
  match_fingerprint <- function(fingerprint, preference) {
    res <- ore::ore_search(fingerprint$compiled_pattern, source, simplify = TRUE)
    if (is.null(res)) return(NULL)

    grps <- as.vector(ore::groups(res))

    # A param at position 0 carries a fixed value; any other position picks
    # the corresponding capture group of the match.
    mat_out <- lapply(fingerprint$params, function(param) {
      value <- if (param$position == 0) param$value else grps[param$position]
      as.list(set_names(value, param$name))
    }) %>%
      unlist(recursive = FALSE)

    mat_out$preference <- preference
    mat_out$description <- fingerprint$description
    mat_out$pattern <- fingerprint$pattern
    mat_out$orig <- source
    mat_out
  }

  out <- lapply(recog_db[matchers], function(entry) {
    lapply(
      entry$fingerpints,
      match_fingerprint,
      preference = entry$preference_value
    ) %>%
      discard(is.null) %>%
      discard(~length(.x) == 0)
  }) %>%
    discard(is.null) %>%
    discard(~length(.x) == 0) %>%
    unlist(recursive = FALSE) %>%
    bind_rows()

  class(out) <- c("tbl_df", "tbl", "data.frame")
  out
}
#' Find fingerprint matches for a given source
#'
#' This is an exhaustive lookup for the fingerprint in all the selected
#' `protocol`/`matches` categories. As a result, it's not very fast
#' on its own. However, the function has been [memoise::memoise()]'d.
#' As such, if you are performing a number of recogs in a single
#' R session and working from a typical data source (e.g. a large file
#' with many common strings, such as a collection of HTTP `Server`
#' header strings), you will see performance gains after each distinct
#' match input.
#'
#' @md
#' @param recog_db a structure created with [load_fingerprints()] or
#' [use_builtin_fingerprints()]
#' @param source the 1-element character vector to compare against
#' @param protocols,matches regular expressions to limit what you're comparing
#' against. These are boolean **OR'd** together: a category is searched if
#' either one matches it. Pass `NULL` to disable a filter. `protocols`
#' defaults to `".*"`; `matches` defaults to `NULL`.
#' @return A data frame (classes `tbl_df`, `tbl`, `data.frame`) built by
#' row-binding the matching fingerprints: the parameters extracted for each
#' fingerprint plus `preference`, `description`, `pattern` and `orig` (the
#' `source` string). If no category is selected by `protocols`/`matches`,
#' an empty `list()` is returned instead.
#' @export
#' @examples
#' recog_db <- use_builtin_fingerprints()
#' recog_match(recog_db, "VShell_Special_Edition_2_5_0_204 VShell", "ssh")
recog_match <- memoise::memoise(.recog_match)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.