#' @title find_most_similar_string
#'
#' @description Returns a vector of most similar string in another vector.
#' Returns a vector of the same length as input vector '.s'.
#'
#' @param .s a character vector to be matched.
#' @param .t a character vector to be matched against.
#' @param max_dist the maximum string distance
#' @param ignore_case should case be ignored? Default is TRUE.
#' @param verbose should warnings be printed in the console.
#' @param feeling_lucky if multiple most similar strings are found. Should the first one be returned?
#' @param ... other arguments passed to utils::adist.
#'
#' @details Uses the generalized Levenshtein distance. For more information type \code{?utils::adist} in the console. the original source is https://github.com/davidsjoberg/similiars/blob/master/R/similiars.R
#'
#' @examples 1+1
#'
#'
#' @return a character vector
#'
#' @export
find_most_similar_string <- function(.s, .t, max_dist = Inf, verbose = TRUE, ignore_case = TRUE, feeling_lucky = FALSE, ...) {
find_string_distance <- function(.s, .t, ignore_case = TRUE, ...) {
if(any(!is.character(.t), !is.character(.s))) stop("'.s' and '.t' need to be character vectors")
purrr::map(.s, function(.e) {
.d <- utils::adist(.t, .e, ignore.case = ignore_case)
dplyr::tibble(input_string = .e,
string = .t,
string_distance = .d[, 1]) |>
dplyr::arrange(.data$string_distance)
}) |>
purrr::set_names(.s)
}
if(any(!is.character(.t), !is.character(.s))) stop("'.s' and '.t' need to be character vectors")
.dfs <- find_string_distance(.s, .t, ignore_case = ignore_case)
.dfs <- purrr::map(.dfs, function(.h) {.h |> dplyr::filter(.data$string_distance <= max_dist)})
out <- purrr::map_chr(.dfs, ~{
if(is.na(.x$input_string[1])) {return(NA_character_)}
.x <- .x |> dplyr::filter(string_distance == min(string_distance))
if( nrow(.x) > 1){
if(feeling_lucky){
if(verbose){
warning(paste0("No single most similar string found for '",
.x$input_string[1],
"'. Returning '", .x$string[1], "'. Other exactly similar strings were ",
paste(paste0("'", .x$string[-1], "'"),
collapse = ", "),
"."))
}
return(.x$string[1])
} else{
if(verbose){
warning(paste0("No single most similar string found for '",
.x$input_string[1],
"'. Returning NA. Most similar strings were ",
paste(paste0("'", .x$string, "'"),
collapse = ", "),
"."))
}
return(NA_character_)
}
}
if( nrow(.x) == 0 ){
if(verbose){
warning(paste0("No similar string below threshold found for '", .x$input_string[1], "'. Returning NA.\n"))
}
return(NA_character_)
}
.x |> dplyr::pull(string)
})
out |> as.character()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.