R/match_maker.R

Defines functions match_maker match_maker_full

Documented in match_maker match_maker_full

#' Fuzzy Text Matching: element
#'
#' Find the best match (or no match at all) to string inputs.
#' @param e An character
#' @param matches A vector in which to look for matches with e.
#' @param max_dist Set maxDist to be used in stringdist::amatch
#' @keywords match
#' @export
#' @examples
#'
#' iris <- iris %>% tibble::as_tibble()
#' iris %>% dplyr::mutate(
#'   foo = purrr::map_chr(as.character(Species), match_maker,
#'                 matches = c("Virginia", "California", "Sarasota"))
#'   )
#'
#' iris %>% dplyr::mutate(
#'   foo = purrr::map_chr(as.character(Species), match_maker,
#'                 matches = c("Virginia", "California", "Sarasota"), max_dist = 20)
#' )


match_maker <- function(e, matches, max_dist = 5) {

  matches[length(matches) + 1] <- "No matches found"  # Add a last element which we'll use in nomatch

  if (e %in% matches) {
    this_match <- matches[which(matches == e)]
  } else {
    this_match <- matches[stringdist::amatch(e, matches, maxDist = max_dist, nomatch = length(matches))]
  }
  return(this_match)
}


#' Fuzzy Text Matching: vector
#'
#' Find the best match (or no match at all) to string inputs.
#' @param vec A vector
#' @param matches A vector in which to look for matches with e.
#' @param max_dist Set maxDist to be used in stringdist::amatch
#' @keywords match
#' @export
#' @examples
#'
#' iris <- iris %>% tibble::as_tibble()
#' match_maker_full(as.character(iris$Species),
#'                  matches = c("Virginia", "California", "Sarasota"), max_dist = 20)

match_maker_full <- function(vec, matches, max_dist = 5) {
  out <- vector(length = length(vec))

  matches[length(matches) + 1] <- "No matches found"  # Add a last element which we'll use in nomatch

  out <- NULL
  for (i in vec) {
    if (i %in% matches) {
      this_match <- matches[which(matches == i)]
    } else {
      this_match <- matches[stringdist::amatch(i, matches, maxDist = max_dist, nomatch = length(matches))]
    }
    out <- c(out, this_match)
  }
  return(out)
}
aedobbyn/dobtools documentation built on May 28, 2019, 2:33 a.m.