# R/RcppExports.R

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' Approximate string matching in multithreaded C++ code
#'
#' @description Compares two character vectors. For each combination, the
#'   Levenshtein distance between the needle string and the haystack string is
#'   calculated. If the distance is lower than the maximum allowed distance,
#'   the search is discontinued and the haystack string is returned. Because
#'   the first string with a distance below the maximum distance is returned,
#'   it is necessary to set a reasonable threshold.
#' @param needles character vector of strings to be matched against
#'   \code{haystack}
#' @param haystack character vector of possible matches
#' @param maxDistance maximum Levenshtein distance to allow for matching
#' @param nthreads number of threads to use to speed up the computation.
#'   Defaults to \code{1L}.
#' @param displayProgress display a progress bar (if \code{TRUE})
#' @return character vector of matches, which has the same length as the input
#'   vector. Input vector items for which no matching strings could be found
#'   return an empty string.
#' @export
#' @import Rcpp
#' @import RcppProgress
fuzzyMatch <- function(needles, haystack, maxDistance, nthreads = 1L, displayProgress = TRUE) {
  # Forward every argument, in order, to the compiled routine registered by
  # the tweetCorp package.
  .Call(
    "_tweetCorp_fuzzyMatch",
    needles,
    haystack,
    maxDistance,
    nthreads,
    displayProgress,
    PACKAGE = "tweetCorp"
  )
}
# jeroenclaes/tweetCorp documentation built on May 27, 2019, 4:50 a.m.