## Source: inst/doc/functionality_of_fuzzywuzzyR_package.R

## ---- eval = F, echo = T------------------------------------------------------
#  
#  library(fuzzywuzzyR)
#  
#  word = "new york jets"
#  
#  choices = c("Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys")
#  
#  
#  #------------
#  # processor :
#  #------------
#  
#  init_proc = FuzzUtils$new()      # initialization of FuzzUtils class to choose a processor
#  
#  PROC = init_proc$Full_process    # processor-method
#  
#  PROC1 = tolower                  # base R function ( as an example for a processor )
#  
#  #---------
#  # scorer :
#  #---------
#  
#  init_scor = FuzzMatcher$new()    # initialization of the scorer class
#  
#  SCOR = init_scor$WRATIO          # chosen scorer function
#  
#  
#  init <- FuzzExtract$new()        # Initialization of the FuzzExtract class
#  
#  init$Extract(string = word, sequence_strings = choices, processor = PROC, scorer = SCOR)
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  # example output
#  
#  [[1]]
#  [[1]][[1]]
#  [1] "New York Jets"
#  
#  [[1]][[2]]
#  [1] 100
#  
#  
#  [[2]]
#  [[2]][[1]]
#  [1] "New York Giants"
#  
#  [[2]][[2]]
#  [1] 79
#  
#  
#  [[3]]
#  [[3]][[1]]
#  [1] "Atlanta Falcons"
#  
#  [[3]][[2]]
#  [1] 29
#  
#  
#  [[4]]
#  [[4]][[1]]
#  [1] "Dallas Cowboys"
#  
#  [[4]][[2]]
#  [1] 22
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  # extracts best matches (limited to 2 matches)
#  
#  init$ExtractBests(string = word, sequence_strings = choices, processor = PROC1,
#  
#                    scorer = SCOR, score_cutoff = 0L, limit = 2L)
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  [[1]]
#  [[1]][[1]]
#  [1] "New York Jets"
#  
#  [[1]][[2]]
#  [1] 100
#  
#  
#  [[2]]
#  [[2]][[1]]
#  [1] "New York Giants"
#  
#  [[2]][[2]]
#  [1] 79
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  # extracts matches without keeping the output order
#  
#  init$ExtractWithoutOrder(string = word, sequence_strings = choices, processor = PROC,
#  
#                           scorer = SCOR, score_cutoff = 0L)
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  [[1]]
#  [[1]][[1]]
#  [1] "Atlanta Falcons"
#  
#  [[1]][[2]]
#  [1] 29
#  
#  
#  [[2]]
#  [[2]][[1]]
#  [1] "New York Jets"
#  
#  [[2]][[2]]
#  [1] 100
#  
#  
#  [[3]]
#  [[3]][[1]]
#  [1] "New York Giants"
#  
#  [[3]][[2]]
#  [1] 79
#  
#  
#  [[4]]
#  [[4]][[1]]
#  [1] "Dallas Cowboys"
#  
#  [[4]][[2]]
#  [1] 22
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  # extracts first result
#  
#  init$ExtractOne(string = word, sequence_strings = choices, processor = PROC,
#  
#                  scorer = SCOR, score_cutoff = 0L)
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  [[1]]
#  [1] "New York Jets"
#  
#  [[2]]
#  [1] 100
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  duplicat = c('Frodo Baggins', 'Tom Sawyer', 'Bilbo Baggin', 'Samuel L. Jackson',
#  
#               'F. Baggins', 'Frody Baggins', 'Bilbo Baggins')
#  
#  
#  init$Dedupe(contains_dupes = duplicat, threshold = 70L, scorer = SCOR)
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  [1] "Frodo Baggins"     "Samuel L. Jackson" "Bilbo Baggins"     "Tom Sawyer"
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  s1 = "Atlanta Falcons"
#  
#  s2 = "New York Jets"
#  
#  init = FuzzMatcher$new()          # initialization of FuzzMatcher class
#  
#  init$Partial_token_set_ratio(string1 = s1, string2 = s2, force_ascii = TRUE, full_process = TRUE)
#  
#  # example output
#  
#  [1] 31
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  init$Partial_token_sort_ratio(string1 = s1, string2 = s2, force_ascii = TRUE, full_process = TRUE)
#  
#  
#  [1] 31
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  init$Ratio(string1 = s1, string2 = s2)
#  
#  [1] 21
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  init$QRATIO(string1 = s1, string2 = s2, force_ascii = TRUE)
#  
#  [1] 29
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  init$WRATIO(string1 = s1, string2 = s2, force_ascii = TRUE)
#  
#  [1] 29
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  init$UWRATIO(string1 = s1, string2 = s2)
#  
#  [1] 29
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  init$UQRATIO(string1 = s1, string2 = s2)
#  
#  [1] 29
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  init$Token_sort_ratio(string1 = s1, string2 = s2, force_ascii = TRUE, full_process = TRUE)
#  
#  [1] 29
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  
#  init$Partial_ratio(string1 = s1, string2 = s2)
#  
#  [1] 23
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  init$Token_set_ratio(string1 = s1, string2 = s2, force_ascii = TRUE, full_process = TRUE)
#  
#  [1] 29
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  s1 = 'Frodo Baggins'
#  
#  init = FuzzUtils$new()
#  
#  init$Full_process(string = s1, force_ascii = TRUE)
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  # example output
#  
#  [1] "frodo baggins"
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  vec = c('Frodo Baggins', 'Tom Sawyer', 'Bilbo Baggin')
#  
#  str1 = 'Fra Bagg'
#  
#  GetCloseMatches(string = str1, sequence_strings = vec, n = 2L, cutoff = 0.6)
#  
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  [1] "Frodo Baggins"
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  s1 = ' It was a dark and stormy night. I was all alone sitting on a red chair.'
#  
#  s2 = ' It was a murky and stormy night. I was all alone sitting on a crimson chair.'
#  
#  init = SequenceMatcher$new(string1 = s1, string2 = s2)
#  
#  init$ratio()
#  
#  [1] 0.9127517
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  init$quick_ratio()
#  
#  [1] 0.9127517
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  init$real_quick_ratio()
#  
#  [1] 0.966443
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  MCLAPPLY_RATIOS = function(QUERY1, QUERY2, class_fuzz = 'FuzzMatcher', method_fuzz = 'QRATIO', threads = 1, ...) {
#  
#    init <- eval(parse(text = paste0(class_fuzz, '$new()')))
#  
#    METHOD = paste0('init$', method_fuzz)
#  
#    if (threads == 1) {
#  
#      res_qrat = lapply(1:length(QUERY1), function(x) do.call(eval(parse(text = METHOD)), list(QUERY1[[x]], QUERY2[[x]], ...)))}
#  
#    else {
#  
#      res_qrat = parallel::mclapply(1:length(QUERY1), function(x) do.call(eval(parse(text = METHOD)), list(QUERY1[[x]], QUERY2[[x]], ...)), mc.cores = threads)
#    }
#  
#    return(res_qrat)
#  }
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  query1 = c('word1', 'word2', 'word3')
#  
#  query2 = c('similarword1', 'similar_word2', 'similarwor')
#  
#  quer_res = MCLAPPLY_RATIOS(query1, query2, class_fuzz = 'FuzzMatcher', method_fuzz = 'QRATIO', threads = 1)
#  
#  unlist(quer_res)
#  

## ---- eval = F, echo = T------------------------------------------------------
#  
#  # example output
#  
#  [1] 59 56 40
#  

# Try the fuzzywuzzyR package in your browser

# Any scripts or data that you put into this service are public.

# fuzzywuzzyR documentation built on Sept. 11, 2021, 5:06 p.m.