#!/usr/bin/Rscript
# -*- coding: utf-8 -*-
################################ Description ###################################
# Title: Find close names
# Purpose: Find character strings that are close to each other, which is likely the result of misspelling
# Created the 2014-12-30
# by Joris Muller <joris.muller@jom.link>
# Licence: GPLv3 <http://www.gnu.org/licenses/>
################################################################################
#' @title Find close strings
#' @description Find close strings in a character vector (for example, the
#'   labels of a factor). It could be useful if you try to find misspelled
#'   words. Distances are computed by \code{stringdistmatrix} from the
#'   \pkg{stringdist} package, using that function's default settings.
#' @param char A character vector (a factor is coerced with
#'   \code{as.character}). Duplicates and \code{NA} values are dropped before
#'   the comparison.
#' @param nb_subst Exclusive upper bound on the distance: another string is
#'   reported as close when its distance is strictly greater than 0 and
#'   strictly lower than \code{nb_subst}.
#' @return A named list with one element per unique string of \code{char}
#'   (the names are the strings themselves). Each element is a character
#'   vector of the other strings close to it, possibly of length zero.
#'   The result is always a list, whatever the number of close strings found.
#' @author Joris Muller
#' @import stringdist
#' @export
#' @examples
#' chaine2 <- c("mainson", "maison", "cave", "caves", "Cave", "Hôpital", "Hopital", "Bachibouzouk")
#' the_list <- find_close_strings(chaine2)
#' print(the_list)
find_close_strings <- function(char, nb_subst = 3) {
  # Only keep the unique, non-missing strings. as.character() makes factor
  # input behave like its labels.
  char <- unique(as.character(char))
  char <- char[!is.na(char)]

  # Nothing to compare: return an empty (but still named) list
  if (length(char) == 0L) {
    return(structure(list(), names = character(0)))
  }

  # Calculate the distance matrix (rows and columns follow the order of char)
  dist_matrix <- stringdistmatrix(char, char, useNames = TRUE)

  # Build the list explicitly with lapply(): apply() would simplify the result
  # to a vector or a matrix whenever every string has the same number of
  # close strings, which breaks the documented "list" return value.
  close_list <- lapply(seq_along(char), function(i) {
    distances <- dist_matrix[i, ]
    # which() ignores comparisons that evaluate to NA
    char[which(distances > 0 & distances < nb_subst)]
  })
  names(close_list) <- char

  close_list
} # End of function "find_close_strings" definition
#' @title Display close names
#' @description Display in a human readable way the result of the function
#'   \code{find_close_strings}.
#' @param close_list A named list produced by the function
#'   \code{find_close_strings}: the names are the words tested, each element
#'   holds the words close to it.
#' @param sentence character A sentence between the word tested and the words
#'   that should be close.
#' @return A character vector with one sentence for each word that has at
#'   least one close word, in the order of \code{close_list}. Words without
#'   close words are skipped. An input without names yields
#'   \code{character(0)}.
#' @author Joris Muller
#' @export
#' @examples
#' chaine2 <- c("mainson", "maison", "cave", "caves", "Cave", "Hôpital", "Hopital", "Bachibouzouk")
#' the_list <- find_close_strings(chaine2)
#' display_close_names(the_list)
display_close_names <- function(close_list, sentence = "is close to") {
  words <- names(close_list)

  # Without names there is no tested word to report
  if (is.null(words)) {
    return(character())
  }

  # Walk the list by position rather than by name: a lookup such as
  # close_list[[""]] never matches, so an empty-string word would be lost,
  # and duplicated names would always resolve to the first entry.
  sentences <- lapply(seq_along(close_list), function(i) {
    close_words <- close_list[[i]]

    # No close words for this word: nothing to write
    if (length(close_words) == 0L) {
      return(character())
    }

    paste0("'", words[[i]], "' ", sentence, " '",
           paste0(close_words, collapse = "', '"), "'.")
  })

  # Flatten in one step instead of growing the result inside a loop;
  # as.character() turns the NULL produced by an all-empty list into
  # character(0).
  as.character(unlist(sentences, use.names = FALSE))
} # End of function "display_close_names" definition
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.