R/clean_text.R

Defines functions cleantext

Documented in cleantext

#' Clean a character vector of French text
#'
#' Applies, in order: lower-casing, transliteration to ASCII, removal of the
#' elided articles `d'` / `l'`, removal of French stopwords, replacement of
#' punctuation by spaces, removal of a trailing "s" from every word, and
#' whitespace normalisation.
#'
#' @param x Character vector holding the corpus to clean.
#' @return A character vector of the same length as `x`.
#' @export
cleantext <- function(x) {
  # Words removed in addition to the packaged French stopword list.
  extra_words <- c(
    "tous", "tout", "a", "lesquels",
    "lequel", "lesquelles", "plus",
    "ca", "car", "trop", "moins",
    "si", "sait", "bien", "elle", "ou",
    "elles", "etre", "comment"
  )

  # NOTE(review): the 34th stopword is deliberately left out of the removal
  # list; with the tm/snowball French list this looks like "pas" -- confirm
  # against the stopwords() implementation in use.
  stop_words <- c(extra_words, stopwords("fr")[-34])

  # The text is transliterated to ASCII before stopwords are matched, so the
  # stopwords must be transliterated too; otherwise accented entries can
  # never match. Entries that do not come out as plain lowercase letters
  # (NA, or platform-specific substitutes such as "?") are dropped so they
  # cannot inject regex metacharacters into the pattern.
  stop_words <- iconv(stop_words, to = "ASCII//TRANSLIT")
  stop_words <- unique(stop_words[grepl("^[a-z]+$", stop_words)])
  stop_pattern <- paste0("\\b", stop_words, "\\b", collapse = "|")

  x %>%
    tolower() %>%
    iconv(to = "ASCII//TRANSLIT") %>%
    # Anchor on a word boundary so that only standalone elisions are
    # stripped and words such as "aujourd'hui" are not cut in the middle.
    str_replace_all("\\b[dl]'", " ") %>%
    str_replace_all(stop_pattern, "") %>%
    str_replace_all("[[:punct:]]+", " ") %>%
    # Crude de-pluralisation: drop an "s" that ends a word.
    str_replace_all("s |s$", " ") %>%
    str_replace_all("\\s+", " ") %>%
    trimws()
}
AlexisMayer/toolbox documentation built on Aug. 25, 2020, 3:56 p.m.