#' RSLP
#'
#' Apply the Stemming Algorithm for the Portuguese Language to
#' vector of words.
#'
#' @param words vector of words that you want to stem.
#' @param steprules as obtained from the function extract_rules. (only define if you are certain about it).
#' The default is to get the parsed versionof the rules installed with the package.
#'
#' @references
#' V. Orengo, C. Huyck, "A Stemming Algorithmm for the Portuguese Language", SPIRE, 2001, String Processing and Information Retrieval, International Symposium on, String Processing and Information Retrieval, International Symposium on 2001, pp. 0186, doi:10.1109/SPIRE.2001.10024
#'
#' @examples
#' words <- c("gostou", "gosto", "gostaram")
#' rslp(words)
#'
#' @export
rslp <- function(
words,
steprules = readRDS(system.file("steprules.rds", package = "rslp"))
) {
res <- plyr::laply(words, rslp_, steprules = steprules)
attr(res,"names") <- NULL
return(res)
}
#' RSLP Document
#'
#' Apply the Stemming Algorithm for the Portuguese Language to
#' vector of documents. It extracts words using the regex
#' "\\b[:alpha:]\\b"
#'
#' @param docs chr vector of documents
#' @param steprules as obtained from the function extract_rules. (only define if you are certain about it).
#' The default is to get the parsed version of the rules installed with the package.
#'
#' @examples
#' docs <- c("coma frutas pois elas fazem bem para.")
#' rslp_doc(docs)
#'
#' @export
#'
#' @references
#' V. Orengo, C. Huyck, "A Stemming Algorithmm for the Portuguese Language", SPIRE, 2001, String Processing and Information Retrieval, International Symposium on, String Processing and Information Retrieval, International Symposium on 2001, pp. 0186, doi:10.1109/SPIRE.2001.10024
#'
rslp_doc <- function(docs,
steprules = readRDS(system.file("steprules.rds", package = "rslp"))
){
words <- tokenizers::tokenize_words(docs) %>%
unlist %>% unique
words_s <- rslp(words, steprules = steprules)
names(words_s) <- sprintf("\\b%s\\b", words)
docs <- stringr::str_replace_all(docs, words_s)
return(docs)
}
#' RSLP_
#'
#' Apply the Stemming Algorithm for the Portuguese Language to a word.
#'
#' @param word word to be stemmed.
#' @param steprules as obtained from the function extract_rules.
#'
rslp_ <- function(word,
steprules = readRDS(system.file("steprules.rds", package = "rslp"))
) {
if (stringr::str_sub(word, start = -1) == "s") {
word <- apply_rules(word, "Plural", steprules)
}
if (stringr::str_sub(word, start = -1) == "a") {
word <- apply_rules(word, "Feminine", steprules)
}
word <- apply_rules(word, "Augmentative", steprules)
word <- apply_rules(word, "Adverb", steprules)
word_after_noun <- apply_rules(word, "Noun", steprules)
if (word_after_noun == word) {
word_after_verb <- apply_rules(word, "Verb", steprules)
if (word_after_verb == word) {
word <- apply_rules(word, "Vowel", steprules)
} else {
word <- word_after_verb
}
} else {
word <- word_after_noun
}
word <- remove_accents(word)
return(word)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.