# R/split_to_sentences.R
#
# Defines functions: split_to_sentences
#
# Documented in: split_to_sentences

# Restore the objects saved in data/abbreviations.rda into the environment in
# which this file is evaluated. The path is relative to the working directory
# at the time this statement runs.
# NOTE(review): presumably this file provides the `abbreviations` object used
# as the default for `abbrvt` in split_to_sentences() -- confirm.
load("data/abbreviations.rda")

# Split text into sentences.
#
# First repairs sentence boundaries where the closing punctuation is glued to
# the next sentence (e.g. "end.Next"), then hands the text to text_split().
#
# Arguments:
#   text   - the text to process; passed to str_replace_all() and then to
#            text_split().
#   abbrvt - forwarded to text_split() as `sent_suppress`; defaults to the
#            `abbreviations` object (presumably a list of abbreviations after
#            which no sentence break should be made -- confirm).
#
# Returns:
#   The `text` component of the text_split() result, passed through
#   as.vector().
split_to_sentences <- function(text, abbrvt = abbreviations) {
  # Left side: a lower-case word, or any word of three or more letters,
  # immediately followed by '.', '?' or '!'.
  boundary_left <- '(\\b[a-z]+[.?!]|\\b[:alpha:]{3,}[.?!])'
  # Right side: a capitalised word of two or more letters, or a lone "I"/"A".
  boundary_right <- '([A-Z][:alpha:]+|[IA])'
  glued_boundary <- paste0(boundary_left, boundary_right)

  # Put a single space between the two captured halves of each match.
  spaced_text <- str_replace_all(text, glued_boundary, "\\1 \\2")

  pieces <- text_split(spaced_text, sent_suppress = abbrvt)
  as.vector(pieces$text)
}
# achilleas-251/txtprocess documentation built on March 30, 2020, 12:43 a.m.