# R/cleancorpus.R

# Build a cleaned text corpus from raw text data.
#
# Arguments:
#   text_data: raw text input, passed unchanged to corpus_fun() (defined
#              elsewhere; presumably returns a tm corpus - confirm).
#
# Returns:
#   The corpus after these transformations, applied in order: punctuation
#   removed, numbers removed, text lower-cased, English stop words removed,
#   and extra whitespace stripped.
clean_corpus <- function(text_data) {
  corpus <- corpus_fun(text_data)
  corpus <- tm_map(corpus, removePunctuation)
  corpus <- tm_map(corpus, removeNumbers)
  # tolower() is a plain character function, not a tm transformation. Wrapping
  # it in content_transformer() keeps each document a text document, so the
  # old trailing tm_map(corpus, PlainTextDocument) repair step is not needed
  # (that step reset document metadata and can collapse a SimpleCorpus).
  corpus <- tm_map(corpus, content_transformer(tolower))
  # NOTE(review): punctuation is stripped before stop words are removed, so
  # stop words containing apostrophes (e.g. "don't") presumably no longer
  # match here - confirm whether that is intended.
  corpus <- tm_map(corpus, removeWords, stopwords("english"))
  tm_map(corpus, stripWhitespace)
}
# mukundhu/application documentation built on May 18, 2019, 10:15 a.m.