cleanup_data:

Usage Arguments Examples

Usage

1

Arguments

x

Examples

# Build a word-frequency table (`dfnew`) from the text in `df$text`.
# Assumes the tm package is attached and that `df` has a `text` column.
dfcorpus <- Corpus(VectorSource(df$text))

# Lower-case first so that stopword matching is case-insensitive.
dfcorpus <- tm_map(dfcorpus, content_transformer(tolower))

# Remove stopwords while apostrophes are still in the text: once punctuation
# is stripped, "don't" becomes "dont" and no longer matches the stopword list.
# Pure punctuation tokens ("...", "--", "-") are not listed here: removeWords
# builds a regular expression from its word list, so "..." would act as a
# three-character wildcard; removePunctuation below deals with them instead.
dfcorpus <- tm_map(
  dfcorpus,
  removeWords,
  c(stopwords("english"), "can", "just", "virginamerica", "'s", "'m")
)

dfcorpus <- tm_map(dfcorpus, removePunctuation)
dfcorpus <- tm_map(dfcorpus, removeNumbers)

# Must run after punctuation and digits are stripped: by then a link has
# collapsed into one alphanumeric token starting with "http", which is the
# only form this pattern removes completely.
removeURL <- function(x) gsub("http[[:alnum:]]*", "", x)
dfcorpus <- tm_map(dfcorpus, content_transformer(removeURL))

# Collapse the gaps left behind by all of the removals above.
dfcorpus <- tm_map(dfcorpus, stripWhitespace)

# Term frequencies across the whole corpus, most frequent first.
dftdm <- TermDocumentMatrix(dfcorpus)
# NOTE(review): `matrix` masks the name of base::matrix; the name is kept
# only in case later code reads this variable.
matrix <- as.matrix(dftdm)
words <- sort(rowSums(matrix), decreasing = TRUE)
dfnew <- data.frame(word = names(words), freq = words)

adamwk97/Lombardi documentation built on May 1, 2020, 11:01 p.m.