1 | cleanup_data(x)
|
x |
# Build a word-frequency table (`dfnew`) from the free-text column `df$text`.
# Expects the tm package to be attached and a data frame `df` with a `text`
# column to exist already.

# Deletes every run of "http" followed by letters/digits. It is applied AFTER
# punctuation has been stripped, at which point a URL such as
# "http://t.co/abc" has collapsed into the single token "httptcoabc", so this
# narrow pattern is enough to drop the whole thing.
removeURL <- function(x) gsub("http[[:alnum:]]*", "", x)

# Stop words: the English list plus a few corpus-specific fillers.
# Only plain words belong here. removeWords() is understood to paste its
# `words` into one regular expression without escaping them (TODO confirm
# against the installed tm version), so an entry like "..." would act as
# "any three characters" and wipe out every three-letter token. Punctuation
# is already gone by the time this list is used, so punctuation-only entries
# have nothing left to match anyway.
extra_stopwords <- c("can", "just", "virginamerica")
all_stopwords <- c(stopwords("english"), extra_stopwords)

# Normalise the text: lower-case, then strip punctuation, digits and
# repeated whitespace.
dfcorpus <- Corpus(VectorSource(df$text))
dfcorpus <- tm_map(dfcorpus, content_transformer(tolower))
dfcorpus <- tm_map(dfcorpus, removePunctuation)
dfcorpus <- tm_map(dfcorpus, removeNumbers)
dfcorpus <- tm_map(dfcorpus, stripWhitespace)

# A single stop-word pass is sufficient; plain (non-tm) functions are wrapped
# in content_transformer() so the corpus keeps its document structure.
dfcorpus <- tm_map(dfcorpus, removeWords, all_stopwords)
dfcorpus <- tm_map(dfcorpus, content_transformer(removeURL))

# Term counts: one row per term, summed across all documents.
# NOTE: the variable name `matrix` is kept so that any later code reading it
# still works; it masks base::matrix only for non-function lookups.
dftdm <- TermDocumentMatrix(dfcorpus)
matrix <- as.matrix(dftdm)

# Most frequent terms first (one sort is enough).
words <- sort(rowSums(matrix), decreasing = TRUE)
dfnew <- data.frame(word = names(words), freq = words)
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.