R/word.frequency.R

Defines functions word.frequency

Documented in word.frequency

#' @title Word Frequency
#' @description Counts how often each term occurs across a corpus and draws a
#'   bar chart of the most frequent terms.
#' @details Requires the \pkg{tm} and \pkg{ggplot2} packages. If either one
#'   cannot be loaded, an installation from CRAN is attempted; if the package
#'   is still unavailable afterwards an error is raised. Both packages are
#'   used through their namespaces and are not attached to the search path.
#'
#'   Terms tied with the last ranked term are all kept, so the chart can show
#'   more than \code{top} bars.
#' @param c Corpus Data. Passed as-is to \code{tm::TermDocumentMatrix()}.
#' @param n Name. Appended to the plot title as the search term.
#' @param top Number of top-ranked terms to plot. Defaults to 10.
#' @keywords package
#' @return A \code{ggplot} object: a bar chart of the most frequent terms,
#'   ordered from most to least frequent.
#' @export
#' @examples word.frequency(nk.corpus.ultimus, nk.name)

word.frequency <- function(c, n, top = 10) {
  # Validate cheap arguments first so bad input fails before any package
  # loading or installation is attempted.
  if (!is.numeric(top) || length(top) != 1L || is.na(top) || top < 1) {
    stop("`top` must be a single number greater than or equal to 1",
         call. = FALSE)
  }

  # NOTE: the parameter `c` shadows base::c() as a variable only. Calls such
  # as c("tm", "ggplot2") still resolve to the base function because R skips
  # non-function bindings when looking up a function name.
  for (pkg in c("tm", "ggplot2")) {
    # requireNamespace() is the supported way to test availability;
    # scanning installed.packages() is slow and discouraged for this purpose.
    if (!requireNamespace(pkg, quietly = TRUE)) {
      utils::install.packages(pkg, repos = "https://cran.us.r-project.org")
      if (!requireNamespace(pkg, quietly = TRUE)) {
        stop("package '", pkg, "' is required but could not be installed",
             call. = FALSE)
      }
    }
  }

  # Term-by-document count matrix; wordLengths = c(1, Inf) sets the minimum
  # term length to one character and leaves the maximum unbounded.
  term.counts <- as.matrix(
    tm::TermDocumentMatrix(c, control = list(wordLengths = c(1, Inf)))
  )

  # Total occurrences of each term over all documents, most frequent first.
  term.totals <- sort(rowSums(term.counts), decreasing = TRUE)
  if (length(term.totals) == 0L) {
    stop("the corpus contains no terms to plot", call. = FALSE)
  }

  word.totals <- data.frame(word = names(term.totals), freq = term.totals)
  row.names(word.totals) <- NULL

  # Keep every term whose minimum rank by descending frequency is within
  # `top`. This is the base-R equivalent of dplyr::top_n(x, top) weighted by
  # `freq`, including its behaviour of retaining ties at the cut-off.
  is.top <- rank(-word.totals$freq, ties.method = "min") <= top
  top.words <- word.totals[is.top, , drop = FALSE]
  row.names(top.words) <- NULL

  # Bar chart of the retained terms, bars ordered by decreasing frequency.
  ggplot2::ggplot(
    top.words,
    ggplot2::aes(x = stats::reorder(word, -freq), y = freq, fill = freq)
  ) +
    ggplot2::geom_bar(stat = "identity", show.legend = FALSE) +
    ggplot2::labs(
      title = paste0("Most Frequent Words for Search Term ", n),
      y = "Frequency",
      x = "Word"
    ) +
    ggplot2::theme(
      axis.text.x = ggplot2::element_text(angle = 25, vjust = 1.0, hjust = 1.0)
    )
}
sabalicodev/sabali documentation built on Jan. 13, 2020, 2:22 p.m.