#' Raw Word Lists/Frequency Counts
#'
#' Transcript Apply Raw Word Lists and Frequency Counts by grouping variable(s).
#'
#' @param text.var The text variable.
#' @param grouping.var The grouping variables. Default NULL generates one word
#' list for all text. Also takes a single grouping variable or a list of 1 or
#' more grouping variables.
#' @param stopwords A vector of stop words to remove.
#' @param alphabetical If TRUE the output of frequency lists is ordered
#' alphabetically. If FALSE the list is ordered by frequency rank.
#' @param cut.n Cut off point for reduced frequency stop word list (rfswl).
#' @param cap logical. If TRUE capitalizes words from the cap.list.
#' @param cap.list Vector of words to capitalize.
#' @param cap.I logical. If TRUE capitalizes words containing the personal
#' pronoun I.
#' @param rm.bracket logical If TRUE all brackets and bracketed text are
#' removed from analysis.
#' @param char.keep A character vector of symbols (i.e. punctuation) that
#' \code{word_list} should keep. The default is to remove every symbol except
#' apostrophes.
#' @param apostrophe.remove logical. If TRUE removes apostrophes from the
#' output.
#' @param \ldots Other arguments passed to \code{\link[qdap]{strip}}.
#' @return An object of class \code{"word_list"} is a list of lists of verctors
#' or dataframes containing the following components:
#' \item{cwl}{complete word list; raw words}
#' \item{swl}{stop word list; same as rwl with stop words removed}
#' \item{fwl}{frequency word list; a data frame of words and corresponding
#' frequency counts}
#' \item{fswl}{frequency stopword word list; same as fwl but with stopwords
#' removed}
#' \item{rfswl}{reduced frequency stopword word list; same as fswl but truncated
#' to n rows}
#' @keywords word-list
#' @export
#' @examples
#' word_list(raj.act.1$dialogue)
#'
#' out1 <- with(raj, word_list(text.var = dialogue,
#' grouping.var = list(person, act)))
#' names(out1)
#' lapply(out1$cwl, "[", 1:5)
#'
#' with(DATA, word_list(state, person))
#' with(DATA, word_list(state, person, stopwords = Top25Words))
#' with(DATA, word_list(state, person, cap = FALSE, cap.list=c("do", "we")))
word_list <-
function(text.var, grouping.var = NULL, stopwords = NULL, alphabetical = FALSE,
cut.n = 20, cap = TRUE, cap.list=NULL, cap.I=TRUE, rm.bracket = TRUE,
char.keep = NULL, apostrophe.remove = FALSE, ...) {
upper <- function(x) paste(substring(x, 1, 1),
substring(x, 2, nchar(x)), sep="")
Sw1 <- stopwords[!substring(stopwords, 1, 1) %in% LETTERS]
Sw2 <- stopwords[substring(stopwords, 1, 1) %in% LETTERS]
Sw3 <- if (!is.null(Sw2)) {
paste0(Sw2, "'s")
} else {
NULL
}
stopwords <- c(Sw1, upper(Sw2), Sw2, tolower(Sw2), Sw3, tolower(Sw3))
if(is.null(grouping.var)) {
dat <- as.data.frame(rep("all", length(text.var)),
drop = FALSE, stringsAsFactors = FALSE)
names(dat) <- "all"
grouping.var <- dat
}
group.var <- if (is.list(grouping.var) & length(grouping.var)>1) {
apply(data.frame(grouping.var), 1, function(x){
if (any(is.na(x))){
NA
} else {
paste(x, collapse = ".")
}
}
)
} else {
grouping.var
}
NAME <- if (is.list(grouping.var)) {
m <- unlist(as.character(substitute(grouping.var))[-1])
m <- sapply(strsplit(m, "$", fixed=TRUE),
function(x) x[length(x)], USE.NAMES = FALSE)
paste(m, collapse="&")
} else {
G <- as.character(substitute(grouping.var))
G[length(G)]
}
word_lists1 <- textLISTER(text.var = text.var, group.vars = group.var,
rm.bracket = rm.bracket, char.keep = char.keep, apostrophe.remove = apostrophe.remove, ...)
words.UNLISTED <- lapply(word_lists1, function(x) {
y <- unlist(x)
names(y) <- NULL
return(y)
}
)
if (cap) {
word_lists2 <- lapply(word_lists1, function(x) {
y <- capitalizer(x, caps.list=cap.list)
names(y) <- NULL
return(y)
}
)
} else {
word_lists2 <- lapply(word_lists1, function(x) {
y <- unlist(x)
names(y) <- NULL
return(y)
}
)
}
naomit <- function(x) x[!is.na(x)]
word_lists2 <- lapply(word_lists2, naomit)
stopped.word_list <- lapply(words.UNLISTED, function(x) {
x[!x %in% stopwords]
}
)
stopped.word_list <- lapply(stopped.word_list, naomit)
stopped.word_list <- lapply(stopped.word_list, function(x){
capitalizer(x, caps.list = cap.list)
}
)
COUNT <- function(x) {
if (is.null(x) | identical(x, character(0)) |
identical(x, logical(0))) {
DF <- data.frame(WORD=NA, FREQ=NA)
} else {
DF <- data.frame(table(x))
names(DF) <- c("WORD", "FREQ")
DF$WORD <- as.character(DF$WORD)
DF$FREQ <- as.numeric(DF$FREQ)
DF <- DF[order(-DF$FREQ, DF$WORD), ]
rownames(DF) <- NULL
}
return(DF)
}
freq.word_list <- lapply(word_lists2, COUNT)
freq.stop.word_list <- lapply(stopped.word_list, COUNT)
red.freq.stop.word_list <- ncutWORDS(freq.stop.word_list, cut.n = cut.n)
word_lists2 <- lapply(word_lists2, function(x) {
if (is.null(x)){
return(x)
} else {
comment(x) <- "bagOwords"
return(x)
}
}
)
stopped.word_list <- lapply(stopped.word_list, function(x) {
if (is.null(x)){
return(x)
} else {
comment(x) <- "bagOwords"
return(x)
}
}
)
freq.word_list <- lapply(freq.word_list, function(x) {
if (is.null(x)) {
return(x)
} else {
comment(x) <- "freqList"
return(x)
}
}
)
freq.stop.word_list <- lapply(freq.stop.word_list, function(x) {
if (is.null(x)) {
return(x)
} else {
comment(x) <- "freqList"
return(x)
}
}
)
red.freq.stop.word_list <- lapply(red.freq.stop.word_list, function(x) {
if (is.null(x)){
return(x)
} else {
comment(x) <- "freqList"
return(x)
}
}
)
comment(word_lists2) <- "cwl"
comment(stopped.word_list) <- "swl"
comment(freq.word_list) <- "fwl"
comment(freq.stop.word_list) <- "fswl"
comment(red.freq.stop.word_list) <- "rfswl"
if (alphabetical) {
asort <- function(dat, col=1) {
dat2 <-dat[order(dat[, col]), ]
rownames(dat2) <- NULL
return(dat2)
}
freq.word_list <- lapply(freq.word_list, asort)
freq.stop.word_list <- lapply(freq.stop.word_list, asort)
red.freq.stop.word_list <- lapply(red.freq.stop.word_list, asort)
}
o <- list(cwl = word_lists2, swl = stopped.word_list,
fwl = freq.word_list, fswl = freq.stop.word_list,
rfswl = red.freq.stop.word_list)
class(o) <- c("word_list", "data.frame")
return(o)
}
#' Prints a word_list Object
#'
#' Prints a word_list object.
#'
#' @param x The word_list object
#' @param \ldots ignored
#' @method print word_list
#' @S3method print word_list
print.word_list <-
function(x, ...) {
print(x$rfswl)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.