#' Word Counts
#'
#' \code{word.count} - Transcript apply word counts.
#'
#' @rdname word.count
#' @param text.var The text variable
#' @param grouping.var The grouping variables. Default NULL generates one
#' output for all text. Also takes a single grouping variable or a list of 1 or
#' more grouping variables.
#' @param byrow logical. If TRUE counts by row, if FALSE counts all words.
#' @param missing Value to insert for missing values (empty cells).
#' @param digit.remove logical. If TRUE removes digits before counting words.
#' @param names logical. If TRUE the sentences are given as the names of the
#' counts.
#' @param apostrophe.remove logical. If TRUE apostrophes will be counted
#' in the character count.
#' @param count.space logical. If TRUE spaces are counted as characters.
#' @param prop.by.row logical. If TRUE applies proportional to the row. If
#' FALSE applies by column.
#' @param \ldots Other arguments passed to \code{\link[qdap]{prop}}.
#' @return \code{word.count} - returns a word count by row or total.
#' @note wc is a convenient short hand for word.count.
#' @seealso \code{\link[qdap]{syllable.count}}
#' @seealso \code{\link[qdap]{prop}}
#' @keywords word-count, character-count
#' @export
#' @examples
#' # WORD COUNT
#' word.count(DATA$state)
#' wc(DATA$state)
#' word.count(DATA$state, names = TRUE)
#' word.count(DATA$state, byrow=FALSE, names = TRUE)
#' sum(word.count(DATA$state))
#'
#' # CHARACTER COUNTS
#' character.count(DATA$state)
#' character.count(DATA$state, byrow=FALSE)
#' sum(character.count(DATA$state))
#'
#' # CHARACTER TABLE
#' x <- character.table(DATA$state, DATA$person)
#' plot(x)
#' plot(x, label = TRUE)
#' plot(x, label = TRUE, text.color = "red")
#' plot(x, label = TRUE, lab.digits = 1, zero.replace = "PP7")
#' x$raw[, 1:20]
#' x$prop[, 1:8]
#' x$rnp[, 1:8]
#'
#' ## char.table(DATA$state, DATA$person)
#' ## char.table(DATA$state, DATA$person, percent = TRUE)
#' ## character.table(DATA$state, list(DATA$sex, DATA$adult))
#'
#' library(ggplot2);library(reshape2)
#' dat <- character.table(DATA$state, list(DATA$sex, DATA$adult))
#' dat2 <- colsplit2df(melt(dat$raw), keep.orig = TRUE)
#' head(dat2, 15)
#' dat3 <- dat2[rep(seq_len(dim(dat2)[1]), dat2[, 5]), -5]
#'
#'
#' ggplot(data = dat2, aes(y = variable, x = value, colour=sex)) +
#' facet_grid(adult~.) +
#' geom_line(size=1, aes(group =variable), colour = "black") +
#' geom_point()
#'
#' ggplot(data = dat3, aes(x = variable, fill = variable)) +
#' geom_bar() +
#' facet_grid(sex ~ adult, margins = TRUE) +
#' theme(legend.position="none")
word.count <-
function(text.var, byrow = TRUE, missing = NA, digit.remove = TRUE,
names = FALSE) {
len2 <- function(x, missing) {
len <- length(x)
ifelse((len == 0) | len == 1 && (is.na(x) | is.null(x)), missing, len)
}
txt <- stopwords(text.var, strip = TRUE, digit.remove = digit.remove,
stopwords = NULL)
z <- sapply(txt, len2, missing = missing)
if (!byrow) {
return(sum(z, na.rm = TRUE) )
}
if(names) {
names(z) <- text.var
}
z
}
#' @rdname word.count
#' @export
wc <- word.count
#' Count Number of Characters
#'
#' \code{character.count} - Transcript apply character counts.
#'
#' @return \code{character.count} - returns a character count by row or total.
#' @rdname word.count
#' @export
character.count <-
function(text.var, byrow = TRUE, missing = NA, apostrophe.remove = TRUE,
digit.remove = TRUE, count.space = FALSE) {
len2 <- function(x, missing) {
len <- length(x)
ifelse((len == 0) | (is.na(x) | is.null(x)), missing, nchar(x))
}
txt <- stopwords(text.var, strip = TRUE, separate = FALSE,
digit.remove = digit.remove, stopwords = NULL)
txt[txt %in% c("", "NA")] <- NA
if (!count.space) {
txt <- gsub("\\s+", "", txt)
}
z <- unlist(lapply(txt, len2, missing = missing))
if (!byrow) {
z <- sum(z, na.rm = TRUE)
}
z
}
#' Table of Character Counts
#'
#' \code{character.table} - Computes a table of character counts by grouping .
#' variable(s).
#'
#' @param percent logical. If TRUE output given as percent. If FALSE the
#' output is proportion.
#' @param zero.replace Value to replace 0 values with.
#' @param digits Integer; number of decimal places to round when printing.
#' @return \code{character.table} - returns a list:
#' dataframe of character counts by grouping variable.
#' \item{raw}{Dataframe of the frequency of characters by grouping variable.}
#' \item{prop}{Dataframe of the proportion of characters by grouping variable.}
#' \item{rnp}{Dataframe of the frequency and proportions of characters by
#' grouping variable.}
#' \item{percent}{The value of percent used for plotting purposes.}
#' \item{zero.replace}{The value of zero.replace used for plotting purposes.}
#' @rdname word.count
#' @export
character.table <- function(text.var, grouping.var, percent = TRUE,
prop.by.row = TRUE, zero.replace = 0, digits = 2, ...) {
if(is.null(grouping.var)) {
G <- "all"
} else {
if (is.list(grouping.var)) {
m <- unlist(as.character(substitute(grouping.var))[-1])
m <- sapply(strsplit(m, "$", fixed=TRUE), function(x) {
x[length(x)]
}
)
G <- paste(m, collapse="&")
} else {
G <- as.character(substitute(grouping.var))
G <- G[length(G)]
}
}
if(is.null(grouping.var)){
grouping <- rep("all", length(text.var))
} else {
if (is.list(grouping.var) & length(grouping.var)>1) {
grouping <- paste2(grouping.var)
} else {
grouping <- unlist(grouping.var)
}
}
ctab <- function(x) {
table(unlist(strsplit(tolower(scrubber(paste2(x))), NULL)))
}
text.var <- as.character(text.var)
DF <- data.frame(grouping, text.var, check.names = FALSE,
stringsAsFactors = FALSE)
DF$grouping <- factor(DF$grouping)
L1 <- split(DF$text.var, DF$grouping)
L2 <- lapply(L1, ctab)
chars <- sort(unique(unlist(lapply(L2, names))))
L3 <- do.call(rbind, lapply(L2, function(x){
nots <- chars[!chars %in% names(x)]
new <- rev(c(x, rep(0, length(nots))))
if (!identical(nots, character(0))) {
names(new)[1:length(nots)] <- nots
}
new[order(names(new))]
}))
DF2 <- data.frame(x = rownames(L3), L3, check.names=FALSE,
row.names = NULL)
colnames(DF2)[1] <- G
DF3 <- prop(DF2[-1], percent = percent, by.column = (1 - prop.by.row), ...)
DF3[is.nan(DF3)] <- 0
DF3 <- data.frame(DF2[, 1, drop = FALSE], DF3, check.names = FALSE)
rnp <- raw_pro_comb(DF2[, -1], DF3[, -1], digits = digits,
percent = percent, zero.replace = zero.replace)
rnp <- data.frame(DF2[, 1, drop = FALSE], rnp, check.names = FALSE)
o <- list(raw = DF2, prop = DF3, rnp = rnp, percent = percent,
zero.replace = zero.replace)
class(o) <- "character.table"
o
}
#' Prints a character.table object
#'
#' Prints a character.table object.
#'
#' @param x The character.table object
#' @param digits Integer values specifying the number of digits to be
#' printed.
#' @param percent logical. If TRUE output given as percent. If FALSE the
#' output is proportion. If NULL uses the value from
#' \code{\link[qdap]{termco}}. Only used if \code{label} is TRUE.
#' @param zero.replace Value to replace 0 values with. If NULL uses the value
#' from \code{\link[qdap]{termco}}. Only used if \code{label} is TRUE.
#' @param \ldots ignored
#' @method print character.table
#' @S3method print character.table
print.character.table <-
function(x, digits = 2, percent = NULL, zero.replace = NULL, ...) {
WD <- options()[["width"]]
options(width=3000)
if (!is.null(percent)) {
if (percent != x$percent) {
DF <- as.matrix(x$prop[, -c(1:2)])
if (percent) {
DF <- DF*100
} else {
DF <- DF/100
}
x$prop <- data.frame(x$prop[, 1:2], DF, check.names = FALSE)
}
} else {
percent <- x$percent
}
if (is.null(zero.replace)) {
zero.replace <- x$zero.replace
}
rnp <- raw_pro_comb(x$raw[, -1, drop = FALSE],
x$prop[, -1, drop = FALSE], digits = digits, percent = percent,
zero.replace = zero.replace, override = TRUE)
rnp <- data.frame(x$raw[, 1, drop = FALSE], rnp, check.names = FALSE)
print(rnp)
options(width=WD)
}
#' Plots a character.table Object
#'
#' Plots a character.table object.
#'
#' @param x The character.table object
#' @param label logical. If TRUE the cells of the heat map plot will be labeled
#' with count and proportional values.
#' @param lab.digits Integer values specifying the number of digits to be
#' printed if \code{label} is TRUE.
#' @param percent logical. If TRUE output given as percent. If FALSE the
#' output is proportion. If NULL uses the value from
#' \code{\link[qdap]{question_type}}. Only used if \code{label} is TRUE.
#' @param zero.replace Value to replace 0 values with. If NULL uses the value
#' from \code{\link[qdap]{question_type}}. Only used if \code{label} is TRUE.
#' @param \ldots Other arguments passed to qheat
#' @method plot character.table
#' @S3method plot character.table
plot.character.table <- function(x, label = FALSE, lab.digits = 1, percent = NULL,
zero.replace = NULL, ...) {
if (label) {
if (!is.null(percent)) {
if (percent != x$percent) {
DF <- as.matrix(x$prop[, -1])
if (percent) {
DF <- DF*100
} else {
DF <- DF/100
}
x$prop <- data.frame(x$prop[, 1, drop = FALSE], DF,
check.names = FALSE)
}
} else {
percent <- x$percent
}
if (is.null(zero.replace)) {
zero.replace <- x$zero.replace
}
rnp <- raw_pro_comb(x$raw[, -1], x$prop[, -1],
digits = lab.digits, percent = percent,
zero.replace = zero.replace)
rnp <- data.frame(x$raw[, 1, drop = FALSE], rnp, check.names = FALSE)
qheat(x$prop, values=TRUE, mat2 = rnp, ...)
} else {
qheat(x$prop, ...)
}
}
#' @rdname word.count
#' @export
char.table <- character.table
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.