word_count: Word Counts

View source: R/word_count.R

word_countR Documentation

Word Counts

Description

word_count - Transcript apply word counts.

character_count - Transcript apply character counts.

character_table - Computes a table of character counts by grouping . variable(s).

Usage

word_count(
  text.var,
  byrow = TRUE,
  missing = NA,
  digit.remove = TRUE,
  names = FALSE
)

wc(text.var, byrow = TRUE, missing = NA, digit.remove = TRUE, names = FALSE)

character_count(
  text.var,
  byrow = TRUE,
  missing = NA,
  apostrophe.remove = TRUE,
  digit.remove = TRUE,
  count.space = FALSE
)

character_table(
  text.var,
  grouping.var = NULL,
  percent = TRUE,
  prop.by.row = TRUE,
  zero.replace = 0,
  digits = 2,
  ...
)

char_table(
  text.var,
  grouping.var = NULL,
  percent = TRUE,
  prop.by.row = TRUE,
  zero.replace = 0,
  digits = 2,
  ...
)

Arguments

text.var

The text variable

byrow

logical. If TRUE counts by row, if FALSE counts all words.

missing

Value to insert for missing values (empty cells).

digit.remove

logical. If TRUE removes digits before counting words.

names

logical. If TRUE the sentences are given as the names of the counts.

apostrophe.remove

logical. If TRUE apostrophes will be counted in the character count.

count.space

logical. If TRUE spaces are counted as characters.

grouping.var

The grouping variables. Default NULL generates one word list for all text. Also takes a single grouping variable or a list of 1 or more grouping variables.

percent

logical. If TRUE output given as percent. If FALSE the output is proportion.

prop.by.row

logical. If TRUE applies proportional to the row. If FALSE applies by column.

zero.replace

Value to replace 0 values with.

digits

Integer; number of decimal places to round when printing.

...

Other arguments passed to prop.

Value

word_count - returns a word count by row or total.

character_count - returns a character count by row or total.

character_table - returns a list: dataframe of character counts by grouping variable.

raw

Dataframe of the frequency of characters by grouping variable.

prop

Dataframe of the proportion of characters by grouping variable.

rnp

Dataframe of the frequency and proportions of characters by grouping variable.

percent

The value of percent used for plotting purposes.

zero.replace

The value of zero.replace used for plotting purposes.

Note

wc is a convenient short hand for word_count.

See Also

syllable_count, prop, colcomb2class

Examples

## Not run: 
## WORD COUNT
word_count(DATA$state)
wc(DATA$state)
word_count(DATA$state, names = TRUE)
word_count(DATA$state, byrow=FALSE, names = TRUE)
sum(word_count(DATA$state))

sapply(split(raj$dialogue, raj$person), wc, FALSE) %>%
    sort(decreasing=TRUE) %>% 
    list2df("wordcount", "person") %>%
    `[`(, 2:1)

## PLOT WORD COUNTS
raj2 <- raj
raj2$scaled <- unlist(tapply(wc(raj$dialogue), raj2$act, scale))
raj2$scaled2 <- unlist(tapply(wc(raj$dialogue), raj2$act, scale, scale = FALSE))
raj2$ID <- factor(unlist(tapply(raj2$act, raj2$act, seq_along)))

ggplot(raj2, aes(x = ID, y = scaled, fill =person)) +
    geom_bar(stat="identity") +
    facet_grid(act~.) + 
    ylab("Scaled") + xlab("Turn of Talk") +
    guides(fill = guide_legend(nrow = 5, byrow = TRUE)) +
    theme(legend.position="bottom") +
    ggtitle("Scaled and Centered")


ggplot(raj2, aes(x = ID, y = scaled2, fill =person)) +
    geom_bar(stat="identity") +
    facet_grid(act~.) + 
    ylab("Scaled") + xlab("Turn of Talk") +
    guides(fill = guide_legend(nrow = 5, byrow = TRUE)) +
    theme(legend.position="bottom") +
    ggtitle("Mean Difference")
  
    
raj$wc <- wc(raj$dialogue)
raj$cum.wc <- unlist(with(raj, tapply(wc, act, cumsum)))
raj$turn <- unlist(with(raj, tapply(act, act, seq_along)))
ggplot(raj, aes(y=cum.wc, x=turn)) + 
    geom_step(direction = "hv") + 
    facet_wrap(~act)
        
## CHARACTER COUNTS
character_count(DATA$state)
character_count(DATA$state, byrow=FALSE)
sum(character_count(DATA$state))

## CHARACTER TABLE
x <- character_table(DATA$state, DATA$person)
plot(x)
plot(x, label = TRUE)
plot(x, label = TRUE, text.color = "red")
plot(x, label = TRUE, lab.digits = 1, zero.replace = "PP7")

scores(x)
counts(x)
proportions(x)

plot(scores(x))
plot(counts(x))
plot(proportions(x))

## combine columns
colcomb2class(x, list(vowels = c("a", "e", "i", "o", "u")))

## char_table(DATA$state, DATA$person)
## char_table(DATA$state, DATA$person, percent = TRUE)
## character_table(DATA$state, list(DATA$sex, DATA$adult))

library(ggplot2);library(reshape2)
dat <- character_table(DATA$state, list(DATA$sex, DATA$adult))
dat2 <- colsplit2df(melt(counts(dat)), keep.orig = TRUE)
head(dat2, 15)

ggplot(data = dat2, aes(y = variable, x = value, colour=sex)) +
    facet_grid(adult~.) +
    geom_line(size=1, aes(group =variable), colour = "black") +
    geom_point()

ggplot(data = dat2, aes(x = variable, y = value)) +
    geom_bar(aes(fill = variable), stat = "identity") +
    facet_grid(sex ~ adult, margins = TRUE) +
    theme(legend.position="none")

## End(Not run)

qdap documentation built on May 31, 2023, 5:20 p.m.