Description Usage Arguments Value Note Examples
wfm
- Generate a word frequency matrix by grouping variable(s).
wfdf
- Generate a word frequency data frame by grouping variable.
wfm_expanded
- Expand a word frequency matrix to have multiple rows
for each word.
wfm_combine
- Combines words (rows) of a word frequency matrix
(wfdf) together.
weight
- Weight a word frequency matrix for analysis where such
weighting is sensible.
weight.wfdf
- Weight a word frequency matrix for analysis where such
weighting is sensible.
as.wfm
- Attempts to coerce a matrix to a wfm.
wfm(
text.var = NULL,
grouping.var = NULL,
output = "raw",
stopwords = NULL,
char2space = "~~",
...
)
## S3 method for class 'wfdf'
wfm(
text.var = NULL,
grouping.var = NULL,
output = "raw",
stopwords = NULL,
char2space = "~~",
...
)
## S3 method for class 'character'
wfm(
text.var = NULL,
grouping.var = NULL,
output = "raw",
stopwords = NULL,
char2space = "~~",
...
)
## S3 method for class 'factor'
wfm(
text.var = NULL,
grouping.var = NULL,
output = "raw",
stopwords = NULL,
char2space = "~~",
...
)
wfdf(
text.var,
grouping.var = NULL,
stopwords = NULL,
margins = FALSE,
output = "raw",
digits = 2,
char2space = "~~",
...
)
wfm_expanded(text.var, grouping.var = NULL, ...)
wfm_combine(wf.obj, word.lists, matrix = TRUE)
## S3 method for class 'wfdf'
weight(x, type = "prop", ...)
## S3 method for class 'wfm'
weight(x, type = "prop", ...)
as.wfm(x, ...)
## S3 method for class 'matrix'
as.wfm(x, ...)
## Default S3 method:
as.wfm(x, ...)
## S3 method for class 'TermDocumentMatrix'
as.wfm(x, ...)
## S3 method for class 'DocumentTermMatrix'
as.wfm(x, ...)
## S3 method for class 'data.frame'
as.wfm(x, ...)
## S3 method for class 'wfdf'
as.wfm(x, ...)
## S3 method for class 'Corpus'
as.wfm(x, col = "docs", row = "text", ...)
## S3 method for class 'Corpus'
wfm(text.var, ...)
|
text.var |
The text variable. |
grouping.var |
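The grouping variables. Default NULL generates one word list for all text. Also takes a single grouping variable or a list of 1 or more grouping variables. |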
output |
Output type (either "proportion" or "percent"). The default, "raw", returns raw counts. |
stopwords |
A vector of stop words to remove. |
char2space |
A vector of characters to be turned into spaces. If
char.keep is NULL, char2space will activate this argument. |
margins |
logical. If TRUE provides grouping.var and word variable totals. |
digits |
An integer indicating the number of decimal places (round) or significant digits (signif) to be used. Negative values are allowed. |
wf.obj |
A wfm or wfdf object. |
word.lists |
A list of character vectors of words to pass to
wfm_combine. |
matrix |
logical. If TRUE returns the output as a wfm rather than a wfdf object. |
x |
An object with words for row names and integer values. |
type |
The type of weighting to use: c("prop", "max", "scaled"). |
col |
The column name (generally not used). |
row |
The row name (generally not used). |
... |
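Other arguments supplied to Corpus or TermDocumentMatrix. For as.wfm, these are other arguments passed to as.wfm methods (currently ignored). |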
wfm
- returns a word frequency of the class matrix.
wfdf
- returns a word frequency of the class data.frame with
a words column and optional margin sums.
wfm_expanded
- returns a matrix similar to a word frequency
matrix (wfm) but the rows are expanded to represent the maximum usages
of the word and cells are dummy coded to indicate that number of uses.
wfm_combine
- returns a word frequency matrix (wfm) or
dataframe (wfdf) with counts for the combined word.lists merged and
remaining terms (else).
weight
- Returns a weighted matrix for use with other R
packages. The output is not of the class "wfm".
as.wfm
- Returns a matrix of the class "wfm".
Words can be kept together as a single word/entry by joining them with a
double tilde ("~~"), or with another character string passed to char2space.
This is useful for keeping proper names as a single unit.
## Not run:
## Word frequency matrix (wfm) examples.
## Count the words of `state`, grouped by `sex` and `adult`; show rows 1-15.
with(DATA, wfm(state, list(sex, adult)))[1:15, ]
## Same text grouped by a single variable (`person`).
with(DATA, wfm(state, person))[1:15, ]
## Filter() can be applied directly to a freshly built wfm ...
Filter(with(DATA, wfm(state, list(sex, adult))), 5)
## ... compare with the full, unfiltered matrix.
with(DATA, wfm(state, list(sex, adult)))
## Filter particular words based on min/max values in a wfm.
## NOTE(review): the positional arguments after `v` are presumably the
## minimum and (optional) maximum bounds -- confirm against ?Filter.
v <- with(DATA, wfm(state, list(sex, adult)))
Filter(v, 5)
Filter(v, 5, count.apostrophe = FALSE)
Filter(v, 5, 7)
Filter(v, 4, 4)
Filter(v, 3, 4)
## Additionally drop the words listed in `Top25Words`.
Filter(v, 3, 4, stopwords = Top25Words)
## Insert a double tilde ("~~") to keep phrases together as one entry
## (e.g., a first and last name).
## `alts` holds the strings handed to space_fill().
## NOTE(review): space_fill() presumably replaces the spaces inside each
## matched string with "~~" -- confirm against ?space_fill.
alts <- c(" fun", "I ")
state2 <- space_fill(DATA$state, alts, rm.extra = FALSE)
## Build the wfm from the filled text; show the first 18 rows.
with(DATA, wfm(state2, list(sex, adult)))[1:18, ]
## Word frequency data frame (wfdf) examples: same calls as for wfm(),
## but the result is a data frame with a words column.
with(DATA, wfdf(state, list(sex, adult)))[1:15, ]
with(DATA, wfdf(state, person))[1:15, ]
## wfm_expanded example: build a wfm, then expand it so that a word used
## several times gets several rows.
z <- wfm(DATA$state, DATA$person)
wfm_expanded(z)[30:45, ] # rows 30-45 include two "you" rows
## wfm_combine examples:
#======================
## Inputs: wfm_combine() is demonstrated on raw counts with and without
## margins, and on a proportion matrix (which it does not accept).
## raw no margins (will work)
x <- wfm(DATA$state, DATA$person)
## raw with margin (will work)
y <- wfdf(DATA$state, DATA$person, margins = TRUE)
## Proportion matrix
z2 <- wfm(DATA$state, DATA$person, output = "proportion")

## Word lists in the different shapes accepted for `word.lists`.
WL1 <- c(y[, 1])                                  # first column of the wfdf
WL2 <- list(c("read", "the", "a"), c("you", "your", "you're"))
WL3 <- list(bob = c("read", "the", "a"), yous = c("you", "your", "you're"))
## "a" appears in both groups of WL4 (see the commented-out call below).
WL4 <- list(bob = c("read", "the", "a"), yous = c("a", "you", "your", "you're"))
WL5 <- list(yous = c("you", "your", "you're"))
WL6 <- list(c("you", "your", "you're")) # no name so will be called words 1
WL7 <- c("you", "your", "you're")

wfm_combine(z2, WL2) # Won't work: not a raw frequency matrix
wfm_combine(x, WL2)  # Works (raw and no margins)
wfm_combine(y, WL2)  # Works (raw with margins)
wfm_combine(y, c("you", "your", "you're"))
wfm_combine(y, WL1)
wfm_combine(y, WL3)
## wfm_combine(y, WL4) # Error
wfm_combine(y, WL5)
wfm_combine(y, WL6)
wfm_combine(y, WL7)

## Combine a set of words, then test the combined table against the
## full word frequency matrix built from the same wfdf.
worlis <- c("you", "it", "it's", "no", "not", "we")
y <- wfdf(DATA$state, list(DATA$sex, DATA$adult), margins = TRUE)
z <- wfm_combine(y, worlis)
chisq.test(z)
chisq.test(wfm(y))
## Dendrogram
## Word frequency matrix of the debate dialogue, grouped by person and time.
presdeb <- with(pres_debates2012, wfm(dialogue, list(person, time)))
library(sjPlot)
## Transpose so that the groups (not the words) are the rows handed to
## sjc.dend().
## NOTE(review): `2:4` is presumably the range of cluster counts to mark --
## confirm against ?sjc.dend.
sjc.dend(t(presdeb), 2:4)
## Words correlated within turns of talk
## EXAMPLE 1
library(qdapTools)
## Grouping factor: one level per act/turn combination ("act|turn").
## NOTE(review): pad(TOT(tot)) presumably extracts and zero-pads the turn of
## talk number -- confirm against ?TOT and ?pad.
x <- factor(with(rajSPLIT, paste(act, pad(TOT(tot)), sep = "|")))
dat <- wfm(rajSPLIT$dialogue, x)
## A wfm has words as rows, so transpose to get one column per word before
## correlating selected words.
cor(t(dat)[, c("romeo", "juliet")])
cor(t(dat)[, c("romeo", "banished")])
cor(t(dat)[, c("romeo", "juliet", "hate", "love")])
## Heat map of the same correlation matrix.
qheat(cor(t(dat)[, c("romeo", "juliet", "hate", "love")]),
diag.na = TRUE, values = TRUE, digits = 3, by.column = NULL)
## Correlations among all words of DATA$state, grouped by id(DATA).
dat2 <- wfm(DATA$state, id(DATA))
qheat(cor(t(dat2)), low = "yellow", high = "red",
grid = "grey90", diag.na = TRUE, by.column = NULL)
## EXAMPLE 2
## Same idea for the 2012 presidential debates ("time|turn" groups).
## Note: `dat2` is overwritten here.
x2 <- factor(with(pres_debates2012, paste(time, pad(TOT(tot)), sep = "|")))
dat2 <- wfm(pres_debates2012$dialogue, x2)
## Word list with the stop words removed.
wrds <- word_list(pres_debates2012$dialogue,
stopwords = c("it's", "that's", Top200Words))
## Lower-cased, sorted first column of the first element of `rfswl`.
wrds2 <- tolower(sort(wrds$rfswl[[1]][, 1]))
qheat(word_cor(t(dat2), word = wrds2, r = NULL),
diag.na = TRUE, values = TRUE, digits = 3, by.column = NULL,
high="red", low="yellow", grid=NULL)
## EXAMPLE 3
library(gridExtra); library(ggplot2); library(grid)
## One wfm per candidate, restricted to that candidate's rows
## (reuses `x2` from EXAMPLE 2).
dat3 <- lapply(qcv(OBAMA, ROMNEY), function(x) {
with(pres_debates2012, wfm(dialogue[person == x], x2[person == x]))
})
# Presidential debates by person: keep only the two candidates' rows
dat5 <- pres_debates2012
dat5 <- dat5[dat5$person %in% qcv(ROMNEY, OBAMA), ]
## Dispersion plot of the `wrds2` words by person (reuses `wrds2`).
disp <- with(dat5, dispersion_plot(dialogue, wrds2, grouping.var = person,
total.color = NULL, rm.vars=time))
## Word correlations for each candidate's matrix.
cors <- lapply(dat3, function(m) {
word_cor(t(m), word = wrds2, r = NULL)
})
## Build (but do not draw: plot = FALSE) one heat map per candidate.
plots <- lapply(cors, function(x) {
qheat(x, diag.na = TRUE, values = TRUE, digits = 3, plot = FALSE,
by.column = NULL, high="red", low="yellow", grid=NULL)
})
## Title each heat map with the candidate name and strip the x-axis title
## and plot margins.
plots <- lapply(1:2, function(i) {
plots[[i]] + ggtitle(qcv(OBAMA, ROMNEY)[i]) +
theme(axis.title.x = element_blank(),
plot.margin = unit(rep(0, 4), "lines"))
})
## Dispersion plot on the left, the two heat maps stacked on the right.
grid.arrange(disp, arrangeGrob(plots[[1]], plots[[2]], ncol=1), ncol=2)
## With `word_cor`
## Named word groups; each name becomes a combined row in `z` below.
worlis <- list(
pronouns = c("you", "it", "it's", "we", "i'm", "i"),
negative = qcv(no, dumb, distrust, not, stinks),
literacy = qcv(computer, talking, telling)
)
## Word frequency data frame grouped by qdapTools::id(DATA, prefix = TRUE).
y <- wfdf(DATA$state, qdapTools::id(DATA, prefix = TRUE))
## Merge the counts of each word group.
z <- wfm_combine(y, worlis)
## Correlate the combined groups (transpose so the groups are columns).
word_cor(t(z), word = names(worlis), r = NULL)
## Plotting method
plot(y, TRUE)
plot(z)
## Correspondence Analysis
library(ca)
## Keep only the two candidates' rows of the debate data.
dat <- pres_debates2012
dat <- dat[dat$person %in% qcv(ROMNEY, OBAMA), ]
## Pass the dialogue through stemmer() before counting.
speech <- stemmer(dat$dialogue)
## First fit: wfm by person and time with the `Top25Words` stop words.
mytable1 <- with(dat, wfm(speech, list(person, time), stopwords = Top25Words))
fit <- ca(mytable1)
summary(fit)
plot(fit)
plot3d.ca(fit, labels=1)
## Second fit: same table with the longer `Top200Words` stop word list.
mytable2 <- with(dat, wfm(speech, list(person, time), stopwords = Top200Words))
fit2 <- ca(mytable2)
summary(fit2)
plot(fit2)
plot3d.ca(fit2, labels=1)
## Weight a wfm
## weight() returns a weighted matrix that is no longer of class "wfm".
WFM <- with(DATA, wfm(state, list(sex, adult)))
## Plot the "scaled" weighting.
plot(weight(WFM, "scaled"), TRUE)
## The three weighting types, printed for comparison.
weight(WFM, "prop")
weight(WFM, "max")
weight(WFM, "scaled")
## End(Not run)
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.