ngrams | R Documentation |
Count n-grams, either of words, or of characters.
ngrams(.Object, ...)
## S4 method for signature 'partition'
ngrams(
.Object,
n = 2,
p_attribute = "word",
char = NULL,
progress = FALSE,
mc = 1L,
...
)
## S4 method for signature 'character'
ngrams(
.Object,
n = 2,
p_attribute = "word",
char = NULL,
progress = FALSE,
mc = 1L,
...
)
## S4 method for signature 'partition'
ngrams(
.Object,
n = 2,
p_attribute = "word",
char = NULL,
progress = FALSE,
mc = 1L,
...
)
## S4 method for signature 'subcorpus'
ngrams(
.Object,
n = 2,
p_attribute = "word",
char = NULL,
progress = FALSE,
mc = 1L,
...
)
## S4 method for signature 'character'
ngrams(
.Object,
n = 2,
p_attribute = "word",
char = NULL,
progress = FALSE,
mc = 1L,
...
)
## S4 method for signature 'data.table'
ngrams(.Object, n = 2L, p_attribute = "word")
## S4 method for signature 'corpus'
ngrams(
.Object,
n = 2,
p_attribute = "word",
char = NULL,
progress = FALSE,
mc = 1L,
...
)
## S4 method for signature 'list'
ngrams(
.Object,
n = 2,
char = NULL,
mc = FALSE,
verbose = FALSE,
progress = FALSE,
...
)
## S4 method for signature 'partition_bundle'
ngrams(
.Object,
n = 2,
char = NULL,
vocab = NULL,
p_attribute = "word",
mc = FALSE,
verbose = FALSE,
progress = FALSE,
...
)
.Object |
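An object of one of the classes for which a method is defined: `partition`, `subcorpus`, `corpus`, `character`, `data.table`, `list` or `partition_bundle`. |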
... |
Further arguments. |
n |
Number of tokens (if `char` is `NULL`) or number of characters (otherwise) that make up one n-gram. |
p_attribute |
The p-attribute(s) to use. More than one p-attribute can be supplied, e.g. `c("word", "pos")`. |
char |
If `NULL` (the default), n-grams of tokens are counted; otherwise n-grams of characters are counted. |
progress |
A `logical` value, whether to show progress information (default `FALSE`). |
mc |
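A `logical` value or a number of cores (the default is `1L` or `FALSE`, depending on the method); presumably controls whether the computation runs in parallel. |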
verbose |
A length-one `logical` value, whether to output messages (default `FALSE`). |
vocab |
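An optional vocabulary (default `NULL`); only the method for `partition_bundle` objects has this argument. |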
# Example 1: word bigrams for one day of debate in the GERMAPARLMINI corpus.
# NOTE(review): use("polmineR") presumably makes the corpora shipped with the
# package available -- confirm against the documentation of use().
use("polmineR")
# Restrict the corpus to a single date.
P <- partition("GERMAPARLMINI", date = "2009-10-27")
# Count bigrams (n = 2) on the "word" p-attribute; char = NULL is the default.
ngrm <- ngrams(P, n = 2, p_attribute = "word", char = NULL)
# a more complex scenario: get most frequent ADJA/NN-combinations
# Two p-attributes are requested here; the result is then addressed through
# position-prefixed columns ("1_pos", "2_pos").
ngrm <- ngrams(P, n = 2, p_attribute = c("word", "pos"), char = NULL)
# Keep only bigrams whose first token is tagged ADJA and whose second is NN.
ngrm2 <- subset(
ngrm,
ngrm[["1_pos"]] == "ADJA" & ngrm[["2_pos"]] == "NN"
)
# Drop the two pos columns from the `stat` slot. The result is not assigned:
# this relies on `:=` modifying the table by reference (assumes the slot holds
# a data.table -- TODO confirm).
ngrm2@stat[, "1_pos" := NULL][, "2_pos" := NULL]
# Order by the "count" column and show the first rows.
ngrm3 <- sort(ngrm2, by = "count")
head(ngrm3)
# Example 2: the data.table method. Decode the "word" p-attribute of the
# REUTERS corpus (from package RcppCWB) into a data.table, without any
# s-attributes, and count trigrams (n = 3L) on it.
use(pkg = "RcppCWB", corpus = "REUTERS")
dt <- decode("REUTERS", p_attribute = "word", s_attribute = character(), to = "data.table")
y <- ngrams(dt, n = 3L, p_attribute = "word")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.