Description Usage Arguments Details References Examples
These methods should be used to get or set values of text objects
generated by functions like readCorpus.
## S4 method for signature 'kRp.corpus'
taggedText(obj)
## S4 replacement method for signature 'kRp.corpus'
taggedText(obj) <- value
## S4 method for signature 'kRp.corpus'
doc_id(obj, has_id = NULL)
## S4 method for signature 'kRp.corpus'
describe(obj, doc_id = NULL, simplify = TRUE, ...)
## S4 replacement method for signature 'kRp.corpus'
describe(obj, doc_id = NULL, ...) <- value
## S4 method for signature 'kRp.corpus'
language(obj)
## S4 replacement method for signature 'kRp.corpus'
language(obj) <- value
## S4 method for signature 'kRp.corpus'
hasFeature(obj, feature = NULL)
## S4 replacement method for signature 'kRp.corpus'
hasFeature(obj, feature) <- value
## S4 method for signature 'kRp.corpus'
feature(obj, feature, doc_id = NULL)
## S4 replacement method for signature 'kRp.corpus'
feature(obj, feature) <- value
## S4 method for signature 'kRp.corpus'
corpusReadability(obj, doc_id = NULL)
## S4 replacement method for signature 'kRp.corpus'
corpusReadability(obj) <- value
corpusTm(obj)
## S4 method for signature 'kRp.corpus'
corpusTm(obj)
corpusTm(obj) <- value
## S4 replacement method for signature 'kRp.corpus'
corpusTm(obj) <- value
corpusMeta(obj, meta = NULL, fail = TRUE)
## S4 method for signature 'kRp.corpus'
corpusMeta(obj, meta = NULL, fail = TRUE)
corpusMeta(obj, meta = NULL) <- value
## S4 replacement method for signature 'kRp.corpus'
corpusMeta(obj, meta = NULL) <- value
## S4 method for signature 'kRp.corpus'
corpusHyphen(obj, doc_id = NULL)
## S4 replacement method for signature 'kRp.corpus'
corpusHyphen(obj) <- value
## S4 method for signature 'kRp.corpus'
corpusLexDiv(obj, doc_id = NULL)
## S4 replacement method for signature 'kRp.corpus'
corpusLexDiv(obj) <- value
## S4 method for signature 'kRp.corpus'
corpusFreq(obj)
## S4 replacement method for signature 'kRp.corpus'
corpusFreq(obj) <- value
## S4 method for signature 'kRp.corpus'
corpusCorpFreq(obj)
## S4 replacement method for signature 'kRp.corpus'
corpusCorpFreq(obj) <- value
corpusHierarchy(obj, ...)
## S4 method for signature 'kRp.corpus'
corpusHierarchy(obj)
corpusHierarchy(obj) <- value
## S4 replacement method for signature 'kRp.corpus'
corpusHierarchy(obj) <- value
corpusFiles(obj, paths = FALSE, ...)
## S4 method for signature 'kRp.corpus'
corpusFiles(obj, paths = FALSE)
corpusFiles(obj) <- value
## S4 replacement method for signature 'kRp.corpus'
corpusFiles(obj) <- value
corpusDocTermMatrix(obj, ...)
## S4 method for signature 'kRp.corpus'
corpusDocTermMatrix(obj)
corpusDocTermMatrix(obj, terms = NULL, case.sens = NULL, tfidf = NULL) <- value
## S4 replacement method for signature 'kRp.corpus'
corpusDocTermMatrix(obj, terms = NULL, case.sens = NULL,
tfidf = NULL) <- value
## S4 method for signature 'kRp.corpus'
corpusStopwords(obj)
## S4 replacement method for signature 'kRp.corpus'
corpusStopwords(obj) <- value
## S4 method for signature 'kRp.corpus'
diffText(obj, doc_id = NULL)
## S4 replacement method for signature 'kRp.corpus'
diffText(obj) <- value
## S4 method for signature 'kRp.corpus'
originalText(obj)
is.corpus(obj)
## S4 method for signature 'kRp.corpus,ANY,ANY,ANY'
x[i, j, ..., drop = TRUE]
## S4 replacement method for signature 'kRp.corpus,ANY,ANY,ANY'
x[i, j, ...] <- value
## S4 method for signature 'kRp.corpus'
x[[i, doc_id = NULL, ...]]
## S4 replacement method for signature 'kRp.corpus'
x[[i, doc_id = NULL, ...]] <- value
## S4 method for signature 'kRp.corpus'
tif_as_tokens_df(tokens)
tif_as_corpus_df(corpus)
## S4 method for signature 'kRp.corpus'
tif_as_corpus_df(corpus)
|
obj |
An object of class kRp.corpus. |
value |
A new value to replace the current with. |
has_id |
A character vector with |
doc_id |
A character vector to limit the scope to one or more particular document IDs. |
simplify |
If |
... |
Additional arguments to pass through, depending on the method. |
feature |
Character string naming the object feature to look for. |
meta |
If not NULL, the |
fail |
Logical,
whether the method should fail with an error if |
paths |
Logical,
indicates for |
terms |
A character string defining the |
case.sens |
Logical, whether terms were counted case-sensitively. Stored in the object's meta data slot. |
tfidf |
Logical,
use |
x |
See |
i |
Defines the row selector ( |
j |
Defines the column selector in the tokens slot. |
drop |
See |
tokens |
An object of class kRp.corpus. |
corpus |
An object of class kRp.corpus. |
taggedText()
returns the tokens
slot.
describe()
returns the desc
slot.
hasFeature()
returns TRUE
or FALSE,
depending on whether the requested feature is present or not.
feature()
returns the list entry of the feat_list
slot for the requested feature.
corpusReadability()
returns the list of kRp.readability
objects.
corpusTm()
returns the VCorpus
object.
corpusMeta()
returns the list with meta information.
corpusHyphen()
returns the list of kRp.hyphen
objects.
corpusLexDiv()
returns the list of kRp.TTR
objects.
corpusFiles()
returns the character vector of file names of the object.
corpusFreq()
returns the frequency analysis data from the feat_list
slot.
corpusCorpFreq()
returns the kRp.corp.freq
object of the feat_list
slot.
corpusHierarchy()
returns the corpus' hierarchy structure.
corpusDocTermMatrix()
returns the sparse document term matrix of the feat_list
slot.
corpusStopwords()
returns the number of stopwords found in each text (if analyzed) from the feat_list
slot.
diffText()
returns the diff
element of the feat_list
slot.
originalText
regenerates the original text before text transformations and returns it as a data frame.
[ and [[
can be used as shortcuts to index the results of taggedText().
tif_as_corpus_df
returns the whole corpus in a single TIF[1] compliant
data.frame.
tif_as_tokens_df
returns the tokens
slot in a TIF[1] compliant
data.frame, i.e., doc_id
is not a factor but a character vector.
[1] Text Interchange Formats (https://github.com/ropensci/tif)
# use readCorpus() to create an object of class kRp.corpus
# code is only run when the english language package can be loaded
if(require("koRpus.lang.en", quietly = TRUE)){
myCorpus <- readCorpus(
dir=file.path(
path.package("tm.plugin.koRpus"), "examples", "corpus", "Winner", "Wikipedia_new"
),
# use tokenize() so examples run without a TreeTagger installation
tagger="tokenize",
lang="en"
)
taggedText(myCorpus)
corpusMeta(myCorpus, "note") <- "an interesting read!"
# export object to TIF compliant data frame
myCorpus_df <- tif_as_corpus_df(myCorpus)
} else {}
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.