Nothing
# Copyright 2015-2020 Meik Michalke <meik.michalke@hhu.de>
#
# This file is part of the R package tm.plugin.koRpus.
#
# tm.plugin.koRpus is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# tm.plugin.koRpus is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with tm.plugin.koRpus. If not, see <http://www.gnu.org/licenses/>.
#' Getter/setter methods for kRp.corpus objects
#'
#' These methods should be used to get or set values of text objects
#' generated by functions like \code{\link[tm.plugin.koRpus:readCorpus]{readCorpus}}.
#'
#' \itemize{
#' \item{\code{taggedText()} }{returns the \code{tokens} slot.}
#' \item{\code{describe()} }{returns the \code{desc} slot.}
#' \item{\code{hasFeature()} }{returns \code{TRUE} or code{FALSE}, depending on whether the requested feature is present or not.}
#' \item{\code{feature()} }{returns the list entry of the \code{feat_list} slot for the requested feature.}
#' \item{\code{corpusReadability()} }{returns the list of \code{kRp.readability} objects.}
#' \item{\code{corpusTm()} }{returns the \code{VCorpus} object.}
#' \item{\code{corpusMeta()} }{returns the list with meta information.}
#' \item{\code{corpusHyphen()} }{returns the list of \code{kRp.hyphen} objects.}
#' \item{\code{corpusLexDiv()} }{returns the list of \code{kRp.TTR} objects.}
#' \item{\code{corpusFiles()} }{returns the character vector of file names of the object.}
#' \item{\code{corpusFreq()} }{returns the frequency analysis data from the \code{feat_list} slot.}
#' \item{\code{corpusCorpFreq()} }{returns the \code{kRp.corp.freq} object of the \code{feat_list} slot.}
#' \item{\code{corpusHierarchy()} }{returns the corpus' hierarchy structure.}
#' \item{\code{corpusDocTermMatrix()} }{returns the sparse document term matrix of the \code{feat_list} slot.}
#' \item{\code{corpusStopwords()} }{returns the number of stopwords found in each text (if analyzed) from the \code{feat_list} slot.}
#' \item{\code{diffText()} }{returns the \code{diff} element of the \code{feat_list} slot.}
#' \item{\code{originalText} }{regenerates the original text before text transformations and returns it as a data frame.}
#' \item{\code{[}/\code{[[} }{can be used as a shortcut to index the results of \code{taggedText()}.}
#' \item{\code{tif_as_corpus_df} }{returns the whole corpus in a single TIF[1] compliant
#' data.frame.}
#' \item{\code{tif_as_tokens_df} }{returns the \code{tokens} slot in a TIF[1] compliant
#' data.frame, i.e., \code{doc_id} is not a factor but a character vector.}
#' }
#'
#' @param obj An object of class \code{\link[tm.plugin.koRpus:kRp.corpus-class]{kRp.corpus}}.
#' @param value A new value to replace the current with.
#' @param doc_id A character vector to limit the scope to one or more particular document IDs.
#' @param ... Additional arguments to pass through, depending on the method.
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @references
#' [1] Text Interchange Formats (\url{https://github.com/ropensci/tif})
#' @export
#' @importFrom koRpus taggedText
#' @aliases
#' taggedText,-methods
#' taggedText,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
#' @example inst/examples/if_lang_en_clause_start.R
#' @example inst/examples/generate_myCorpus_1_text.R
#' @examples
#'
#' taggedText(myCorpus)
#'
#' corpusMeta(myCorpus, "note") <- "an interesting read!"
#'
#' # export object to TIF compliant data frame
#' myCorpus_df <- tif_as_corpus_df(myCorpus)
#' @example inst/examples/if_lang_en_clause_end.R
## the standard generic for taggedText() is defined in the koRpus package
setMethod("taggedText",
signature=signature(obj="kRp.corpus"),
function (obj){
return(slot(obj, name="tokens"))
}
)
## the standard generic for taggedText()<- is defined in the koRpus package
#' @rdname kRp.corpus_get-methods
#' @importFrom koRpus taggedText<-
#' @export
#' @docType methods
#' @aliases
#' taggedText<-,-methods
#' taggedText<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("taggedText<-",
signature=signature(obj="kRp.corpus"),
function (obj, value){
slot(obj, name="tokens") <- value
return(obj)
}
)
## the standard generic for doc_id() is defined in the koRpus package
#' @rdname kRp.corpus_get-methods
#' @param has_id A character vector with \code{doc_id}s to look for in the object. The return value
#' is then a logical vector of the same length, indicating if the respective id was found or not.
#' @importFrom koRpus doc_id
#' @export
#' @docType methods
#' @aliases
#' doc_id,-methods
#' doc_id,kRp.corpus-method
setMethod("doc_id",
signature=signature(obj="kRp.corpus"),
function (obj, has_id=NULL){
result <- unique(as.character(slot(obj, name="tokens")[["doc_id"]]))
if(is.null(has_id)){
return(result)
} else if(length(has_id) > 1) {
return(has_id %in% result)
} else {
return(any(result == has_id))
}
}
)
## the standard generic for describe() is defined in the sylly package
#' @param simplify If \code{TRUE} and result is a list of length 1, return the list element.
#' @rdname kRp.corpus_get-methods
#' @importFrom sylly describe
#' @docType methods
#' @export
#' @aliases
#' describe,-methods
#' describe,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("describe",
signature=signature(obj="kRp.corpus"),
function (obj, doc_id=NULL, simplify=TRUE, ...){
result <- slot(obj, name="desc")
if(!is.null(doc_id)){
doc_ids_in_obj <- doc_id(obj, has_id=doc_id)
if(all(doc_ids_in_obj)){
result <- result[names(result) %in% doc_id]
} else {
stop(simpleError(
paste0("Invalid doc_id:\n \"", paste0(doc_id[!doc_ids_in_obj], collapse="\", \""), "\"!")
))
}
} else {}
if(all(isTRUE(simplify), length(result) == 1)){
result <- result[[1]]
} else {}
return(result)
}
)
## the standard generic for describe()<- is defined in the sylly package
#' @rdname kRp.corpus_get-methods
#' @importFrom sylly describe<-
#' @export
#' @docType methods
#' @aliases
#' describe<-,-methods
#' describe<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("describe<-",
signature=signature(obj="kRp.corpus"),
function (obj, doc_id=NULL, ..., value){
if(is.null(doc_id)){
slot(obj, name="desc") <- value
} else {
doc_ids_in_obj <- doc_id(obj, has_id=doc_id)
if(all(doc_ids_in_obj)){
if(length(doc_id) > 1){
slot(obj, name="desc")[doc_id] <- value
} else {
slot(obj, name="desc")[[doc_id]] <- value
}
} else {
stop(simpleError(
paste0("Invalid doc_id:\n \"", paste0(doc_id[!doc_ids_in_obj], collapse="\", \""), "\"!")
))
}
}
return(obj)
}
)
## the standard generic for language() is defined in the sylly package
#' @importFrom sylly language
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#' language,-methods
#' language,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("language",
signature=signature(obj="kRp.corpus"),
function (obj){
result <- slot(obj, name="lang")
return(result)
}
)
## the standard generic for language()<- is defined in the sylly package
#' @importFrom sylly language<-
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#' language<-,-methods
#' language<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("language<-",
signature=signature(obj="kRp.corpus"),
function (obj, value){
slot(obj, name="lang") <- value
return(obj)
}
)
## the standard generic for hasFeature() is defined in the koRpus package
#' @param feature Character string naming the object feature to look for.
#' @importFrom koRpus hasFeature
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#' hasFeature,-methods
#' hasFeature,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("hasFeature",
signature=signature(obj="kRp.corpus"),
function (obj, feature=NULL){
if(is.null(feature)){
features <- slot(obj, "features")
return(names(features[features]))
} else {
return(isTRUE(slot(obj, name="features")[feature]))
}
}
)
## the standard generic for hasFeature()<- is defined in the koRpus package
#' @importFrom koRpus hasFeature<-
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#' hasFeature<-,-methods
#' hasFeature<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("hasFeature<-",
signature=signature(obj="kRp.corpus"),
function (obj, feature, value){
if(!is.logical(value)){
stop(simpleError("The \"feature\" value must be logical!"))
} else {}
if(isTRUE(value)){
slot(obj, name="features")[feature] <- value
} else {
current_features <- slot(obj, name="features")
slot(obj, name="features") <- current_features[!names(current_features) %in% feature]
}
return(obj)
}
)
## the standard generic for feature() is defined in the koRpus package
#' @importFrom koRpus feature
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#' feature,-methods
#' feature,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("feature",
signature=signature(obj="kRp.corpus"),
function (obj, feature, doc_id=NULL){
if(is.null(doc_id)){
return(slot(obj, name="feat_list")[[feature]])
} else {
doc_ids_in_obj <- doc_id(obj, has_id=doc_id)
if(all(doc_ids_in_obj)){
return(slot(obj, name="feat_list")[[feature]][doc_id])
} else {
warning(
paste0("Invalid doc_id, omitted:\n \"", paste0(doc_id[!doc_ids_in_obj], collapse="\", \""), "\""),
call.=FALSE
)
return(slot(obj, name="feat_list")[[feature]][doc_id[doc_ids_in_obj]])
}
}
}
)
## the standard generic for feature()<- is defined in the koRpus package
#' @importFrom koRpus feature<-
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#' feature<-,-methods
#' feature<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("feature<-",
signature=signature(obj="kRp.corpus"),
function (obj, feature, value){
slot(obj, name="feat_list")[[feature]] <- value
if(is.null(value)){
hasFeature(obj, feature) <- FALSE
} else {
hasFeature(obj, feature) <- TRUE
}
return(obj)
}
)
## the standard generic for corpusReadability() is defined in the koRpus package
#' @importFrom koRpus corpusReadability
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#' corpusReadability,-methods
#' corpusReadability,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusReadability",
signature=signature(obj="kRp.corpus"),
function (obj, doc_id=NULL){
return(feature(obj, "readability", doc_id=doc_id))
}
)
## the standard generic for corpusReadability()<- is defined in the koRpus package
#' @importFrom koRpus corpusReadability<-
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#' corpusReadability<-,-methods
#' corpusReadability<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusReadability<-",
signature=signature(obj="kRp.corpus"),
function (obj, value){
feature(obj, "readability") <- value
return(obj)
}
)
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
setGeneric("corpusTm", function(obj) standardGeneric("corpusTm"))
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#' corpusTm,-methods
#' corpusTm,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusTm",
signature=signature(obj="kRp.corpus"),
function (obj){
return(slot(obj, name="raw")[["tm"]])
}
)
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
# @param value The new value to replace the current with.
setGeneric("corpusTm<-", function(obj, value) standardGeneric("corpusTm<-"))
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#' corpusTm<-,-methods
#' corpusTm<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusTm<-",
signature=signature(obj="kRp.corpus"),
function (obj, value){
slot(obj, name="raw")[["tm"]] <- value
return(obj)
}
)
#' @rdname kRp.corpus_get-methods
#' @param meta If not NULL, the \code{meta} list entry of the given name.
#' @param fail Logical, whether the method should fail with an error if \code{meta} was not found.
#' If set to \code{FALSE}, returns \code{invisible(NULL)} instead.
#' @docType methods
#' @export
setGeneric("corpusMeta", function(obj, meta=NULL, fail=TRUE) standardGeneric("corpusMeta"))
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#' corpusMeta,-methods
#' corpusMeta,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusMeta",
signature=signature(obj="kRp.corpus"),
function (obj, meta=NULL, fail=TRUE){
all.meta <- slot(obj, name="meta")
if(is.null(meta)){
result <- all.meta
} else {
if(meta %in% names(all.meta)){
result <- all.meta[[meta]]
} else {
if(isTRUE(fail)){
stop(simpleError(paste0("Entry named \"", meta,"\" not found in the slot \"meta\" of this object!")))
} else {
result <- invisible(NULL)
}
}
}
return(result)
}
)
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
# @param value The new value to replace the current with.
setGeneric("corpusMeta<-", function(obj, meta=NULL, value) standardGeneric("corpusMeta<-"))
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#' corpusMeta<-,-methods
#' corpusMeta<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusMeta<-",
signature=signature(obj="kRp.corpus"),
function (obj, meta=NULL, value){
if(is.null(meta)){
slot(obj, name="meta") <- value
} else {
slot(obj, name="meta")[[meta]] <- value
}
return(obj)
}
)
## the standard generic for corpusHyphen() is defined in the koRpus package
#' @importFrom koRpus corpusHyphen
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#' corpusHyphen,-methods
#' corpusHyphen,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusHyphen",
signature=signature(obj="kRp.corpus"),
function (obj, doc_id=NULL){
return(feature(obj, "hyphen", doc_id=doc_id))
}
)
## the standard generic for corpusHyphen()<- is defined in the koRpus package
#' @importFrom koRpus corpusHyphen<-
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#' corpusHyphen<-,-methods
#' corpusHyphen<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusHyphen<-",
signature=signature(obj="kRp.corpus"),
function (obj, value){
feature(obj, "hyphen") <- value
return(obj)
}
)
## the standard generic for corpusLexDiv() is defined in the koRpus package
#' @importFrom koRpus corpusLexDiv
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#' corpusLexDiv,-methods
#' corpusLexDiv,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusLexDiv",
signature=signature(obj="kRp.corpus"),
function (obj, doc_id=NULL){
return(feature(obj, "lex_div", doc_id=doc_id))
}
)
## the standard generic for corpusLexDiv()<- is defined in the koRpus package
#' @importFrom koRpus corpusLexDiv<-
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#' corpusLexDiv<-,-methods
#' corpusLexDiv<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusLexDiv<-",
signature=signature(obj="kRp.corpus"),
function (obj, value){
feature(obj, "lex_div") <- value
return(obj)
}
)
## the standard generic for corpusFreq() is defined in the koRpus package
#' @importFrom koRpus corpusFreq
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#' corpusFreq,-methods
#' corpusFreq,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusFreq",
signature=signature(obj="kRp.corpus"),
function (obj){
return(feature(obj, "freq"))
}
)
## the standard generic for corpusFreq()<- is defined in the koRpus package
#' @importFrom koRpus corpusFreq<-
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#' corpusFreq<-,-methods
#' corpusFreq<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusFreq<-",
signature=signature(obj="kRp.corpus"),
function (obj, value){
feature(obj, "freq") <- value
return(obj)
}
)
## the standard generic for corpusCorpFreq() is defined in the koRpus package
#' @importFrom koRpus corpusCorpFreq
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#' corpusCorpFreq,-methods
#' corpusCorpFreq,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusCorpFreq",
signature=signature(obj="kRp.corpus"),
function (obj){
return(feature(obj, "corp_freq"))
}
)
## the standard generic for corpusCorpFreq()<- is defined in the koRpus package
#' @importFrom koRpus corpusCorpFreq<-
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#' corpusCorpFreq<-,-methods
#' corpusCorpFreq<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusCorpFreq<-",
signature=signature(obj="kRp.corpus"),
function (obj, value){
feature(obj, "corp_freq") <- value
return(obj)
}
)
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
setGeneric("corpusHierarchy", function(obj, ...) standardGeneric("corpusHierarchy"))
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#' corpusHierarchy,-methods
#' corpusHierarchy,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusHierarchy",
signature=signature(obj="kRp.corpus"),
function (obj){
return(feature(obj, "hierarchy"))
}
)
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
# @param value The new value to replace the current with.
setGeneric("corpusHierarchy<-", function(obj, value) standardGeneric("corpusHierarchy<-"))
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#' corpusHierarchy<-,-methods
#' corpusHierarchy<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusHierarchy<-",
signature=signature(obj="kRp.corpus"),
function (obj, value){
feature(obj, "hierarchy") <- value
return(obj)
}
)
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @param paths Logical, indicates for \code{corpusFiles()} whether full paths should be returned, or just the actual file name.
#' @export
setGeneric("corpusFiles", function(obj, paths=FALSE, ...) standardGeneric("corpusFiles"))
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#' corpusFiles,-methods
#' corpusFiles,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusFiles",
signature=signature(obj="kRp.corpus"),
function (obj, paths=FALSE){
if(isTRUE(paths)){
result <- apply(
meta(corpusTm(obj))[, c("path","file")],
1,
paste0,
collapse=.Platform$file.sep
)
} else {
result <- as.character(meta(corpusTm(obj))[["file"]])
}
result <- as.character(result)
names(result) <- NULL
return(result)
}
)
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
# @param value The new value to replace the current with.
setGeneric("corpusFiles<-", function(obj, value) standardGeneric("corpusFiles<-"))
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#' corpusFiles<-,-methods
#' corpusFiles<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusFiles<-",
signature=signature(obj="kRp.corpus"),
function (obj, value){
meta(corpusTm(obj))[["file"]] <- value
return(obj)
}
)
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
setGeneric("corpusDocTermMatrix", function(obj, ...) standardGeneric("corpusDocTermMatrix"))
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#' corpusDocTermMatrix,-methods
#' corpusDocTermMatrix,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusDocTermMatrix",
signature=signature(obj="kRp.corpus"),
function (obj){
return(feature(obj, "doc_term_matrix"))
}
)
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
# @param value The new value to replace the current with.
setGeneric("corpusDocTermMatrix<-", function(
obj,
terms=NULL,
case.sens=NULL,
tfidf=NULL,
value
)
standardGeneric("corpusDocTermMatrix<-")
)
#' @param terms A character string defining the \code{tokens} used for calculating the matrix.
#' Stored in object's meta data slot.
#' @param case.sens Logical, whether terms were counted case sensitive.
#' Stored in object's meta data slot.
#' @param tfidf Logical, use \code{TRUE} if the term frequency--inverse document frequency (tf-idf)
#' values were calculated instead of absolute frequency.
#' Stored in object's meta data slot.
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#' corpusDocTermMatrix<-,-methods
#' corpusDocTermMatrix<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusDocTermMatrix<-",
signature=signature(obj="kRp.corpus"),
function (
obj,
terms=NULL,
case.sens=NULL,
tfidf=NULL,
value
){
feature(obj, "doc_term_matrix") <- value
corpusMeta(obj, meta="doc_term_matrix") <- list(
terms=terms,
case.sens=case.sens,
tfidf=tfidf
)
return(obj)
}
)
## the standard generic for corpusStopwords() is defined in the koRpus package
#' @importFrom koRpus corpusStopwords
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#' corpusStopwords,-methods
#' corpusStopwords,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusStopwords",
signature=signature(obj="kRp.corpus"),
function (obj){
return(feature(obj, "stopwords"))
}
)
## the standard generic for corpusStopwords()<- is defined in the koRpus package
#' @importFrom koRpus corpusStopwords<-
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#' corpusStopwords<-,-methods
#' corpusStopwords<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusStopwords<-",
signature=signature(obj="kRp.corpus"),
function (obj, value){
feature(obj, "stopwords") <- value
return(obj)
}
)
## the standard generic for diffText() is defined in the koRpus package
#' @rdname kRp.corpus_get-methods
#' @importFrom koRpus diffText
#' @export
#' @docType methods
#' @aliases
#' diffText,-methods
#' diffText,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("diffText",
signature=signature(obj="kRp.corpus"),
function (obj, doc_id=NULL){
return(feature(obj, "diff", doc_id=doc_id))
}
)
## the standard generic for diffText()<- is defined in the koRpus package
#' @rdname kRp.corpus_get-methods
#' @importFrom koRpus diffText<-
#' @export
#' @docType methods
#' @aliases
#' diffText<-,-methods
#' diffText<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("diffText<-",
signature=signature(obj="kRp.corpus"),
function (obj, value){
feature(obj, "diff") <- value
return(obj)
}
)
## the standard generic for originalText() is defined in the koRpus package
#' @rdname kRp.corpus_get-methods
#' @importFrom koRpus originalText kRp_txt_trans
#' @export
#' @docType methods
#' @aliases
#' originalText,-methods
#' originalText,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("originalText",
signature=signature(obj="kRp.corpus"),
function (obj){
return(originalText(
kRp_txt_trans(
tokens=taggedText(obj)
)
))
}
)
#' @rdname kRp.corpus_get-methods
#' @export
#' @include 01_class_01_kRp.corpus.R
is.corpus <- function(obj){
inherits(obj, "kRp.corpus")
}
#' @rdname kRp.corpus_get-methods
#' @param x See \code{obj}.
#' @param i Defines the row selector (\code{[}) or the name to match (\code{[[}) in the tokens slot.
#' @param j Defines the column selector in the tokens slot.
#' @param drop See \code{\link[base:Extract]{[}}.
#' @export
#' @docType methods
#' @aliases
#' [,-methods
#' [,kRp.corpus,ANY,ANY,ANY-method
setMethod("[",
signature=signature(x="kRp.corpus"),
function (x, i, j, ..., drop=TRUE){
return(taggedText(x)[i, j, ..., drop=drop])
}
)
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#' [<-,-methods
#' [<-,kRp.corpus,ANY,ANY,ANY-method
setMethod("[<-",
signature=signature(x="kRp.corpus"),
function (x, i, j, ..., value){
taggedText(x)[i, j, ...] <- value
return(x)
}
)
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#' [[,-methods
#' [[,kRp.corpus,ANY-method
setMethod("[[",
signature=signature(x="kRp.corpus"),
function (x, i, doc_id=NULL, ...){
if(is.null(doc_id)){
return(taggedText(x)[[i, ...]])
} else {
doc_ids_in_obj <- doc_id(x, has_id=doc_id)
tt <- taggedText(x)
if(all(doc_ids_in_obj)){
return(tt[tt[["doc_id"]] %in% doc_id, i, ...])
} else {
warning(
paste0("Invalid doc_id, omitted:\n \"", paste0(doc_id[!doc_ids_in_obj], collapse="\", \""), "\""),
call.=FALSE
)
return(tt[tt[["doc_id"]] %in% doc_id[doc_ids_in_obj], i, ...])
}
}
}
)
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#' [[<-,-methods
#' [[<-,kRp.corpus,ANY,ANY-method
setMethod("[[<-",
signature=signature(x="kRp.corpus"),
function (x, i, doc_id=NULL, ..., value){
if(is.null(doc_id)){
taggedText(x)[[i]] <- value
} else {
doc_ids_in_obj <- doc_id(x, has_id=doc_id)
tt <- taggedText(x)
if(all(doc_ids_in_obj)){
tt[tt[["doc_id"]] %in% doc_id, i, ...] <- value
taggedText(x) <- tt
} else {
stop(simpleError(
paste0("Invalid doc_id:\n \"", paste0(doc_id[!doc_ids_in_obj], collapse="\", \""), "\"!")
))
}
}
return(x)
}
)
#' @rdname kRp.corpus_get-methods
#' @param tokens An object of class \code{kRp.corpus}.
#' @export
#' @docType methods
#' @aliases
#' tif_as_tokens_df,-methods
#' tif_as_tokens_df,hierarchy-method
setMethod("tif_as_tokens_df",
signature=signature(tokens="kRp.corpus"),
function(tokens){
result <- taggedText(tokens)
# TIF needs doc_id to be a character vector, not a factor
result[["doc_id"]] <- as.character(result[["doc_id"]])
return(result)
}
)
#' @rdname kRp.corpus_get-methods
#' @param corpus An object of class \code{kRp.corpus}.
#' @docType methods
#' @export
setGeneric("tif_as_corpus_df", function(corpus) standardGeneric("tif_as_corpus_df"))
#' @rdname kRp.corpus_get-methods
#' @importFrom NLP meta
#' @export
#' @docType methods
#' @aliases
#' tif_as_corpus_df,-methods
#' tif_as_corpus_df,hierarchy-method
setMethod("tif_as_corpus_df",
signature=signature(corpus="kRp.corpus"),
function(corpus){
corpus_tm <- corpusTm(obj=corpus)
corpus_texts <- sapply(corpus_tm, function(thisText){paste0(thisText, collapse="\n")})
corpus_meta <- meta(corpus_tm)
result <- data.frame(
doc_id=as.character(corpus_meta[["doc_id"]]),
text=as.character(corpus_texts),
stringsAsFactors=FALSE
)
extra_cols <- !colnames(corpus_meta) %in% "doc_id"
if(sum(extra_cols) > 1){
# append hierarchy information
result <- cbind(result, as.data.frame(corpus_meta)[,extra_cols])
} else {}
rownames(result) <- NULL
return(result)
}
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.