R/02_method_20_kRp.corpus_get_set_is.R

Defines functions is.corpus

Documented in is.corpus

# Copyright 2015-2020 Meik Michalke <meik.michalke@hhu.de>
#
# This file is part of the R package tm.plugin.koRpus.
#
# tm.plugin.koRpus is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# tm.plugin.koRpus is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with tm.plugin.koRpus.  If not, see <http://www.gnu.org/licenses/>.

#' Getter/setter methods for kRp.corpus objects
#' 
#' These methods should be used to get or set values of text objects
#' generated by functions like \code{\link[tm.plugin.koRpus:readCorpus]{readCorpus}}.
#' 
#' \itemize{
#'   \item{\code{taggedText()} }{returns the \code{tokens} slot.}
#'   \item{\code{describe()} }{returns the \code{desc} slot.}
#'   \item{\code{hasFeature()} }{returns \code{TRUE} or code{FALSE}, depending on whether the requested feature is present or not.}
#'   \item{\code{feature()} }{returns the list entry of the \code{feat_list} slot for the requested feature.}
#'   \item{\code{corpusReadability()} }{returns the list of \code{kRp.readability} objects.}
#'   \item{\code{corpusTm()} }{returns the \code{VCorpus} object.}
#'   \item{\code{corpusMeta()} }{returns the list with meta information.}
#'   \item{\code{corpusHyphen()} }{returns the list of \code{kRp.hyphen} objects.}
#'   \item{\code{corpusLexDiv()} }{returns the list of \code{kRp.TTR} objects.}
#'   \item{\code{corpusFiles()} }{returns the character vector of file names of the object.}
#'   \item{\code{corpusFreq()} }{returns the frequency analysis data from the \code{feat_list} slot.}
#'   \item{\code{corpusCorpFreq()} }{returns the \code{kRp.corp.freq} object of the \code{feat_list} slot.}
#'   \item{\code{corpusHierarchy()} }{returns the corpus' hierarchy structure.}
#'   \item{\code{corpusDocTermMatrix()} }{returns the sparse document term matrix of the \code{feat_list} slot.}
#'   \item{\code{corpusStopwords()} }{returns the number of stopwords found in each text (if analyzed) from the \code{feat_list} slot.}
#'   \item{\code{diffText()} }{returns the \code{diff} element of the \code{feat_list} slot.}
#'   \item{\code{originalText} }{regenerates the original text before text transformations and returns it as a data frame.}
#'   \item{\code{[}/\code{[[} }{can be used as a shortcut to index the results of \code{taggedText()}.}
#'   \item{\code{tif_as_corpus_df} }{returns the whole corpus in a single TIF[1] compliant
#'      data.frame.}
#'   \item{\code{tif_as_tokens_df} }{returns the \code{tokens} slot in a TIF[1] compliant
#'      data.frame, i.e., \code{doc_id} is not a factor but a character vector.}
#' }
#' 
#' @param obj An object of class \code{\link[tm.plugin.koRpus:kRp.corpus-class]{kRp.corpus}}.
#' @param value A new value to replace the current with.
#' @param doc_id A character vector to limit the scope to one or more particular document IDs.
#' @param ... Additional arguments to pass through, depending on the method.
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @references
#'    [1] Text Interchange Formats (\url{https://github.com/ropensci/tif})
#' @export
#' @importFrom koRpus taggedText
#' @aliases
#'    taggedText,-methods
#'    taggedText,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
#' @example inst/examples/if_lang_en_clause_start.R
#' @example inst/examples/generate_myCorpus_1_text.R
#' @examples
#'
#'   taggedText(myCorpus)
#'
#'   corpusMeta(myCorpus, "note") <- "an interesting read!"
#' 
#'   # export object to TIF compliant data frame
#'   myCorpus_df <- tif_as_corpus_df(myCorpus)
#' @example inst/examples/if_lang_en_clause_end.R
## the standard generic for taggedText() is defined in the koRpus package
setMethod("taggedText",
  signature=signature(obj="kRp.corpus"),
  function (obj){
    return(slot(obj, name="tokens"))
  }
)

## the standard generic for taggedText()<- is defined in the koRpus package
#' @rdname kRp.corpus_get-methods
#' @importFrom koRpus taggedText<-
#' @export
#' @docType methods
#' @aliases
#'    taggedText<-,-methods
#'    taggedText<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("taggedText<-",
  signature=signature(obj="kRp.corpus"),
  function (obj, value){
    slot(obj, name="tokens") <- value
    return(obj)
  }
)

## the standard generic for doc_id() is defined in the koRpus package
#' @rdname kRp.corpus_get-methods
#' @param has_id A character vector with \code{doc_id}s to look for in the object. The return value
#'    is then a logical vector of the same length, indicating if the respective id was found or not.
#' @importFrom koRpus doc_id
#' @export
#' @docType methods
#' @aliases
#'    doc_id,-methods
#'    doc_id,kRp.corpus-method
setMethod("doc_id",
  signature=signature(obj="kRp.corpus"),
  function (obj, has_id=NULL){
    result <- unique(as.character(slot(obj, name="tokens")[["doc_id"]]))
    if(is.null(has_id)){
      return(result)
    } else if(length(has_id) > 1) {
      return(has_id %in% result)
    } else {
      return(any(result == has_id))
    }
  }
)

## the standard generic for describe() is defined in the sylly package
#' @param simplify If \code{TRUE} and result is a list of length 1, return the list element.
#' @rdname kRp.corpus_get-methods
#' @importFrom sylly describe
#' @docType methods
#' @export
#' @aliases
#'    describe,-methods
#'    describe,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("describe",
  signature=signature(obj="kRp.corpus"),
  function (obj, doc_id=NULL, simplify=TRUE, ...){
    result <- slot(obj, name="desc")
    if(!is.null(doc_id)){
      doc_ids_in_obj <- doc_id(obj, has_id=doc_id)
      if(all(doc_ids_in_obj)){
        result <- result[names(result) %in% doc_id]
      } else {
        stop(simpleError(
          paste0("Invalid doc_id:\n  \"", paste0(doc_id[!doc_ids_in_obj], collapse="\", \""), "\"!")
        ))
      }
    } else {}

    if(all(isTRUE(simplify), length(result) == 1)){
      result <- result[[1]]
    } else {}
    
    return(result)
  }
)

## the standard generic for describe()<- is defined in the sylly package
#' @rdname kRp.corpus_get-methods
#' @importFrom sylly describe<-
#' @export
#' @docType methods
#' @aliases
#'    describe<-,-methods
#'    describe<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("describe<-",
  signature=signature(obj="kRp.corpus"),
  function (obj, doc_id=NULL, ..., value){
    if(is.null(doc_id)){
      slot(obj, name="desc") <- value
    } else {
      doc_ids_in_obj <- doc_id(obj, has_id=doc_id)
      if(all(doc_ids_in_obj)){
        if(length(doc_id) > 1){
          slot(obj, name="desc")[doc_id] <- value
        } else {
          slot(obj, name="desc")[[doc_id]] <- value
        }
      } else {
        stop(simpleError(
          paste0("Invalid doc_id:\n  \"", paste0(doc_id[!doc_ids_in_obj], collapse="\", \""), "\"!")
        ))
      }
    }
    return(obj)
  }
)

## the standard generic for language() is defined in the sylly package
#' @importFrom sylly language
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#'    language,-methods
#'    language,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("language",
  signature=signature(obj="kRp.corpus"),
  function (obj){
    result <- slot(obj, name="lang")
    return(result)
  }
)

## the standard generic for language()<- is defined in the sylly package
#' @importFrom sylly language<-
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#'    language<-,-methods
#'    language<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("language<-",
  signature=signature(obj="kRp.corpus"),
  function (obj, value){
    slot(obj, name="lang") <- value
    return(obj)
  }
)


## the standard generic for hasFeature() is defined in the koRpus package
#' @param feature Character string naming the object feature to look for.
#' @importFrom koRpus hasFeature
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#'    hasFeature,-methods
#'    hasFeature,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("hasFeature",
  signature=signature(obj="kRp.corpus"),
  function (obj, feature=NULL){
    if(is.null(feature)){
      features <- slot(obj, "features")
      return(names(features[features]))
    } else {
      return(isTRUE(slot(obj, name="features")[feature]))
    }
  }
)


## the standard generic for hasFeature()<- is defined in the koRpus package
#' @importFrom koRpus hasFeature<-
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#'    hasFeature<-,-methods
#'    hasFeature<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("hasFeature<-",
  signature=signature(obj="kRp.corpus"),
  function (obj, feature, value){
    if(!is.logical(value)){
      stop(simpleError("The \"feature\" value must be logical!"))
    } else {}
    if(isTRUE(value)){
      slot(obj, name="features")[feature] <- value
    } else {
      current_features <- slot(obj, name="features")
      slot(obj, name="features") <- current_features[!names(current_features) %in% feature]
    }
    return(obj)
  }
)


## the standard generic for feature() is defined in the koRpus package
#' @importFrom koRpus feature
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#'    feature,-methods
#'    feature,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("feature",
  signature=signature(obj="kRp.corpus"),
  function (obj, feature, doc_id=NULL){
    if(is.null(doc_id)){
      return(slot(obj, name="feat_list")[[feature]])
    } else {
      doc_ids_in_obj <- doc_id(obj, has_id=doc_id)
      if(all(doc_ids_in_obj)){
        return(slot(obj, name="feat_list")[[feature]][doc_id])
      } else {
        warning(
          paste0("Invalid doc_id, omitted:\n  \"", paste0(doc_id[!doc_ids_in_obj], collapse="\", \""), "\""),
          call.=FALSE
        )
        return(slot(obj, name="feat_list")[[feature]][doc_id[doc_ids_in_obj]])
      }
    }
  }
)


## the standard generic for feature()<- is defined in the koRpus package
#' @importFrom koRpus feature<-
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#'    feature<-,-methods
#'    feature<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("feature<-",
  signature=signature(obj="kRp.corpus"),
  function (obj, feature, value){
    slot(obj, name="feat_list")[[feature]] <- value
    if(is.null(value)){
      hasFeature(obj, feature) <- FALSE
    } else {
      hasFeature(obj, feature) <- TRUE
    }
    return(obj)
  }
)


## the standard generic for corpusReadability() is defined in the koRpus package
#' @importFrom koRpus corpusReadability
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#'    corpusReadability,-methods
#'    corpusReadability,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusReadability",
  signature=signature(obj="kRp.corpus"),
  function (obj, doc_id=NULL){
    return(feature(obj, "readability", doc_id=doc_id))
  }
)

## the standard generic for corpusReadability()<- is defined in the koRpus package
#' @importFrom koRpus corpusReadability<-
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#'    corpusReadability<-,-methods
#'    corpusReadability<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusReadability<-",
  signature=signature(obj="kRp.corpus"),
  function (obj, value){
    feature(obj, "readability") <- value
    return(obj)
  }
)


#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
setGeneric("corpusTm", function(obj) standardGeneric("corpusTm"))
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#'    corpusTm,-methods
#'    corpusTm,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusTm",
  signature=signature(obj="kRp.corpus"),
  function (obj){
    return(slot(obj, name="raw")[["tm"]])
  }
)

#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
# @param value The new value to replace the current with.
setGeneric("corpusTm<-", function(obj, value) standardGeneric("corpusTm<-"))
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#'    corpusTm<-,-methods
#'    corpusTm<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusTm<-",
  signature=signature(obj="kRp.corpus"),
  function (obj, value){
    slot(obj, name="raw")[["tm"]] <- value
    return(obj)
  }
)

#' @rdname kRp.corpus_get-methods
#' @param meta If not NULL, the \code{meta} list entry of the given name.
#' @param fail Logical, whether the method should fail with an error if \code{meta} was not found.
#'    If set to \code{FALSE}, returns \code{invisible(NULL)} instead.
#' @docType methods
#' @export
setGeneric("corpusMeta", function(obj, meta=NULL, fail=TRUE) standardGeneric("corpusMeta"))
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#'    corpusMeta,-methods
#'    corpusMeta,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusMeta",
  signature=signature(obj="kRp.corpus"),
  function (obj, meta=NULL, fail=TRUE){
    all.meta <- slot(obj, name="meta")
    if(is.null(meta)){
      result <- all.meta
    } else {
      if(meta %in% names(all.meta)){
        result <- all.meta[[meta]]
      } else {
        if(isTRUE(fail)){
          stop(simpleError(paste0("Entry named \"", meta,"\" not found in the slot \"meta\" of this object!")))
        } else {
          result <- invisible(NULL)
        }
      }
    }

    return(result)
  }
)


#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
# @param value The new value to replace the current with.
setGeneric("corpusMeta<-", function(obj, meta=NULL, value) standardGeneric("corpusMeta<-"))
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#'    corpusMeta<-,-methods
#'    corpusMeta<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusMeta<-",
  signature=signature(obj="kRp.corpus"),
  function (obj, meta=NULL, value){
    if(is.null(meta)){
      slot(obj, name="meta") <- value
    } else {
      slot(obj, name="meta")[[meta]] <- value
    }
    return(obj)
  }
)


## the standard generic for corpusHyphen() is defined in the koRpus package
#' @importFrom koRpus corpusHyphen
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#'    corpusHyphen,-methods
#'    corpusHyphen,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusHyphen",
  signature=signature(obj="kRp.corpus"),
  function (obj, doc_id=NULL){
    return(feature(obj, "hyphen", doc_id=doc_id))
  }
)

## the standard generic for corpusHyphen()<- is defined in the koRpus package
#' @importFrom koRpus corpusHyphen<-
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#'    corpusHyphen<-,-methods
#'    corpusHyphen<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusHyphen<-",
  signature=signature(obj="kRp.corpus"),
  function (obj, value){
    feature(obj, "hyphen") <- value
    return(obj)
  }
)


## the standard generic for corpusLexDiv() is defined in the koRpus package
#' @importFrom koRpus corpusLexDiv
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#'    corpusLexDiv,-methods
#'    corpusLexDiv,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusLexDiv",
  signature=signature(obj="kRp.corpus"),
  function (obj, doc_id=NULL){
    return(feature(obj, "lex_div", doc_id=doc_id))
  }
)

## the standard generic for corpusLexDiv()<- is defined in the koRpus package
#' @importFrom koRpus corpusLexDiv<-
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#'    corpusLexDiv<-,-methods
#'    corpusLexDiv<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusLexDiv<-",
  signature=signature(obj="kRp.corpus"),
  function (obj, value){
    feature(obj, "lex_div") <- value
    return(obj)
  }
)


## the standard generic for corpusFreq() is defined in the koRpus package
#' @importFrom koRpus corpusFreq
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#'    corpusFreq,-methods
#'    corpusFreq,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusFreq",
  signature=signature(obj="kRp.corpus"),
  function (obj){
    return(feature(obj, "freq"))
  }
)


## the standard generic for corpusFreq()<- is defined in the koRpus package
#' @importFrom koRpus corpusFreq<-
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#'    corpusFreq<-,-methods
#'    corpusFreq<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusFreq<-",
  signature=signature(obj="kRp.corpus"),
  function (obj, value){
    feature(obj, "freq") <- value
    return(obj)
  }
)


## the standard generic for corpusCorpFreq() is defined in the koRpus package
#' @importFrom koRpus corpusCorpFreq
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#'    corpusCorpFreq,-methods
#'    corpusCorpFreq,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusCorpFreq",
  signature=signature(obj="kRp.corpus"),
  function (obj){
    return(feature(obj, "corp_freq"))
  }
)


## the standard generic for corpusCorpFreq()<- is defined in the koRpus package
#' @importFrom koRpus corpusCorpFreq<-
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#'    corpusCorpFreq<-,-methods
#'    corpusCorpFreq<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusCorpFreq<-",
  signature=signature(obj="kRp.corpus"),
  function (obj, value){
    feature(obj, "corp_freq") <- value
    return(obj)
  }
)


#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
setGeneric("corpusHierarchy", function(obj, ...) standardGeneric("corpusHierarchy"))
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#'    corpusHierarchy,-methods
#'    corpusHierarchy,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusHierarchy",
  signature=signature(obj="kRp.corpus"),
  function (obj){
    return(feature(obj, "hierarchy"))
  }
)


#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
# @param value The new value to replace the current with.
setGeneric("corpusHierarchy<-", function(obj, value) standardGeneric("corpusHierarchy<-"))
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#'    corpusHierarchy<-,-methods
#'    corpusHierarchy<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusHierarchy<-",
  signature=signature(obj="kRp.corpus"),
  function (obj, value){
    feature(obj, "hierarchy") <- value
    return(obj)
  }
)


#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @param paths Logical, indicates for \code{corpusFiles()} whether full paths should be returned, or just the actual file name.
#' @export
setGeneric("corpusFiles", function(obj, paths=FALSE, ...) standardGeneric("corpusFiles"))
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#'    corpusFiles,-methods
#'    corpusFiles,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusFiles",
  signature=signature(obj="kRp.corpus"),
  function (obj, paths=FALSE){
    if(isTRUE(paths)){
      result <- apply(
        meta(corpusTm(obj))[, c("path","file")],
        1,
        paste0,
        collapse=.Platform$file.sep
      )
    } else {
      result <- as.character(meta(corpusTm(obj))[["file"]])
    }
    result <- as.character(result)
    names(result) <- NULL
    return(result)
  }
)

#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
# @param value The new value to replace the current with.
setGeneric("corpusFiles<-", function(obj, value) standardGeneric("corpusFiles<-"))
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#'    corpusFiles<-,-methods
#'    corpusFiles<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusFiles<-",
  signature=signature(obj="kRp.corpus"),
  function (obj, value){
    meta(corpusTm(obj))[["file"]] <- value
    return(obj)
  }
)


#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
setGeneric("corpusDocTermMatrix", function(obj, ...) standardGeneric("corpusDocTermMatrix"))
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#'    corpusDocTermMatrix,-methods
#'    corpusDocTermMatrix,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusDocTermMatrix",
  signature=signature(obj="kRp.corpus"),
  function (obj){
    return(feature(obj, "doc_term_matrix"))
  }
)

#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
# @param value The new value to replace the current with.
setGeneric("corpusDocTermMatrix<-", function(
    obj,
    terms=NULL,
    case.sens=NULL,
    tfidf=NULL,
    value
  )
  standardGeneric("corpusDocTermMatrix<-")
)
#' @param terms A character string defining the \code{tokens} used for calculating the matrix.
#'    Stored in object's meta data slot.
#' @param case.sens Logical, whether terms were counted case sensitive.
#'    Stored in object's meta data slot.
#' @param tfidf Logical, use \code{TRUE} if the term frequency--inverse document frequency (tf-idf)
#'    values were calculated instead of absolute frequency.
#'    Stored in object's meta data slot.
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#'    corpusDocTermMatrix<-,-methods
#'    corpusDocTermMatrix<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusDocTermMatrix<-",
  signature=signature(obj="kRp.corpus"),
  function (
    obj,
    terms=NULL,
    case.sens=NULL,
    tfidf=NULL,
    value
  ){
    feature(obj, "doc_term_matrix") <- value
    corpusMeta(obj, meta="doc_term_matrix") <- list(
      terms=terms,
      case.sens=case.sens,
      tfidf=tfidf
    )
    return(obj)
  }
)


## the standard generic for corpusStopwords() is defined in the koRpus package
#' @importFrom koRpus corpusStopwords
#' @rdname kRp.corpus_get-methods
#' @docType methods
#' @export
#' @aliases
#'    corpusStopwords,-methods
#'    corpusStopwords,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusStopwords",
  signature=signature(obj="kRp.corpus"),
  function (obj){
    return(feature(obj, "stopwords"))
  }
)

## the standard generic for corpusStopwords()<- is defined in the koRpus package
#' @importFrom koRpus corpusStopwords<-
#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#'    corpusStopwords<-,-methods
#'    corpusStopwords<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("corpusStopwords<-",
  signature=signature(obj="kRp.corpus"),
  function (obj, value){
    feature(obj, "stopwords") <- value
    return(obj)
  }
)


## the standard generic for diffText() is defined in the koRpus package
#' @rdname kRp.corpus_get-methods
#' @importFrom koRpus diffText
#' @export
#' @docType methods
#' @aliases
#'    diffText,-methods
#'    diffText,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("diffText",
  signature=signature(obj="kRp.corpus"),
  function (obj, doc_id=NULL){
    return(feature(obj, "diff", doc_id=doc_id))
  }
)

## the standard generic for diffText()<- is defined in the koRpus package
#' @rdname kRp.corpus_get-methods
#' @importFrom koRpus diffText<-
#' @export
#' @docType methods
#' @aliases
#'    diffText<-,-methods
#'    diffText<-,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("diffText<-",
  signature=signature(obj="kRp.corpus"),
  function (obj, value){
    feature(obj, "diff") <- value
    return(obj)
  }
)


## the standard generic for originalText() is defined in the koRpus package
#' @rdname kRp.corpus_get-methods
#' @importFrom koRpus originalText kRp_txt_trans
#' @export
#' @docType methods
#' @aliases
#'    originalText,-methods
#'    originalText,kRp.corpus-method
#' @include 01_class_01_kRp.corpus.R
setMethod("originalText",
  signature=signature(obj="kRp.corpus"),
  function (obj){
    return(originalText(
      kRp_txt_trans(
        tokens=taggedText(obj)
      )
    ))
  }
)


#' @rdname kRp.corpus_get-methods
#' @export
#' @include 01_class_01_kRp.corpus.R
is.corpus <- function(obj){
  inherits(obj, "kRp.corpus")
}

#' @rdname kRp.corpus_get-methods
#' @param x See \code{obj}.
#' @param i Defines the row selector (\code{[}) or the name to match (\code{[[}) in the tokens slot.
#' @param j Defines the column selector in the tokens slot.
#' @param drop See \code{\link[base:Extract]{[}}.
#' @export
#' @docType methods
#' @aliases
#'    [,-methods
#'    [,kRp.corpus,ANY,ANY,ANY-method
setMethod("[",
  signature=signature(x="kRp.corpus"),
  function (x, i, j, ..., drop=TRUE){
    return(taggedText(x)[i, j, ..., drop=drop])
  }
)

#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#'    [<-,-methods
#'    [<-,kRp.corpus,ANY,ANY,ANY-method
setMethod("[<-",
  signature=signature(x="kRp.corpus"),
  function (x, i, j, ..., value){
    taggedText(x)[i, j, ...] <- value
    return(x)
  }
)

#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#'    [[,-methods
#'    [[,kRp.corpus,ANY-method
setMethod("[[",
  signature=signature(x="kRp.corpus"),
  function (x, i, doc_id=NULL, ...){
    if(is.null(doc_id)){
      return(taggedText(x)[[i, ...]])
    } else {
      doc_ids_in_obj <- doc_id(x, has_id=doc_id)
      tt <- taggedText(x)
      if(all(doc_ids_in_obj)){
        return(tt[tt[["doc_id"]] %in% doc_id, i, ...])
      } else {
        warning(
          paste0("Invalid doc_id, omitted:\n  \"", paste0(doc_id[!doc_ids_in_obj], collapse="\", \""), "\""),
          call.=FALSE
        )
        return(tt[tt[["doc_id"]] %in% doc_id[doc_ids_in_obj], i, ...])
      }
    }
  }
)

#' @rdname kRp.corpus_get-methods
#' @export
#' @docType methods
#' @aliases
#'    [[<-,-methods
#'    [[<-,kRp.corpus,ANY,ANY-method
setMethod("[[<-",
  signature=signature(x="kRp.corpus"),
  function (x, i, doc_id=NULL, ..., value){
    if(is.null(doc_id)){
      taggedText(x)[[i]] <- value
    } else {
      doc_ids_in_obj <- doc_id(x, has_id=doc_id)
      tt <- taggedText(x)
      if(all(doc_ids_in_obj)){
        tt[tt[["doc_id"]] %in% doc_id, i, ...] <- value
        taggedText(x) <- tt
      } else {
        stop(simpleError(
          paste0("Invalid doc_id:\n  \"", paste0(doc_id[!doc_ids_in_obj], collapse="\", \""), "\"!")
        ))
      }
    }
    return(x)
  }
)

#' @rdname kRp.corpus_get-methods
#' @param tokens An object of class \code{kRp.corpus}.
#' @export
#' @docType methods
#' @aliases
#'    tif_as_tokens_df,-methods
#'    tif_as_tokens_df,hierarchy-method
setMethod("tif_as_tokens_df",
  signature=signature(tokens="kRp.corpus"),
  function(tokens){
    result <- taggedText(tokens)
    # TIF needs doc_id to be a character vector, not a factor
    result[["doc_id"]] <- as.character(result[["doc_id"]])
    return(result)
  }
)

#' @rdname kRp.corpus_get-methods
#' @param corpus An object of class \code{kRp.corpus}.
#' @docType methods
#' @export
setGeneric("tif_as_corpus_df", function(corpus) standardGeneric("tif_as_corpus_df"))
#' @rdname kRp.corpus_get-methods
#' @importFrom NLP meta
#' @export
#' @docType methods
#' @aliases
#'    tif_as_corpus_df,-methods
#'    tif_as_corpus_df,hierarchy-method
setMethod("tif_as_corpus_df",
  signature=signature(corpus="kRp.corpus"),
  function(corpus){
    corpus_tm <- corpusTm(obj=corpus)
    corpus_texts <- sapply(corpus_tm, function(thisText){paste0(thisText, collapse="\n")})
    corpus_meta <- meta(corpus_tm)
    result <- data.frame(
      doc_id=as.character(corpus_meta[["doc_id"]]),
      text=as.character(corpus_texts),
      stringsAsFactors=FALSE
    )
    extra_cols <- !colnames(corpus_meta) %in% "doc_id"
    if(sum(extra_cols) > 1){
      # append hierarchy information
      result <- cbind(result, as.data.frame(corpus_meta)[,extra_cols])
    } else {}
    rownames(result) <- NULL
    return(result)
  }
)

Try the tm.plugin.koRpus package in your browser

Any scripts or data that you put into this service are public.

tm.plugin.koRpus documentation built on May 18, 2021, 5:07 p.m.