Nothing
# Copyright 2010-2021 Meik Michalke <meik.michalke@hhu.de>
#
# This file is part of the R package koRpus.
#
# koRpus is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# koRpus is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with koRpus. If not, see <http://www.gnu.org/licenses/>.
#' S4 Class kRp.corp.freq
#'
#' This class is used for objects that are returned by \code{\link[koRpus:read.corp.LCC]{read.corp.LCC}} and \code{\link[koRpus:read.corp.celex]{read.corp.celex}}.
#'
#' The slot \code{meta} simply contains all information from the "meta.txt" of the LCC[1] data and remains
#' empty for data from a Celex[2] DB.
#'
#' @section Constructor function:
#' Should you need to manually generate objects of this class (which should rarely be the case), the constructor function
#' \code{kRp_corp_freq(...)} can be used instead of
#' \code{new("kRp.corp.freq", ...)}.
#'
#' @slot meta Metadata on the corpora (see details).
#' @slot words Absolute word frequencies. It has at least the following columns:
#' \describe{
#' \item{\code{num}:}{Some word ID from the DB, integer}
#' \item{\code{word}:}{The word itself}
#' \item{\code{lemma}:}{The lemma of the word}
#' \item{\code{tag}:}{A part-of-speech tag}
#' \item{\code{wclass}:}{The word class}
#' \item{\code{lttr}:}{The number of characters}
#' \item{\code{freq}:}{The frequency of that word in the corpus DB}
#' \item{\code{pct}:}{Percentage of appearance in DB}
#' \item{\code{pmio}:}{Appearance per million words in DB}
#' \item{\code{log10}:}{Base 10 logarithm of word frequency}
#' \item{\code{rank.avg}:}{Rank in corpus data, \code{\link{rank}} ties method "average"}
#' \item{\code{rank.min}:}{Rank in corpus data, \code{\link{rank}} ties method "min"}
#' \item{\code{rank.rel.avg}:}{Relative rank, i.e. percentile of \code{"rank.avg"}}
#' \item{\code{rank.rel.min}:}{Relative rank, i.e. percentile of \code{"rank.min"}}
#' \item{\code{inDocs}:}{The absolute number of documents in the corpus containing the word}
#' \item{\code{idf}:}{The inverse document frequency}
#' }
#' The slot might have additional columns, depending on the input material.
#' @slot desc Descriptive information. It contains six numbers from the \code{meta} information, for convenient accessibility:
#' \describe{
#' \item{\code{tokens}:}{Number of running word forms}
#' \item{\code{types}:}{Number of distinct word forms}
#' \item{\code{words.p.sntc}:}{Average sentence length in words}
#' \item{\code{chars.p.sntc}:}{Average sentence length in characters}
#' \item{\code{chars.p.wform}:}{Average word form length}
#' \item{\code{chars.p.word}:}{Average running word length}
#' }
#' The slot might have additional columns, depending on the input material.
#' @slot bigrams A data.frame listing all tokens that co-occurred next to each other in the corpus:
#' \describe{
#' \item{\code{token1}:}{The first token}
#' \item{\code{token2}:}{The second token that appeared right next to the first}
#' \item{\code{freq}:}{How often the co-occurrence was present}
#' \item{\code{sig}:}{Log-likelihood significance of the co-occurrence}
#' }
#' @slot cooccur Similar to \code{bigrams}, but listing co-occurrences anywhere in one sentence:
#' \describe{
#' \item{\code{token1}:}{The first token}
#' \item{\code{token2}:}{The second token that appeared in the same sentence}
#' \item{\code{freq}:}{How often the co-occurrence was present}
#' \item{\code{sig}:}{Log-likelihood significance of the co-occurrence}
#' }
#' @slot caseSens A single logical value, whether the frequency statistics were calculated case sensitive
#' or not.
#' @name kRp.corp.freq,-class
#' @aliases kRp.corp.freq-class
#' @import methods
#' @keywords classes
# @author m.eik michalke \email{meik.michalke@@hhu.de}
#' @references
#' [1] \url{https://wortschatz.uni-leipzig.de/en/download/}
#' [2] \url{http://celex.mpi.nl}
#' @export kRp_corp_freq
#' @exportClass kRp.corp.freq
#' @rdname kRp.corp.freq-class
kRp_corp_freq <- setClass("kRp.corp.freq",
  # All table-like slots are plain data.frames; which columns each of them
  # must provide is enforced by the validity method registered further down.
  representation=representation(
    meta="data.frame",
    words="data.frame",
    desc="data.frame",
    bigrams="data.frame",
    cooccur="data.frame",
    caseSens="logical"
  ),
  prototype(
  )
)

# Custom initializer for class kRp.corp.freq.
#
# Every slot has a default: an empty (zero-row) data.frame that already carries
# the column names required by the validity method, and FALSE for caseSens.
# This way new("kRp.corp.freq") without further arguments yields a valid object.
# Any argument that is supplied replaces the default for its slot as a whole.
# The object is validated before it is returned, so an invalid slot value
# makes the call fail with the error raised by the validity method.
setMethod("initialize", "kRp.corp.freq",
  function(
    .Object,
    meta=data.frame(
      meta=character(),
      value=character()
    ),
    words=data.frame(
      num=numeric(),
      word=character(),
      lemma=character(),
      tag=character(),
      wclass=character(),
      lttr=numeric(),
      freq=numeric(),
      pct=numeric(),
      pmio=numeric(),
      log10=numeric(),
      rank.avg=numeric(),
      rank.min=numeric(),
      rank.rel.avg=numeric(),
      rank.rel.min=numeric(),
      inDocs=numeric(),
      idf=numeric()
    ),
    desc=data.frame(
      # token and type counts are numbers, like all other columns of this slot
      tokens=numeric(),
      types=numeric(),
      words.p.sntc=numeric(),
      chars.p.sntc=numeric(),
      chars.p.wform=numeric(),
      chars.p.word=numeric()
    ),
    bigrams=data.frame(
      token1=character(),
      token2=character(),
      freq=numeric(),
      sig=numeric()
    ),
    cooccur=data.frame(
      token1=character(),
      token2=character(),
      freq=numeric(),
      sig=numeric()
    ),
    caseSens=FALSE
  ){
    slot(.Object, "meta") <- meta
    slot(.Object, "words") <- words
    slot(.Object, "desc") <- desc
    slot(.Object, "bigrams") <- bigrams
    slot(.Object, "cooccur") <- cooccur
    slot(.Object, "caseSens") <- caseSens
    validObject(.Object)
    return(.Object)
  }
)

# Validity method for class kRp.corp.freq.
#
# Checks the column names of all data.frame slots:
#  - "meta", "bigrams" and "cooccur" must have exactly the expected columns,
#    in the expected order.
#  - "words" and "desc" must contain at least the expected columns; additional
#    columns are allowed, because these slots are documented to possibly carry
#    more columns depending on the input material.
# Returns TRUE for a valid object, otherwise stops with an error naming the
# first offending slot.
setValidity("kRp.corp.freq", function(object){
  meta.names <- colnames(slot(object, "meta"))
  words.names <- colnames(slot(object, "words"))
  desc.names <- colnames(slot(object, "desc"))
  bigrams.names <- colnames(slot(object, "bigrams"))
  cooccur.names <- colnames(slot(object, "cooccur"))

  # minimum set of columns for the two extensible slots
  words.required <- c(
    "num", "word", "lemma", "tag", "wclass", "lttr", "freq", "pct", "pmio", "log10",
    "rank.avg", "rank.min", "rank.rel.avg", "rank.rel.min", "inDocs", "idf"
  )
  desc.required <- c(
    "tokens", "types", "words.p.sntc", "chars.p.sntc", "chars.p.wform", "chars.p.word"
  )
  # exact layout shared by both co-occurrence tables
  token.pair.names <- c("token1", "token2", "freq", "sig")

  if(!identical(meta.names, c("meta", "value"))){
    stop(simpleError("Invalid object: Wrong column names in slot \"meta\"."))
  } else {}
  if(!all(words.required %in% words.names)){
    stop(simpleError("Invalid object: Wrong column names in slot \"words\"."))
  } else {}
  if(!all(desc.required %in% desc.names)){
    stop(simpleError("Invalid object: Wrong column names in slot \"desc\"."))
  } else {}
  if(!identical(bigrams.names, token.pair.names)){
    stop(simpleError("Invalid object: Wrong column names in slot \"bigrams\"."))
  } else {}
  if(!identical(cooccur.names, token.pair.names)){
    stop(simpleError("Invalid object: Wrong column names in slot \"cooccur\"."))
  } else {}

  return(TRUE)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.