#' @include partition.R bundle.R S4classes.R
NULL
#' @rdname partition_bundle-class
setMethod("show", "partition_bundle", function (object) {
message('<<partition_bundle>>')
message(sprintf('%-25s', 'Number of objects:'), length(object@objects))
# same code as in show-method for partition
sFix <- unlist(lapply(
names(object@s_attributes_fixed),
function(x) paste(x, "=", paste(object@s_attributes_fixed[[x]], collapse="/"))
))
message(sprintf("%-25s", "s-attributes fixed:"), sFix[1])
if (length(sFix) > 1) for (i in length(sFix)) message(sprintf("%-25s\n"), sFix[i])
})
#' @rdname partition_bundle-class
setMethod("summary", "partition_bundle", function (object, progress = FALSE){
.fn <- function(x) data.frame(summary(x), stringsAsFactors = FALSE)
a <- if (!progress) lapply(object@objects, .fn) else pblapply(object@objects, .fn)
y <- do.call(rbind, a)
rownames(y) <- NULL
y
})
#' @details The \code{merge}-method aggregates several partitions into one partition. The
#' prerequisite for this function to work properly is that there are no
#' overlaps of the different partitions that are to be summarized.
#' Encodings and the root node need to be identical, too.
#' @param name the name for the new partition
#' @return An object of the class 'partition. See partition for the
#' details on the class.
#' @exportMethod merge
#' @rdname partition_bundle-class
#' @examples
#'
#' # merge partition_bundle into one partition
#' gparl <- corpus("GERMAPARLMINI") %>%
#' split(s_attribute = "date") %>%
#' merge()
setMethod("merge", "partition_bundle", function(x, name = "", verbose = FALSE){
corpus_id <- get_corpus(x)
if (length(corpus_id) > 1L){
warning(
"WARNING: Merging will not work correctly, ",
"as the objects within the bundle are derived from different corpora."
)
}
obj_type <- unique(unname(sapply(x@objects, class)))
if (length(obj_type) > 1L)
stop("Class of the objects within the bundle is not unique.")
.message('number of objects to be merged: ', length(x@objects), verbose = verbose)
s_attr <- unique(unname(unlist(lapply(x@objects, slot, "s_attribute_strucs"))))
strucs_combined <- unname(unlist(lapply(x@objects, slot, "strucs")))
if (any(table(strucs_combined) > 1L)) stop("The objects are not non-overlapping.")
strucs_combined <- unique(strucs_combined)
strucs_combined <- strucs_combined[order(strucs_combined)]
y <- new(
obj_type,
corpus = corpus_id,
registry_dir = x[[1]]@registry_dir,
data_dir = x[[1]]@data_dir,
info_file = x[[1]]@info_file,
template = x[[1]]@template,
xml = x[[1]]@xml,
encoding = x[[1]]@encoding,
s_attribute_strucs = s_attr, strucs = strucs_combined,
name = name
)
y@cpos <- get_region_matrix(
corpus = corpus_id, registry = corpus_registry_dir(corpus_id),
s_attribute = s_attr, strucs = strucs_combined
)
y@size <- size(y)
y
})
#' @param name The name of the new `subcorpus` object.
#' @rdname subcorpus_bundle
setMethod("merge", "subcorpus_bundle", function(x, name = "", verbose = FALSE){
y <- callNextMethod()
corpus_type <- get_type(y@corpus)
y@type <- if (is.null(corpus_type)) character() else corpus_type
y@data_dir <- path(
corpus_data_dir(
corpus = y@corpus,
registry = corpus_registry_dir(y@corpus)
)
)
y
})
#' @param ... Further `subcorpus` objects to be merged with `x` and `y`.
#' @param y A `subcorpus` to be merged with `x`.
#' @examples
#'
#' # Merge multiple subcorpus objects
#' a <- corpus("GERMAPARLMINI") %>% subset(date == "2009-10-27")
#' b <- corpus("GERMAPARLMINI") %>% subset(date == "2009-10-28")
#' c <- corpus("GERMAPARLMINI") %>% subset(date == "2009-11-10")
#' y <- merge(a, b, c)
#' s_attributes(y, "date")
#' @rdname subcorpus_bundle
setMethod("merge", "subcorpus", function(x, y, ...){
merge(as(c(list(x), c(y, list(...))), "bundle"), name = "", verbose = FALSE)
})
#' @exportMethod barplot
#' @rdname partition_bundle-class
#' @examples
#' use(pkg = "RcppCWB", corpus = "REUTERS")
#'
#' pb <- partition_bundle("REUTERS", s_attribute = "id")
#' barplot(pb, las = 2)
#'
#' sc <- corpus("GERMAPARLMINI") %>%
#' subset(date == "2009-11-10") %>%
#' split(s_attribute = "speaker") %>%
#' barplot(las = 2)
setMethod("barplot", "partition_bundle", function(height, ...){
tab <- summary(height)
tab <- tab[order(tab[["size"]], decreasing = TRUE),]
barplot(tab[["size"]], names.arg = tab[["name"]], ...)
})
#' @include partition_bundle.R context.R
NULL
#' Generate bundle of partitions.
#'
#' Use \code{partition_bundle} to create a \code{partition_bundle} object, which
#' combines a set of \code{partition} objects.
#'
#' @param .Object A \code{partition}, a length-one \code{character} vector supplying a CWB corpus, or a \code{partition_bundle}
#' @param s_attribute The s-attribute to vary.
#' @param values Values the s-attribute provided shall assume.
#' @param prefix A character vector that will be attached as a prefix to partition names.
#' @param progress Logical, whether to show progress bar.
#' @param mc Logical, whether to use multicore parallelization.
#' @param xml A \code{logical} value.
#' @param type The type of \code{partition} to generate.
#' @param verbose Logical, whether to provide progress information.
#' @param ... parameters to be passed into partition-method (see respective documentation)
#' @return S4 class \code{partition_bundle}, with list of partition objects in slot 'objects'
#' @export partition_bundle
#' @author Andreas Blaette
#' @name partition_bundle
#' @docType methods
#' @rdname partition_bundle-method
#' @examples
#' \dontrun{
#' use("polmineR")
#' bt2009 <- partition("GERMAPARLMINI", date = "2009-.*", regex = TRUE)
#' pb <- partition_bundle(bt2009, s_attribute = "date", progress = TRUE)
#' pb <- enrich(pb, p_attribute = "word")
#' dtm <- as.DocumentTermMatrix(pb, col = "count")
#' summary(pb)
#' pb <- partition_bundle("GERMAPARLMINI", s_attribute = "date")
#' }
#' @seealso \code{\link{partition}} and \code{\link{bundle}}
setGeneric("partition_bundle", function(.Object, ...) standardGeneric("partition_bundle"))
#' @rdname partition_bundle-method
setMethod("partition_bundle", "partition", function(
.Object, s_attribute, values = NULL, prefix = "",
mc = getOption("polmineR.mc"), verbose = TRUE, progress = FALSE,
type = get_type(.Object), ...
) {
split(
x = as(.Object, "subcorpus"), s_attribute = s_attribute,
values = values, prefix = prefix,
mc = mc, verbose = verbose, progress = progress,
type = type,
...
)
})
#' @exportMethod partition_bundle
#' @rdname partition_bundle-method
setMethod("partition_bundle", "corpus", function(
.Object, s_attribute, values = NULL, prefix = "",
mc = getOption("polmineR.mc"), verbose = TRUE, progress = FALSE, xml = "flat", type = get_type(.Object),
...
){
split(
x = .Object,
s_attribute = s_attribute,
values = values,
prefix = prefix,
mc = mc,
verbose = verbose,
progress = progress,
xml = xml,
type = type,
...
)
})
#' @rdname partition_bundle-method
setMethod("partition_bundle", "character", function(
.Object, s_attribute, values = NULL, prefix = "",
mc = getOption("polmineR.mc"), verbose = TRUE, progress = FALSE, xml = "flat", type = get_type(.Object),
...
) {
partition_bundle(
.Object = corpus(.Object), s_attribute = s_attribute, values = values, prefix = prefix,
mc = mc, verbose = verbose, progress = progress,
xml = xml, type = type, ...
)
})
setGeneric("as.partition_bundle", function(.Object, ...) standardGeneric("as.partition_bundle"))
#' @rdname partition_bundle-class
setMethod("as.partition_bundle", "list", function(.Object, ...){
as(.Object, "bundle") # defined in bundle.R
})
#' @param node A logical value, whether to include the node (i.e. query matches) in the region matrix
#' generated when creating a `partition` from a `context`-object.
#' @exportMethod as.partition_bundle
#' @rdname partition_bundle-method
#' @importFrom cli cli_progress_step
#' @examples
#' \dontrun{
#' use("RcppCWB", corpus = "REUTERS")
#' pb <- corpus("REUTERS") %>%
#' context(query = "oil", p_attribute = "word") %>%
#' partition_bundle(node = FALSE, verbose = TRUE)
#' }
setMethod("partition_bundle", "context", function(.Object, node = TRUE, verbose = TRUE, progress = TRUE, mc = 1L){
stopifnot(
is.logical(node),
is.logical(verbose),
is.logical(progress)
)
DT <- copy(.Object@cpos)
setkeyv(x = DT, cols = c("match_id", "cpos"))
if (!node){
if (verbose) cli_progress_step("exclude node from preparation of partitions")
DT <- subset(DT, DT[["position"]] != 0)
}
if (verbose) cli_progress_step("generate list of {.code data.table} objects with regions")
.cpos_left_right <- function(.SD)
list(cpos_left = min(.SD[["cpos"]]), cpos_right = max(.SD[["cpos"]]))
DT_list <- list(
left = subset(DT, DT[["position"]] < 0),
right = subset(DT, DT[["position"]] > 0)
)
if (node) DT_list[["node"]] <- subset(DT, DT[["position"]] == 0)
DT_regions <- rbindlist(lapply(DT_list, function(x) x[, .cpos_left_right(.SD), by = "match_id"]))
setorderv(DT_regions, cols = "match_id")
regions_list <- split(DT_regions, by = "match_id")
if (verbose) cli_progress_step("generate list of {.code data.table} objects with counts")
CNT <- DT[, .N, by = c("match_id", paste(.Object@p_attribute, "id", sep = "_"))]
setnames(CNT, old = "N", new = "count")
for (p_attr in .Object@p_attribute){
CNT[[p_attr]] <- RcppCWB::cl_id2str(
corpus = .Object@corpus, registry = RcppCWB::corpus_registry_dir(.Object@corpus),
p_attribute = p_attr, id = CNT[[paste(p_attr, "id", sep = "_")]]
)
}
count_list <- split(CNT, by = "match_id")
if (verbose) cli_progress_step("assemble {.code partition_bundle}")
prototype <- as(as(.Object, "corpus"), "partition")
prototype@p_attribute <- .Object@p_attribute
.fn <- function(i){
y <- prototype
y@cpos <- as.matrix(regions_list[[i]][, c("cpos_left", "cpos_right")])
y@size <- as.integer(sum(y@cpos[,2] - y@cpos[,1] + 1L)) # see #265
y@stat = count_list[[i]][, "match_id" := NULL]
y
}
retval <- as(as(.Object, "corpus"), "partition_bundle")
retval@p_attribute <- .Object@p_attribute
retval@objects <- if (progress)
pblapply(seq_along(.Object), .fn, cl = mc)
else
lapply(seq_along(.Object), .fn)
retval@explanation <- "this partition_bundle is derived from a context object"
retval
})
#' @rdname partition_bundle-class
setMethod("partition_bundle", "environment", function(.Object)
.get_objects(class = "partition_bundle", envir = .Object)
)
#' @details Applying the \code{partition_bundle}-method to a \code{partition_bundle}-object will iterate
#' through the \code{partition} objects in the \code{object}-slot in the \code{partition_bundle}, and apply
#' \code{partition_bundle} on each \code{partition}, splitting it up by the s-attribute provided by the
#' argument \code{s_attribute}. The return value is a \code{partition_bundle}, the names of which will be
#' the names of the incoming \code{partition_bundle} concatenated with the s-attribute values used for splitting.
#' The argument \code{prefix} can be used to achieve a more descriptive name.
#' @examples
#' # split up objects in partition_bundle by using partition_bundle-method
#' use("polmineR")
#' pb <- partition_bundle("GERMAPARLMINI", s_attribute = "date")
#' pb2 <- partition_bundle(pb, s_attribute = "speaker", progress = FALSE)
#'
#' summary(pb2)
#' @rdname partition_bundle-method
setMethod("partition_bundle", "partition_bundle", function(.Object, s_attribute, prefix = character(), progress = TRUE, mc = getOption("polmineR.mc")){
if (is.logical(mc)) mc <- if (isTRUE(mc)) as.integer(getOption("polmineR.cores")) else 1L
mc <- as.integer(mc)
stopifnot(length(mc) == 1L, !is.na(mc), is.integer(mc))
iterfun <- function(p){
pb <- partition_bundle(p, s_attribute = s_attribute, verbose = FALSE, progress = FALSE)
names(pb) <- paste(name(p), paste(prefix, names(pb), sep = if (length(prefix) > 0) "_" else ""), sep = "_")
pb@objects
}
partition_list_nested <- if (progress) pblapply(.Object@objects, iterfun, cl = mc) else lapply(.Object@objects, iterfun)
as.partition_bundle(unlist(partition_list_nested))
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.