Nothing
#' @importFrom checkmate assert check_list check_character check_logical
#' @importFrom reticulate import conda_create conda_install conda_list miniconda_path py_eval virtualenv_list virtualenv_create virtualenv_install
#' @importFrom NLP words sents
#' @importFrom stats setNames
#' @importFrom utils head
NULL
# Stop with an informative error unless stanza has been initialized.
#
# Meant to be called as the first statement of functions that need the
# Python backend; the error message quotes the call of that function.
# Returns NULL invisibly when stanza is initialized.
check_init <- function() {
  if (is_initialized()) {
    return(invisible(NULL))
  }
  depth <- sys.nframe()
  if (depth > 1L) {
    # deparse() splits long calls over several strings; join them so the
    # message stays one string instead of being repeated once per line.
    caller_call <- sys.calls()[[depth - 1L]]
    caller_name <- paste(trimws(deparse(caller_call)), collapse = " ")
  } else {
    # Called directly from top level: there is no calling function to quote.
    caller_name <- "check_init()"
  }
  msg <- paste(
    sprintf("in '%s' stanza is not initialized,", caller_name),
    "use 'stanza_initialize' to initialize stanza!"
  )
  stop(msg, call. = FALSE)
}
#' Select Download Method
#'
#' Function to obtain the download method code or list all allowed download methods.
#'
#' @param method a character string giving the name of the download method,
#'  or a single integer-like number that is already a download method code.
#'  The case of the download method name is ignored.
#'  If \code{NULL} all allowed download methods are shown.
#'
#' @returns an integer giving the download method code or, if \code{method}
#'  is \code{NULL}, the object holding all allowed download methods.
#'
#' @examples
#' if (is_stanza_initialized()) {
#' stanza_download_method_code()
#' stanza_download_method_code("none")
#' stanza_download_method_code("reuse_resources")
#' stanza_download_method_code("download_resources")
#' }
#'
#' @export
stanza_download_method_code <- function(method = NULL) {
  download_methods <- rpy("download_methods")
  if (is.null(method)) {
    return(download_methods)
  }
  if (checkmate::test_integerish(method, len = 1L)) {
    # Return a genuine integer, as documented, even for input such as 2.
    return(as.integer(method))
  }
  # assert_string() raises on invalid input; check_string() merely returns
  # the error text, so its result would be silently discarded here.
  checkmate::assert_string(method)
  key <- toupper(method)
  if (!key %in% names(download_methods)) {
    msg <- sprintf("'%s' is not among the allowed methods, allowed methods are %s.",
                   method, deparse(names(download_methods)))
    stop(msg)
  }
  as.integer(unname(download_methods[key]))
}
#' NLP Pipeline
#'
#' @param language a character string giving the language (default is \code{"en"}).
#' @param model_dir path to the directory for storing the \code{Stanza} models
#'  (default is the value of \code{stanza_options("model_dir")}).
#' @param package a character string giving the package (default is \code{"default"}).
#' @param processors either a single character string or a named list
#'  (default is an empty list).
#'  FIXME: we should define if we want to use comma separated string or a character vector.
#' @param logging_level a character string giving the logging level (default is \code{"INFO"}),
#'  available levels are \code{c('DEBUG', 'INFO', 'WARNING', 'WARN', 'ERROR', 'CRITICAL', 'FATAL')}.
#' @param use_gpu a logical giving if \code{GPU} or \code{CPU} should be used (default is \code{FALSE}).
#' @param download_method an integer or character string giving the download method code.
#'  If a character string is provided, it is passed to \code{stanza_download_method_code}
#'  to obtain the integer code.
#'  Use \code{stanza_download_method_code} to obtain the code and list all
#'  available download methods.
#' @param ... additional named arguments passed to the stanza pipeline.
#'
#' @returns a function that can be used to process text. It takes the arguments
#'  \code{doc} and \code{processors} (default \code{NULL}) and returns the
#'  pipeline result with an additional leading class of the form
#'  \code{"stanza_<name>"}.
#'
#' @examples
#' \dontrun{
#' p <- stanza_pipeline()
#' doc <- p('R is a programming language for statistical computing.')
#' }
#'
#' @export
stanza_pipeline <- function(language = "en",
                            model_dir = stanza_options("model_dir"),
                            package = "default",
                            processors = list(),
                            logging_level = "INFO",
                            use_gpu = FALSE,
                            download_method = "reuse_resources",
                            ...) {
  check_init()
  assert(check_character(language, len = 1L),
         check_character(model_dir, len = 1L),
         check_character(package, len = 1L),
         check_character(logging_level, len = 1L),
         check_logical(use_gpu, len = 1L), combine = "and")
  assert(check_character(processors, len = 1L), check_list(processors))
  if (length(processors) == 0L) {
    # Replace an empty specification by an empty list that carries a
    # (zero-length) names attribute.
    processors <- setNames(list(), character(0))
  }
  if (is.list(processors)) {
    assert(check_character(names(processors)))
  }
  kwargs <- list(...)
  if (length(kwargs) > 0L) {
    kwarg_names <- names(kwargs)
    # names() is NULL when nothing is named and contains "" for every unnamed
    # element when only some arguments are named; reject both cases.
    if (is.null(kwarg_names) || !all(nzchar(kwarg_names))) {
      stop("the additional arguments '...' have to be named")
    }
  }
  download_method <- stanza_download_method_code(download_method)
  # Forward the user supplied logging level instead of a fixed 'INFO'.
  processor <- stanza$Pipeline(lang = language, dir = model_dir, package = package,
                               processors = processors, logging_level = logging_level,
                               use_gpu = use_gpu, download_method = download_method,
                               ...)
  function(doc, processors = NULL) {
    obj <- processor(doc, processors)
    # Derive an R class from the last dot-separated part of the first class
    # name, lower-cased and prefixed with "stanza_".
    cls <- sprintf("stanza_%s", tolower(gsub(".*\\.", "", head(class(obj), 1))))
    class(obj) <- c(cls, class(obj))
    obj
  }
}
#' Stanza Version
#'
#' Report which version of the \pkg{stanza} Python package is in use.
#'
#' @returns a character string with the version of the \pkg{stanza}
#'  Python package, stripped of leading and trailing whitespace.
#'
#' @examples
#' stanza_version()
#'
#' @export
stanza_version <- function() {
  raw_version <- stanza[["__version__"]]
  trimws(raw_version)
}
#
# Document
#
# Print a short summary of a document: its class and the number of
# sentences, tokens and words. Returns `x` invisibly, as is conventional
# for print methods.
#' @noRd
#' @export
print.stanza.models.common.doc.Document <- function(x, ...) {
  # The sentence count is obtained with Python's len().
  py_len <- py_eval("len")
  summary_lines <- c(
    sprintf("<%s>", head(class(x), 1)),
    sprintf(" number of sentences: %i", py_len(x$sentences)),
    sprintf(" number of tokens: %i", x$num_tokens),
    sprintf(" number of words: %i", x$num_words)
  )
  writeLines(summary_lines)
  invisible(x)
}
# Sentences of a document; `type` selects between "word" (the default) and
# "token" and is handed to the `sents` helper of `rstanza`.
#' @noRd
#' @export
sents.stanza.models.common.doc.Document <- function(x, type = c("word", "token"), ...) {
  # match.arg() picks the first choice by default and rejects other values.
  type <- match.arg(type)
  rstanza$sents(x, type)
}
# Words of a document, obtained from the `words` helper of `rstanza`.
#' @noRd
#' @export
words.stanza.models.common.doc.Document <- function(x, ...) {
  rstanza$words(x)
}
#' Tokens
#'
#' Extract the tokens of a stanza object.
#'
#' @param x an object inheriting from \code{"stanza_document"} or
#'  \code{"stanza_sentence"}.
#' @param ... further arguments; accepted but currently ignored.
#'
#' @returns a data.frame with the tokens.
#'
#' @export
tokens <- function(x, ...) {
  extract_tokens <- rstanza$tokens
  extract_tokens(x)
}
#' Entities
#'
#' Extract the entities of a stanza document.
#'
#' @param x an object inheriting from \code{"stanza_document"}.
#' @param ... further arguments; accepted but currently ignored.
#'
#' @returns a data.frame with the entities.
#'
#' @export
entities <- function(x, ...) {
  extract_entities <- rstanza$entities
  extract_entities(x)
}
#' Multi-Word Token
#'
#' Extract the multi-word tokens and return them as a data.frame.
#'
#' @param x the object from which the multi-word tokens are extracted.
#' @param ... further arguments; accepted but currently ignored.
#'
#' @returns a data.frame with the multi-word tokens.
#'
#' @export
multi_word_token <- function(x, ...) {
  extract_mwt <- rstanza$multi_word_token
  # The result is deliberately stored under the name `x`: data.frame() may
  # derive a column name from the expression it is called with.
  x <- extract_mwt(x)
  data.frame(x)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.