# R/install.R
# In stanza: 'Stanza' - A 'R' NLP Package for Many Human Languages
#
# Documented in conda_install_stanza stanza_download virtualenv_install_stanza

#' Download Models
#'
#' Download pretrained NLP models. For more information about the parameters
#' see \url{https://stanfordnlp.github.io/stanza/download_models.html}.
#' 
#' @param language a character string giving the language (default is \code{"en"}). 
#' @param model_dir path to the directory for storing the \code{Stanza} models
#'  (default is \code{"~/stanza_resources"}).
#' @param package a character string giving the package to be used (default is \code{"default"}).
#'  In this context package refers to a language specific set of models packaged
#'  together to a single \code{".zip"} file.
#' @param processors a character string or named list giving the processors to download models for.
#'  If a string is provided it should provide the names of the desired processors as a comma-separated
#'  string, e.g., \code{"tokenize,pos"}.
#'  If a named list is provided, the name should be the processor name and the values the
#'  package name, e.g., \code{list(tokenize = "ewt", pos = "ewt")}.
#'  Default is \code{list()}.
#' @param logging_level a character string giving the logging level (default is \code{"INFO"}),
#'  available levels are \code{c('DEBUG', 'INFO', 'WARNING', 'WARN', 'ERROR', 'CRITICAL', 'FATAL')}.
#  @param verbose (default is \code{NULL})
#   NOTE: This is not needed since it just overwrites the logging_level
#         - if TRUE => logging_level = 'INFO'
#         - if FALSE => logging_level = 'ERROR'
#         - if NULL => logging_level = 'WARN'
#         This information was obtained by reading the source code
#         https://github.com/stanfordnlp/stanza/blob/master/stanza/resources/common.py 
#' @param resources_url a character string giving the url to the \code{Stanza} model resources.
#'  The default value is obtained from Python during the initialization and can be obtained
#'  and changed by using \code{stanza_options}.
#  @param resources_branch (default is \code{NULL})
#   NOTE: This is not needed since it just overwrites resources_url
#   if resources_url == DEFAULT_RESOURCES_URL and resources_branch is not None:
#           resources_url = STANZA_RESOURCES_GITHUB + resources_branch
#' @param resources_version a character string giving the version of the resources.
#'  The default value is obtained from Python during the initialization and can be obtained
#'  and changed by using \code{stanza_options}.
#' @param model_url a character string giving the model url.
#'  The default value is obtained from Python during the initialization and can be obtained
#'  and changed by using \code{stanza_options}.
#' 
#' @returns \code{NULL}
#' 
#' @examples
#' if (stanza_options("testing_level") >= 3L) {
#' stanza_initialize()
#' stanza_download("en")
#' }
#' @export
stanza_download <- function(
    language = "en",
    model_dir = stanza_options("model_dir"),
    package = "default",
    processors = list(),
    logging_level = "INFO",
    resources_url = stanza_options("resources_url"),
    resources_version = stanza_options("resources_version"),
    model_url = stanza_options("model_url")) {
    check_init()
    assert(check_character(logging_level, len = 1L),
           check_character(language, len = 1L), combine = "and")

    # 'processors' is documented as either a single comma separated string
    # (e.g. "tokenize,pos") or a named list (e.g. list(tokenize = "ewt")).
    if (is.character(processors)) {
        # A string is forwarded unchanged; it only has to be a single value.
        assert(check_character(processors, len = 1L, any.missing = FALSE))
    } else {
        assert(check_list(processors))
        if (length(processors) == 0L) {
            # Give the empty list (empty) names; presumably so that it is
            # handed to Python as an empty dict rather than an empty list.
            processors <- setNames(list(), character(0))
        }
        # A non-empty list has to be named (processor name => package name).
        assert(check_character(names(processors)))
    }

    if (is_old_stanza()) {
        # Old Stanza versions name the model directory argument 'dir'.
        stanza$download(lang = language, dir = model_dir, package = package,
            processors = processors, logging_level = logging_level,
            resources_url = resources_url, resources_version = resources_version,
            model_url = model_url)
    } else {
        # Newer versions use 'model_dir'; 'verbose' is passed as NULL so that
        # only 'logging_level' is supplied by this wrapper.
        stanza$download(lang = language, model_dir = model_dir, package = package,
            processors = processors, logging_level = logging_level, verbose = NULL,
            resources_url = resources_url, resources_version = resources_version,
            model_url = model_url)
    }
}


# Internal helper: TRUE if the Stanza version reported by stanza_version()
# is 1.2.x or older, FALSE otherwise.
#
# Only the major and minor components are compared. Missing minor components
# are treated as 0 and non-digit suffixes (e.g. "0rc1") are ignored. An error
# is raised if no major version number can be extracted.
is_old_stanza <- function() {
    sversion <- stanza_version()  # e.g. "1.2.4"
    parts <- strsplit(sversion, ".", fixed = TRUE)[[1L]]
    # Keep only the leading digits of every component so that pre-release
    # suffixes do not turn into NA (with a coercion warning).
    nums <- as.integer(sub("[^0-9].*$", "", parts))
    major <- nums[1L]
    if (is.na(major)) {
        stop("cannot parse the Stanza version: ", sversion, call. = FALSE)
    }
    minor <- if (length(nums) >= 2L && !is.na(nums[2L])) nums[2L] else 0L
    # Proper "version < 1.3" comparison: every 0.x release is old, and within
    # major version 1 everything up to minor version 2 is old.
    major < 1L || (major == 1L && minor <= 2L)
}


#' Conda Install Stanza
#'
#' @param envname a character string giving the name or path of the conda environment
#'  to be used or created for the installation.
#' @param packages a character vector giving the packages to be installed.
#' @param forge a logical giving if conda forge should be used for the installation.
#' @param channel a character vector giving the conda channels to be used.
#' @param conda a character string giving the path to the conda executable.
#' @param ... additional arguments passed to \code{conda_install}.
#' 
#' @returns \code{NULL}
#' 
#' @examples
#' \dontrun{
#' conda_install_stanza()
#' }
#' @export
conda_install_stanza <- function(envname = "stanza", packages = c("python", "stanza"),
    forge = FALSE, channel = c("stanfordnlp"), conda = "auto", ...) {
    envs <- conda_list(conda)
    # Allow at least 30 min for the installation, but never shorten a longer
    # timeout that is already set. The previous option is restored on exit.
    old_options <- options(timeout = max(1800L, getOption("timeout"), na.rm = TRUE))
    on.exit(options(old_options), add = TRUE)
    if (isTRUE(envname %in% envs$name)) {
        # Existing environment: install everything except "python" into it.
        packages <- setdiff(packages, "python")
        conda_install(envname = envname, packages = packages, forge = forge,
                      channel = channel, conda = conda, ...)
    } else {
        # Unknown environment: create it together with the requested packages.
        conda_create(envname = envname, packages = packages, forge = forge,
                     channel = channel, conda = conda)
    }
}


#' Install Stanza via Virtual Environment
#'
#' @param envname a character string giving the name or path of the virtual environment
#'  to be used or created for the installation.
#' @param packages a character vector giving the packages to be installed.
#' @param python a string giving the name or path of the python version to be used 
#'      (e.g., \code{"python3"}).
#' @param ... additional arguments passed to \code{virtualenv_create} or \code{virtualenv_install}.
#' 
#' @returns \code{NULL}
#' 
#' @examples
#' \dontrun{
#' virtualenv_install_stanza()
#' }
#' @export
virtualenv_install_stanza <- function(envname = "stanza", packages = "stanza", python = NULL, ...) {
    # Allow at least 30 min for the installation, but never shorten a longer
    # timeout that is already set. The previous option is restored on exit.
    old_options <- options(timeout = max(1800L, getOption("timeout"), na.rm = TRUE))
    on.exit(options(old_options), add = TRUE)
    if (isTRUE(envname %in% virtualenv_list())) {
        # Existing environment: only install the packages into it.
        virtualenv_install(envname = envname, packages = packages, ...)
    } else {
        # Unknown environment: create it together with the requested packages.
        virtualenv_create(envname = envname, python = python, packages = packages, ...)
    }
}