# R/RcppExports.R

# Defines functions posParallelRcpp posDebugRcpp transition_cost dictionary_info dict_index_user dict_index_sys

# Documented in dict_index_sys dict_index_user dictionary_info posDebugRcpp posParallelRcpp transition_cost

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' Build system dictionary
#'
#' @param dic_dir Directory where the source dictionaries are located.
#' This argument is passed as '-d' option argument.
#' @param out_dir Directory where the binary dictionary will be written.
#' This argument is passed as '-o' option argument.
#' @param encoding Encoding of input csv files.
#' This argument is passed as '-f' option argument.
#' @returns Logical.
#' @name dict_index_sys
#' @keywords internal
NULL

#' Build user dictionary
#'
#' @param dic_dir Directory where the source dictionaries are located.
#' This argument is passed as '-d' option argument.
#' @param file Path to write the user dictionary.
#' This argument is passed as '-u' option argument.
#' @param csv_file Path to an input csv file.
#' @param encoding Encoding of input csv files.
#' This argument is passed as '-f' option argument.
#' @returns Logical.
#' @name dict_index_user
#' @keywords internal
NULL

# R-side binding for the native routine `_gibasa_dict_index_sys`.
# The three arguments are handed to the routine unchanged, in the order
# (dic_dir, out_dir, encoding), and its result is returned as-is.
dict_index_sys <- function(dic_dir, out_dir, encoding) {
    built <- .Call(
        `_gibasa_dict_index_sys`,
        dic_dir,
        out_dir,
        encoding
    )
    built
}

# R-side binding for the native routine `_gibasa_dict_index_user`.
# The four arguments are handed to the routine unchanged, in the order
# (dic_dir, file, csv_file, encoding), and its result is returned as-is.
dict_index_user <- function(dic_dir, file, csv_file, encoding) {
    built <- .Call(
        `_gibasa_dict_index_user`,
        dic_dir,
        file,
        csv_file,
        encoding
    )
    built
}

#' Get dictionary information
#'
#' Returns all dictionary information under the current configuration.
#'
#' @details
#' To use the `tokenize()` function, there should be a system dictionary for 'MeCab'
#' specified in some 'mecabrc' configuration files
#' with a line `dicdir=<path/to/dir/dictionary/included>`.
#' This function can be used to check if such a configuration file exists.
#'
#' Currently, this package detects 'mecabrc' configuration files
#' that are stored in the user's home directory
#' or the file specified by the `MECABRC` environment variable.
#'
#' If there are no such configuration files, the package tries to fall back
#' to the 'mecabrc' file that is included with default installations of 'MeCab',
#' but this fallback is not guaranteed to work in all cases.
#'
#' In case there are no 'mecabrc' files available at all,
#' this function will return an empty data.frame.
#'
#' Note that in this case, the `tokenize()` function will not work
#' even if a system dictionary is manually specified via the `sys_dic` argument.
#' In such a case, you should mock up a 'mecabrc' file to temporarily use the dictionary.
#' See examples for `build_sys_dic()` and `build_user_dic()` for details.
#'
#' @param sys_dic Character scalar; path to the system dictionary for 'MeCab'.
#' @param user_dic Character scalar; path to the user dictionary for 'MeCab'.
#' @returns A data.frame (an empty data.frame if there is no dictionary configured at all).
#' @examples
#' \dontrun{
#' dictionary_info()
#' }
#' @name dictionary_info
#' @export
NULL

#' Get transition cost between pos attributes
#'
#' Gets transition cost between two pos attributes for a given dictionary.
#' Note that the valid range of pos attributes differs depending on the dictionary.
#' If `rcAttr` or `lcAttr` is out of range, this function will be aborted.
#'
#' @param rcAttr Integer; the right context attribute ID of the right-hand side of the transition.
#' @param lcAttr Integer; the left context attribute ID of the left-hand side of the transition.
#' @param sys_dic Character scalar; path to the system dictionary for 'MeCab'.
#' @param user_dic Character scalar; path to the user dictionary for 'MeCab'.
#' @returns An integer scalar.
#'
#' @name transition_cost
#' @keywords internal
NULL

#' Tokenizer for debug use
#'
#' Tokenizes a character vector
#' and returns all possible results out of the tokenization process.
#' The returned data.frame contains additional attributes for debug usage.
#'
#' @param text A character vector to be tokenized.
#' @param sys_dic Character scalar; path to the system dictionary for 'MeCab'.
#' @param user_dic Character scalar; path to the user dictionary for 'MeCab'.
#' @param partial Logical; If `TRUE`, activates partial parsing mode.
#' @returns A data.frame.
#'
#' @name posDebugRcpp
#' @keywords internal
#' @export
NULL

# R-side binding for the native routine `_gibasa_dictionary_info`.
# Both dictionary paths default to the empty string and are forwarded
# unchanged; the routine's result is returned as-is.
dictionary_info <- function(sys_dic = "", user_dic = "") {
    info <- .Call(
        `_gibasa_dictionary_info`,
        sys_dic,
        user_dic
    )
    info
}

# R-side binding for the native routine `_gibasa_transition_cost`.
# Arguments are forwarded unchanged in the order
# (rcAttr, lcAttr, sys_dic, user_dic); the routine's result is returned as-is.
transition_cost <- function(rcAttr, lcAttr, sys_dic = "", user_dic = "") {
    cost <- .Call(
        `_gibasa_transition_cost`,
        rcAttr,
        lcAttr,
        sys_dic,
        user_dic
    )
    cost
}

# R-side binding for the native routine `_gibasa_posDebugRcpp`.
# Arguments are forwarded unchanged in the order
# (text, sys_dic, user_dic, partial); the routine's result is returned as-is.
posDebugRcpp <- function(text, sys_dic = "", user_dic = "", partial = 0L) {
    tokens <- .Call(
        `_gibasa_posDebugRcpp`,
        text,
        sys_dic,
        user_dic,
        partial
    )
    tokens
}

#' Call tagger inside 'RcppParallel::parallelFor' and return a data.frame.
#'
#' This function is an internal function called by `tokenize()`.
#' For common usage, use `tokenize()` instead.
#'
#' @param text A character vector to be tokenized.
#' @param sys_dic Character scalar; path to the system dictionary for 'MeCab'.
#' @param user_dic Character scalar; path to the user dictionary for 'MeCab'.
#' @param partial Logical; If `TRUE`, activates partial parsing mode.
#' @param grain_size Integer value of 1 or larger.
#' @returns A data.frame.
#'
#' @name posParallelRcpp
#' @keywords internal
#' @export
NULL

# R-side binding for the native routine `_gibasa_posParallelRcpp`.
# Arguments are forwarded unchanged in the order
# (text, sys_dic, user_dic, partial, grain_size); the routine's result is
# returned as-is.
posParallelRcpp <- function(text, sys_dic = "", user_dic = "", partial = 0L, grain_size = 1L) {
    tokens <- .Call(
        `_gibasa_posParallelRcpp`,
        text,
        sys_dic,
        user_dic,
        partial,
        grain_size
    )
    tokens
}

# Register entry points for exported C++ functions
# The anonymous function below is handed to methods::setLoadAction(); when
# that action is invoked it calls the native routine
# `_gibasa_RcppExport_registerCCallable` with no further arguments.
methods::setLoadAction(function(ns) {
    # `ns` is accepted by the action but is not used in its body.
    .Call(`_gibasa_RcppExport_registerCCallable`)
})
# paithiov909/gibasa documentation built on June 14, 2025, 4:31 p.m.