R/pos.R

Defines functions pos

Documented in pos

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' POS tagging by mecab-ko
#'
#' \code{pos} returns part-of-speech (POS) tagged morphemes of Korean phrases.
#'
#' This is a basic function for part-of-speech tagging by mecab-ko. The input
#' is converted to UTF-8 with \code{enc2utf8()} before it is handed to
#' \pkg{RcppMeCab}.
#'
#' See examples on \href{https://github.com/junhewk/RmecabKo}{GitHub}.
#'
#' @param sentence Character vector of phrases to analyze. Any other type
#'   raises an error.
#' @param join Boolean to determine providing POS tags with the morphemes or
#'   not. The default value is TRUE.
#' @param format A data type for the result. One of "list" (the default) or
#'   "data.frame".
#' @param sys_dic A location of system MeCab dictionary. The default value
#'   is "".
#' @param user_dic A location of user-specific MeCab dictionary. The default
#'   value is "".
#' @param parallel Boolean to determine using parallel analyzing. The default
#'   value is FALSE. It only takes effect when \code{sentence} has more than
#'   one element; a single phrase is always analyzed with
#'   \code{RcppMeCab::pos}.
#' @return A list of POS tagged morphemes, each in conjoined character vector
#'   form. Element names of the list are the original phrases. If
#'   \code{join = FALSE}, it returns a list of morphemes named with their
#'   tags.
#'
#' @examples
#' \dontrun{
#' pos(c("Some Korean Phrases"))
#' pos(c("Some Korean Phrases"), join = FALSE)
#' }
#'
#' @import RcppMeCab
#' @export
pos <- function(sentence, join = TRUE, format = c("list", "data.frame"),
                sys_dic = "", user_dic = "", parallel = FALSE) {
  # Resolve the default choice vector to a single value before forwarding it.
  format <- match.arg(format)

  if (!is.character(sentence)) {
    stop("'sentence' must be a character vector.", call. = FALSE)
  }
  sentence <- enc2utf8(sentence)

  # length(), not lengths(): the latter is elementwise and produced a
  # length-k condition for k phrases, which is an error in `if` on R >= 4.2.
  if (isTRUE(parallel) && length(sentence) > 1L) {
    RcppMeCab::posParallel(sentence, join, format, sys_dic, user_dic)
  } else {
    RcppMeCab::pos(sentence, join, format, sys_dic, user_dic)
  }
}
junhewk/RmecabKo documentation built on May 21, 2019, 3:03 a.m.