R/nouns.R

Defines functions nouns words

Documented in nouns words

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' Noun extractor by mecab-ko
#'
#' \code{nouns} returns nouns extracted from Korean phrases.
#'
#' Noun extraction is used for many Korean text analysis algorithms. The
#' function coerces input to UTF-8, tags it with \code{RcppMeCab} and keeps
#' only the morphemes tagged \code{NNG}, \code{NNP} or \code{NNB}.
#'
#' See examples in \href{https://github.com/junhewk/RmecabKo}{Github}.
#'
#' @param sentence A character vector of phrases to analyse.
#' @param sys_dic System dictionary setting, forwarded unchanged to
#'   \code{RcppMeCab} (default \code{""}).
#' @param user_dic User dictionary setting, forwarded unchanged to
#'   \code{RcppMeCab} (default \code{""}).
#' @param parallel If \code{TRUE} and more than one phrase is supplied, the
#'   phrases are tagged with \code{RcppMeCab::posParallel}; otherwise
#'   \code{RcppMeCab::pos} is used.
#' @return A list with one element per tagged phrase. Each element holds the
#'   morphemes of that phrase carrying a noun tag, named by their tag. A
#'   zero-length \code{sentence} yields an empty list.
#'
#' @examples
#' \dontrun{
#' nouns(c("Some Korean Phrases"))
#' }
#'
#' @import RcppMeCab
#' @export
nouns <- function(sentence, sys_dic = "", user_dic = "", parallel = FALSE) {
  if (!is.character(sentence)) {
    stop("`sentence` must be a character vector.", call. = FALSE)
  }
  # Nothing to tag: return early instead of failing on a zero-length input.
  if (length(sentence) == 0L) {
    return(list())
  }
  sentence <- enc2utf8(sentence)

  noun_tags <- c("NNG", "NNP", "NNB")

  # Decide on the number of phrases with length(); lengths() would give one
  # value per element and is not a valid scalar `if` condition.
  tagger <- if (isTRUE(parallel) && length(sentence) > 1L) {
    RcppMeCab::posParallel
  } else {
    RcppMeCab::pos
  }
  tagged <- tagger(sentence, join = FALSE, format = "list",
                   sys_dic = sys_dic, user_dic = user_dic)

  # lapply() rather than sapply(): the result must stay a list even when
  # every phrase happens to yield the same number of matches.
  lapply(tagged, function(x) x[names(x) %in% noun_tags])
}

#' Words extractor by mecab-ko
#'
#' \code{words} returns full morphemes extracted from Korean phrases.
#'
#' It is based on Mecab-Ko POS classification. Full morphemes are the
#' morphemes tagged \code{NNG}, \code{NNP}, \code{NNB}, \code{NNBC},
#' \code{NR}, \code{NP}, \code{VV}, \code{VA}, \code{VX}, \code{VCP},
#' \code{VCN}, \code{MM}, \code{MAG}, \code{MAJ} or \code{IC}. The function
#' coerces input to UTF-8.
#'
#' See examples in \href{https://github.com/junhewk/RmecabKo}{Github}.
#'
#' @param sentence A character vector of phrases to analyse.
#' @param sys_dic System dictionary setting, forwarded unchanged to
#'   \code{RcppMeCab} (default \code{""}).
#' @param user_dic User dictionary setting, forwarded unchanged to
#'   \code{RcppMeCab} (default \code{""}).
#' @param parallel If \code{TRUE} and more than one phrase is supplied, the
#'   phrases are tagged with \code{RcppMeCab::posParallel}; otherwise
#'   \code{RcppMeCab::pos} is used.
#' @return A list with one element per tagged phrase. Each element holds the
#'   full morphemes of that phrase, named by their tag. A zero-length
#'   \code{sentence} yields an empty list.
#'
#' @examples
#' \dontrun{
#' words(c("Some Korean Phrases"))
#' }
#'
#' @import RcppMeCab
#' @export
words <- function(sentence, sys_dic = "", user_dic = "", parallel = FALSE) {
  if (!is.character(sentence)) {
    stop("`sentence` must be a character vector.", call. = FALSE)
  }
  # Nothing to tag: return early instead of failing on a zero-length input.
  if (length(sentence) == 0L) {
    return(list())
  }
  sentence <- enc2utf8(sentence)

  full_morpheme_tags <- c(
    "NNG", "NNP", "NNB", "NNBC", "NR", "NP", "VV", "VA",
    "VX", "VCP", "VCN", "MM", "MAG", "MAJ", "IC"
  )

  # Decide on the number of phrases with length(); lengths() would give one
  # value per element and is not a valid scalar `if` condition.
  tagger <- if (isTRUE(parallel) && length(sentence) > 1L) {
    RcppMeCab::posParallel
  } else {
    RcppMeCab::pos
  }
  tagged <- tagger(sentence, join = FALSE, format = "list",
                   sys_dic = sys_dic, user_dic = user_dic)

  # lapply() rather than sapply(): the result must stay a list even when
  # every phrase happens to yield the same number of matches.
  lapply(tagged, function(x) x[names(x) %in% full_morpheme_tags])
}
junhewk/RmecabKo documentation built on May 21, 2019, 3:03 a.m.