Nothing
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
#' POS tagging by mecab-ko
#'
#' \code{pos} returns part-of-speech (POS) tagged morphemes of Korean phrases.
#'
#' This is the basic function for part-of-speech tagging by mecab-ko. On
#' macOS and Linux the phrases are passed to the compiled tagger together
#' with the dictionary found in \code{/usr/local/lib/mecab/dic/mecab-ko-dic}.
#' On Windows the phrases are written to a temporary UTF-8 text file, the
#' \code{mecab.exe} binary located in \code{getOption("mecab.libpath")} is
#' run on that file, and its output is parsed. The temporary files are
#' removed when the function exits, whether it succeeds or fails.
#'
#' An error is raised if \code{phrase} is not a character vector, if the
#' dictionary (macOS, Linux) or the mecab binary (Windows) is not installed,
#' if mecab does not produce an output file, or if the operating system is
#' not one of the three above.
#'
#' See examples in \href{https://github.com/junhewk/RmecabKo}{Github}.
#'
#' @param phrase Character vector of phrases to be tagged.
#' @param join Logical. If \code{TRUE} (the default), every morpheme is
#'   returned joined with its tag as \code{"morpheme/tag"}. If \code{FALSE},
#'   morphemes are returned as a character vector named by their tags.
#' @return A list of POS tagged morphemes. The element names of the list are
#'   the original phrases. With \code{join = TRUE} each element is a
#'   character vector of \code{"morpheme/tag"} strings; with
#'   \code{join = FALSE} each element is a character vector of morphemes
#'   named by their tags.
#'
#' @examples
#' \dontrun{
#' pos(c("Some Korean Phrases"))
#' pos(c("Some Korean Phrases"), join=FALSE)
#' }
#'
#' @importFrom utils localeToCharset
#' @export
pos <- function(phrase, join = TRUE) {
  if (!is.character(phrase)) {
    stop("'phrase' must be a character vector")
  }

  if (is_osx() || is_linux()) {
    dicpath <- "/usr/local/lib/mecab/dic/mecab-ko-dic"
    if (!dir.exists(dicpath)) {
      stop(paste0("Mecab-ko-dic is not found on ", dicpath, ". Please check https://bitbucket.org/eunjeon/mecab-ko-dic."))
    }
    # Rcpp function doing the tagging; the dictionary is passed as a
    # mecab command line option
    tagged <- posRcpp(phrase, paste0("-d ", dicpath), join)
  } else if (is_windows()) {
    if (!mecab_installed()) {
      stop("Mecab binary is not installed. Please run install_mecab().")
    }

    mecabLibs <- getOption("mecab.libpath")
    # mecab.exe (mecab-ko-msvc) is run through system2(); short path names
    # are used for every path handed to the binary
    mecabKo <- utils::shortPathName(file.path(mecabLibs, "mecab.exe"))
    # the resource file and the dictionary are passed explicitly
    mecabKoRc <- utils::shortPathName(file.path(mecabLibs, "mecabrc"))
    mecabKoDic <- utils::shortPathName(file.path(mecabLibs, "mecab-ko-dic"))

    # save the phrases to a UTF-8 text file, one phrase per line
    phraseFile <- utils::shortPathName(tempfile())
    # remove the temporary file on exit, including on error
    on.exit(unlink(phraseFile), add = TRUE)
    con <- file(phraseFile, "a", encoding = "UTF-8")
    tryCatch({
      cat(iconv(phrase, from = utils::localeToCharset()[1], to = "UTF-8"), file = con, sep = "\n")
    },
    finally = {
      close(con)
    })

    outputFile <- utils::shortPathName(tempfile())
    on.exit(unlink(outputFile), add = TRUE)
    mecabOption <- c("-r", mecabKoRc, "-d", mecabKoDic, "-o", outputFile, phraseFile)

    # run mecab.exe
    status <- system2(mecabKo, mecabOption)
    if (!file.exists(outputFile)) {
      stop("Mecab did not produce an output file (exit status: ", status, ").")
    }

    # reading by path lets readLines() open and close the file itself
    posResult <- readLines(outputFile, encoding = "UTF-8")
    tagged <- parse_mecab_output(posResult, join)
  } else {
    stop("pos() is not supported on this operating system.")
  }

  names(tagged) <- phrase
  tagged
}

# Parse the lines written by mecab into a list with one element per sentence.
#
# Every line other than "EOS" is treated as one morpheme: the text before the
# first comma is taken as "surface<TAB>tag". A line consisting of "EOS" closes
# the current sentence. With join = TRUE a morpheme becomes "surface/tag",
# otherwise it becomes the surface named by its tag.
parse_mecab_output <- function(lines, join = TRUE) {
  tagged <- vector("list", 1L)
  i <- 1L

  for (k in seq_along(lines)) {
    current <- lines[k]

    if (current == "EOS") {
      i <- i + 1L
      # a trailing "EOS" must not open a new, empty sentence
      if (k != length(lines)) {
        length(tagged) <- i
      }
      next
    }

    if (substring(current, 1, 1) == ",") {
      # the morpheme is a comma itself, so splitting the line on "," would
      # yield an empty first field; use the comma with its tag directly
      surfaceAndTag <- ",\tSC"
    } else {
      surfaceAndTag <- strsplit(current, ",")[[1]][1]
    }

    if (join) {
      taggedLine <- gsub("\t", "/", surfaceAndTag)
    } else {
      parts <- strsplit(surfaceAndTag, "\t")[[1]]
      taggedLine <- parts[1]
      names(taggedLine) <- parts[2]
    }

    # appended for every morpheme, comma morphemes included
    tagged[[i]] <- c(tagged[[i]], taggedLine)
  }

  tagged
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.