# Chunk setup: comment = NA drops the default "##" prefix that knitr puts in
# front of every line of printed output, so results appear as plain text.
knitr::opts_chunk$set(comment = NA)

rTinySegmenter

rTinySegmenter is an R version of TinySegmenter, an extremely compact Japanese tokenizer originally written in JavaScript by Mr. Taku Kudo.

Installation

# Install rTinySegmenter from the GitHub repository yamano357/rTinySegmenter.
# This call needs the devtools package to be installed.
devtools::install_github("yamano357/rTinySegmenter")

Usage

library(dplyr)
library(rTinySegmenter)

# Original model: no model_file is supplied to the constructor
segmentr <- tiny_segmenter$new(
  text = "私の名前は中野です"
)
segmentr$tokenize()

# Names of the entries in the model's TEMPLATE element
names(segmentr$model$TEMPLATE)

# Entry of TEMPLATE selected by the first of those names
local({
  templates <- segmentr$model$TEMPLATE
  templates[[names(templates)[1]]]
})

# Features (model_attr$unit), shown as a data frame
segmentr$model_attr$unit %>%
  as.data.frame()

# Feature values (model_attr$score), shown as a data frame
segmentr$model_attr$score %>%
  as.data.frame()


# Custom model: point model_file at "small-model.json" in the package's
# extdata directory. mustWork = TRUE makes system.file() stop with an error
# when the file cannot be found, instead of returning "" and handing an
# empty path to the constructor.
small_segmentr <- tiny_segmenter$new(
  text = "私の名前は中野です",
  model_file = system.file(
    "extdata", "small-model.json",
    package = "rTinySegmenter",
    mustWork = TRUE
  )
)
small_segmentr$tokenize()

# Names of the entries in the custom model's TEMPLATE element
names(x = small_segmentr$model$TEMPLATE)

# Features (model_attr$unit) of the custom model, shown as a data frame
small_segmentr$model_attr$unit %>%
  as.data.frame()

TODO

See Also



yamano357/rTinySegmenter documentation built on May 4, 2019, 2:28 p.m.