# Global knitr chunk options used when rendering this README.
knitr::opts_chunk$set(
  collapse  = TRUE,                   # merge source and output into one block
  comment   = "#>",                   # prefix printed output lines with "#>"
  fig.path  = "man/figures/README-",  # write generated figures under man/figures/
  out.width = "100%"                  # scale figures to the full output width
)

washoku

CRAN status R build status Lifecycle: experimental

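The goal of washoku is to provide recipes `step_*()` functions for preprocessing Japanese text, such as tokenization with Sudachi or MeCab.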

Installation

This package is not yet on CRAN, but can be installed from GitHub with:

# Install the 'remotes' helper only when it is missing. `quietly = TRUE`
# keeps the availability check from printing namespace-loading messages.
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}

# Install washoku from its GitHub repository (uribo/washoku).
remotes::install_github("uribo/washoku")

Usage

recipes `step_*()` functions

# Attach washoku and the other packages used by the examples in this README.
library(washoku)
library(sudachir)
library(recipes)
library(textrecipes)

# Example data: three Japanese text snippets in `txt`, each tagged with an `id`.
d <- tibble::tibble(
  id = c(1, 1, 2),
  txt = c(
    "事実を読者の前に告白すると、去年の八月頃すでに自分の小説を紙上に連載すべきはずだったのである。",
    "吾輩は猫である。名前はまだ無い。",
    "国家公務員はかつ丼を食べたい。"
  )
)

Sudachi

# Set up the Python side: run sudachir's installer, then bind reticulate to the
# "r-sudachipy" conda environment (`required = TRUE` makes a missing
# environment an error instead of a silent fallback).
sudachir::install_sudachipy()
reticulate::use_condaenv("r-sudachipy", required = TRUE)

# Recipe: tokenize `txt` with the "sudachir" engine, keep only tokens tagged
# as nouns ("名詞") or verbs ("動詞"), then turn the tokens back into text.
rec_sudachi <- recipe(d, id ~ txt) %>%
  step_tokenize_jp(
    txt,
    engine = "sudachir",
    options = list(mode = "C", type = "surface", pos = TRUE)
  ) %>%
  textrecipes::step_pos_filter(txt, keep_tags = c("名詞", "動詞")) %>%
  textrecipes::step_untokenize(txt)

# Estimate the recipe and return the processed training data.
rec_sudachi %>%
  prep() %>%
  bake(new_data = NULL)

MeCab

# Recipe: tokenize `txt` with the "RcppMeCab" engine (no extra options), keep
# only tokens tagged as nouns ("名詞") or verbs ("動詞"), then turn the tokens
# back into text.
rec_mecab <- recipe(d, id ~ txt) %>%
  step_tokenize_jp(txt, engine = "RcppMeCab", options = NULL) %>%
  textrecipes::step_pos_filter(txt, keep_tags = c("名詞", "動詞")) %>%
  textrecipes::step_untokenize(txt)

# Estimate the recipe and return the processed training data.
rec_mecab %>%
  prep() %>%
  bake(new_data = NULL)


uribo/washoku documentation built on Sept. 20, 2022, 4:33 a.m.