# Default knitr chunk options used when rendering this README.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "man/figures/README-",
  out.width = "100%"
)
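The goal of washoku is to provide `recipes` steps, such as `step_tokenize_jp()`, for tokenizing Japanese text with engines like Sudachi and MeCab.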
This package is not yet on CRAN, but can be installed from GitHub with:
# Install the 'remotes' helper only when it is not already available.
# `quietly = TRUE` suppresses the load-failure message when it is missing.
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}

# Install the development version of washoku from GitHub.
remotes::install_github("uribo/washoku")
step_*()
# Attach the packages used by the examples below, one call per line
# (several calls on one line without separators do not parse).
library(washoku)
library(sudachir)
library(recipes)
library(textrecipes)
# Small example data set: an `id` column and a `txt` column holding
# Japanese sentences to be tokenized.
d <- tibble::tibble(
  id = c(1, 1, 2),
  txt = c(
    "事実を読者の前に告白すると、去年の八月頃すでに自分の小説を紙上に連載すべきはずだったのである。",
    "吾輩は猫である。名前はまだ無い。",
    "国家公務員はかつ丼を食べたい。"
  )
)
Sudachi
# One-time setup for the Sudachi engine.
# NOTE(review): presumably installs the SudachiPy Python backend into the
# "r-sudachipy" conda environment selected below; confirm against sudachir.
sudachir::install_sudachipy()

# Point reticulate at the "r-sudachipy" conda environment; `required = TRUE`
# asks reticulate to fail rather than fall back to another Python.
reticulate::use_condaenv(condaenv = "r-sudachipy", required = TRUE)
# Build a recipe that tokenizes the `txt` column with the Sudachi engine.
rec_sudachi <- d %>%
  recipe(id ~ txt) %>%
  step_tokenize_jp(
    txt,
    engine = "sudachir",
    options = list(mode = "C", type = "surface", pos = TRUE)
  )

# Keep only tokens tagged "名詞" (noun) or "動詞" (verb), then collapse
# the remaining tokens back into a single string per row.
rec_sudachi <- rec_sudachi %>%
  textrecipes::step_pos_filter(txt, keep_tags = c("名詞", "動詞")) %>%
  textrecipes::step_untokenize(txt)

# Estimate the recipe and return the processed training data.
bake(prep(rec_sudachi), new_data = NULL)
MeCab
# Build a recipe that tokenizes the `txt` column with the MeCab engine
# (via RcppMeCab), using the engine's default options.
rec_mecab <- d %>%
  recipe(id ~ txt) %>%
  step_tokenize_jp(txt, engine = "RcppMeCab", options = NULL)

# Keep only tokens tagged "名詞" (noun) or "動詞" (verb), then collapse
# the remaining tokens back into a single string per row.
rec_mecab <- rec_mecab %>%
  textrecipes::step_pos_filter(txt, keep_tags = c("名詞", "動詞")) %>%
  textrecipes::step_untokenize(txt)

# Estimate the recipe and return the processed training data.
bake(prep(rec_mecab), new_data = NULL)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.