Nothing
hyphenatr: HY-PHEN-ATE ALL THE THINGS
This package is based on the Hunspell hyphenation library and includes hyphenation rule dictionaries from LibreOffice. See the <inst/COPYRIGHTS> file for individual copyright information.
The following functions are implemented:
- curr_dict: Identify the current hyphenation rules language
- hyphenate: Hyphenate a character vector of words
- list_dicts: List the available hyphenation rule languages
- switch_dict: Switch the hyphenation rules language

devtools::install_github("hrbrmstr/hyphenatr")
library(hyphenatr)
library(jsonlite)
library(microbenchmark)
# current version
packageVersion("hyphenatr")
#> [1] '0.3.0.9000'
list_dicts()
#> [1] "af_ZA" "bg_BG" "ca" "cs_CZ" "da_DK" "de" "de_AT" "de_CH" "de_DE" "el_GR" "en_GB" "en_US"
#> [13] "es_ANY" "et_EE" "fr" "gl" "hr_HR" "hu_HU" "is" "it_IT" "lt" "lt_LT" "lv_LV" "nb_NO"
#> [25] "nl_NL" "nn_NO" "pl_PL" "pt_BR" "pt_PT" "ro_RO" "ru_RU" "sh" "sk_SK" "sl_SI" "sr" "sv"
#> [37] "te_IN" "uk_UA" "zu_ZA"
curr_dict()
#> [1] "en_US"
# test word list (10K words)
dat <- readLines(system.file("extdata/top10000en.txt", package="hyphenatr"))
microbenchmark(out1 <- hyphenate(dat))
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> out1 <- hyphenate(dat) 20.77134 22.16768 23.70809 23.65906 24.73395 30.21601 100
out1[500:550]
#> [1] "got" "fam=ily" "pol=icy" "in=vestors" "record" "loss"
#> [7] "re=ceived" "April" "Ex=change" "code" "graph=ics" "agency"
#> [13] "in=creased" "man=ager" "keep" "look" "of=ten" "de=signed"
#> [19] "Euro=pean" "earn=ings" "en=vi=ron=ment" "July" "job" "third"
#> [25] "wa=ter" "net" "banks" "an=a=lysts" "strong" "party"
#> [31] "econ=omy" "away" "dol=lar" "taken" "de=vel=oped" "con=tinue"
#> [37] "al=low" "Mi=crosoft" "key" "ei=ther" "se=cu=rity" "project"
#> [43] "agreed" "though" "Ja=pan" "rather" "coun=tries" "plant"
#> [49] "along" "Ap=ple" "ac=tion"
microbenchmark(out2 <- hyphenate(dat, simplify=FALSE))
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> out2 <- hyphenate(dat, simplify = FALSE) 26.32844 28.27894 29.26569 29.13235 29.80986 33.21204 100
jsonlite::toJSON(out2[530:540], pretty=TRUE)
#> [
#> ["econ", "omy"],
#> ["away"],
#> ["dol", "lar"],
#> ["taken"],
#> ["de", "vel", "oped"],
#> ["con", "tinue"],
#> ["al", "low"],
#> ["Mi", "crosoft"],
#> ["key"],
#> ["ei", "ther"],
#> ["se", "cu", "rity"]
#> ]
microbenchmark(out3 <- hyphenate(dat, simplify="-"))
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> out3 <- hyphenate(dat, simplify = "-") 26.22136 28.04543 29.82251 30.0245 31.20909 36.4886 100
out3[500:550]
#> [1] "got" "fam-ily" "pol-icy" "in-vestors" "record" "loss"
#> [7] "re-ceived" "April" "Ex-change" "code" "graph-ics" "agency"
#> [13] "in-creased" "man-ager" "keep" "look" "of-ten" "de-signed"
#> [19] "Euro-pean" "earn-ings" "en-vi-ron-ment" "July" "job" "third"
#> [25] "wa-ter" "net" "banks" "an-a-lysts" "strong" "party"
#> [31] "econ-omy" "away" "dol-lar" "taken" "de-vel-oped" "con-tinue"
#> [37] "al-low" "Mi-crosoft" "key" "ei-ther" "se-cu-rity" "project"
#> [43] "agreed" "though" "Ja-pan" "rather" "coun-tries" "plant"
#> [49] "along" "Ap-ple" "ac-tion"
microbenchmark(out4 <- hyphenate(dat, simplify="­"))
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> out4 <- hyphenate(dat, simplify = "­") 28.57537 29.78537 31.6346 31.31182 33.16067 37.89471 100
out4[500:550]
#> [1] "got" "fam­ily" "pol­icy" "in­vestors"
#> [5] "record" "loss" "re­ceived" "April"
#> [9] "Ex­change" "code" "graph­ics" "agency"
#> [13] "in­creased" "man­ager" "keep" "look"
#> [17] "of­ten" "de­signed" "Euro­pean" "earn­ings"
#> [21] "en­vi­ron­ment" "July" "job" "third"
#> [25] "wa­ter" "net" "banks" "an­a­lysts"
#> [29] "strong" "party" "econ­omy" "away"
#> [33] "dol­lar" "taken" "de­vel­oped" "con­tinue"
#> [37] "al­low" "Mi­crosoft" "key" "ei­ther"
#> [41] "se­cu­rity" "project" "agreed" "though"
#> [45] "Ja­pan" "rather" "coun­tries" "plant"
#> [49] "along" "Ap­ple" "ac­tion"
switch_dict("de_DE")
hyphenate("tägelîch")
#> [1] "tä=gelîch"
library(hyphenatr)
library(testthat)
date()
#> [1] "Thu Mar 17 20:15:06 2016"
switch_dict("en_US")
test_dir("tests/")
#> testthat results ========================================================================================================
#> OK: 5 SKIPPED: 0 FAILED: 0
Please note that this project is released with a Contributor Code of Conduct. By participating in this project you agree to abide by its terms.
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.