inst/doc/orthography_processing.R

## ----setup, include = FALSE----------------------------------------------
library(qlcData)

## ---- eval = FALSE-------------------------------------------------------
#  # install devtools from CRAN
#  install.packages("devtools")
#  # install qlcData from github using devtools
#  devtools::install_github("cysouw/qlcData", build_vignettes = TRUE)
#  # load qlcData package
#  library(qlcData)
#  # access help files of the package
#  help(qlcData)
#  # access this vignette
#  vignette("orthography_processing")

## ------------------------------------------------------------------------
test <- "hállo hállо"

## ---- eval = FALSE-------------------------------------------------------
#  write.profile(test)

## ----echo=FALSE, results='asis'------------------------------------------
# Render the profile that write.profile() builds from `test` as a table
# (this hidden chunk produces the output for the non-evaluated call above).
knitr::kable(write.profile(test))

## ------------------------------------------------------------------------
# the difference between various "o" characters is mostly invisible on screen
"o" == "o"  # these are the same "o" characters, so this statement is true
"o" == "о"  # this is one Latin and one Cyrillic "o" character, so this statement is false

## ------------------------------------------------------------------------
# Replace the single test string with a character vector of several strings;
# the calls that follow operate on this vector.
test <- c("this thing", "is", "a", "vector", "with", "many", "strings")

## ---- eval = FALSE-------------------------------------------------------
#  write.profile(test)

## ----echo=FALSE, results='asis'------------------------------------------
# Render the profile that write.profile() builds from the `test` vector as a
# table (this hidden chunk produces the output for the non-evaluated call above).
knitr::kable(write.profile(test))

## ------------------------------------------------------------------------
# Tokenize every string in `test` without supplying a profile or any other
# option; called at top level, so the result is auto-printed.
tokenize(test)

## ---- eval = FALSE-------------------------------------------------------
#  dir.create("~/Desktop/tokenize")
#  setwd("~/Desktop/tokenize")
#  tokenize(test, file.out = "test_profile.txt")

## ---- echo = FALSE, results='asis'---------------------------------------
# Build an in-memory profile named after the file "test_profile.txt" used in
# the non-evaluated examples: take the profile returned by tokenize(test),
# coerce it to a matrix, and append two rows whose first cell is "th" and "ng"
# (second cell empty), then convert the result to a data frame.
# NOTE(review): assumes the default profile has exactly two columns, since each
# appended row supplies two values — confirm against tokenize().
test_profile.txt <- as.data.frame(rbind(as.matrix(tokenize(test)$profile),c("th", ""),c("ng", "")))
# Show the resulting profile as a table.
knitr::kable(test_profile.txt)

## ---- eval = FALSE-------------------------------------------------------
#  tokenize(test, profile = "test_profile.txt")
#  
#  # with overwriting of the existing profile:
#  # tokenize(test, profile = "test_profile.txt", file.out = "test_profile.txt")
#  
#  # note that you can abbreviate this in R:
#  # tokenize_old(test, p = "test_profile.txt", f = "test_profile.txt")

## ---- echo = FALSE-------------------------------------------------------
# Tokenize `test` again, this time passing the in-memory profile data frame
# (the non-evaluated call shown above passes the file name instead).
tokenize(test, profile = test_profile.txt)

## ---- eval = FALSE-------------------------------------------------------
#  tokenize(c("think", "thin", "both"), profile = "test_profile.txt")

## ---- echo = FALSE-------------------------------------------------------
# Apply the same in-memory profile to a different set of strings than the
# `test` vector it was derived from.
tokenize(c("think", "thin", "both"), profile = test_profile.txt)

## ---- echo = FALSE, results='asis'---------------------------------------
# Toy Italian orthography profile, one entry per grapheme rule. The second "c"
# entry carries "[ie]" in its Right column and maps to "tʃ"; every other entry
# has an empty Right column.
Grapheme <- c("c", "c",    "n", "s", "a", "i")
Right    <- c("",  "[ie]", "",  "",  "",  "")
IPA      <- c("k", "tʃ",   "n", "s", "a", "i")

# Name the columns explicitly rather than relying on cbind() deparsing the
# argument symbols; the resulting character matrix is the same.
italian <- cbind(Grapheme = Grapheme, Right = Right, IPA = IPA)
knitr::kable(italian)

## ------------------------------------------------------------------------
# Tokenize two Italian words with the `italian` profile and keep only the
# `strings` component of the result. `transliterate = "IPA"` refers to the IPA
# column of the profile.
# NOTE(review): `regex = TRUE` presumably makes context entries such as "[ie]"
# be treated as regular expressions — confirm in ?tokenize.
tokenize(c("casa", "cina"), profile = italian, transliterate = "IPA", regex = TRUE)$strings

## ---- echo = FALSE, results='asis'---------------------------------------
# Variant of the Italian profile that uses a named class instead of a literal
# pattern: the Class column labels "i" and "e" as "frontV", and the second "c"
# entry refers to that label in its Right column.
Grapheme <- c("c", "c",      "n", "s", "a", "i",      "e")
Right    <- c("",  "frontV", "",  "",  "",  "",       "")
Class    <- c("",  "",       "",  "",  "",  "frontV", "frontV")
IPA      <- c("k", "tʃ",     "n", "s", "a", "i",      "e")

# Name the columns explicitly rather than relying on cbind() deparsing the
# argument symbols; the resulting character matrix is the same.
italian <- cbind(Grapheme = Grapheme, Right = Right, Class = Class, IPA = IPA)
knitr::kable(italian)

## ------------------------------------------------------------------------
# Tokenize two Italian words with the `italian` profile (the variant whose
# Right column refers to the "frontV" class) and keep only the `strings`
# component of the result. `transliterate = "IPA"` refers to the IPA column.
# NOTE(review): `regex = TRUE` is presumably required for the class-based
# context to be resolved — confirm in ?tokenize.
tokenize(c("casa", "cina"), profile = italian, transliterate = "IPA", regex = TRUE)$strings

Try the qlcData package in your browser

Any scripts or data that you put into this service are public.

qlcData documentation built on May 2, 2019, 8:29 a.m.