# Nothing
## ----setup, include=FALSE, cache=FALSE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Default knitr chunk options for the chunks that follow.
knitr::opts_chunk$set(
  eval = TRUE,
  comment = NA,
  message = FALSE,
  echo = TRUE
)
# Set the console width option to 1000 columns.
options(width = 1000)
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
library(udpipe)
# Download the Dutch model. `dl` is reused further down for the model path
# (`dl$file_model`) and the download status (`dl$download_failed`).
dl <- udpipe_download_model(
  language = "dutch"
)
# Show the structure of the download result.
utils::str(dl)
## ---- echo=FALSE----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Re-set the chunk defaults so that chunks are evaluated only when the model
# download did not fail.
knitr::opts_chunk$set(
  eval = !dl$download_failed,
  comment = NA,
  message = FALSE,
  echo = TRUE
)
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## Either give a file in the current working directory
## (the file name is version specific, so only load it when it is really there;
## the model is loaded again from the downloaded path right below)
model_file <- "dutch-alpino-ud-2.5-191206.udpipe"
if (file.exists(model_file)) {
  udmodel_dutch <- udpipe_load_model(file = model_file)
}
## Or give the full path to the file
## (`dl$file_model` is the path reported by udpipe_download_model())
udmodel_dutch <- udpipe_load_model(file = dl$file_model)
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Two short Dutch example documents.
txt <- c(
  "Ik ben de weg kwijt, kunt u me zeggen waar de Lange Wapper ligt? Jazeker meneer",
  "Het gaat vooruit, het gaat verbazend goed vooruit"
)
# Annotate both documents with the loaded model and convert the result
# to a data.frame in one go.
x <- as.data.frame(udpipe_annotate(udmodel_dutch, x = txt))
utils::str(x)
# Frequency table of the `upos` column.
table(x[["upos"]])
## ---- results='hide'------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## Variant 1: tokenization + sentence detection only
## (tagger and parser are both switched off)
x <- as.data.frame(
  udpipe_annotate(udmodel_dutch, x = txt, tagger = "none", parser = "none")
)
table(x[["upos"]])
table(x[["dep_rel"]])
## Variant 2: tokenization + sentence detection, POS tagging and lemmatization
## (dependency parsing is switched off)
x <- as.data.frame(
  udpipe_annotate(udmodel_dutch, x = txt, tagger = "default", parser = "none")
)
table(x[["upos"]])
table(x[["dep_rel"]])
## Variant 3: tokenization + sentence detection and dependency parsing
## (POS tagging and lemmatization are switched off)
x <- as.data.frame(
  udpipe_annotate(udmodel_dutch, x = txt, tagger = "none", parser = "default")
)
table(x[["upos"]])
table(x[["dep_rel"]])
## ---- results='hide'------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## Text which is already tokenised: one character vector of tokens per document
input <- list(
  doc1 = c("Ik", "ben", "de", "weg", "kwijt", ",", "kunt", "u", "me", "zeggen",
           "waar", "de", "Lange Wapper", "ligt", "?", "Jazeker", "meneer"),
  doc2 = c("Het", "gaat", "vooruit", ",", "het", "gaat", "verbazend", "goed", "vooruit")
)
## Either put every token on a new line and use tokenizer: vertical
## vapply() is used instead of sapply() so that the result is guaranteed to be
## exactly one string per document, whatever `input` contains
txt <- vapply(
  input,
  FUN = function(tokens) paste(tokens, collapse = "\n"),
  FUN.VALUE = character(1)
)
x <- udpipe_annotate(udmodel_dutch, x = txt, tokenizer = "vertical")
x <- as.data.frame(x)
## Or put every token of each document in 1 string separated by a space and use tokenizer: horizontal
## Mark that if a token contains a space, you need to replace the space
## with the 'NO-BREAK SPACE' (U+00A0) character to make sure it is still considered as one token
txt <- vapply(
  input,
  FUN = function(tokens) {
    tokens <- gsub(" ", intToUtf8(160), tokens) ## replace space with no-break-space
    paste(tokens, collapse = " ")
  },
  FUN.VALUE = character(1)
)
## as.character() drops the document names before the text is annotated
x <- udpipe_annotate(udmodel_dutch, x = as.character(txt), tokenizer = "horizontal")
x <- as.data.frame(x)
## ---- eval=FALSE----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# dl <- udpipe_download_model(language = "sanskrit", udpipe_model_repo = "jwijffels/udpipe.models.ud.2.0")
# udmodel_sanskrit <- udpipe_load_model(file = dl$file_model)
# txt <- "ततः असौ प्राह क्षत्रियस्य तिस्रः भार्या धर्मम् भवन्ति तत् एषा कदाचिद् वैश्या सुता भविष्यति तत् अनुरागः ममास्याम् ततः रथकारः तस्य निश्चयम् विज्ञायावदत् वयस्य किम् अ धुना कर्तव्यम् कौलिकः आह किम् अहम् जानामि त्वयि मित्रे यत् अभिहितं मया ततः"
# x <- udpipe_annotate(udmodel_sanskrit, x = txt)
# Encoding(x$conllu)
# x <- as.data.frame(x)
## ---- eval=FALSE----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# x <- udpipe_annotate(udmodel_sanskrit, x = txt)
# cat(x$conllu, file = "myannotation.conllu")
## ---- results='hide', echo=FALSE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Clean up: delete the downloaded model file if it is present, without
# printing the result of the removal.
if (file.exists(dl$file_model)) {
  invisible(file.remove(dl$file_model))
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.