Nothing
## ----setup, include=FALSE, cache=FALSE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
options(width = 1000)
knitr::opts_chunk$set(echo = TRUE, message = FALSE, comment = NA, eval = (require(udpipe, quietly = TRUE)))
## ---- message = FALSE, warning=FALSE, error=FALSE, results='hide', echo=FALSE---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
library(udpipe)
udmodel <- udpipe_download_model("dutch")
## ---- message = FALSE, warning=FALSE, error=FALSE, results='hide'---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
library(crfsuite)
x <- ner_download_modeldata("conll2002-nl")
## ---- echo=FALSE----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
knitr::opts_chunk$set(echo = TRUE, message = FALSE, comment = NA, eval = inherits(x, "data.frame") && !udmodel$download_failed)
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
subset(x, doc_id == 100)
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
library(data.table)
x <- as.data.table(x)
x <- x[, pos_previous := shift(pos, n = 1, type = "lag"), by = list(doc_id)]
x <- x[, pos_next := shift(pos, n = 1, type = "lead"), by = list(doc_id)]
x <- x[, token_previous := shift(token, n = 1, type = "lag"), by = list(doc_id)]
x <- x[, token_next := shift(token, n = 1, type = "lead"), by = list(doc_id)]
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
x <- x[, pos_previous := txt_sprintf("pos[w-1]=%s", pos_previous), by = list(doc_id)]
x <- x[, pos_next := txt_sprintf("pos[w+1]=%s", pos_next), by = list(doc_id)]
x <- x[, token_previous := txt_sprintf("token[w-1]=%s", token_previous), by = list(doc_id)]
x <- x[, token_next := txt_sprintf("token[w-1]=%s", token_next), by = list(doc_id)]
subset(x, doc_id == 100, select = c("doc_id", "token", "token_previous", "token_next"))
x <- as.data.frame(x)
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
crf_train <- subset(x, data == "ned.train")
crf_test <- subset(x, data == "testa")
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
model <- crf(y = crf_train$label,
x = crf_train[, c("pos", "pos_previous", "pos_next",
"token", "token_previous", "token_next")],
group = crf_train$doc_id,
method = "lbfgs", file = "tagger.crfsuite",
options = list(max_iterations = 25, feature.minfreq = 5, c1 = 0, c2 = 1))
model
stats <- summary(model)
plot(stats$iterations$loss, pch = 20, type = "b",
main = "Loss evolution", xlab = "Iteration", ylab = "Loss")
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
weights <- coefficients(model, encoding = "UTF-8")
head(weights$transitions)
head(subset(weights$states, label %in% "B-LOC"), n = 10)
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
scores <- predict(model,
newdata = crf_test[, c("pos", "pos_previous", "pos_next",
"token", "token_previous", "token_next")],
group = crf_test$doc_id)
crf_test$entity <- scores$label
table(crf_test$entity, crf_test$label)
## ---- results='hide'------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
crf_options("lbfgs")
crf_options("l2sgd")
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
library(crfsuite)
library(udpipe)
library(data.table)
data(airbnb_chunks, package = "crfsuite")
str(airbnb_chunks)
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
table(airbnb_chunks$chunk_entity)
## ---- eval=FALSE----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# ## Annotate text data with udpipe (version >= 0.7)
# udmodel <- udpipe_download_model("dutch")
# udmodel <- udpipe_load_model(udmodel$file_model)
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
airbnb_tokens <- unique(airbnb_chunks[, c("doc_id", "text")])
airbnb_tokens <- udpipe(x = airbnb_tokens, object = udmodel)
str(airbnb_tokens)
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
x <- merge(airbnb_chunks, airbnb_tokens)
table(x$chunk_entity)
## Indicate beginning of sequence and end of sequence and sequence position
x <- as.data.table(x)
x <- x[, bos := sprintf("BOS+%s", (1:.N)-1), by = list(doc_id)]
x <- x[, eos := sprintf("EOS-%s", (.N:1)-1), by = list(doc_id)]
x <- as.data.frame(x)
## Add preceding and next tokens and parts of speech tags
x <- crf_cbind_attributes(x, terms = c("lemma", "upos"),
by = c("doc_id", "sentence_id"), ngram_max = 3, sep = "|")
attributes <- c("bos", "eos", grep("lemma|upos", colnames(x), value=TRUE))
model <- crf(y = x$chunk_entity, x = x[, attributes],
group = x$doc_id,
method = "lbfgs")
scores <- predict(model, newdata = x[, attributes], group = x$doc_id)
barplot(table(scores$label[scores$label != "O"]), col = "royalblue", cex.names = 0.75)
## ---- echo=FALSE----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
invisible(if(file.exists("annotator.crfsuite")) file.remove("annotator.crfsuite"))
invisible(if(file.exists("tagger.crfsuite")) file.remove("tagger.crfsuite"))
invisible(if(file.exists(udmodel$file)) file.remove(udmodel$file))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.