# Nothing
## -----------------------------------------------------------------------------
library(textreuse)
# Locate the "extdata/ats" directory inside the installed textreuse package.
dir <- system.file("extdata/ats", package = "textreuse")

# Build the corpus from that directory using the n-gram tokenizer with n = 5;
# the progress indicator is switched off.
corpus <- TextReuseCorpus(
  dir = dir,
  tokenizer = tokenize_ngrams,
  n = 5,
  progress = FALSE
)
## -----------------------------------------------------------------------------
# Score one pair of documents, each pulled from the corpus by name.
jaccard_similarity(
  corpus[["remember00palm"]],
  corpus[["remembermeorholy00palm"]]
)
## ----eval=FALSE---------------------------------------------------------------
# comparisons <- pairwise_compare(corpus, jaccard_similarity, progress = FALSE)
# comparisons[1:4, 1:4]
## ---- echo=FALSE--------------------------------------------------------------
# Run jaccard_similarity over the corpus through pairwise_compare(), without
# the progress indicator.
comparisons <- pairwise_compare(
  corpus,
  jaccard_similarity,
  progress = FALSE
)

# Show only the first three rows and columns, rounded to three digits.
round(comparisons[seq_len(3), seq_len(3)], digits = 3)
## -----------------------------------------------------------------------------
# Hand the comparison result to pairwise_candidates().
candidates <- pairwise_candidates(comparisons)

# Keep only the rows whose score is above 0.1.
candidates[candidates[["score"]] > 0.1, ]
## ----eval=FALSE---------------------------------------------------------------
# vignette("minhash", package = "textreuse")
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.