An R package for working with text data. It is mostly a wrapper for the corpus, quanteda, and udpipe packages, and an attempt at a uniform framework.
# Install text2df from its GitHub repository.
devtools::install_github("jaytimm/text2df")
library(dplyr)

# Search PubMed for "Psilocybin" using the TIAB and MH fields.
pmids <- pubmedr::pmed_search_pubmed(
  search_term = "Psilocybin",
  fields = c("TIAB", "MH")
)

# Fetch the matching records, combine them into one data frame, drop records
# without an abstract, and rename the id/abstract columns to doc_id/text
# (presumably the layout the tif2*() functions expect -- confirm).
corpus <- pubmedr::pmed_get_records2(pmids = pmids$pmid) |>
  bind_rows() |>
  filter(!is.na(abstract)) |>
  rename(doc_id = pmid, text = abstract)
# Split each document in the corpus into sentences.
x0 <- corpus |>
  text2df::tif2sentence()

# Preview the first rows as a formatted table.
x0 |>
  head() |>
  knitr::kable()
# Sentence-split the corpus, then tokenize.
x1 <- corpus |>
  text2df::tif2sentence() |>
  text2df::tif2token()

# Show the first three elements of the result.
x1[1:3]
library(pubmedr)
data("pmed_tbl_mesh")

# Build a multi-word-expression list from the MeSH table: keep term names
# that contain a space but no comma, one row per distinct TermName.
mwe <- pmed_tbl_mesh |>
  filter(!grepl(",", TermName)) |>
  filter(grepl(" ", TermName)) |>
  distinct(TermName, .keep_all = TRUE)

# Inspect a random sample of ten terms.
sample(mwe$TermName, size = 10)
# Sentence-split and tokenize the corpus, then pass the multi-word term
# names to token2mwe().
x10 <- corpus |>
  text2df::tif2sentence() |>
  text2df::tif2token() |>
  text2df::token2mwe(mwe = mwe$TermName)

# Show the first three elements of the result.
x10[1:3]
# Same pipeline as above, with the token output converted by token2df().
x2 <- corpus |>
  text2df::tif2sentence() |>
  text2df::tif2token() |>
  text2df::token2mwe(mwe = mwe$TermName) |>
  text2df::token2df()

# Preview the first rows as a formatted table.
x2 |>
  head() |>
  knitr::kable()
# Directory holding the udpipe model file (machine-specific; adjust locally).
locald <- "/home/jtimm/pCloudDrive/nlp/udpipe-model"

# Load the model by its full path instead of calling setwd(), so the
# session's working directory is left untouched.
udmodel <- udpipe::udpipe_load_model(
  file.path(locald, "english-ewt-ud-2.5-191206.udpipe")
)

# Sentence-split, tokenize and apply the multi-word terms, then annotate the
# tokens with the loaded udpipe model.
x3 <- corpus |>
  text2df::tif2sentence() |>
  text2df::tif2token() |>
  text2df::token2mwe(mwe = mwe$TermName) |>
  text2df::token2annotation(model = udmodel)

# Preview the first rows as a formatted table.
x3 |>
  head() |>
  knitr::kable()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.