An R package for working with text data. Mostly a wrapper for the corpus,
quanteda & udpipe packages, and an attempt at a uniform framework.
# Install text2df from its GitHub repository (jaytimm/text2df).
devtools::install_github(repo = "jaytimm/text2df")
library(dplyr)
# Search PubMed for "Psilocybin", restricted to the TIAB and MH fields.
pmids <- pubmedr::pmed_search_pubmed(
  search_term = "Psilocybin",
  fields = c("TIAB", "MH")
)
## [1] "Psilocybin[TIAB] OR Psilocybin[MH]: 1414 records"

# Fetch the matching records, stack them into one data frame, drop records
# without an abstract, and rename the id/abstract columns to doc_id/text,
# the column names the text2df pipeline steps below are fed.
corpus <- pubmedr::pmed_get_records2(pmids = pmids$pmid) |>
  dplyr::bind_rows() |>
  dplyr::filter(!is.na(abstract)) |>
  dplyr::rename(doc_id = pmid, text = abstract)
# Split each document in `corpus` into sentences. The printed table shows one
# row per sentence, with doc_id suffixed by the sentence number.
x0 <- corpus |>
  text2df::tif2sentence()

# Preview the first rows as a markdown table. Uses the native pipe so this
# snippet matches the `|>` style used everywhere else in the file.
x0 |>
  head() |>
  knitr::kable()
| doc_id | text |
|:---|:-------------------------------------------------------------------|
| 36129571.1 | Few treatments are available for patients with mood disorders or post-traumatic stress disorder (PTSD) who have already failed multiple interventions. |
| 36129571.2 | After several decades when research into psychedelics was effectively halted by federal legislation, the past several years have shown the re-emergence of thoughtful investigations studying the utility of compounds such as 3,4-methylenedioxymethamphetamine (MDMA) and psilocybin. |
| 36129571.3 | Several studies have coupled the safe administration of psychedelic compounds in a controlled environment after several hours of preparation of study participants and followed by multiple sessions to integrate the psychedelic experience. |
| 36129571.4 | The improvement participants experience appear related to the often profound perspective changes experienced and seem unlike the improvements seen in the currently available care paradigms. |
| 36129571.5 | Studies cited include treatment resistant depression, end of life despair, and PTSD. |
| 36129571.6 | Psychedelic psychotherapy, a unique remarriage of biological therapy and psychotherapy, has the potential to transform mental health care. |
# Sentence-split the corpus, then tokenize each sentence. The printed result
# is a list of character vectors of tokens, named by sentence id.
x1 <- text2df::tif2token(
  text2df::tif2sentence(corpus)
)

# Show the first three sentences.
x1[1:3]
## $`36129571.1`
## [1] "Few" "treatments" "are" "available"
## [5] "for" "patients" "with" "mood"
## [9] "disorders" "or" "post-traumatic" "stress"
## [13] "disorder" "(" "PTSD" ")"
## [17] "who" "have" "already" "failed"
## [21] "multiple" "interventions" "."
##
## $`36129571.2`
## [1] "After" "several"
## [3] "decades" "when"
## [5] "research" "into"
## [7] "psychedelics" "was"
## [9] "effectively" "halted"
## [11] "by" "federal"
## [13] "legislation" ","
## [15] "the" "past"
## [17] "several" "years"
## [19] "have" "shown"
## [21] "the" "re-emergence"
## [23] "of" "thoughtful"
## [25] "investigations" "studying"
## [27] "the" "utility"
## [29] "of" "compounds"
## [31] "such" "as"
## [33] "3,4-methylenedioxymethamphetamine" "("
## [35] "MDMA" ")"
## [37] "and" "psilocybin"
## [39] "."
##
## $`36129571.3`
## [1] "Several" "studies" "have" "coupled"
## [5] "the" "safe" "administration" "of"
## [9] "psychedelic" "compounds" "in" "a"
## [13] "controlled" "environment" "after" "several"
## [17] "hours" "of" "preparation" "of"
## [21] "study" "participants" "and" "followed"
## [25] "by" "multiple" "sessions" "to"
## [29] "integrate" "the" "psychedelic" "experience"
## [33] "."
library(pubmedr)
# Load the `pmed_tbl_mesh` dataset (made available by the pubmedr package
# attached above).
data("pmed_tbl_mesh")

# Build a multi-word-expression lexicon from the TermName column: drop terms
# containing a comma, keep only terms containing a space (i.e. more than one
# word), and keep one row per distinct term.
mwe <- pmed_tbl_mesh |>
  filter(!grepl(",", TermName)) |>
  filter(grepl(" ", TermName)) |>
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  distinct(TermName, .keep_all = TRUE)

# Random sample of ten terms; the output differs from run to run.
sample(mwe$TermName, size = 10)
## [1] "SOS Protein"
## [2] "Todds Paralysis"
## [3] "PCR 4099"
## [4] "Hospital Morgues"
## [5] "Eulenburg's Disease"
## [6] "Non-Steroidal Anti-Inflammatory Agent"
## [7] "Cyclin-Dependent Kinase Inhibitor 2C"
## [8] "Host Parasite Interactions"
## [9] "Peritoneal Fibrosing Syndrome"
## [10] "Radiation Protective Effects"
# Sentence-split and tokenize as before, then pass the tokens through
# token2mwe() with the MeSH-derived lexicon. In the printed output, matched
# multi-word terms appear as single underscore-joined tokens
# (e.g. "mood_disorders").
x10 <- corpus |>
  text2df::tif2sentence() |>
  text2df::tif2token() |>
  text2df::token2mwe(mwe = mwe$TermName)

# Show the first three sentences.
x10[1:3]
## $`36129571.1`
## [1] "Few" "treatments"
## [3] "are" "available"
## [5] "for" "patients"
## [7] "with" "mood_disorders"
## [9] "or" "post-traumatic_stress_disorder"
## [11] "(" "PTSD"
## [13] ")" "who"
## [15] "have" "already"
## [17] "failed" "multiple"
## [19] "interventions" "."
##
## $`36129571.2`
## [1] "After" "several"
## [3] "decades" "when"
## [5] "research" "into"
## [7] "psychedelics" "was"
## [9] "effectively" "halted"
## [11] "by" "federal"
## [13] "legislation" ","
## [15] "the" "past"
## [17] "several" "years"
## [19] "have" "shown"
## [21] "the" "re-emergence"
## [23] "of" "thoughtful"
## [25] "investigations" "studying"
## [27] "the" "utility"
## [29] "of" "compounds"
## [31] "such" "as"
## [33] "3,4-methylenedioxymethamphetamine" "("
## [35] "MDMA" ")"
## [37] "and" "psilocybin"
## [39] "."
##
## $`36129571.3`
## [1] "Several" "studies" "have"
## [4] "coupled" "the" "safe"
## [7] "administration" "of" "psychedelic"
## [10] "compounds" "in" "a"
## [13] "controlled_environment" "after" "several"
## [16] "hours" "of" "preparation"
## [19] "of" "study" "participants"
## [22] "and" "followed" "by"
## [25] "multiple" "sessions" "to"
## [28] "integrate" "the" "psychedelic"
## [31] "experience" "."
# Full pipeline ending in token2df(): the printed table has one row per token
# with doc_id, token, sentence_id, term_id and token_id columns.
x2 <- corpus |>
  text2df::tif2sentence() |>
  text2df::tif2token() |>
  text2df::token2mwe(mwe = mwe$TermName) |>
  text2df::token2df()

# Preview the first rows as a markdown table.
knitr::kable(head(x2))
| doc_id | token | sentence_id | term_id | token_id |
|:---------|:-----------|:------------|--------:|---------:|
| 36129571 | Few | 1 | 1 | 1 |
| 36129571 | treatments | 1 | 2 | 2 |
| 36129571 | are | 1 | 3 | 3 |
| 36129571 | available | 1 | 4 | 4 |
| 36129571 | for | 1 | 5 | 5 |
| 36129571 | patients | 1 | 6 | 6 |
# Load the udpipe English model by full path instead of calling setwd(), so
# the session's working directory is left untouched.
# NOTE(review): `locald` is not defined in the visible code; presumably it is
# the directory holding the model file. Confirm.
udmodel <- udpipe::udpipe_load_model(
  file = file.path(locald, "english-ewt-ud-2.5-191206.udpipe")
)

# Sentence-split, tokenize and merge multi-word expressions, then annotate
# with the loaded model. The printed table includes lemma, upos, xpos and
# feats columns alongside the token positions.
x3 <- corpus |>
  text2df::tif2sentence() |>
  text2df::tif2token() |>
  text2df::token2mwe(mwe = mwe$TermName) |>
  text2df::token2annotation(model = udmodel)

# Preview the first rows as a markdown table (native pipe, matching the rest
# of the file).
x3 |>
  head() |>
  knitr::kable()
| doc_id | sentence_id | start | end | term_id | token_id | token | lemma | upos | xpos | feats |
|:-----|------:|---:|--:|----:|:-----|:------|:-----|:---|:---|:----------------------|
| 36129571 | 1 | 1 | 3 | 1 | 1 | Few | few | ADJ | JJ | Degree=Pos |
| 36129571 | 1 | 5 | 14 | 2 | 2 | treatments | treatment | NOUN | NNS | Number=Plur |
| 36129571 | 1 | 16 | 18 | 3 | 3 | are | be | AUX | VBP | Mood=Ind\|Tense=Pres\|VerbForm=Fin |
| 36129571 | 1 | 20 | 28 | 4 | 4 | available | available | ADJ | JJ | Degree=Pos |
| 36129571 | 1 | 30 | 32 | 5 | 5 | for | for | ADP | IN | NA |
| 36129571 | 1 | 34 | 41 | 6 | 6 | patients | patient | NOUN | NNS | Number=Plur |
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.