A clean & un-adulterated version of the MeSH hierarchical vocabulary & thesaurus as data frame. Included in the R package
PumbedMTK
. Based on two files:desc2021.xml
&mtrees2021.bin
; available via nlm.nih.gov.
git_dir <- '/home/jtimm/jt_work/GitHub/PubmedMTK/mesh/'
## Most recent data are available for downlaod below: desc2020 <- 'ftp://nlmpubs.nlm.nih.gov/online/mesh/2020/xmlmesh/desc2021.xml' trees2021 <- 'ftp://nlmpubs.nlm.nih.gov/online/mesh/MESH_FILES/meshtrees/mtrees2021.bin'
if (!require("pacman")) install.packages("pacman") pacman::p_load(magrittr, dplyr, tidyr, xml2) setwd(git_dir) desc <- xml2::read_xml('desc2020.xml') trees <- read.csv('mtrees2021.bin', header = FALSE, sep =';')
desc
fileExtract descriptor details (& concepts & terms) from descriptor file.
## Descriptor DescriptorUI <- desc %>% xml2::xml_find_all('.//DescriptorUI') %>% xml2::xml_text() DescriptorName <- desc %>% xml2::xml_find_all('.//DescriptorName') %>% xml2::xml_text() descriptor <- data.frame(DescriptorUI, DescriptorName) %>% distinct() %>% arrange(DescriptorUI) ## Concepts ConceptName <- desc %>% xml2::xml_find_all('.//ConceptName') %>% xml2::xml_text() ConceptUI <- desc %>% xml2::xml_find_all('.//ConceptUI') %>% xml2::xml_text() concept <- data.frame(ConceptUI, ConceptName) ## Terms TermUI <- desc %>% xml2::xml_find_all('.//TermUI') %>% xml2::xml_text() TermName <- desc %>% xml2::xml_find_all('.//Term') %>% xml2::xml_find_all('String') %>% xml2::xml_text() term <- data.frame(TermUI, TermName)
Note: Primary concept == primary term == descriptor name == mesh term.
concept_term <- term %>% left_join(concept, by = c('TermName' = 'ConceptName')) %>% fill(ConceptUI) %>% left_join(descriptor, by = c('TermName' = 'DescriptorName')) %>% mutate(DescriptorName = ifelse(is.na(DescriptorUI), NA, TermName)) %>% fill(DescriptorUI, DescriptorName) %>% select(DescriptorUI, DescriptorName, ConceptUI, TermUI, TermName)
mtrees
filetree <- trees %>% rename(mesh_heading = V1, tree_location = V2) %>% select(tree_location, mesh_heading) # Extract the two highest parent nodes from tree location. ## For general classification purposes. level1 <- tree[nchar(tree$tree_location) == 3, ] level2 <- tree[nchar(tree$tree_location) == 7, ] %>% mutate(join = gsub('\\....', '', tree_location)) top_parents <- level2 %>% left_join(level1, by = c('join' = 'tree_location')) colnames(top_parents) <- c('tree2', 'mesh2', 'tree1', 'mesh1') top_parents <- top_parents[, c(3:4, 1:2)]
Manually add labels for highest-level node in hierarchy:
### 2-4 High-level categories cats <- c('Anatomy', 'Organisms', 'Diseases', 'Chemicals and Drugs', 'Analytical, Diagnostic and Therapeutic Techniques, and Equipment', 'Psychiatry and Psychology', 'Phenomena and Processes', 'Disciplines and Occupations', 'Anthropology, Education, Sociology, and Social Phenomena', 'Technology, Industry, and Agriculture', 'Humanities', 'Information Science', 'Named Groups', 'Health Care', 'Publication Characteristics', 'Geographicals') code <- c(LETTERS[1:14], 'V', 'Z') high_tree <- data.frame(code, cats) meta <- top_parents %>% mutate(code = gsub('..$', '', tree1)) %>% left_join(high_tree) %>% select(code:cats, tree1:mesh2)
Which adds the MeSH tree location to descriptor data via
MeSH
heading &DescriptorName
variables. Note that a singleDescriptorName
may be classified (in tree structure) in multiple ways.
clean_col <- function(x) { x <- enc2utf8(x) x <- trimws(x) #x <- tolower(x) } ### 2-5 Join metadata & descriptors/terms pmtk_tbl_mesh <- concept_term %>% left_join(tree, by = c('DescriptorName' = 'mesh_heading')) %>% mutate (tree2 = substring(tree_location, 1, 7)) %>% left_join(meta) %>% # rename(descriptor_id = DescriptorUI, # descriptor_name = DescriptorName, # term_name = TermName) %>% select(DescriptorUI:DescriptorName, TermName, code, cats, mesh1, mesh2, tree_location, tree1, tree2) %>% mutate_at(c('DescriptorName', 'TermName'), clean_col) %>% filter(complete.cases(.))
knitr::kable(head(PubmedMTK::pmtk_tbl_mesh))
pmtk_tbl_mesh <- data.table::data.table(pmtk_tbl_mesh) setwd('/home/jtimm/jt_work/GitHub/PubmedMTK/data') usethis::use_data(pmtk_tbl_mesh, overwrite=TRUE)
library(PubmedMTK) data("pmtk_tbl_mesh") data("pmtk_tbl_pmc_ref") saveRDS(pmtk_tbl_mesh, '/home/jtimm/pCloudDrive/GitHub/packages/PubmedMTK/data-raw/pmtk_tbl_mesh.rds') saveRDS(pmtk_tbl_pmc_ref, '/home/jtimm/pCloudDrive/GitHub/packages/PubmedMTK/data-raw/pmtk_tbl_pmc_ref.rds')
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.