PMC as reference

Build abstract corpus

The PMC Open Access Commercial Use file list (`oa_comm_use_file_list.csv`), which includes a PMID column, is available from the PMC FTP service.

# Build the PMC abstract corpus that serves as the reference set.
#
# Outputs left in the session: `key`, `ld`, `pmc`, `pmc0`, `corpus`, `corpus0`.
#
# The NCBI API key is a credential, so it is read from the environment instead
# of being stored in the source file. ENTREZ_KEY follows the variable name used
# by rentrez::set_entrez_key(); set it in ~/.Renviron or via Sys.setenv().
key <- Sys.getenv('ENTREZ_KEY')

# Paths are composed with file.path() rather than setwd(), so the session's
# working directory is left untouched.
ld <- '/home/jtimm/Desktop/local-data/oa-common-use'
corpus_rds <- file.path(ld, 'pmc_abstract_corpus.rds')

# Keep only file-list rows that carry a PMID.
pmc <- read.csv(file.path(ld, 'oa_comm_use_file_list.csv'))
pmc0 <- subset(pmc, !is.na(PMID))

if (file.exists(corpus_rds)) {
  # A cached corpus exists: reuse it. Previously the records were downloaded
  # and then immediately replaced by this cached copy.
  corpus <- readRDS(corpus_rds)
} else {
  if (!nzchar(key)) {
    stop('ENTREZ_KEY is not set; an NCBI API key is needed to fetch records.',
         call. = FALSE)
  }
  corpus <- PubmedMTK::pmtk_get_records2(pmids = pmc0$PMID,
                                         cores = 10,
                                         ncbi_key = key)
  # Cache the download so later runs can take the branch above.
  saveRDS(corpus, corpus_rds)
}

# `corpus` is a list of per-batch tables; stack them into a single table.
corpus0 <- data.table::rbindlist(corpus)

MeSH annotation frequency

# Build the PMC reference table of MeSH annotation frequencies and store it as
# package data.
#
# For every (type, term) pair in the gathered annotations, `doc_count` is the
# number of distinct PMIDs carrying that pair, and `doc_prop` is that count
# divided by the number of distinct PMIDs present in `m0`.
m0 <- PubmedMTK::pmtk_gather_mesh(x = corpus0)

pmtk_tbl_pmc_ref <- m0[, list(doc_count = length(unique(pmid))),
                       by = list(type, term)]
total_pmid <- length(unique(m0$pmid))
pmtk_tbl_pmc_ref[, doc_prop := doc_count / total_pmid]

# The table computed above is used as-is. It was previously replaced at this
# point by PubmedMTK::pmtk_tbl_pmc_ref (the copy already shipped with the
# package), which discarded the fresh counts and re-saved the old data.

# Rename by column name rather than by position, so the rename cannot hit the
# wrong column if the grouping order changes.
data.table::setnames(pmtk_tbl_pmc_ref, 'term', 'DescriptorName')
pmtk_tbl_pmc_ref[, DescriptorName := gsub('_', ' ', DescriptorName, fixed = TRUE)]

# use_data() is run from inside the package tree, as before; the previous
# working directory is restored afterwards so the session is not left there.
# NOTE(review): usethis presumably resolves the target package from the
# working directory -- confirm.
old_wd <- setwd('/home/jtimm/pCloudDrive/GitHub/packages/PubmedMTK/data')
usethis::use_data(pmtk_tbl_pmc_ref, overwrite = TRUE)
setwd(old_wd)


jaytimm/PubmedMTK documentation built on Sept. 25, 2022, 10:49 p.m.