getting_started_with_easyPubMed.R
In easyPubMed: Search and Retrieve Scientific Publication Records from PubMed

## ----inst__0001, include = TRUE, echo = TRUE, eval = FALSE---------------
#  install.packages("easyPubMed")

## ----inst___02, include = TRUE, echo = TRUE, eval = FALSE----------------
#  library(easyPubMed)

## ----include = FALSE-----------------------------------------------------
library(easyPubMed)
data("EPMsamples")

## ----inst___04, include = TRUE, echo = TRUE, eval = FALSE----------------
#  library(devtools)
#  install_github("dami82/easyPubMed")

## ----message = FALSE, warning = FALSE, eval = FALSE----------------------
#  my_query <- 'Damiano Fantini[AU] AND "2018"[PDAT]'
#  my_entrez_id <- get_pubmed_ids(my_query)
#  my_abstracts_txt <- fetch_pubmed_data(my_entrez_id, format = "abstract")

## ----message = FALSE, warning = FALSE, eval = TRUE, echo = FALSE, include=FALSE----
# Take the records from the EPMsamples dataset attached to the package.
# The conversion to ASCII (with "." as the substitute for anything that cannot
# be converted) may be omitted if your system supports UTF8.
my_abstracts_txt <- iconv(
  x = EPMsamples$DF_papers_abs$pm_res,
  from = "UTF8",
  to = "ASCII",
  sub = "."
)

## ----message = FALSE, warning = FALSE, eval = TRUE-----------------------
# Preview the first elements of the text records
head(x = my_abstracts_txt)

## ----message = FALSE, warning = FALSE, eval = FALSE----------------------
#  my_abstracts_xml <- fetch_pubmed_data(pubmed_id_list = my_entrez_id)

## ----include=FALSE, echo = FALSE, eval = TRUE----------------------------
# Loading from the dataset attached to the package
# You may omit this conversion if your system supports UTF8
my_abstracts_xml <- iconv(EPMsamples$DF_papers_std$pm_res, from = "UTF8", to = "ASCII", sub = ".")

## ----message = FALSE, warning = FALSE, eval = TRUE-----------------------
class(my_abstracts_xml)

# Extract the article titles from the XML records
my_titles <- custom_grep(my_abstracts_xml, "ArticleTitle", "char")

# Shorten titles longer than 75 characters: keep the first 70 and append "..."
TTM <- nchar(my_titles) > 75
my_titles[TTM] <- paste0(substr(my_titles[TTM], 1, 70), "...")

# Show the first few titles
head(my_titles)

## ----message = FALSE, warning = FALSE, eval=FALSE------------------------
#  new_query <- 'Bladder[TIAB] AND Northwestern[AD] AND Chicago[AD] AND "2018"[PDAT]'
#  out.A <- batch_pubmed_download(pubmed_query_string = new_query,
#                                 format = "xml",
#                                 batch_size = 20,
#                                 dest_file_prefix = "easyPM_example",
#                                 encoding = "ASCII")
#  

## ----message = FALSE, warning = FALSE, include = FALSE, echo = FALSE, eval=TRUE----
# Loading from the dataset attached to the package: this stands in for the
# batch_pubmed_download() call of the previous chunk, which is not evaluated
out.A <- EPMsamples$NUBL_dw18$pm_res

## ----message = FALSE, warning = FALSE, eval=TRUE-------------------------
# this variable stores the name of the output files
print(out.A) 

## ----message = FALSE, warning = FALSE, eval=TRUE-------------------------
# Turn the XML data into a collection of records (presumably one per article)
my_PM_list <- articles_to_list(
  pubmed_data = my_abstracts_xml
)

# Type of a one-element subset of the collection
class(my_PM_list[1])

# Display the leading 510 characters of the fourth record
print(substr(x = my_PM_list[4], start = 1, stop = 510))

## ----message = FALSE, warning = FALSE, eval=TRUE-------------------------
# Work on the first record only
curr_PM_record <- my_PM_list[1]

# Content of the PubDate tag, in the default output format
custom_grep(
  curr_PM_record,
  tag = "PubDate"
)

# Content of the LastName tags, requested in the "char" format
custom_grep(
  curr_PM_record,
  tag = "LastName",
  format = "char"
)

## ----message = FALSE, warning = FALSE, eval=TRUE-------------------------
# Pick record number 37 from the NUBL_1618 sample stored in the package
curr_PM_record <- easyPubMed::EPMsamples$NUBL_1618$rec_lst[[37]]
my.df <- article_to_df(curr_PM_record, max_chars = 18)

# First few field names extracted from the PubMed record
head(colnames(my.df))

# Maximum number of characters kept per column when displaying the results
display_widths <- c(title = 15, address = 19, jabbrv = 10)

# Shorten the long text columns before printing
for (field in names(display_widths)) {
  my.df[[field]] <- substr(my.df[[field]], 1, display_widths[[field]])
}

# Show a selection of columns
my.df[, c("pmid", "title", "jabbrv", "firstname", "address")]

## ----message = FALSE, warning = FALSE, eval=TRUE-------------------------
# Same record, this time with autofill = TRUE
my.df2 <- article_to_df(curr_PM_record, autofill = TRUE)

# Shorten the same text columns to the same widths as above
for (field in names(display_widths)) {
  my.df2[[field]] <- substr(my.df2[[field]], 1, display_widths[[field]])
}

# Show a selection of columns
my.df2[, c("pmid", "title", "jabbrv", "firstname", "address")]

## ----message = FALSE, warning = FALSE, eval=TRUE-------------------------
# Convert every record to a data frame, then stack the results by row
xx <- lapply(my_PM_list, article_to_df, autofill = TRUE, max_chars = 50)
full_df <- do.call(rbind, xx)

# Show rows 1, 11, 21, ...; the modulo test yields an empty selection for an
# empty data frame, where seq(1, 0, by = 10) would stop with an error
shown_rows <- which(seq_len(nrow(full_df)) %% 10 == 1)
full_df[shown_rows, c("pmid", "lastname", "jabbrv")]

## ----takes_some_time, message = FALSE, warning = FALSE, eval=TRUE--------
# Run the query and save the results to file(s) prefixed with "NUBL_18_"
new_query <- 'Bladder[TIAB] AND Northwestern[AD] AND Chicago[AD] AND "2018"[PDAT]'
out.B <- batch_pubmed_download(pubmed_query_string = new_query,
                               dest_file_prefix = "NUBL_18_",
                               encoding = "ASCII")

# Retrieve the full name of the XML file downloaded in the previous step
new_PM_file <- out.B[[1]]
new_PM_df <- table_articles_byAuth(pubmed_data = new_PM_file,
                                   included_authors = "first",
                                   max_chars = 0,
                                   encoding = "ASCII")

# Printing a sample of the resulting data frame
new_PM_df$address <- substr(new_PM_df$address, 1, 28)
new_PM_df$jabbrv <- substr(new_PM_df$jabbrv, 1, 9)

# Rows 5, 15, 25, ...; the modulo test yields an empty selection when the query
# returns fewer than 5 rows, where seq(5, n, by = 10) would stop with an error
sid <- which(seq_len(nrow(new_PM_df)) %% 10 == 5)

new_PM_df[sid, c("pmid", "year", "jabbrv", "lastname", "address")]

## ----takes_some_time2, message = FALSE, warning = FALSE, eval=FALSE------
#  new_query <- 'Bladder[TIAB] AND Northwestern[AD] AND Chicago[AD] AND "2018"[PDAT]'
#  new_query <- get_pubmed_ids(new_query)
#  fetched_data <- fetch_pubmed_data(new_query, encoding = "ASCII")

## ----takes_some_time2biz, include = FALSE, echo = FALSE, message = FALSE, warning = FALSE, eval=TRUE----
# Loading from the dataset attached to the package: this stands in for the
# fetch_pubmed_data() call of the previous chunk, which is not evaluated
fetched_data <- EPMsamples$NUBL_1618$pm_res

## ----takes_some_time2triz, message = FALSE, warning = FALSE, eval=TRUE----
# Build the table directly from the fetched data, first authors only
new_PM_df <- table_articles_byAuth(pubmed_data = fetched_data,
                                   included_authors = "first",
                                   max_chars = 0,
                                   encoding = "ASCII")

# Printing a sample of the resulting data frame
new_PM_df$address <- substr(new_PM_df$address, 1, 28)
new_PM_df$jabbrv <- substr(new_PM_df$jabbrv, 1, 9)

# Rows 5, 15, 25, ...; the modulo test yields an empty selection when there are
# fewer than 5 rows, where seq(5, n, by = 10) would stop with an error
sid <- which(seq_len(nrow(new_PM_df)) %% 10 == 5)

new_PM_df[sid, c("pmid", "year", "jabbrv", "lastname", "address")]

## ----message = FALSE, warning = FALSE, eval=TRUE-------------------------
# Report the details of the R session used for this run
sessionInfo()

## ----include = FALSE-----------------------------------------------------
# cleaning: delete the files named in out.A and out.B, when they are present.
# file.remove() signals a warning (not an error) for a file it cannot remove,
# so an error handler never sees a missing file; instead, only the files that
# actually exist are passed on. The call is vectorized, so no loop is needed.
generated_files <- as.character(unlist(c(out.A, out.B)))
generated_files <- generated_files[file.exists(generated_files)]
invisible(file.remove(generated_files))

Any scripts or data that you put into this service are public.

easyPubMed documentation built on May 2, 2019, 3:47 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

easyPubMed
Search and Retrieve Scientific Publication Records from PubMed

inst/doc/getting_started_with_easyPubMed.R
In easyPubMed: Search and Retrieve Scientific Publication Records from PubMed

Try the easyPubMed package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

easyPubMed Search and Retrieve Scientific Publication Records from PubMed

inst/doc/getting_started_with_easyPubMed.R In easyPubMed: Search and Retrieve Scientific Publication Records from PubMed

Try the easyPubMed package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

easyPubMed
Search and Retrieve Scientific Publication Records from PubMed

inst/doc/getting_started_with_easyPubMed.R
In easyPubMed: Search and Retrieve Scientific Publication Records from PubMed