#' Download MEDLINE format articles from PubMed
#'
#' Searches PubMed through the NCBI E-utilities \code{esearch} endpoint for
#' \code{keyword1} (optionally combined with \code{keyword2} using
#' \code{AND}), downloads the matching records in MEDLINE text format through
#' the \code{efetch} endpoint, writes them to a .txt file in the current
#' working directory and returns the same lines as a character vector.
#'
#' At most 10000 PMIDs are requested from the search and the records are
#' fetched in batches of 100 PMIDs per request. A batch that cannot be
#' downloaded is skipped and reported with a warning.
#'
#' @param keyword1 A single character string: the keyword to search for in
#'   PubMed.
#' @param keyword2 An optional second keyword, combined with \code{keyword1}
#'   using \code{AND}. Defaults to \code{"gene"}; pass \code{NULL} to search
#'   for \code{keyword1} alone.
#'
#' @return A character vector holding the lines of the MEDLINE formatted
#'   records. As a side effect the same lines are written to
#'   \code{<keyword1>_<keyword2>.txt}, or to \code{<keyword1>.txt} when
#'   \code{keyword2} is \code{NULL}. When the search finds no article a
#'   warning is raised, no file is written and an empty character vector is
#'   returned.
#'
#' @importFrom httr GET
#'
#' @importFrom purrr map safely transpose
#'
#' @importFrom XML xmlParse xpathSApply xmlValue
#'
#' @examples
#' \dontrun{
#' # Needs network access and may download several thousand records
#' medline_text <- download_medline("cancer", "genetics")
#' }
#'
#' @rdname download_medline
#' @export download_medline
download_medline <- function(keyword1, keyword2 = "gene") {
  # Validate before any network traffic so mistakes fail fast and clearly
  is_string <- function(x) is.character(x) && length(x) == 1L && !is.na(x)
  if (!is_string(keyword1)) {
    stop("`keyword1` must be a single character string.", call. = FALSE)
  }
  if (!is.null(keyword2) && !is_string(keyword2)) {
    stop("`keyword2` must be NULL or a single character string.",
         call. = FALSE)
  }

  max_results <- 10000L
  batch_size <- 100L
  search_endpoint <-
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?"
  fetch_endpoint <-
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?"

  # E-utilities writes a space inside a term as "+"; a raw space would
  # produce an invalid URL
  as_term <- function(x) gsub("[[:space:]]+", "+", trimws(x))
  term <- as_term(keyword1)
  if (!is.null(keyword2)) {
    term <- paste0(term, "+AND+", as_term(keyword2))
  }

  # Search PubMed and extract the PMIDs (PubMed identifiers) of the hits
  search_url <- paste0(
    search_endpoint, "db=pubmed&term=", term, "&retmax=", max_results
  )
  response <- httr::GET(search_url)
  httr::stop_for_status(response)
  search_xml <- XML::xmlParse(
    httr::content(response, as = "text", encoding = "UTF-8"),
    asText = TRUE
  )
  # xpathSApply() returns an empty list when nothing matches; normalise to a
  # character vector so the length check below is reliable
  pmids <- as.character(
    unlist(XML::xpathSApply(search_xml, "//Id", XML::xmlValue))
  )

  if (length(pmids) == 0L) {
    warning("No PubMed articles found for the given keyword(s).",
            call. = FALSE)
    return(character(0))
  }

  # Build one efetch URL per batch of PMIDs; the last batch may be shorter
  batch_starts <- seq(1L, length(pmids), by = batch_size)
  fetch_urls <- vapply(
    batch_starts,
    function(first) {
      last <- min(first + batch_size - 1L, length(pmids))
      paste0(
        fetch_endpoint,
        "db=pubmed&id=", paste(pmids[first:last], collapse = ","),
        "&rettype=medline&retmode=text"
      )
    },
    character(1)
  )

  # Download every batch; safely() keeps one failed request from aborting
  # the whole download
  fetched <- purrr::transpose(purrr::map(fetch_urls, purrr::safely(readLines)))
  n_failed <- sum(!vapply(fetched[["error"]], is.null, logical(1)))
  if (n_failed > 0L) {
    warning(n_failed, " of ", length(fetch_urls),
            " download batches failed and were skipped.", call. = FALSE)
  }

  medline_lines <- unlist(fetched[["result"]])
  if (is.null(medline_lines)) {
    medline_lines <- character(0)
  }
  # Drop the leading line only when it is blank, so a response that starts
  # directly with a record does not lose its first line
  if (length(medline_lines) > 0L && !nzchar(medline_lines[[1L]])) {
    medline_lines <- medline_lines[-1L]
  }

  # Save the records to a .txt file named after the original keyword(s)
  file_stem <- if (is.null(keyword2)) {
    keyword1
  } else {
    paste0(keyword1, "_", keyword2)
  }
  writeLines(medline_lines, paste0(file_stem, ".txt"))

  # readLines() on a URL string closes its own connection, so nothing is
  # closed globally here; that would also close the caller's connections
  medline_lines
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.