#' Read Pubmed Format
#'
#' Function to extract PMID and AB values from MEDLINE text data
#'
#' @param text a character vector containing the text data from MEDLINE
#' @param key1 a string representing the first identifier key (PMID)
#' @param key2 a string representing the second identifier key (AB)
#'
#' @return
#' a data frame with columns "PMID" and "AB" that contains the extracted values
#'
#' @importFrom
#' stringr str_detect
#'
#' @examples
#' example1 <- readLines("pubmed-Oligodendr_ODP-set.txt")
#' prueba <- read_medline(example1)
#'
#' @rdname read_medline
#' @export read_medline
read_medline <- function(text, key1 = "PMID-", key2 = "AB -"){
a <- vector()
for (i in 1:length(text)){
if (text[i]==""){
a <- c(a, i)
}
}
b <- 1
data_list <- list()
if(length(a)==0){
data_list <- list(text)
} else {
for (i in 1:length(a)){
c <- a[i]
data_list[[i]] <- text[b:c]
b <- c+1
}
}
list_text <- data_list
mydf <- data.frame()
for (i in 1:length(list_text)){
for (j in 1:length(list_text[[i]])){
if (stringr::str_detect(list_text[[i]][j], key1)){
tmp <- strsplit(list_text[[i]][j], " ")
text_tidy <- data.frame(PMID=tmp[[1]][2])
next
}
if (stringr::str_detect(list_text[[i]][j], key2)){
for (l in j:length(list_text[[i]])){
if (!stringr::str_detect(list_text[[i]][l], "FAU -") &
!stringr::str_detect(list_text[[i]][l], "CI -") &
!stringr::str_detect(list_text[[i]][l], "AU -")){
next
} else {
tmp_1 <- paste(list_text[[i]][j:l], collapse = "")
tmp_2 <- strsplit(tmp_1, " - ")
text_tidy1 <- data.frame(AB = tmp_2[[1]][2])
}
}
}
}
df <- cbind(text_tidy, text_tidy1)
mydf <- rbind(mydf, df)
mydf <- mydf[!duplicated(mydf$AB),]
}
return(mydf)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.