## format references in dataframe...
## CHECK publication types... table(xpathSApply(doc, "//ref/element-citation", xmlGetAttr, "publication-type"))
## OR //ref/mixed-citation
## OR called citation-type in //ref/citation
pmcRef <- function (doc){
z <- getNodeSet(doc, "//ref")
# element-citation, mixed-citation, or citation?
n <- length(z)
# no refs?
if(n == 0){
NULL
}else{
refs <- vector("list", n)
for(i in 1:n){
z2 <- xmlDoc(z[[ i ]])
# label <- xvalue(z2, "//label")
label <- xpathSApply(z2, "//label",xmlValue, trim=TRUE)
if(length(label) == 0) label<- NA
label <- gsub("\\.$", "", label) # 1. 2. 3. to 1 2 3
id <- xpathSApply(z2, "//ref", xmlGetAttr, "id")
# mixed or element-citation?
type <- xpathSApply(z2, "//ref/element-citation|//ref/mixed-citation", xmlGetAttr, "publication-type")
## BMC genomics from 2007 PMC1853089
if(length(type) == 0) type <- xpathSApply(z2, "//ref/citation", xmlGetAttr, "citation-type")
if(length(type) == 0) type <- xpathSApply(z2, "//ref/note", xmlName)
if(length(type) == 0) type <- "unknown"
# CHECK TAGS
tags <- unique( xpathSApply(z2, "//ref/element-citation//*|//ref/mixed-citation//*|//ref/citation//*", xmlName) )
# no spaces between surname and given name, so need to parse
if("surname"%in% tags){
a1 <- xpathSApply(z2, "//surname", xmlValue)
a2 <- xpathSApply(z2, "//given-names", xmlValue)
a3 <- paste(a1, a2)
etal <- xpathSApply(z2, "//etal", xmlValue)
if(length(etal) >0){
if(etal == "") etal <- "et al" # may be tag only see PMC2374372
a3<- c(a3, etal)
}
a3 <- paste(a3, collapse=", ")
year <- xvalue(z2, "//year")
title <- xvalue(z2, "//article-title")
title <- gsub("\\.$", "", title)
journal <- xvalue(z2, "//source")
volume <- xvalue(z2, "//volume")
pages <- xvalue(z2, "//fpage")
if(!is.na(pages)){
x2 <- xvalue(z2, "//lpage")
if(!is.na(x2)) pages <- paste(pages, x2, sep = "-")
}
pmid <- xvalue(z2, '//pub-id[@pub-id-type="pmid"]')
bookpub <- xvalue(z2, "//publisher-name")
## TO DO need better parsing for books
if(!is.na(bookpub)){
y <- xvalue(z2, "//publisher-loc") # may be missing on Ph.d
if(!is.na(y)) bookpub <- paste(y, bookpub, sep=": ")
title <- journal
journal <- bookpub
}
# missing title and other tags see PMC3559055
if(is.na(title)){
title <- paste(xpathSApply(z2, "//ref/element-citation/text()|//ref/mixed-citation/text()|//ref/citation/text()", xmlValue) , collapse="")
title <- gsub("^[,. ]+", "", title)
}
refs[[i]] <- data.frame(pmid, authors=a3, year,
title, journal, volume, pages, label, id, type,
stringsAsFactors=FALSE)
}else{
## use node() to avoid combining words like "CDC 2014 Map of" into "CDC2104Map of "
title <- paste( xpathSApply(z2, "//ref/element-citation/node()|//ref/mixed-citation/node()|//ref/citation/node()", xmlValue), collapse=" ")
if(title == "") title <- xpathSApply(z2, "//ref//p", xmlValue) # notes
title <- gsub(" ", " ", title)
title <- gsub(" . ", ". ", title, fixed=TRUE)
pmid <- xvalue(z2, '//pub-id[@pub-id-type="pmid"]')
if(!is.na(pmid)) title <- gsub(pmid, "", title)
title <- gsub("( ", "(", title, fixed=TRUE)
refs[[i]] <- data.frame(pmid, authors=NA, year= NA, title, journal=NA, volume=NA, pages=NA, label, id, type, stringsAsFactors=FALSE)
}
free(z2)
}
x <- do.call("rbind", refs)
if(all(is.na(x$label))){
print("Warning: No label tags found, using id tag instead")
x$label <- gsub("[^0-9]", "", x$id)
}
x
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.