Description Usage Arguments Details Value Examples
pub_chunks
makes it easy to extract sections of an article.
You can extract just authors across all articles, or all references
sections, or the complete text of each article. Then you can pass the
output downstream for visualization and analysis.
pub_chunks(x, sections = "all", provider = NULL)
|
x |
one of the following:
|
sections |
(character) What elements to get; can be one or more in
a vector or list. See Details.
|
provider |
(character) a single publisher name. see
|
Options for the sections
parameter:
front - Publisher, journal and article metadata elements
body - Body of the article
back - Back of the article, acknowledgments, author contributions, references
title - Article title
doi - Article DOI
categories - Publisher's categories, if any
authors - Authors
aff - Affiliation (includes author names)
keywords - Keywords
abstract - Article abstract
executive_summary - Article executive summary
refs - References
refs_dois - References DOIs - if available
publisher - Publisher name
journal_meta - Journal metadata
article_meta - Article metadata
acknowledgments - Acknowledgments
permissions - Article permissions
history - Dates, received, published, accepted, etc.
A list, named by the section selected. Sections not found or
not in the accepted list return NULL
or a zero-length list. A ".publisher"
list element gets attached to each list output, even when no
data is found. When fulltext::ft_get
output is passed in here, the
list is named by the publisher, then within each publisher is a list
of articles named by their identifiers (e.g. DOIs).
# a file path to an XML file
x <- system.file("examples/elsevier_1.xml", package = "pubchunks")
pub_chunks(x, "title")
pub_chunks(x, "authors")
pub_chunks(x, "acknowledgments")
pub_chunks(x, "refs")
pub_chunks(x, c("title", "refs"))
## Not run:
# works the same with the xml already in a string
xml <- paste0(readLines(x), collapse = "")
pub_chunks(xml, "title")
# also works if you've already read in the XML (with xml2 pkg)
xml <- paste0(readLines(x), collapse = "")
xml <- xml2::read_xml(xml)
pub_chunks(xml, "title")
# Hindawi
x <- system.file("examples/hindawi_1.xml", package = "pubchunks")
pub_chunks(x, "abstract")
pub_chunks(x, "authors")
pub_chunks(x, "aff")
pub_chunks(x, "title")
pub_chunks(x, "refs")$refs
pub_chunks(x, c("abstract", "title", "authors", "refs"))
# Pensoft
x <- system.file("examples/pensoft_1.xml", package = "pubchunks")
pub_chunks(x, "abstract")
pub_chunks(x, "aff")
pub_chunks(x, "title")
pub_chunks(x, "refs")$refs
pub_chunks(x, c("abstract", "title", "authors", "refs"))
# Peerj
x <- system.file("examples/peerj_1.xml", package = "pubchunks")
pub_chunks(x, "abstract")
pub_chunks(x, "authors")
pub_chunks(x, "aff")
pub_chunks(x, "title")
pub_chunks(x, "refs")$refs
pub_chunks(x, c("abstract", "title", "authors", "refs"))
# Frontiers
x <- system.file("examples/frontiers_1.xml", package = "pubchunks")
pub_chunks(x, "authors")
pub_chunks(x, "aff")
pub_chunks(x, "refs")$refs
pub_chunks(x, c("doi", "abstract", "title", "authors", "refs", "abstract"))
# eLife
x <- system.file("examples/elife_1.xml", package = "pubchunks")
pub_chunks(x, "authors")
pub_chunks(x, "aff")
pub_chunks(x, "refs")$refs
pub_chunks(x, c("doi", "title", "authors", "refs"))
# f1000research
x <- system.file("examples/f1000research_3.xml", package = "pubchunks")
pub_chunks(x, "title")
pub_chunks(x, "aff")
pub_chunks(x, "refs")$refs
pub_chunks(x, c("doi", "title", "authors", "keywords", "refs"))
# Copernicus
x <- system.file("examples/copernicus_1.xml", package = "pubchunks")
pub_chunks(x, c("doi", "abstract", "title", "authors", "refs"))
pub_chunks(x, "aff")
pub_chunks(x, "refs")$refs
# MDPI
x <- system.file("examples/mdpi_1.xml", package = "pubchunks")
x <- system.file("examples/mdpi_2.xml", package = "pubchunks")
pub_chunks(x, "title")
pub_chunks(x, "aff")
pub_chunks(x, "refs")$refs
vv <- pub_chunks(x, c("doi", "title", "authors", "keywords", "refs",
"abstract", "categories"))
vv$doi
vv$title
vv$authors
vv$keywords
vv$refs
vv$abstract
vv$categories
# Many inputs at once
x <- system.file("examples/frontiers_1.xml", package = "pubchunks")
y <- system.file("examples/elife_1.xml", package = "pubchunks")
z <- system.file("examples/f1000research_1.xml", package = "pubchunks")
pub_chunks(list(x, y, z), c("doi", "title", "authors", "refs"))
# non-XML files/content are xxx?
# pub_chunks('foo bar')
# Pubmed brief XML files (abstract only)
x <- system.file("examples/pubmed_brief_1.xml", package = "pubchunks")
pub_chunks(x, "title")
# Pubmed full XML files
x <- system.file("examples/pubmed_full_1.xml", package = "pubchunks")
pub_chunks(x, "title")
# using output of fulltext::ft_get()
if (requireNamespace("fulltext", quietly = TRUE)) {
library("fulltext")
# single
x <- fulltext::ft_get('10.7554/eLife.03032')
pub_chunks(fulltext::ft_collect(x), sections="authors")
# many
dois <- c('10.1371/journal.pone.0086169', '10.1371/journal.pone.0155491',
'10.7554/eLife.03032')
x <- fulltext::ft_get(dois)
pub_chunks(fulltext::ft_collect(x), sections="authors")
# as.ft_data() function
x <- ft_collect(as.ft_data())
names(x)
x$cached
pub_chunks(x, "title")
pub_chunks(x, "title") %>% pub_tabularize()
}
## End(Not run)
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.