chunks: Extract chunks of data from articles

Description Usage Arguments Details Value Examples

View source: R/chunks.R

Description

chunks makes it easy to extract sections of an article. You can extract just the authors across all articles, or all references sections, or the complete text of each article. Then you can pass the output downstream for visualization and analysis.

Usage

chunks(x, what = "all")

tabularize(x)

Arguments

x

An object of class ft_data, the output from a call to ft_get

what

What to get, can be one or more in a vector or list. See Details.

Details

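Options for the what parameter (the values used in the Examples below are: all, front, body, back, title, doi, categories, authors, keywords, abstract, executive_summary, refs, refs_dois, publisher, journal_meta, article_meta, acknowledgments, permissions, history):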

Note that only PLOS, eLife, and Entrez are currently supported; more to come.

Value

A list of outputs, one element for each item requested

Examples

## Not run: 
# Example session for chunks() and tabularize(): fetch articles with ft_get(),
# then extract named sections from the result with chunks().
# NOTE(review): wrapped in "Not run", presumably because ft_get()/ft_search()
# need network access to the article sources -- confirm.
# NOTE(review): `%>%` is used below without an explicit library() call for it;
# presumably it is exported by the package or requires magrittr -- confirm.

# Single PLOS article, identified by DOI: extract only the authors
x <- ft_get('10.1371/journal.pone.0086169', from='plos')
chunks(x, what="authors")

# Several PLOS articles: use rplos::searchplos() to look up DOIs (limit=5),
# keeping only the `id` field. The outer parentheses print the assigned value.
library("rplos")
(dois <- searchplos(q="*:*", fl='id',
   fq=list('doc_type:full',"article_type:\"research article\""), limit=5)$data$id)
x <- ft_get(dois, from="plos")
# Each call below requests one section (or a vector of sections) via `what`;
# the pipe passes `x` as the first argument of chunks()
x %>% chunks("front")
x %>% chunks("body")
x %>% chunks("back")
x %>% chunks("history")
x %>% chunks(c("doi","history")) %>% tabularize()
x %>% chunks("authors")
x %>% chunks(c("doi","categories"))
x %>% chunks("all")
x %>% chunks("publisher")
x %>% chunks("acknowledgments")
x %>% chunks("permissions")
x %>% chunks("journal_meta")
x %>% chunks("article_meta")

# Coerce list output to a data.frame, where possible, with tabularize()
# (same DOI search as above, repeated -- presumably so this section can be
# run on its own)
(dois <- searchplos(q="*:*", fl='id',
   fq=list('doc_type:full',"article_type:\"research article\""), limit=5)$data$id)
x <- ft_get(dois, from="plos")
x %>% chunks("publisher") %>% tabularize()
x %>% chunks("refs") %>% tabularize()
x %>% chunks(c("doi","publisher")) %>% tabularize()
x %>% chunks(c("doi","publisher","permissions")) %>% tabularize()

# Same chunks() + tabularize() pattern on three articles fetched from Entrez
x <- ft_get(c("10.3389/fnagi.2014.00130",'10.1155/2014/249309','10.1155/2014/162024'),
   from='entrez')
x %>% chunks("doi") %>% tabularize()
x %>% chunks("authors") %>% tabularize()
x %>% chunks(c("doi","publisher","permissions")) %>% tabularize()
x %>% chunks("history") %>% tabularize()

# Keywords from a single Entrez article
x <- ft_get('10.3389/fnagi.2014.00130', from='entrez')
x %>% chunks("keywords")

# Piping workflow: search -> take the PLOS result ids -> fetch -> extract.
# `opts` is handed to ft_search() through its `plosopts` argument.
opts <- list(fq=list('doc_type:full',"article_type:\"research article\""))
ft_search(query='ecology', from='plos', plosopts = opts)$plos$data$id %>%
 ft_get(from = "plos") %>%
 chunks("publisher")

# Via entrez: plain function-call form (no pipe), one section per call
res <- ft_get(c("10.3389/fnagi.2014.00130",'10.1155/2014/249309','10.1155/2014/162024'),
   from='entrez')
chunks(res, what="abstract")
chunks(res, what="title")
chunks(res, what="keywords")
chunks(res, what="publisher")

# Search Entrez, then fetch by the DOIs in the search result; `[1:4]` limits
# the fetch to the first four DOIs. The outer parentheses print `res`.
(res <- ft_search(query='ecology', from='entrez'))
ft_get(res$entrez$data$doi, from='entrez') %>% chunks("title")
ft_get(res$entrez$data$doi[1:4], from='entrez') %>% chunks("acknowledgments")
ft_get(res$entrez$data$doi[1:4], from='entrez') %>% chunks(c('title','keywords'))

# From eLife: two articles by DOI, including the "refs_dois" and
# "executive_summary" sections, which are only requested in this eLife example
x <- ft_get(c('10.7554/eLife.04251', '10.7554/eLife.04986'), from='elife')
x %>% chunks("abstract")
x %>% chunks("publisher")
x %>% chunks("journal_meta")
x %>% chunks("acknowledgments")
x %>% chunks("refs_dois")
x %>% chunks(c("abstract", "executive_summary"))

## End(Not run)

fulltext documentation built on May 29, 2017, 12:09 p.m.