First, we run the search with get_papers = FALSE, which skips building the dataframe of papers.

library(petro.One)

# "data driven" is treated the same as "data-driven" by the search
top <- c("data driven")
discipline <- c("reservoir", "production", "surface facilities", "metering")

by.discipline.dd <- join_keywords(top, discipline, 
                                   get_papers = FALSE, sleep = 3, verbose = TRUE)
by.discipline.dd
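
The comment above notes that "data driven" is treated the same as "data-driven" by the OnePetro search. A minimal sketch to check that, reusing make_search_url() and get_papers_count() from petro.One as shown later in this document:

# compare paper counts for the two spellings
url_plain  <- make_search_url(query = "data driven", how = "all")
url_hyphen <- make_search_url(query = "data-driven", how = "all")
get_papers_count(url_plain)
get_papers_count(url_hyphen)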

Next, we repeat the search with get_papers = TRUE, which also builds a dataframe of the papers found.

library(petro.One)

top <- c("data driven")
discipline <- c("reservoir", "production", "surface facilities", "metering")

by.discipline.dd <- join_keywords(top, discipline, 
                                   get_papers = TRUE, sleep = 3, verbose = TRUE)
by.discipline.dd
# save the result to the project with rNodal.utils
library(rNodal.utils)
data_driven <- by.discipline.dd
save_to_project(data_driven, name = "data_driven_L2")
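
The object returned with get_papers = TRUE is a list of two dataframes (the $keywords and $papers elements used further down). A quick sketch to confirm its structure:

# the result is a list with a keywords dataframe and a papers dataframe
names(by.discipline.dd)
nrow(by.discipline.dd$keywords)   # one row per keyword combination
nrow(by.discipline.dd$papers)     # one row per paper found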

Next, we try another search, this one returning 1300+ papers.

library(petro.One)

major <- c("artificial intelligence")
minor <- c("drilling")

# the returned data structure is a list
# the list contains two dataframes: one for the keywords and a second for the papers
ai_drilling <- join_keywords(major, minor, get_papers = TRUE, sleep = 3, verbose = TRUE)
ai_drilling
# save the result to the project
library(rNodal.utils)
save_to_project(ai_drilling, name = "ai_drilling_L2")
# papers per keyword combination from the earlier data-driven search
table(by.discipline.dd$papers$keyword)

The retrieval crashes when a single query returns more than 1000 papers.
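
One way to stay under that limit is to retrieve the papers one keyword combination at a time and bind the results. This is only a sketch built from the functions used below (fetch_combo is a hypothetical helper, and it assumes every individual combination returns fewer than 1000 papers):

# fetch papers per keyword combination instead of in one large request
fetch_combo <- function(sf) {
    url   <- make_search_url(sf, how = "all")
    count <- get_papers_count(url)
    if (count == 0) return(NULL)
    onepetro_page_to_dataframe(make_search_url(sf, how = "all", rows = count))
}
papers_list <- lapply(by.discipline.dd$keywords$sf, fetch_combo)
all_papers  <- do.call(rbind, papers_list)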

# URL of the third keyword combination
my.url <- by.discipline.dd$keywords$url[3]
my.url
get_papers_count(my.url)
# 79
onepetro_page_to_dataframe(my.url)

# rebuilding the search URL with the full row count, then using onepetro_page_to_dataframe()
recno <- 3
my.sf <- by.discipline.dd$keywords$sf[recno]   # search string for this keyword combination
url.1 <- make_search_url(my.sf, how = "all")
url.1
paper_count <- get_papers_count(url.1)
paper_count
url.2 <- make_search_url(my.sf, how = "all", rows = paper_count)
url.2
papers.df <- onepetro_page_to_dataframe(url.2)
papers.df
# "conference-paper" are the main category of papers
library(petro.One)
library(dplyr)    # for %>% and filter()

recno <- 2
my.sf <- by.discipline.dd$keywords$sf[recno]
url.1 <- make_search_url(my.sf, how = "all")
url.1
paper_count <- get_papers_count(url.1)
paper_count
url.2 <- make_search_url(my.sf, how = "all", rows = paper_count)
url.2
papers.df.j <- read_multipage(url.2) %>% 
    filter(dc_type == "journal-paper")
papers.df.j
# recover the paper count from the rows parameter embedded in the URL
paper_count <- as.numeric(urltools::param_get(url.2, "rows"))
paper_count
papers.df.j <- read_multipage(url.2, doctype = "journal-paper")
papers.df.c <- read_multipage(url.2, doctype = "conference-paper")
papers.df.p <- read_multipage(url.2, doctype = "presentation")

papers.df <- rbind(papers.df.c, papers.df.j, papers.df.p)
papers.df
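
To check that conference papers dominate, we can tabulate the document types in the combined dataframe. This assumes the dc_type column returned by read_multipage() is kept when a doctype is requested:

# distribution of document types in the combined dataframe
table(papers.df$dc_type)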
library(petro.One)
my_url <- make_search_url(query = "neural network",
                          how = "all")
df <- read_multidoc(my_url)
dim(df)
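
A quick look at the result; the tabulation assumes read_multidoc() also returns a dc_type column, as read_multipage() does above:

head(df)            # first few papers
table(df$dc_type)   # papers per document type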
recno <- 1
my.sf <- by.discipline.dd$keywords$sf[recno]
url.1 <- make_search_url(my.sf, how = "all")
url.1
papers_by_type(url.1)   # summary of papers by document type

