R Notebook

library(petro.One)

# Unfiltered search ----
# Query "neural network" with how = "all" and no document-type filter.
my_url <- make_search_url(query = "neural network", how = "all")

# Total number of papers reported for this search URL.
get_papers_count(my_url)
# 3238

# Paper counts broken down by document type.
df <- papers_by_type(my_url)
df
# Conference paper  2881
# General              4
# Journal paper      323
# Media                2
# Other                5
# Presentation        23

# The recorded per-type counts add up to the recorded total.
sum(df$value)
# 3238
# Conference papers only ----
# Same query, now with document type = "conference-paper" and rows = 100.
my_url <- make_search_url(
  query = "neural network",
  how = "all",
  dc_type = "conference-paper",
  rows = 100
)

# Count and per-type breakdown for the filtered search URL.
get_papers_count(my_url)
papers_by_type(my_url)

# Convert the result page into a data frame and display it.
df <- onepetro_page_to_dataframe(my_url)
df
# Reproduction case: the "neural network" query restricted to dc_type = "other".
my_url <- make_search_url(query = "neural network", 
                          how = "all",
                          dc_type = "other",
                          rows = 100)

# Recorded paper count for this search URL:
get_papers_count(my_url)
# 5
# Converting the same page to a data frame is where the recorded error below
# appears (presumably raised inside onepetro_page_to_dataframe() -- TODO confirm).
df <- onepetro_page_to_dataframe(my_url)
df
# Error in `[.data.frame`(source_data, , 5) : undefined columns selected
# Reproduction case: the "neural network" query restricted to dc_type = "media".
my_url <- make_search_url(query = "neural network", 
                          how = "all",
                          dc_type = "media",
                          rows = 100)

# Recorded paper count for this search URL:
get_papers_count(my_url)
# 2
# Although the recorded count is 2, the data frame built from the same URL
# was recorded as empty (see the note after `df`).
df <- onepetro_page_to_dataframe(my_url)
df
# 0 rows
library(petro.One)

# Single-word query ----
# Search for "pressure", passing no other arguments to make_search_url().
my_url <- make_search_url(query = "pressure")

# Total number of papers reported for this search URL.
get_papers_count(my_url)
# Search results: Your search for pressure has returned 132,327 results.
# 132327

# Paper counts broken down by document type.
papers_by_type(my_url)
# Chapter                87
# Conference paper   108985
# General              1521
# Journal paper       20807
# Media                 107
# Other                 158
# Presentation          559
# Standard              103
library(petro.One)

# Reproduction case: the "pressure" query requested with rows = 90.
# NOTE(review): this chunk stores the search URL in `url`, whereas the earlier
# chunks use `my_url`. `url` is also the name of a base R function, so the
# variable name is easy to misread.
url <- make_search_url(query = "pressure",
                          rows = 90        # page of size 90
                          )

# get_papers_count(my_url)
df <- onepetro_page_to_dataframe(url)
df
# The recorded error below names `my_url` although the call above passes `url`;
# presumably it was captured from an earlier version of this chunk -- TODO confirm.
# Error in onepetro_page_to_dataframe(my_url) : Dataframe sizes different
# The next two lines look like the record suspected of causing the size
# mismatch -- TODO confirm.
# WRB Lance Formation - Pressure Gradient
# Document Type: Other
get_papers_count(url)
# Read the raw result page once, then call the package's internal (non-exported)
# readers for titles, sources and authors on it one at a time.
webpage <- xml2::read_html(url)
petro.One:::read_titles(webpage)
petro.One:::read_sources(webpage)
petro.One:::read_author(webpage)

The question

How do we know how many conference-paper, media, or presentation documents the webpage we just read contains?

Is there a way to search the webpage for those tags?

Visit External Site
> read_html(url) %>%
+     html_nodes(., css = '.stats-container') %>%
+     html_text(url)
Error in xpath_class() : could not find function "xpath_class"
>
>
> read_html(url) %>%
+     html_nodes(., xpath = '//*[contains(concat( " ", @class, " " ), concat( " ", "stats-container", " " ))]') %>%
+     html_text(url)
character(0)

wzxhzdk:34 wzxhzdk:35 wzxhzdk:36 wzxhzdk:37 wzxhzdk:38 wzxhzdk:39 wzxhzdk:40 wzxhzdk:41 wzxhzdk:42

The tag `result-link:nth-child(1)` returns a partial list of the papers. Some papers will not have download links.

wzxhzdk:43 wzxhzdk:44

## Represent a set of non-uniform sized dataframes in R

https://stackoverflow.com/questions/17125296/represent-a-set-of-non-uniform-sized-dataframes-in-r

wzxhzdk:45

data-type dc_type-media

Try the petro.One package in your browser

Any scripts or data that you put into this service are public.

petro.One documentation built on May 2, 2019, 3:10 p.m.