Document types that are not uniform

Data for the document types "other" and "media" are not retrieved at the moment the query returns.

Problem: yields errors even though the query has 12 rows

library(petro.One)
my_url <- make_search_url(query = "deepwater", 
                          dc_type = "other")
get_papers_count(my_url)
# 12
onepetro_page_to_dataframe(my_url)
library(petro.One)

my_url <- make_search_url(query = "deepwater", 
                          dc_type = "other")

webpage <- read_html(my_url)
petro.One:::read_titles(webpage)

Problem: returns 0 rows even though the query has 69 rows

library(petro.One)

my_url <- make_search_url(query = "deepwater", dc_type = 'media')

get_papers_count(my_url)
# 69
onepetro_page_to_dataframe(my_url)
# https://www.onepetro.org/search?q=deepwater&peer_reviewed=&published_between=&from_year=&to_year=&dc_type=media
library(petro.One)
my_url <- make_search_url(query = "deepwater", 
                          dc_type = "media")
webpage <- read_html(my_url)
petro.One:::read_titles(webpage)

Different dc_type values return different numbers of columns

# fails due to different size of dataframes
# number of columns of result is not a multiple of vector length (arg 40)

# 
# specify document type = "conference-paper", rows = 1000

library(petro.One)
# source('./R/url.R')

my_url <- make_search_url(query = "unconventional",
                          rows = 1000)

get_papers_count(my_url)
# 16359
# 17843
papers_by_type(my_url)
onepetro_page_to_dataframe(my_url)
# Error in onepetro_page_to_dataframe(my_url) : Dataframe sizes different

Problem: fails when rows >= 1025

library(petro.One)
## fails when rows=1500
# source('./R/url.R')

my_url <- make_search_url(query = "shale oil", 
                          how = "all", 
                          dc_type = "conference-paper",
                          rows = 1000)

get_papers_count(my_url)
papers_by_type(my_url)
# 2578, dc_type = NULL
# 2578, dc_type = "conference-paper"
#  380, dc_type = "journal-paper"
onepetro_page_to_dataframe(my_url)


f0nzie/petro.One documentation built on May 29, 2019, 12:05 a.m.