library(petro.One)

# Search phrases, one per line of the text block.
keywords <- "
water injection
water-injection
water injection machine learning
water injection artificial intelligence
"

# Parse the text block into a one-column data frame: every line becomes one
# row of the `keyword` column.
kw_tbl <- read.table(
  text = keywords,
  header = FALSE,
  sep = "\n",
  stringsAsFactors = FALSE,
  strip.white = TRUE,
  col.names = "keyword"
)
kw_tbl

# Query OnePetro once per keyword and keep the paper count of each search.
rec <- vector("list")
for (i in seq_along(kw_tbl$keyword)) {
  k <- kw_tbl$keyword[i]
  url_all <- make_search_url(query = k, how = "all")   # build the search URL
  count <- get_papers_count(url_all)                   # number of papers found
  rec[[i]] <- list(keyword = k, count = count)         # one record per keyword
  cat(sprintf("%30s %5d \n", k, count))                # progress line
  Sys.sleep(5)                                         # pause between requests
}

# Stack the per-keyword records into a single table.
data.table::rbindlist(rec)
library(petro.One)

# Search phrases, one per line of the text block.
keywords <- "
water injection machine-learning
water injection intelligence
water injection intelligent
water injection workflow
water injection learning
waterflooding intelligence
waterflooding intelligent
waterflooding machine learning
waterflooding algorithm
water flooding intelligence
water flooding intelligent
water flooding workflow
water flooding control
water flooding algorithm
water flooding SVM
water flooding loop
"

# Parse the text block into a one-column data frame: every line becomes one
# row of the `keyword` column.
kw_tbl <- read.table(
  text = keywords,
  header = FALSE,
  sep = "\n",
  stringsAsFactors = FALSE,
  strip.white = TRUE,
  col.names = "keyword"
)
kw_tbl

# Query OnePetro once per keyword and keep the paper count of each search.
rec <- vector("list")
for (i in seq_along(kw_tbl$keyword)) {
  k <- kw_tbl$keyword[i]
  url_all <- make_search_url(query = k, how = "all")   # build the search URL
  count <- get_papers_count(url_all)                   # number of papers found
  rec[[i]] <- list(keyword = k, paper_count = count)   # one record per keyword
  cat(sprintf("%30s %5d \n", k, count))                # progress line
  Sys.sleep(5)                                         # pause between requests
}

# Stack the per-keyword records into a single table.
data.table::rbindlist(rec)
The result does not change. It is similar to the previous search.
library(petro.One)

# Same phrases as before, with the two-word major keyword joined by "+".
keywords <- "
water+injection machine-learning
water+injection intelligence
water+injection intelligent
water+injection workflow
water+injection learning
waterflooding intelligence
waterflooding intelligent
waterflooding machine learning
waterflooding algorithm
water+flooding intelligence
water+flooding intelligent
water+flooding workflow
water+flooding control
water+flooding algorithm
water+flooding SVM
water+flooding loop
"

# Parse the text block into a one-column data frame: every line becomes one
# row of the `keyword` column.
kw_tbl <- read.table(
  text = keywords,
  header = FALSE,
  sep = "\n",
  stringsAsFactors = FALSE,
  strip.white = TRUE,
  col.names = "keyword"
)
kw_tbl

# Query OnePetro once per keyword and keep the paper count of each search.
rec <- vector("list")
for (i in seq_along(kw_tbl$keyword)) {
  k <- kw_tbl$keyword[i]
  url_all <- make_search_url(query = k, how = "all")   # build the search URL
  count <- get_papers_count(url_all)                   # number of papers found
  rec[[i]] <- list(keyword = k, paper_count = count)   # one record per keyword
  cat(sprintf("%30s %5d \n", k, count))                # progress line
  Sys.sleep(5)                                         # pause between requests
}

# Stack the per-keyword records into a single table.
data.table::rbindlist(rec)
There is no change either.
library(petro.One)

# Same phrases again, now with the composite keywords wrapped in apostrophes.
keywords <- "
'water+injection' machine-learning
'water+injection' intelligence
'water+injection' intelligent
'water+injection' workflow
water+injection learning
waterflooding intelligence
waterflooding intelligent
waterflooding 'machine+learning'
waterflooding algorithm
'water+flooding' intelligence
'water+flooding' intelligent
'water+flooding' workflow
'water+flooding' control
'water+flooding' algorithm
'water+flooding' SVM
'water+flooding' loop
"

# Parse the text block into a one-column data frame: every line becomes one
# row of the `keyword` column.
# NOTE(review): read.table() is called with its default `quote` setting, so
# the apostrophes may be interpreted as quoting characters - confirm that
# kw_tbl holds the phrases exactly as typed.
kw_tbl <- read.table(
  text = keywords,
  header = FALSE,
  sep = "\n",
  stringsAsFactors = FALSE,
  strip.white = TRUE,
  col.names = "keyword"
)
kw_tbl

# Query OnePetro once per keyword and keep the paper count of each search.
rec <- vector("list")
for (i in seq_along(kw_tbl$keyword)) {
  k <- kw_tbl$keyword[i]
  url_all <- make_search_url(query = k, how = "all")   # build the search URL
  count <- get_papers_count(url_all)                   # number of papers found
  rec[[i]] <- list(keyword = k, paper_count = count)   # one record per keyword
  cat(sprintf("%30s %5d \n", k, count))                # progress line
  Sys.sleep(5)                                         # pause between requests
}

# Stack the per-keyword records into a single table.
data.table::rbindlist(rec)
library(petro.One)

# Several spellings of the same two-keyword query, to compare how each form
# is counted.
keywords <- "
\\'water+injection\\'+AND+\\'machine-learning\\'
\\water injection\\+AND+machine-learning
water injection machine-learning
water+injection machine-learning
'water+injection'+machine-learning
'water+injection'+AND+'machine-learning'
"

# Parse the text block into a one-column data frame: every line becomes one
# row of the `keyword` column.
# NOTE(review): read.table() is called with its default `quote` setting, so
# the apostrophes may be interpreted as quoting characters - confirm that
# kw_tbl holds the phrases exactly as typed.
kw_tbl <- read.table(
  text = keywords,
  header = FALSE,
  sep = "\n",
  stringsAsFactors = FALSE,
  strip.white = TRUE,
  col.names = "keyword"
)
kw_tbl

# Query OnePetro once per keyword and keep the paper count of each search.
rec <- vector("list")
for (i in seq_along(kw_tbl$keyword)) {
  k <- kw_tbl$keyword[i]
  url_all <- make_search_url(query = k, how = "all")   # build the search URL
  count <- get_papers_count(url_all)                   # number of papers found
  rec[[i]] <- list(keyword = k, paper_count = count)   # one record per keyword
  cat(sprintf("%30s %5d \n", k, count))                # progress line
  Sys.sleep(5)                                         # pause between requests
}

# Stack the per-keyword records into a single table.
data.table::rbindlist(rec)
# this is the correct way of passing composite keywords:
# each phrase is wrapped in apostrophes, its words are joined with "+",
# and the phrases are joined with +AND+
url_all <- make_search_url(query = "'water+injection'+AND+'machine-learning'", how = "all")
url_all
get_papers_count(url_all)    # 143
library(petro.One)

# The same composite query written with different quoting/escaping styles.
keywords <- "
\\'water+injection\\'+AND+\\'machine-learning\\'
\\'water+injection\\'+AND+\\'machine-learning\\'
\"water+injection\"+AND+\"machine-learning\"
'water+injection'+AND+'machine-learning'
"

# Parse the text block into a one-column data frame: every line becomes one
# row of the `keyword` column.
# NOTE(review): read.table() is called with its default `quote` setting, so
# the quote characters may be interpreted as quoting - confirm that kw_tbl
# holds the phrases exactly as typed.
kw_tbl <- read.table(
  text = keywords,
  header = FALSE,
  sep = "\n",
  stringsAsFactors = FALSE,
  strip.white = TRUE,
  col.names = "keyword"
)
kw_tbl

# Query OnePetro once per keyword and keep the paper count of each search.
rec <- vector("list")
for (i in seq_along(kw_tbl$keyword)) {
  k <- kw_tbl$keyword[i]
  url_all <- make_search_url(query = k, how = "all")   # build the search URL
  count <- get_papers_count(url_all)                   # number of papers found
  rec[[i]] <- list(keyword = k, paper_count = count)   # one record per keyword
  cat(sprintf("%30s %5d \n", k, count))                # progress line
  Sys.sleep(5)                                         # pause between requests
}

# Stack the per-keyword records into a single table.
data.table::rbindlist(rec)
library(petro.One)

# Optimization-related phrases, covering both spellings and a truncated stem.
keywords <- "
water injection optimization
water injection optimisation
water injection optim
waterflooding optimization
waterflooding optimisation
waterflooding optim
water flooding optim
"

# Parse the text block into a one-column data frame: every line becomes one
# row of the `keyword` column.
kw_tbl <- read.table(
  text = keywords,
  header = FALSE,
  sep = "\n",
  stringsAsFactors = FALSE,
  strip.white = TRUE,
  col.names = "keyword"
)
kw_tbl

# Query OnePetro once per keyword and keep the paper count of each search.
rec <- vector("list")
for (i in seq_along(kw_tbl$keyword)) {
  k <- kw_tbl$keyword[i]
  url_all <- make_search_url(query = k, how = "all")   # build the search URL
  count <- get_papers_count(url_all)                   # number of papers found
  rec[[i]] <- list(keyword = k, paper_count = count)   # one record per keyword
  cat(sprintf("%30s %5d \n", k, count))                # progress line
  Sys.sleep(5)                                         # pause between requests
}

# Stack the per-keyword records into a single table.
data.table::rbindlist(rec)
This value is low because we are using "and", which is equivalent to "exact phrase" in OnePetro.
library(petro.One)

# Download the result pages for the four spelling variants. `rows` caps the
# number of records requested per query (presumably set to each query's
# paper count - TODO confirm).
url1 <- make_search_url(query = "water injection optimization", how = "all", rows = 129)
df1 <- onepetro_page_to_dataframe(url1)

url2 <- make_search_url(query = "water injection optimisation", how = "all", rows = 53)
df2 <- onepetro_page_to_dataframe(url2)

url3 <- make_search_url(query = "waterflooding optimization", how = "all", rows = 186)
df3 <- onepetro_page_to_dataframe(url3)

url4 <- make_search_url(query = "waterflooding optimisation", how = "all", rows = 75)
df4 <- onepetro_page_to_dataframe(url4)

# Stack the results: df12 holds the two "water injection" queries, df14 all four.
df12 <- rbind(df1, df2)
df14 <- rbind(df1, df2, df3, df4)
df14

# write.csv() always writes the column header; it ignores an explicit
# `col.names` argument and emits a warning, so the argument is not passed.
write.csv(df14, file = "../notebooks/wi_wf.csv")
# print the current working directory (relative file paths resolve against it)
getwd()
library(petro.One)

# Search phrases, one per line of the text block.
keywords <- "
''water flooding'' genetic
water injection algorithm
water injection prediction
water injection data-driven
waterflooding data-driven
waterflooding algorithm
waterflooding prediction
water flooding algorithm
water flooding intelligent
water flooding control
water flooding machine
water flooding neural
"

# Parse the text block into a one-column data frame: every line becomes one
# row of the `keyword` column.
kw_tbl <- read.table(
  text = keywords,
  header = FALSE,
  sep = "\n",
  stringsAsFactors = FALSE,
  strip.white = TRUE,
  col.names = "keyword"
)
kw_tbl

# For each keyword: record the paper count and, when the search found
# anything, download the matching papers and append them to `papers`.
papers <- data.frame()
rec <- vector("list")
for (i in seq_along(kw_tbl$keyword)) {
  k <- kw_tbl$keyword[i]
  url_all <- make_search_url(query = k, how = "all")   # build the search URL
  count <- get_papers_count(url_all)                   # number of papers found
  rec[[i]] <- list(keyword = k, paper_count = count)   # one record per keyword

  if (count > 0) {
    # request all `count` records in one query
    url_gt0 <- make_search_url(query = k, how = "all", rows = count)
    df <- onepetro_page_to_dataframe(url_gt0)
    papers <- rbind(papers, df)                        # cumulative data frame
  }

  cat(sprintf("%30s %5d \n", k, count))                # progress line
  Sys.sleep(5)                                         # pause between requests
}

# Summary table of counts, then only the keywords that returned papers.
dt <- data.table::rbindlist(rec)
dt
dt[dt$paper_count>0,]
library(petro.One)

# search any word like "neural" or "network"
url_any <- make_search_url(query = "neural network", how = "any")
url_any
get_papers_count(url_any)

# search for papers that have "neural" and "network" at the same time
url_all <- make_search_url(query = "neural network", how = "all")
url_all
get_papers_count(url_all)

# search combining two composite keywords: words joined with "+",
# each phrase wrapped in apostrophes
url_all <- make_search_url(query = "'neural+network'+'water+injection'", how = "all")
url_all
get_papers_count(url_all)
neural network
and water injection
We have to use plus signs and apostrophes to build a search query from major keywords that contain spaces.
# two composite keywords joined with "+" only (no explicit operator)
url_all <- make_search_url(query = "'neural+network'+'water+injection'", how = "all")
url_all
get_papers_count(url_all)    # 319
This is the same as searching:
# same two composite keywords, now joined with an explicit +AND+
url_all <- make_search_url(query = "'neural+network'+AND+'water+injection'", how = "all")
url_all
get_papers_count(url_all)    # 319
What happens if we remove the
+
sign?
# same composite keywords, but with spaces instead of "+" inside each phrase
url_all <- make_search_url(query = "'neural network'+'water injection'", how = "all")
url_all
get_papers_count(url_all)    # 12205
The result is not what we expect. What the search returns is an OR search of "neural", "network", "water", and "injection", equivalent to this search:
# the four words searched with how = "any" give the same count
url_all <- make_search_url(query = "neural network water injection", how = "any")
url_all
get_papers_count(url_all)    # 12205
# provide three different sets of keywords to combine as vectors
major <- c("water injection", "water flooding")
minor <- c("machine-learning", "intelligent")
lesser <- c("neural network", "SVM", "genetic", "algorithm")

# one row per (major, minor, lesser) combination, in columns Var1, Var2, Var3
df <- expand.grid(major, minor, lesser, stringsAsFactors = FALSE)
df
# for two composite keywords
# Builds a query of the form 'a+b'+AND+'c+d' from columns Var1 and Var2 of
# `df` and prints the row index, the query and its paper count.
for (i in seq_len(nrow(df))) {            # seq_len() is safe for a 0-row df
  # replace the spaces inside each keyword with "+"
  s1plus <- paste(unlist(strsplit(df[i, "Var1"], " ")), collapse = "+")
  s2plus <- paste(unlist(strsplit(df[i, "Var2"], " ")), collapse = "+")

  # wrap each keyword in apostrophes and join them with +AND+
  sf <- paste0("'", s1plus, "'+AND+'", s2plus, "'")

  url <- make_search_url(sf, how = "all")
  cat(i, sf, get_papers_count(url), "\n")
  Sys.sleep(3)                            # pause between requests
}
# for three composite keywords
# Builds a query of the form 'a+b'+AND+'c'+AND+'d+e' from columns Var1, Var2
# and Var3 of `df` and prints the row index, the query and its paper count.
for (i in seq_len(nrow(df))) {            # seq_len() is safe for a 0-row df
  # replace the spaces inside each keyword with "+"
  s1plus <- paste(unlist(strsplit(df[i, "Var1"], " ")), collapse = "+")
  s2plus <- paste(unlist(strsplit(df[i, "Var2"], " ")), collapse = "+")
  s3plus <- paste(unlist(strsplit(df[i, "Var3"], " ")), collapse = "+")

  # wrap each keyword in apostrophes and join them with +AND+
  sf <- paste0("'", s1plus, "'+AND+'", s2plus, "'+AND+'", s3plus, "'")

  url <- make_search_url(sf, how = "all")
  cat(i, sf, get_papers_count(url), "\n")
  Sys.sleep(3)                            # was sleep(3), which is not a base R function
}
# works for "n" columns
# Builds one query per row of `df` by joining every column's keyword with the
# boolean operator, e.g. 'water+injection'+AND+'machine-learning'+AND+'SVM'.
bool_op <- "AND"
# The separator closes one quoted keyword, adds the operator surrounded by
# plus signs, and opens the next quoted keyword: '+AND+'
sep <- paste0("'+", bool_op, "+'")

for (i in seq_len(nrow(df))) {            # seq_len() is safe for a 0-row df
  # replace the spaces inside each keyword of this row with "+"
  terms <- vapply(
    seq_len(ncol(df)),
    function(j) paste(unlist(strsplit(df[i, j], " ")), collapse = "+"),
    character(1)
  )
  # opening apostrophe + keywords joined by the separator + closing apostrophe
  sf <- paste0("'", paste(terms, collapse = sep), "'")

  url <- make_search_url(sf, how = "all")
  cat(sprintf("%3d %60s %5d \n", i, sf, get_papers_count(url)))
  Sys.sleep(3)                            # pause between requests
}
library(petro.One)

# provide different sets of keywords to combine as vectors
major <- c("water injection", "water flooding")
minor <- c("machine-learning", "intelligent")
lesser <- c("neural network", "SVM", "genetic", "algorithm")

df <- expand.grid(major, minor, lesser, stringsAsFactors = FALSE)
df

#' Combine keyword vectors into OnePetro queries and count the papers
#'
#' Every combination of the supplied keywords becomes one query in which each
#' keyword is wrapped in apostrophes, its spaces are replaced by "+", and the
#' keywords are joined by the boolean operator surrounded by plus signs,
#' e.g. `'water+injection'+AND+'machine-learning'`.
#'
#' @param ... input character vectors, one per keyword group
#' @param bool_op boolean operator placed between keywords. It can be AND or OR
#' @param sleep seconds to wait after each request
#' @return invisibly, a data frame with one row per keyword combination: the
#'   `expand.grid()` columns plus `paper_count`, `sf` (the query string) and
#'   `url`
join_keywords <- function(..., bool_op = "AND", sleep = 3) {
  # works for "n" columns
  df <- expand.grid(..., stringsAsFactors = FALSE)   # combine keywords
  # closes one quoted keyword, adds +OP+, opens the next: '+AND+'
  sep <- paste0("'+", bool_op, "+'")

  rec <- vector("list", nrow(df))
  # iterate through the rows of the keyword combinations data frame
  for (i in seq_len(nrow(df))) {
    # replace the spaces inside each keyword of this row with "+"
    terms <- vapply(
      seq_len(ncol(df)),
      function(j) paste(unlist(strsplit(df[i, j], " ")), collapse = "+"),
      character(1)
    )
    sf <- paste0("'", paste(terms, collapse = sep), "'")

    url <- make_search_url(sf, how = "all")
    paper_count <- get_papers_count(url)
    cat(sprintf("%3d %60s %5d \n", i, sf, paper_count))

    # build a record of results
    rec[[i]] <- list(paper_count = paper_count, sf = sf, url = url)
    Sys.sleep(sleep)                                 # give OnePetro a break
  }

  rec.df <- data.table::rbindlist(rec)               # convert list to data frame
  df <- cbind(df, rec.df)                            # join the results
  invisible(df)
}

# provide different sets of keywords to combine as vectors
major <- c("water injection", "water flooding")
minor <- c("machine-learning", "intelligent")
lesser <- c("neural network", "SVM", "genetic", "algorithm")

# p.df <- join_keywords(major, minor)
p.df <- join_keywords(major, minor, lesser)
p.df
#' Paper counts for every combination of the supplied keyword vectors
#'
#' Builds the queries with `join_keywords()` and then queries each resulting
#' URL again to fill the `paper_count` column.
#'
#' @param ... input character vectors, forwarded to `join_keywords()`
#' @param bool_op boolean operator, forwarded to `join_keywords()`
#' @param sleep seconds to wait after each request made here
#' @return the data frame produced by `join_keywords()` with `paper_count`
#'   refreshed
paper_count_from_vectors <- function(..., bool_op = "AND", sleep = 3) {
  # bool_op was previously accepted but never passed on, so a caller's
  # operator was silently ignored
  joined <- join_keywords(..., bool_op = bool_op)

  for (i in seq_along(joined$url)) {
    joined$paper_count[i] <- get_papers_count(joined$url[i])
    Sys.sleep(sleep)                      # pause between requests
  }
  joined
}

# provide different sets of keywords to combine as vectors
major <- c("water injection", "water flooding")
minor <- c("machine-learning", "intelligent")
lesser <- c("neural network", "SVM", "genetic")
another <- c("algorithm")

paper_count_from_vectors(major, minor, lesser, another, bool_op = "AND")
# this is the correct way of passing composite keywords:
# each phrase is wrapped in apostrophes, its words are joined with "+",
# and the phrases are joined with +AND+
url_all <- make_search_url(query = "'water+injection'+AND+'machine-learning'", how = "all")
url_all
get_papers_count(url_all)    # 143
# search "neural network" with dc_type = "standard" and read the result
# pages into a data frame
my_url <- make_search_url(query = "neural network", dc_type = "standard", how = "all")
# print(get_papers_count(my_url))
df <- read_multipage(my_url)
df
# petro.One:::expect_equal_scale(nrow(df), 0, tolerance_pct = 0.01)
# Expectation that `object` equals `expected` within a tolerance, using the
# observed value itself as the scale.
#   object        value under test (also passed as the scale)
#   expected      reference value
#   ...           accepted but not forwarded
#   tolerance_pct tolerance passed on to testthat::expect_equal()
expect_equal_scale2 <- function(object, expected, ..., tolerance_pct) {
  # third and fourth positional arguments are the tolerance and the scale
  testthat::expect_equal(
    object,
    expected,
    tolerance_pct,
    object,
    check.attributes = TRUE
  )
}

expect_equal_scale2(nrow(df), 0, tolerance_pct = 0.01)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.