We start with a major category at the top, for example machine learning. Its dependents are the vectors `minor` and `lesser`.
library(petro.One)

# First-level keyword.
major <- "machine learning"

# Second-level keywords.
minor <- c(
  "reservoir", "production", "logging", "completion", "intervention",
  "drilling", "geology", "seismic", "petrophysics", "geophysics",
  "economics"
)

# Third-level keywords.
lesser <- c("data", "algorithm")

# The returned data structure is a list.
# The list contains two data frames: one for the keywords and a second one
# for the papers.
prod.li <- join_keywords(
  major, minor, lesser,
  get_papers = TRUE,
  sleep = 3
)
prod.li
The 6000+ paper results may need to be cleaned up, since some papers could be repeated across the subcategories.
This is a quick summary of how many papers on machine learning were produced. Again, this number may be misleading because papers may be counted more than once: a machine learning paper that crosses disciplinary boundaries shows up under each discipline it touches.
library(ggplot2)
library(dplyr)

# Get the keywords data frame from the result list.
keywords <- prod.li[["keywords"]]

# "data" rows only: horizontal bar chart of paper_count per Var2 value,
# with the bars ordered by paper_count.
ggplot(
  filter(keywords, Var3 == "data"),
  aes(x = reorder(Var2, paper_count), y = paper_count)
) +
  geom_bar(stat = "identity") +
  coord_flip()
# "algorithm" rows only: horizontal bar chart of paper_count per Var2 value,
# with the bars ordered by paper_count.
ggplot(
  filter(keywords, Var3 == "algorithm"),
  aes(x = reorder(Var2, paper_count), y = paper_count)
) +
  geom_bar(stat = "identity") +
  coord_flip()
The following chunks yield the same results but use a more compact sequence of commands.
We are using the pipe symbol `%>%` to pass the result of one command on to the next.
library(magrittr)
library(dplyr)

# Keep only the "data" rows of the keywords data frame.
# The result list created earlier is `prod.li`; the name `prod.df` that was
# used here is never defined.
keywords.1 <- prod.li %>%
  # extract2("keywords") %>%   # magrittr spelling of the same extraction
  `[[`("keywords") %>%
  filter(Var3 == "data")

# Horizontal bar chart of paper_count per Var2 value, ordered by paper_count.
keywords.1 %>%
  ggplot(aes(x = reorder(Var2, paper_count), y = paper_count)) +
  coord_flip() +
  geom_bar(stat = "identity")
# Keep only the "algorithm" rows of the keywords data frame.
# The result list created earlier is `prod.li`; the name `prod.df` that was
# used here is never defined.
keywords.2 <- prod.li %>%
  # extract2("keywords") %>%   # magrittr spelling of the same extraction
  `[[`("keywords") %>%
  filter(Var3 == "algorithm")

# Horizontal bar chart of paper_count per Var2 value, ordered by paper_count.
keywords.2 %>%
  ggplot(aes(x = reorder(Var2, paper_count), y = paper_count)) +
  coord_flip() +
  geom_bar(stat = "identity")
Replace `well construction` by `drilling`.
# Replace `well construction` by `drilling` in the Var2 labels of the
# "algorithm" rows.
# Changes from the previous version:
#   * the source list is `prod.li` (`prod.df` is never defined);
#   * the earlier mutate() call had an empty `yes` argument in ifelse() and
#     targeted paper_count; the relabelling of Var2 is what the heading asks
#     for.
keywords.2 <- prod.li %>%
  # extract2("keywords") %>%   # magrittr spelling of the same extraction
  `[[`("keywords") %>%
  filter(Var3 == "algorithm") %>%
  # as.character() keeps the text labels intact should Var2 be a factor
  mutate(
    Var2 = ifelse(
      Var2 == "well construction",
      "drilling",
      as.character(Var2)
    )
  )

# Horizontal bar chart of paper_count per Var2 value, ordered by paper_count.
keywords.2 %>%
  ggplot(aes(x = reorder(Var2, paper_count), y = paper_count)) +
  coord_flip() +
  geom_bar(stat = "identity")
library(petro.One)

# First-level keyword.
major <- "machine learning"

# Second-level keywords.
discipline <- c(
  "reservoir", "production", "logging", "completion", "intervention",
  "drilling", "geology", "seismic", "petrophysics", "geophysics",
  "economics"
)

# Third-level keywords.
learning <- c("supervised learning", "unsupervised learning")

# Run the query for the three keyword levels and keep the result.
by.learning <- join_keywords(
  major, discipline, learning,
  get_papers = TRUE,
  sleep = 3
)
library(petro.One)

# First-level keyword.
major <- "machine learning"

# Second-level keywords.
discipline <- c(
  "reservoir", "production", "logging", "completion", "intervention",
  "drilling", "geology", "seismic", "petrophysics", "geophysics",
  "economics"
)

# Third-level keywords.
learning <- c("supervised learning", "unsupervised learning")

# Fourth-level keywords.
tech_class <- c("clustering", "classification", "regression")

# Run the query for the four keyword levels; this overwrites any earlier
# `by.learning` object.
by.learning <- join_keywords(
  major, discipline, learning, tech_class,
  get_papers = TRUE,
  sleep = 3
)
library(petro.One)

# First-level keyword.
major <- "machine learning"

# Second-level keywords.
minor <- c(
  "reservoir", "production", "logging", "completion", "intervention",
  "drilling", "geology", "seismic", "petrophysics", "geophysics",
  "economics"
)

# Third-level keywords. Some techniques appear under two labels
# (e.g. "SVM" / "Support Vector Machine", "PCA" / "principal component
# analysis").
ml_technique <- c(
  "SVM", "Support Vector Machine",
  "Genetic algorithm",
  "neural network",
  "fuzzy logic",
  "decision tree",
  "k-means",
  "boosting",
  "deep learning",
  "PCA", "principal component analysis",
  "logistic regression",
  "kernel density estimation",
  "nearest neighbors",
  "reinforcement learning"
)

by.ml_technique <- join_keywords(
  major, minor, ml_technique,
  get_papers = TRUE,
  sleep = 3
)
by.ml_technique
Replace the long form `Support Vector Machine` by `SVM`.
# Short and long label of the same technique.
var3.str.1 <- "SVM"
var3.str.2 <- "Support Vector Machine"

# Rows whose Var3 is neither of the two labels.
not.svm.only <- by.ml_technique[["keywords"]] %>%
  select(Var1, Var2, Var3, paper_count) %>%
  filter(Var3 != var3.str.1, Var3 != var3.str.2) %>%
  as.data.frame()
print(not.svm.only)

# Rows carrying either label: rewrite the long label to the short one, then
# sum paper_count for each Var1 / Var2 / Var3 combination.
svm.only <- by.ml_technique[["keywords"]] %>%
  select(Var1, Var2, Var3, paper_count) %>%
  filter(Var3 == var3.str.1 | Var3 == var3.str.2) %>%
  mutate(Var3 = ifelse(Var3 == var3.str.2, var3.str.1, Var3)) %>%
  group_by(Var1, Var2, Var3) %>%
  summarise(paper_count = sum(paper_count)) %>%
  as.data.frame()
print(svm.only)

# Bind both data frames.
keywords.new <- rbind(not.svm.only, svm.only)
keywords.new
# Short and long label of the same technique.
var3.str.1 <- "PCA"
var3.str.2 <- "principal component analysis"

# Rows whose Var3 is neither of the two labels.
not.pca.only <- keywords.new %>%
  select(Var1, Var2, Var3, paper_count) %>%
  filter(Var3 != var3.str.1, Var3 != var3.str.2) %>%
  as.data.frame()
print(not.pca.only)

# Rows carrying either label: rewrite the long label to the short one, then
# sum paper_count for each Var1 / Var2 / Var3 combination.
pca.only <- keywords.new %>%
  select(Var1, Var2, Var3, paper_count) %>%
  filter(Var3 == var3.str.1 | Var3 == var3.str.2) %>%
  mutate(Var3 = ifelse(Var3 == var3.str.2, var3.str.1, Var3)) %>%
  group_by(Var1, Var2, Var3) %>%
  summarise(paper_count = sum(paper_count)) %>%
  as.data.frame()
print(pca.only)

# Bind both data frames.
keywords.new.2 <- rbind(not.pca.only, pca.only)
keywords.new.2
library(petro.One)

# First-level keyword.
major <- "machine learning"

# Second-level keywords.
minor <- c(
  "reservoir", "production", "logging", "completion", "intervention",
  "workover", "drilling", "geology", "seismic", "petrophysics",
  "geophysics", "economics", "surface facilities"
)

# Third-level keywords.
# Two search terms were corrected: the stray leading blank in
# " Gaussian Processes Regression" was removed and "Boltzman machine" is now
# spelled "Boltzmann machine".
ml_technique.2 <- c(
  "SVM", "Support Vector Machine",
  "discriminant analysis",
  "logistic regression",
  "naive bayes",
  "nearest neighbor",
  "linear regression",
  "SVR", "Support Vector Regressor",
  "GPR", "Gaussian Processes Regression",
  "decision tree",
  "neural network", "neural nets",
  "k-means", "c-means",
  "hierarchical",
  "gaussian mixture",
  "hidden markov",
  "deep learning",
  "convolutional network",
  "Boltzmann machine",
  "Genetic algorithm",
  "fuzzy logic",
  "boosting",
  "PCA", "principal component analysis",
  "kernel density estimation",
  "reinforcement learning"
)

by.ml_technique.2 <- join_keywords(
  major, minor, ml_technique.2,
  get_papers = TRUE,
  sleep = 3
)
by.ml_technique.2

# Keep a copy on disk, just in case.
save(by.ml_technique.2, file = "ml_technique_2.rda")
# Restore `by.ml_technique.2` from the file written by the earlier save()
# call. The path is relative to the working directory so the chunk does not
# depend on one particular machine's drive layout.
# Author's local copy, for reference:
# load("R:/github-oilgains/petro.One/ml_technique_2.rda")
load(file = "ml_technique_2.rda")

# Show the keywords data frame of the restored result.
by.ml_technique.2$keywords
library(dplyr)

# Short and long label of the same technique.
var3.str.1 <- "SVM"
var3.str.2 <- "Support Vector Machine"

# Rows whose Var3 is neither of the two labels.
not.svm.only <- by.ml_technique.2[["keywords"]] %>%
  select(Var1, Var2, Var3, paper_count) %>%
  filter(Var3 != var3.str.1, Var3 != var3.str.2) %>%
  as.data.frame()
print(not.svm.only)

# Rows carrying either label: rewrite the long label to the short one, then
# sum paper_count for each Var1 / Var2 / Var3 combination.
svm.only <- by.ml_technique.2[["keywords"]] %>%
  select(Var1, Var2, Var3, paper_count) %>%
  filter(Var3 == var3.str.1 | Var3 == var3.str.2) %>%
  mutate(Var3 = ifelse(Var3 == var3.str.2, var3.str.1, Var3)) %>%
  group_by(Var1, Var2, Var3) %>%
  summarise(paper_count = sum(paper_count)) %>%
  as.data.frame()
print(svm.only)

# Bind both data frames.
keywords.new <- rbind(not.svm.only, svm.only)
keywords.new
# Short and long label of the same technique.
var3.str.1 <- "PCA"
var3.str.2 <- "principal component analysis"

# Rows whose Var3 is neither of the two labels.
not.pca.only <- keywords.new %>%
  select(Var1, Var2, Var3, paper_count) %>%
  filter(Var3 != var3.str.1, Var3 != var3.str.2) %>%
  as.data.frame()
print(not.pca.only)

# Rows carrying either label: rewrite the long label to the short one, then
# sum paper_count for each Var1 / Var2 / Var3 combination.
pca.only <- keywords.new %>%
  select(Var1, Var2, Var3, paper_count) %>%
  filter(Var3 == var3.str.1 | Var3 == var3.str.2) %>%
  mutate(Var3 = ifelse(Var3 == var3.str.2, var3.str.1, Var3)) %>%
  group_by(Var1, Var2, Var3) %>%
  summarise(paper_count = sum(paper_count)) %>%
  as.data.frame()
print(pca.only)

# Bind both data frames.
keywords.new.2 <- rbind(not.pca.only, pca.only)
keywords.new.2
# Two labels of the same technique; the first one is kept.
var3.str.1 <- "neural network"
var3.str.2 <- "neural nets"

# Rows whose Var3 is neither of the two labels.
not.nnet.only <- keywords.new.2 %>%
  select(Var1, Var2, Var3, paper_count) %>%
  filter(Var3 != var3.str.1, Var3 != var3.str.2) %>%
  as.data.frame()
print(not.nnet.only)

# Rows carrying either label: rewrite the second label to the first one,
# then sum paper_count for each Var1 / Var2 / Var3 combination.
nnet.only <- keywords.new.2 %>%
  select(Var1, Var2, Var3, paper_count) %>%
  filter(Var3 == var3.str.1 | Var3 == var3.str.2) %>%
  mutate(Var3 = ifelse(Var3 == var3.str.2, var3.str.1, Var3)) %>%
  group_by(Var1, Var2, Var3) %>%
  summarise(paper_count = sum(paper_count)) %>%
  as.data.frame()
print(nnet.only)

# Bind both data frames.
keywords.new.3 <- rbind(not.nnet.only, nnet.only)
keywords.new.3
library(petro.One)

# First-level keyword.
major <- "machine learning"

# Second-level keywords.
minor <- c(
  "reservoir", "production", "logging", "completion", "intervention",
  "workover", "drilling", "geology", "seismic", "petrophysics",
  "geophysics", "economics", "surface facilities"
)

# Third-level keywords: the regression techniques only.
# The stray leading blank in " Gaussian Process Regression" was removed.
ml_technique.3 <- c(
  "SVR", "Support Vector Regression",
  "GPR", "Gaussian Process Regression"
)

# This chunk used to appear twice in a row with identical content; the query
# is slow (sleep = 3 is passed to join_keywords), so it is run once.
by.ml_technique.3 <- join_keywords(
  major, minor, ml_technique.3,
  get_papers = TRUE,
  sleep = 3
)
by.ml_technique.3

# save(by.ml_technique.3, file = "ml_technique_3.rda")  # just in case
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.