In f0nzie/petro.One: Statistics and Text Mining for Oil and Gas Papers from OnePetro Metadata

Analysis

Load dataset `ml_technique_4.rda`

library(dplyr)

# Locate the "results" folder shipped with the installed petro.One package.
# system.file() returns "" when the folder cannot be found, so fail early
# with a clear message instead of letting load() fail on a bogus path.
results_loc <- system.file("results", package = "petro.One")
if (!nzchar(results_loc)) {
    stop("'results' folder not found in package petro.One", call. = FALSE)
}

# the .rda file is expected to provide the list `by.ml_technique.4`
load(file = file.path(results_loc, "ml_technique_4.rda"))

# split the loaded list into its two data frames
keywords.4 <- by.ml_technique.4$keywords
papers.4   <- by.ml_technique.4$papers

# total paper count per algorithm (column Var3), largest totals first
keywords.4 %>%
    rename(algorithm = Var3) %>%
    group_by(algorithm) %>%
    summarise(papers = sum(paper_count)) %>%
    arrange(desc(papers))

# keyword search results; shown as a tibble for a compact preview.
# as_tibble() replaces the deprecated as.tibble().
keywords.4 <- by.ml_technique.4$keywords
tibble::as_tibble(keywords.4)

# drop the Var1, sf and url columns, give Var3/Var2 descriptive names and
# strip leading/trailing whitespace from the algorithm names; the result is
# printed and stored in keywords.41
keywords.41 <- keywords.4 %>%
    select(-c(Var1, sf, url)) %>%
    rename(algorithm = Var3, discipline = Var2) %>%
    mutate(algorithm = trimws(algorithm)) %>%
    print()

# list each machine learning algorithm name once
distinct(keywords.41, algorithm)

# rename equivalent algorithms

# abbreviations / variant spellings (names) and the label they are counted
# under (values)
algo.synonyms <- c(
    "PCA"                   = "principal component analysis",
    "SVM"                   = "Support Vector Machine",
    "SVD"                   = "Singular Value Decomposition",
    "SVR"                   = "Support Vector Regression",
    "GPR"                   = "Gaussian Process Regression",
    "neural nets"           = "neural network",
    "convolutional neural"  = "deep learning",
    "convolutional network" = "deep learning"
)

keywords.42 <- keywords.41 %>%
    # replace known synonyms, leave every other name untouched
    mutate(algorithm = ifelse(algorithm %in% names(algo.synonyms),
                              unname(algo.synonyms[algorithm]),
                              algorithm)) %>%
    # convert to Title Case BEFORE grouping: names that differ only in letter
    # case must fall into the same group, otherwise they are summed separately
    # and show up as duplicated rows after capitalization
    mutate(algorithm = gsub("(?<=\\b)([a-z])", "\\U\\1", tolower(algorithm), perl = TRUE)) %>%
    # sort and sum most used algos
    group_by(algorithm) %>%
    summarise(papers = sum(paper_count)) %>%
    arrange(desc(papers)) %>%
    print()

# export the ordered algorithm names to the working directory
write.csv(keywords.42$algorithm, file = "ml_algorithms.csv")

# keep five columns of the papers table and add `id`: the trimmed `source`
# and `paper_id` pasted together, used below to detect duplicate papers
new.papers.4 <- select(papers.4, title_data, keyword, year, paper_id, source)
new.papers.4 <- mutate(
    new.papers.4,
    id = trimws(paste0(trimws(source), trimws(paper_id)))
)

new.papers.4

# distinct ids; fewer rows here than in new.papers.4 means duplicate papers
distinct(new.papers.4, id)

# new.papers.4$id == distinct(new.papers.4, id)

# split keyword at AND word
res <- strsplit(new.papers.4$keyword, "AND")
res[[1]]

# replace "+" by a space and remove single quotes in the string vectors
# (fixed = TRUE: both patterns are literal characters, not regexes)
res.la.1 <- lapply(res, function(x) gsub("+", " ", x, fixed = TRUE))
res.la.2 <- lapply(res.la.1, function(x) gsub("'", "", x, fixed = TRUE))
res.la.2[[1]]

# convert list of string vectors to dataframe, one row per paper.
# rbind() recycles shorter vectors when the number of terms differs between
# rows, so pad every vector with NA up to the longest one first
n.terms <- max(lengths(res.la.2), 0L)
res.padded <- lapply(res.la.2, function(x) x[seq_len(n.terms)])
keyword.cols <- as.data.frame(do.call(rbind, res.padded), stringsAsFactors = FALSE)
keyword.cols

# merge columns: the keyword terms (V1, V2, ...) are appended to the papers
mod.papers.4 <- cbind(new.papers.4, keyword.cols)
mod.papers.4

# export the merged papers table to the development results folder
# (path is relative to the current working directory)
devres_loc <- "../inst/results"
write.csv(mod.papers.4, file = file.path(devres_loc, "ml_papers_4.csv"))

# Keep the first row of every distinct paper id within each V3 group.
# filter() requires a logical condition; the integer count returned by
# n_distinct(id) is rejected, so test for duplicates explicitly.
# NOTE(review): presumed intent is "unique papers per V3 value" - confirm.
Unique <- mod.papers.4 %>%
  group_by(V3) %>%
  filter(!duplicated(id)) %>%
  ungroup()

library(dplyr)
# distinct title / id combinations in the merged table
distinct(mod.papers.4, title_data, id)

# column names of the merged table
colnames(mod.papers.4)

# keep only the first occurrence of each paper id (`id` is the 6th column
# of mod.papers.4), then drop the helper columns that are no longer needed
is.first.id <- !duplicated(mod.papers.4[["id"]])
nodup.papers.4 <- mod.papers.4[is.first.id, ]
nodups.papers.4 <- select(nodup.papers.4, -c(keyword, paper_id, source, V1))

# export the de-duplicated papers table to the development results folder
# (path is relative to the current working directory)
devres_loc <- "../inst/results"
write.csv(nodups.papers.4, file = file.path(devres_loc, "ml_nodups_papers_4.csv"))

Read Excel file with Petroleum Engineering applications

# read the first sheet of the Excel workbook stored in the development
# results folder
xls.nodups.papers.4 <- readxl::read_xlsx(
    path = file.path(devres_loc, "ml_nodups_papers_4.xlsx"),
    sheet = 1
)

xls.nodups.papers.4

# unique applications found in the pe_app column
unique(xls.nodups.papers.4[["pe_app"]])

f0nzie/petro.One documentation built on May 29, 2019, 12:05 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

f0nzie/petro.One
Statistics and Text Mining for Oil and Gas Papers from OnePetro Metadata

In f0nzie/petro.One: Statistics and Text Mining for Oil and Gas Papers from OnePetro Metadata

Analysis

Load dataset `ml_technique_4.rda`

Read Excel file with Petroleum Engineering applications

R Package Documentation

Browse R Packages

We want your feedback!

f0nzie/petro.One Statistics and Text Mining for Oil and Gas Papers from OnePetro Metadata

In f0nzie/petro.One: Statistics and Text Mining for Oil and Gas Papers from OnePetro Metadata

Analysis

Load dataset ml_technique_4.rda

Read Excel file with Petroleum Engineering applications

R Package Documentation

Browse R Packages

We want your feedback!

f0nzie/petro.One
Statistics and Text Mining for Oil and Gas Papers from OnePetro Metadata

Load dataset `ml_technique_4.rda`