library(tidyverse)
library(secretary)
library(chariot)
library(pg13)
library(skyscraper)
# Batch (non-interactive) runs log their progress to a dated text file on the
# Desktop. Calling cat() with no content and the default append = FALSE
# creates the file, or truncates it if it already exists.
if (!interactive()) {
  report_filename <- paste0(
    "~/Desktop/pubmed_search_01_pubmed_search_",
    format(Sys.Date()),
    ".txt"
  )
  cat(file = report_filename)
}
# Fetch every distinct concept_name from chemidplus_search.concept, flatten
# the query result into a plain unnamed vector, and shuffle it so the search
# terms are processed in random order.
# NOTE(review): override_cache = TRUE presumably forces a fresh query instead
# of reusing a cached result -- confirm against chariot::queryAthena().
concepts <- chariot::queryAthena(
"
SELECT DISTINCT
concept_name
FROM chemidplus_search.concept
;
",
  override_cache = TRUE
)
concepts <- unname(unlist(concepts))
concepts <- sample(concepts)
# Mark the start of the first pass in the batch report file.
if (!interactive()) {
  cat(
    "########### First Iteration\n",
    file = report_filename,
    append = TRUE
  )
}
# Main scrape loop ----
#
# Works through `concepts` one search term at a time:
#   * opens a connection, runs skyscraper::scrape_pubmed() for the term, and
#     closes the connection again;
#   * records the term in `error_concepts` when the scrape raises an error;
#   * waits 5 seconds between terms;
#   * reports progress (to the console when interactive, otherwise appended
#     to `report_filename`);
#   * exports the "pubmed_search" schema every 50 completed terms.
#
# `concepts` is consumed as the loop runs and is empty when it finishes.
error_concepts <- vector()
total_concepts <- length(concepts)

while (length(concepts) > 0) {
  concept <- concepts[1]

  conn <- chariot::connectAthena()
  # The tryCatch yields a scalar flag rather than the scraper's return value.
  # The previous code compared the return value to the string "Error" inside
  # `if ()`, which fails for return values of length > 1 (an error since
  # R 4.2) and, for zero-length return values, skipped the whole iteration
  # body -- including the step that advances the queue -- looping forever.
  scrape_failed <-
    tryCatch(
      {
        skyscraper::scrape_pubmed(conn = conn,
                                  search_term = concept)
        FALSE
      },
      error = function(e) TRUE
    )
  chariot::dcAthena(conn = conn,
                    remove = TRUE)

  if (scrape_failed) {
    error_concepts <- c(error_concepts, concept)
  }

  # Pause between search terms, then drop the term that was just processed.
  # Both steps now run unconditionally so the loop always makes progress.
  Sys.sleep(5)
  concepts <- concepts[-1]

  # Progress metrics:
  #   current_ct        -- concepts still waiting to be processed
  #   completed_ct      -- concepts processed so far (including failures)
  #   percent_completed -- completed share, rounded to 2 significant digits
  #   error_ct          -- concepts whose scrape raised an error
  current_ct <- length(concepts)
  completed_ct <- total_concepts - current_ct
  percent_completed <- signif((completed_ct / total_concepts) * 100, digits = 2)
  error_ct <- length(error_concepts)

  if (interactive()) {
    secretary::typewrite(secretary::italicize(percent_completed), "percent completed.")
    secretary::typewrite(secretary::cyanTxt(current_ct, "out of", total_concepts, "to go."))
    secretary::typewrite(secretary::redTxt(error_ct, "errors."))
  } else {
    # Both report lines of one iteration share a single timestamp.
    stamp <- paste0("[", as.character(Sys.time()), "]")
    cat(stamp, sep = "", file = report_filename, append = TRUE)
    cat("\t", current_ct, "/", total_concepts, " (", percent_completed, " percent completed)\n", sep = "", file = report_filename, append = TRUE)
    cat(stamp, sep = "", file = report_filename, append = TRUE)
    cat("\t", error_ct, " errors\n", sep = "", file = report_filename, append = TRUE)
    rm(stamp)
  }

  # Export the schema after every 50th completed concept.
  # NOTE(review): when total_concepts is not a multiple of 50, the last
  # partial batch is not exported by this loop -- confirm that an export
  # happens elsewhere after the loop finishes.
  if ((completed_ct %% 50) == 0) {
    conn <- chariot::connectAthena()
    skyscraper::export_schema_to_data_repo(conn = conn,
                                           target_dir = "~/GitHub/Public-Packages/pubmedSearchData/",
                                           schema = "pubmed_search")
    chariot::dcAthena(conn = conn)
  }

  # Remove per-iteration scratch values from the workspace, as before.
  rm(scrape_failed,
     current_ct,
     completed_ct,
     percent_completed,
     error_ct)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.