
Defines the function ArticleSearch

Documented in ArticleSearch

# ArticleSearch: scrape article titles, links and per-article bibliographic
# details from a search-results website (only "ScienceDirect" is accepted).
#
# Arguments:
#   keywords   Vector of search terms; the function stops if it is missing or
#              has length 0.
#   webSite    Name of the site to query; anything other than "ScienceDirect"
#              ends in stop().
#   maxSize    Optional cap on the number of articles. When missing it is set
#              to the number of results reported by the site.
#   saveCSV    If TRUE, the final table is written to "<exportName>.csv".
#   exportName File name (without extension) used when saveCSV is TRUE.
#
# NOTE(review): this listing appears to have lost lines during extraction.
# Most closing braces are absent, the body of HomeUrl is not visible, the
# opener of the per-article loop is not visible, and the "}, error = ..." line
# near the end closes a call whose opening is not visible. No return
# statement is visible either, so the returned value cannot be confirmed from
# this text (presumably artData - TODO confirm against the package source).
ArticleSearch <-
function(keywords, webSite = "ScienceDirect", maxSize, saveCSV = FALSE, exportName = "BibDataOnline") {
  # Argument validation: keywords must be supplied and non-empty.
  if (missing(keywords)){
    stop("Keywords have not been specified")
  } else {
    if (length(keywords) == 0) {
      stop("Keywords vector can not be empty")
  # Whitelist of supported sites; currently a single entry.
  webList <- c("ScienceDirect")
  if(webSite %in% webList) {
    if(webSite == "ScienceDirect") {
      # Helper that is called below with (searchKeywords, offset[, show]) and
      # whose result is passed to xml2::read_html().
      # NOTE(review): its body is not visible here; presumably it builds the
      # search URL - TODO confirm.
      HomeUrl <- function(searchKeywords, offset, show = 100) {
      # Encode the query: spaces inside each keyword become "%20" and the
      # keywords are joined with "%2C".
      searchKeywords <- paste0(gsub(pattern = " ", replacement = "%20", x = keywords), collapse = "%2C")
      # Page size for the first request: 100, or 25 when maxSize <= 25.
      show <- 100 
      if(!missing(maxSize)) {
        if(maxSize <= 25) {
          show <- 25
      # Fetch the first results page (offset 0).
      currentSession <- xml2::read_html(HomeUrl(searchKeywords,0,show))
      # Read the text of the ".search-body-results-text" node, delete the
      # characters matched by the pattern and convert what remains to a number.
      # NOTE(review): "[A-z]" spans more than letters (it also covers the
      # characters between "Z" and "a"), and "[:punct:]" is not wrapped in an
      # outer bracket expression, so it probably does not act as the POSIX
      # punctuation class - confirm the intended pattern.
      resultsFound <- as.numeric(gsub(x = rvest::html_text(rvest::html_node(currentSession, ".search-body-results-text")), pattern = "[A-z]|\\s+|[:punct:]|[.]|[,]", replacement = ""))
      # Without an explicit cap, take everything the site reports.
      if(missing(maxSize)) {
        maxSize <- resultsFound
      # Never retrieve more than the caller asked for.
      if(maxSize < resultsFound) {
        resultsFound <- maxSize
      # Hard upper limit of 999 articles, announced to the user.
      if(resultsFound > 999) {
        resultsFound <- 999
        cat("Only can get a maximum of 999 articles\n"); flush.console()
      # Offset of the last page needed when paging in steps of 100.
      offset <- (ceiling(resultsFound/100)-1)*100
      cat("Retrieving",resultsFound,"articles...\n"); flush.console()
      # Titles and relative links from the first page.
      artNodes <- rvest::html_nodes(currentSession, ".result-list-title-link")
      artTitle <- rvest::html_text(artNodes)
      artLink <- rvest::html_attr(artNodes, "href")

      # Remaining pages: request offsets 100, 200, ... up to `offset` and
      # append their titles and links. HomeUrl is called without `show` here,
      # so its default of 100 applies.
      if(offset > 0) {
        for (currentOffset in seq(from = 100, to = offset, by = 100)) {
          currentSession <- xml2::read_html(HomeUrl(searchKeywords,currentOffset))
          artNodes <- rvest::html_nodes(currentSession, ".result-list-title-link")
          artTitle <- c(artTitle, rvest::html_text(artNodes))
          artLink <- c(artLink, rvest::html_attr(artNodes, "href"))
      cat("Main information obtained. Obtaining bibliometric extra info...\n"); flush.console()
      # One row per article; links are made absolute by prefixing the host.
      artData <- data.frame("Title" = artTitle, "URL" = paste0("https://www.sciencedirect.com",artLink), stringsAsFactors = FALSE)
      # Keep the first maxSize rows.
      # NOTE(review): if maxSize is larger than the number of rows collected
      # (e.g. above the 999 limit), 1:maxSize indexes past the end and adds
      # all-NA rows - confirm whether that can happen in practice.
      artData <- artData[1:maxSize, ]
        # artInfo accumulates one row of details per article.
        # NOTE(review): the statements below read like the body of a loop that
        # runs once per article and stops when nrow(artInfo) equals
        # nrow(artData); the loop opener and its exit statement are not
        # visible in this listing.
        artInfo <- NULL
          # nrow(NULL) is NULL, so this branch handles the very first article.
          if (is.null(nrow(artInfo))) {
            artPage <- xml2::read_html(artData$URL[1])
          } else {
            # Every article already has a details row: report completion.
            if(nrow(artInfo) == nrow(artData)) {
              cat("\nThe entire information of", nrow(artData), "articles has been obtained\n"); flush.console()
            # Otherwise fetch the next article page and print a progress
            # counter ("\r" rewrites the same console line).
            artPage <- xml2::read_html(artData$URL[nrow(artInfo)+1])
            cat("\r",(nrow(artInfo) + 1),"of",nrow(artData)); flush.console()
          # For abstract, keywords and authors: take the children of the
          # matched nodes, drop the first one ([-1]) and join the remaining
          # texts with ",". Journal and DOI are read from the matched nodes
          # directly (text and "href" attribute respectively).
          artAbstract <- paste0(rvest::html_text(rvest::html_children(rvest::html_nodes(artPage, ".abstract.author"))[-1]), collapse = ",")
          artKeywords <- paste0(rvest::html_text(rvest::html_children(rvest::html_nodes(artPage, ".keywords-section"))[-1]), collapse = ",")
          artAuthors <- paste0(rvest::html_text(rvest::html_children(rvest::html_nodes(artPage, ".author-group"))[-1]), collapse = ",")
          artJournal <- paste0(rvest::html_text(rvest::html_nodes(artPage, ".publication-title-link")), collapse = ",")
          artDOI <- paste0(rvest::html_attr(rvest::html_nodes(artPage, ".doi"), "href"), collapse = ",")
          # Fallback to an empty string for fields that came back empty.
          # NOTE(review): paste0(..., collapse = ) always returns a single
          # string ("" for empty input), so these length checks look like they
          # can never be TRUE - confirm.
          if(length(artAbstract) == 0) artAbstract <- ""
          if(length(artKeywords) == 0) artKeywords <- ""
          if(length(artAuthors) == 0) artAuthors <- ""
          if(length(artJournal) == 0) artJournal <- ""
          if(length(artDOI) == 0) artDOI <- ""
          # Append this article's details as a new row.
          artInfo <- rbind(artInfo, c(artAbstract, artKeywords, artAuthors, artJournal, artDOI))
        # Merge titles, collected details and URLs into the final table and
        # give its columns their names.
        artData <- data.frame(cbind(artData$Title, artInfo, artData$URL), stringsAsFactors = FALSE)
        names(artData) <- c("Title", "Abstract", "Keywords", "Authors", "Journal", "DOI", "URL")
      # Error handler: on failure only a message is printed.
      # NOTE(review): the call this closes (presumably tryCatch({ ...) is not
      # visible in this listing.
      }, error = function(e) cat("\nCouldn't finish to get the entire information"))
      # Label rows A1, A2, ...
      # NOTE(review): 1:nrow(artData) yields c(1, 0) when the table has no
      # rows; seq_len() would be the safe form.
      row.names(artData) <- paste0("A",1:nrow(artData))
      # Optional export: comma-separated file with header and row names.
      if(saveCSV) {
        cat("Saving data in a CSV file"); flush.console()
        write.table(artData, file = paste0(exportName, ".csv"), col.names = TRUE, row.names = TRUE, sep = ",")
        cat(". Done"); flush.console()
  } else {
    # webSite was not found in webList.
    stop("Only 'ScienceDirect' database is available, soon there will be more...")

Try the KDViz package in your browser

Any scripts or data that you put into this service are public.

KDViz documentation built on May 1, 2019, 6:34 p.m.