# R/wos_retrieve.R
#
# Defines functions wos_retrieve_all wos_retrieve wos_retrieve_page
#
# Documented in wos_retrieve wos_retrieve_all wos_retrieve_page

#' Retrieve a batch of records (100 max)
#'
#' Sends a single SOAP \code{retrieve} request to the WoS Lite search service
#' for the query identified by \code{result}, and parses the response.
#'
#' @param result a search query result generated by a previous call to \code{\link{wos_search}}
#' @param first index of the first record to retrieve. \code{NULL} (the
#'   default) is treated as 1.
#' @param count number of records to retrieve
#' @param url url of WoS service (to be used with a proxy)
#'
#' @return The value returned by \code{wos_parse_records} for the response
#'   document. An error is raised if the response contains a
#'   \code{faultstring} element.
#'
#' @import xml2
#' @import RCurl

wos_retrieve_page <- function(result, first = NULL, count = 100, url = "http://search.webofknowledge.com") {

  query_id <- result$id
  sid <- result$sid

  # paste0() silently drops a NULL argument, which would leave the
  # <firstRecord> element empty; start from the first record instead.
  if (is.null(first)) {
    first <- 1
  }

  # Render the numbers as plain digits: paste0(1e5) yields "1e+05", which
  # is not a valid integer in the request.
  first_txt <- format(first, scientific = FALSE, trim = TRUE)
  count_txt <- format(count, scientific = FALSE, trim = TRUE)

  body <- paste0('<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
  <soap:Body>
  <ns2:retrieve xmlns:ns2="http://woksearchlite.v3.wokmws.thomsonreuters.com">
    <queryId>', query_id, '</queryId>

    <retrieveParameters>
       <firstRecord>', first_txt, '</firstRecord>
       <count>', count_txt, '</count>
    </retrieveParameters>

  </ns2:retrieve>
  </soap:Body>
  </soap:Envelope>')

  endpoint <- paste0(url, "/esti/wokmws/ws/WokSearchLite")
  # The session id obtained at search time is sent back as a cookie.
  headers <- c(
    Accept = "multipart/*",
    "Content-Type" = "text/xml; charset=utf-8",
    "Cookie" = paste0("SID=", sid),
    SOAPAction = ""
  )

  # Collect the response body in memory while curl performs the POST.
  gatherer <- RCurl::basicTextGatherer()
  RCurl::curlPerform(
    url = endpoint,
    httpheader = headers,
    postfields = body,
    writefunction = gatherer$update
  )

  xml <- xml2::read_xml(gatherer$value())

  # A zero-length result means no <faultstring> element was found,
  # i.e. the service did not report a SOAP fault.
  err <- xml2::xml_find_first(xml, xpath = ".//faultstring")
  if (length(err) > 0) {
    stop("Error : ", xml2::xml_text(err))
  }

  wos_parse_records(xml)

}


#' Retrieve records from a search query result
#'
#' Records are fetched in pages of at most 100 through
#' \code{\link{wos_retrieve_page}} and combined into a single data frame.
#' A progress line is printed for each page requested.
#'
#' @param result a search query result generated by a previous call to \code{\link{wos_search}}
#' @param first index of the first record to retrieve
#' @param count number of records to retrieve
#' @param url url of WoS service (to be used with a proxy)
#'
#' @return A tibble (data frame) with the records results. For fields with multiple values, such as authors or keywords, values are returned as a concatenated string.
#'
#' @export
#' @importFrom dplyr bind_rows
#' @importFrom utils head

wos_retrieve <- function(result, first = 1, count = 100, url = "http://search.webofknowledge.com") {

  # Maximum number of records requested per call to wos_retrieve_page().
  page_size <- 100

  # First record index of each page, and how many records each page holds;
  # only the last page may be shorter than page_size.
  starts <- seq(first, length.out = ceiling(count / page_size), by = page_size)
  sizes <- pmin(page_size, first + count - starts)

  pages <- lapply(seq_along(starts), function(i) {
    # Progress is reported relative to the requested range (1..count).
    cat("Retrieving ", starts[i] - first + 1, "-", starts[i] - first + sizes[i], " of ", count, "\n", sep = "")
    dplyr::bind_rows(
      wos_retrieve_page(result, first = starts[i], count = sizes[i], url = url)
    )
  })

  dplyr::bind_rows(pages)

}

#' @describeIn wos_retrieve Retrieve every record of a search query result
#' @export

wos_retrieve_all <- function(result) {

  # The `results` element of the search result is used as the number of
  # records to fetch (presumably the total match count -- confirm against
  # wos_search), starting from the first record.
  total <- result$results
  wos_retrieve(result, first = 1, count = total)

}
# juba/rwos documentation built on Oct. 17, 2020, 7:48 p.m.