# R/collect.R

#' Download result pages for a search object
#'
#' S3 generic. The actual work is carried out by the method selected from the
#' class of `.sch`; every argument is handed to that method unchanged.
#'
#' @param .sch Object whose class selects the method.
#' @param pags,parm_pag,path,...,url,method,wait Passed through to the
#'   selected method.
#' @return Whatever the selected method returns.
#' @export
paginate <- function(.sch, pags = 'all', parm_pag, path = '.', ..., url, method, wait = 0) {
  # With no explicit object, UseMethod() dispatches on the first argument,
  # which is `.sch`.
  UseMethod("paginate")
}

#' Download a set of result pages for a `searchdoc` object
#'
#' Resolves `pags` into a vector of page numbers and calls `paginate_one()`
#' once per page, ticking a progress bar after each download.
#'
#' @param .sch A `searchdoc` object. Its `npag`, `url` and `method` elements
#'   are read as defaults.
#' @param pags Pages to fetch: `'all'` (pages 1 to `.sch$npag`), a numeric
#'   vector of page numbers, or a length-2 vector `c(from = a, to = b)`
#'   meaning `a:b`. An infinite `to` stands for `.sch$npag`. An unnamed
#'   length-2 vector is also read as a `from`/`to` range.
#' @param parm_pag Name of the request parameter that receives the page
#'   number.
#' @param path Directory in which each page is saved.
#' @param ... Extra request parameters forwarded to `paginate_one()`.
#' @param url Request URL; defaults to `.sch$url`.
#' @param method Request method; defaults to `.sch$method`.
#' @param wait Seconds to sleep before each request.
#' @return `NULL`, invisibly. Called for the side effect of writing files.
#' @export
paginate.searchdoc <- function(.sch, pags = 'all', parm_pag, path = '.', ..., url, method, wait = 0) {
  if (identical(pags, 'all')) {
    # seq_len() gives an empty vector when there are no pages, whereas
    # 1:0 would request pages 1 and 0.
    pags <- seq_len(.sch$npag)
  } else if (!is.numeric(pags)) {
    stop('Par\u00e2metro pags errado.')
  } else if (length(pags) == 2 &&
             (is.null(names(pags)) ||
                identical(names(pags), c('from', 'to')))) {
    # Range form. The upper bound is the requested `to`, or the last
    # available page when `to` is infinite.
    last_pag <- if (is.infinite(pags[[2]])) .sch$npag else pags[[2]]
    pags <- pags[[1]]:last_pag
  }
  if (missing(url)) {
    url <- .sch$url
  }
  if (missing(method)) {
    method <- .sch$method
  }
  p <- dplyr::progress_estimated(length(pags), min_time = 2)
  for (pag in pags) {
    # url/method are passed by name so that nothing coming through `...`
    # can be bound to them positionally.
    paginate_one(pag, parm_pag, path, url = url, method = method, ...,
                 wait = wait)
    p$tick()$print()
  }
  invisible(NULL)
}

#' Fetch a single page and save the response to disk
#'
#' Builds the request parameters from `...` plus the page number, performs
#' the request, and writes the response object to
#' `<path>/<pag, zero-padded to 6 digits>.rds` with `saveRDS()`.
#'
#' @param pag Page number; stored in the request parameters under the name
#'   given by `parm_pag` and used to build the output file name.
#' @param parm_pag Name of the request parameter that carries the page
#'   number.
#' @param path Directory in which the `.rds` file is written.
#' @param url Request URL.
#' @param method Either `'get'` (parameters sent as the query string),
#'   `'post'` (parameters sent as the body), or a function called as
#'   `method(.data, url, ...)` that returns an object with a `status_code`
#'   element.
#' @param ... Additional named request parameters.
#' @param wait Seconds to sleep before issuing the request.
#' @return The value of `saveRDS()` (`NULL`, invisibly). Raises an error if
#'   `method` is unsupported or the response status is not 200.
#' @export
paginate_one <- function(pag, parm_pag, path, url, method, ..., wait = 0) {
  Sys.sleep(wait)
  .data <- list(...)
  .data[[parm_pag]] <- pag
  # The function case must be tested first: comparing a closure with `==`
  # is an error, so it could never be reached after the string checks.
  # NOTE(review): `ssl.verifypeer = FALSE` presumably disables TLS peer
  # certificate verification -- confirm this is intended for all targets.
  if (is.function(method)) {
    r <- method(.data, url, ...)
  } else if (isTRUE(method == 'get')) {
    r <- httr::GET(url, query = .data,
                   config = list(ssl.verifypeer = FALSE))
  } else if (isTRUE(method == 'post')) {
    r <- httr::POST(url, body = .data,
                    config = list(ssl.verifypeer = FALSE))
  } else {
    stop("Unsupported method: expected 'get', 'post' or a function.")
  }
  if (r$status_code != 200) {
    stop('Request failed.')
  }
  nm <- sprintf('%s/%06d.rds', path, pag)
  saveRDS(r, nm)
}
# jtrecenti/crawlr documentation built on May 20, 2019, 3:17 a.m.