R/download_to_file.R

Defines functions get_diag_message download_to_file

# Internal function used to download results to a file 
download_to_file <- function(url,
                             outfile,
                             binary_file=FALSE,
                             caching=sbdi_config()$caching,
                             verbose=sbdi_config()$verbose,
                             on_redirect=NULL,
                             on_client_error=NULL,
                             on_server_error=NULL,...) {
 
  assert_that(is.notempty.string(url))
  ## download from a URL using RCurl to a file
  ## we do this directly using RCurl to file, rather than reading into R memory and then dumping to file
  if (missing(outfile)) {
    outfile <- sbdi_cache_filename(url)
  } else {
    assert_that(is.string(outfile), is.dir(dirname(outfile))) ## check that outfile is a string and that it points to a valid directory
  }
  assert_that(is.flag(verbose))
  
  ## first check for zero-size cached files
  if (file.exists(outfile) && !(file.info(outfile)$size>0)) {
    ## file exists but is zero sized
    unlink(outfile)
  }
  if (nchar(url) > getOption("ALA4R_server_config")$server_max_url_length) warning("URL length may be longer than is allowed by the server")
  
  ## are we using cached results?
  if ((caching %in% c("off", "refresh")) || (! file.exists(outfile))) {
    if (verbose && (caching != "off")) message(sprintf("Caching %s to file %s", url, outfile))
    ## either we are not using caching, or we want to refresh the cache, or the file doesn't exist in the cache
    if (verbose) {
      get <- GET(url, write_disk(outfile, overwrite=TRUE), 
                 user_agent(sbdi_config()$user_agent), 
                 verbose(data_out=FALSE, data_in=FALSE, info=FALSE, ssl=FALSE)) 
    } else { 
      get <- GET(url, write_disk(outfile, overwrite=TRUE), 
                 user_agent(sbdi_config()$user_agent)) }
    status_code <- status_code(get)
    ## check http status here
    ## if unsuccessful, delete the file from the cache first, after checking if there's any useful info in the file body
    diag_message <- ""
    if ((substr(status_code, 1, 1)=="5") || (substr(status_code, 1, 1)=="4")) {
      headers <- headers(get)
      if (exists("content-length",where=headers) && (as.numeric(headers["content-length"][1])<10000)) {
        ## if the file body is not too big, check to see if there's any useful diagnostic info in it
        diag_message <- get_diag_message(outfile)
      }
      unlink(outfile)
    }
    ## check status code of response. Note that we execute the on_redirect etc functions, but we don't capture the output. might wish to implement this differently?
    check_status_code(status_code, 
                      on_redirect=on_redirect, 
                      on_client_error=on_client_error, 
                      on_server_error=on_server_error, 
                      extra_info=diag_message)
  } else {
    if (verbose) message(sprintf("Using cached file %s for %s", outfile, url))
  }
  outfile
}

get_diag_message <- function(jsonfile) {
 ALA4R:::get_diag_message(jsonfile) 
}
bioatlas/r-functionality documentation built on Nov. 1, 2020, 3:42 a.m.