R/00.11_download_hubeau.R

#' @import dplyr
#' @export
download_hubeau <- function(api, query, iter = 1, summarise_function = identity) {
  if (iter == 1)
    cat("\nLes données de concentration des pesticides dans les cours d'eau sont téléchargées en utilisant l'API HUB'EAU:\nhttp://hubeau.eaufrance.fr/page/api-qualite-cours-deau\n")
  
  # Fonctions internes
  build_query <- function(api, query) {
    params <- names(query)
    values <- query
    
    paste0(api, "?", paste(paste0(params, "=", values), collapse = "&"))
  }
  
  download_i <- function(i, group, api, query, iter) {
    
    query_i <- query
    query_i[[group]] <- i
    
    download_hubeau(api = api,
                    query = query_i,
                    iter = iter + 1)
  }
  
  # Préparation de l'API
  queryAPI <- purrr::map(.x = query,
             .f = function(x) {
               if (length(x) > 1) {
                 paste(x, collapse = ",")
               } else {
                 x
               }
             })
  
  # Premier essai
  df <- NULL
  p <- 1
  api_call <- build_query(api, c(queryAPI,
                                 size = 5000,
                                 page = p))
  
  if (nchar(api_call) > 2083) {
    length_params <- sapply(query, length)
    group <- names(which(length_params == max(length_params[length_params > 1])))
    
    if (iter == 1) {
      future::plan("multiprocess")
    } else {
      future::plan("sequential")
    }
    
    df <- furrr::future_map(.x = query[[group]],
                     .f = download_i,
                     group = group, api = api, query = query, iter = iter,
                     .progress = TRUE) %>% 
      bind_rows()
    
  } else {
    res <- httr::GET(api_call)
    
    if (res$status_code == 200) {
      if (httr::content(res, as = "text") != "") {
        df <- readr::read_delim(httr::content(res, as = "text"), 
                         delim = ";", 
                         locale = readr::locale(decimal_mark = ","),
                         col_types = readr::cols(.default = readr::col_character()))
      }
    }
    
    if (res$status_code == 206) {
      page_max <- stringr::str_match(res$headers$link,
                            "rel=\"first(.*)rel=\"last") %>% 
        '['(2) %>% 
        stringr::str_match(string = .,
                  "page=(.*)&size=") %>% 
        '['(2) %>% 
        as.numeric()
      
      size <- stringr::str_match(res$headers$link,
                        "rel=\"first(.*)rel=\"last") %>% 
        '['(2) %>% 
        stringr::str_match(string = .,
                  "size=(.*)>;") %>% 
        '['(2) %>% 
        as.numeric()
      
      if (page_max * size > 20000) {
        length_params <- sapply(query, length)
        group <- names(which(length_params == max(length_params[length_params > 1])))
        
        if (iter == 1) {
          future::plan("multiprocess")
        } else {
          future::plan("sequential")
        }
        
        df <- furrr::future_map(.x = query[[group]],
                         .f = download_i,
                         group = group, api = api, query = query, iter = iter,
                         .progress = TRUE) %>% 
          bind_rows()
      } else {
        df <- readr::read_delim(httr::content(res, as = "text"), 
                         delim = ";", 
                         locale = readr::locale(decimal_mark = ","),
                         col_types = readr::cols(.default = readr::col_character()))
        
        while (grepl("next", res$headers$link))
        {
          nexturl <- stringr::str_match(res$header$link,
                               "\"last\"<(.*)>; rel=\"next\"")[1,2]
          
          res <- httr::GET(nexturl)
          df <- bind_rows(df,
                          readr::read_delim(httr::content(res, as = "text"),
                                     delim = ";",
                                     locale = readr::locale(decimal_mark = ","),
                                     col_types = readr::cols(.default = readr::col_character())))
        }
      }
    }
    
    if (!res$status_code %in% c(200, 206))
      df <- res
  }
  summarise_function(df)
}
  
CedricMondy/bnvd documentation built on June 25, 2019, 5:57 p.m.