R/read_adl_feather.R

Defines functions read_adl_feather

Documented in read_adl_feather

#' Read a feather from Azure Data Lake
#'
#' Read a feather from Azure Data Lake given a path to the file. The file is
#' requested through the WebHDFS `OPEN` operation, written to a temporary
#' file and read with `feather::read_feather()`; the temporary file is removed
#' when the function exits. `set_adl_token()` must be run before any file may
#' be read.
#'
#' The function aborts when `adl_file_path` is not a single string containing
#' both a `//` separator and a `.net/` host suffix followed by a file path,
#' when the `ADL_TOKEN` environment variable is empty, or (with class
#' `error_bad_response`) when the HTTP request reports an error status.
#'
#' @param adl_file_path A string representing the adl file path. _Required parameter._
#' @return a tibble
#' @importFrom feather read_feather
#' @importFrom rlang abort
#'
#' @examples
#' \dontrun{
#'
#'  set_adl_token(tenant = "abc123", client_id = "abc123", client_secret = "abc123")
#'  df <- read_adl_feather(
#'  adl_file_path =  "adl://<storename>.azuredatalakestore.net/path/to/file.feather"
#'  )
#'
#' }
#'
#' @export
read_adl_feather <- function(adl_file_path) {

  # Validate the argument up front so a bad path yields a clear error rather
  # than a request to a URL built from `NA` pieces.
  if (!is.character(adl_file_path) || length(adl_file_path) != 1L ||
      is.na(adl_file_path)) {
    rlang::abort(
      message = "`adl_file_path` must be a single, non-missing string."
    )
  }

  # Split on the FIRST "//" and the FIRST ".net/" only, so that later
  # occurrences of either separator inside the file path are preserved.
  scheme_pos <- as.integer(regexpr("//", adl_file_path, fixed = TRUE))
  after_scheme <- if (scheme_pos > 0L) {
    substring(adl_file_path, scheme_pos + 2L)
  } else {
    ""
  }
  host_pos <- as.integer(regexpr(".net/", after_scheme, fixed = TRUE))
  if (host_pos > 0L) {
    adl_fs_name <- substr(after_scheme, 1L, host_pos - 1L)
    file_name <- substring(after_scheme, host_pos + nchar(".net/"))
  } else {
    adl_fs_name <- ""
    file_name <- ""
  }

  if (!nzchar(adl_fs_name) || !nzchar(file_name)) {
    rlang::abort(
      message = paste0(
        "`adl_file_path` must look like ",
        "\"adl://<storename>.azuredatalakestore.net/path/to/file.feather\"."
      )
    )
  }

  if (identical(Sys.getenv("ADL_TOKEN"), "")) {
    rlang::abort(message = "ADL_TOKEN must be set. See `?set_adl_token`.")
  }

  r <- httr::GET(
    paste0("https://", adl_fs_name, ".net", "/webhdfs/v1/",
           file_name, "?op=OPEN&read=true"),
    httr::add_headers(
      Authorization = paste0("Bearer ", Sys.getenv("ADL_TOKEN"))
    )
  )

  # Raise a classed condition ("error_bad_response") carrying `arg`, `must`
  # and the offending status code in `not`. The class is passed positionally
  # on purpose: the name of that argument differs between rlang versions.
  abort_bad_response <- function(arg, must,  not = NULL) {
    msg <- glue::glue("{arg} must {must}")
    if (!is.null(not)) {
      not <- httr::status_code(not)
      msg <- glue::glue("{msg}; not {not}.
                        make sure `adl_file_path` is correct")
    }

    rlang::abort("error_bad_response",
          message = msg,
          arg = arg,
          must = must,
          not = not
    )
  }

  if (httr::http_error(r)) {
    abort_bad_response("Azure Data Lake API request failed http respose",
                       must = "have status 200", not = r)
  }

  # Stage the payload on disk because `read_feather()` reads from a path.
  # The staging file is deleted on exit, including when reading fails.
  tmp <- tempfile(fileext = ".feather")
  on.exit(unlink(tmp), add = TRUE)

  # Request the body as raw bytes explicitly instead of relying on
  # Content-Type based parsing, so `writeBin()` always receives a raw vector.
  writeBin(httr::content(r, as = "raw"), tmp)

  feather::read_feather(path = tmp)
}
alexhallam/flyrod documentation built on Nov. 20, 2019, 7:33 a.m.