R/write_adl_feather.R

Defines functions write_adl_feather

Documented in write_adl_feather

#' Write a feather file to Azure Data Lake
#'
#' Serialises a data frame to a temporary feather file and uploads it to Azure
#' Data Lake at the given path with an HTTP `PUT` (sent with `overwrite=true`).
#' The bearer token is read from the `ADL_TOKEN` environment variable, so
#' `set_adl_token()` must be run before any file may be written. The temporary
#' file is removed when the function exits, whether or not the upload succeeds.
#'
#' @param df A dataframe to send to Azure. _Required parameter._
#' @param adl_file_path A string representing the adl file path, of the form
#'   `adl://<storename>.azuredatalakestore.net/path/to/file.feather`. The file
#'   name must end in `.feather`. _Required parameter._
#' @return The HTTP status code of the upload request. A 201 status code
#'   represents a successful write.
#'
#' @examples
#' \dontrun{
#'
#'  set_adl_token(tenant = "abc123", client_id = "abc123", client_secret = "abc123")
#'
#'  Day_of_week_str <- c("Monday", "Tuesday",
#'  "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")
#'
#'  Day_of_week <- c(0, 1, 2, 3, 4, 5, 6)
#'
#'  df <- data.frame(Day_of_week_str, Day_of_week)
#'
#'  write_adl_feather(df = df,
#'  adl_file_path =  "adl://<storename>.azuredatalakestore.net/path/to/file.feather")
#'
#' }
#'
#' @export
write_adl_feather <- function(df, adl_file_path) {

  if (!is.character(adl_file_path) || length(adl_file_path) != 1L ||
      is.na(adl_file_path)) {
    rlang::abort(message = "adl_file_path must be a single string")
  }

  # parse path: "<scheme>//<store name>.net/<remote file path>"
  # Split on the FIRST "//" and the FIRST ".net/" only, so that a remote path
  # which happens to contain either token again is not truncated.
  scheme_pos <- as.integer(regexpr("//", adl_file_path, fixed = TRUE))
  if (scheme_pos < 0L) {
    rlang::abort(
      message = "adl_file_path must look like adl://<storename>.net/path/file.feather"
    )
  }
  remainder <- substring(adl_file_path, scheme_pos + 2L)

  host_pos <- as.integer(regexpr(".net/", remainder, fixed = TRUE))
  if (host_pos < 0L) {
    rlang::abort(
      message = "adl_file_path must look like adl://<storename>.net/path/file.feather"
    )
  }

  # set necessary names for http request and file checking
  adl_fs_name <- substring(remainder, 1L, host_pos - 1L)
  file_name <- substring(remainder, host_pos + nchar(".net/"))

  # The extension is whatever follows the last dot of the file's base name, so
  # dots in directory names (or earlier in the file name) do not confuse it.
  base_name <- basename(file_name)
  extension <- if (grepl(".", base_name, fixed = TRUE)) {
    sub("^.*\\.", "", base_name)
  } else {
    ""
  }

  # is file correct type
  if (extension != "feather") {
    rlang::abort(message = "file extension must be feather")
  }

  temp_file_name <- tempfile(fileext = ".feather")
  # Register cleanup before the file is written so the temp file is deleted on
  # every exit path, including errors raised by the write or the upload.
  on.exit(unlink(temp_file_name), add = TRUE)

  feather::write_feather(df, temp_file_name)

  temp_upload <- httr::upload_file(temp_file_name)

  r <- suppressMessages(
    httr::PUT(
      paste0("https://", adl_fs_name, ".net", "/webhdfs/v1/",
             file_name, "?op=CREATE&overwrite=true&write=true"),
      body = temp_upload,
      httr::add_headers(
        Authorization = paste0("Bearer ", Sys.getenv("ADL_TOKEN")),
        "Transfer-Encoding" = "chunked"
      )
    )
  )

  r$status_code
}
alexhallam/flyrod documentation built on Nov. 20, 2019, 7:33 a.m.