R/ftp_list.R

Defines functions to_tbl to_df strexg strex parse_files ftp_list

Documented in ftp_list

#' List files on an FTP server
#'
#' @export
#' @param url (character) URL for the FTP server
#' @param just_list (logical) list files only? Default: `FALSE`
#' @param messages (logical) verbose messages. Default: `FALSE`
#' @param ... further args passed on to \pkg{curl}
#' @return a tibble (i.e., a `data.frame`)
#' @examples \dontrun{
#' ftp_list("ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/")
#'
#' # just list files
#' ftp_list("ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/", TRUE)
#'
#' # more examples
#' ftp_list("ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/by_year/")
#' ftp_list("ftp://ftp.ncdc.noaa.gov/pub/data/noaa/2014/")
#' }
ftp_list <- function(url, just_list = FALSE, messages = FALSE, ...) {
  hand <- curl::new_handle()
  curl::handle_setheaders(
    hand,
    .list = list(Authorization = "Basic anonymous:myrmecocystus@gmail.com"))
  if (messages) curl::handle_setopt(hand, verbose = TRUE)
  if (just_list) curl::handle_setopt(hand, dirlistonly = TRUE)
  gg <- curl::curl_fetch_memory(url, handle = hand)
  gg <- rawToChar(gg$content)
  if (just_list) {
    to_tbl(utils::read.delim(text = gg, header = FALSE,
                             stringsAsFactors = FALSE))
  } else {
    to_df(parse_files(gg))
  }
}

parse_files <- function(x) {
  x <- strsplit(x, "\n")[[1]]
  lapply(x, function(z) {
    perm <- strex(z, "^[a-z-]+")
    dir <- strex(z, "[0-9]\\s[a-z]+")
    group <- strex(z, "csdb-ops|1005")
    #size <- strex(z, "[0-9]{2,}")
    size <- strexg(z, "[0-9]{2,}")[[1]][2]
    date <- strex(z, "[A-Za-z]{3}\\s+[0-9]{1,2}\\s+[0-9]{2}:[0-9]{2}|[A-Za-z]{3}\\s+[0-9]{1,2}\\s+[0-9]{4}")
    file <- strex(z, "[A-Za-z0-9._-]+$")
    tmp <- list(perm = perm, dir = dir, group = group, size = size,
         date = date, file = file)
    tmp[vapply(tmp, length, 1) == 0] <- ""
    tmp
  })
}

strex <- function(string, pattern) {
  regmatches(string, regexpr(pattern, string))
}

strexg <- function(string, pattern) {
  regmatches(string, gregexpr(pattern, string))
}

to_df <- function(x) {
  tibble::as_tibble((data.table::setDF(
    data.table::rbindlist(x, use.names = TRUE, fill = TRUE))))
}

to_tbl <- function(x) tibble::as_tibble(x)
ropensci/ftp documentation built on Sept. 3, 2020, 3:34 a.m.