R/getFTPFiles.R

Defines functions getFTPFiles

Documented in getFTPFiles

#' Fetch FTP Files
#'
#' Downloads a series of files from an FTP or SFTP server, reads each one as
#' a delimited table and appends them all into a single result.
#'
#' For \code{ftpType = "FTP"} each file is fetched from
#' \code{ftp://<userpwd>@<host>/<file>}. File names containing \code{.zip} are
#' downloaded and extracted, and the member named like the archive with
#' \code{.zip} replaced by \code{.csv} is read; file names containing
#' \code{.csv} are read directly; anything else raises an error.
#' For \code{ftpType = "SFTP"} the URL is \code{<host>/<file>} exactly as
#' given and the response body is written to a temporary \code{.csv} file
#' before being read; no zip extraction is attempted in this mode.
#'
#' @param host The FTP host url. A trailing "/" is appended when missing.
#'   For FTP, give it without the \code{ftp://} scheme (it is prepended).
#'   For SFTP it is used verbatim, so it presumably has to carry its own
#'   scheme (e.g. \code{sftp://}) - confirm against your server setup.
#' @param userpwd login username and password, colon separated
#'   (\code{"user:password"})
#' @param ftpType can be "FTP" or "SFTP" ... defaults to "FTP"
#' @param fileList Vector of file names (relative to \code{host}) to pull down
#' @param pattern Regex search criteria; only entries of \code{fileList} that
#'   match are downloaded. The default \code{""} keeps every file.
#' @param tempPath Optional directory for the temporary downloads. The default
#'   \code{""} uses the session temporary directory.
#' @return A single table with all ftp files appended together (the result of
#'   \code{rbind}-ing an empty \code{data.frame} with the per-file tables);
#'   an empty \code{data.frame} when no file is selected.
#'
#' @importFrom utils download.file read.csv
#'
#' @export

getFTPFiles <- function(host,
                        userpwd,
                        ftpType = "FTP",
                        fileList,
                        pattern = "",
                        tempPath = "") {

  # Remote paths are built as <host><file>, so host must end in "/".
  if (!grepl("/$", host)) {
    host <- paste0(host, "/")
  }

  # Only get files of a certain pattern; "" matches every name.
  if (length(pattern) > 0) {
    fileList <- fileList[grepl(pattern, fileList)]
  }

  # Directory that receives the downloaded files.
  if (is.null(tempPath) || !nzchar(tempPath)) {
    work_dir <- tempdir()
  } else {
    work_dir <- tempPath
  }

  # Download one file over plain FTP and return its contents as a table.
  # Temporary artefacts are removed on exit, including when a step fails.
  read_ftp <- function(file_name) {
    is_zip <- grepl("\\.zip", file_name)

    if (!is_zip && !grepl("\\.csv", file_name)) {
      stop("Unsupported filetype provided.  Only accepts 'csv' or 'zip'.")
    }

    url <- paste0("ftp://", userpwd, "@", host, file_name)

    # One temp file per download so the extension always fits the content.
    local_file <- tempfile(tmpdir = work_dir,
                           fileext = if (is_zip) ".zip" else ".csv")
    on.exit(unlink(local_file), add = TRUE)

    if (!is_zip) {
      utils::download.file(url, local_file, quiet = TRUE)
      return(suppressWarnings(
        data.table::fread(local_file, stringsAsFactors = FALSE)
      ))
    }

    # Archives must be transferred in binary mode (matters on Windows).
    utils::download.file(url, local_file, quiet = TRUE, mode = "wb")

    # Extract into a private directory so the extracted files can be removed
    # afterwards without touching anything else in the temp directory.
    extract_dir <- tempfile("unzipped_")
    on.exit(unlink(extract_dir, recursive = TRUE), add = TRUE)
    utils::unzip(local_file, exdir = extract_dir)

    # The archive is expected to hold a csv named after the archive itself.
    csv_path <- file.path(extract_dir, gsub("\\.zip", ".csv", file_name))
    suppressWarnings(data.table::fread(csv_path, stringsAsFactors = FALSE))
  }

  # Download one file via RCurl (SFTP) and return its contents as a table.
  # The response body is always treated as csv text.
  read_sftp <- function(file_name) {
    local_file <- tempfile(tmpdir = work_dir, fileext = ".csv")
    on.exit(unlink(local_file), add = TRUE)

    body <- RCurl::getURL(paste0(host, file_name),
                          userpwd = userpwd,
                          connecttimeout = 60)

    writeLines(body, local_file)
    data.table::fread(local_file)
  }

  # Check the FTP protocol.
  if (ftpType == "FTP") {
    reader <- read_ftp
  } else if (ftpType == "SFTP") {
    reader <- read_sftp
  } else {
    stop("Incorrect FTP type given.\n         Please provide either 'FTP' or 'SFTP' to the ftp parameters.")
  }

  # Read every file first, then bind once instead of growing the result
  # inside the loop. unname() keeps names on fileList from being passed to
  # rbind() as argument names. The empty data.frame seed keeps the result
  # for an empty file list identical to the historical behaviour.
  pieces <- lapply(fileList, reader)
  do.call(rbind, c(list(data.frame()), unname(pieces)))
}
blazickjoe/DataScienceLibrary documentation built on Nov. 5, 2019, 2:26 p.m.