R/fetch_data.R

Defines functions fetch_data

Documented in fetch_data

#' Fetch Data from AARNET Cloudstor, OneDrive, Sharepoint, GDrive, GitHub or any Other Web-based Provider
#'
#' Given a URL that resolves to a downloadable data file, `fetch_data()` will
#' attempt to download and import the data.
#'
#' @param url User provided URL generated by "sharing" a file of any of the
#' file types supported by [rio::import()]; also works with raw GitHub URLs or
#' any other provider from which a raw data file can be downloaded and
#' imported. Character.
#' @param which User provided spreadsheet for '.xlsx', 'Google Sheets' and
#'  '.ods' files either as the number of the sheet in order or the name of the
#'  sheet. For .Rdata objects it can be an object name. See [rio::import()] for
#'  further details.
#' @param file_ext Optional user provided file type extension. If provided,
#'  `fetch_data()` will attempt to use this as instruction about the file
#'  type being imported, _e.g._ "xlsx" or "csv". If not provided,
#'  `fetch_data()` will do its best to determine the file type automagically and
#'  import the file. Character.
#' @export fetch_data
#' @return A `data.frame` of data derived from any '.csv', '.xls(x)', '.txt',
#' or '.ods' file provided that \R can import.
#' @examples
#' # Fetch a .Rda file from GitHub
#' P_sojae <-
#'   fetch_data(url = "https://github.com/openplantpathology/hagis/blob/master/data/P_sojae_survey.rda?raw=true")
#' summary(P_sojae)

fetch_data <- function(url,
                       which = NULL,
                       file_ext = NULL) {
  # Download (when necessary) and import the data file that `url` points to.
  #
  # url:      single character string, the location of the data file.
  # which:    optional sheet/object selector; forwarded to rio::import() only
  #           when supplied.
  # file_ext: optional file extension ("xlsx" or ".xlsx") naming the type of
  #           the downloaded file. When NULL it is taken from the URL path.
  #
  # Returns whatever rio::import() produces. For ".txt" URLs and for
  # downloaded files the result is passed through as.data.frame().
  # Stops with an error when `url` or `file_ext` is malformed, or when a file
  # has to be downloaded but its type can neither be read from the URL nor
  # from `file_ext`.

  if (!is.character(url) || length(url) != 1L || is.na(url) || !nzchar(url)) {
    stop("`url` must be a single, non-empty character string.",
         call. = FALSE)
  }
  if (!is.null(file_ext) &&
      (!is.character(file_ext) || length(file_ext) != 1L ||
       is.na(file_ext) || !nzchar(file_ext))) {
    stop("`file_ext` must be NULL or a single, non-empty character string.",
         call. = FALSE)
  }

  # Only hand `which` to rio when the user supplied it, so that rio's own
  # default applies otherwise.
  import_file <- function(file) {
    if (is.null(which)) {
      rio::import(file = file)
    } else {
      rio::import(file = file, which = which)
    }
  }

  # Google Sheets, and URLs that mention csv, .rda or .rds, are handed
  # straight to rio, which fetches remote files itself. Fixed matching keeps
  # the "." in the patterns literal.
  if (grepl("docs.google.com/spreadsheets", url, fixed = TRUE) ||
      grepl("csv", url, fixed = TRUE) ||
      grepl(".rda", url, fixed = TRUE) ||
      grepl(".rds", url, fixed = TRUE)) {
    return(import_file(url))
  }

  # URLs that mention txt are imported directly as well
  if (grepl("txt", url, fixed = TRUE)) {
    return(as.data.frame(import_file(url)))
  }

  # Otherwise the file is downloaded first. rio picks the importer from the
  # file extension, so settle the extension before touching the network.
  if (is.null(file_ext)) {
    # drop the query/fragment and the scheme + host, then read the extension
    url_path <- sub("[?#].*$", "", url)
    url_path <- sub("^[A-Za-z][A-Za-z0-9+.-]*://[^/]*", "", url_path)
    file_ext <- tools::file_ext(url_path)
    if (!nzchar(file_ext)) {
      stop("Unable to determine the file type from `url`; ",
           "please supply `file_ext`, e.g. \"xlsx\" or \"csv\".",
           call. = FALSE)
    }
  }
  # accept the extension with or without the leading "."
  if (!startsWith(file_ext, ".")) {
    file_ext <- paste0(".", file_ext)
  }

  # if the file is in OneDrive modify the URL to download the file
  if (grepl("https://usqprd-my.sharepoint.com", url)) {
    url <- sub("\\?.*$", "?download=1", url)
  }

  # A unique temporary file avoids clashing with earlier downloads; it is
  # removed again once the data have been read into memory.
  dest <- tempfile(pattern = "fetch_data_", fileext = file_ext)
  on.exit(unlink(dest), add = TRUE)

  # NOTE(review): placeholder basic-auth credentials are attached to every
  # download request; kept as in the original -- confirm whether any provider
  # actually needs them.
  h <- curl::new_handle()
  curl::handle_setopt(
    handle = h,
    httpauth = 1,
    userpwd = "user:passwd"
  )

  curl::curl_download(url = url,
                      destfile = dest,
                      mode = "wb",
                      handle = h)

  as.data.frame(import_file(dest))
}
adamhsparks/usq.scp documentation built on Feb. 8, 2020, 5 a.m.