R/download_GESDISC.R

Defines functions download_GESDISC

Documented in download_GESDISC

#' Download from EarthData GES DISC
#'
#' Function to download data from [NASA EarthData GES DISC](https://disc.gsfc.nasa.gov/) URLs.
#' @param txt_path Path to a text file generated from GES DISC that contains list of links of data file to download.
#' @param dest_folder Folder in which to save downloaded files.  If NULL, saves to working directory.
#' @param base_names Manually-generated, user-defined names for saving downloaded files.  Must be same length as number of files.
#' @param rqr_user_pwd Should an EarthData login username and password be required? Defaults to TRUE.
#' @param subset_list Select individual URLs by number.
#' @param progress_bar If TRUE, uses 'pbapply' package to generate console progress bar.
#' @param cl Cluster generated from parallel::makeCluster() to process downloads over multiple cores.  Defaults to NULL (single-threaded).
#' @return Names of files.
#' @export
#' @examples
#' download_GESDISC(txt_path = "filelist.txt", subset_list = c(2,3,4))

download_GESDISC <- function(txt_path,
                             dest_folder  = NULL,
                             base_names   = NULL,
                             rqr_user_pwd = T,
                             subset_list  = NULL,
                             progress_bar = T,
                             cl           = NULL){
  #- If txt_path supplied, import
  url <- read.table(txt_path, stringsAsFactors = F, header = F)[,1]

  #- Ask for username and password
  if (rqr_user_pwd){
    username <- rstudioapi::showPrompt(title = "Username",
                                       message = "Username",
                                       default = "")
    password <- rstudioapi::askForPassword(prompt = "Password")
    upw_insert <- paste0(username, ":", password, "@")
  }else{
    upw_insert <- ""
  }

  #- remove README file
  is.readme <- grepl(pattern = "README",
                     x       = url)
  url <- url[!is.readme]

  #- Select URLs
  if (!is.null(subset_list)){
    url <- url[subset_list]
  }

  #- Create filenames if not supplied
  nFiles <- length(url)
  if (is.null(base_names)){
    tryCatch({
      fnames <- getNames_GESDISC(url)
    }, error = function(e){
      stop(paste0("Filename generation failed.",
                  "\nPlease supply vector of file basenames to argument 'base_names'.",
                  "\nMust be same length as number of files (n=",
                  nFiles,
                  ")."))
    })
  }else{
    if (length(base_names) != length(url)){
      stop(paste0("Argument 'base_names' must be same length as number of download files (n=",
                  nFiles,
                  ")."))
    }
    fnames <- base_names
  }

  # If "?" included, remove from there back
  if (any(grepl("?", fnames, fixed=T))){
    fnames2 <- strsplit(fnames, "?", fixed = T)
    fnames2 <- do.call("c", lapply(fnames2, function(x){x[1]}))
    fnames <- fnames2
  }

  #- Assign folder name
  if (!is.null(dest_folder)){
    # add "/" if necessary
    if (substr(x     = dest_folder,
               start = nchar(dest_folder),
               stop  = nchar(dest_folder)) != "/"){
      dest_folder <- paste0(dest_folder, "/")
    }
    # concatenate filenames
    fnames <- paste0(dest_folder, fnames)
  }else{
    stop("Please specify a destination folder.")
  }

  #- If files already exist, remove from list
  if (any(file.exists(fnames))){
    already_dled <- file.exists(fnames)
    cat(paste0("Ignoring files from download list that already exist...\n"))
    cat(paste0(sum(already_dled), "/", length(fnames), " files ignored.\n"))
    fnames <- fnames[!already_dled]
    url    <- url[!already_dled]
  }

  #- Insert username/password information
  url_out <- unlist(
    lapply(X = url,
           FUN = function(x, y){
             if (grepl("https://", x, fixed = T)){
               spl_tail <- strsplit(x = x,
                                    split = "https://",
                                    fixed = T)[[1]][2]
               return(
                 paste0("https://", y, spl_tail)
                        )
             }else{
               return(x)
             }
           },
           y = upw_insert
    )
  )
  if (identical(url, url_out)){
    warning("URLs supplied do not contain 'https:// and therefore username and password are not applied.")
  }

  #- Download
  if (length(url_out) == 1){
    download.file(url      = url_out[1],
                  destfile = fnames[1],
                  mode     = "wb",
                  quiet    = F)
  }else{
    # Combine into df
    mat <- cbind(url_out, fnames)
    # apply download function over matrix with progress bar
    if (progress_bar){
      tf <- pbapply::pbapply(X      = mat,
                             MARGIN = 1,
                             FUN    = function(x){
                               download.file(url      = x[1],
                                             destfile = x[2],
                                             mode     = "wb",
                                             quiet    = T)
                             },
                             cl = cl)
    }else{
      if (is.null(cl)){
        # apply download function over matrix
        tf <- apply(X      = mat,
                    MARGIN = 1,
                    FUN    = function(x){
                      download.file(url      = x[1],
                                    destfile = x[2],
                                    mode     = "wb",
                                    quiet    = T)
                    })
      }else{
        parallel::parApply(cl = cl,
                           X      = mat,
                           MARGIN = 1,
                           FUN    = function(x){
                             download.file(url      = x[1],
                                           destfile = x[2],
                                           mode     = "wb",
                                           quiet    = T)
                           })
      }
    }
  }

  #- Return names
  invisible(fnames)
}
ssaxe-usgs/modelCollect documentation built on Aug. 20, 2019, 9:44 a.m.