R/getNames_GESDISC.R

Defines functions getNames_GESDISC

Documented in getNames_GESDISC

#' Extract filename information from EarthData GES DISC URLs
#'
#' Function to extract filenames from [NASA EarthData GES DISC](https://disc.gsfc.nasa.gov/) file download URLs.
#' @param path Path to a text file generated from GES DISC that contains list of links of data file to download.
#' @return Names of files.
#' @export
#' @examples
#' getNames_GESDISC(path = "subset_NLDAS_MOS0125_M_V002_20181109_191725.txt")

getNames_GESDISC <- function(url){
  # Split by "&" symbol
  url_split <- strsplit(url, split = "&", fixed = T)
  # Extract filename and variables and create new name
  return_info <- function(x){
    # find indices of LABEL and VARIABLE
    label_ind <- unlist(lapply(x, function(y){
      grepl(pattern = "LABEL",
            x = y,
            fixed = T)
    }))
    var_ind <- unlist(lapply(x, function(y){
      grepl(pattern = "VARIABLE",
            x = y,
            fixed = T)
    }))
    # extract LABEL and VARIABLE
    label_info <- x[label_ind]
    var_info <- x[var_ind]
    # get basename from LABEL
    label_info <- gsub(pattern = "LABEL=",
                       replacement = "",
                       x = label_info,
                       fixed = T)
    label_info_basename <- tools::file_path_sans_ext(label_info)
    label_info_ext      <- tools::file_ext(label_info)
    # return label_info if no variable information
    if (sum(var_ind) == 0) return(label_info)
    # Get variables from VAR
    var_info <- gsub(pattern = "VARIABLES=",
                     replacement = "",
                     x = var_info,
                     fixed = T)
    var_names <- unlist(strsplit(x = var_info,
                                 split = "%2",
                                 fixed = T))
    var_names <- paste(var_names, collapse = "_")
    # Create final filename
    fn_out <- paste0(label_info_basename,
                     ".",
                     "Variables_",
                     var_names,
                     ".",
                     label_info_ext)
    # return
    return(fn_out)
  }
  # get filenames
  fnames <- unlist(lapply(url_split, return_info))
  # return
  return(fnames)
}
ssaxe-usgs/modelCollect documentation built on Aug. 20, 2019, 9:44 a.m.