SFNRC: APIs and Tools for Processing Data from DataForEver and DBHYDRO

Documented in downloadNWS

#' @title Function bulk downloads National Weather Service precipitation geotiff files
#'
#' @description Useful for automating download of large numbers of NWS spatial precipitation layers, and easily download new data. Stage 3 and Stage 4 data can be found at https://water.noaa.gov/resources/downloads/precip/
#'
#' @param data_folder parent directory for data to be deposited. If this does not exist, it will be created.
#' @param overwrite_data TRUE/FALSE value indicating whether existing files should be overwritten. If TRUE, all data are downloaded fresh.
#' @param stage3_filename example of a stage 3 filename. YYYYMMDD is a token that will be replaced with actual dates. Adjusting this can accommodate odd filenames (e.g., stage 3 2017; stage 4 2016) and download different data types (e.g., 1 day, last 180 days)
#' @param stage4_filename example of a stage 4 filename. YYYYMMDD is a token that will be replaced with actual dates.
#' @param day_range numeric range of days, used in identifying files to download. Default only downloads data from the first of the month.
#' @param month_range numeric range of months, used in identifying files to download. Default downloads data from all months.
#' @param year_range numeric range of years, used in identifying files to download. Default downloads data from all years (filtered by available Stage 3/Stage 4 data).
#'
#' @return NULL \code{downloadNWS} downloads files to the provided `data_folder` directory. 
#' 
#' @importFrom terra rast
#' @importFrom terra writeRaster
#' 
#' @export
#'
#' @examples
#' 2+2
downloadNWS <- function(data_folder,
                         overwrite_data = FALSE, # if TRUE, all data is downloaded fresh
                         stage3_filename = 'nws_precip_mtd_YYYYMMDD.tif', # example of a stage 3 filename. YYYYMMDD is a token that will be replaced with actual dates.
                         stage4_filename = 'nws_precip_mtd_YYYYMMDD_conus.tif',
                         day_range       = 1, # range of days to download from each month. Default only downloads data from the first of the month
                         month_range     = 1:12,
                         year_range      = 2005:as.numeric(format(Sys.Date(), format = '%Y'))
) {
  
  ### identify which stages to download data for, based on years provided
  stage <- c('stage3', 'stage4')
  if (!any(2005:2017 %in% year_range)) {
    stage <- stage[-c(which(stage %in% 'stage3'))]
  }
  if (!any(2016:as.numeric(format(Sys.Date(), format = '%Y')) %in% year_range)) {
    stage <- stage[-c(which(stage %in% 'stage4'))]
  }
  
  filename <- c(stage3_filename, # stage 3 filename pattern
                stage4_filename) # stage 4 filename pattern
  
  if (any(grepl(x = stage, pattern = '3'))) {
    if (!dir.exists(file.path(data_folder, 'stage3'))) {
      dir.create(file.path(data_folder, 'stage3'), recursive = TRUE)
    }
    ### create list of dates, then urls
    years                <- as.character(year_range[year_range %in% 2005:2017])
    months               <- as.character(month_range) #c('01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12')
    months[nchar(months) < 2]      <- paste0('0', months[nchar(months) < 2])
    days                 <- as.character(day_range)
    days[nchar(days) < 2]<- paste0('0', days[nchar(days) < 2])
    targetDates          <- expand.grid(years, months, days)
    targetDates$ymd      <- paste0(targetDates$Var1, targetDates$Var2, targetDates$Var3)
    # targetDates        <- targetDates[!grepl(x = targetDates$ymd, pattern = '20050101'), ]
    targetDates$ymd2     <- paste(targetDates$Var1, targetDates$Var2, targetDates$Var3, sep = '/')
    targetDates$url      <- paste0('https://water.noaa.gov/resources/downloads/precip/stageIII/', targetDates$ymd2, '/', mapply(gsub, filename[1], pattern = 'YYYYMMDD', replacement = targetDates$ymd, USE.NAMES = FALSE))
    targetDates$basename <- basename(targetDates$url)
    targetDates$download_location   <- file.path(data_folder, 'stage3', targetDates$basename)
    existing_files       <- list.files(file.path(data_folder, 'stage3'), full.names = FALSE)
    targetDates$exists_in_directory <- ifelse(targetDates$basename %in% existing_files, 1, 0)
    
    if (overwrite_data == FALSE) {
      targetDates <- targetDates[targetDates$exists_in_directory == 0, ] # remove existing data from list of impending downloads
    }
    
    ### now download and save
    for (i in 1:nrow(targetDates)) {
      message('Downloading Stage 3 data for ', targetDates$ymd2[i])
      tryCatch(
        tmp <- suppressWarnings(terra::rast(targetDates$url[i])), 
        error = function(cond) {
          message(paste("URL does not seem to exist:", targetDates$url[i]))
        }
      )
      if (exists('tmp')) {
        terra::writeRaster(x = tmp, filename = targetDates$download_location[i], overwrite = TRUE)
        rm(tmp)
      }
    }
    message('Stage 3 data download complete!')
    # tst <- terra::rast(targetDates$download_location[i])  
  }
  if (any(grepl(x = stage, pattern = '4'))) {
    if (!dir.exists(file.path(data_folder, 'stage4'))) {
      dir.create(file.path(data_folder, 'stage4'), recursive = TRUE)
    }
    ### create list of dates, then urls. July 2016 is the first month that follows this pattern. Jan-Jun 2016 can be downloaded manually.
    years                <- as.character(year_range[year_range %in% 2016:as.numeric(format(Sys.Date(), format = '%Y'))])
    months               <- as.character(month_range) #c('01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12')
    months[nchar(months) < 2] <- paste0('0', months[nchar(months) < 2])
    # years <- as.character(2016:as.numeric(format(Sys.Date(), format = '%Y')))
    # months<- c('01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12')
    days                 <- as.character(day_range)
    days[nchar(days) < 2]<- paste0('0', days[nchar(days) < 2])
    targetDates          <- expand.grid(years, months, days)
    targetDates$ymd      <- paste0(targetDates$Var1, targetDates$Var2, targetDates$Var3)
    ### remove first part of 2016
    # targetDates        <- targetDates[!(targetDates$ymd %in% paste0('20160',1:6, '01')), ]
    ### remove future months from current year
    current_mo           <- format(Sys.Date(), format = '%m')
    available_months     <- 1:as.numeric(current_mo)
    months_to_remove     <- as.character(c(1:12)[!(c(1:12) %in% available_months)])
    months_to_remove[nchar(months_to_remove) == 1] <- paste0('0', months_to_remove[nchar(months_to_remove) == 1])
    dates_to_remove      <- paste0(format(Sys.Date(), format = '%Y'), months_to_remove, '01')
    targetDates          <- targetDates[!(targetDates$ymd %in% dates_to_remove), ]
    targetDates$ymd2     <- paste(targetDates$Var1, targetDates$Var2, targetDates$Var3, sep = '/')
    targetDates$url      <- paste0('https://water.noaa.gov/resources/downloads/precip/stageIV/', targetDates$ymd2, "/", mapply(gsub, filename[2], pattern = 'YYYYMMDD', replacement = targetDates$ymd, USE.NAMES = FALSE))
    targetDates$basename <- basename(targetDates$url)
    targetDates$download_location   <- file.path(data_folder, 'stage4', targetDates$basename)
    existing_files       <- list.files(file.path(data_folder, 'stage4'), full.names = FALSE)
    targetDates$exists_in_directory <- ifelse(targetDates$basename %in% existing_files, 1, 0)
    
    if (overwrite_data == FALSE) {
      targetDates <- targetDates[targetDates$exists_in_directory == 0, ] # remove existing data from list of impending downloads
    }
    
    ### now download and save
    for (i in 1:nrow(targetDates)) {
      message('Downloading Stage 4 data for ', targetDates$ymd2[i])
      tryCatch(
        tmp <- suppressWarnings(terra::rast(targetDates$url[i])), 
        error = function(cond) {
          message(paste("URL does not seem to exist:", targetDates$url[i]))
        }
      )
      if (exists('tmp')) {
        terra::writeRaster(x = tmp, filename = targetDates$download_location[i], overwrite = TRUE)
        rm(tmp)
      }
    }
    message('Stage 4 data download complete!')
  }
  invisible()
}