# R/dlGsodStations.R
#
# Defines functions: dlGsodStations
#
# Documented in: dlGsodStations

#' Download (and extract) GSOD data
#' 
#' @description
#' Download (and optionally extract) the yearly \code{*.op.gz} files of a 
#' GSOD station based on its unique USAF code. Files that already exist in 
#' \code{dsn} are not downloaded again.
#' 
#' @param usaf Numeric. A unique USAF station code. It can either be manually 
#' determined from built-in \code{gsodstations}, or retrieved from spatial 
#' subsetting (see \code{\link{stationFromCoords}}, 
#' \code{\link{stationFromExtent}}).
#' @param start_year Numeric. The desired year to start data acquisition. If not 
#' supplied, download starts from the first year of measurement. Values 
#' preceding the first year of measurement are raised to that year.
#' @param end_year Numeric. The desired year to stop data acquisition. If not 
#' supplied, download stops at the last year of measurement. Values exceeding 
#' the last year of measurement are lowered to that year.
#' @param dsn Character, default is the current working directory. Destination 
#' folder for data download. 
#' @param unzip Logical, default is FALSE. If TRUE, *.gz files are unpacked and 
#' merged after download. 
#' @param rm_gz Logical, default is FALSE. If TRUE, the *.gz files of the 
#' requested years are removed from \code{dsn} after extraction. If 
#' \code{unzip = FALSE}, this argument is ignored. 
#' @param ... Additional arguments passed to \code{\link{gzGsodStations}}.
#' 
#' @return
#' If \code{unzip = FALSE}, a character vector of *.gz file paths (one per 
#' requested year, located in \code{dsn}). A path is returned even if the 
#' corresponding download failed, in which case a warning is issued and the 
#' file does not exist. If \code{unzip = TRUE}, the object returned by 
#' \code{\link{gzGsodStations}} (an object of class \code{data.frame}).
#' 
#' @author
#' Florian Detsch
#' 
#' @examples
#' \dontrun{
#' moshi = subset(
#'   gsodstations
#'   , `STATION NAME` == "MOSHI"
#' )
#' 
#' # download data from moshi, tanzania, from 1990 to 1995
#' gsod_moshi = dlGsodStations(
#'   usaf = moshi$USAF
#'   , start_year = 1990
#'   , end_year = 1995
#'   , dsn = tempdir()
#'   , unzip = TRUE
#' )
#' 
#' # plot temperature data
#' plot(
#'   TEMP ~ YEARMODA
#'   , data = gsod_moshi
#'   , type = "l"
#' )
#' }
#' 
#' @export
dlGsodStations <- function(usaf,
                           start_year = NA, 
                           end_year = NA,
                           dsn = ".", 
                           unzip = FALSE, 
                           rm_gz = FALSE,
                           ...) {
  
  # `rm_gz` only takes effect if the archives are actually extracted
  rm_gz <- isTRUE(unzip) && isTRUE(rm_gz)
  
  # Extract desired station from list of GSOD stations. Standard indexing is
  # used instead of subset() (which is meant for interactive use); which()
  # drops rows where the comparison yields NA, just like subset() does.
  dl_usaf <- gsodstations[which(gsodstations$USAF == usaf), ]
  
  # Fail early with a clear message instead of a cryptic error further down
  if (nrow(dl_usaf) == 0L) {
    stop("No GSOD station found for USAF code ", usaf, ".", call. = FALSE)
  }

  # If not supplied, set start_year and end_year to whole temporal range of
  # available measurements (first four characters of BEGIN / END = year)
  start_measurement <- as.numeric(substr(dl_usaf$BEGIN, 1, 4))
  end_measurement <- as.numeric(substr(dl_usaf$END, 1, 4))

  if (is.na(start_year)) {
    start_year <- start_measurement
  }
  if (is.na(end_year)) {
    end_year <- end_measurement
  }
  
  # Else, verify user-defined temporal range and adjust start year / end year
  # if `start_year < start_measurement` / `end_year > end_measurement`
  start_year <- max(start_year, start_measurement)
  end_year <- min(end_year, end_measurement)
  
  # Missing BEGIN / END entries propagate as NA through max() / min()
  if (is.na(start_year) || is.na(end_year)) {
    stop("Cannot determine the measurement period of GSOD station ", 
         dl_usaf$USAF, " (", dl_usaf$`STATION NAME`, ").", call. = FALSE)
  }
  
  # Throw error in case of false user input, i.e. `start_year > end_year`
  if (start_year > end_year) {
    stop("Skipping GSOD station ", dl_usaf$USAF, " (", dl_usaf$`STATION NAME`, 
         "): Start year is higher than end year!", call. = FALSE)
  }
  
  cat("Processing GSOD station ", dl_usaf$USAF, " (", 
      dl_usaf$`STATION NAME`, ") ... \n", sep = "")
  
  # Download op.gz of current station per year
  fls_gz <- vapply(start_year:end_year, function(year) {
    # Basename of both URL and destfile
    dlbase <- paste0(dl_usaf$USAF, "-", dl_usaf$WBAN, "-", year, ".op.gz")
    # URL
    dlurl <- paste0("ftp://ftp.ncdc.noaa.gov/pub/data/gsod/", year, "/", 
                    dlbase)
    # Destination file
    dlfile <- file.path(dsn, dlbase)
    
    # Check if destination file already exists, else proceed to download
    if (file.exists(dlfile)) {
      cat("File", dlfile, "already exists. Proceeding to next file... \n")
    } else {
      # Best effort: a failed download for one year must not abort the 
      # remaining years, hence errors are turned into warnings. Archives are
      # binary, so request binary mode explicitly.
      status <- tryCatch(
        utils::download.file(dlurl, dlfile, mode = "wb"),
        error = function(e) {
          warning("Download of ", dlurl, " failed: ", conditionMessage(e), 
                  call. = FALSE)
          1L
        }
      )
      
      # Remove incomplete leftovers; otherwise they would be mistaken for an 
      # already downloaded file on the next run
      if (!isTRUE(status == 0) && file.exists(dlfile)) {
        file.remove(dlfile)
      }
    }
    
    dlfile
  }, character(1))
  
  # Return downloaded .gz files if no extraction is requested
  if (!unzip) {
    return(fls_gz)
  }
  
  # Unzip and merge downloaded files
  df <- gzGsodStations(usaf = usaf, 
                       dsn = dsn, 
                       start_year = start_year, 
                       end_year = end_year, 
                       ...)
  
  # Remove the archives of the requested years after extraction (optional)
  if (rm_gz) {
    gz_left <- fls_gz[file.exists(fls_gz)]
    if (length(gz_left) > 0L) {
      file.remove(gz_left)
    }
  }
  
  df
}
# environmentalinformatics-marburg/GSODTools documentation built on Jan. 5, 2024, 12:19 a.m.