#' Validate Years
#'
#' @param years User entered years for request
#' @keywords internal
#' @return None unless error in years being requested by users
#' @autoglobal
#' @noRd
.validate_years <- function(years) {
if (inherits(years, what = "character")) {
stop(call. = FALSE,
"Years must be entered as a numeric value.")
}
this_year <- 1900 + as.POSIXlt(Sys.Date())$year
for (i in years) {
if (i <= 0) {
stop("\nThis is not a valid year.\n")
} else if (i < 1929) {
stop(call. = FALSE,
"\nThe GSOD data files start at 1929, you have entered a year prior
to 1929.\n")
} else if (i > this_year) {
stop(call. = FALSE,
"\nThe year cannot be greater than current year.\n")
}
}
return(invisible(NULL))
}
#' Validate Station IDs
#'
#' @param station User entered station ID
#' @param isd_history isd_history.csv from NCEI provided by GSODR
#' @keywords internal
#' @return None unless an error with the years or invalid station ID
#' @autoglobal
#' @noRd
.validate_station_id <- function(station, isd_history) {
if (!station %in% isd_history$STNID) {
stop(
call. = FALSE,
"\n",
paste0(station),
" is not a valid station ID number, please check your entry.\n",
"Valid Station IDs can be found in the isd-history.txt file\n",
"available from the US NCEI server by combining the USAF and\n",
"WBAN columns, e.g. '007005' '99999' is '007005-99999' from this\n",
"file <https://www1.ncdc.noaa.gov/pub/data/noaa/isd-history.txt>\n"
)
}
return(invisible(NULL))
}
#' Validate Station Data for Years Available
#'
#' @param station User entered station ID
#' @param isd_history isd_history.csv from NCEI provided by GSODR
#' @param years User entered years for query
#' @keywords internal
#' @return station_id value, "station", `NA` if no match with available data
#' @autoglobal
#' @noRd
.validate_station_data_years <- function(station, isd_history, years) {
BEGIN <-
as.numeric(substr(isd_history[isd_history$STNID == station,]$BEGIN, 1, 4))
END <-
as.numeric(substr(isd_history[isd_history$STNID == station,]$END, 1, 4))
if (min(years) < BEGIN | max(years) > END) {
warning(
"\nThis station, ",
station,
", only provides data for years ",
BEGIN,
" to ",
END,
".\n",
"Please send a request that falls within these years.",
call. = FALSE
)
station <- NA
}
return(station)
}
#' Validate country requests
#'
#' @param country User requested country name
#' @param isd_history Data provided from NCEI on stations' locations and years
#' @keywords internal
#' @return A validated country name
#' @autoglobal
#' @noRd
.validate_country <-
function(country, isd_history) {
if (!is.null(country)) {
country <- toupper(trimws(country[1]))
nc <- nchar(country)
if (nc == 3) {
if (country %in% isd_history$ISO3C) {
c <- which(country == isd_history$ISO3C)
country <- as.character(isd_history[c, "CTRY"][1])
} else {
stop(call. = FALSE,
"\nPlease provide a valid name or 2 or 3 ",
"letter ISO country code\n")
}
} else if (nc == 2) {
if (country %in% isd_history$ISO2C) {
c <- which(country == isd_history$ISO2C)
country <- as.character(isd_history[c, "CTRY"][1])
} else if (country %in% isd_history$CTRY) {
c <- which(country == isd_history$CTRY)
country <- as.character(isd_history[c, "CTRY"][1])
} else {
stop(call. = FALSE,
"\nPlease provide a valid name or 2 or 3 ",
"\nletter ISO country code")
}
} else if (country %in% isd_history$COUNTRY_NAME) {
c <- which(country == isd_history$COUNTRY_NAME)
country <- as.character(isd_history[c, "CTRY"][1])
} else {
stop(call. = FALSE,
"\nPlease provide a valid name or 2 or 3 ",
"letter ISO country code\n")
}
}
return(country)
}
#' Validate data for missing days
#'
#' @param max_missing User entered maximum permissible missing days
#' @param GSOD_list A list of GSOD files that have been downloaded from NCEI
#' @keywords internal
#' @return A validated `list()` of GSOD files that meet requirements for missing
#' days
#' @autoglobal
#' @noRd
.validate_missing_days <-
function(max_missing, file_list) {
records <-
unlist(lapply(X = paste0(file_list),
FUN = R.utils::countLines))
names(records) <- file_list
year <- as.numeric(substr(
file_list[1],
start = nchar(file_list[1]) - 19,
stop = nchar(file_list[1]) - 16
))
ifelse(
format(as.POSIXct(paste0(year, "-03-01")) - 1, "%d") != "29",
allow <- 365 - max_missing,
allow <- 366 - max_missing
)
file_list <- stats::na.omit(ifelse(records >= allow,
file_list,
NA))
}
#' Download GSOD files from NCEI server
#'
#' @param station Station ID being requested. Optional
#' @param years Years being requested. Mandatory
#' @keywords internal
#' @autoglobal
#' @return A list of data for processing before returning to user
#'
#' @noRd
.download_files <-
function(station,
years) {
# if no station or station > 10 download annual zip files ------------------
if (is.null(station) | length(station) > 10) {
url_list <-
paste0(
"https://www.ncei.noaa.gov/data/global-summary-of-the-day/archive/",
years,
".tar.gz"
)
tryCatch(
for (i in url_list) {
if (.check_url_exists(x = i)) {
curl::curl_download(
url = i,
destfile = file.path(tempdir(), basename(i)),
mode = "wb"
)
}
},
error = function(x) {
stop(call. = FALSE,
"\nA file download has failed.\n")
}
)
# create a list of files that have been downloaded and untar them
tar_files <-
list.files(tempdir(), pattern = "*\\.tar.gz$", full.names = TRUE)
withr::with_dir(tempdir(), .untar_files(tar_files))
GSOD_list <-
list.files(
tempdir(),
pattern = "*\\.csv$",
full.names = TRUE,
recursive = TRUE
)
if (is.null(station)) {
return(GSOD_list)
} else {
# Get a Cartesian join of all stations of interest and all years
files_stations <-
CJ(years, station, sorted = FALSE)[, paste0(tempdir(),
"/",
years,
"/",
gsub("-", "", station),
".csv")]
GSOD_list <-
subset(GSOD_list, GSOD_list %in% files_stations)
return(GSOD_list)
}
}
# if a station is provided, download its files -----------------------------
if (!is.null(station)) {
station <- gsub("-", "", station)
url_list <-
CJ(years, station, sorted = FALSE)[, paste0(
"https://www.ncei.noaa.gov/data/global-summary-of-the-day/access/",
years,
"/",
station,
".csv"
)]
tryCatch(
for (i in url_list) {
# check for an http error b4 proceeding'
if (.check_url_exists(x = i)) {
curl::curl_download(url = i,
destfile =
paste0(
tempdir(),
"/",
substr(i, nchar(i) - 20, nchar(i) - 16),
# year
"-",
basename(i) # filename
))
}
},
error = function(x) {
stop(call. = FALSE,
"\nThe file downloads have failed. Please retry.\n")
}
)
GSOD_list <-
list.files(tempdir(), pattern = "*\\.csv$", full.names = TRUE)
}
return(GSOD_list)
}
#' Agroclimatology List
#'
#' @param x A `data.table` of GSOD data from .download_data
#' @param isd_history isd_history file from NCEI
#' @param years Years being requested
#' @keywords internal
#' @return A list of GSOD stations suitable for agroclimatology work
#' @autoglobal
#' @noRd
.agroclimatology_list <-
function(file_list, isd_history, years) {
station_list <- isd_history[isd_history$LAT >= -60 &
isd_history$LAT <= 60,]$STNID
station_list <- gsub("-", "", station_list)
station_list <-
CJ(years, sorted = FALSE)[, paste0(tempdir(),
"/",
years,
"/",
station_list,
".csv")]
file_list <- file_list[file_list %in% station_list]
rm(station_list)
return(file_list)
}
#' Subset country list
#'
#' @param country Country of interest to subset on
#' @param GSOD_list List of GSOD files to be subset
#' @param isd_history isd_history.csv file from NCEI provided by GSODR
#' @param years Years being requested
#' @keywords internal
#' @return A list of stations in the requested country
#' @autoglobal
#' @noRd
.subset_country_list <-
function(country,
file_list,
isd_history,
years) {
station_list <-
isd_history[isd_history$CTRY == country,]$STNID
station_list <- gsub("-", "", station_list)
station_list <-
CJ(years, sorted = FALSE)[, paste0(tempdir(),
"/",
years,
"/",
station_list,
".csv")]
file_list <- file_list[file_list %in% station_list]
return(file_list)
rm(station_list)
}
#' Process .gz files
#'
#' @param file_list List of GSOD files
#' @param isd_history isd_history.csv file from NCEI provided by GSODR
#' @keywords internal
#' @return A `data.table` of GSOD weather data
#' @autoglobal
#' @noRd
.apply_process_csv <- function(file_list, isd_history) {
x <- lapply(X = file_list,
FUN = .process_csv,
isd_history = isd_history)
return(rbindlist(x))
}
#' Check That a URL Exists Before Downloading
#'
#' @param x a URL for checking
#' @return A numeric value representing the HTTP response
#' @noRd
.check_url_exists <- function(x) {
# check for an http error b4 proceeding, only if status is 200
return(grepl(
200L,
curlGetHeaders(
x,
redirect = TRUE,
verify = TRUE,
timeout = 0L,
TLS = ""
)[[1]]
))
}
#' Untar GSOD Tar Archive Files
#'
#' @param tar_files a list of tar files located in in `tempdir()`
#'
#' @noRd
#' @return called for it's side-effects, untars the archive files in the
#' `tempdir()`
.untar_files <- function(tar_files) {
for (i in tar_files) {
year_dir <- substr(i, nchar(i) - 10, nchar(i) - 7)
utils::untar(i, exdir = year_dir)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.