# Echo the source of every chunk in the rendered document.
knitr::opts_chunk$set(echo = TRUE)

# crayon has to be switched on explicitly when knitting an Rmd.
options(crayon.enabled = TRUE)

# Register the fansi knit hooks for the output, message and error streams;
# the value returned by set_knit_hooks() is kept in `old_hooks`.
old_hooks <- fansi::set_knit_hooks(
  knitr::knit_hooks,
  which = c("output", "message", "error")
)
The "isd_history.csv" file details GSOD station metadata. These data include the start and stop years used by {GSODR} to pre-check requests before querying the server for download and the country code used by {GSODR} when sub-setting for requests by country. The following checks are performed on the raw data file before inclusion in {GSODR}:
Check for valid lon and lat values;
isd_history records where latitude or longitude is NA,
or where both are 0, are removed, leaving only properly georeferenced stations,
isd_history records where latitude is < -90° or > 90° are removed,
isd_history records where longitude is < -180° or > 180° are removed.
A new field, STNID, a concatenation of the USAF and WBAN fields, is added.
# Packages attached for this document, in the original attach order.
library("sessioninfo")
library("skimr")
library("countrycode")
library("data.table")
# Read the station history CSV straight from the NCEI server into a
# data.table.
new_isd_history <- data.table::fread(
  input = "https://www1.ncdc.noaa.gov/pub/data/noaa/isd-history.csv"
)
# Left-pad WBAN with zeros to five digits.
new_isd_history[, WBAN := sprintf("%05d", WBAN)]

# Build STNID as "<USAF>-<WBAN>" and move it to the first column.
new_isd_history[, STNID := paste0(USAF, "-", WBAN)]
setcolorder(new_isd_history, "STNID")

# Give the station name column a name without a space.
setnames(new_isd_history, old = "STATION NAME", new = "NAME")

# Drop every station whose LAT or LON is NA.
new_isd_history <- na.omit(new_isd_history, cols = c("LAT", "LON"))

# USAF and WBAN are now carried by STNID; ICAO is dropped as well.
new_isd_history[, `:=`(USAF = NULL, WBAN = NULL, ICAO = NULL)]
# Attach country names and ISO codes by matching each station's CTRY code
# against the `fips` column of the countrycode code list. The code list is
# copied with as.data.table() rather than converted with setDT(), so the
# dataset held by {countrycode} is not altered by reference.
# NOTE: X[Y, on = ...] returns one row per row of Y, so code-list entries
# without any station produce rows whose station fields are NA; these are
# removed by the LAT/LON NA check further down.
new_isd_history <- new_isd_history[
  as.data.table(countrycode::codelist),
  on = c("CTRY" = "fips")
]

# Keep only the station fields plus the English country name and ISO codes.
new_isd_history <- new_isd_history[, c(
  "STNID",
  "NAME",
  "LAT",
  "LON",
  "ELEV(M)",
  "CTRY",
  "STATE",
  "BEGIN",
  "END",
  "country.name.en",
  "iso2c",
  "iso3c"
)]

# clean data: -999 and -999.9 are treated as missing-value sentinels
new_isd_history[new_isd_history == -999] <- NA
new_isd_history[new_isd_history == -999.9] <- NA

# Keep only properly georeferenced stations:
#   * LAT and LON must both be present,
#   * a station is dropped for zero coordinates only when BOTH are 0
#     (stations on the equator or the prime meridian are kept),
#   * LAT must lie in [-90, 90] and LON in [-180, 180]; the bounds are
#     inclusive so stations located exactly on a limit are kept.
new_isd_history <- new_isd_history[
  !is.na(LAT) & !is.na(LON) &
    !(LAT == 0 & LON == 0) &
    LAT >= -90 & LAT <= 90 &
    LON >= -180 & LON <= 180
]

# set colnames to upper case
setnames(new_isd_history, toupper(names(new_isd_history)))
setnames(new_isd_history, old = "COUNTRY.NAME.EN", new = "COUNTRY_NAME")

# set country names to be upper case for easier internal verifications
new_isd_history[, COUNTRY_NAME := toupper(COUNTRY_NAME)]

# set key for joins when processing CSV files
setkeyv(new_isd_history, "STNID")
# Install the released GSODR from CRAN so the comparison below is not made
# against a locally installed dev version.
install.packages("GSODR", repos = "https://cloud.r-project.org/")

# Load the rda file shipped with the installed package; the code below
# expects it to provide an object named `isd_history`.
load(system.file("extdata", "isd_history.rda", package = "GSODR"))

# Restrict the freshly built table to the columns of the shipped one.
x <- names(isd_history)
new_isd_history <- new_isd_history[, x, with = FALSE]

# Store the diff between the new and the shipped metadata, then display it.
isd_diff <- diffobj::diffPrint(new_isd_history, isd_history)
isd_diff

# Replace the shipped object with the newly built one.
rm(isd_history)
isd_history <- new_isd_history
# Show the structure of the final table.
str(isd_history)

# Write the station metadata and the diff to disk as bzip2-compressed rda
# files for use with the GSODR package.
save(
  isd_history,
  file = "../inst/extdata/isd_history.rda",
  compress = "bzip2"
)
save(
  isd_diff,
  file = "../inst/extdata/isd_diff.rda",
  compress = "bzip2"
)
Users of these data should take into account the following (from the NCEI website):
The following data and products may have conditions placed on their international commercial use. They can be used within the U.S. or for non-commercial international activities without restriction. The non-U.S. data cannot be redistributed for commercial purposes. Re-distribution of these data by others must provide this same notification. A log of IP addresses accessing these data and products will be maintained and may be made available to data providers.
For details, please consult: WMO Resolution 40. NOAA Policy
# Report the R session and package versions used to build the data.
sessioninfo::session_info()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.