data-raw/fetch_isd-history.md

---
title: "Fetch and Clean 'isd_history.csv' File"
author: "Adam H. Sparks"
date: "2025-05-03"
output: github_document
---

PRE.fansi SPAN {padding-top: .25em; padding-bottom: .25em};

Introduction

The "isd_history.csv" file details GSOD station metadata. These data include the start and stop years used by {GSODR} to pre-check requests before querying the server for download, and the country code used by {GSODR} when subsetting requests by country. The checks and cleaning steps shown in the "Data Processing" section below are performed on the raw data file before inclusion in {GSODR}.

Data Processing

Set up workspace

library("sessioninfo")
library("skimr")
## Error in library("skimr"): there is no package called 'skimr'
library("countrycode")
library("data.table")

Download and clean data

# read the current station history list directly from the NOAA server
new_isd_history <- data.table::fread(
  input = "https://www1.ncdc.noaa.gov/pub/data/noaa/isd-history.csv"
)

Add/drop columns and save to disk

# build the station identifier: USAF, a dash, then WBAN zero-padded to 5 digits
new_isd_history[, STNID := paste(USAF, sprintf("%05d", WBAN), sep = "-")]

# the source identifier columns and ICAO are not kept once STNID exists
new_isd_history[, c("USAF", "WBAN", "ICAO") := NULL]

# shorten the station name column and move STNID to the front
setnames(new_isd_history, old = "STATION NAME", new = "NAME")
setcolorder(new_isd_history, "STNID")

# keep only stations that have both a latitude and a longitude
new_isd_history <- new_isd_history[!is.na(LAT) & !is.na(LON)]

Add country names based on FIPS

# Join the station metadata with country names and ISO codes on the FIPS
# country code. `as.data.table()` works on a copy of the lookup table instead
# of converting the dataset shipped with {countrycode} in place.
# NOTE: `X[Y]` returns one row per row of the lookup table, so stations whose
# CTRY has no FIPS match are dropped, and lookup rows without any station come
# back with NA station fields; the coordinate filter below removes the latter.
new_isd_history <-
  new_isd_history[as.data.table(countrycode::codelist),
                  on = c("CTRY" = "fips")]

# keep only the station fields plus the English country name and ISO codes
new_isd_history <- new_isd_history[, c(
  "STNID",
  "NAME",
  "LAT",
  "LON",
  "ELEV(M)",
  "CTRY",
  "STATE",
  "BEGIN",
  "END",
  "country.name.en",
  "iso2c",
  "iso3c"
)]

# clean data: recode the -999 and -999.9 placeholders as NA
new_isd_history[new_isd_history == -999] <- NA
new_isd_history[new_isd_history == -999.9] <- NA

# Keep only properly georeferenced stations:
#  * both coordinates present (this also drops the station-less lookup rows
#    introduced by the join above),
#  * not the (0, 0) placeholder -- a station with only ONE coordinate equal to
#    0 lies on the equator or the prime meridian and is valid,
#  * coordinates inside the valid ranges, boundaries included (a station at a
#    pole has LAT of exactly -90 or 90).
new_isd_history <- new_isd_history[
  !is.na(LAT) & !is.na(LON) &
    !(LAT == 0 & LON == 0) &
    LAT >= -90 & LAT <= 90 &
    LON >= -180 & LON <= 180
]

# set colnames to upper case (by reference) and give the country name column
# its final name
setnames(new_isd_history, toupper(names(new_isd_history)))
setnames(new_isd_history,
         old = "COUNTRY.NAME.EN",
         new = "COUNTRY_NAME")

# set country names to be upper case for easier internal verifications
new_isd_history[, COUNTRY_NAME := toupper(COUNTRY_NAME)]

# set key for joins when processing CSV files
setkeyv(new_isd_history, "STNID")

Show changes from last release

# install the CRAN release so that a locally installed development version is
# not the one being used
utils::install.packages(
  pkgs = "GSODR",
  repos = "https://cloud.r-project.org/"
)
## Installing package into '/Users/adamsparks/Library/R/arm64/4.5/library'
## (as 'lib' is unspecified)
## 
## The downloaded binary packages are in
##  /var/folders/vz/txwj1tx51txgw7zv_b5c5_3m0000gn/T//Rtmp96xxfr/downloaded_packages
# load the `isd_history` object shipped in the installed {GSODR} package
load(system.file("extdata", "isd_history.rda", package = "GSODR"))

# restrict the new table to the released table's columns, in the same order
x <- names(isd_history)
new_isd_history <- new_isd_history[, x, with = FALSE]

# compare the new table with the released one, keep the diff and print it
isd_diff <- diffobj::diffPrint(new_isd_history, isd_history)
isd_diff
## No visible differences between objects.
## < new_isd_history                                                             
## > isd_history                                                                 
## @@ 1,40 / 1,40 @@                                                             
##   Key: <STNID>                                                                
##                 STNID                                              NAME    LAT
##                <char>                                            <char>  <num>
##       1: 008268-99999                                         WXPOD8278 32.950
##       2: 010010-99999                               JAN MAYEN(NOR-NAVY) 70.933
##       3: 010014-99999                                        SORSTOKKEN 59.792
##       4: 010015-99999                                        BRINGELAND 61.383
##       5: 010016-99999                                       RORVIK/RYUM 64.850
##      ---                                                                      
##   27933: A07355-00241                         VIROQUA MUNICIPAL AIRPORT 43.579
##   27934: A07357-00182 ELBOW LAKE MUNICIPAL PRIDE OF THE PRAIRIE AIRPORT 45.986
##   27935: A07359-00240                              IONIA COUNTY AIRPORT 42.938
##   27936: A51255-00445                       DEMOPOLIS MUNICIPAL AIRPORT 32.464
##   27937: A51256-00451      BRANSON WEST MUNICIPAL EMERSON FIELD AIRPORT 36.699
##              LON ELEV(M)   CTRY  STATE    BEGIN      END  COUNTRY_NAME  ISO2C 
##            <num>   <num> <char> <char>    <int>    <int>        <char> <char> 
##       1:  65.567  1156.7     AF        20100519 20120323   AFGHANISTAN     AF 
##       2:  -8.667     9.0     NO        19310101 20250430        NORWAY     NO 
##       3:   5.341    48.8     NO        19861120 20250430        NORWAY     NO 
##       4:   5.867   327.0     NO        19870117 19971231        NORWAY     NO 
##       5:  11.233    14.0     NO        19870116 19910806        NORWAY     NO 
##      ---                                                                      
##   27933: -90.913   394.1     US     WI 20140731 20250430 UNITED STATES     US 
##   27934: -95.992   367.3     US     MN 20140731 20250430 UNITED STATES     US 
##   27935: -85.061   249.0     US     MI 20140731 20250501 UNITED STATES     US 
##   27936: -87.954    34.1     US     AL 20140731 20250501 UNITED STATES     US 
##   27937: -93.402   411.2     US     MO 20140731 20250501 UNITED STATES     US 
##           ISO3C                                                               
##          <char>                                                               
##       1:    AFG                                                               
##       2:    NOR                                                               
##       3:    NOR                                                               
##       4:    NOR                                                               
##       5:    NOR                                                               
##      ---                                                                      
##   27933:    USA                                                               
##   27934:    USA                                                               
##   27935:    USA                                                               
##   27936:    USA                                                               
##   27937:    USA
# drop the released copy, then store the refreshed table under its name
rm(list = "isd_history")
isd_history <- new_isd_history

View and save the data

# show the structure of the final table before it is written to disk
utils::str(isd_history)
## Classes 'data.table' and 'data.frame':   27937 obs. of  12 variables:
##  $ STNID       : chr  "008268-99999" "010010-99999" "010014-99999" "010015-99999" ...
##  $ NAME        : chr  "WXPOD8278" "JAN MAYEN(NOR-NAVY)" "SORSTOKKEN" "BRINGELAND" ...
##  $ LAT         : num  33 70.9 59.8 61.4 64.8 ...
##  $ LON         : num  65.57 -8.67 5.34 5.87 11.23 ...
##  $ ELEV(M)     : num  1156.7 9 48.8 327 14 ...
##  $ CTRY        : chr  "AF" "NO" "NO" "NO" ...
##  $ STATE       : chr  "" "" "" "" ...
##  $ BEGIN       : int  20100519 19310101 19861120 19870117 19870116 19880320 19861109 19850601 19730101 19310103 ...
##  $ END         : int  20120323 20250430 20250430 19971231 19910806 19971226 20250430 20250430 19970801 20041030 ...
##  $ COUNTRY_NAME: chr  "AFGHANISTAN" "NORWAY" "NORWAY" "NORWAY" ...
##  $ ISO2C       : chr  "AF" "NO" "NO" "NO" ...
##  $ ISO3C       : chr  "AFG" "NOR" "NOR" "NOR" ...
##  - attr(*, ".internal.selfref")=<externalptr> 
##  - attr(*, "sorted")= chr "STNID"
# write the station table as a bzip2-compressed rda file for use with the
# GSODR package
save(list = "isd_history",
     compress = "bzip2",
     file = "../inst/extdata/isd_history.rda")

# store the diff against the previous release alongside it
save(list = "isd_diff",
     compress = "bzip2",
     file = "../inst/extdata/isd_diff.rda")

Notes

NOAA policy

Users of these data should take into account the following (from the NCEI website):

The following data and products may have conditions placed on their international commercial use. They can be used within the U.S. or for non-commercial international activities without restriction. The non-U.S. data cannot be redistributed for commercial purposes. Re-distribution of these data by others must provide this same notification. A log of IP addresses accessing these data and products will be maintained and may be made available to data providers. For details, please consult: WMO Resolution 40. NOAA Policy

R System Information

## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.5.0 (2025-04-11)
##  os       macOS Sequoia 15.4.1
##  system   aarch64, darwin20
##  ui       X11
##  language (EN)
##  collate  en_AU.UTF-8
##  ctype    en_AU.UTF-8
##  tz       Australia/Perth
##  date     2025-05-03
##  pandoc   3.6.4 @ /opt/homebrew/bin/pandoc
##  quarto   1.7.29 @ /usr/local/bin/quarto
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  !  package     * version    date (UTC) lib source
##     askpass       1.2.1      2024-10-04 [1] CRAN (R 4.5.0)
##     brio          1.1.5      2024-04-24 [1] CRAN (R 4.5.0)
##     cachem        1.1.0      2024-05-16 [1] CRAN (R 4.5.0)
##     callr         3.7.6      2024-03-25 [1] CRAN (R 4.5.0)
##     cli           3.6.5      2025-04-23 [1] CRAN (R 4.5.0)
##     codetools     0.2-20     2024-03-31 [2] CRAN (R 4.5.0)
##     colorout      1.3-2      2025-04-24 [1] Github (jalvesaq/colorout@572ab10)
##     commonmark    1.9.5      2025-03-17 [1] CRAN (R 4.5.0)
##     countrycode * 1.6.1      2025-03-31 [1] CRAN (R 4.5.0)
##     crayon        1.5.3      2024-06-20 [1] CRAN (R 4.5.0)
##     credentials   2.0.2      2024-10-04 [1] CRAN (R 4.5.0)
##     curl          6.2.2      2025-03-24 [1] CRAN (R 4.5.0)
##     data.table  * 1.17.0     2025-02-22 [1] CRAN (R 4.5.0)
##     desc          1.4.3      2023-12-10 [1] CRAN (R 4.5.0)
##     devtag        0.0.0.9000 2025-04-18 [1] Github (moodymudskipper/devtag@24f9b21)
##     devtools      2.4.5      2022-10-11 [1] CRAN (R 4.5.0)
##     diffobj       0.3.6      2025-04-21 [1] CRAN (R 4.5.0)
##     digest        0.6.37     2024-08-19 [1] CRAN (R 4.5.0)
##     ellipsis      0.3.2      2021-04-29 [1] CRAN (R 4.5.0)
##     evaluate      1.0.3      2025-01-10 [1] CRAN (R 4.5.0)
##     fansi         1.0.6      2023-12-08 [1] CRAN (R 4.5.0)
##     fastmap       1.2.0      2024-05-15 [1] CRAN (R 4.5.0)
##     fs            1.6.6      2025-04-12 [1] CRAN (R 4.5.0)
##     glue          1.8.0      2024-09-30 [1] CRAN (R 4.5.0)
##  VP GSODR       * 4.1.3.9000 2024-10-16 [?] CRAN (R 4.5.0) (on disk 4.1.3)
##     htmltools     0.5.8.1    2024-04-04 [1] CRAN (R 4.5.0)
##     htmlwidgets   1.6.4      2023-12-06 [1] CRAN (R 4.5.0)
##     httpuv        1.6.15     2024-03-26 [1] CRAN (R 4.5.0)
##     jsonlite      2.0.0      2025-03-27 [1] CRAN (R 4.5.0)
##     knitr       * 1.50       2025-03-16 [1] CRAN (R 4.5.0)
##     later         1.4.2      2025-04-08 [1] CRAN (R 4.5.0)
##     lifecycle     1.0.4      2023-11-07 [1] CRAN (R 4.5.0)
##     magrittr      2.0.3      2022-03-30 [1] CRAN (R 4.5.0)
##     memoise       2.0.1      2021-11-26 [1] CRAN (R 4.5.0)
##     mime          0.13       2025-03-17 [1] CRAN (R 4.5.0)
##     miniUI        0.1.1.1    2018-05-18 [1] CRAN (R 4.5.0)
##     nvimcom     * 0.9.67     2025-04-12 [1] local
##     openssl       2.3.2      2025-02-03 [1] CRAN (R 4.5.0)
##     pillar        1.10.2     2025-04-05 [1] CRAN (R 4.5.0)
##     pkgbuild      1.4.7      2025-03-24 [1] CRAN (R 4.5.0)
##     pkgconfig     2.0.3      2019-09-22 [1] CRAN (R 4.5.0)
##     pkgload       1.4.0      2024-06-28 [1] CRAN (R 4.5.0)
##     processx      3.8.6      2025-02-21 [1] CRAN (R 4.5.0)
##     profvis       0.4.0      2024-09-20 [1] CRAN (R 4.5.0)
##     promises      1.3.2      2024-11-28 [1] CRAN (R 4.5.0)
##     ps            1.9.1      2025-04-12 [1] CRAN (R 4.5.0)
##     purrr         1.0.4      2025-02-05 [1] CRAN (R 4.5.0)
##     R.methodsS3   1.8.2      2022-06-13 [1] CRAN (R 4.5.0)
##     R.oo          1.27.1     2025-05-02 [1] CRAN (R 4.5.0)
##     R.utils       2.13.0     2025-02-24 [1] CRAN (R 4.5.0)
##     R6            2.6.1      2025-02-15 [1] CRAN (R 4.5.0)
##     Rcpp          1.0.14     2025-01-12 [1] CRAN (R 4.5.0)
##     remotes       2.5.0.9000 2025-04-21 [1] Github (r-lib/remotes@bcd35d5)
##     rlang         1.1.6      2025-04-11 [1] CRAN (R 4.5.0)
##     roxygen2      7.3.2      2024-06-28 [1] CRAN (R 4.5.0)
##     roxyglobals   1.0.0      2023-08-21 [1] CRAN (R 4.5.0)
##     rprojroot     2.0.4      2023-11-05 [1] CRAN (R 4.5.0)
##     rstudioapi    0.17.1     2024-10-22 [1] CRAN (R 4.5.0)
##     sessioninfo * 1.2.3      2025-02-05 [1] CRAN (R 4.5.0)
##     shiny         1.10.0     2024-12-14 [1] CRAN (R 4.5.0)
##     stringi       1.8.7      2025-03-27 [1] CRAN (R 4.5.0)
##     stringr       1.5.1      2023-11-14 [1] CRAN (R 4.5.0)
##     sys           3.4.3      2024-10-04 [1] CRAN (R 4.5.0)
##     testthat      3.2.3      2025-01-13 [1] CRAN (R 4.5.0)
##     tibble        3.2.1      2023-03-20 [1] CRAN (R 4.5.0)
##     urlchecker    1.0.1      2021-11-30 [1] CRAN (R 4.5.0)
##     usethis       3.1.0      2024-11-26 [1] CRAN (R 4.5.0)
##     vctrs         0.6.5      2023-12-01 [1] CRAN (R 4.5.0)
##     withr         3.0.2      2024-10-28 [1] CRAN (R 4.5.0)
##     xfun          0.52       2025-04-02 [1] CRAN (R 4.5.0)
##     xml2          1.3.8      2025-03-14 [1] CRAN (R 4.5.0)
##     xtable        1.8-4      2019-04-21 [1] CRAN (R 4.5.0)
## 
##  [1] /Users/adamsparks/Library/R/arm64/4.5/library
##  [2] /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library
## 
##  * ── Packages attached to the search path.
##  V ── Loaded and on-disk version mismatch.
##  P ── Loaded and on-disk path mismatch.
## 
## ──────────────────────────────────────────────────────────────────────────────


adamhsparks/weathR documentation built on June 10, 2025, 1:52 p.m.