data-raw/fetch_isd-history.md

Fetch and Clean ‘isd_history.csv’ File

Adam H. Sparks 2024-03-27

PRE.fansi SPAN {padding-top: .25em; padding-bottom: .25em};

Introduction

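The “isd_history.csv” file details GSOD station metadata. These data include the start and stop years used by {GSODR} to pre-check requests before querying the server for download and the country code used by {GSODR} when sub-setting for requests by country. The following checks and changes are applied to the raw data file before inclusion in {GSODR}: stations whose latitude or longitude is missing (NA), whose coordinates are recorded as 0, 0, or whose coordinates fall outside the valid ranges (latitude ±90°, longitude ±180°) are removed; a STNID field, the USAF and WBAN fields joined by a hyphen, is added; and country names and ISO codes are added by matching the FIPS country code.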

Data Processing

Set up workspace

library("sessioninfo")
library("skimr")
library("countrycode")
library("data.table")

Download and clean data

# read the station history CSV straight from the NOAA server into a data.table
isd_history_url <- "https://www1.ncdc.noaa.gov/pub/data/noaa/isd-history.csv"
new_isd_history <- fread(input = isd_history_url)

Add/drop columns and save to disk

# zero-pad WBAN to five digits, then build STNID as "<USAF>-<WBAN>"
new_isd_history[, WBAN := sprintf("%05d", WBAN)][
  , STNID := paste(USAF, WBAN, sep = "-")]

# the identifier parts and ICAO are no longer needed once STNID exists
new_isd_history[, `:=`(USAF = NULL, WBAN = NULL, ICAO = NULL)]

# use a short station name column and put STNID first
setnames(new_isd_history, old = "STATION NAME", new = "NAME")
setcolorder(new_isd_history, "STNID")

# keep only stations that have both a latitude and a longitude
new_isd_history <- new_isd_history[complete.cases(LAT, LON)]

Add country names based on FIPS

# Join the countrycode lookup table onto the stations by FIPS country code.
# as.data.table() works on a copy; setDT(countrycode::codelist) would convert
# the package's own dataset by reference and leave it altered for the rest of
# the session.
# This is an X[Y] join, so every row of the lookup table is kept; lookup rows
# without a matching station carry NA in the station columns.
country_lookup <- as.data.table(countrycode::codelist)
new_isd_history <-
  new_isd_history[country_lookup, on = c("CTRY" = "fips")]

# keep the station fields plus the English country name and the ISO codes
keep_cols <- c(
  "STNID",
  "NAME",
  "LAT",
  "LON",
  "ELEV(M)",
  "CTRY",
  "STATE",
  "BEGIN",
  "END",
  "country.name.en",
  "iso2c",
  "iso3c"
)
new_isd_history <- new_isd_history[, ..keep_cols]

# clean data
# -999 and -999.9 are treated as missing-value codes and recoded to NA
new_isd_history[new_isd_history == -999] <- NA
new_isd_history[new_isd_history == -999.9] <- NA

# keep only properly georeferenced stations:
#  * LAT and LON must both be present,
#  * the placeholder position (0, 0) is dropped, but a station that merely
#    sits on the equator (LAT == 0) or the prime meridian (LON == 0) is kept,
#  * LAT must lie in [-90, 90] and LON in [-180, 180]; the bounds are
#    inclusive so that stations exactly on a limit are not discarded
new_isd_history <- new_isd_history[
  !is.na(LAT) & !is.na(LON) &
    !(LAT == 0 & LON == 0) &
    LAT >= -90 & LAT <= 90 &
    LON >= -180 & LON <= 180
]

# set colnames to upper case (by reference, the data.table way)
setnames(new_isd_history, toupper(names(new_isd_history)))
setnames(new_isd_history,
         old = "COUNTRY.NAME.EN",
         new = "COUNTRY_NAME")

# set country names to be upper case for easier internal verifications
new_isd_history[, COUNTRY_NAME := toupper(COUNTRY_NAME)]

# set key for joins when processing CSV files; the trailing [] prints the table
setkeyv(new_isd_history, "STNID")[]
## Key: <STNID>
##               STNID                                              NAME    LAT
##              <char>                                            <char>  <num>
##     1: 008268-99999                                         WXPOD8278 32.950
##     2: 010010-99999                               JAN MAYEN(NOR-NAVY) 70.933
##     3: 010014-99999                                        SORSTOKKEN 59.792
##     4: 010015-99999                                        BRINGELAND 61.383
##     5: 010016-99999                                       RORVIK/RYUM 64.850
##    ---                                                                      
## 27930: A07355-00241                         VIROQUA MUNICIPAL AIRPORT 43.579
## 27931: A07357-00182 ELBOW LAKE MUNICIPAL PRIDE OF THE PRAIRIE AIRPORT 45.986
## 27932: A07359-00240                              IONIA COUNTY AIRPORT 42.938
## 27933: A51255-00445                       DEMOPOLIS MUNICIPAL AIRPORT 32.464
## 27934: A51256-00451      BRANSON WEST MUNICIPAL EMERSON FIELD AIRPORT 36.699
##            LON ELEV(M)   CTRY  STATE    BEGIN      END  COUNTRY_NAME  ISO2C
##          <num>   <num> <char> <char>    <int>    <int>        <char> <char>
##     1:  65.567  1156.7     AF        20100519 20120323   AFGHANISTAN     AF
##     2:  -8.667     9.0     NO        19310101 20240324        NORWAY     NO
##     3:   5.341    48.8     NO        19861120 20240324        NORWAY     NO
##     4:   5.867   327.0     NO        19870117 19971231        NORWAY     NO
##     5:  11.233    14.0     NO        19870116 19910806        NORWAY     NO
##    ---                                                                     
## 27930: -90.913   394.1     US     WI 20140731 20240325 UNITED STATES     US
## 27931: -95.992   367.3     US     MN 20140731 20240326 UNITED STATES     US
## 27932: -85.061   249.0     US     MI 20140731 20240325 UNITED STATES     US
## 27933: -87.954    34.1     US     AL 20140731 20240325 UNITED STATES     US
## 27934: -93.402   411.2     US     MO 20140731 20240326 UNITED STATES     US
##         ISO3C
##        <char>
##     1:    AFG
##     2:    NOR
##     3:    NOR
##     4:    NOR
##     5:    NOR
##    ---       
## 27930:    USA
## 27931:    USA
## 27932:    USA
## 27933:    USA
## 27934:    USA

Show changes from last release

# install the released GSODR from CRAN so that the comparison below is made
# against the published data rather than a locally installed dev version
install.packages(pkgs = "GSODR", repos = "https://cloud.r-project.org/")
## Installing package into '/Users/283204f/Library/R/arm64/4.3/library'
## (as 'lib' is unspecified)

## 
## The downloaded binary packages are in
##  /var/folders/r4/wwsd3hsn48j5gck6qv6npkpc0000gr/T//RtmpwyH4A4/downloaded_packages
# load the isd_history object shipped with the installed GSODR package
released_rda <- system.file("extdata", "isd_history.rda", package = "GSODR")
load(released_rda)

# restrict the new table to the columns present in the released one
x <- names(isd_history)
new_isd_history <- new_isd_history[, x, with = FALSE]

# compute the diff between the new and the released table, then print it
isd_diff <- diffobj::diffPrint(new_isd_history, isd_history)
isd_diff
## < new_isd_history                                                             
## > isd_history                                                                 
## @@ 8,22 / 8,22 @@                                                             
## ~               STNID                                              NAME    LAT
## ~              <char>                                            <char>  <num>
##       5: 010016-99999                                       RORVIK/RYUM 64.850
##      ---                                                                      
## < 27930: A07355-00241                         VIROQUA MUNICIPAL AIRPORT 43.579
## > 27931: A07355-00241                         VIROQUA MUNICIPAL AIRPORT 43.579
## < 27931: A07357-00182 ELBOW LAKE MUNICIPAL PRIDE OF THE PRAIRIE AIRPORT 45.986
## > 27932: A07357-00182 ELBOW LAKE MUNICIPAL PRIDE OF THE PRAIRIE AIRPORT 45.986
## < 27932: A07359-00240                              IONIA COUNTY AIRPORT 42.938
## > 27933: A07359-00240                              IONIA COUNTY AIRPORT 42.938
## < 27933: A51255-00445                       DEMOPOLIS MUNICIPAL AIRPORT 32.464
## > 27934: A51255-00445                       DEMOPOLIS MUNICIPAL AIRPORT 32.464
## < 27934: A51256-00451      BRANSON WEST MUNICIPAL EMERSON FIELD AIRPORT 36.699
## > 27935: A51256-00451      BRANSON WEST MUNICIPAL EMERSON FIELD AIRPORT 36.699
##              LON ELEV(M)   CTRY  STATE    BEGIN      END  COUNTRY_NAME  ISO2C 
##            <num>   <num> <char> <char>    <int>    <int>        <char> <char> 
##       1:  65.567  1156.7     AF        20100519 20120323   AFGHANISTAN     AF 
## <     2:  -8.667     9.0     NO        19310101 20240324        NORWAY     NO 
## >     2:  -8.667     9.0     NO        19310101 20240202        NORWAY     NO 
## <     3:   5.341    48.8     NO        19861120 20240324        NORWAY     NO 
## >     3:   5.341    48.8     NO        19861120 20240202        NORWAY     NO 
##       4:   5.867   327.0     NO        19870117 19971231        NORWAY     NO 
##       5:  11.233    14.0     NO        19870116 19910806        NORWAY     NO 
##      ---                                                                      
## < 27930: -90.913   394.1     US     WI 20140731 20240325 UNITED STATES     US 
## > 27931: -90.913   394.1     US     WI 20140731 20240202 UNITED STATES     US 
## < 27931: -95.992   367.3     US     MN 20140731 20240326 UNITED STATES     US 
## > 27932: -95.992   367.3     US     MN 20140731 20240202 UNITED STATES     US 
## < 27932: -85.061   249.0     US     MI 20140731 20240325 UNITED STATES     US 
## > 27933: -85.061   249.0     US     MI 20140731 20240203 UNITED STATES     US 
## < 27933: -87.954    34.1     US     AL 20140731 20240325 UNITED STATES     US 
## > 27934: -87.954    34.1     US     AL 20140731 20240202 UNITED STATES     US 
## < 27934: -93.402   411.2     US     MO 20140731 20240326 UNITED STATES     US 
## > 27935: -93.402   411.2     US     MO 20140731 20240202 UNITED STATES     US 
##           ISO3C                                                               
##          <char>                                                               
## @@ 34,7 / 34,7 @@                                                             
## ~         ISO3C                                                               
## ~        <char>                                                               
##       5:    NOR                                                               
##      ---                                                                      
## < 27930:    USA                                                               
##   27931:    USA                                                               
##   27932:    USA                                                               
##   27933:    USA                                                               
##   27934:    USA                                                               
## > 27935:    USA
# discard the released copy; the freshly built table takes over its name
rm(list = "isd_history")
isd_history <- new_isd_history

View and save the data

# print a compact structural summary of isd_history before it is saved
str(isd_history)
## Classes 'data.table' and 'data.frame':   27934 obs. of  12 variables:
##  $ STNID       : chr  "008268-99999" "010010-99999" "010014-99999" "010015-99999" ...
##  $ NAME        : chr  "WXPOD8278" "JAN MAYEN(NOR-NAVY)" "SORSTOKKEN" "BRINGELAND" ...
##  $ LAT         : num  33 70.9 59.8 61.4 64.8 ...
##  $ LON         : num  65.57 -8.67 5.34 5.87 11.23 ...
##  $ ELEV(M)     : num  1156.7 9 48.8 327 14 ...
##  $ CTRY        : chr  "AF" "NO" "NO" "NO" ...
##  $ STATE       : chr  "" "" "" "" ...
##  $ BEGIN       : int  20100519 19310101 19861120 19870117 19870116 19880320 19861109 19850601 19730101 19310103 ...
##  $ END         : int  20120323 20240324 20240324 19971231 19910806 19971226 20240324 20240324 19970801 20041030 ...
##  $ COUNTRY_NAME: chr  "AFGHANISTAN" "NORWAY" "NORWAY" "NORWAY" ...
##  $ ISO2C       : chr  "AF" "NO" "NO" "NO" ...
##  $ ISO3C       : chr  "AFG" "NOR" "NOR" "NOR" ...
##  - attr(*, ".internal.selfref")=<externalptr> 
##  - attr(*, "sorted")= chr "STNID"
# write the station table to disk as a bzip2-compressed rda file for use with
# the GSODR package
save(
  list = "isd_history",
  file = "../inst/extdata/isd_history.rda",
  compress = "bzip2"
)

# store the diff against the previous release next to it
save(
  list = "isd_diff",
  file = "../inst/extdata/isd_diff.rda",
  compress = "bzip2"
)

Notes

NOAA policy

Users of these data should take into account the following (from the NCEI website):

The following data and products may have conditions placed on their international commercial use. They can be used within the U.S. or for non-commercial international activities without restriction. The non-U.S. data cannot be redistributed for commercial purposes. Re-distribution of these data by others must provide this same notification. A log of IP addresses accessing these data and products will be maintained and may be made available to data providers. For details, please consult WMO Resolution 40 and the NOAA Policy.

R System Information

## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.3.3 (2024-02-29)
##  os       macOS Sonoma 14.4.1
##  system   aarch64, darwin20
##  ui       X11
##  language (EN)
##  collate  en_US.UTF-8
##  ctype    en_US.UTF-8
##  tz       Australia/Perth
##  date     2024-03-27
##  pandoc   3.1.12.3 @ /opt/homebrew/bin/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package     * version date (UTC) lib source
##  askpass       1.2.0   2023-09-03 [1] CRAN (R 4.3.0)
##  base64enc     0.1-3   2015-07-28 [1] CRAN (R 4.3.0)
##  cli           3.6.2   2023-12-11 [1] CRAN (R 4.3.1)
##  countrycode * 1.6.0   2024-03-22 [1] CRAN (R 4.3.1)
##  crayon        1.5.2   2022-09-29 [1] CRAN (R 4.3.0)
##  credentials   2.0.1   2023-09-06 [1] CRAN (R 4.3.0)
##  data.table  * 1.15.2  2024-02-29 [1] CRAN (R 4.3.3)
##  diffobj       0.3.5   2021-10-05 [1] CRAN (R 4.3.0)
##  digest        0.6.35  2024-03-11 [1] CRAN (R 4.3.1)
##  dplyr         1.1.4   2023-11-17 [1] CRAN (R 4.3.1)
##  evaluate      0.23    2023-11-01 [1] CRAN (R 4.3.1)
##  fansi         1.0.6   2023-12-08 [1] CRAN (R 4.3.1)
##  fastmap       1.1.1   2023-02-24 [1] CRAN (R 4.3.0)
##  generics      0.1.3   2022-07-05 [1] CRAN (R 4.3.0)
##  glue          1.7.0   2024-01-09 [1] CRAN (R 4.3.1)
##  htmltools     0.5.8   2024-03-25 [1] CRAN (R 4.3.1)
##  jsonlite      1.8.8   2023-12-04 [1] CRAN (R 4.3.1)
##  knitr         1.45    2023-10-30 [1] CRAN (R 4.3.1)
##  lifecycle     1.0.4   2023-11-07 [1] CRAN (R 4.3.1)
##  magrittr      2.0.3   2022-03-30 [1] CRAN (R 4.3.0)
##  openssl       2.1.1   2023-09-25 [1] CRAN (R 4.3.1)
##  pillar        1.9.0   2023-03-22 [1] CRAN (R 4.3.0)
##  pkgconfig     2.0.3   2019-09-22 [1] CRAN (R 4.3.0)
##  R6            2.5.1   2021-08-19 [1] CRAN (R 4.3.0)
##  repr          1.1.7   2024-03-22 [1] CRAN (R 4.3.1)
##  rlang         1.1.3   2024-01-10 [1] CRAN (R 4.3.1)
##  rmarkdown     2.26    2024-03-05 [1] CRAN (R 4.3.1)
##  rstudioapi    0.16.0  2024-03-24 [1] CRAN (R 4.3.1)
##  sessioninfo * 1.2.2   2021-12-06 [1] CRAN (R 4.3.0)
##  skimr       * 2.1.5   2022-12-23 [1] CRAN (R 4.3.0)
##  sys           3.4.2   2023-05-23 [1] CRAN (R 4.3.0)
##  tibble        3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
##  tidyselect    1.2.1   2024-03-11 [1] CRAN (R 4.3.1)
##  utf8          1.2.4   2023-10-22 [1] CRAN (R 4.3.1)
##  vctrs         0.6.5   2023-12-01 [1] CRAN (R 4.3.1)
##  xfun          0.43    2024-03-25 [1] CRAN (R 4.3.1)
##  yaml          2.3.8   2023-12-11 [1] CRAN (R 4.3.1)
## 
##  [1] /Users/283204f/Library/R/arm64/4.3/library
##  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
## 
## ──────────────────────────────────────────────────────────────────────────────


adamhsparks/weathR documentation built on April 1, 2024, 6:30 p.m.