Skip to content

Latest commit

 

History

History
310 lines (264 loc) · 21 KB

fetch_isd-history.md

File metadata and controls

310 lines (264 loc) · 21 KB

Fetch and Clean ‘isd_history.csv’ File

Adam H. Sparks 2024-09-12

<STYLE type='text/css' scoped> PRE.fansi SPAN {padding-top: .25em; padding-bottom: .25em}; </STYLE>

Introduction

The “isd_history.csv” file details GSOD station metadata. These data include the start and stop years used by {GSODR} to pre-check requests before querying the server for download and the country code used by {GSODR} when sub-setting for requests by country. The following checks are performed on the raw data file before inclusion in {GSODR}:

  • Check for valid lon and lat values;

    • isd_history records where latitude or longitude is NA, or where both are 0, are removed, leaving only properly georeferenced stations,

    • isd_history records where latitude is < -90° or > 90° are removed,

    • isd_history records where longitude is < -180° or > 180° are removed.

  • A new field, STNID, a concatenation of the USAF and WBAN fields, is added.

Data Processing

Set up workspace

library("sessioninfo")
library("skimr")
library("countrycode")
library("data.table")

Download and clean data

# read the remote station history CSV directly from the NOAA server
new_isd_history <- data.table::fread(
  input = "https://www1.ncdc.noaa.gov/pub/data/noaa/isd-history.csv"
)

Add/drop columns

# zero-pad WBAN to five digits, then build STNID as "<USAF>-<WBAN>"
new_isd_history[, WBAN := sprintf("%05d", WBAN)][
  , STNID := paste0(USAF, "-", WBAN)
]

# move STNID to the first position and shorten the station name column
setcolorder(x = new_isd_history, neworder = "STNID")
setnames(x = new_isd_history, old = "STATION NAME", new = "NAME")

# keep only the rows that have both a latitude and a longitude
new_isd_history <- na.omit(new_isd_history, cols = c("LAT", "LON"))

# USAF and WBAN are now carried by STNID; ICAO is dropped along with them
new_isd_history[, c("USAF", "WBAN", "ICAO") := NULL]

Add country names based on FIPS

# X[Y] join of the stations (X) with the countrycode lookup table (Y), matching
# the station CTRY code against the table's fips column; afterwards only the
# station fields plus the English country name and the ISO2/ISO3 codes are kept
# NOTE(review): with data.table's default `nomatch`, X[Y] keeps every row of
# the lookup table and drops stations whose CTRY has no fips match -- confirm
# that this join direction is intended
new_isd_history <- new_isd_history[
  setDT(countrycode::codelist),
  on = c("CTRY" = "fips")
][, c(
  "STNID", "NAME", "LAT", "LON", "ELEV(M)", "CTRY",
  "STATE", "BEGIN", "END", "country.name.en", "iso2c", "iso3c"
)]

# clean data
# recode the -999 and -999.9 placeholder values to NA in every column
new_isd_history[new_isd_history == -999] <- NA
new_isd_history[new_isd_history == -999.9] <- NA

# Keep only properly georeferenced stations. A row is retained when:
#   * both LAT and LON are present,
#   * LAT and LON are not *both* 0 (a station lying exactly on the equator or
#     on the prime meridian is legitimate; only the 0/0 pair is rejected),
#   * LAT lies within [-90, 90] and LON within [-180, 180], bounds included,
#     since the boundary values themselves are valid coordinates (e.g. the
#     geographic poles).
# The previous filters used `LAT != 0 & LON != 0` and strict inequalities,
# which also discarded stations with a single zero coordinate and stations
# sitting exactly on a boundary.
new_isd_history <- new_isd_history[
  !is.na(LAT) & !is.na(LON) &
    !(LAT == 0 & LON == 0) &
    LAT >= -90 & LAT <= 90 &
    LON >= -180 & LON <= 180
]

# upper-case every column name, then replace the dotted country-name column
# with an underscore-separated name
setnames(new_isd_history, toupper(names(new_isd_history)))
setnames(new_isd_history, "COUNTRY.NAME.EN", "COUNTRY_NAME")

# country names are stored in upper case for easier internal verifications
set(
  new_isd_history,
  j = "COUNTRY_NAME",
  value = toupper(new_isd_history[["COUNTRY_NAME"]])
)

# key the table on STNID for joins when processing CSV files
setkey(new_isd_history, STNID)

Show changes from last release

# install the released GSODR from CRAN so that the data loaded from the
# package is the published version rather than a locally installed dev build
install.packages(pkgs = "GSODR", repos = "https://cloud.r-project.org/")
## Installing package into '/Users/283204f/Library/R/arm64/4.4/library'
## (as 'lib' is unspecified)

## 
## The downloaded binary packages are in
##  /var/folders/r4/wwsd3hsn48j5gck6qv6npkpc0000gr/T//RtmpFv4b2D/downloaded_packages
# load the isd_history.rda file shipped in the installed GSODR's extdata
load(file = system.file("extdata", "isd_history.rda", package = "GSODR"))

# restrict the new table to the columns present in the released one
x <- names(isd_history)
new_isd_history <- new_isd_history[, x, with = FALSE]

# store the new-vs-released diff, then print it
isd_diff <- diffobj::diffPrint(
  target = new_isd_history,
  current = isd_history
)
isd_diff
## < new_isd_history                                                             
## > isd_history                                                                 
## @@ 8,22 / 8,22 @@                                                             
## ~               STNID                                              NAME    LAT
## ~              <char>                                            <char>  <num>
##       5: 010016-99999                                       RORVIK/RYUM 64.850
##      ---                                                                      
## < 27923: A07355-00241                         VIROQUA MUNICIPAL AIRPORT 43.579
## > 27921: A07355-00241                         VIROQUA MUNICIPAL AIRPORT 43.579
## < 27924: A07357-00182 ELBOW LAKE MUNICIPAL PRIDE OF THE PRAIRIE AIRPORT 45.986
## > 27922: A07357-00182 ELBOW LAKE MUNICIPAL PRIDE OF THE PRAIRIE AIRPORT 45.986
## < 27925: A07359-00240                              IONIA COUNTY AIRPORT 42.938
## > 27923: A07359-00240                              IONIA COUNTY AIRPORT 42.938
## < 27926: A51255-00445                       DEMOPOLIS MUNICIPAL AIRPORT 32.464
## > 27924: A51255-00445                       DEMOPOLIS MUNICIPAL AIRPORT 32.464
## < 27927: A51256-00451      BRANSON WEST MUNICIPAL EMERSON FIELD AIRPORT 36.699
## > 27925: A51256-00451      BRANSON WEST MUNICIPAL EMERSON FIELD AIRPORT 36.699
##              LON ELEV(M)   CTRY  STATE    BEGIN      END  COUNTRY_NAME  ISO2C 
##            <num>   <num> <char> <char>    <int>    <int>        <char> <char> 
##       1:  65.567  1156.7     AF        20100519 20120323   AFGHANISTAN     AF 
## <     2:  -8.667     9.0     NO        19310101 20240909        NORWAY     NO 
## >     2:  -8.667     9.0     NO        19310101 20240718        NORWAY     NO 
## <     3:   5.341    48.8     NO        19861120 20240909        NORWAY     NO 
## >     3:   5.341    48.8     NO        19861120 20240718        NORWAY     NO 
##       4:   5.867   327.0     NO        19870117 19971231        NORWAY     NO 
##       5:  11.233    14.0     NO        19870116 19910806        NORWAY     NO 
##      ---                                                                      
## < 27923: -90.913   394.1     US     WI 20140731 20240907 UNITED STATES     US 
## > 27921: -90.913   394.1     US     WI 20140731 20240719 UNITED STATES     US 
## < 27924: -95.992   367.3     US     MN 20140731 20240907 UNITED STATES     US 
## > 27922: -95.992   367.3     US     MN 20140731 20240719 UNITED STATES     US 
## < 27925: -85.061   249.0     US     MI 20140731 20240907 UNITED STATES     US 
## > 27923: -85.061   249.0     US     MI 20140731 20240719 UNITED STATES     US 
## < 27926: -87.954    34.1     US     AL 20140731 20240908 UNITED STATES     US 
## > 27924: -87.954    34.1     US     AL 20140731 20240719 UNITED STATES     US 
## < 27927: -93.402   411.2     US     MO 20140731 20240907 UNITED STATES     US 
## > 27925: -93.402   411.2     US     MO 20140731 20240719 UNITED STATES     US 
##           ISO3C                                                               
##          <char>                                                               
## @@ 34,7 / 34,7 @@                                                             
## ~         ISO3C                                                               
## ~        <char>                                                               
##       5:    NOR                                                               
##      ---                                                                      
## > 27921:    USA                                                               
## > 27922:    USA                                                               
##   27923:    USA                                                               
##   27924:    USA                                                               
##   27925:    USA                                                               
## < 27926:    USA                                                               
## < 27927:    USA
# discard the released copy, then give the refreshed table its name
rm(list = "isd_history")

isd_history <- new_isd_history

View and save the data

# show the structure of the final table before it is written out
utils::str(isd_history)
## Classes 'data.table' and 'data.frame':   27927 obs. of  12 variables:
##  $ STNID       : chr  "008268-99999" "010010-99999" "010014-99999" "010015-99999" ...
##  $ NAME        : chr  "WXPOD8278" "JAN MAYEN(NOR-NAVY)" "SORSTOKKEN" "BRINGELAND" ...
##  $ LAT         : num  33 70.9 59.8 61.4 64.8 ...
##  $ LON         : num  65.57 -8.67 5.34 5.87 11.23 ...
##  $ ELEV(M)     : num  1156.7 9 48.8 327 14 ...
##  $ CTRY        : chr  "AF" "NO" "NO" "NO" ...
##  $ STATE       : chr  "" "" "" "" ...
##  $ BEGIN       : int  20100519 19310101 19861120 19870117 19870116 19880320 19861109 19850601 19730101 19310103 ...
##  $ END         : int  20120323 20240909 20240909 19971231 19910806 19971226 20240909 20240909 19970801 20041030 ...
##  $ COUNTRY_NAME: chr  "AFGHANISTAN" "NORWAY" "NORWAY" "NORWAY" ...
##  $ ISO2C       : chr  "AF" "NO" "NO" "NO" ...
##  $ ISO3C       : chr  "AFG" "NOR" "NOR" "NOR" ...
##  - attr(*, ".internal.selfref")=<externalptr> 
##  - attr(*, "sorted")= chr "STNID"
# write both objects to the package's extdata directory as bzip2-compressed
# rda files: the station table for use with the GSODR package, and the diff
save(list = "isd_history",
     file = "../inst/extdata/isd_history.rda",
     compress = "bzip2")

save(list = "isd_diff",
     file = "../inst/extdata/isd_diff.rda",
     compress = "bzip2")

Notes

NOAA policy

Users of these data should take into account the following (from the NCEI website):

The following data and products may have conditions placed on their international commercial use. They can be used within the U.S. or for non-commercial international activities without restriction. The non-U.S. data cannot be redistributed for commercial purposes. Re-distribution of these data by others must provide this same notification. A log of IP addresses accessing these data and products will be maintained and may be made available to data providers.
For details, please consult: WMO Resolution 40. NOAA Policy

R System Information

## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.4.1 (2024-06-14)
##  os       macOS Sonoma 14.6.1
##  system   aarch64, darwin20
##  ui       X11
##  language (EN)
##  collate  en_US.UTF-8
##  ctype    en_US.UTF-8
##  tz       Australia/Perth
##  date     2024-09-12
##  pandoc   3.3 @ /opt/homebrew/bin/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package     * version date (UTC) lib source
##  askpass       1.2.0   2023-09-03 [1] CRAN (R 4.4.0)
##  base64enc     0.1-3   2015-07-28 [1] CRAN (R 4.4.0)
##  cli           3.6.3   2024-06-21 [1] CRAN (R 4.4.0)
##  colorout      1.3-1   2024-08-27 [1] Github (jalvesaq/colorout@910592d)
##  countrycode * 1.6.0   2024-03-22 [1] CRAN (R 4.4.0)
##  crayon        1.5.3   2024-06-20 [1] CRAN (R 4.4.0)
##  credentials   2.0.1   2023-09-06 [1] CRAN (R 4.4.0)
##  data.table  * 1.16.0  2024-08-27 [1] CRAN (R 4.4.1)
##  diffobj       0.3.5   2021-10-05 [1] CRAN (R 4.4.0)
##  digest        0.6.37  2024-08-19 [1] CRAN (R 4.4.1)
##  dplyr         1.1.4   2023-11-17 [1] CRAN (R 4.4.0)
##  evaluate      0.24.0  2024-06-10 [1] CRAN (R 4.4.0)
##  fansi         1.0.6   2023-12-08 [1] CRAN (R 4.4.0)
##  fastmap       1.2.0   2024-05-15 [1] CRAN (R 4.4.0)
##  generics      0.1.3   2022-07-05 [1] CRAN (R 4.4.0)
##  glue          1.7.0   2024-01-09 [1] CRAN (R 4.4.0)
##  htmltools     0.5.8.1 2024-04-04 [1] CRAN (R 4.4.0)
##  jsonlite      1.8.8   2023-12-04 [1] CRAN (R 4.4.0)
##  knitr         1.48    2024-07-07 [1] CRAN (R 4.4.0)
##  lifecycle     1.0.4   2023-11-07 [1] CRAN (R 4.4.0)
##  magrittr      2.0.3   2022-03-30 [1] CRAN (R 4.4.0)
##  openssl       2.2.1   2024-08-16 [1] CRAN (R 4.4.0)
##  pillar        1.9.0   2023-03-22 [1] CRAN (R 4.4.0)
##  pkgconfig     2.0.3   2019-09-22 [1] CRAN (R 4.4.0)
##  R6            2.5.1   2021-08-19 [1] CRAN (R 4.4.0)
##  repr          1.1.7   2024-03-22 [1] CRAN (R 4.4.0)
##  rlang         1.1.4   2024-06-04 [1] CRAN (R 4.4.0)
##  rmarkdown     2.28    2024-08-17 [1] CRAN (R 4.4.0)
##  rstudioapi    0.16.0  2024-03-24 [1] CRAN (R 4.4.0)
##  sessioninfo * 1.2.2   2021-12-06 [1] CRAN (R 4.4.0)
##  skimr       * 2.1.5   2022-12-23 [1] CRAN (R 4.4.0)
##  sys           3.4.2   2023-05-23 [1] CRAN (R 4.4.0)
##  tibble        3.2.1   2023-03-20 [1] CRAN (R 4.4.0)
##  tidyselect    1.2.1   2024-03-11 [1] CRAN (R 4.4.0)
##  utf8          1.2.4   2023-10-22 [1] CRAN (R 4.4.0)
##  vctrs         0.6.5   2023-12-01 [1] CRAN (R 4.4.0)
##  xfun          0.47    2024-08-17 [1] CRAN (R 4.4.0)
##  yaml          2.3.10  2024-07-26 [1] CRAN (R 4.4.0)
## 
##  [1] /Users/283204f/Library/R/arm64/4.4/library
##  [2] /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library
## 
## ──────────────────────────────────────────────────────────────────────────────