R/convertNaturalEarthAdm1.R

Defines functions convertNaturalEarthAdm1

Documented in convertNaturalEarthAdm1

#' @importFrom rlang .data
#' @export
#'
#' @title Convert Level 1 (state) borders shapefile
#'
#' @description Returns a simple features data frame for 1st level administrative divisions
#'
#' The full resolution file will be named "NaturalEarthAdm1.rda". In addition,
#' "_05", "_02" and "_01" versions of the file will be created that are
#' simplified to 5\%, 2\% and 1\%. Simplified versions will greatly improve the
#' speed of both searching and plotting.
#'
#' @details A state border shapefile is downloaded and converted to a
#' simple features data frame with additional columns of data. The resulting file
#' will be created in the spatial data directory which is set with
#' \code{setSpatialDataDir()}.
#'
#' Within the \pkg{MazamaSpatialUtils} package the phrase 'state' refers to
#' administrative divisions beneath the level of the country or nation. This
#' makes sense in the United 'States'. In other countries this level is known as
#' 'province', 'territory' or some other term.
#'
#' @return Name of the dataset being created (returned invisibly).
#'
#' @references https://www.naturalearthdata.com

convertNaturalEarthAdm1 <- function() {

  # Downloads the Natural Earth 10m "admin 1 states provinces" shapefile,
  # converts it to a simple features data frame with standardized columns and
  # hands it to simplifyAndSave(). Returns the dataset name invisibly.

  # ----- Setup ----------------------------------------------------------------

  # Use package internal data directory
  dataDir <- getSpatialDataDir()

  # Specify the name of the dataset and file being created
  datasetName <- 'NaturalEarthAdm1'

  # ----- Get the data ---------------------------------------------------------

  # Build appropriate request URL
  url <- 'http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_1_states_provinces.zip'

  filePath <- file.path(dataDir, basename(url))

  # NOTE:  This zip file has no directory so extra subdirectory needs to be created
  dsnPath <- file.path(dataDir, 'adm')

  # Always remove the downloaded zip file and the unzipped directory, even when
  # a later step fails, so that no partial downloads are left behind.
  on.exit(
    {
      unlink(filePath, force = TRUE)
      unlink(dsnPath, recursive = TRUE, force = TRUE)
    },
    add = TRUE
  )

  utils::download.file(url, filePath)
  utils::unzip(filePath, exdir = dsnPath)

  # ----- Convert to SFDF ------------------------------------------------------

  # Convert shapefile into simple features data frame
  shpName <- 'ne_10m_admin_1_states_provinces'
  SFDF <- convertLayer(
    dsn = dsnPath,
    layer = shpName
  )

  # ----- Select useful columns and rename -------------------------------------

  # > dplyr::glimpse(SFDF, width = 75)
  # Rows: 4,596
  # Columns: 122
  # $ featurecla <chr> "Admin-1 states provinces", "Admin-1 states provinces"…
  # $ scalerank  <int> 3, 6, 2, 6, 3, 4, 4, 3, 4, 3, 4, 3, 3, 10, 9, 2, 2, 2,…
  # $ adm1_code  <chr> "ARG-1309", "URY-8", "IDN-1185", "MYS-1186", "CHL-2694…
  # $ diss_me    <int> 1309, 8, 1185, 1186, 2694, 1936, 1937, 2693, 1939, 269…
  # $ iso_3166_2 <chr> "AR-E", "UY-PA", "ID-KI", "MY-12", "CL-AP", "BO-L", "B…
  # $ wikipedia  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ iso_a2     <chr> "AR", "UY", "ID", "MY", "CL", "BO", "BO", "CL", "BO", …
  # $ adm0_sr    <int> 1, 1, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, …
  # $ name       <chr> "Entre Ríos", "Paysandú", "Kalimantan Timur", "Sabah",…
  # $ name_alt   <chr> "Entre-Rios", NA, "Kaltim", "North Borneo", NA, NA, NA…
  # $ name_local <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ type       <chr> "Provincia", "Departamento", "Propinsi", "State", "Reg…
  # $ type_en    <chr> "Province", "Department", "Province", "State", "Region…
  # $ code_local <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ code_hasc  <chr> "AR.ER", "UY.PA", "ID.KI", "MY.SA", "CL.AP", "BO.LP", …
  # $ note       <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ hasc_maybe <chr> NA, NA, NA, NA, NA, NA, NA, NA, "BO.OR|BOL-POT", NA, "…
  # $ region     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ region_cod <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ provnum_ne <int> 10, 19, 15, 1, 0, 8, 7, 0, 1, 20006, 8, 22, 5, 0, 0, N…
  # $ gadm_level <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, …
  # $ check_me   <int> 20, 0, 20, 20, 20, 0, 0, 20, 10, 20, 10, 20, 20, 20, 2…
  # $ datarank   <int> 3, 8, 1, 6, 3, 8, 6, 3, 6, 3, 5, 3, 3, 10, 8, 1, 1, 1,…
  # $ abbrev     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ postal     <chr> "ER", "PA", "KI", "SA", NA, "LP", "OR", "TA", "PO", "A…
  # $ area_sqkm  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, 0, 0,…
  # $ sameascity <int> -99, -99, -99, -99, 7, 6, 6, -99, -99, 6, 7, 6, -99, -…
  # $ labelrank  <int> 3, 6, 2, 6, 7, 6, 6, 3, 4, 6, 7, 6, 3, 20, 9, 2, 2, 2,…
  # $ name_len   <int> 10, 8, 16, 5, 18, 6, 5, 8, 6, 11, 5, 5, 5, 8, 7, 6, 8,…
  # $ mapcolor9  <int> 3, 2, 6, 3, 5, 2, 2, 5, 2, 5, 4, 3, 3, 6, 3, 2, 4, 4, …
  # $ mapcolor13 <int> 13, 10, 11, 6, 9, 3, 3, 9, 3, 9, 11, 13, 13, 3, 7, 2, …
  # $ fips       <chr> "AR08", "UY11", "ID14", "MY16", NA, "BL04", "BL05", "C…
  # $ fips_alt   <chr> NA, NA, NA, NA, NA, NA, NA, NA, "BL05", NA, "PE18", NA…
  # $ woe_id     <int> 2344682, 2347650, 2345723, 2346310, 56043702, 2344804,…
  # $ woe_label  <chr> "Entre Rios, AR, Argentina", "Paysandú, UY, Uruguay",…
  # $ woe_name   <chr> "Entre Ríos", "Paysandú", "Kalimantan Timur", "Sabah",…
  # $ latitude   <dbl> -32.02750, -32.09330, 1.28915, 5.31115, -18.32070, -14…
  # $ longitude  <dbl> -59.28240, -57.22400, 116.35400, 117.09500, -69.68040,…
  # $ sov_a3     <chr> "ARG", "URY", "IDN", "MYS", "CHL", "BOL", "BOL", "CHL"…
  # $ adm0_a3    <chr> "ARG", "URY", "IDN", "MYS", "CHL", "BOL", "BOL", "CHL"…
  # $ adm0_label <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, …
  # $ admin      <chr> "Argentina", "Uruguay", "Indonesia", "Malaysia", "Chil…
  # $ geonunit   <chr> "Argentina", "Uruguay", "Indonesia", "Malaysia", "Chil…
  # $ gu_a3      <chr> "ARG", "URY", "IDN", "MYS", "CHL", "BOL", "BOL", "CHL"…
  # $ gn_id      <int> 3434137, 3441242, 1641897, 1733039, 6693562, 3911924, …
  # $ gn_name    <chr> "Provincia de Entre Rios", "Departamento de Paysandu",…
  # $ gns_id     <int> -988655, -908097, -2680740, -2405166, 10749159, -69341…
  # $ gns_name   <chr> "Entre Rios", "Paysandu, Departamento de", "Kalimantan…
  # $ gn_level   <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, NA, 1, 1,…
  # $ gn_region  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ gn_a1_code <chr> "AR.08", "UY.11", "ID.14", "MY.16", "CL.16", "BO.04", …
  # $ region_sub <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ sub_code   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ gns_level  <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, NA, 1, 1,…
  # $ gns_lang   <chr> "khm", "fra", "ind", "fil", "ara", "kor", "kor", "fra"…
  # $ gns_adm1   <chr> "AR08", "UY11", "ID14", "MY16", "CI16", "BL04", "BL05"…
  # $ gns_region <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ min_label  <dbl> 6.0, 8.0, 5.0, 7.0, 6.0, 6.6, 6.6, 6.0, 6.6, 6.0, 6.6,…
  # $ max_label  <dbl> 11.0, 11.0, 10.1, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, …
  # $ min_zoom   <dbl> 6.0, 8.0, 4.6, 7.0, 6.0, 6.6, 6.6, 6.0, 6.6, 6.0, 6.6,…
  # $ wikidataid <chr> "Q44762", "Q16576", "Q3899", "Q179029", "Q2109", "Q272…
  # $ name_ar    <chr> "إنتري ريوس", "إدارة بايساندو", "كالمنتان الشرقية", "ص…
  # $ name_bn    <chr> "এন্ত্রে রিও প্রদেশ", "পেসান্ডো বিভাগ", "পূর্ব কালিমান্তান", …
  # $ name_de    <chr> "Entre Ríos", "Paysandú", "Ostkalimantan", "Sabah", "A…
  # $ name_en    <chr> "Entre Ríos", "Paysandú", "East Kalimantan", "Sabah", …
  # $ name_es    <chr> "Entre Ríos", "Paysandú", "Kalimantan Oriental", "Saba…
  # $ name_fr    <chr> "Entre Ríos", "Paysandú", "Kalimantan oriental", "Saba…
  # $ name_el    <chr> "Έντρε Ρίος", "Παϊσαντού", "Ανατολικό Καλιμαντάν", "Σα…
  # $ name_hi    <chr> "एन्ट्रे रियोस", "पयसंदु विभाग", "पूर्व कालिमंतान", "साबाह राज्…
  # $ name_hu    <chr> "Entre Ríos", "Paysandú", "Kelet-Kalimantan", "Sabah",…
  # $ name_id    <chr> "Entre Ríos", "Departemen Paysandú", "Kalimantan Timur…
  # $ name_it    <chr> "Entre Ríos", "dipartimento di Paysandú", "Kalimantan …
  # $ name_ja    <chr> "エントレ・リオス州", "パイサンドゥ県", "東カリマンタ…
  # $ name_ko    <chr> "엔트레리오스", "파이산두", "동칼리만탄", "사바", "아…
  # $ name_nl    <chr> "Entre Ríos", "Paysandú", "Oost-Kalimantan", "Sabah", …
  # $ name_pl    <chr> "Entre Ríos", "Paysandú", "Borneo Wschodnie", "Sabah",…
  # $ name_pt    <chr> "Entre Ríos", "Paysandú", "Kalimantan Oriental", "Sabá…
  # $ name_ru    <chr> "Энтре-Риос", "Пайсанду", "Восточный Калимантан", "Саб…
  # $ name_sv    <chr> "Entre Ríos", "Paysandú", "Kalimantan Timur", "Sabah",…
  # $ name_tr    <chr> "Entre Ríos eyaleti", "Paysandu Departmanı", "Doğu Kal…
  # $ name_vi    <chr> "Entre Ríos", "Paysandú", "Đông Kalimantan", "Sabah", …
  # $ name_zh    <chr> "恩特雷里奥斯省", "派桑杜省", "东加里曼丹省", "沙巴", …
  # $ ne_id      <dbl> 1159309789, 1159307733, 1159310009, 1159310033, 115931…
  # $ name_he    <chr> "אנטרה ריוס", "פאיסאנדו", "מזרח קלימנטאן", "סבה", "ארי…
  # $ name_uk    <chr> "Ентре-Ріос", "Пайсанду", "Східний Калімантан", "Сабах…
  # $ name_ur    <chr> "صوبہ انترے ریوس", "پایساندو محکمہ", "مشرقی کالیمانتان…
  # $ name_fa    <chr> "ایالت انتره ریوز", "بخش پایساندو", "کالیمانتان شرقی",…
  # $ name_zht   <chr> "恩特雷里奥斯省", "派桑杜省", "東加里曼丹省", "沙巴", …
  # $ FCLASS_ISO <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_US  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_FR  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_RU  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_ES  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_CN  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_TW  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_IN  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_NP  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_PK  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_DE  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_GB  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_BR  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_IL  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_PS  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_SA  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_EG  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_MA  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_PT  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_AR  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_JP  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_KO  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_VN  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_TR  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_ID  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_PL  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_GR  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_IT  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_NL  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_SE  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_BD  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_UA  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ FCLASS_TLC <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ geometry   <MULTIPOLYGON [°]> MULTIPOLYGON (((-58.20011 -..., MULTIPOLY…

  # Data Dictionary:
  #   featurecla --> (drop)
  #   scalerank ---> (drop)
  #   adm1_code ---> adm1_code: Unique identifier
  #   diss_me -----> (drop)
  #   iso_3166_2 --> (drop)
  #   wikipedia ---> (drop)
  #   iso_a2 ------> countryCode: 2-character country code
  #   adm0_sr -----> (drop)
  #   name --------> stateName: Native language name
  #   name_alt ----> (drop)
  #   name_local --> (drop)
  #   type --------> (drop)
  #   type_en -----> (drop)
  #   code_local --> (drop)
  #   code_hasc ---> code_hasc: HASC code
  #   note --------> (drop)
  #   hasc_maybe --> (drop)
  #   region ------> (drop)
  #   region_cod --> (drop)
  #   provnum_ne --> (drop)
  #   gadm_level --> (drop)
  #   check_me ----> (drop)
  #   datarank ----> (drop)
  #   abbrev ------> (drop)
  #   postal ------> postal: postal abbreviation as provided by Natural Earth
  #   area_sqkm ---> area_sqkm: area in square kilometers
  #   sameascity --> (drop)
  #   labelrank ---> (drop)
  #   name_len ----> (drop)
  #   mapcolor9 ---> (drop)
  #   mapcolor13 --> (drop)
  #   fips --------> stateFIPS: FIPS code
  #   fips_alt ----> (drop)
  #   woe_id ------> (drop)
  #   woe_label ---> (drop)
  #   woe_name ----> (drop)
  #   latitude ----> latitude: latitude coordinate
  #   longitude ---> longitude: longitude coordinate
  #   sov_a3 ------> (drop)
  #   adm0_a3 -----> (drop)
  #   adm0_label --> (drop)
  #   admin -------> (drop)
  #   geonunit ----> (drop)
  #   gu_a3 -------> (drop)
  #   gn_id -------> (drop)
  #   gn_name -----> (drop)
  #   gns_id ------> (drop)
  #   gns_name ----> (drop)
  #   gn_level ----> (drop)
  #   gn_region ---> (drop)
  #   gn_a1_code --> (drop)
  #   region_sub --> (drop)
  #   sub_code ----> (drop)
  #   gns_level ---> (drop)
  #   gns_lang ----> gns_lang: ISO 639-3 language identifier
  #   gns_adm1 ----> gns_adm1: adm1 identifier
  #   gns_region --> (drop)
  #   min_label ---> (drop)
  #   max_label ---> (drop)
  #   min_zoom ----> (drop)
  #   wikidataid --> (drop)
  #   name_* ------> (drop)
  #   ne_id -------> (drop)
  #   FCLASS* -----> (drop)

  # NOTE:  Subset to filter out exceptional areas (islands and disputed
  # NOTE:  territories) marked with '~'
  # NOTE:  Records with a missing code_hasc evaluate to NA and are also dropped
  # NOTE:  by dplyr::filter().

  SFDF <-
    SFDF %>%
    dplyr::filter(stringr::str_detect(.data$code_hasc, "~", negate = TRUE))

  # Add the core identifiers to the simple features data frame
  # NOTE:  stateCode is everything after the 3-character "CC." prefix of the
  # NOTE:  HASC code, e.g. "AR.ER" --> "ER"
  SFDF$stateCode <-
    SFDF$code_hasc %>%
    stringr::str_trim() %>%
    stringr::str_sub(4, -1)
  SFDF$countryName <- MazamaSpatialUtils::codeToCountry(SFDF$iso_a2)

  # Create the new dataframe in a specific column order
  # NOTE:  Column names are passed as strings because use of '.data$' inside
  # NOTE:  tidyselect expressions is deprecated as of tidyselect 1.2.0.
  SFDF <-
    SFDF %>%
    dplyr::select(
      countryCode = "iso_a2",
      countryName = "countryName",
      stateCode = "stateCode",
      stateName = "name",
      stateFIPS = "fips",
      latitude = "latitude",
      longitude = "longitude",
      area_sqkm = "area_sqkm",
      postal = "postal",
      adm1_code = "adm1_code",
      code_hasc = "code_hasc",
      gns_lang = "gns_lang",
      gns_adm1 = "gns_adm1"
    )

  # ----- Simplify and save ----------------------------------------------------

  uniqueIdentifier <- "adm1_code"

  simplifyAndSave(
    SFDF = SFDF,
    datasetName = datasetName,
    uniqueIdentifier = uniqueIdentifier,
    dataDir = dataDir
  )

  # ----- Return ---------------------------------------------------------------

  # NOTE:  Downloaded files are removed by the on.exit() handler registered above

  return(invisible(datasetName))

}

# ===== TEST ===================================================================

if ( FALSE ) {

  # Manual checks meant to be run interactively, line by line, after
  # convertNaturalEarthAdm1() has created the dataset files.

  library(sf)

  # NOTE:  datasetName is local to convertNaturalEarthAdm1() so it must be
  # NOTE:  defined here for the lines below to run.
  datasetName <- "NaturalEarthAdm1"

  # Look for horizontal lines from polygons that cross the dateline.
  # NOTE:  These are sometimes created by sf::st_make_valid()
  loadSpatialData(datasetName)
  SFDF <- get(datasetName)
  SFDF_05 <- get(paste0(datasetName, "_05"))
  SFDF_02 <- get(paste0(datasetName, "_02"))
  SFDF_01 <- get(paste0(datasetName, "_01"))

  plot(SFDF_01$geometry)
  dev.off(dev.list()["RStudioGD"])
  plot(SFDF_02$geometry)
  dev.off(dev.list()["RStudioGD"])
  plot(SFDF_05$geometry)
  dev.off(dev.list()["RStudioGD"])
  #plot(SFDF$geometry)

  # Try out getSpatialData()
  lons <- c(-120:-110, 0:10)
  lats <- c(30:40, 30:40)

  df <- getSpatialData(lons, lats, SFDF_01)
  df <- getSpatialData(lons, lats, SFDF_02)
  df <- getSpatialData(lons, lats, SFDF_05)
  df <- getSpatialData(lons, lats, SFDF)

  # Special case of Russia failing to plot properly
  SFDF %>% dplyr::filter(countryCode == "RU") %>% sf::st_geometry() %>% plot()

}
MazamaScience/MazamaSpatialUtils documentation built on Sept. 14, 2023, 6 p.m.