R/convertNaturalEarthAdm0.R

Defines functions convertNaturalEarthAdm1

Documented in convertNaturalEarthAdm1

#' @importFrom rlang .data
#' @export
#'
#' @title Convert Level 0 (country) borders shapefile
#'
#' @description Creates a simple features data frame for country level
#' (Adm0) administrative boundaries.
#'
#' The full resolution file will be named "NaturalEarthAdm0.rda". In addition,
#' "_05", "_02" and "_01" versions of the file will be created that are
#' simplified to 5\%, 2\% and 1\%. Simplified versions will greatly improve the
#' speed of both searching and plotting.
#'
#' @details A country border shapefile is downloaded and converted to a
#' simple features data frame with additional columns of data. The resulting file
#' will be created in the spatial data directory which is set with
#' \code{setSpatialDataDir()}.
#'
#' The downloaded zip file and the unzipped shapefile directory are removed
#' when the function exits, whether or not the conversion succeeded.
#'
#' @return Name of the dataset being created (returned invisibly).
#'
#' @references https://www.naturalearthdata.com

convertNaturalEarthAdm1 <- function() {

  # NOTE(review):  The function name says "Adm1" but everything below builds
  # NOTE(review):  the "NaturalEarthAdm0" country dataset. The name is left
  # NOTE(review):  unchanged so that existing callers keep working -- confirm
  # NOTE(review):  which name is intended.

  # ----- Setup ----------------------------------------------------------------

  # Use package internal data directory
  dataDir <- getSpatialDataDir()

  # Specify the name of the dataset and file being created
  datasetName <- 'NaturalEarthAdm0'

  # ----- Get the data ---------------------------------------------------------

  # Build appropriate request URL
  url <- 'https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip'

  filePath <- file.path(dataDir, basename(url))

  # NOTE:  This zip file has no directory so extra subdirectory needs to be created
  dsnPath <- file.path(dataDir, 'adm')

  # Always remove the downloaded archive and the unzipped files, even when a
  # later step fails, so that no partial artifacts are left in dataDir.
  on.exit({
    unlink(filePath, force = TRUE)
    unlink(dsnPath, recursive = TRUE, force = TRUE)
  }, add = TRUE)

  # download.file() returns a non-zero status code on failure
  status <- utils::download.file(url, filePath)
  if ( status != 0 ) {
    stop(sprintf("Download of '%s' failed with status %d", url, status), call. = FALSE)
  }

  utils::unzip(filePath, exdir = dsnPath)

  # ----- Convert to SFDF ------------------------------------------------------

  # Convert shapefile into simple features data frame
  # NOTE:  The 'adm' directory has been created
  shpName <- 'ne_10m_admin_0_countries'
  SFDF <- convertLayer(
    dsn = dsnPath,
    layer = shpName
  )

  # ----- Select useful columns and rename -------------------------------------

  # > dplyr::glimpse(SFDF, width = 75)
  # Rows: 258
  # Columns: 169
  # $ featurecla <chr> "Admin-0 country", "Admin-0 country", "Admin-0 country…
  # $ scalerank  <int> 0, 0, 0, 0, 0, 0, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
  # $ LABELRANK  <int> 2, 3, 2, 3, 2, 2, 3, 5, 2, 2, 4, 5, 5, 2, 3, 6, 2, 6, …
  # $ SOVEREIGNT <chr> "Indonesia", "Malaysia", "Chile", "Bolivia", "Peru", "…
  # $ SOV_A3     <chr> "IDN", "MYS", "CHL", "BOL", "PER", "ARG", "GB1", "CYP"…
  # $ ADM0_DIF   <int> 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, …
  # $ LEVEL      <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
  # $ TYPE       <chr> "Sovereign country", "Sovereign country", "Sovereign c…
  # $ TLC        <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",…
  # $ ADMIN      <chr> "Indonesia", "Malaysia", "Chile", "Bolivia", "Peru", "…
  # $ ADM0_A3    <chr> "IDN", "MYS", "CHL", "BOL", "PER", "ARG", "ESB", "CYP"…
  # $ GEOU_DIF   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
  # $ GEOUNIT    <chr> "Indonesia", "Malaysia", "Chile", "Bolivia", "Peru", "…
  # $ GU_A3      <chr> "IDN", "MYS", "CHL", "BOL", "PER", "ARG", "ESB", "CYP"…
  # $ SU_DIF     <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
  # $ SUBUNIT    <chr> "Indonesia", "Malaysia", "Chile", "Bolivia", "Peru", "…
  # $ SU_A3      <chr> "IDN", "MYS", "CHL", "BOL", "PER", "ARG", "ESB", "CYP"…
  # $ BRK_DIFF   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, …
  # $ NAME       <chr> "Indonesia", "Malaysia", "Chile", "Bolivia", "Peru", "…
  # $ NAME_LONG  <chr> "Indonesia", "Malaysia", "Chile", "Bolivia", "Peru", "…
  # $ BRK_A3     <chr> "IDN", "MYS", "CHL", "BOL", "PER", "ARG", "ESB", "CYP"…
  # $ BRK_NAME   <chr> "Indonesia", "Malaysia", "Chile", "Bolivia", "Peru", "…
  # $ BRK_GROUP  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ ABBREV     <chr> "Indo.", "Malay.", "Chile", "Bolivia", "Peru", "Arg.",…
  # $ POSTAL     <chr> "INDO", "MY", "CL", "BO", "PE", "AR", "DH", "CY", "IND…
  # $ FORMAL_EN  <chr> "Republic of Indonesia", "Malaysia", "Republic of Chil…
  # $ FORMAL_FR  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ NAME_CIAWF <chr> "Indonesia", "Malaysia", "Chile", "Bolivia", "Peru", "…
  # $ NOTE_ADM0  <chr> NA, NA, NA, NA, NA, NA, "U.K.", NA, NA, NA, NA, NA, NA…
  # $ NOTE_BRK   <chr> NA, NA, NA, NA, NA, NA, "U.K. Base", NA, NA, NA, NA, "…
  # $ NAME_SORT  <chr> "Indonesia", "Malaysia", "Chile", "Bolivia", "Peru", "…
  # $ NAME_ALT   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
  # $ MAPCOLOR7  <int> 6, 2, 5, 1, 4, 3, 6, 1, 1, 4, 3, 3, 4, 4, 1, 2, 5, 1, …
  # $ MAPCOLOR8  <int> 6, 4, 1, 5, 4, 1, 6, 2, 3, 4, 2, 2, 4, 4, 3, 8, 2, 3, …
  # $ MAPCOLOR9  <int> 6, 3, 5, 2, 4, 3, 6, 3, 2, 4, 5, 5, 4, 1, 3, 6, 7, 4, …
  # $ MAPCOLOR13 <int> 11, 6, 9, 3, 11, 13, 3, 7, 2, 3, 9, 8, 12, 13, 5, 7, 3…
  # $ POP_EST    <dbl> 270625568, 31949777, 18952038, 11513100, 32510453, 449…
  # $ POP_RANK   <int> 17, 15, 14, 14, 15, 15, 5, 12, 18, 18, 13, 12, 13, 17,…
  # $ POP_YEAR   <int> 2019, 2019, 2019, 2019, 2019, 2019, 2013, 2019, 2019, …
  # $ GDP_MD     <int> 1119190, 364681, 282318, 40895, 226848, 445445, 314, 2…
  # $ GDP_YEAR   <int> 2019, 2019, 2019, 2019, 2019, 2019, 2013, 2019, 2019, …
  # $ ECONOMY    <chr> "4. Emerging region: MIKT", "6. Developing region", "5…
  # $ INCOME_GRP <chr> "4. Lower middle income", "3. Upper middle income", "3…
  # $ FIPS_10    <chr> "ID", "MY", "CI", "BL", "PE", "AR", "-99", "CY", "IN",…
  # $ ISO_A2     <chr> "ID", "MY", "CL", "BO", "PE", "AR", "-99", "CY", "IN",…
  # $ ISO_A2_EH  <chr> "ID", "MY", "CL", "BO", "PE", "AR", "-99", "CY", "IN",…
  # $ ISO_A3     <chr> "IDN", "MYS", "CHL", "BOL", "PER", "ARG", "-99", "CYP"…
  # $ ISO_A3_EH  <chr> "IDN", "MYS", "CHL", "BOL", "PER", "ARG", "-99", "CYP"…
  # $ ISO_N3     <chr> "360", "458", "152", "068", "604", "032", "-99", "196"…
  # $ ISO_N3_EH  <chr> "360", "458", "152", "068", "604", "032", "-99", "196"…
  # $ UN_A3      <chr> "360", "458", "152", "068", "604", "032", "-099", "196…
  # $ WB_A2      <chr> "ID", "MY", "CL", "BO", "PE", "AR", "-99", "CY", "IN",…
  # $ WB_A3      <chr> "IDN", "MYS", "CHL", "BOL", "PER", "ARG", "-99", "CYP"…
  # $ WOE_ID     <int> 23424846, 23424901, 23424782, 23424762, 23424919, 2342…
  # $ WOE_ID_EH  <int> 23424846, 23424901, 23424782, 23424762, 23424919, 2342…
  # $ WOE_NOTE   <chr> "Exact WOE match as country", "Exact WOE match as coun…
  # $ ADM0_ISO   <chr> "IDN", "MYS", "CHL", "BOL", "PER", "ARG", "-99", "CYP"…
  # $ ADM0_~
  # $ CONTINENT  <chr> "Asia", "Asia", "South America", "South America", "Sou…
  # $ REGION_UN  <chr> "Asia", "Asia", "Americas", "Americas", "Americas", "A…
  # $ SUBREGION  <chr> "South-Eastern Asia", "South-Eastern Asia", "South Ame…
  # $ REGION_WB  <chr> "East Asia & Pacific", "East Asia & Pacific", "Latin A…
  # $ <map related stuff>
  # $ NAME_AR    <chr> "إندونيسيا", "ماليزيا", "تشيلي", "بوليفيا", "بيرو", "ا…
  # $ NAME_BN    <chr> "ইন্দোনেশিয়া", "মালয়েশিয়া", "চিলি", "বলিভিয়া", "পেরু", "…
  # $ NAME_DE    <chr> "Indonesien", "Malaysia", "Chile", "Bolivien", "Peru",…
  # $ NAME_EN    <chr> "Indonesia", "Malaysia", "Chile", "Bolivia", "Peru", "…
  # $ NAME_ES    <chr> "Indonesia", "Malasia", "Chile", "Bolivia", "Perú", "A…
  # $ NAME_FA    <chr> "اندونزی", "مالزی", "شیلی", "بولیوی", "پرو", "آرژانتین…
  # $ NAME_FR    <chr> "Indonésie", "Malaisie", "Chili", "Bolivie", "Pérou", …
  # $ NAME_EL    <chr> "Ινδονησία", "Μαλαισία", "Χιλή", "Βολιβία", "Περού", "…
  # $ NAME_HE    <chr> "אינדונזיה", "מלזיה", "צ'ילה", "בוליביה", "פרו", "ארגנ…
  # $ NAME_HI    <chr> "इंडोनेशिया", "मलेशिया", "चिली", "बोलिविया", "पेरू", "अर्जेण्ट…
  # $ NAME_HU    <chr> "Indonézia", "Malajzia", "Chile", "Bolívia", "Peru", "…
  # $ NAME_ID    <chr> "Indonesia", "Malaysia", "Chili", "Bolivia", "Peru", "…
  # $ NAME_IT    <chr> "Indonesia", "Malaysia", "Cile", "Bolivia", "Perù", "A…
  # $ NAME_JA    <chr> "インドネシア", "マレーシア", "チリ", "ボリビア", "ペ…
  # $ NAME_KO    <chr> "인도네시아", "말레이시아", "칠레", "볼리비아", "페루"…
  # $ NAME_NL    <chr> "Indonesië", "Maleisië", "Chili", "Bolivia", "Peru", "…
  # $ NAME_PL    <chr> "Indonezja", "Malezja", "Chile", "Boliwia", "Peru", "A…
  # $ NAME_PT    <chr> "Indonésia", "Malásia", "Chile", "Bolívia", "Peru", "A…
  # $ NAME_RU    <chr> "Индонезия", "Малайзия", "Чили", "Боливия", "Перу", "А…
  # $ NAME_SV    <chr> "Indonesien", "Malaysia", "Chile", "Bolivia", "Peru", …
  # $ NAME_TR    <chr> "Endonezya", "Malezya", "Şili", "Bolivya", "Peru", "Ar…
  # $ NAME_UK    <chr> "Індонезія", "Малайзія", "Чилі", "Болівія", "Перу", "А…
  # $ NAME_UR    <chr> "انڈونیشیا", "ملائیشیا", "چلی", "بولیویا", "پیرو", "ار…
  # $ NAME_VI    <chr> "Indonesia", "Malaysia", "Chile", "Bolivia", "Peru", "…
  # $ NAME_ZH    <chr> "印度尼西亚", "马来西亚", "智利", "玻利维亚", "秘鲁", …
  # $ NAME_ZHT   <chr> "印度尼西亞", "馬來西亞", "智利", "玻利維亞", "秘魯", …
  # $ FCLASS_ISO <chr> "Admin-0 country", "Admin-0 country", "Admin-0 country…
  # $ TLC_DIFF   <chr> NA, NA, NA, NA, NA, NA, "1", NA, NA, NA, NA, NA, NA, N…
  # $ FCLASS_TLC <chr> "Admin-0 country", "Admin-0 country", "Admin-0 country…
  # $ FCLASS_~
  # $ geometry   <MULTIPOLYGON [°]> MULTIPOLYGON (((117.7036 4...., MULTIPOLY…

  # Data Dictionary:
  #   ISO_A2_EH --> countryCode
  #   NAME_EN   --> countryName
  #   ISO_A3_EH --> ISO3
  #   FIPS_10   --> FIPS
  #   REGION_UN --> UN_region
  #   <everything else> is retained unchanged

  # NOTE:  Somaliland is treated as different from Somalia although most maps
  # NOTE:  treat them both as Somalia. Unfortunately, Somaliland has no 2-letter
  # NOTE:  ISO code and leaving it out generates maps that are obviously wrong.
  # NOTE:
  # NOTE:  Other breakaway regions are small enough that they won't be missed.
  # NOTE:  So here we glue Somaliland back onto Somalia.

  # which() drops NA comparisons so a missing BRK_A3 cannot break the assignment
  SFDF[which(SFDF$BRK_A3 == "SOL"), "ISO_A2_EH"] <- "SO"

  # Rename key columns and remove records with no countryCode
  # NOTE:  rename() is a tidyselect context where '.data' is deprecated so
  # NOTE:  column names are given as strings. filter() is data-masking so the
  # NOTE:  '.data' pronoun is appropriate there.
  SFDF <-
    SFDF %>%
    dplyr::rename(
      countryCode = "ISO_A2_EH",
      countryName = "NAME_EN",
      ISO3 = "ISO_A3_EH",
      FIPS = "FIPS_10",
      UN_region = "REGION_UN"
    ) %>%
    dplyr::filter(.data$countryCode != "-99")

  # ----- Combine polygons -----------------------------------------------------

  # Merge all records sharing a countryCode (e.g. Somalia + Somaliland) while
  # carrying along every non-geometry column
  copy_fields <- setdiff(names(SFDF), c("geometry"))

  SFDF <-
    SFDF %>%
    MazamaSpatialUtils::dissolve(
      "countryCode",
      copy_fields = copy_fields
    )

  # ----- Simplify and save ----------------------------------------------------

  uniqueIdentifier <- "countryCode"

  simplifyAndSave(
    SFDF = SFDF,
    datasetName = datasetName,
    uniqueIdentifier = uniqueIdentifier,
    dataDir = dataDir
  )

  # ----- Return ---------------------------------------------------------------

  # NOTE:  Clean up of downloaded files is handled by on.exit() above

  return(invisible(datasetName))

}

# ===== TEST ===================================================================

if ( FALSE ) {

  # Manual, interactive checks of the saved datasets. This block is never
  # executed when the file is sourced; run the lines by hand.

  library(sf)

  # NOTE:  'datasetName' is local to the conversion function so it must be
  # NOTE:  defined here before the lines below can be run.
  datasetName <- "NaturalEarthAdm0"

  # Look for horizontal lines from polygons that cross the dateline.
  # NOTE:  These are sometimes created by sf::st_make_valid()
  loadSpatialData(datasetName)
  SFDF <- get(datasetName)
  SFDF_05 <- get(paste0(datasetName, "_05"))
  SFDF_02 <- get(paste0(datasetName, "_02"))
  SFDF_01 <- get(paste0(datasetName, "_01"))

  plot(SFDF_01$geometry)
  dev.off(dev.list()["RStudioGD"])
  plot(SFDF_02$geometry)
  dev.off(dev.list()["RStudioGD"])
  plot(SFDF_05$geometry)
  dev.off(dev.list()["RStudioGD"])
  #plot(SFDF$geometry)

  # Try out getSpatialData()
  lons <- c(-120:-110, 0:10)
  lats <- c(30:40, 30:40)

  df <- getSpatialData(lons, lats, SFDF_01)
  df <- getSpatialData(lons, lats, SFDF_02)
  df <- getSpatialData(lons, lats, SFDF_05)
  df <- getSpatialData(lons, lats, SFDF)

  # Special case of Russia failing to plot properly
  SFDF %>% dplyr::filter(countryCode == "RU") %>% sf::st_geometry() %>% plot()

}

Try the MazamaSpatialUtils package in your browser

Any scripts or data that you put into this service are public.

MazamaSpatialUtils documentation built on Sept. 8, 2023, 5:22 p.m.