# scripts/country_names.R

# country name & code comparisons

# Countries -> ISO3/GENC codes, and common names

# https://www.state.gov/independent-states-in-the-world/
# https://www.fgdc.gov/standards/news/GENC
# https://www.census.gov/foreign-trade/schedules/c/country2.txt

# Lookup table of the distinct `country` values in `proj_raw`. Each value is
# paired with the GENC 3-letter code that countrycode resolves from the name
# (`genc3c`) and with the name countrycode maps back from that code
# (`country_name`).
proj_countries <- proj_raw %>%
  mutate(
    genc3c = countrycode(
      country,
      origin = "country.name",
      destination = "genc3c"
    ),
    # round-trip: code -> countrycode's own spelling of the country name
    country_name = countrycode(
      genc3c,
      origin = "genc3c",
      destination = "country.name"
    )
  ) %>%
  distinct(country, genc3c, country_name)

# Size of the lookup; the recorded result of this call was 195.
nrow(proj_countries)


# Lookup table of the distinct (`country`, `country_code`) pairs in `mac_raw`,
# with the GENC 3-letter code resolved from the name (`genc3c`) and the name
# countrycode maps back from that code (`country_name`).
mac_countries <- mac_raw %>%
  mutate(
    genc3c = countrycode(
      country,
      origin = "country.name",
      destination = "genc3c"
    ),
    # round-trip: code -> countrycode's own spelling of the country name
    country_name = countrycode(
      genc3c,
      origin = "genc3c",
      destination = "country.name"
    )
  ) %>%
  distinct(country, country_code, genc3c, country_name)

# Total rows in the lookup; the recorded result was 389 ...
nrow(mac_countries)

# ... but the recorded number of distinct `country` values was only 195.
mac_countries %>%
  distinct(country) %>%
  nrow()

# Inspect the countries that appear on more than one row.
mac_countries %>%
  group_by(country) %>%
  arrange(country) %>%
  filter(n() > 1) %>%
  View()
# Observation: there seem to be some rows with an NA country_code.

# Drill into one affected country in the raw data.
mac_raw %>%
  filter(country == "Afghanistan") %>%
  View()
# Observation: an NA country_code appears to exist where there are no
# mitigation options (although those rows are represented in the dataset).
# Probably the step that assigned country_code ran before the explicit
# missing values were filled in somewhere.

# Join the two country lookups on the GENC 3-letter code derived by
# countrycode. Rows of `mac_countries` with an NA `country_code` are excluded.
# Columns present in both tables get the ".proj" / ".mac" suffixes;
# `country_code` exists only on the mac side, so it is renamed by hand to
# follow the same convention.
#
# `na_matches = "never"`: dplyr's default treats NA keys as equal, so any
# names countrycode could not resolve (NA `genc3c`) on both sides would be
# paired with each other and would then pass the "everything matched" check
# below. With "never", such rows stay unmatched and are flagged by that check.
code_join <- full_join(
  proj_countries,
  filter(mac_countries, !is.na(country_code)),
  by = "genc3c",
  suffix = c(".proj", ".mac"),
  na_matches = "never"
) %>%
  rename(country_code.mac = country_code)

nrow(code_join)
# indicates all proj countries successfully matched mac countries

# Rows present on only one side of the join (including rows whose GENC code
# could not be resolved).
filter(code_join, is.na(country.proj) | is.na(country.mac)) %>% nrow()
# 0 was the recorded result # again, indicates all matched successfully

# Countries whose spelling differs between the two datasets.
filter(code_join, country.proj != country.mac)

# Countries where the GENC code derived here differs from the code already
# carried in the mac data.
filter(code_join, genc3c != country_code.mac) %>% View()

# Countries where the proj spelling differs from the name countrycode maps
# back from the GENC code.
filter(code_join, country.proj != country_name.proj) %>%
  select(country.proj, country_name.proj)

# conclusions:
# possibly we should work to use the official GENC codes and names
# The online, dynamic information resource that is the GENC Registry (http://nsgreg.nga.mil/genc) is the single authoritative source for the geopolitical entities (and administrative subdivisions), names, and code content of the GENC Standard; it supports multiple online data access mechanisms and downloadable (offline) information products (e.g., XML files).
# there seem to be frequent updates to the GENC data, and they don't make it as easy to download as it should be.
# the GENC codes are very similar to the ISO codes; the modifications exist to comply with U.S. law.
# MollieCarroll/NonCO2-Figs documentation built on April 19, 2020, 6:05 p.m.