# data-raw/google_nuts_matchtable_maker_incl_ex_eu_AD1.R

library(regions)
library(tidyverse)
#' @author István Zsoldos
#' Not included in package, only as helper function to create the data file.

# Normalize a vector of place names into a key suitable for joining.
#
# Steps: coerce to character, lower-case, trim surrounding whitespace,
# turn every remaining whitespace character into "_", drop the punctuation
# characters : ! , ; % and finally collapse any run of underscores into a
# single "_".
#
# x:       vector of names (anything as.character() accepts); NA stays NA.
# returns: character vector of the same length as x.
normalize_text <- function(x) {
  
  x <- tolower(as.character(x))
  x <- str_trim(x, side = "both")
  x <- gsub("\\s", "_", x)
  x <- gsub(":|!|,|;|%", "", x)
  # A fixed alternation of 2-4 underscores left "__" behind for runs of five
  # or more; the quantifier collapses runs of any length.
  gsub("_{2,}", "_", x)
}

### Use your own path, or save a copy of the Global Mobility Report
### into 'data-raw'.
### This file is not synchronized with the GitHub repo because it
### is large and would slow down Git.

# Read the raw Global Mobility Report from 'data-raw'.
gmr_csv <- read_csv(file.path("data-raw", "Global_Mobility_Report.csv"))

# Give the columns short names and fold Réunion (country code "RE") into
# France as the region "La Réunion".
# NOTE(review): set_names() assumes the CSV has exactly these 11 columns in
# this order - confirm against the report version in use.
gmr <- gmr_csv %>%
  set_names(c(
    "country_code", "google_country_name",
    "google_region_name_1",
    "google_region_name_2",
    "date", "retail", "grocery",
    "parks", "transit", "workplaces", "residential"
  )) %>%
  mutate(
    # `%in%` never yields NA, so rows whose country_code is missing keep
    # their region/country names; `country_code == "RE"` made ifelse()
    # return NA for those rows.
    # country_code is rewritten last because the two tests above it still
    # need the original "RE" value.
    google_region_name_1 = ifelse(country_code %in% "RE",
                                  "La Réunion",
                                  google_region_name_1),
    google_country_name = ifelse(country_code %in% "RE",
                                 "France",
                                 google_country_name),
    country_code = ifelse(country_code %in% "RE",
                          "FR",
                          country_code)
  )

## First joining with valid NUTS codes ------------
## When there is no region name, use the country name
## And preferably the English country name, not the national language one

# Load the valid NUTS code table from the 'regions' package and show it.
data("all_valid_nuts_codes", package = 'regions')
all_valid_nuts_codes 

# For every country that has NUTS codes, attach the mobility rows and stack
# the two Google region-name columns into one long column. Rows with neither
# region name fall back to the country name. `google_name` is the normalized
# key; rows without one are dropped.
nuts_gmr <- all_valid_nuts_codes %>%
  mutate(country_code = get_country_code(geo)) %>%
  distinct(country_code) %>%
  left_join(gmr, by = "country_code") %>%
  mutate(
    google_region_name_1 = if_else(
      is.na(google_region_name_1) & is.na(google_region_name_2),
      google_country_name,
      google_region_name_1
    )
  ) %>%
  pivot_longer(
    cols = c("google_region_name_1", "google_region_name_2"),
    names_to = "google_region_level",
    values_to = "google_region_name"
  ) %>%
  mutate(google_name = normalize_text(google_region_name)) %>%
  filter(!is.na(google_name))

## Help table ------------------------------------------------
## Current regions and their official names in NUTS2016, 
## recent changes, name variants in NUTS2013 and NUTS2010 
## For countries where only NUTS 2021 is available, it uses that.

data("nuts_changes", package = 'regions')

# Country codes that have a country-level entry under code_2021 but none
# under code_2016: spread the two vintages into columns and keep the
# countries whose code_2016 cell is empty.
only_2021_nuts <- all_valid_nuts_codes %>%
  mutate(country_code = get_country_code(geo)) %>%
  select(country_code, nuts, typology) %>%
  filter(typology == "country" & nuts %in% c("code_2016", "code_2021")) %>%
  select(-typology) %>%
  distinct() %>%
  mutate(count = 1) %>%
  pivot_wider(names_from = "nuts", values_from = "count") %>%
  filter(is.na(code_2016)) %>%
  pull(country_code)

# Reference table of NUTS regions with the name used for matching:
# - rows from the 2016 vintage, plus all rows of countries that only exist
#   in the 2021 vintage;
# - `match_name` is the normalized English country name for country rows,
#   the normalized 2021 name for code_2021 rows, and the normalized 2016
#   name otherwise.
regions_and_names_2016 <- all_valid_nuts_codes %>%
  mutate(country_code = get_country_code(geo)) %>%
  filter(nuts == "code_2016" | country_code %in% only_2021_nuts) %>%
  rename(code_2016 = geo) %>%
  left_join(
    select(
      nuts_changes,
      typology, code_2016,
      geo_name_2021, geo_name_2016, geo_name_2013,
      geo_name_2010, change_2016
    ),
    by = c("typology", "code_2016")
  ) %>%
  mutate(
    country_name = countrycode::countrycode(
      country_code, "iso2c", "country.name"
    ),
    match_name = case_when(
      typology == "country" ~ normalize_text(country_name),
      nuts == "code_2021" ~ normalize_text(geo_name_2021),
      TRUE ~ normalize_text(geo_name_2016)
    )
  )

## Google region names before national corrections ------------
# Distinct Google region names per country, without any missing field.
# `match_name` starts as the normalized Google name and is corrected
# country by country further down.
google_region_names <- nuts_gmr %>%
  select(
    country_code, google_region_level,
    google_region_name, google_name
  ) %>%
  drop_na() %>%
  distinct() %>%
  mutate(match_name = google_name)

##  What was found at first try  ---------------------------------

# Names that already match the NUTS reference table before any
# country-specific correction (unmatched rows keep NA in the joined columns).
found_in_nuts_distinct <- left_join(
  google_region_names,
  regions_and_names_2016,
  by = c("country_code", "match_name")
)

## Making google_region_names$match_name equal to regions_and_names_2016$match_name when there is a 1-to-1 correspondence

#some general changes for better fit
#switching for local county name in SE
# Sweden: swap the "_county" suffix for the local "s_län" form.
# Romania: drop the "_county" suffix altogether.
# The two steps run in sequence, the second on the result of the first.
google_region_names <- mutate(
  google_region_names,
  match_name = ifelse(
    test = country_code == "SE" & grepl("_county", match_name),
    yes = gsub("_county", "s_län", match_name),
    no = match_name
  ),
  match_name = ifelse(
    test = country_code == "RO" & grepl("_county", match_name),
    yes = gsub("_county", "", match_name),
    no = match_name
  )
)

# changing some names in HU
# Sorted reference match names for Hungary.
hungary_names <- regions_and_names_2016 %>%
  filter(country_code == "HU") %>%
  pull(match_name) %>%
  as.character() %>%
  sort()

# Hungary: take the reference spelling for Győr-Moson-Sopron, leave names
# containing "hungary" or "budapest" untouched, and strip "_county" from the
# remaining names. Rows of other countries pass through unchanged.
google_region_names <- google_region_names %>%
  mutate(
    match_name = case_when(
      !(country_code %in% "HU") ~ match_name,
      grepl("moson-sopron", match_name) ~
        hungary_names[grepl("moson-sopron", hungary_names)],
      grepl("hungary|budapest", match_name) ~ match_name,
      grepl("_county", match_name) ~ gsub("_county", "", match_name),
      TRUE ~ match_name
    )
  )

# Adding code_2016 values where match is possible
# Attach code_2016 wherever country and match_name agree with the reference
# table; names without a match get NA and are handled per country below.
google_region_names <- left_join(
  google_region_names,
  select(regions_and_names_2016, c(country_code, code_2016, match_name)),
  by = c("country_code", "match_name")
)


# Fixing Italy
# See The Typology Of The Google Mobility Reports (COVID-19) vignette
# for Trentino-South Tyrol

# changing nuts codes
# Italy: assign NUTS codes for the English region names used by Google, then
# replace those names with the reference spellings. Rows of other countries
# pass through unchanged.
#
# Fix: "aosta" was listed twice ("ITCX" first, then "ITC2"); case_when()
# takes the first hit, so the "ITC2" branch was unreachable. Only "ITC2"
# is kept.
google_region_names <- google_region_names %>%
  mutate(
    # The code is derived first: it needs the Google spelling of match_name,
    # which the second assignment overwrites.
    code_2016 = case_when(
      !(country_code %in% "IT") ~ code_2016,
      match_name == "aosta" ~ "ITC2",
      match_name == "apulia" ~ "ITF4",
      match_name == "lombardy" ~ "ITC4",
      match_name == "piedmont" ~ "ITC1",
      match_name == "sardinia" ~ "ITG2",
      match_name == "sicily" ~ "ITG1",
      match_name == "tuscany" ~ "ITI1",
      # pseudo-code, because these are two regions
      match_name == "trentino-south_tyrol" ~ "ITDX",
      TRUE ~ code_2016
    ),
    match_name = case_when(
      !(country_code %in% "IT") ~ match_name,
      match_name == "aosta" ~ "valle_d’aosta/vallée_d’aoste",
      match_name == "apulia" ~ "puglia",
      match_name == "lombardy" ~ "lombardia",
      match_name == "piedmont" ~ "piemonte",
      match_name == "sardinia" ~ "sardegna",
      match_name == "sicily" ~ "sicilia",
      match_name == "tuscany" ~ "toscana",
      TRUE ~ match_name
    )
  ) %>%
  arrange(code_2016)


## Fixing Belgium

# changing nuts codes
# Belgium: code the three regions, then switch to the reference names.
# code_2016 is assigned first because it reads the Google spelling that the
# match_name assignment replaces.
google_region_names <- google_region_names %>%
  mutate(
    code_2016 = case_when(
      !(country_code %in% "BE") ~ code_2016,
      match_name == "brussels" ~ "BE1",
      match_name == "flanders" ~ "BE2",
      match_name == "wallonia" ~ "BE3",
      TRUE ~ code_2016
    ),
    match_name = case_when(
      !(country_code %in% "BE") ~ match_name,
      match_name == "brussels" ~
        "région_de_bruxelles-capitale/brussels_hoofdstedelijk_gewest",
      match_name == "flanders" ~ "vlaams_gewest",
      match_name == "wallonia" ~ "région_wallonne",
      TRUE ~ match_name
    )
  )

## Fixing Bulgaria
# changing nuts codes
# Bulgaria: map Google's transliterated province names to NUTS codes, then
# replace them with the Cyrillic reference names. code_2016 is assigned
# first because it reads the spelling that the match_name assignment
# replaces. Rows of other countries pass through unchanged.
google_region_names <- google_region_names %>%
  mutate(
    code_2016 = case_when(
      !(country_code %in% "BG") ~ code_2016,
      match_name == "blagoevgrad_province" ~ "BG413",
      match_name == "burgas" ~ "BG341",
      match_name == "dobrich_province" ~ "BG332",
      match_name == "gabrovo" ~ "BG322",
      match_name == "haskovo_province" ~ "BG422",
      match_name == "jambol" ~ "BG343",
      match_name == "kardzhali_province" ~ "BG425",
      match_name == "kyustendil_province" ~ "BG415",
      match_name == "lovec" ~ "BG315",
      match_name == "montana_province" ~ "BG312",
      match_name == "pazardzhik" ~ "BG423",
      match_name == "pernik" ~ "BG414",
      match_name == "pleven_province" ~ "BG314",
      match_name == "plovdiv_province" ~ "BG421",
      match_name == "razgrad" ~ "BG324",
      match_name == "ruse" ~ "BG323",
      match_name == "shumen_province" ~ "BG333",
      match_name == "silistra" ~ "BG325",
      match_name == "sliven_province" ~ "BG342",
      match_name == "smoljan" ~ "BG424",
      match_name == "sofia_city_province" ~ "BG411",
      match_name == "sofia_province" ~ "BG412",
      match_name == "stara_zagora" ~ "BG344",
      match_name == "targovishte_province" ~ "BG334",
      match_name == "varna" ~ "BG331",
      match_name == "veliko_tarnovo_province" ~ "BG321",
      match_name == "vidin" ~ "BG311",
      match_name == "vraca" ~ "BG313",
      TRUE ~ code_2016
    ),
    match_name = case_when(
      !(country_code %in% "BG") ~ match_name,
      match_name == "blagoevgrad_province" ~ "благоевград",
      match_name == "burgas" ~ "бургас",
      match_name == "dobrich_province" ~ "добрич",
      match_name == "gabrovo" ~ "габрово",
      match_name == "haskovo_province" ~ "хасково",
      match_name == "jambol" ~ "ямбол",
      match_name == "kardzhali_province" ~ "кърджали",
      match_name == "kyustendil_province" ~ "кюстендил",
      match_name == "lovec" ~ "ловеч",
      match_name == "montana_province" ~ "монтана",
      match_name == "pazardzhik" ~ "пазарджик",
      match_name == "pernik" ~ "перник",
      match_name == "pleven_province" ~ "плевен",
      match_name == "plovdiv_province" ~ "пловдив",
      match_name == "razgrad" ~ "разград",
      match_name == "ruse" ~ "русе",
      match_name == "shumen_province" ~ "шумен",
      match_name == "silistra" ~ "силистра",
      match_name == "sliven_province" ~ "сливен",
      match_name == "smoljan" ~ "смолян",
      match_name == "sofia_city_province" ~ "софия_(столица)",
      match_name == "sofia_province" ~ "софия",
      match_name == "stara_zagora" ~ "стара_загора",
      match_name == "targovishte_province" ~ "търговище",
      match_name == "varna" ~ "варна",
      match_name == "veliko_tarnovo_province" ~ "велико_търново",
      match_name == "vidin" ~ "видин",
      match_name == "vraca" ~ "враца",
      TRUE ~ match_name
    )
  )


## Fixing Czechia

# changing nuts codes
# Czechia: map Google's English region names to NUTS codes, then replace
# them with the Czech reference names. code_2016 is assigned first because
# it reads the spelling that the match_name assignment replaces.
google_region_names <- google_region_names %>%
  mutate(
    code_2016 = case_when(
      !(country_code %in% "CZ") ~ code_2016,
      match_name == "central_bohemian_region" ~ "CZ020",
      match_name == "hradec_králové_region" ~ "CZ052",
      match_name == "karlovy_vary_region" ~ "CZ041",
      match_name == "liberec_region" ~ "CZ051",
      match_name == "moravian-silesian_region" ~ "CZ080",
      match_name == "olomouc_region" ~ "CZ071",
      match_name == "pardubice_region" ~ "CZ053",
      match_name == "plzeň_region" ~ "CZ032",
      match_name == "prague" ~ "CZ010",
      match_name == "south_bohemian_region" ~ "CZ031",
      match_name == "south_moravian_region" ~ "CZ064",
      match_name == "ústí_nad_labem_region" ~ "CZ042",
      match_name == "vysocina_region" ~ "CZ063",
      match_name == "zlin_region" ~ "CZ072",
      TRUE ~ code_2016
    ),
    match_name = case_when(
      !(country_code %in% "CZ") ~ match_name,
      match_name == "central_bohemian_region" ~ "středočeský_kraj",
      match_name == "hradec_králové_region" ~ "královéhradecký_kraj",
      match_name == "karlovy_vary_region" ~ "karlovarský_kraj",
      match_name == "liberec_region" ~ "liberecký_kraj",
      match_name == "moravian-silesian_region" ~ "moravskoslezský_kraj",
      match_name == "olomouc_region" ~ "olomoucký_kraj",
      match_name == "pardubice_region" ~ "pardubický_kraj",
      match_name == "plzeň_region" ~ "plzeňský_kraj",
      match_name == "prague" ~ "hlavní_město_praha",
      match_name == "south_bohemian_region" ~ "jihočeský_kraj",
      match_name == "south_moravian_region" ~ "jihomoravský_kraj",
      match_name == "ústí_nad_labem_region" ~ "ústecký_kraj",
      match_name == "vysocina_region" ~ "kraj_vysočina",
      match_name == "zlin_region" ~ "zlínský_kraj",
      TRUE ~ match_name
    )
  )

#additional fixing for plzen
# Fallback for Plzeň: any Czech name containing "plze" gets the Plzeň code
# and reference name. The code is set first, while match_name still holds
# the value the pattern is tested against.
google_region_names <- mutate(
  google_region_names,
  code_2016 = ifelse(
    test = country_code == "CZ" & grepl("plze", match_name),
    yes = "CZ032",
    no = code_2016
  ),
  match_name = ifelse(
    test = country_code == "CZ" & grepl("plze", match_name),
    yes = "plzeňský_kraj",
    no = match_name
  )
)

# Fixing Denmark
# changing nuts codes
# Denmark: code the five regions, then switch to the Danish reference names.
# code_2016 is assigned first because it reads the spelling that the
# match_name assignment replaces.
google_region_names <- google_region_names %>%
  mutate(
    code_2016 = case_when(
      !(country_code %in% "DK") ~ code_2016,
      match_name == "capital_region_of_denmark" ~ "DK01",
      match_name == "central_denmark_region" ~ "DK04",
      match_name == "north_denmark_region" ~ "DK05",
      match_name == "region_of_southern_denmark" ~ "DK03",
      match_name == "region_zealand" ~ "DK02",
      TRUE ~ code_2016
    ),
    match_name = case_when(
      !(country_code %in% "DK") ~ match_name,
      match_name == "capital_region_of_denmark" ~ "hovedstaden",
      match_name == "central_denmark_region" ~ "midtjylland",
      match_name == "north_denmark_region" ~ "nordjylland",
      match_name == "region_of_southern_denmark" ~ "syddanmark",
      match_name == "region_zealand" ~ "sjælland",
      TRUE ~ match_name
    )
  )

# Fixing Germany
# changing nuts codes

# Germany: code the states whose English name differs from the reference
# name, then switch to the German names. code_2016 is assigned first because
# it reads the spelling that the match_name assignment replaces.
google_region_names <- google_region_names %>%
  mutate(
    code_2016 = case_when(
      !(country_code %in% "DE") ~ code_2016,
      match_name == "bavaria" ~ "DE2",
      match_name == "hesse" ~ "DE7",
      match_name == "lower_saxony" ~ "DE9",
      match_name == "north_rhine-westphalia" ~ "DEA",
      match_name == "rhineland-palatinate" ~ "DEB",
      match_name == "saxony" ~ "DED",
      match_name == "saxony-anhalt" ~ "DEE",
      # NOTE(review): every other state here gets a 3-character code while
      # Thuringia gets the 4-character "DEG0" - confirm this is intended.
      match_name == "thuringia" ~ "DEG0",
      TRUE ~ code_2016
    ),
    match_name = case_when(
      !(country_code %in% "DE") ~ match_name,
      match_name == "bavaria" ~ "bayern",
      match_name == "hesse" ~ "hessen",
      match_name == "lower_saxony" ~ "niedersachsen",
      match_name == "north_rhine-westphalia" ~ "nordrhein-westfalen",
      match_name == "rhineland-palatinate" ~ "rheinland-pfalz",
      match_name == "saxony" ~ "sachsen",
      match_name == "saxony-anhalt" ~ "sachsen-anhalt",
      match_name == "thuringia" ~ "thüringen",
      TRUE ~ match_name
    )
  )

# Fixing Estonia (Local government level data only, skipping for the moment, only deleting "_county")
# Estonia: only strip the "_county" suffix from the names; no codes are
# assigned here.
# Fix: the condition was written as `( test = ... )`, which R parses as an
# assignment inside parentheses (creating a stray `test` variable) rather
# than as the named `test` argument of ifelse().
google_region_names <- google_region_names %>%
  mutate(
    match_name = ifelse(
      test = country_code == "EE" & grepl("_county", match_name),
      yes = gsub("_county", "", match_name),
      no = match_name
    )
  )

# Fixing Ireland (Local government level data only, only Dublin County corresponds to NUTS3, skipping the rest for the moment)
# changing nuts code
# Ireland: only County Dublin is coded ("IE061") and renamed to "dublin".
# The code is set first, while match_name still reads "county_dublin".
google_region_names <- mutate(
  google_region_names,
  code_2016 = ifelse(
    test = country_code == "IE" & match_name == "county_dublin",
    yes = "IE061",
    no = code_2016
  ),
  match_name = ifelse(
    test = country_code == "IE" & match_name == "county_dublin",
    yes = "dublin",
    no = match_name
  )
)

# Fixing Greece ("Decentralized Administration", regions are made up of one or more NUTS2 regions, no change for the moment)


# Fixing Spain
# changing nuts codes

# Spain: map Google's English names to NUTS codes, then replace them with
# the Spanish reference names. code_2016 is assigned first because it reads
# the spelling that the match_name assignment replaces.
google_region_names <- google_region_names %>%
  mutate(
    code_2016 = case_when(
      !(country_code %in% "ES") ~ code_2016,
      match_name == "andalusia" ~ "ES61",
      match_name == "aragon" ~ "ES24",
      match_name == "balearic_islands" ~ "ES53",
      match_name == "basque_country" ~ "ES21",
      match_name == "canary_islands" ~ "ES70",
      match_name == "castile_and_león" ~ "ES41",
      match_name == "castile-la_mancha" ~ "ES42",
      match_name == "catalonia" ~ "ES51",
      match_name == "ceuta" ~ "ES63",
      match_name == "community_of_madrid" ~ "ES30",
      match_name == "melilla" ~ "ES64",
      match_name == "navarre" ~ "ES22",
      match_name == "region_of_murcia" ~ "ES62",
      match_name == "valencian_community" ~ "ES52",
      TRUE ~ code_2016
    ),
    match_name = case_when(
      !(country_code %in% "ES") ~ match_name,
      match_name == "andalusia" ~ "andalucía",
      match_name == "aragon" ~ "aragón",
      match_name == "balearic_islands" ~ "illes_balears",
      match_name == "basque_country" ~ "país_vasco",
      match_name == "canary_islands" ~ "canarias",
      match_name == "castile_and_león" ~ "castilla_y_león",
      match_name == "castile-la_mancha" ~ "castilla-la_mancha",
      match_name == "catalonia" ~ "cataluña",
      match_name == "ceuta" ~ "ciudad_autónoma_de_ceuta",
      match_name == "community_of_madrid" ~ "comunidad_de_madrid",
      match_name == "melilla" ~ "ciudad_autónoma_de_melilla",
      match_name == "navarre" ~ "comunidad_foral_de_navarra",
      match_name == "region_of_murcia" ~ "región_de_murcia",
      match_name == "valencian_community" ~ "comunidad_valenciana",
      TRUE ~ match_name
    )
  )


#Fixing France
# changing nuts codes
# France: map Google's region names to NUTS codes, then replace them with
# the reference names. code_2016 is assigned first because it reads the
# spelling that the match_name assignment replaces.
#
# Fix: unresolved git merge-conflict markers stood here and made the file
# unparseable. The conflict is resolved to the HEAD side, which did not
# contain the commented-out debugging filter on country_code == "FR".
google_region_names <- google_region_names %>%
  mutate(
    code_2016 = case_when(
      !(country_code %in% "FR") ~ code_2016,
      match_name == "brittany" ~ "FRH",
      match_name == "centre-val_de_loire" ~ "FRB",
      match_name == "corsica" ~ "FRM",
      match_name == "grand_est" ~ "FRF",
      match_name == "hauts-de-france" ~ "FRE",
      match_name == "île-de-france" ~ "FR1",
      match_name == "normandy" ~ "FRD",
      match_name == "nouvelle-aquitaine" ~ "FRI",
      match_name == "occitanie" ~ "FRJ",
      match_name == "provence-alpes-côte_d'azur" ~ "FRL",
      TRUE ~ code_2016
    ),
    match_name = case_when(
      !(country_code %in% "FR") ~ match_name,
      match_name == "brittany" ~ "bretagne",
      match_name == "centre-val_de_loire" ~ "centre_—_val_de_loire",
      match_name == "corsica" ~ "corse",
      match_name == "grand_est" ~ "alsace-champagne-ardenne-lorraine",
      match_name == "hauts-de-france" ~ "nord-pas_de_calais-picardie",
      match_name == "île-de-france" ~ "ile-de-france",
      match_name == "normandy" ~ "normandie",
      match_name == "nouvelle-aquitaine" ~
        "aquitaine-limousin-poitou-charentes",
      match_name == "occitanie" ~ "languedoc-roussillon-midi-pyrénées",
      match_name == "provence-alpes-côte_d'azur" ~
        "provence-alpes-côte_d’azur",
      TRUE ~ match_name
    )
  )


# Fixing Croatia
# changing nuts codes

# Croatia: map Google's county names to NUTS codes, then replace them with
# the Croatian reference names. code_2016 is assigned first because it reads
# the spelling that the match_name assignment replaces.
#
# Fix: the name mapping for "šibenik-knin_county" produced the code "HR034"
# instead of a region name; it now yields "šibensko-kninska_županija".
# NOTE(review): the spelling follows the pattern of the other county names
# here - confirm it against the reference table.
google_region_names <- google_region_names %>%
  mutate(
    code_2016 = case_when(
      !(country_code %in% "HR") ~ code_2016,
      match_name == "bjelovar-bilogora_county" ~ "HR047",
      match_name == "brod-posavina_county" ~ "HR04A",
      match_name == "city_of_zagreb" ~ "HR041",
      match_name == "dubrovnik-neretva_county" ~ "HR037",
      match_name == "istria_county" ~ "HR036",
      match_name == "karlovac_county" ~ "HR04D",
      match_name == "koprivnica-križevci_county" ~ "HR045",
      match_name == "krapina-zagorje_county" ~ "HR043",
      match_name == "lika-senj_county" ~ "HR032",
      match_name == "međimurje_county" ~ "HR046",
      match_name == "osijek-baranja_county" ~ "HR04B",
      match_name == "požega-slavonia_county" ~ "HR049",
      match_name == "primorje-gorski_kotar_county" ~ "HR031",
      match_name == "šibenik-knin_county" ~ "HR034",
      match_name == "sisak-moslavina_county" ~ "HR04E",
      match_name == "split-dalmatia_county" ~ "HR035",
      match_name == "varaždin_county" ~ "HR044",
      match_name == "virovitica-podravina_county" ~ "HR048",
      match_name == "vukovar-srijem_county" ~ "HR04C",
      match_name == "zadar_county" ~ "HR033",
      match_name == "zagreb_county" ~ "HR042",
      TRUE ~ code_2016
    ),
    match_name = case_when(
      !(country_code %in% "HR") ~ match_name,
      match_name == "bjelovar-bilogora_county" ~
        "bjelovarsko-bilogorska_županija",
      match_name == "brod-posavina_county" ~ "brodsko-posavska_županija",
      match_name == "city_of_zagreb" ~ "grad_zagreb",
      match_name == "dubrovnik-neretva_county" ~
        "dubrovačko-neretvanska_županija",
      match_name == "istria_county" ~ "istarska_županija",
      match_name == "karlovac_county" ~ "karlovačka_županija",
      match_name == "koprivnica-križevci_county" ~
        "koprivničko-križevačka_županija",
      match_name == "krapina-zagorje_county" ~ "krapinsko-zagorska_županija",
      match_name == "lika-senj_county" ~ "ličko-senjska_županija",
      match_name == "međimurje_county" ~ "međimurska_županija",
      match_name == "osijek-baranja_county" ~ "osječko-baranjska_županija",
      match_name == "požega-slavonia_county" ~ "požeško-slavonska_županija",
      match_name == "primorje-gorski_kotar_county" ~
        "primorsko-goranska_županija",
      match_name == "šibenik-knin_county" ~ "šibensko-kninska_županija",
      match_name == "sisak-moslavina_county" ~ "sisačko-moslavačka_županija",
      match_name == "split-dalmatia_county" ~
        "splitsko-dalmatinska_županija",
      match_name == "varaždin_county" ~ "varaždinska_županija",
      match_name == "virovitica-podravina_county" ~
        "virovitičko-podravska_županija",
      match_name == "vukovar-srijem_county" ~ "vukovarsko-srijemska_županija",
      match_name == "zadar_county" ~ "zadarska_županija",
      match_name == "zagreb_county" ~ "zagrebačka_županija",
      TRUE ~ match_name
    )
  )


#additional fixing for međimurje_county
# Fallback for Međimurje: any Croatian name still containing
# "imurje_county" gets the Međimurje code and reference name. The code is
# set first, while match_name still holds the value the pattern is tested
# against.
google_region_names <- mutate(
  google_region_names,
  code_2016 = ifelse(
    test = country_code == "HR" & grepl("imurje_county", match_name),
    yes = "HR046",
    no = code_2016
  ),
  match_name = ifelse(
    test = country_code == "HR" & grepl("imurje_county", match_name),
    yes = "međimurska_županija",
    no = match_name
  )
)


# Fixing Latvia (Municipal data only partially finished)
# PARTIAL

# Latvia (partial): Riga gets "LV006", the listed places get "LV003", and
# every other Latvian row falls back to "LV00".
#
# Fix: the second condition compared match_name with `==` against the single
# string "city_of_liepāja|ventspils|...", which no name ever equals, so every
# one of those places received "LV00". The alternatives are now matched
# individually with `%in%`.
# NOTE(review): exact matching is used; if partial matches (e.g. names merely
# containing "ventspils") were intended, use grepl() instead.
google_region_names <- google_region_names %>%
  mutate(
    code_2016 = case_when(
      country_code == "LV" & match_name == "riga" ~ "LV006",
      country_code == "LV" & match_name %in% c(
        "city_of_liepāja",
        "ventspils",
        "saldus_municipality",
        "talsi_municipality",
        "dobele_municipality"
      ) ~ "LV003",
      country_code == "LV" ~ "LV00",
      TRUE ~ code_2016
    )
  )
  
# Fixing Lithuania (changing "_county" to "_apskritis" in the name; this should match NUTS3 regions)

# changing nuts codes
# Lithuania: map Google's county names to NUTS codes, then replace them with
# the Lithuanian "apskritis" reference names. code_2016 is assigned first
# because it reads the spelling that the match_name assignment replaces.
google_region_names <- google_region_names %>%
  mutate(
    code_2016 = case_when(
      !(country_code %in% "LT") ~ code_2016,
      match_name == "alytus_county" ~ "LT021",
      match_name == "kaunas_county" ~ "LT022",
      match_name == "klaipėda_county" ~ "LT023",
      match_name == "marijampolė_county" ~ "LT024",
      match_name == "panevėžys_county" ~ "LT025",
      match_name == "šiauliai_county" ~ "LT026",
      match_name == "tauragė_county" ~ "LT027",
      match_name == "telšiai_county" ~ "LT028",
      match_name == "utena_county" ~ "LT029",
      match_name == "vilnius_county" ~ "LT011",
      TRUE ~ code_2016
    ),
    match_name = case_when(
      !(country_code %in% "LT") ~ match_name,
      match_name == "alytus_county" ~ "alytaus_apskritis",
      match_name == "kaunas_county" ~ "kauno_apskritis",
      match_name == "klaipėda_county" ~ "klaipėdos_apskritis",
      match_name == "marijampolė_county" ~ "marijampolės_apskritis",
      match_name == "panevėžys_county" ~ "panevėžio_apskritis",
      match_name == "šiauliai_county" ~ "šiaulių_apskritis",
      match_name == "tauragė_county" ~ "tauragės_apskritis",
      match_name == "telšiai_county" ~ "telšių_apskritis",
      match_name == "utena_county" ~ "utenos_apskritis",
      match_name == "vilnius_county" ~ "vilniaus_apskritis",
      TRUE ~ match_name
    )
  )


# Fixing Luxembourg
# Bringing it to the lowest identifiable level, which is LU000.
# The country has no NUTS subdivisions.
# Luxembourg: every row of the country gets the code "LU000"; rows of other
# countries keep their code.
google_region_names <- mutate(
  google_region_names,
  code_2016 = replace(code_2016, country_code %in% "LU", "LU000")
)

# Fixing Hungary (names were changed at the beginning)


# Fixing 	Malta
# Country has two NUTS3 subdivisions, but Google do not make
# subdivisions. Each brought to MT00 = MT0 = MT

# Malta: every row of the country gets the code "MT00"; rows of other
# countries keep their code.
google_region_names <- mutate(
  google_region_names,
  code_2016 = replace(code_2016, country_code %in% "MT", "MT00")
)

# Fixing the Netherlands

# changing nuts codes
# Netherlands: code the provinces whose Google name differs from the
# reference name, then switch to the reference names. code_2016 is assigned
# first because it reads the spelling that the match_name assignment
# replaces.
google_region_names <- google_region_names %>%
  mutate(
    code_2016 = case_when(
      !(country_code %in% "NL") ~ code_2016,
      match_name == "friesland" ~ "NL12",
      match_name == "limburg" ~ "NL42",
      match_name == "north_brabant" ~ "NL41",
      match_name == "north_holland" ~ "NL32",
      match_name == "south_holland" ~ "NL33",
      TRUE ~ code_2016
    ),
    match_name = case_when(
      !(country_code %in% "NL") ~ match_name,
      match_name == "friesland" ~ "friesland_(nl)",
      match_name == "limburg" ~ "limburg_(nl)",
      match_name == "north_brabant" ~ "noord-brabant",
      match_name == "north_holland" ~ "noord-holland",
      match_name == "south_holland" ~ "zuid-holland",
      TRUE ~ match_name
    )
  )


# Fixing Austria

# changing nuts codes
# Austria: code the states whose English name differs from the reference
# name, then switch to the German names. code_2016 is assigned first because
# it reads the spelling that the match_name assignment replaces.
google_region_names <- google_region_names %>%
  mutate(
    code_2016 = case_when(
      !(country_code %in% "AT") ~ code_2016,
      match_name == "carinthia" ~ "AT21",
      match_name == "lower_austria" ~ "AT12",
      match_name == "styria" ~ "AT22",
      match_name == "tyrol" ~ "AT33",
      match_name == "upper_austria" ~ "AT31",
      match_name == "vienna" ~ "AT13",
      TRUE ~ code_2016
    ),
    match_name = case_when(
      !(country_code %in% "AT") ~ match_name,
      match_name == "carinthia" ~ "kärnten",
      match_name == "lower_austria" ~ "niederösterreich",
      match_name == "styria" ~ "steiermark",
      match_name == "tyrol" ~ "tirol",
      match_name == "upper_austria" ~ "oberösterreich",
      match_name == "vienna" ~ "wien",
      TRUE ~ match_name
    )
  )


# Fixing Poland (most regions are nuts2, but "masovian_voivodeship" - "makroregion_województwo_mazowieckie" (PL9)- the region containing Warsaw - is nuts1)

# changing nuts codes
# Poland: map Google's voivodeship names to NUTS codes, then replace them
# with the Polish reference names. The Masovian voivodeship is coded at
# NUTS1 ("PL9"); the other entries use four-character codes. code_2016 is
# assigned first because it reads the spelling that the match_name
# assignment replaces.
google_region_names <- google_region_names %>%
  mutate(
    code_2016 = case_when(
      !(country_code %in% "PL") ~ code_2016,
      match_name == "greater_poland_voivodeship" ~ "PL41",
      match_name == "kuyavian-pomeranian_voivodeship" ~ "PL61",
      match_name == "lesser_poland_voivodeship" ~ "PL21",
      match_name == "łódź_voivodeship" ~ "PL71",
      match_name == "lower_silesian_voivodeship" ~ "PL51",
      match_name == "lublin_voivodeship" ~ "PL81",
      match_name == "lubusz_voivodeship" ~ "PL43",
      match_name == "masovian_voivodeship" ~ "PL9",
      match_name == "opole_voivodeship" ~ "PL52",
      match_name == "podkarpackie_voivodeship" ~ "PL82",
      match_name == "podlaskie_voivodeship" ~ "PL84",
      match_name == "pomeranian_voivodeship" ~ "PL63",
      match_name == "silesian_voivodeship" ~ "PL22",
      match_name == "swietokrzyskie" ~ "PL72",
      match_name == "warmian-masurian_voivodeship" ~ "PL62",
      match_name == "west_pomeranian_voivodeship" ~ "PL42",
      TRUE ~ code_2016
    ),
    match_name = case_when(
      !(country_code %in% "PL") ~ match_name,
      match_name == "greater_poland_voivodeship" ~ "wielkopolskie",
      match_name == "kuyavian-pomeranian_voivodeship" ~ "kujawsko-pomorskie",
      match_name == "lesser_poland_voivodeship" ~ "małopolskie",
      match_name == "łódź_voivodeship" ~ "łódzkie",
      match_name == "lower_silesian_voivodeship" ~ "dolnośląskie",
      match_name == "lublin_voivodeship" ~ "lubelskie",
      match_name == "lubusz_voivodeship" ~ "lubuskie",
      match_name == "masovian_voivodeship" ~
        "makroregion_województwo_mazowieckie",
      match_name == "opole_voivodeship" ~ "opolskie",
      match_name == "podkarpackie_voivodeship" ~ "podkarpackie",
      match_name == "podlaskie_voivodeship" ~ "podlaskie",
      match_name == "pomeranian_voivodeship" ~ "pomorskie",
      match_name == "silesian_voivodeship" ~ "śląskie",
      match_name == "swietokrzyskie" ~ "świętokrzyskie",
      match_name == "warmian-masurian_voivodeship" ~ "warmińsko-mazurskie",
      match_name == "west_pomeranian_voivodeship" ~ "zachodniopomorskie",
      TRUE ~ match_name
    )
  )

#additional fixing for lodz_county
google_region_names <- google_region_names %>% mutate( code_2016 = ifelse( country_code == "PL" & grepl( "ód", match_name) & google_name != "masovian_voivodeship", "PL71", code_2016))
google_region_names <- google_region_names %>% mutate( match_name = ifelse( country_code == "PL" & grepl( "ód", match_name) & google_name != "masovian_voivodeship", "łódzkie", match_name))


# Fixing Portugal: Google reports districts, which have no easy
# correspondence to NUTS regions; the codes below are a provisional mapping.
# Original author's note: Aveiro and Faro are the same districts!
# NOTE(review): Faro and Aveiro share PT16D, Évora and Portalegre share PT187,
# and PT16Y / PT11X / PT16X / PT11W / PT16W look like placeholder codes for
# districts that cut across NUTS boundaries rather than official NUTS 2016
# codes. Verify all of these against the NUTS 2016 classification.
# The table is sorted by code_2016 afterwards.
google_region_names <- google_region_names %>%
  mutate(
    code_2016 = case_when(
      !country_code %in% "PT" ~ code_2016,
      match_name == "lisbon" ~ "PT17",
      match_name == "azores" ~ "PT20",
      match_name == "madeira" ~ "PT30",
      match_name == "aveiro_district" ~ "PT16D",
      match_name == "faro_district" ~ "PT16D",
      match_name == "setubal" ~ "PT181",
      # with one village in PT181
      match_name == "beja_district" ~ "PT184",
      # part of Centro, historical region with cross-boundary changes
      match_name == "santarém_district" ~ "PT16Y",
      match_name == "porto_district" ~ "PT11A",
      match_name == "évora_district" ~ "PT187",
      match_name == "portalegre_district" ~ "PT187",
      # except for one municipality in Norte
      match_name == "guarda_district" ~ "PT16I",
      # cross-boundary changes, part of Norte, PT112
      match_name == "braga" ~ "PT11X",
      # cross-boundary changes, not fully the same
      match_name == "bragança_district" ~ "PT11E",
      # cross-boundary changes, part of Centro
      match_name == "castelo_branco_district" ~ "PT16X",
      # cross-boundary changes, part of Norte
      match_name == "vila_real_district" ~ "PT11W",
      match_name == "viana_do_castelo_district" ~ "PT111",
      match_name == "leiria_district" ~ "PT16F",
      match_name == "coimbra_district" ~ "PT16E",
      # cross-boundary changes, part of Centro
      match_name == "viseu_district" ~ "PT16W",
      TRUE ~ code_2016
    )
  ) %>%
  arrange(code_2016)


# Fixing Romania
#
# Step 1 assigns the NUTS 2016 code for counties whose Google spelling (with
# comma-below s/t) does not match the reference names. Step 2 replaces the
# Google spelling with the spelling used as match_name key (cedilla s/t,
# "bucureşti" for Bucharest). code_2016 is derived first, while match_name
# still holds Google's spelling. Rows outside RO are left untouched.
google_region_names <- google_region_names %>%
  mutate(
    code_2016 = case_when(
      !country_code %in% "RO" ~ code_2016,
      match_name == "argeș" ~ "RO311",
      match_name == "bistrița-năsăud" ~ "RO112",
      match_name == "botoșani" ~ "RO212",
      match_name == "brașov" ~ "RO122",
      match_name == "bucharest" ~ "RO321",
      match_name == "călărași" ~ "RO312",
      match_name == "caraș-severin" ~ "RO422",
      match_name == "constanța" ~ "RO223",
      match_name == "dâmbovița" ~ "RO313",
      match_name == "galați" ~ "RO224",
      match_name == "ialomița" ~ "RO315",
      match_name == "iași" ~ "RO213",
      match_name == "maramureș" ~ "RO114",
      match_name == "mehedinți" ~ "RO413",
      match_name == "mureș" ~ "RO125",
      match_name == "neamț" ~ "RO214",
      match_name == "timiș" ~ "RO424",
      TRUE ~ code_2016
    ),
    match_name = case_when(
      !country_code %in% "RO" ~ match_name,
      match_name == "argeș" ~ "argeş",
      # maps to the same spelling; the "bistr" fallback later in the script
      # assigns the cedilla form
      match_name == "bistrița-năsăud" ~ "bistrița-năsăud",
      match_name == "botoșani" ~ "botoşani",
      match_name == "brașov" ~ "braşov",
      match_name == "bucharest" ~ "bucureşti",
      match_name == "călărași" ~ "călăraşi",
      match_name == "caraș-severin" ~ "caraş-severin",
      match_name == "constanța" ~ "constanţa",
      match_name == "dâmbovița" ~ "dâmboviţa",
      match_name == "galați" ~ "galaţi",
      match_name == "ialomița" ~ "ialomiţa",
      match_name == "iași" ~ "iaşi",
      match_name == "maramureș" ~ "maramureş",
      match_name == "mehedinți" ~ "mehedinţi",
      match_name == "mureș" ~ "mureş",
      match_name == "neamț" ~ "neamţ",
      match_name == "timiș" ~ "timiş",
      TRUE ~ match_name
    )
  )


# Fallbacks for three Romanian counties whose diacritics may get lost, so the
# exact comparisons on the full name can fail. Each county is recognised from
# a plain-ASCII fragment of its name; the code is set before the name so both
# tests of a pair see the same match_name.
google_region_names <- google_region_names %>%
  # Bistrița-Năsăud: name contains "bistr"
  mutate(
    code_2016 = ifelse(
      country_code == "RO" & grepl("bistr", match_name), "RO112", code_2016
    ),
    match_name = ifelse(
      country_code == "RO" & grepl("bistr", match_name),
      "bistriţa-năsăud",
      match_name
    )
  ) %>%
  # Caraș-Severin: name contains "cara"
  mutate(
    code_2016 = ifelse(
      country_code == "RO" & grepl("cara", match_name), "RO422", code_2016
    ),
    match_name = ifelse(
      country_code == "RO" & grepl("cara", match_name),
      "caraş-severin",
      match_name
    )
  ) %>%
  # Călărași: characters 5-6 of the name are "ra"
  mutate(
    code_2016 = ifelse(
      country_code == "RO" & substr(match_name, 5, 6) == "ra",
      "RO312",
      code_2016
    ),
    match_name = ifelse(
      country_code == "RO" & substr(match_name, 5, 6) == "ra",
      "călăraşi",
      match_name
    )
  )


# Fixing Slovenia (seems like local municipal data, no change for now)


# Fixing Slovakia
#
# Assign the NUTS 2016 code of each Google region, then swap Google's English
# label for the Slovak name used as match_name key. code_2016 is derived
# first, while match_name still holds Google's label. Non-SK rows are kept.
google_region_names <- google_region_names %>%
  mutate(
    code_2016 = case_when(
      !country_code %in% "SK" ~ code_2016,
      match_name == "banská_bystrica_region" ~ "SK032",
      match_name == "bratislava_region" ~ "SK010",
      match_name == "košice_region" ~ "SK042",
      match_name == "nitra_region" ~ "SK023",
      match_name == "prešov_region" ~ "SK041",
      match_name == "trenčín_region" ~ "SK022",
      match_name == "trnava_region" ~ "SK021",
      match_name == "žilina_region" ~ "SK031",
      TRUE ~ code_2016
    ),
    match_name = case_when(
      !country_code %in% "SK" ~ match_name,
      match_name == "banská_bystrica_region" ~ "banskobystrický_kraj",
      match_name == "bratislava_region" ~ "bratislavský_kraj",
      match_name == "košice_region" ~ "košický_kraj",
      match_name == "nitra_region" ~ "nitriansky_kraj",
      match_name == "prešov_region" ~ "prešovský_kraj",
      match_name == "trenčín_region" ~ "trenčiansky_kraj",
      match_name == "trnava_region" ~ "trnavský_kraj",
      match_name == "žilina_region" ~ "žilinský_kraj",
      TRUE ~ match_name
    )
  )

# Fallback for Trenčín, whose diacritics may go missing: any SK row whose
# name contains "tren" gets the Trenčín code and name.
google_region_names <- google_region_names %>%
  mutate(
    code_2016 = ifelse(
      country_code == "SK" & grepl("tren", match_name), "SK022", code_2016
    ),
    match_name = ifelse(
      country_code == "SK" & grepl("tren", match_name),
      "trenčiansky_kraj",
      match_name
    )
  )



# Fixing Finland
#
# Assign the NUTS 2016 code of each Google region, then replace Google's
# English label with the Finnish name used as match_name key. code_2016 is
# derived first, while match_name still holds the English label. Rows from
# other countries are left untouched.
google_region_names <- google_region_names %>%
  mutate(
    code_2016 = case_when(
      !country_code %in% "FI" ~ code_2016,
      match_name == "central_finland" ~ "FI193",
      match_name == "central_ostrobothnia" ~ "FI1D5",
      match_name == "lapland" ~ "FI1D7",
      match_name == "north_karelia" ~ "FI1D3",
      match_name == "northern_ostrobothnia" ~ "FI1D9",
      match_name == "northern_savonia" ~ "FI1D2",
      match_name == "ostrobothnia" ~ "FI195",
      match_name == "päijänne_tavastia" ~ "FI1C3",
      match_name == "south_karelia" ~ "FI1C5",
      match_name == "southern_ostrobothnia" ~ "FI194",
      match_name == "southern_savonia" ~ "FI1D1",
      match_name == "southwest_finland" ~ "FI1C1",
      match_name == "tavastia_proper" ~ "FI1C2",
      match_name == "uusimaa" ~ "FI1B1",
      TRUE ~ code_2016
    ),
    match_name = case_when(
      !country_code %in% "FI" ~ match_name,
      match_name == "central_finland" ~ "keski-suomi",
      match_name == "central_ostrobothnia" ~ "keski-pohjanmaa",
      match_name == "lapland" ~ "lappi",
      match_name == "north_karelia" ~ "pohjois-karjala",
      match_name == "northern_ostrobothnia" ~ "pohjois-pohjanmaa",
      match_name == "northern_savonia" ~ "pohjois-savo",
      match_name == "ostrobothnia" ~ "pohjanmaa",
      match_name == "päijänne_tavastia" ~ "päijät-häme",
      match_name == "south_karelia" ~ "etelä-karjala",
      match_name == "southern_ostrobothnia" ~ "etelä-pohjanmaa",
      match_name == "southern_savonia" ~ "etelä-savo",
      match_name == "southwest_finland" ~ "varsinais-suomi",
      match_name == "tavastia_proper" ~ "kanta-häme",
      match_name == "uusimaa" ~ "helsinki-uusimaa",
      TRUE ~ match_name
    )
  )


# Fixing Sweden
#
# Google's county labels carry a genitive "s" and partly lack diacritics.
# Assign the NUTS 2016 code first (while match_name still holds Google's
# spelling), then replace the label with the name used as match_name key.
# Rows from other countries are left untouched.
google_region_names <- google_region_names %>%
  mutate(
    code_2016 = case_when(
      !country_code %in% "SE" ~ code_2016,
      match_name == "blekinges_län" ~ "SE221",
      match_name == "gavleborgs_län" ~ "SE313",
      match_name == "jamtlands_län" ~ "SE322",
      match_name == "jonkopings_län" ~ "SE211",
      match_name == "kalmars_län" ~ "SE213",
      match_name == "örebros_län" ~ "SE124",
      match_name == "skånes_län" ~ "SE224",
      match_name == "uppsalas_län" ~ "SE121",
      match_name == "varmlands_län" ~ "SE311",
      TRUE ~ code_2016
    ),
    match_name = case_when(
      !country_code %in% "SE" ~ match_name,
      match_name == "blekinges_län" ~ "blekinge_län",
      match_name == "gavleborgs_län" ~ "gävleborgs_län",
      match_name == "jamtlands_län" ~ "jämtlands_län",
      match_name == "jonkopings_län" ~ "jönköpings_län",
      match_name == "kalmars_län" ~ "kalmar_län",
      match_name == "örebros_län" ~ "örebro_län",
      match_name == "skånes_län" ~ "skåne_län",
      match_name == "uppsalas_län" ~ "uppsala_län",
      match_name == "varmlands_län" ~ "värmlands_län",
      TRUE ~ match_name
    )
  )

# Fixing Switzerland
#
# Assign the NUTS 2016 code of each canton, then replace Google's English
# label with the (partly multilingual) name used as match_name key. code_2016
# is derived first, while match_name still holds Google's label. Rows from
# other countries are left untouched.
# NOTE(review): "zurich" receives a four-character code (CH04) while the other
# cantons receive five-character codes; confirm this is intended.
google_region_names <- google_region_names %>%
  mutate(
    code_2016 = case_when(
      !country_code %in% "CH" ~ code_2016,
      match_name == "basel_city" ~ "CH031",
      match_name == "canton_of_bern" ~ "CH021",
      match_name == "canton_of_zug" ~ "CH066",
      match_name == "fribourg" ~ "CH022",
      match_name == "geneva" ~ "CH013",
      match_name == "grisons" ~ "CH056",
      match_name == "lucerne" ~ "CH061",
      match_name == "valais" ~ "CH012",
      match_name == "zurich" ~ "CH04",
      TRUE ~ code_2016
    ),
    match_name = case_when(
      !country_code %in% "CH" ~ match_name,
      match_name == "basel_city" ~ "basel-stadt",
      match_name == "canton_of_bern" ~ "bern_/_berne",
      match_name == "canton_of_zug" ~ "zug",
      match_name == "fribourg" ~ "fribourg_/_freiburg",
      match_name == "geneva" ~ "genève",
      match_name == "grisons" ~ "graubünden_/_grigioni_/_grischun",
      match_name == "lucerne" ~ "luzern",
      match_name == "valais" ~ "valais_/_wallis",
      match_name == "zurich" ~ "zürich",
      TRUE ~ match_name
    )
  )


#View(google_region_names %>% filter (is.na(code_2016)))

# Interim match table: classify every row by the length of its code
# (2 = country, 3/4/5 = NUTS level 1/2/3, anything else incl. NA = invalid)
# and drop the two helper name columns.
google_nuts_matchtable <- google_region_names %>%
  mutate(
    typology = case_when(
      nchar(code_2016) == 2 ~ 'country',
      nchar(code_2016) == 3 ~ 'nuts_level_1',
      nchar(code_2016) == 4 ~ 'nuts_level_2',
      nchar(code_2016) == 5 ~ 'nuts_level_3',
      TRUE ~ 'invalid typology'
    )
  ) %>%
  select(-google_name, -match_name)

# Countries where the available NUTS codes do not cover the full country,
# i.e. that still have at least one row without a valid code.
countries_missing_full_nuts <- google_nuts_matchtable %>%
  filter(typology == 'invalid typology') %>%
  distinct(country_code) %>%
  pull(country_code)

countries_missing_full_nuts

# Adding the reference code_2016 values again, to check for discrepancies.
#
# google_region_names and regions_and_names_2016 both carry a `code_2016`
# column. A plain left_join() would therefore return `code_2016.x` and
# `code_2016.y`, and every later reference to `code_2016` (the typology below
# and all following correction steps) would fail with "object not found".
# The reference code is joined under its own name instead, and the result is
# stored in a separate check table so that google_region_names, and the match
# table built from it later, keeps its columns and rows unchanged.
google_region_names_check <- google_region_names %>%
  left_join(
    regions_and_names_2016 %>%
      select(country_code, code_2016_reference = code_2016, match_name),
    by = c("country_code", "match_name")
  ) %>%
  mutate(
    # typology of the (partly hand-corrected) working code
    typology = case_when(
      nchar(code_2016) == 5 ~ 'nuts_level_3',
      nchar(code_2016) == 4 ~ 'nuts_level_2',
      nchar(code_2016) == 3 ~ 'nuts_level_1',
      nchar(code_2016) == 2 ~ 'country',
      TRUE ~ 'invalid typology'
    )
  )

# Fixing the United Kingdom: changing nuts codes.
#
# Rows outside GB keep their code; within GB the first matching name wins and
# unmatched names keep their current code. The first group holds regions with
# a one-to-one NUTS 3 counterpart (their names are harmonised in the renaming
# step further down). The second group holds Google regions that only make up
# a NUTS region together, so several names share one code.
#
# Corrections relative to the previous version:
#   * "torfaen_principal_areaa" had a doubled "a" and never matched;
#   * "east_dunbartonshire_council" and "rhondda_cynon_taff" carried a trailing
#     tab inside the string and never matched;
#   * the duplicated "bracknell_forest" entry was removed;
#   * Angus / Dundee City used UKM21, the NUTS 2013 code; the NUTS 2016 code
#     is UKM71 (the neighbouring entries already use the 2016 UKM7x codes).
#
# NOTE(review): "cambridgeshire_cc" and "east_sussex_cc" are only assigned by
# the later renaming step, and "cambridgeshire" / "east_sussex" are already
# matched in the first group, so the two NUTS 2 entries for them presumably
# never apply at this point. Confirm which level is intended.
google_region_names <- google_region_names %>%
  mutate(
    code_2016 = case_when(
      !country_code %in% "GB" ~ code_2016,

      match_name == "argyll_and_bute_council" ~ "UKM63",
      match_name == "buckinghamshire" ~ "UKJ13",
      match_name == "cambridgeshire" ~ "UKH12",
      match_name == "city_of_bristol" ~ "UKK11",
      match_name == "cornwall" ~ "UKK30",
      match_name == "county_durham" ~ "UKC14",
      match_name == "derry_and_strabane" ~ "UKN10",
      match_name == "dorset" ~ "UKK22",
      match_name == "dumfries_and_galloway" ~ "UKM92",
      match_name == "east_sussex" ~ "UKJ22",
      match_name == "edinburgh" ~ "UKM75",
      match_name == "greater_london" ~ "UKI",
      match_name == "herefordshire" ~ "UKG11",
      match_name == "kingston_upon_hull" ~ "UKE11",
      match_name == "na_h-eileanan_an_iar" ~ "UKM64",
      match_name == "orkney" ~ "UKM65",
      match_name == "shropshire" ~ "UKG22",
      match_name == "south_ayrshire_council" ~ "UKM94",
      match_name == "staffordshire" ~ "UKG24",
      match_name == "wiltshire" ~ "UKK15",

      # from here on more than one name makes up a region
      match_name == "aberdeen_city" ~ "UKM50",
      match_name == "aberdeenshire" ~ "UKM50",
      match_name == "angus_council" ~ "UKM71",
      match_name == "dundee_city_council" ~ "UKM71",
      match_name == "bath_and_north_east_somerset" ~ "UKK12",
      match_name == "north_somerset" ~ "UKK12",
      match_name == "south_gloucestershire" ~ "UKK12",
      match_name == "blaenau_gwent" ~ "UKL16",
      match_name == "caerphilly_county_borough" ~ "UKL16",
      match_name == "torfaen_principal_area" ~ "UKL16",
      # Knowsley and St. Helens missing, not a full nuts3 region
      match_name == "borough_of_halton" ~ "UKD71",
      # parts of the Berkshire nuts3 region (UKJ11)
      match_name == "bracknell_forest" ~ "UKJ11",
      match_name == "reading" ~ "UKJ11",
      match_name == "wokingham" ~ "UKJ11",
      match_name == "west_berkshire" ~ "UKJ11",
      match_name == "slough" ~ "UKJ11",
      match_name == "windsor_and_maidenhead" ~ "UKJ11",
      match_name == "bridgend" ~ "UKL17",
      match_name == "neath_port_talbot_principle_area" ~ "UKL17",
      match_name == "cardiff" ~ "UKL22",
      match_name == "vale_of_glamorgan" ~ "UKL22",
      match_name == "ceredigion" ~ "UKL14",
      match_name == "carmarthenshire" ~ "UKL14",
      match_name == "pembrokeshire" ~ "UKL14",
      match_name == "clackmannanshire" ~ "UKM72",
      match_name == "fife" ~ "UKM72",
      match_name == "conwy_principal_area" ~ "UKL13",
      match_name == "denbighshire" ~ "UKL13",
      # only nuts2 gives full, consistent coverage
      match_name == "derby" ~ "UKF1",
      match_name == "derbyshire" ~ "UKF1",
      match_name == "nottingham" ~ "UKF1",
      match_name == "nottinghamshire" ~ "UKF1",
      match_name == "east_ayrshire_council" ~ "UKM93",
      match_name == "north_ayrshire_council" ~ "UKM93",
      # helensburgh_&_lomond missing, may not be a full nuts3 region
      match_name == "east_dunbartonshire_council" ~ "UKM81",
      match_name == "west_dunbartonshire_council" ~ "UKM81",
      match_name == "inverclyde" ~ "UKM83",
      match_name == "east_renfrewshire_council" ~ "UKM83",
      match_name == "renfrewshire" ~ "UKM83",
      match_name == "flintshire" ~ "UKL23",
      match_name == "wrexham_principal_area" ~ "UKL23",
      # only nuts2 gives full, consistent coverage
      match_name == "hampshire" ~ "UKJ3",
      match_name == "isle_of_wight" ~ "UKJ3",
      match_name == "portsmouth" ~ "UKJ3",
      match_name == "southampton" ~ "UKJ3",
      match_name == "hartlepool" ~ "UKC11",
      match_name == "stockton-on-tees" ~ "UKC11",
      # NOT EXACT MATCH!!! nuts2
      match_name == "highland_council" ~ "UKM6",
      match_name == "merthyr_tydfil_county_borough" ~ "UKL15",
      match_name == "rhondda_cynon_taff" ~ "UKL15",
      match_name == "middlesbrough" ~ "UKC12",
      match_name == "redcar_and_cleveland" ~ "UKC12",
      match_name == "midlothian" ~ "UKM73",
      match_name == "east_lothian_council" ~ "UKM73",
      match_name == "monmouthshire" ~ "UKL21",
      match_name == "newport" ~ "UKL21",
      # may not be a full nuts3 region, the rest of
      # inverness_&_nairn_and_moray_badenoch_&_strathspey is missing
      match_name == "moray" ~ "UKM62",
      # only nuts2 gives full, consistent coverage
      match_name == "norfolk" ~ "UKH1",
      match_name == "cambridgeshire_cc" ~ "UKH1",
      match_name == "peterborough" ~ "UKH1",
      match_name == "suffolk" ~ "UKH1",
      match_name == "north_east_lincolnshire" ~ "UKE13",
      match_name == "north_lincolnshire" ~ "UKE13",
      # only nuts2 gives full, consistent coverage
      match_name == "northamptonshire" ~ "UKF2",
      match_name == "leicester" ~ "UKF2",
      match_name == "leicestershire" ~ "UKF2",
      match_name == "rutland" ~ "UKF2",
      match_name == "perth_and_kinross" ~ "UKM77",
      match_name == "stirling" ~ "UKM77",
      # only nuts2 gives full, consistent coverage
      match_name == "surrey" ~ "UKJ2",
      match_name == "brighton_and_hove" ~ "UKJ2",
      match_name == "east_sussex_cc" ~ "UKJ2",
      match_name == "west_sussex" ~ "UKJ2",
      # only nuts2 gives consistent coverage, Sunderland (UKC23) missing
      match_name == "tyne_and_wear" ~ "UKC2",
      match_name == "northumberland" ~ "UKC2",
      # this is the name of a nuts2 region, but it seems to refer to the
      # nuts3 one (north_yorkshire_cc)
      match_name == "north_yorkshire" ~ "UKE22",
      TRUE ~ code_2016
    )
  )

# Lincolnshire exists as a nuts2 and a nuts3 region: drop every row carrying
# the nuts2 code UKF3. Rows without a code (NA) are kept.
google_region_names <- google_region_names %>%
  filter(!code_2016 %in% "UKF3")

# Italian rows only, kept as a separate table.
google_region_names_it <- filter(google_region_names, country_code == "IT")

# United Kingdom: replace Google's labels with the names used as match_name
# keys for the regions that have a one-to-one NUTS 3 counterpart. Rows from
# other countries, and GB rows without an entry below, keep their name.
google_region_names <- google_region_names %>%
  mutate(
    match_name = case_when(
      !country_code %in% "GB" ~ match_name,
      match_name == "argyll_and_bute_council" ~
        "lochaber_skye_&_lochalsh_arran_&_cumbrae_and_argyll_&_bute",
      match_name == "buckinghamshire" ~ "buckinghamshire_cc",
      match_name == "cambridgeshire" ~ "cambridgeshire_cc",
      match_name == "city_of_bristol" ~ "bristol_city_of",
      match_name == "cornwall" ~ "cornwall_and_isles_of_scilly",
      match_name == "county_durham" ~ "durham_cc",
      match_name == "derry_and_strabane" ~ "derry_city_and_strabane",
      match_name == "dorset" ~ "dorset_cc",
      match_name == "dumfries_and_galloway" ~ "dumfries_&_galloway",
      match_name == "east_sussex" ~ "east_sussex_cc",
      match_name == "edinburgh" ~ "edinburgh_city_of",
      match_name == "greater_london" ~ "london",
      match_name == "herefordshire" ~ "herefordshire_county_of",
      match_name == "kingston_upon_hull" ~ "kingston_upon_hull_city_of",
      match_name == "na_h-eileanan_an_iar" ~
        "na_h-eileanan_siar_(western_isles)",
      match_name == "orkney" ~ "orkney_islands",
      match_name == "shropshire" ~ "shropshire_cc",
      match_name == "south_ayrshire_council" ~ "south_ayrshire",
      match_name == "staffordshire" ~ "staffordshire_cc",
      match_name == "wiltshire" ~ "wiltshire_cc",
      TRUE ~ match_name
    )
  )


## Fixing Greece

# Google reports Greek decentralized administrations (plus the Crete region);
# each one is assigned a three-character EL code here, so several of them
# share a code. Rows from other countries keep their current code.
google_region_names <- google_region_names %>%
  mutate(
    code_2016 = case_when(
      !country_code %in% "GR" ~ code_2016,
      match_name == "crete_region" ~ "EL4",
      match_name == "decentralized_administration_of_attica" ~ "EL3",
      match_name ==
        "decentralized_administration_of_epirus_and_western_macedonia" ~ "EL5",
      match_name ==
        "decentralized_administration_of_macedonia_and_thrace" ~ "EL5",
      match_name ==
        "decentralized_administration_of_peloponnese_western_greece_and_the_ionian" ~ "EL6",
      match_name == "decentralized_administration_of_the_aegean" ~ "EL4",
      match_name ==
        "decentralized_administration_of_thessaly_and_central_greece" ~ "EL6",
      TRUE ~ code_2016
    )
  )


# Creating the match table: classify every row by the length of its code
# (2 = country, 3/4/5 = NUTS level 1/2/3, anything else incl. NA = invalid)
# and drop the two helper name columns.
google_nuts_matchtable <- google_region_names %>%
  mutate(
    typology = case_when(
      nchar(code_2016) == 2 ~ 'country',
      nchar(code_2016) == 3 ~ 'nuts_level_1',
      nchar(code_2016) == 4 ~ 'nuts_level_2',
      nchar(code_2016) == 5 ~ 'nuts_level_3',
      TRUE ~ 'invalid typology'
    )
  ) %>%
  select(-google_name, -match_name)

# Countries where the available NUTS codes do not cover the full country,
# i.e. that still have at least one row without a valid code.
countries_missing_full_nuts <- google_nuts_matchtable %>%
  filter(typology == 'invalid typology') %>%
  distinct(country_code) %>%
  pull(country_code)

countries_missing_full_nuts

# Adding the reference code_2016 values again, to check for discrepancies.
#
# google_region_names and regions_and_names_2016 both carry a `code_2016`
# column. A plain left_join() would therefore return `code_2016.x` and
# `code_2016.y`, the typology step below would fail with "object not found",
# and a top-to-bottom run would stop before the results are saved.
# The reference code is joined under its own name instead, and the result is
# stored in a separate check table so google_region_names stays unchanged.
google_region_names_check <- google_region_names %>%
  left_join(
    regions_and_names_2016 %>%
      select(country_code, code_2016_reference = code_2016, match_name),
    by = c("country_code", "match_name")
  ) %>%
  mutate(
    # typology of the (partly hand-corrected) working code
    typology = case_when(
      nchar(code_2016) == 5 ~ 'nuts_level_3',
      nchar(code_2016) == 4 ~ 'nuts_level_2',
      nchar(code_2016) == 3 ~ 'nuts_level_1',
      nchar(code_2016) == 2 ~ 'country',
      TRUE ~ 'invalid typology'
    )
  )


#saving results
# Earlier manual save/load variant, kept for reference:
#save(google_nuts_matchtable, file = "google_nuts_matchtable.RData")
#load("google_nuts_matchtable.RData")

# Store the match table as an exported (internal = FALSE) package data set
# through usethis, replacing any previously saved version.
usethis::use_data(google_nuts_matchtable, 
                  internal=FALSE, overwrite = TRUE)
# Load the data set by name again.
# NOTE(review): presumably a check that the saved object can be loaded; confirm.
data ( google_nuts_matchtable )
# antaldaniel/regions documentation built on Sept. 27, 2022, 1:15 a.m.