R/esp-dict.R

Defines functions esp_dict_translate esp_dict_region_code

Documented in esp_dict_region_code esp_dict_translate

#' Convert and translate Spanish subdivision names and codes
#'
#' @description
#' Convert Spanish subdivision names or identifiers between different coding
#' schemes (NUTS, ISO2, province codes, etc.) or obtain human-readable
#' names.
#'
#' @encoding UTF-8
#' @family dictionary
#' @rdname esp_dict
#' @name esp_dict_region_code
#' @export
#'
#' @return
#'
#' `esp_dict_region_code()` returns a character vector with converted
#' subdivision identifiers or names. If a value cannot be matched the
#' corresponding element will be `NA` and a warning is emitted via
#' [cli::cli_alert_warning()].
#'
#'
#' @param sourcevar character string. Vector which contains the codes or names
#'   to be converted.
#' @param origin,destination character string. Coding scheme of origin and
#'   destination. One of `"text"`, `"nuts"`, `"iso2"`, `"codauto"`, or `"cpro"`.
#'
#' @details
#' The function uses internal dictionaries together with \CRANpkg{countrycode}
#' to map between schemes. When `origin == destination == "text"` the input is
#' returned unchanged. Mixing names from different administrative levels
#' (for example autonomous community and province) may produce
#' `NA` values for some entries.
#'
#' @examples
#' vals <- c("Errioxa", "Coruna", "Gerona", "Madrid")
#'
#' esp_dict_region_code(vals)
#' esp_dict_region_code(vals, destination = "nuts")
#' esp_dict_region_code(vals, destination = "cpro")
#' esp_dict_region_code(vals, destination = "iso2")
#'
#' # From ISO2 to another codes
#'
#' iso2vals <- c("ES-M", "ES-S", "ES-SG")
#' esp_dict_region_code(iso2vals, origin = "iso2")
#' esp_dict_region_code(iso2vals,
#'   origin = "iso2",
#'   destination = "nuts"
#' )
#' esp_dict_region_code(iso2vals,
#'   origin = "iso2",
#'   destination = "cpro"
#' )
#'
#' # Mixing levels
#' valsmix <- c("Centro", "Andalucia", "Seville", "Menorca")
#' esp_dict_region_code(valsmix, destination = "nuts")
#'
#' esp_dict_region_code(valsmix, destination = "codauto")
#' esp_dict_region_code(valsmix, destination = "iso2")
#'
esp_dict_region_code <- function(
  sourcevar,
  origin = "text",
  destination = "text"
) {
  validvars <- c("text", "nuts", "iso2", "codauto", "cpro")
  origin <- match_arg_pretty(origin, validvars)
  destination <- match_arg_pretty(destination, validvars)

  # Manually replace
  sourcevar <- gsub("Ciudad de ceuta", "Ceuta", sourcevar, ignore.case = TRUE)
  sourcevar <- gsub(
    "Ciudad de melilla",
    "Melilla",
    sourcevar,
    ignore.case = TRUE
  )
  sourcevar <- gsub("sta. cruz", "Santa Cruz", sourcevar, ignore.case = TRUE)
  sourcevar <- gsub("sta cruz", "Santa Cruz", sourcevar, ignore.case = TRUE)

  initsourcevar <- sourcevar

  if (origin == destination && origin == "text") {
    make_msg(
      "info",
      TRUE,
      "No conversion. {.arg origin}",
      "equal to {.arg destination}",
      paste0("({.str ", origin, "})")
    )
    return(initsourcevar)
  }

  # Create dict
  dict <- names_full

  names_dict <- unique(
    dict[grep("name", dict$variable, fixed = TRUE), c("key", "value")]
  )

  # If text convert to nuts

  if (origin == "text") {
    sourcevar <- countrycode::countrycode(
      tolower(sourcevar),
      origin = "value",
      destination = "key",
      custom_dict = names_dict,
      nomatch = "NOMATCH"
    )

    # Translate to nuts
    sourcevar <- countrycode::countrycode(
      sourcevar,
      origin = "key",
      destination = "nuts",
      custom_dict = get_master_nuts_nm(),
      nomatch = "NOMATCH"
    )

    # Replace NOMATCH
    sourcevar[sourcevar == "NOMATCH"] <- initsourcevar[sourcevar == "NOMATCH"]
    origin <- "nuts"

    # By name, perform some replacements
    # Madrid - CCAA
    sourcevar[sourcevar == "ES3"] <- "ES30"

    # Canarias - CCAA
    sourcevar[sourcevar == "ES7"] <- "ES70"

    if (destination == "iso2") {
      # Melilla - Prov
      sourcevar[sourcevar == "ES64"] <- "ES640"

      # Ceuta - Prov
      sourcevar[sourcevar == "ES63"] <- "ES630"
    }

    if (destination == "cpro") {
      nchar <- nchar(sourcevar)
      sourcevar[nchar == 4] <- paste0(sourcevar[nchar == 4], "0")
    }
  }

  # Destination
  if (destination == "text") {
    sourcevar <- countrycode::countrycode(
      sourcevar,
      origin,
      "nuts",
      custom_dict = get_master_codes(),
      nomatch = "NOMATCH"
    )

    dict_nutsall <- sf::st_drop_geometry(mapSpain::esp_nuts_2024)

    out <- countrycode::countrycode(
      sourcevar,
      "NUTS_ID",
      "NUTS_NAME",
      custom_dict = dict_nutsall,
      nomatch = "NOMATCH"
    )
  } else {
    # Solve problems
    if (origin == "nuts") {
      # Melilla - Prov
      sourcevar[sourcevar == "ES64"] <- "ES640"

      # Ceuta - Prov
      sourcevar[sourcevar == "ES63"] <- "ES630"
    }

    if (destination == "codauto") {
      # Melilla - Prov
      sourcevar[sourcevar == "ES640"] <- "ES64"

      # Ceuta - Prov
      sourcevar[sourcevar == "ES630"] <- "ES63"
    }

    out <- countrycode::countrycode(
      sourcevar,
      origin,
      destination,
      custom_dict = get_master_codes(),
      nomatch = "NOMATCH"
    )

    # Baleares
    if (destination == "cpro") {
      out[sourcevar == "ES530"] <- "07"
    }
    # Ceuta
    if (origin == "iso2" && destination == "codauto") {
      out[sourcevar == "ES-CE"] <- "18"
      out[sourcevar == "ES-ML"] <- "19"
    }
  }
  out[out %in% c("XXXXX", "YYYYY")] <- "NOMATCH"

  # Sanitize
  if (length(out[out != "NOMATCH"]) != length(sourcevar)) {
    cli::cli_alert_warning(
      paste0(
        "No match on {.arg destination = {.str {destination}}} found ",
        "for {.str {initsourcevar[out == 'NOMATCH']}}."
      )
    )
  }
  out[out == "NOMATCH"] <- NA

  out
}


#'
#' @rdname esp_dict
#' @name esp_dict_translate
#'
#' @return
#'
#' `esp_dict_translate()` translates a vector of names from one language to
#'  another :
#'   - If `all = FALSE`, a character vector with the translated name for each
#'     element of `sourcevar`.
#'   - If `all = TRUE`, a named `list` is returned where each element contains
#'     all available translations for the corresponding input value.
#'
#' @export
#'
#' @param lang character string. Target language code, available values:
#'   - `"es"`: Spanish.
#'   - `"en"`: English.
#'   - `"ca"`: Catalan.
#'   - `"ga"`: Galician.
#'   - `"eu"`: Basque.
#'
#' @param all logical. If `TRUE` the function returns all possible translations
#'   for each input as a named list. When `FALSE` (default) a single preferred
#'   translation per input is returned as a character vector.
#'
#' @examples
#' vals <- c("La Rioja", "Sevilla", "Madrid", "Jaen", "Orense", "Baleares")
#'
#' esp_dict_translate(vals)
#' esp_dict_translate(vals, lang = "es")
#' esp_dict_translate(vals, lang = "ca")
#' esp_dict_translate(vals, lang = "eu")
#' esp_dict_translate(vals, lang = "ga")
#'
#' esp_dict_translate(vals, lang = "ga", all = TRUE)
esp_dict_translate <- function(sourcevar, lang = "en", all = FALSE) {
  avlang <- c("es", "en", "ca", "ga", "eu")
  lang <- match_arg_pretty(lang, avlang)

  # Create dict
  dict <- names_full

  # Arrange prelation for results:
  # - First: prov (a_prov)
  # - Second: ccaa (b_ccaa)
  # - Last: nuts (c_nuts)

  # Upgrade provs
  dict$variable <- gsub("prov", "a_prov", dict$variable, fixed = TRUE)
  # Upgrade ccaa
  dict$variable <- gsub("ccaa", "b_ccaa", dict$variable, fixed = TRUE)
  # Upgrade nuts
  dict$variable <- gsub("nuts", "c_nuts", dict$variable, fixed = TRUE)

  names_dict <- unique(
    dict[grep("name", dict$variable, fixed = TRUE), c("key", "value")]
  )

  sourcevar_lower <- tolower(sourcevar)
  tokeys <- countrycode::countrycode(
    sourcevar_lower,
    origin = "value",
    destination = "key",
    custom_dict = names_dict,
    nomatch = "NOMATCH"
  )

  if (any(tokeys == "NOMATCH")) {
    cli::cli_alert_warning(
      paste0(
        "No match found ",
        "for {.str {sourcevar[tokeys == 'NOMATCH']}}."
      )
    )
  }

  # Create lang dict
  dict_tolang <- unique(
    dict[grep(paste0("name.", lang), dict$variable), ]
  )

  # Order using short
  shrt <- grep("short", dict_tolang$variable, fixed = TRUE)

  dict_tolang[shrt, ]$variable <- paste0("aa", dict_tolang[shrt, ]$variable)

  dict_tolang <- unique(
    dict_tolang[order(dict_tolang$variable), c("key", "value")]
  )
  namestrans <- lapply(seq_along(tokeys), function(x) {
    if (tokeys[x] == "NOMATCH") {
      return(NA)
    }

    all_trans <- dict_tolang[dict_tolang$key == tokeys[x], "value"]
    all_trans <- unlist(all_trans)
    if (isFALSE(all)) {
      all_trans <- all_trans[1]
    }
    unname(all_trans)
  })

  if (all) {
    names(namestrans) <- sourcevar
  } else {
    namestrans <- unlist(namestrans)
  }

  namestrans
}

Try the mapSpain package in your browser

Any scripts or data that you put into this service are public.

mapSpain documentation built on Jan. 17, 2026, 9:07 a.m.