# data-raw/address_dictionary.R

# address dictionary of replacement words
library(readr)
library(dplyr)
# Load the raw lexicon, discard its `LEXICON:` column, and label the
# remaining columns word / standard / type / other
# (assumes exactly four columns remain after the drop -- TODO confirm).
address_dictionary <- read_csv('data-raw/address-lexicon.csv') %>%
  select(-`LEXICON:`) %>%
  setNames(c('word', 'standard', 'type', 'other'))

# Normalise the `word` and `standard` columns, then keep only the unique
# (word, standard) pairs that describe a real replacement.
#
# Normalisation order: transliterate to ASCII, apply remove_punct and
# remove_articles, then lower-case. remove_punct / remove_articles are not
# defined in this script -- presumably package helpers that must already be
# in scope (e.g. via devtools::load_all()); verify before running.
#
# iconv() yields NA for strings it cannot convert; such rows are dropped by
# the `word != standard` test below because the comparison evaluates to NA.
address_dictionary <- address_dictionary %>%
  mutate_at(.vars = c('word', 'standard'), .funs = 'iconv', to = "ASCII//TRANSLIT") %>%
  mutate_at(.vars = c('word', 'standard'), .funs = 'remove_punct') %>%
  mutate_at(.vars = c('word', 'standard'), .funs = 'remove_articles') %>%
  mutate_at(.vars = c('word', 'standard'), .funs = 'tolower') %>%
  filter(
    # a word identical to its standard form is not a replacement
    word != standard,
    # `type` is not lower-cased above, so its codes are compared as read
    !type %in% c('PROV', 'PLACEN'),
    # `standard` is lower case at this point, so the country name has to be
    # matched as 'canada'; an upper-case 'CANADA' literal can never match
    # here and would exclude nothing
    standard != 'canada'
  ) %>%
  distinct(word, standard)

# Persist the cleaned dictionary twice: as a CSV beside the raw lexicon, and
# as a package data object named `address_dictionary`.
address_dictionary %>%
  write_csv('data-raw/address_dictionary.csv')

# NOTE(review): use_data() has moved from devtools to usethis in newer
# releases -- confirm which package version this script is run against.
devtools::use_data(
  address_dictionary,
  overwrite = TRUE
)
# tweed1e/matchtools documentation built on May 29, 2019, 10:51 a.m.