#' Non-vectorized version of fix_unit_names()
#'
#' Rearranges a single address string towards the format
#' "#-# Street St, Extra Info": a leading non-numeric prefix (e.g. a building
#' name) is moved to the end, and a unit term followed by a number
#' ("unit 166") is turned into a leading "166-". Addresses that start with
#' 'rr' or 'hwy', or that contain fewer than two numbers, are returned
#' unchanged. This function handles one address at a time; use
#' fix_unit_names() for vectors.
#'
#' @param address A single address string
#'
#' @param ignore.case Ignore case for internal regex matching and rearranging.
#'
#' @return The converted address string
#' @keywords standardize, apartment, unit, suite, address
#'
#' @examples
#'
#' fix_unit_names_('halifax shopping centre 7001 mumford rd unit 166')
#'
fix_unit_names_ <- function(address, ignore.case = FALSE) {
  two_nums <- "(\\d+\\b)(.*)(\\b\\d+)" # two numbers in the address
  unit_names_box <- "#|unit|unite|apt|ste|suite|box|pobox|lot|site|bay|hangar|conc|bldg|room"
  unit_names <- "#|unit|unite|apt|ste|suite"
  # NOTE(review): '\\b#' only matches when '#' directly follows a word
  # character, so ' #12' is not recognised as a unit --- confirm if intended.
  apt_pattern <- paste0("(.*)(\\b(", unit_names, ")\\s*(\\d+))(.*)")
  prefix_unit_split <- paste0("^(\\D+)(\\b(", unit_names_box, ")(\\s|\\d))")
  prefix_unit_pattern <- paste0("^(\\D+?)(\\s(", unit_names_box, ")(\\s|\\d))(.*)")
  prefix_nounit_pattern <- "^(\\D+)(.*)"
  prefix_unit <- paste0("^(", unit_names_box, "|rte|cmp|((n|s|e|w|ne|nw|se|sw)(\\b|\\d)))")

  # Returns a one-row matrix: column 1 is the full match, columns 2.. are the
  # regex groups (the things in parentheses). All NA when there is no match.
  match_groups <- function(x, pattern) {
    stringr::str_match(
      x,
      pattern = stringr::regex(pattern, ignore_case = ignore.case)
    )
  }

  if (grepl("^(rr|hwy)(\\d|\\b)", address, ignore.case = ignore.case)) {
    # don't bother if address starts with 'rr' or 'hwy'
    return(address)
  }
  if (!grepl(two_nums, address, ignore.case = ignore.case)) {
    # don't bother if address has one or zero numbers in it
    return(address)
  }

  if (grepl(prefix_unit_split, address, ignore.case = ignore.case)) {
    # match a prefix when the address also contains a unit term
    raw <- match_groups(address, prefix_unit_pattern)
    # The detection pattern (\\b before the unit term) and the extraction
    # pattern (\\s before it) can disagree; on a failed extraction keep the
    # address as is instead of pasting NAs into it.
    if (!is.na(raw[, 1])) {
      prefix <- remove_tl_whitespace(raw[, 2])
      unit_term <- remove_tl_whitespace(raw[, 4])
      # raw[, 5] is the single character after the unit term. When it is a
      # digit ("unit166") it belongs to the unit number, so glue it back onto
      # the remainder before trimming; when it is whitespace it is trimmed
      # away (assumes remove_tl_whitespace() strips leading/trailing
      # whitespace --- TODO confirm).
      remainder <- remove_tl_whitespace(paste0(raw[, 5], raw[, 6]))
      # rearrange---put the extraneous info (the prefix) at the back
      address <- paste0(unit_term, " ", remainder, " ", prefix)
    }
  } else if (!grepl(prefix_unit, address, ignore.case = ignore.case) &&
             grepl(prefix_nounit_pattern, address, ignore.case = ignore.case)) {
    # match a prefix when the address doesn't contain a unit term
    raw <- match_groups(address, prefix_nounit_pattern)
    if (!is.na(raw[, 1])) {
      parts <- remove_tl_whitespace(raw)
      # rearrange---prefix (parts[, 2]) goes behind everything else (parts[, 3])
      address <- paste0(parts[, 3], " ", parts[, 2])
    }
  }

  if (grepl(apt_pattern, address, ignore.case = ignore.case)) {
    # there is an apartment in the address: move the unit number to the front
    raw <- match_groups(address, apt_pattern)
    if (!is.na(raw[, 1])) {
      parts <- remove_tl_whitespace(raw)
      # parts[, 5] is the unit number, parts[, 2] is the normal address, and
      # parts[, 6] is everything else
      street <- parts[, 2]
      street_part <- if (street == "") "" else paste0(street, " ")
      address <- paste0(parts[, 5], "-", street_part, parts[, 6])
    }
  }

  # return the modified address
  remove_tl_whitespace(address)
}
#' Rearrange unit/apt numbers to standardize addresses
#'
#' This function standardizes apartment numbers to the format
#' #-# Street St, Extra Info. Vectorized over 'address'.
#'
#' @param address A vector of address strings
#' @param ignore.case Ignore case for internal regex matching and rearranging.
#' @return A vector of converted address strings, one per element of
#'   'address'. A zero-length 'address' gives a zero-length character vector.
#' @keywords standardize, apartment, unit, suite, address
#' @examples
#'
#' fix_unit_names('halifax shopping centre 7001 mumford rd unit 166')
#'
#' fix_unit_names(c('halifax shopping centre 7001 mumford rd',
#' 'rr 2 box 166'))
#'
#' fix_unit_names('HFX SHOPPING CENTRE 7001 MUMFORD RD UNIT 166',
#' ignore.case = TRUE)
#' @export
fix_unit_names <- function(address, ignore.case = FALSE) {
  # mapply() (and therefore Vectorize()) returns an empty list for
  # zero-length input; return an empty character vector instead so the
  # result type does not depend on the input length.
  if (length(address) == 0L) {
    return(character(0))
  }
  # Apply the scalar worker to each element; 'ignore.case' is shared by all
  # calls and the result carries no names.
  mapply(
    FUN = fix_unit_names_,
    address = address,
    MoreArgs = list(ignore.case = ignore.case),
    SIMPLIFY = TRUE,
    USE.NAMES = FALSE
  )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.