# Final cleaning of apartment building registry, convert to spatial
library(sf)
library(janitor)
library(stringr)
library(dplyr)
devtools::load_all()
apartment_building_registry_geocoded <- read_latest_file(directory = here::here("data-raw", "points_layers", "apartment_building_registry", "geocode_clean"), suffix = "-apartment_building_registry_geocoded_clean.rds", fileext = "rds")
# Move address fields to start, clean up column names
apartment_building_registry_geocoded <- apartment_building_registry_geocoded %>%
clean_names() %>%
select(id, site_address, starts_with("bing"), everything()) %>%
select(-bing_status_code, -bing_method, -bing_confidence, -address_geocode_error, -address_for_geocoding)
# Clean up original address
apartment_building_registry_geocoded <- apartment_building_registry_geocoded %>%
mutate(
site_address = str_squish(site_address),
site_address = str_to_title(site_address)
)
# Clean up property management, retain "original" version
property_mgmt <- apartment_building_registry_geocoded %>%
distinct(prop_management_company_name) %>%
mutate(
property_management = str_to_title(prop_management_company_name),
property_management_clean = property_management %>%
str_remove_all("\\.|\\,") %>%
str_remove_all("Limited|Incorporated|Corporation|Inc|Ltd|Corp|Lp") %>%
str_replace_all("Communites", "Communities") %>%
str_replace_all("Mangement|Managemet|Mangaement|Managment|Mng|Managent", "Management") %>%
str_replace_all("Partenership", "Partnership") %>%
str_replace_all("Proeprties", "Properties") %>%
str_replace_all(" & ", "&") %>%
str_replace_all("Canda", "Canada") %>%
str_squish()
)
mgmt_fixes <- tribble(
~wrong, ~correct,
"Wj Propertiesca", "Wj Properties",
"Preston Group - Toronto On", "Preston Group",
"Penwoth Holdings", "Penworth Holdings",
"Morguar", "Morguard",
"Linfred Investment", "Linfred Investments",
"King's College Management", "Kings College Management",
"Akeliuscanadaltd", "Akelius Canada",
"Orchard Apartments (Bloor)", "Orchard Apartments",
"Orchard Apartments (307)", "Orchard Apartments",
"Orchard Apartments (2328)", "Orchard Apartments",
"Aiderbook Management", "Aiderbrook Management",
"Amk2000 Holdings", "Amk 2000 Holdings",
"Bentall Kennedy Canadace", "Bentall Kennedy (Canada)",
"Bentallgreenoak (Canada)", "Bentallgreenoak",
"Cromewll Management", "Cromwell Management",
"Kpm Property Management", "Kpm",
"Megapro X Properties", "Megapro Properties",
"Royallepage", "Royal Lepage Signature Realty"
)
property_mgmt <- property_mgmt %>%
left_join(mgmt_fixes, by = c("property_management" = "wrong")) %>%
mutate(
property_management_clean = coalesce(correct, property_management_clean),
property_management_clean = case_when(
property_management %in% c("Not Applicable", "None", "N/A") ~ NA_character_,
str_starts(property_management_clean, "Metcap") ~ "Metcap",
str_starts(property_management_clean, "Aykler") ~ "Aykler",
str_starts(property_management_clean, "Cogir") ~ "Cogir",
str_starts(property_management_clean, "Greenwin") ~ "Greenwin",
str_starts(property_management_clean, "Morguard") ~ "Morguard",
TRUE ~ property_management_clean
)
) %>%
select(-correct)
property_mgmt <- property_mgmt %>%
mutate(across(
c(property_management, property_management_clean),
function(x) {
x %>%
str_replace("^Tch$", "TCH") %>%
coalesce("Unknown")
}
))
apartment_building_registry_geocoded <- apartment_building_registry_geocoded %>%
left_join(property_mgmt, by = "prop_management_company_name") %>%
select(-prop_management_company_name)
# Convert to SF
apartment_building_registry_sf <- apartment_building_registry_geocoded %>%
st_as_sf(coords = c("bing_longitude", "bing_latitude"), crs = 4326, remove = FALSE)
# Get neighbourhood for each building
apartment_with_neighbourhood <- apartment_building_registry_sf %>%
st_intersection(neighbourhoods) %>%
as_tibble() %>%
select(id, neighbourhood)
apartment_building_registry <- apartment_building_registry_sf %>%
left_join(apartment_with_neighbourhood, by = "id") %>%
select(id, site_address, starts_with("bing"), neighbourhood, everything())
# Keep relevant columns
apartment_building_registry <- apartment_building_registry %>%
select(id, rsn, address = site_address, bing_address, neighbourhood, units = confirmed_units, storeys = confirmed_storeys, year_built, property_management = property_management, property_type, geometry)
# Check values
apartment_building_registry %>%
filter(is.na(year_built))
# Can't really find data on year built, so will have to go with "Unknown"
# Check units / storeys
apartment_building_registry %>%
filter(units < 10 | is.na(units))
apartment_building_registry %>%
filter(storeys < 3 | is.na(storeys))
# Remove the two offenders - we don't have enough info for 'em!
all(apartment_building_registry %>% filter(units == 0 & storeys == 0) %>%
pull(rsn) %in% c(4820405, 4904686))
apartment_building_registry <- apartment_building_registry %>%
filter(!rsn %in% c(4820405, 4904686))
# Check property type
apartment_building_registry %>%
as_tibble() %>%
count(property_type)
# Remove one known issue - 40-50 Alexander St appears 3 times, but is only actually 2 buildings
apartment_building_registry <- apartment_building_registry %>%
filter(rsn != 4167698)
# Save
saveRDS(apartment_building_registry, here::here("data-raw", "points_layers", "apartment_building_registry", "clean", "apartment_building_registry.rds"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.