# Aggregate census tracts to neighbourhoods, for variables of interest
# Some of the dimensions we will be showing need comparisons to the city
# Either a city average / # or a distribution of all of the neighbourhoods
# # People
#
# Population
# Number of households
# Population change
# Population density
# Household size
# One person and 2+ people incomes
# Unaffordable housing %
# Total people under poverty measure
# Visible minority population
# # Places
#
# Private dwellings by structure
# Number of bedrooms
# Renter versus Owner
# Shelter Cost
library(dplyr)
library(tidyr)
library(stringr)
library(purrr)
library(readr)
library(forcats)
#### Read data ----
census_profiles_toronto_cts <- readRDS(here::here("data-raw", "aggregate_data", "census_profiles_2016", "clean", "census_profiles_toronto_cts.rds"))
census_profiles_toronto <- readRDS(here::here("data-raw", "aggregate_data", "census_profiles_2016", "clean", "census_profiles_toronto.rds"))
neighbourhood <- list()
city <- list()
#### Function for keeping more detailed dimension dimensions -----
# There is so much hierarchy in some of these, so for each dimension, check if it's a parent - if so, don't keep it. If not, then it's the most detailed, and we want to keep it.
keep_most_detailed_dimension <- function(df, dimension_full_start) {
df_dimension <- df %>%
distinct(parent_id, dimension_full, dimension, dimension_id) %>%
filter(str_starts(dimension_full, dimension_full_start))
dimension_id <- df_dimension %>%
pull(dimension_id)
dimension_flag_parent <- map_lgl(dimension_id, ~ .x %in% df_dimension[["parent_id"]])
names(dimension_flag_parent) <- dimension_id
dimension_not_parent <- dimension_flag_parent[!dimension_flag_parent]
dimension_not_parent <- names(dimension_not_parent)
df_dimension %>%
filter(dimension_id %in% dimension_not_parent) %>%
select(dimension_id) %>%
inner_join(df, by = "dimension_id")
}
## Functions for getting total and prop -----
aggregate_total_city <- function(df) {
df %>%
summarise(value = sum(total, na.rm = TRUE))
}
aggregate_total_by_neighbourhood <- function(df) {
df %>%
group_by(neighbourhood) %>%
summarise(value = sum(total, na.rm = TRUE))
}
# When we calculate proportion we can't just sum the totals - we need to use the parent dimension because of rounding / non-response
aggregate_prop_by_neighbourhood <- function(df, dimension_full_start) {
df_children <- df %>%
keep_most_detailed_dimension(dimension_full_start)
df_children_summary <- df_children %>%
group_by(neighbourhood, group = dimension) %>%
summarise(value = sum(total, na.rm = TRUE), .groups = "drop")
df_parent <- df %>%
filter(dimension == dimension_full_start)
df_parent_summary <- df_parent %>%
aggregate_total_by_neighbourhood() %>%
rename(total = value)
df_children_summary %>%
left_join(df_parent_summary, by = "neighbourhood") %>%
mutate(prop = round(value / total, 3)) %>%
select(neighbourhood, group, prop) %>%
complete(neighbourhood, group, fill = list(prop = 0))
}
aggregate_prop_city <- function(df, dimension_full_start) {
df_children <- df %>%
keep_most_detailed_dimension(dimension_full_start) %>%
select(group = dimension, value = total) %>%
# Aggregate in cases where there was combination of some dimensions
group_by(group) %>%
summarise(value = sum(value, na.rm = TRUE))
df_parent <- df %>%
filter(dimension == dimension_full_start) %>%
select(total)
df_children %>%
bind_cols(df_parent) %>%
mutate(prop = round(value / total, 3)) %>%
select(group, prop)
}
## People ---------------------------------------------------------------- -----
### Population -----
# Dimension: "Population, 2016"
population_by_neighbourhood <- census_profiles_toronto_cts %>%
filter(dimension == "Population, 2016") %>%
aggregate_total_by_neighbourhood()
neighbourhood <- append(neighbourhood, list(population = population_by_neighbourhood))
# City
population_city <- census_profiles_toronto %>%
filter(dimension == "Population, 2016") %>%
pull(total)
city <- append(city, list(population = population_city))
### Households -----
households_by_neighbourhood <- census_profiles_toronto_cts %>%
filter(dimension == "Total - Private households by household size - 100% data") %>%
aggregate_total_by_neighbourhood()
neighbourhood <- append(neighbourhood, list(households = households_by_neighbourhood))
# City
households_city <- census_profiles_toronto %>%
filter(dimension == "Total - Private households by household size - 100% data") %>%
pull(total)
city <- append(city, list(households = households_city))
### Population change ----
# Use "Population, 2011" and compare to 2016
population_2011 <- census_profiles_toronto_cts %>%
filter(dimension == "Population, 2011") %>%
aggregate_total_by_neighbourhood()
population_change_by_neighbourhood <- population_by_neighbourhood %>%
left_join(population_2011, by = "neighbourhood", suffix = c("_2016", "_2011")) %>%
mutate(
population_change = (value_2016 - value_2011) / value_2011,
population_change = round(population_change, 3)
) %>%
select(neighbourhood, value = population_change)
rm(population_2011)
neighbourhood <- append(neighbourhood, list(population_change = population_change_by_neighbourhood))
# Compare to city with value and distribution
population_change_city <- census_profiles_toronto %>%
filter(dimension == "Population percentage change, 2011 to 2016") %>%
pull(total)
population_change_city <- population_change_city / 100
population_change_city_distribution <- population_change_by_neighbourhood["value"]
city <- append(city, list(population_change = population_change_city, population_change_distribution = population_change_city_distribution))
### Population density -----
population_density_by_neighbourhood <- census_profiles_toronto_cts %>%
filter(dimension %in% c("Population, 2016", "Land area in square kilometres")) %>%
select(geo_code, neighbourhood, dimension, total) %>%
mutate(dimension = case_when(
dimension == "Population, 2016" ~ "population",
dimension == "Land area in square kilometres" ~ "area"
)) %>%
pivot_wider(names_from = dimension, values_from = total) %>%
group_by(neighbourhood) %>%
summarize(across(c(population, area), sum, na.rm = TRUE)) %>%
mutate(population_density = round(population / area)) %>%
select(neighbourhood, value = population_density)
neighbourhood <- append(neighbourhood, list(population_density = population_density_by_neighbourhood))
# Compare to city with value and distribution
population_density_city <- census_profiles_toronto %>%
filter(dimension == "Population density per square kilometre") %>%
pull(total) %>%
round()
population_density_city_distribution <- population_density_by_neighbourhood["value"]
city <- append(city, list(population_density = population_density_city, population_density_distribution = population_density_city_distribution))
### Household size ----
# Variable: "Total - Private households by household size - 100% data"
# Retrieved from rental_supply > census_custom_tab_2016_table2
### Average total income ----
# Retrieved from census_custom_tab_2016_table1_income/
### Unaffordable housing ----
# Variable: "Total - Tenant households in non-farm, non-reserve private dwellings - 25% sample data"
# "% of tenant households spending 30% or more of its income on shelter costs"
household_tenure_by_ct <- census_profiles_toronto_cts %>%
keep_most_detailed_dimension("Total - Private households by tenure - 25% sample data") %>%
filter(dimension %in% c("Owner", "Renter")) %>%
select(neighbourhood, geo_code, dimension, total)
renter_by_ct <- household_tenure_by_ct %>%
filter(dimension == "Renter") %>%
select(-dimension, -neighbourhood) %>%
rename(renter = total)
unaffordable_housing_by_neighbourhood <- census_profiles_toronto_cts %>%
keep_most_detailed_dimension("Total - Tenant households in non-farm, non-reserve private dwellings - 25% sample data") %>%
filter(dimension == "% of tenant households spending 30% or more of its income on shelter costs") %>%
select(neighbourhood, geo_code, percent_unaffordable = total) %>%
left_join(renter_by_ct, by = "geo_code") %>%
mutate(number_unaffordable = round(renter * percent_unaffordable / 100)) %>%
group_by(neighbourhood) %>%
summarise(
value = sum(number_unaffordable, na.rm = TRUE) / sum(renter, na.rm = TRUE),
value = round(value, 3)
)
# TODO not quite right
# Danforth shows 49.6
unaffordable_housing_by_neighbourhood %>%
filter(neighbourhood == "Danforth")
# This gives 49.9
neighbourhood <- append(neighbourhood, list(unaffordable_housing = unaffordable_housing_by_neighbourhood))
# Compare to city with value and distribution
unaffordable_housing_city <- census_profiles_toronto %>%
filter(dimension == "% of tenant households spending 30% or more of its income on shelter costs") %>%
pull(total)
unaffordable_housing_city <- round(unaffordable_housing_city / 100, 3)
unaffordable_housing_city_distribution <- unaffordable_housing_by_neighbourhood["value"]
city <- append(city, list(unaffordable_housing = unaffordable_housing_city, unaffordable_housing_distribution = unaffordable_housing_city_distribution))
### Total people under poverty measure ----
# Low-income measure after tax (LIM-AT)
private_households_by_neighbourhood <- census_profiles_toronto_cts %>%
filter(dimension == "Number of persons in private households") %>%
aggregate_total_by_neighbourhood()
lim_at_by_neighbourhood <- census_profiles_toronto_cts %>%
filter(dimension == "In low income based on the Low-income measure, after tax (LIM-AT)") %>%
aggregate_total_by_neighbourhood() %>%
left_join(private_households_by_neighbourhood, by = "neighbourhood", suffix = c("_poverty", "_population")) %>%
mutate(
value = value_poverty / value_population,
value = round(value, 3)
) %>%
select(neighbourhood, value)
neighbourhood <- append(neighbourhood, list(lim_at = lim_at_by_neighbourhood))
# Compare to city with value and distribution
lim_at_city <- census_profiles_toronto %>%
filter(dimension == "In low income based on the Low-income measure, after tax (LIM-AT)") %>%
pull(total)
persons_city <- census_profiles_toronto %>%
filter(dimension == "Number of persons in private households") %>%
pull(total)
lim_at_city <- round(lim_at_city / persons_city, 3)
city <- append(city, list(lim_at = lim_at_city, lim_at_distribution = lim_at_by_neighbourhood["value"]))
### Visible minority -----
# Variable: "Total - Visible minority for the population in private households - 25% sample"
# Combine Chinese, Japanese, Korean into "East Asian"
# Combine "Filipino" with "Southeast Asian"
# These numbers seem a tiny bit off compared to the City's, even before collapsing - e.g. they have 60 for Korean vs 65 here
visible_minority_by_neighbourhood <- census_profiles_toronto_cts %>%
mutate(dimension = case_when(
dimension %in% c("Chinese", "Japanese", "Korean") ~ "East Asian",
dimension == "Filipino" ~ "Southeast Asian",
TRUE ~ dimension
)) %>%
aggregate_prop_by_neighbourhood("Total - Visible minority for the population in private households - 25% sample data")
neighbourhood <- append(neighbourhood, list(visible_minority = visible_minority_by_neighbourhood))
# Compare to city with breakdown
visible_minority_city <- census_profiles_toronto %>%
mutate(dimension = case_when(
dimension %in% c("Chinese", "Japanese", "Korean") ~ "East Asian",
dimension == "Filipino" ~ "Southeast Asian",
TRUE ~ dimension
)) %>%
aggregate_prop_city("Total - Visible minority for the population in private households - 25% sample data") %>%
mutate(
group = fct_reorder(group, prop, .desc = TRUE),
group = fct_relevel(group, "Visible minority, n.i.e.", "Multiple visible minorities", "Not a visible minority", after = Inf)
)
city <- append(city, list(visible_minority = visible_minority_city))
### Places ------- ----
### Private dwellings by structure -----
# Retrieved from rental_supply > census_custom_tab_2016_table1
### Number of bedrooms ----
# Retrieved from rental_supply > census_custom_tab_2016_table2
### Household tenure -----
# Variable: "Total - Private households by tenure - 25% sample data"
# "Band housing" (relevant when the housing is on a First Nations reserve or settlement) is not present in Toronto.
# So limit to Owner and Renter.
household_tenure_by_neighbourhood <- census_profiles_toronto_cts %>%
aggregate_prop_by_neighbourhood("Total - Private households by tenure - 25% sample data") %>%
filter(group %in% c("Owner", "Renter"))
neighbourhood <- append(neighbourhood, list(household_tenure = household_tenure_by_neighbourhood))
# Compare to city by breakdown
household_tenure_city <- census_profiles_toronto %>%
aggregate_prop_city("Total - Private households by tenure - 25% sample data") %>%
filter(group %in% c("Owner", "Renter"))
city <- append(city, list(household_tenure = household_tenure_city))
### Shelter cost -----
average_renter_shelter_cost_by_ct <- census_profiles_toronto_cts %>%
filter(dimension == "Average monthly shelter costs for rented dwellings ($)") %>%
select(neighbourhood, geo_code, avg_shelter_cost = total)
renter_shelter_cost_by_neighbourhood <- average_renter_shelter_cost_by_ct %>%
left_join(renter_by_ct, by = "geo_code") %>%
mutate(total_shelter_cost = avg_shelter_cost * renter) %>%
group_by(neighbourhood) %>%
summarise(
value = sum(total_shelter_cost, na.rm = TRUE) / sum(renter, na.rm = TRUE),
value = round(value)
)
neighbourhood <- append(neighbourhood, list(average_renter_shelter_cost = renter_shelter_cost_by_neighbourhood))
# Compare to city by value and distribution
average_renter_shelter_cost_city <- census_profiles_toronto %>%
filter(dimension == "Average monthly shelter costs for rented dwellings ($)") %>%
pull(total) %>%
round()
average_renter_shelter_cost_distribution <- renter_shelter_cost_by_neighbourhood["value"]
city <- append(city, list(average_renter_shelter_cost = average_renter_shelter_cost_city, average_renter_shelter_cost_distribution = average_renter_shelter_cost_distribution))
### Restructure data sets ----
# I want to make a list, one element for each neighbourhood, then within that have one element for each variable / dimension
neighbourhood_profiles <- neighbourhood %>%
map(~ split(.x, .x$neighbourhood))
# Some of these are just a single value, so they don't need to be in a data frame
neighbourhood_profiles[["population"]] <- neighbourhood_profiles[["population"]] %>%
map("value")
neighbourhood_profiles[["households"]] <- neighbourhood_profiles[["households"]] %>%
map("value")
neighbourhood_profiles[["population_change"]] <- neighbourhood_profiles[["population_change"]] %>%
map("value")
neighbourhood_profiles[["population_density"]] <- neighbourhood_profiles[["population_density"]] %>%
map("value")
neighbourhood_profiles[["lim_at"]] <- neighbourhood_profiles[["lim_at"]] %>%
map("value")
neighbourhood_profiles[["unaffordable_housing"]] <- neighbourhood_profiles[["unaffordable_housing"]] %>%
map("value")
neighbourhood_profiles[["average_renter_shelter_cost"]] <- neighbourhood_profiles[["average_renter_shelter_cost"]] %>%
map("value")
# Set factor levels separately for visible minority for *each* neighbourhood, so that it goes in descending order, with visible minority n.i.e., multiple visible minorities, and not a visible minority at the end
neighbourhood_profiles[["visible_minority"]] <- neighbourhood_profiles[["visible_minority"]] %>%
map(function(x) {
x %>%
mutate(
group = fct_reorder(group, prop, .desc = TRUE),
group = fct_relevel(group, "Visible minority, n.i.e.", "Multiple visible minorities", "Not a visible minority", after = Inf)
)
})
# Now there's one element per variable, and within one per neighbourhood - transpose so it's inside out!
neighbourhood_profiles <- neighbourhood_profiles %>%
transpose()
# Save as RDS, aggregate into package dataset at top level of aggregate_data folder
saveRDS(neighbourhood_profiles, here::here("data-raw", "aggregate_data", "census_profiles_2016", "aggregate", "neighbourhood_profiles.rds"))
saveRDS(city, here::here("data-raw", "aggregate_data", "census_profiles_2016", "aggregate", "city_profile.rds"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.