#' Calculate segregation metrics
#'
#' @param data a spatial sf object of the study area divided into areal
#' units (such as census tracts), containing the columns 'id', 'geometry',
#' and one column per population group indicating that group's population in
#' the local areal unit.
#' @param id_field character. The name of the identificator column in the input
#' data. Defaults to 'id'.
#' @param bandwidth numeric. A bandwidth, in meters, used to set the scale of
#' analysis for spatial segregation measurement. When set to 0, aspatial
#' metrics will be calculated.
#'
#' @return a segreg object, a list containing the input spatial data and the
#' results of the segregation metrics.
#'
#' @export
#'
#' @examples
#'
#' library(sf)
#' library(segregr)
#'
#' # load sample data from package segregr
#' marilia_sf <- st_read(system.file("extdata/marilia_2010.gpkg", package = "segregr"))
#'
#' # calculate segregation metrics
#' segregation <- measure_segregation(marilia_sf)
#'
#' # global dissimilarity index
#' segregation$D
#'
#' # global entropy
#' segregation$E
#'
#' # global information theory index H
#' segregation$H
measure_segregation <- function(data,
id_field = "id",
distance_method = "geodist",
weight_function = "gaussian",
bandwidths = 0) {
# This function calculates all segregation metrics available in the
# segregr package and returns them all in a list containing individual values
# of global metrics and data.frames of local metrics.
#
# This is the main function of the package, so it's quite long. In the future,
# I should break this down into smaller functions. For now, the code below
# is divided into 'chapters'
# 1. Extract geometries ----------------------------------------------------
## sf object with areal units (census tracts) provided by the user
areas_sf <- data[id_field]
## extract the centroid of each areal unit
locations_sf <- suppressWarnings(
sf::st_centroid(areas_sf)
)
# 2. Extract population ----------------------------------------------------
## data.frame with the population in the study area, per group
population_df <- st_set_geometry(data, NULL)
data.table::setDT(population_df)
## use the order of the columns in the input data to order group names as factors
group_names <- colnames(population_df)
group_names <- group_names[group_names != "id"]
## convert population data.frame to long form, using group_names as factors for group column
population_long_df <- data.table::melt(population_df,
id.vars = "id",
variable.name = "group",
value.name = "population"
)
population_long_df[, group := factor(group, levels = group_names)]
## N = Total population of the study area ----
N <- sum(population_long_df$population)
## Total population and proportion per group in the study area ----
group_population_df <- population_long_df[, .(total_population = sum(population)), by = group]
group_population_df[, group_proportion_city := total_population / sum(total_population)]
# 3. Calculate distances between locations ---------------------------------
distance_matrix <- calculate_distances(locations_sf, distance_method)
# 4. Calculate Gaussian weights by bandwidth -----------------------------
if (weight_function == "gaussian") {
weights_matrix <- calculate_gaussian_weights(distance_matrix, bandwidths)
} else {
weights_matrix <- calculate_step_weights(distance_matrix, bandwidths)
}
rm(distance_matrix)
# 5. Calculate Population Intensity ----------------------------------------
## population data.frame to matrix
ids <- locations_sf$id
population_matrix <- population_df %>% dplyr::select(-id) %>% base::as.matrix()
## population intensity by group and location
intensity_group <- purrr::map(names(weights_matrix), function(x) {
data.table::data.table(id = ids, bw = as.numeric(x),
data.table::as.data.table((weights_matrix[[x]] %*% population_matrix) / colSums(weights_matrix[[x]]))
)
})
## population intensity by location
intensity_local <- purrr::map(names(weights_matrix), function(x) {
data.table::data.table(id = ids, bw = as.numeric(x),
intensity = (weights_matrix[[x]] %*% rowSums(population_matrix)) / colSums(weights_matrix[[x]])
)
})
## convert matrices to data.tables
intensity_group <- data.table::rbindlist(intensity_group)
intensity_group <- data.table::melt(intensity_group, id.vars = c("id", "bw"), variable.name = "group", value.name = "intensity")
intensity_group[population_long_df, on = .(id, group), population := i.population]
data.table::setcolorder(intensity_group, neworder = c("id", "bw", "group", "population", "intensity"))
intensity_local <- data.table::rbindlist(intensity_local)
data.table::setnames(intensity_local, old = "intensity.V1", new = "intensity")
intensity_df <- intensity_group %>%
dplyr::select(-population) %>%
tidyr::pivot_wider(names_from = group, values_from = intensity) %>%
setDT()
rm(weights_matrix)
# 6. Calculate Segregation Indices -----------------------------------------
## Dissimilarity Index ---------------------------------------------------
### I = Interaction Index, used in the Dissimilarity Index equation ----
I <- population_long_df[, .(population = sum(population)), by = group]
I[, proportion := population / sum(population)]
I[, inv_proportion := 1 - proportion]
I[, partial_I := proportion * inv_proportion]
I <- sum(I$partial_I)
### Local Dissimilarity (d) ----
local_dissimilarity_df <- data.table::copy(intensity_group)
local_dissimilarity_df[,
`:=`(
population_locality = sum(population),
group_proportion_locality = intensity / sum(intensity)
),
by = .(id, bw)
]
local_dissimilarity_df[group_population_df,
on = "group",
group_proportion_city := i.group_proportion_city
]
local_dissimilarity_df[, proportion_abs_diff := abs(group_proportion_locality - group_proportion_city)]
local_dissimilarity_df[, dm := (population_locality / (2 * N * I)) * proportion_abs_diff]
local_dissimilarity_df <- local_dissimilarity_df[, .(d = sum(dm)), by = .(id, bw)]
### Global Dissimilarity (D) ----
D <- local_dissimilarity_df[, .(D = sum(d, na.rm = TRUE)), by = bw]
## Information Theory Indices (Entropy and H) ----------------------------
### Global Entropy (E) ----
E <- group_population_df[, .(group_entropy = group_proportion_city * log(1 / group_proportion_city))]
E <- sum(E$group_entropy)
### Local Entropy (e) ----
local_entropy_df <- data.table::copy(intensity_group)
local_entropy_df[, proportion := intensity / sum(intensity), by = .(id, bw)]
local_entropy_df[, group_entropy := proportion * log(1 / proportion)]
local_entropy_df <- local_entropy_df[, .(
population = sum(population),
e = sum(group_entropy, na.rm = TRUE)
), by = .(id, bw)]
### Local H Index (h) ----
local_entropy_df[, h := (population * (E - e)) / (E * N)]
### Global H Index (H)
H <- local_entropy_df[, .(H = sum(h, na.rm = TRUE)), by = bw]
## Exposure and Isolation Indices (P and Q) ------------------------------
iso_exp_df <- data.table::copy(intensity_group)
iso_exp_df[, population_group_city := sum(population, na.rm = TRUE), by = .(bw, group)]
iso_exp_df[intensity_local, on = .(id, bw), population_intensity_locality := i.intensity]
iso_exp_df[, `:=`(
proportion_group_city = intensity / population_group_city,
# proportion_group_city = population / population_group_city,
proportion_group_locality = intensity / population_intensity_locality
)]
iso_exp_df <- iso_exp_df[, .(id, bw, group, proportion_group_city, proportion_group_locality)]
iso_exp_matrix <- expand.grid(
id = locations_sf$id,
bw = bandwidths,
group_a = group_names,
group_b = group_names
) %>% data.table::setDT()
### Local Exposure and Isolation
iso_exp_matrix[iso_exp_df,
on = .(id, bw, group_a = group),
`:=`(
proportion_group_city_a = i.proportion_group_city,
proportion_group_locality_a = i.proportion_group_locality
)
]
iso_exp_matrix[iso_exp_df,
on = .(id, bw, group_b = group),
`:=`(
proportion_group_city_b = i.proportion_group_city,
proportion_group_locality_b = i.proportion_group_locality
)
]
iso_exp_matrix[, isolation_exposure := proportion_group_city_a * proportion_group_locality_b]
local_iso_exp <- iso_exp_matrix[, .(id, bw, group_a, group_b, isolation_exposure)]
### Global Exposure and Isolation
global_iso_exp <- local_iso_exp[,
.(isolation_exposure = sum(isolation_exposure, na.rm = TRUE)),
by = .(bw, group_a, group_b)
]
#### Separate Exposure and Isolation data
global_exposure <- global_iso_exp[group_a != group_b]
data.table::setnames(global_exposure, old = "isolation_exposure", new = "exposure")
global_isolation <- global_iso_exp[group_a == group_b, .(bw, group_a, isolation_exposure)]
data.table::setnames(global_isolation,
old = c("group_a", "isolation_exposure"),
new = c("group", "isolation")
)
local_exposure <- local_iso_exp[group_a != group_b]
data.table::setnames(local_exposure,
old = "isolation_exposure",
new = "exposure"
)
local_isolation <- local_iso_exp[group_a == group_b, .(id, bw, group_a, isolation_exposure)]
data.table::setnames(local_isolation,
old = c("group_a", "isolation_exposure"),
new = c("group", "isolation")
)
# 7. Return results --------------------------------------------------------
results <- list(
areal_units = areas_sf,
groups = group_names,
bandwidth = bandwidths,
population = population_df,
intensity = intensity_df,
D = D, # Global Dissimilarity
E = E, # Global Entropy
H = H, # Global H
d = local_dissimilarity_df,
h = local_entropy_df,
# Global Exposure
P = global_exposure,
# Global Isolation
Q = global_isolation,
# Local Exposure
p = local_exposure,
# Local Isolation
q = local_isolation
)
return(results)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.