Nothing
#' Theil T Index
#'
#' Calculates the Theil T Index of a given accessibility distribution. Values
#' range from 0 (when all individuals have exactly the same accessibility
#' levels) to the natural log of *n*, in which *n* is the number of individuals
#' in the accessibility dataset. If the individuals can be classified into
#' mutually exclusive and completely exhaustive groups, the index can be
#' decomposed into a between-groups inequaliy component and a within-groups
#' component.
#'
#' @template accessibility_data
#' @param sociodemographic_data A data frame. The distribution of
#' sociodemographic characteristics of the population in the study area cells.
#' Must contain the columns `id` and any others specified in `population` and
#' `socioeconomic_groups`.
#' @template opportunity_access
#' @template population
#' @param socioeconomic_groups A string. The name of the column in
#' `sociodemographic_data` whose values identify the socioeconomic groups that
#' should be used to calculate the between- and within-groups inequality
#' levels. If `NULL` (the default), between- and within-groups components are
#' not calculated and only the total aggregate inequality is returned.
#' @template group_by_access
#'
#' @return If `socioeconomic_groups` is `NULL`, a data frame containing the
#' total Theil T estimates for the study area. If not, a list containing three
#' dataframes: one summarizing the total inequality and the between- and
#' within-groups components, one listing the contribution of each group to the
#' between-groups component and another listing the contribution of each group
#' to the within-groups component.
#'
#' @family inequality
#'
#' @examplesIf identical(tolower(Sys.getenv("NOT_CRAN")), "true")
#' data_dir <- system.file("extdata", package = "accessibility")
#' travel_matrix <- readRDS(file.path(data_dir, "travel_matrix.rds"))
#' land_use_data <- readRDS(file.path(data_dir, "land_use_data.rds"))
#'
#' access <- cumulative_cutoff(
#' travel_matrix,
#' land_use_data,
#' cutoff = 30,
#' opportunity = "jobs",
#' travel_cost = "travel_time"
#' )
#'
#' ti <- theil_t(
#' access,
#' sociodemographic_data = land_use_data,
#' opportunity = "jobs",
#' population = "population"
#' )
#' ti
#'
#' # to calculate inequality between and within income deciles, we pass
#' # "income_decile" to socioeconomic_groups.
#' # some cells, however, are classified as in the decile NA because their
#' # income per capita is NaN, as they don't have any population. we filter
#' # these cells from our accessibility data, otherwise the output would include
#' # NA values (note that subsetting the data like this doesn't affect the
#' # assumption that groups are completely exhaustive, because cells with NA
#' # income decile don't have any population)
#'
#' na_decile_ids <- land_use_data[is.na(land_use_data$income_decile), ]$id
#' access <- access[! access$id %in% na_decile_ids, ]
#' sociodem_data <- land_use_data[! land_use_data$id %in% na_decile_ids, ]
#'
#' ti <- theil_t(
#' access,
#' sociodemographic_data = sociodem_data,
#' opportunity = "jobs",
#' population = "population",
#' socioeconomic_groups = "income_decile"
#' )
#' ti
#'
#' @export
theil_t <- function(accessibility_data,
sociodemographic_data,
opportunity,
population,
socioeconomic_groups = NULL,
group_by = character(0)) {
checkmate::assert_string(opportunity)
checkmate::assert_string(population)
checkmate::assert_string(socioeconomic_groups, null.ok = TRUE)
assert_access_group_by(group_by)
assert_accessibility_data(accessibility_data, opportunity, group_by)
assert_sociodemographic_data(
sociodemographic_data,
accessibility_data,
population = population,
extra_cols = socioeconomic_groups
)
if (!inherits(accessibility_data, "data.table")) {
original_class <- class(accessibility_data)
data <- data.table::as.data.table(accessibility_data)
} else {
data <- data.table::copy(accessibility_data)
}
if (!inherits(sociodemographic_data, "data.table")) {
sociodemographic_data <- data.table::as.data.table(sociodemographic_data)
}
merge_by_reference(
data,
sociodemographic_data,
population,
left_df_idcol = "id"
)
warn_extra_access_cols(accessibility_data, opportunity, group_by)
if (is.null(socioeconomic_groups)) {
result <- theil_without_groups(data, opportunity, population, group_by)
} else {
result <- theil_with_groups(
data,
sociodemographic_data,
opportunity,
population,
socioeconomic_groups,
group_by
)
}
if (exists("original_class")) {
if (inherits(result, "data.frame")) {
class(result) <- original_class
} else {
class(result$summary) <- original_class
class(result$within_group_component) <- original_class
class(result$between_group_component) <- original_class
}
}
return(result)
}
theil_without_groups <- function(data, opportunity, population, group_by) {
# we have to filter by opportunity because opportunity = 0 results in
# theil_t being NaN (because log(0) is NaN)
# TODO: throw warning
.opp_colname <- opportunity
.groups <- group_by
data[
,
avg_access := stats::weighted.mean(
get(..opportunity),
w = get(..population)
),
by = .groups
]
theil_index <- data[
get(.opp_colname) > 0,
.(
theil_t = calc_theil_t(get(..opportunity), avg_access, get(..population))
),
by = .groups
]
return(theil_index)
}
theil_with_groups <- function(data,
sociodemographic_data,
opportunity,
population,
socioeconomic_groups,
group_by) {
merge_by_reference(
data,
sociodemographic_data,
socioeconomic_groups,
left_df_idcol = "id"
)
.groups <- group_by
.socioecon_groups <- c(group_by, socioeconomic_groups)
data[
,
avg_access := stats::weighted.mean(
get(..opportunity),
w = get(..population)
),
by = .groups
]
data[
,
group_avg_access := stats::weighted.mean(
get(..opportunity),
w = get(..population)
),
by = .socioecon_groups
]
summarized_data <- data[
,
.(
group_tot_access = as.numeric(
sum(get(..opportunity) * get(..population))
),
group_tot_pop = sum(get(..population)),
group_theil_t = calc_theil_t(
get(..opportunity),
group_avg_access,
get(..population)
),
avg_access = mean(avg_access),
group_avg_access = mean(group_avg_access)
),
by = .socioecon_groups
]
summarized_data[
,
`:=`(
group_avg_access = group_tot_access / group_tot_pop,
access_share = group_tot_access / sum(group_tot_access),
pop_share = group_tot_pop / sum(group_tot_pop)
),
by = .groups
]
# summary, containing total value and total within- and between-groups
# inequality
summary <- summarized_data[
,
.(
between_group = calc_theil_t(
group_avg_access,
avg_access,
group_tot_pop
),
within_group = sum(
group_theil_t * group_tot_access / sum(group_tot_access)
)
),
by = .groups
]
summary[, total := between_group + within_group]
summary <- data.table::melt(
summary,
id.vars = .groups,
variable.name = "component",
value.name = "value"
)
summary[, component := as.character(component)]
summary[, share_of_total := 2 * value / sum(value), by = .groups]
# decomposed within group
within_group <- data.table::copy(summarized_data)
within_group[
,
value := group_theil_t * group_tot_access / sum(group_tot_access),
by = .groups
]
within_group[
,
share_of_component := value / sum(value),
by = .groups
]
cols_to_drop <- setdiff(
names(within_group),
c(.socioecon_groups, "value", "share_of_component")
)
within_group[, eval(cols_to_drop) := NULL]
data.table::setorderv(within_group, .socioecon_groups)
# decomposed between group
between_group <- data.table::copy(summarized_data)
between_group[, value := access_share * log(access_share / pop_share)]
between_group[, eval(cols_to_drop) := NULL]
data.table::setorderv(between_group, .socioecon_groups)
output_list <- list(
summary = summary,
within_group_component = within_group,
between_group_component = between_group
)
return(output_list)
}
calc_theil_t <- function(x, avg_x, weight) {
stats::weighted.mean(x / avg_x * log(x / avg_x), w = weight)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.