Nothing
#' Get population impact over time
# DESCRIPTION ##################################################################
#' @description Get population impact over time
# ARGUMENTS ####################################################################
#' @param input_with_risk_and_pop_fraction \code{Data frame} with the input data (including risk and population fraction)
# VALUE ########################################################################
#' @returns
#' This function returns a \code{data.frame} with one row for each value of the
#' concentration-response function (i.e. central estimate, lower and upper bound confidence interval).
#' Moreover, the data frame include columns such as:
#' \itemize{
#' \item Attributable fraction
#' \item Health impact
#' \item Outcome metric
#' \item And many more.
#' }
#' @author Alberto Castro & Axel Luyten
#' @keywords internal
get_impact_with_lifetable <-
function(input_with_risk_and_pop_fraction){
# GET POP IMPACT ######
# USEFUL VARIABLES ##########
# yoa means Year Of Analysis
yoa <- input_with_risk_and_pop_fraction |> dplyr::pull(year_of_analysis) |> dplyr::first()
yoa_plus_1 <- base::as.numeric(yoa) + 1
health_outcome <- base::unique(input_with_risk_and_pop_fraction$health_outcome)
is_single_year_exposure <- base::unique(input_with_risk_and_pop_fraction$approach_exposure) == "single_year"
is_constant_exposure <- base::unique(input_with_risk_and_pop_fraction$approach_exposure) == "constant"
is_with_newborns <- base::unique(input_with_risk_and_pop_fraction$approach_newborns) == "with_newborns"
is_without_newborns <- base::unique(input_with_risk_and_pop_fraction$approach_newborns) == "without_newborns"
# Store the value of time horizon (defined in compile_input())
time_horizon <- base::unique(input_with_risk_and_pop_fraction$time_horizon)
# Define the last year of the projection
last_year_projection <- yoa + time_horizon - 1
# Define the years based on n_years_projection
# e.g. 2020 to 2118
years_projection <- yoa_plus_1 : last_year_projection
# n_years_projection defines for how many years the population should be projected;
n_years_projection <- base::length(years_projection)
# Precompute column names to be use below
entry_names <- base::paste0("entry_population_", years_projection)
midyear_names <- base::paste0("midyear_population_", years_projection)
death_names <- base::paste0("deaths_", years_projection)
id_columns <-
c("geo_id_micro",
"erf_ci", "bhd_ci", "exp_ci", "dw_ci", "cutoff_ci", "duration_ci",
"sex")
# LIFETABLE SETUP ##############################################################################
lifetable_calculation <- input_with_risk_and_pop_fraction |>
dplyr::mutate(
# Duplicate bhd and year_of_analysis
# for more handy column names for life table calculations
deaths = bhd,
yoa = year_of_analysis,
# Create midyear_population_yoa
# yoa means Year Of Analysis
# It is better to do it now (before nesting tables)
midyear_population_yoa = population)
lifetable_calculation <- lifetable_calculation |>
# Get modification factor
# it works with both single exposure and exposure distribution
dplyr::mutate(
modification_factor = 1 - pop_fraction,
.after = rr) |>
# CALCULATE ENTRY POPULATION OF YEAR OF ANALYSIS (YOA)
dplyr::mutate(
entry_population_yoa = midyear_population_yoa + (deaths / 2),
.before = midyear_population_yoa) |>
# CALCULATE PROBABILITY OF SURVIVAL FROM START YEAR TO END YEAR & START YEAR TO MID YEAR
dplyr::mutate(
# probability of survival from start of year i to start of year i+1 (entry to entry)
prob_survival =
(midyear_population_yoa - (deaths / 2)) /
(midyear_population_yoa + (deaths / 2) ),
# Probability of survival from start to midyear
# For example entry_pop = 100, prob_survival = 0.8 then end_of_year_pop = 100 * 0.8 = 80.
# midyear_pop = 100 - (20/2) = 90.
prob_survival_until_midyear = 1 - ((1 - prob_survival) / 2),
# Hazard rate for calculating survival probabilities
hazard_rate = deaths / midyear_population_yoa,
.after = deaths)
# CALCULATE MODIFIED SURVIVAL PROBABILITIES
lifetable_calculation <- lifetable_calculation |>
dplyr::mutate(
# For all ages min_age and higher calculate modified survival probabilities
# Calculate first the boolean/logic column to speed up calculations below
age_end_over_min_age = age_end > min_age,
# Calculate modified hazard rate = modification factor * hazard rate = mod factor * (deaths / mid-year pop)
hazard_rate_mod =
dplyr::if_else(age_end_over_min_age,
modification_factor * hazard_rate,
hazard_rate),
# Calculate modified survival probability =
# ( 2 - modified hazard rate ) / ( 2 + modified hazard rate )
prob_survival_mod =
dplyr::if_else(age_end_over_min_age,
(2 - hazard_rate_mod) / (2 + hazard_rate_mod),
prob_survival),
prob_survival_until_midyear_mod =
dplyr::if_else(age_end_over_min_age,
1 - ((1 - prob_survival_mod) / 2),
prob_survival_until_midyear),
.after = deaths)
# Nest life tables
lifetable_calculation <- lifetable_calculation |>
tidyr::nest(
data_by_age =
c(yoa, age_group, age_start, age_end, bhd, deaths,
population,
modification_factor,
prob_survival, prob_survival_until_midyear, hazard_rate,
age_end_over_min_age, prob_survival_mod, prob_survival_until_midyear_mod, hazard_rate_mod,
# These columns at the end to link with projections
midyear_population_yoa, entry_population_yoa))
## EXPOSED PROJECTION ###########################################################################
# The exposed projection is the scenario of "business as usual"
# i.e. the scenario with the exposure to the environmental stressor as (currently) measured
# DETERMINE ENTRY POPULATION OF YOA+1 IN EXPOSED SCENARIO
lifetable_calculation <- lifetable_calculation |>
dplyr::mutate(
projection_if_exposed_by_age_and_year =
purrr::map(
.x = data_by_age,
function(.x){
.x <- .x |>
dplyr::mutate(
# End-of-year population YOA = (entry population YOA) * ( survival probability )
# Round results when multiplying by probability of surviving
# to avoid floating-point precision issue
# (i.e. result that should be zero ends up with e.g. 0.0000000000003)
# The number of decimas is random, just large to avoid changes in final results
end_population_yoa = base::round(entry_population_yoa * prob_survival, 10),
# Deaths YOA = Entry pop YOA - End pop YOA
deaths_yoa = entry_population_yoa - end_population_yoa,
# Entry population YOA+1 = lag ( End-of-year population YOA )
entry_population_yoa_plus_1 = dplyr::lag(end_population_yoa))
}
)
)
## UNEXPOSED PROJECTION ###########################################################################
# The exposed projection is the scenario without any exposure to the environmental stressor
# CALCULATE YOA MID-YEAR POPOULATION,
# YOA END-OF-YEAR POPULATION, YOA DEATHS AND
# YOA+1 ENTRY POPULATION USING MODIFIED SURVIVAL PROBABILITIES
lifetable_calculation <- lifetable_calculation |>
dplyr::mutate(
projection_if_unexposed_by_age_and_year =
purrr::map(
.x = data_by_age ,
function(.x){
.x <- .x |>
dplyr::mutate(
# Re-calculate population_yoa
# MID-YEAR POP = (entry population YOA) * ( survival probability until mid year )
# Round result to avoid floating-point precision issue
midyear_population_yoa = base::round(entry_population_yoa * prob_survival_until_midyear_mod, 10),
# Calculate end-of-year population in YOA to later determine premature deaths
# Round result to avoid floating-point precision issue
end_population_yoa = base::round(entry_population_yoa * prob_survival_mod, 10),
# Deaths YOA = Entry pop YOA - End pop YOA
deaths_yoa = entry_population_yoa - end_population_yoa,
# Entry population YOA+1 = lag ( End-of-year population YOA )
entry_population_yoa_plus_1 = dplyr::lag(end_population_yoa)
)
}
)
)
# PREMATURE DEATHS (SINGLE YEAR EXPOSURE) ######################################################
# YOA = YEAR OF ANALYSIS
if (health_outcome == "deaths" &
is_single_year_exposure) {
lifetable_calculation <- lifetable_calculation |>
# Premature deaths = YOA end-of-year population of unexposed minus exposed
dplyr::mutate(
impact_by_age_and_year =
purrr::map2(
.x = projection_if_unexposed_by_age_and_year,
.y = projection_if_exposed_by_age_and_year,
.f = ~ {
tibble::tibble(
age_start = .x$age_start,
age_end = .x$age_end,
population = .x$midyear_population_yoa,
# Change of sign in the difference unexposed minus exposed
# because if no exposure
# there are less deaths in unexposed
# For population unexposed minus exposed (without change of sign)
# because there are more population in unexposed
impact_yoa = -(.x$deaths_yoa - .y$deaths_yoa)) |>
dplyr::rename_with(.cols = dplyr::everything(),
.fn = ~ base::gsub("yoa", yoa, .x))
}
)
)
}
# YLL & PREMATURE DEATHS (CONSTANT EXPOSURE) ####################################################
if (health_outcome == "yll"| #And ("yld", "daly") if yld for life table ever implemented
is_constant_exposure) {
## PROJECT POPULATIONS #########################################################################
### DEFINE FUNCTION FOR POPULATION PROJECTION ##################################################
project_pop <- function(df, prob_survival, prob_survival_until_midyear) {
# Rename yoa columns
base::names(df) <- base::names(df) |>
# Important to repace first yoa_plus_1,
# otherwise the replacement of _yoa also affects yoa_plus_1
base::gsub("_yoa_plus_1", base::paste0("_", yoa_plus_1), x = _) |>
base::gsub("_yoa", base::paste0("_", yoa), x = _)
# Precompute complements
death_prob <- 1 - prob_survival
# Initialise matrices
entry_pop <- base::matrix(NA, nrow = base::nrow(df), ncol = n_years_projection,
# Row and column names
# NULL because no row names
dimnames = base::list(NULL, entry_names))
midyear_pop <- base::matrix(NA, nrow = base::nrow(df), ncol = n_years_projection,
dimnames = base::list(NULL, midyear_names))
deaths <- base::matrix(NA, nrow = base::nrow(df), ncol = n_years_projection,
dimnames = base::list(NULL, death_names))
# Set initial year
entry_pop[, 1] <- df[[entry_names[1]]]
midyear_pop[, 1] <- base::round(entry_pop[, 1] * prob_survival_until_midyear, 10)
deaths[, 1] <- entry_pop[, 1] * (1 - prob_survival)
# Loop across years
# E.g. starts with 1 and ends with 98;
# i (index in the number of years) is used to select both the rows and the columns
for (i in 1: (n_years_projection - 1)) {
# Round result to avoid floating-point precision issue
rows <- (i + 2):(n_years_projection + 1)
# ENTRY POP YOA+1 <- ( ENTRY POP YOA ) * ( SURVIVAL PROBABILITY YOA )
entry_pop[rows, i + 1] <- base::round(entry_pop[rows - 1, i] * prob_survival[rows - 1], 10)
# MID-YEAR POP YOA+1 <- ( ENTRY POP YOA+1) * ( SURVIVAL PROBABILITY FROM START OF YOA+1 TO MID YEAR YOA+1)
midyear_pop[rows, i + 1] <- base::round(entry_pop[rows, i + 1] * prob_survival_until_midyear[rows], 10)
# DEATHS IN YOA+1 <- ( ENTRY POP YOA+1 ) * (1 - SURVIVAL PROBABILITY YOA+1 )
deaths[rows, i + 1] <- base::round(entry_pop[rows, i + 1] * death_prob[rows], 10)
}
# Column bin matrices to input data frame
# Remove first column of entry_pop, because it exists already in input data frame
df <-
dplyr::bind_cols(df, midyear_pop, entry_pop[, -1], deaths)
return(df)
}
### SINGLE YEAR EXPOSURE #######################################################################
# Determine YLLs for baseline and impacted scenario's in the single year exposure case
if (is_single_year_exposure){
# PROJECT POPULATIONS IN BOTH IMPACTED AND BASELINE SCENARIO FROM YOA+1 UNTIL THE END
# USING MODIFIED SURVIVAL PROBABILITIES (BECAUSE AFTER YOA THERE IS NO MORE AIR POLLUTION)
lifetable_calculation <- lifetable_calculation |>
dplyr::mutate(
projection_if_exposed_by_age_and_year =
purrr::map(
.x = projection_if_exposed_by_age_and_year,
.f = ~ project_pop(
df = .x,
prob_survival = .x$prob_survival_mod,
prob_survival_until_midyear = .x$prob_survival_until_midyear_mod)),
projection_if_unexposed_by_age_and_year =
purrr::map(
.x = projection_if_unexposed_by_age_and_year,
.f = ~ project_pop(
df = .x,
prob_survival = .x$prob_survival_mod,
prob_survival_until_midyear = .x$prob_survival_until_midyear_mod))
)
### CONSTANT EXPOSURE ########################################################################
# Determine YLLs for baseline and impacted scenario's in the constant exposure case
# IF CONSTANT EXPOSURE
} else {
# PROJECT POPULATION IN EXPOSED SCENARIO
lifetable_calculation <- lifetable_calculation |>
dplyr::mutate(
projection_if_exposed_by_age_and_year =
purrr::map(
.x = projection_if_exposed_by_age_and_year,
.f = ~ project_pop(
df = .x,
prob_survival = .x$prob_survival,
prob_survival_until_midyear = .x$prob_survival_until_midyear)),
# PROJECT POPULATION IN UNEXPOSED SCENARIO
projection_if_unexposed_by_age_and_year =
purrr::map(
.x = projection_if_unexposed_by_age_and_year,
.f = ~ project_pop(
df = .x,
prob_survival = .x$prob_survival_mod,
prob_survival_until_midyear = .x$prob_survival_until_midyear_mod)
)
)
}
### DETERMINE IMPACT (YLL, PREMATURE DEATHS (CONSTANT EXPOSURE)) ###########################
# YLL and premature deaths attributable to exposure are calculated
# Helper function to be used below
calculate_impact <- function(df_unexposed, df_exposed, var_prefix) {
ages_and_pop <- df_unexposed |>
dplyr::select(age_start, age_end, population)
df_unexposed_vars <- df_unexposed |>
dplyr::select(dplyr::starts_with(var_prefix))
df_exposed_vars <- df_exposed|>
dplyr::select(dplyr::starts_with(var_prefix))
if(var_prefix == "midyear_population_"){
diff <- df_unexposed_vars - df_exposed_vars
# IF DEATHS
} else {
# The way round because otherwise negative numbers
# Reason: unexposed means more population but less deaths
diff <- - (df_unexposed_vars - df_exposed_vars)
}
impact <- dplyr::bind_cols(ages_and_pop, diff) |>
dplyr::rename_with(
.cols = dplyr::starts_with(var_prefix),
.fn = ~ base::gsub(var_prefix, "impact_", .x)
)
return(impact)
}
# Apply the helper function above to calculate impacts (deaths or yll)
# from exposed and unexposed projections
var_prefix_for_function <-
base::ifelse(health_outcome == "deaths", "deaths_",
base::ifelse(health_outcome == "yll", "midyear_population_", NA))
lifetable_calculation <- lifetable_calculation |>
dplyr::mutate(
impact_by_age_and_year =
purrr::map2(
# Attention first argument unexposed and second exposed (see function above)
.x = projection_if_unexposed_by_age_and_year,
.y = projection_if_exposed_by_age_and_year,
.f = calculate_impact,
var_prefix = var_prefix_for_function))
## NEWBORNS #################################################################
if (is_with_newborns) {
fill_right_of_diag <- function(tbl) {
# Select only the numeric matrix portion, ignoring age columns
cols <- base::setdiff(base::names(tbl), c("age_start", "age_end", "population"))
data_selection <- tbl[, cols, drop = FALSE]
for (i in 1 : base::nrow(data_selection)) {
# Extract the diagonal value
diag_value <- data_selection[i, i, drop = TRUE]
# Replace NAs to the right of the diagonal with the diagonal value
data_selection[i, (i+1):base::ncol(data_selection)] <- diag_value
}
# Drop last column from the data part
data_selection <- data_selection[, -base::ncol(data_selection), drop = FALSE]
# Assign back into the same positions
tbl[, cols] <- data_selection
return(tbl)
}
lifetable_calculation <- lifetable_calculation |>
dplyr::mutate(
impact_by_age_and_year = purrr::map(
.x = impact_by_age_and_year,
.f = fill_right_of_diag))
} else if(is_without_newborns) {
lifetable_calculation <- lifetable_calculation
}
}
# COMPILE OUTPUT ##############################################################################
# Data wrangling to get the results in the needed format
# GET DEATHS AND YLL FROM LIFETABLE
# Store total impacts by age #########
## Sum impacts
lifetable_calculation <- lifetable_calculation |>
dplyr::mutate(
impact_by_age_and_year_long = purrr::map(
.x = impact_by_age_and_year,
function(.x){
# Reshape year to long format
.x <-
tidyr::pivot_longer(data = .x,
cols = dplyr::starts_with("impact_"),
names_to = "year",
values_to = "impact",
names_prefix = "impact_") |>
# Keep only first value of population for each year
# Otherwise the population is repeated for all years
# and the sum of population, calculated in get_output(),
# will be wrong (much higher)
dplyr::mutate(.by = c(age_start, age_end),
population = ifelse(year == yoa, population, NA))
if({{health_outcome}} == "deaths"){
.x <- .x |>
## Select first year of projection
dplyr::filter(year == yoa)
} else if ({{health_outcome}} == "yll"){
.x <- .x |>
## Select all years within time horizon
dplyr::filter(year <= last_year_projection )
}
}
))
# Unnest column #####
# Unnest the obtained impacts to integrate them the main tibble
results_raw <- lifetable_calculation |>
# Remove all nested tibbles except impact_by_age_and_year_long
# which have to be nested
dplyr::select(-data_by_age,
-projection_if_exposed_by_age_and_year,
-projection_if_unexposed_by_age_and_year,
-impact_by_age_and_year) |>
# Unnest
tidyr::unnest(impact_by_age_and_year_long) |>
# Rename age_start to age_group (consistent with input and other pathways)
dplyr::rename("age_group" = "age_start") |>
# Remove age_end not needed anymore
dplyr::select(-age_end)
out <- base::list(
intermediate_calculations = lifetable_calculation,
results_raw = results_raw
)
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.