## comp_cum_risk.R | riskyr
## 2024 01 24
## Compute cumulative risks
# Task analysis: ----------
# A. Two different problem types: ------
# 1. Fixed/stable population size N: ------
# Risk factor affects some property of a stable population
# (e.g., diseases of individuals, rainy days, successful projects, etc.).
#
# DV: Proportions of affected individuals.
# Absence and presence of risk factor for individuals lead to different proportions in population:
# a. individual perspective: affected vs. unaffected proportions individuals
# b. collective perspective: # of risky events per individual (i.e., classes/segments of individuals)
# Mathematically, risk categories can be computed by binomial or multi-nominal distributions.
# However, a generative process model assumes a binary tree with 2 edges/links (risk present vs. absent) for each node.
# - Note on range of r: ----
#
# In 1. most risk factors assume a positive range 0 <= r <= 1 (i.e., only increasing risk):
# (e.g., earthquakes, diseases, death).
# Occurrences can accumulate, but not be cancelled, reduced, or withdrawn later.
#
# Allowing for recovery or risk reduction (r < 0) requires
# further assumptions and a different process,
# as this would change (reduce) the affected proportion of the population.
# The EV counts remain stable, but the proportion(s) affected decrease
# (but unclear, whether all affected segments decrease by the same rate).
# - Note on binary tree data structure: ----
#
# Data can be generated and stored as a binary tree.
# Each state corresponds to an binary expansion:
# 1 0
# 11 10 01 00
# etc.
# Thus, each state contains the entire history of each leaf
# and representation allows for all kinds of questions:
# - number of events: e.g., more than 2 events?
# - parity: e.g., more 1 than 0?
# - patterns: e.g., alternating 0 and 1?
# Constraint: Dependencies run strictly from stem to leaf,
# any reverse dependencies (from leaf to stem) cannot be captured.
# 2. Changing/variable population/reference set size N: --------
#
# Risk factor affects and changes the (magnitude/size of the reference set) population
# (e.g., sequential percentage changes, cumulative interest, reducing portfolio value, etc.)
#
# DV: Population magnitude/size of reference set.
# Note on range of r: Allowing for increases and decreases: -1 <= r <= +1.
# B. Problem variants and generalizations: ------
# Basic problems involve a constant value of r and r > 0.
#
# This implies 2 generalizations:
# 1. Variable values of r (as a vector of values).
# 2. Risk increases (0 < r <= 1) and risk reductions (-1 <= r < 0).
# ad 1. Fixed/stable population size N: ----------
# Risk factor affects some property of a stable population
# (e.g., diseases of individuals, rainy days, successful projects, etc.)
# Parameters: ----
# IV:
# r ... risk
# t ... time periods/rounds
#
# DV:
# ev ... number of events
# pc ... probability of risk factor affecting population / percent of population
# A. Recursive application of repeated risk r: ------
# 1. apply_risk() as a recursive partition of a stable population pc: ----
apply_risk <- function(r, t, pc = 100, ev = 0){
# cat(t, ": ", pc, "\n")
if (t == 0){ # stop:
# print(paste0(ev, ": ", pc))
} else { # recurse:
t_prev <- (t - 1) # simplify
pc <- c(apply_risk(r = r, t = t_prev, pc * r, ev + 1), apply_risk(r = r, t = t_prev, pc * (1 - r), ev))
}
# Output:
return(pc)
} # apply_risk().
# # Check:
# apply_risk(pc = 100, ev = 0, r = .25, t = 2)
# 2. Apply risk separately for p and ev: ------
# - a: comp_ps: Probabilities ----
comp_ps <- function(r, t, p = 100){
if (t == 0){ # stop:
# print(paste0(p))
} else { # recurse:
p <- c(comp_ps(r = r, t = (t - 1), p = (p * r)), comp_ps(r = r, t = (t - 1), p = (p * (1 - r))))
}
return(p)
} # comp_ps().
# Check:
# comp_ps(r = .25, t = 2, p = 100, )
# comp_ps(r = .25, t = 4, p = 100, )
# - b: comp_ev: Number of events ----
comp_ev <- function(r, t, ev = 0){
if (t == 0){ # stop:
# print(paste0(ev))
} else { # recurse:
ev <- c(comp_ev(r = r, t = (t - 1), ev = (ev + 1)), comp_ev(r = r, t = (t - 1), ev = ev))
}
return(ev)
} # comp_ev().
# # Check:
# comp_ev(r = .25, t = 2, ev = 0)
# comp_ev(r = .25, t = 3, ev = 0)
# - c: comp_ev_p: Combine comp_ev() and comp_ps() ----
comp_ev_p <- function(r, t, p = 100, ev = 0){
out <- NA # initialize
# Compute (using recursive functions):
events <- comp_ev(r = r, t = t, ev = ev) # a. events
cum_ps <- comp_ps(r = r, t = t, p = p ) # b. probabilities
# Combine outputs:
out <- cum_ps
names(out) <- paste0(events, "x")
return(out)
} # comp_ev_p().
# # Check:
# comp_ev_p(r = .25, t = 1, p = 100, ev = 0)
# comp_ev_p(r = .25, t = 2, p = 100, ev = 0)
# comp_ev_p(r = .25, t = 3, p = 100, ev = 0)
# comp_ev_p(r = .25, t = 4, p = 100, ev = 0)
# # Note:
# cumsum(comp_ev_p(r = .25, t = 4, p = 100, ev = 0))
# B. comp_cum_ps(): Iterative generation of cumulative risks: ------
comp_cum_ps <- function(r = 1/2, # risk per time period
t = NA, # number of time periods/rounds
N = 100, # population size
bin_names = TRUE # name states by binary sequence?
){
# Generalize version of constant r value to a vector of r values
# (with potentially different risk values, and length(r) = t):
if ((length(r) == 1) && (is.numeric(t))){
# Turn r into a vector of length t:
r <- rep(r, times = t)
# User feedback:
# message(paste0("Made r a vector of length t = ", t, ":"))
# print(r)
}
t <- length(r)
# Iterative generation:
for (i in 0:t){ # each time period i:
if (i == 0){
# Prepare data structures:
ev <- vector(mode = "list", length = t) # number of events/occurrences
ps <- vector(mode = "list", length = t) # cumulative probability of segment
bi <- vector(mode = "list", length = t) # binary representation of state
} else if (i == 1){
# initialize:
ev[[i]] <- c(1, 0)
ps[[i]] <- c(N * r[i], N * (1 - r[i]))
bi[[i]] <- c("1", "0")
if (bin_names){
names(ps[[i]]) <- bi[[i]]
} else {
names(ps[[i]]) <- paste0(ev[[i]], "x")
}
} else {
for (e in 1:length(ev[[i - 1]])){ # each element e in the previous ev vector:
ev[[i]][c((2 * e - 1), 2 * e)] <- ev[[i - 1]][e] + c(1, 0)
ps[[i]][c((2 * e - 1), 2 * e)] <- ps[[i - 1]][e] * c(r[i], (1 - r[i]))
bi[[i]][c((2 * e - 1), 2 * e)] <- paste0(bi[[i - 1]][e], c("1", "0"))
if (bin_names){
names(ps[[i]]) <- bi[[i]]
} else {
names(ps[[i]]) <- paste0(ev[[i]], "x")
}
} # for e.
} # if i.
} # for i.
# Output:
return(ps)
} # comp_cum_ps().
# # Check:
# comp_cum_ps()
# comp_cum_ps(r = .1, t = 2, N = 100)
# comp_cum_ps(r = .1, t = 5, N = 100)
# # Vector names:
# comp_cum_ps(r = .1, t = 5, bin_names = TRUE) # names show binary state (full event history)
# comp_cum_ps(r = .1, t = 5, bin_names = FALSE) # names show number of event occurrences (e.g., 2x)
# Generalization to variable values of r (as a vector):
# comp_cum_ps(r = c(.1, .2, .3), t = NA, N = 100)
# # Operations on list data structure:
# # 1. Cumulative sums:
# lapply(X = comp_cum_ps(r = .1, t = 5, N = 100), FUN = cumsum)
# # 2. Converting binary state representation to number of event occurrences and decimal state nr:
# ls <- comp_cum_ps(r = .1, t = 5, bin_names = TRUE) # use binary names
# ls
# # Get names of a list element (vector):
# l_names <- names(ls[[5]]) # extract names of list element
# sapply(X = l_names, FUN = tally, USE.NAMES = FALSE) # Number of event occurrences
# sapply(X = l_names, FUN = base_dec, base = 2) # state as decimal number (starting with 0)
# ToDo: ----
# 2. Generalization: Does this work for risk values r < 0 (reducing risks)?
# comp_cum_ps(r = c(.1, -.2, .3), t = NA, N = 100) # Answer: No, see negative segments!
# C. plot_cbar(): Compute and plot cumulative risks (as bar chart): ------
# See file "plot_cum_risk.R".
# ad 2. Changing population size N ------
# Risk factor affects and changes the (size of the) population
# (e.g., sequential percentage changes, cumulative interest, reducing value, etc.)
# Parameters: ----
# IV:
# r ... risk (as a constant value)
# t ... time periods/rounds
# OR:
# r ... as a vector of risk factor values (e.g. sequential changes)
# Generalizations of r:
#
# 1. Variable r values: To allow for vector r with a range of different values,
# Constant values of r and given value of t are interpreted as:
# rep(r, t).
#
# 2. Risk increases and reductions: -1 <= r <= +1.
# DV:
# N ... magnitude/size of population
# sg ... 2 segments (from original vs. recent change) or 2^t segments?
apply_risk_to_population <- function(r, t = NA, N = 100){
# Prepare: ----
# ToDo: Ensure that r values are in -1 <= r <= +1.
if ((length(r) == 1) && (is.numeric(t))){
# Turn r into a vector of length t:
r <- rep(r, times = t)
# User feedback:
# message(paste0("Created r as a vector of length t = ", t, ":"))
# print(r)
}
# Turn probabilities (or negative probabilities) into percentages:
if (is_prob(abs(r))){
r_neg <- (r < 0) # index of negative values
r_pc <- as_pc(abs(r))
r_pc[r_neg] <- r_pc[r_neg] * -1 # re-apply negative values
# User feedback:
# message(paste0("Turned probability r into percentages r_pc = "))
# print(r_pc)
}
# Data structure:
nr <- length(r)
N_out <- rep(NA, nr)
# Main: ----
for (i in 1:nr){
if (i == 1){
N_out[i] <- N * pc_2_fac(r_pc[i])
} else {
N_out[i] <- N_out[(i - 1)] * pc_2_fac(r_pc[i])
} # if.
} # for loop.
# Verify by comparing to aggregate change:
N_final <- N * pc_2_fac(aggr_pcs(r_pc))
eps <- .000001 # epsilon
if (abs(N_final - N_out[nr]) > eps){
warning(paste0("Final value of N_out = ", N_out[nr], " differs from aggregate change N_final = ", N_final))
}
# +++ here now +++
# Output: ----
names(N_out) <- paste0(r_pc, "%")
return(N_out)
} # apply_risk_to_population().
# # Check:
# apply_risk_to_population(.10)
# apply_risk_to_population(.10, t = 5)
# apply_risk_to_population(c(.10, .20, .30), t = 3)
#
# # Contrast:
# apply_risk_to_population(c( .20, .30, -.50))
# apply_risk_to_population(c(-.20, -.30, .50))
#
# apply_risk_to_population(c( .10, .20, .30, -.60))
# apply_risk_to_population(c(-.10, -.20, -.30, .60))
#
# apply_risk_to_population(seq(-.50, +.50, by = +.10)) # ascending pcs
# apply_risk_to_population(seq(+.50, -.50, by = -.10)) # descending pcs
# Binomial distribution function: ------
# Example from stats::Binomial:
# Compute P(45 < X < 55) for X Binomial(100, 0.50):
# sum(dbinom(x = 46:54, size = 100, prob = .50)) # 0.6317984
# # Adaptation:
# r <- .20
# t <- 4
#
# # Compare:
# comp_cum_ps(r = r, t = t, N = 1) # computes p for ALL paths and states
# dbinom(x = 0:t, size = t, prob = r) # computes summary prob for # of ev occurrences
#
# # Connection: ---
#
# ls <- comp_cum_ps(r = r, t = t, N = 1, bin_names = TRUE) # use binary names
# # Get names of a list element (vector):
# v <- ls[[t]]
# l_names <- names(v) # extract names of list element
# l_names
# n_ev <- sapply(X = l_names, FUN = tally, USE.NAMES = FALSE) # Number of event occurrences
# n_ev
#
# # Aggregate:
# tapply(v, n_ev, sum) # yields array
#
# # Verify equality:
# all.equal(as.vector(tapply(v, n_ev, sum)), dbinom(x = 0:t, size = t, prob = r))
#
# # As df: ----
#
# df <- data.frame(n_ev = n_ev,
# prob = v)
# # Aggregate df:
# tapply(df$prob, df$n_ev, sum) # yields array
#
# # Summarize using magrittr and dplyr: ----
#
# library(tidyverse)
# tb_sum <- df |>
# group_by(n_ev) |>
# summarize(sum_p = sum(prob))
# tb_sum
#
# # Verify equality:
# all.equal(tb_sum$sum_p, dbinom(x = 0:t, size = t, prob = r))
## (*) Done: ----------
# - 1. Generalization to variable values of r (as a vector).
# - Binary state names: comp_cum_ps() uses a binary state names (bin_names = TRUE).
# As those state names preserve a path's full risky event history,
# they can be used to compute the number of event occurrences (using the tally() function)
# of decimal state number (using the bin_dec() function).
## (+) ToDo: ----------
# - Using the binomial formula for computing cumulative risk of
# exact event occurrences (X = c) and ranges (X >= c).
# - 2. Generalization: Make comp_cum_risk() work for
# NEGATIVE risk values r < 0 (i.e., risk reductions).
# - Use more appropriate data structure for cumulative probabilities (ps)?
# - How to grow a (binary) tree structure in R?
## eof. ------------------------------------------
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.