# =============================================================================================================
# Project: IAS39 impairment model
# Discription: pd_tr_functions.R
# Purpose: Get data into the correct format to be used in the IAS39 provision engine
# Authors: N van Heerden, DJ de Villiers
# Date: 17 Mar 2017
# =============================================================================================================
# libraries
# ---------
require(dplyr)
require(readr)
# ---------
# read in test data set
# ---------------------
# test_data <- read_rds("test_data.rds")
# ---------------------
# Utility functions
# -----------------
# check that all closing balance is positive.
pos_closing_balance <- function(data) {
data %>% filter(closing_balance >= 0)
}
# To use:
#df <- df %>% pos_closing_balance(.)
# function:: selects the necessary variables from the base set (df)
# in: the full data frame (df) containing all the standard variables. ... are all segementer variables used.
select_variables_pd_tr <- function(data, ...) {
data %>% select(contract_key, default_flag, pointintime_month, closing_balance, months_arrears, ...)
}
# how to use:
#df <- select_variables_pd_tr(df, country, product_name)
# function::get_cohort_base
# calculates the total balance by arrears and date for each segmenter variables.
# Essentially gets the sum(base) of your cohort.
# ... are all segementer variables used
get_cohort_base <- function(data, default_definition, ...) {
summarise(
group_by(data, months_arrears, pointintime_month, ...), cohort_base = sum(closing_balance)) %>%
filter(months_arrears < default_definition)
}
# how to use:
#df_cohort_base <- get_cohort_base(df, default_definition = 4, country, product_name)
# function:: get_cohort_join.
get_cohort_join <- function(dfx, dfy, segmenter_vector) {
full_join(dfx, dfy, by = c("months_arrears", "pointintime_month", segmenter_vector))
}
# how to use:
#df_cohort_join <- get_cohort_join(dfx = df_cohort_base, dfy = df_cohort_default, segmenter_vector)
# -----------------
# PD Functions
# ------------
# function:: lead default flag
# loss id period is key user input. as a side note: using 12 month outcome on
# the 0 bucket gives input to NPV calculator.
lead_default_flag <- function(data, loss_id_period) {
data %>% mutate(lead_default_flag = lead(default_flag, loss_id_period))
}
# how to use
#df <- lead_default_flag(df, 3)
# get_cohort_default
get_cohort_default <- function(data, default_definition, ...) {
summarise(
group_by(data[data$lead_default_flag == 1, ], months_arrears, pointintime_month, ...), cohort_default = sum(closing_balance)) %>%
filter(months_arrears < default_definition)
}
# how to use:
#df_cohort_default <- get_cohort_default(df, 4, country, product_name)
# last step:
# main function that computes the dataset needed for pd's at the level specified
# by the segmenter variables.
# add: this when using function to calc pd: mutate(pd = cohort_default/cohort_base)
get_df_pd <- function(data = df, segmenter_vector, default_definition = 4, loss_id_period = 12, ...) {
data <- data %>%
arrange(., contract_key, pointintime_month) %>%
# pos_closing_balance(.) %>%
select_variables_pd_tr(., ...) %>%
lead_default_flag(., loss_id_period)
df_cohort_base <- get_cohort_base(data = data, default_definition, ...)
df_cohort_default <- get_cohort_default(data = data, default_definition, ...)
df_cohort_join <- get_cohort_join(dfx = df_cohort_base, dfy = df_cohort_default, segmenter_vector)
return(df_cohort_join)
}
# ------------
# Transition rate functions
# -------------------------
# function that leads the arrears. It calculates arrears for different emergence periods.
# will in future need to be based on the default definition.
# default definition of 4 assumed in this case.
# This function can handle 1:3 months emergence.
lead_arrears <- function(data, emergence_period) {
data <- data %>%
mutate(lead_arrears_1 = lead(months_arrears, 1)) %>%
mutate(lead_arrears_2 = lead(months_arrears, 2)) %>%
mutate(lead_arrears_3 = lead(months_arrears, 3))
if(emergence_period < 3) {
data <- data %>% select(everything(), -num_range("lead_arrears_", (emergence_period+1):3))
}
else {data <- data}
}
# for tr use default definition 1 as we are only interested in the event where
# contracts move forward out of 0 in arrears. thus any move out of 0 in arrears
# can by seen as a default in this case. will always be one regardless of what
# the emergence period is.
# gets the arrears status after 1, 2 and 3 months.
# works only for 1 months. This function not yet ready for 2:3 months.
get_cohort_arrears <- function(data, emergence_period, ...) {
if (emergence_period == 1) {
return(
summarise(
group_by(data, months_arrears, lead_arrears_1, pointintime_month, ...), # works only for 1
cohort_arrears = sum(closing_balance)
) %>%
filter(months_arrears < 1)
)
}
if (emergence_period == 3) {
return(
summarise(
group_by(data, months_arrears, lead_arrears_3, pointintime_month, ...), # works only for 1
cohort_arrears = sum(closing_balance)
) %>%
filter(months_arrears < 1)
)
}
}
# get_cohort_arrears
# function for calculating transition rates into tr_x.
# tr_x = roll from 0 to x in arrears.
get_df_tr0_x <- function(data, emergence_period, tr_x, segmenter_vector, ...) {
data <- select_variables_pd_tr(data, -default_flag, ...) %>%
arrange(., contract_key, pointintime_month) %>%
#pos_closing_balance(.) %>%
lead_arrears(., emergence_period)
tr_cohort_base <- get_cohort_base(data, 1, ...)
tr_cohort_arrears <- arrange(get_cohort_arrears(data, emergence_period, ...), by = pointintime_month)
if (emergence_period == 1) {
tr_join_df <- get_cohort_join(tr_cohort_arrears, tr_cohort_base, segmenter_vector) %>%
filter(lead_arrears_1 == 1) %>%
mutate(tr_0_1 = cohort_arrears/cohort_base)
}
if (emergence_period == 3) {
tr_join_df <- get_cohort_join(tr_cohort_arrears, tr_cohort_base, segmenter_vector) %>%
filter(lead_arrears_3 == tr_x) %>%
mutate(tr_0_x = cohort_arrears/cohort_base)
}
return(tr_join_df)
}
# get_df_tr
# main function that calculates the transition rates based on emergence period of either 1 or 3
# note that for 1 in arrears only the roll into 1 is calculated.
get_df_tr <- function(data, emergence_period, segmenter_vector, ...) {
if (emergence_period == 1) {
df_tr_1 <- get_df_tr0_x(data, emergence_period, tr_x = 1, segmenter_vector, ...)
return(df_tr_1)
}
if (emergence_period == 3) {
df_tr_1 <- get_df_tr0_x(data, emergence_period = 3, tr_x = 1, segmenter_vector, ...)
df_tr_2 <- get_df_tr0_x(data, emergence_period = 3, tr_x = 2, segmenter_vector, ...)
df_tr_3 <- get_df_tr0_x(data, emergence_period = 3, tr_x = 3, segmenter_vector, ...)
df_tr <- bind_rows(df_tr_1, df_tr_2, df_tr_3)
return(df_tr)
}
}
# use:
# get_df_tr(test_data, emergence_period = 3, segmenter_vector = c("country"), country)
# df1 <- select_variables_pd_tr(test_data, -default_flag, country) %>%
# arrange(., contract_key, pointintime_month) %>%
# pos_closing_balance(.) %>%
# lead_arrears(., emergence_period = 3)
#
# arr <- arrange(get_cohort_arrears(df1, 1, 3), pointintime_month)
# base <- get_cohort_base(df1, 1)
#
# tr_join <- get_cohort_join(arr, base, c()) %>%
# filter(lead_arrears_3 > 0)
#
# tr_1 <- tr_join %>%
# filter(lead_arrears_3 == 1) %>%
# mutate(tr_0_1 = cohort_arrears/cohort_base)
#
# tr_2 <- tr_join %>%
# filter(lead_arrears_3 == 2) %>%
# mutate(tr_0_2 = cohort_arrears/cohort_base)
#
# tr_3 <- tr_join %>%
# filter(lead_arrears_3 == 3) %>%
# mutate(tr_0_3 = cohort_arrears/cohort_base)
#
# tr_join_df <- full_join(tr_1, tr_2, pointintime_month) %>%
# full_join(., tr_3, pointintime_month)
# get_df_tr function for the specific case where emergence = 3
# get_df_tr3 <- function(data, emergence_period = 3, segmenter_vector, ...) {
# data <- select_variables_pd_tr(data, -default_flag, ...) %>%
# arrange(., contract_key, pointintime_month) %>%
# pos_closing_balance(.) %>%
# lead_arrears(., emergence_period)
#
# tr_cohort_base <- get_cohort_base(data, 1, ...)
# tr_cohort_arrears <- arrange(get_cohort_arrears(data, default_definition = 1, emergence_period = 1, ...), by = pointintime_month)
#
# tr_join_df <- get_cohort_join(tr_cohort_arrears, tr_cohort_base, segmenter_vector) %>%
# filter(lead_arrears_1 == 1) %>%
# mutate(tr_0_1 = cohort_arrears/cohort_base)
#
# return(tr_join_df)
# }
# -------------------------
# writing the test data set
# -------------------------
# df1 <- df %>%
# filter(contract_key == 66 + 100000 | contract_key == 10111 + 100000 | contract_key == 21000 + 100000 |contract_key == 37024 + 100000 |
# contract_key == 37025 + 100000 | contract_key == 8302 + 100000 | contract_key == 129805)
#
# write_rds(df1, "test_data.rds")
# -------------------------
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.