## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
## ----eval=FALSE, include=TRUE-------------------------------------------------
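# # Attach ReSurv, point reticulate to the "pyresurv" virtual environment, and load ggplot2 and dplyr for the plots and data manipulation below.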
# library(ReSurv)
# library(reticulate)
# use_virtualenv("pyresurv")
# library(ggplot2)
# library(dplyr)
## ----eval=FALSE, include=TRUE-------------------------------------------------
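# # Install the development version of ReSurv from GitHub and check the installed package version.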
# library(devtools)
# devtools::install_github("edhofman/resurv")
# library(ReSurv)
# packageVersion("ReSurv")
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
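# # Install the Python dependencies required by ReSurv into a dedicated virtual environment.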
# ReSurv::install_pyresurv()
## ----eval=FALSE, include=TRUE-------------------------------------------------
# library(ReSurv)
# reticulate::use_virtualenv("pyresurv")
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
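# # Alternatively, install the Python dependencies into a custom environment location ("./venv"), which can be shared with other packages (here pysparklyr's PySpark installation).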
# envname <- "./venv"
# ReSurv::install_pyresurv(envname = envname)
# pysparklyr::install_pyspark(envname = envname)
## ----eval=FALSE, include=TRUE-------------------------------------------------
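# # Simulate individual claim data: scenario "alpha", daily time unit (1/360), 4 accident years, 200 exposures per period.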
# input_data <- data_generator(random_seed = 7,
#                              scenario = "alpha",
#                              time_unit = 1 / 360,
#                              years = 4,
#                              period_exposure = 200)
## ----eval=FALSE, include=TRUE-------------------------------------------------
# str(input_data)
## ----eval=FALSE, include=TRUE-------------------------------------------------
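# # Pre-process the simulated data: daily input granularity, quarterly output granularity, claim_type as a categorical feature and the accident period as a continuous feature.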
# individual_data <- IndividualDataPP(data = input_data,
#                                     categorical_features = "claim_type",
#                                     continuous_features = "AP",
#                                     accident_period = "AP",
#                                     calendar_period = "RP",
#                                     input_time_granularity = "days",
#                                     output_time_granularity = "quarters",
#                                     years = 4)
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
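# # 5-fold cross-validation of the XGBoost hazard model for a single hyper-parameter combination, run in parallel on 2 cores.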
#
# resurv_cv_xgboost <- ReSurvCV(IndividualDataPP = individual_data,
#                               model = "XGB",
#                               hparameters_grid = list(booster = "gbtree",
#                                                       eta = c(.001),
#                                                       max_depth = c(3),
#                                                       subsample = c(1),
#                                                       alpha = c(0),
#                                                       lambda = c(0),
#                                                       min_child_weight = c(.5)),
#                               print_every_n = 1L,
#                               nrounds = 1,
#                               verbose = FALSE,
#                               verbose.cv = TRUE,
#                               early_stopping_rounds = 1,
#                               folds = 5,
#                               parallel = TRUE,
#                               ncores = 2,
#                               random_seed = 1)
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
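# # 3-fold cross-validation of the neural-network hazard model over a small grid of layer and node counts.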
# resurv_cv_nn <- ReSurvCV(IndividualDataPP = individual_data,
#                          model = "NN",
#                          hparameters_grid = list(num_layers = c(1, 2),
#                                                  num_nodes = c(2, 4),
#                                                  optim = "Adam",
#                                                  activation = "ReLU",
#                                                  lr = .5,
#                                                  xi = .5,
#                                                  eps = .5,
#                                                  tie = "Efron",
#                                                  batch_size = as.integer(5000),
#                                                  early_stopping = "TRUE",
#                                                  patience = 20),
#                          epochs = as.integer(300),
#                          num_workers = 0,
#                          verbose = FALSE,
#                          verbose.cv = TRUE,
#                          folds = 3,
#                          parallel = FALSE,
#                          random_seed = as.integer(Sys.time()))
## ----eval=FALSE, include=TRUE-------------------------------------------------
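# # Bayesian hyper-parameter search for the XGBoost model; bayesOpt() is assumed to come from the
# # ParBayesianOptimization package (not attached above). The score is minus the out-of-sample likelihood returned by ReSurvCV.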
#
# bounds <- list(eta = c(0, 1),
#                max_depth = c(1L, 25L),
#                min_child_weight = c(0, 50),
#                subsample = c(0.51, 1),
#                lambda = c(0, 15),
#                alpha = c(0, 15))
#
#
# obj_func <- function(eta,
#                      max_depth,
#                      min_child_weight,
#                      subsample,
#                      lambda,
#                      alpha) {
#
#   xgbcv <- ReSurvCV(
#     IndividualDataPP = individual_data,
#     model = "XGB",
#     hparameters_grid = list(
#       booster = "gbtree",
#       eta = eta,
#       max_depth = max_depth,
#       subsample = subsample,
#       alpha = alpha,
#       lambda = lambda,
#       min_child_weight = min_child_weight
#     ),
#     print_every_n = 1L,
#     nrounds = 500,
#     verbose = FALSE,
#     verbose.cv = TRUE,
#     early_stopping_rounds = 30,
#     folds = 3,
#     parallel = FALSE,
#     random_seed = as.integer(Sys.time())
#   )
#
#   lst <- list(Score = -xgbcv$out.cv.best.oos$test.lkh,
#               train.lkh = xgbcv$out.cv.best.oos$train.lkh)
#
#   return(lst)
# }
#
#
#
# bayes_out <- bayesOpt(FUN = obj_func,
#                       bounds = bounds,
#                       initPoints = 50,
#                       iters.n = 1000,
#                       iters.k = 50,
#                       otherHalting = list(timeLimit = 18000))
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
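# # Analogous Bayesian search for the neural network; the integer-coded optimiser and activation choices are mapped back to their labels inside the objective function.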
# bounds <- list(num_layers = c(2L, 10L),
#                num_nodes = c(2L, 10L),
#                optim = c(1L, 2L),
#                activation = c(1L, 2L),
#                lr = c(0.005, 0.5),
#                xi = c(0, 0.5),
#                eps = c(0, 0.5))
#
#
# obj_func <- function(num_layers,
#                      num_nodes,
#                      optim,
#                      activation,
#                      lr,
#                      xi,
#                      eps) {
#
#   optim <- switch(optim,
#                   "Adam",
#                   "SGD")
#
#   activation <- switch(activation, "LeakyReLU", "SELU")
#   batch_size <- 5000L
#   num_layers <- as.integer(num_layers)
#   num_nodes <- as.integer(num_nodes)
#
#   deepsurv_cv <- ReSurvCV(IndividualDataPP = individual_data,
#                           model = "NN",
#                           hparameters_grid = list(num_layers = num_layers,
#                                                   num_nodes = num_nodes,
#                                                   optim = optim,
#                                                   activation = activation,
#                                                   lr = lr,
#                                                   xi = xi,
#                                                   eps = eps,
#                                                   tie = "Efron",
#                                                   batch_size = batch_size,
#                                                   early_stopping = "TRUE",
#                                                   patience = 20),
#                           epochs = as.integer(300),
#                           num_workers = 0,
#                           verbose = FALSE,
#                           verbose.cv = TRUE,
#                           folds = 3,
#                           parallel = FALSE,
#                           random_seed = as.integer(Sys.time()))
#
#   lst <- list(Score = -deepsurv_cv$out.cv.best.oos$test.lkh,
#               train.lkh = deepsurv_cv$out.cv.best.oos$train.lkh)
#
#   return(lst)
# }
#
# bayes_out <- bayesOpt(FUN = obj_func,
#                       bounds = bounds,
#                       initPoints = 50,
#                       iters.n = 1000,
#                       iters.k = 50,
#                       otherHalting = list(timeLimit = 18000))
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
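# # Fit the Cox proportional hazards model on the pre-processed data.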
#
# resurv_fit_cox <- ReSurv(individual_data,
#                          hazard_model = "COX")
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
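# # Fit the XGBoost hazard model with a fixed set of tuned hyper-parameters.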
# hparameters_xgb <- list(
#   params = list(
#     booster = "gbtree",
#     eta = 0.9611239,
#     subsample = 0.62851,
#     alpha = 5.836211,
#     lambda = 15,
#     min_child_weight = 29.18158,
#     max_depth = 1
#   ),
#   print_every_n = 0,
#   nrounds = 3000,
#   verbose = FALSE,
#   early_stopping_rounds = 500
# )
#
#
# resurv_fit_xgb <- ReSurv(individual_data,
#                          hazard_model = "XGB",
#                          hparameters = hparameters_xgb)
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
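# # Fit the neural-network hazard model with a fixed set of tuned hyper-parameters.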
#
# hparameters_nn <- list(num_layers = 2,
#                        early_stopping = TRUE,
#                        patience = 350,
#                        verbose = FALSE,
#                        network_structure = NULL,
#                        num_nodes = 10,
#                        activation = "LeakyReLU",
#                        optim = "SGD",
#                        lr = 0.02226655,
#                        xi = 0.4678993,
#                        epsilon = 0,
#                        batch_size = 5000L,
#                        epochs = 5500L,
#                        num_workers = 0,
#                        tie = "Efron")
#
#
# resurv_fit_nn <- ReSurv(individual_data,
#                         hazard_model = "NN",
#                         hparameters = hparameters_nn)
#
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
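# # Predict from the Cox fit at quarterly (default), yearly and monthly output granularity; the coarser granularities reuse the daily fit via new IndividualDataPP objects and probability-based grouping.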
# resurv_fit_predict_q <- predict(resurv_fit_cox)
#
# individual_data_y <- IndividualDataPP(input_data,
#                                       id = "claim_number",
#                                       continuous_features = "AP_i",
#                                       categorical_features = "claim_type",
#                                       accident_period = "AP",
#                                       calendar_period = "RP",
#                                       input_time_granularity = "days",
#                                       output_time_granularity = "years",
#                                       years = 4,
#                                       continuous_features_spline = NULL,
#                                       calendar_period_extrapolation = FALSE)
#
# resurv_fit_predict_y <- predict(resurv_fit_cox,
#                                 newdata = individual_data_y,
#                                 grouping_method = "probability")
#
# individual_data_m <- IndividualDataPP(input_data,
#                                       id = "claim_number",
#                                       continuous_features = "AP_i",
#                                       categorical_features = "claim_type",
#                                       accident_period = "AP",
#                                       calendar_period = "RP",
#                                       input_time_granularity = "days",
#                                       output_time_granularity = "months",
#                                       years = 4,
#                                       continuous_features_spline = NULL,
#                                       calendar_period_extrapolation = FALSE)
#
# resurv_fit_predict_m <- predict(resurv_fit_cox,
#                                 newdata = individual_data_m,
#                                 grouping_method = "probability")
#
#
#
# model_s <- summary(resurv_fit_predict_y)
# print(model_s)
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
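# # Predictions from the XGBoost and neural-network fits at the default (quarterly) output granularity.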
# resurv_predict_xgb <- predict(resurv_fit_xgb)
# resurv_predict_nn <- predict(resurv_fit_nn)
## ----eval=FALSE, include=TRUE-------------------------------------------------
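# # Look up the group_o code for accident quarter 15, claim type 1, then plot the corresponding group (group_code = 30).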
#
# resurv_fit_predict_q$long_triangle_format_out$output_granularity %>%
#   filter(AP_o == 15 & claim_type == 1) %>%
#   filter(row_number() == 1) %>%
#   select(group_o)
#
# plot(resurv_fit_predict_q,
#      granularity = "output",
#      title_par = "COX: Accident Quarter 15 Claim Type 1",
#      group_code = 30)
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
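# # Continuous ranked probability score (CRPS) of the Cox fit, averaged over the individual scores.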
# crps <- survival_crps(resurv_fit_cox)
# m_crps <- mean(crps$crps)
# m_crps
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
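# # Exploratory plots: empirical CDF of the notification delay (RT - AT) by claim type, and a bar chart of claim counts per claim type.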
# p1 <- input_data %>%
# as.data.frame() %>%
# mutate(claim_type = as.factor(claim_type)) %>%
# ggplot(aes(x = RT - AT, color = claim_type)) +
# stat_ecdf(size = 1) +
# labs(title = "", x = "Notification delay (in days)", y = "ECDF") +
# xlim(0.01, 1500) +
# scale_color_manual(
# values = c("royalblue", "#a71429"),
# labels = c("Claim type 0", "Claim type 1")
# ) +
# scale_linetype_manual(values = c(1, 3),
# labels = c("Claim type 0", "Claim type 1")) +
# guides(
# color = guide_legend(
# title = "Claim type",
# override.aes = list(
# color = c("royalblue", "#a71429"),
# linewidth = 2
# )
# ),
# linetype = guide_legend(
# title = "Claim type",
# override.aes = list(linetype = c(1, 3), linewidth = 0.7)
# )
# ) +
# theme_bw() +
# theme(
# axis.text = element_text(size = 20),
# axis.title.y = element_text(size = 20),
# axis.title.x = element_text(size = 20),
# legend.text = element_text(size = 20)
# )
# p1
#
#
# p2 <- input_data %>%
# as.data.frame() %>%
# mutate(claim_type = as.factor(claim_type)) %>%
# ggplot(aes(x = claim_type, fill = claim_type)) +
# geom_bar() +
# scale_fill_manual(
# values = c("royalblue", "#a71429"),
# labels = c("Claim type 0", "Claim type 1")
# ) +
# guides(fill = guide_legend(title = "Claim type")) +
# theme_bw() +
# labs(title = "", x = "Claim Type", y = "") +
# theme(
# axis.text = element_text(size = 20),
# axis.title.y = element_text(size = 20),
# axis.title.x = element_text(size = 20),
# legend.text = element_text(size = 20)
# )
# p2
#
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
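# # Chain-ladder development factors computed from the observed (aggregated) data at quarterly, monthly and yearly output granularity, plotted by development period.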
#
# clmodel <- resurv_fit_cox$IndividualDataPP$training.data %>%
# mutate(DP_o = 16 -
# DP_rev_o + 1) %>%
# group_by(AP_o, DP_o) %>%
# summarize(I = sum(I), .groups = "drop") %>%
# group_by(AP_o) %>%
# arrange(DP_o) %>%
# mutate(I_cum = cumsum(I), I_cum_lag = lag(I_cum, default = 0)) %>%
# ungroup() %>%
# group_by(DP_o) %>%
# reframe(df_o = sum(I_cum * (
# AP_o <= max(resurv_fit_cox$IndividualDataPP$training.data$AP_o) - DP_o +
# 1
# )) /
# sum(I_cum_lag * (
# AP_o <= max(resurv_fit_cox$IndividualDataPP$training.data$AP_o) - DP_o +
# 1
# )),
# I = sum(I * (
# AP_o <= max(resurv_fit_cox$IndividualDataPP$training.data$AP_o) - DP_o
# ))) %>%
# mutate(DP_o_join = DP_o - 1) %>%
# as.data.frame()
#
#
# clmodel %>%
# filter(DP_o > 1) %>%
# ggplot(aes(x = DP_o, y = df_o)) +
# geom_line(linewidth = 2.5,
# color = "#454555") +
# labs(title = "Chain ladder",
# x = "Development quarter",
# y = "Development factor") +
# ylim(1, 3. + .01) +
# theme_bw(base_size = rel(5)) +
# theme(plot.title = element_text(size = 20))
#
# ##
#
# clmodel_months <- individual_data_m$training.data %>%
# mutate(DP_o = 48 -
# DP_rev_o + 1) %>%
# group_by(AP_o, DP_o) %>%
# summarize(I = sum(I), .groups = "drop") %>%
# group_by(AP_o) %>%
# arrange(DP_o) %>%
# mutate(I_cum = cumsum(I), I_cum_lag = lag(I_cum, default = 0)) %>%
# ungroup() %>%
# group_by(DP_o) %>%
# reframe(df_o = sum(I_cum * (
# AP_o <= max(individual_data_m$training.data$AP_o) - DP_o + 1
# )) /
# sum(I_cum_lag * (
# AP_o <= max(individual_data_m$training.data$AP_o) - DP_o + 1
# )),
# I = sum(I * (
# AP_o <= max(individual_data_m$training.data$AP_o) - DP_o
# ))) %>%
# mutate(DP_o_join = DP_o - 1) %>%
# as.data.frame()
#
# ticks_at <- seq(1, 48, 4)
# labels_as <- as.character(ticks_at)
#
# clmodel_months %>%
# filter(DP_o > 1) %>%
# ggplot(aes(x = DP_o,
# y = df_o)) +
# geom_line(linewidth = 2.5,
# color = "#454555") +
# labs(title = "Chain ladder",
# x = "Development month",
# y = "Development factor") +
# ylim(1, 2.5 + .01) +
# scale_x_continuous(breaks = ticks_at, labels = labels_as) +
# theme_bw(base_size = rel(5)) +
# theme(plot.title = element_text(size = 20))
#
#
# ##
#
# clmodel_years <- individual_data_y$training.data %>%
# mutate(DP_o = 4 -
# DP_rev_o + 1) %>%
# group_by(AP_o, DP_o) %>%
# summarize(I = sum(I), .groups = "drop") %>%
# group_by(AP_o) %>%
# arrange(DP_o) %>%
# mutate(I_cum = cumsum(I), I_cum_lag = lag(I_cum, default = 0)) %>%
# ungroup() %>%
# group_by(DP_o) %>%
# reframe(df_o = sum(I_cum * (
# AP_o <= max(individual_data_y$training.data$AP_o) - DP_o + 1
# )) /
# sum(I_cum_lag * (
# AP_o <= max(individual_data_y$training.data$AP_o) - DP_o + 1
# )),
# I = sum(I * (
# AP_o <= max(individual_data_y$training.data$AP_o) - DP_o
# ))) %>%
# mutate(DP_o_join = DP_o - 1) %>%
# as.data.frame()
#
# ticks_at <- seq(1, 4, 1)
# labels_as <- as.character(ticks_at)
#
# clmodel_years %>%
# filter(DP_o > 1) %>%
# ggplot(aes(x = DP_o,
# y = df_o)) +
# geom_line(linewidth = 2.5,
# color = "#454555") +
# labs(title = "Chain ladder",
# x = "Development year",
# y = "Development factor") +
# ylim(1, 2.5 + .01) +
# scale_x_continuous(breaks = ticks_at, labels = labels_as) +
# theme_bw(base_size = rel(5)) +
# theme(plot.title = element_text(size = 20))
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
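# # Development plots for selected accident periods and claim types at quarterly, monthly and yearly granularity; each block first looks up the relevant group_o code.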
# ct <- 0
# ap <- 12
#
# resurv_fit_predict_q$long_triangle_format_out$output_granularity %>%
# filter(AP_o == ap & claim_type == ct) %>%
# filter(row_number() == 1) %>%
# select(group_o)
#
# plot(
# resurv_fit_predict_q,
# granularity = "output",
# title_par = "COX: Accident Quarter 12 Claim Type 0",
# group_code = 23
# )
#
#
# ct <- 0
# ap <- 36
#
# resurv_fit_predict_m$long_triangle_format_out$output_granularity %>%
# filter(AP_o == ap & claim_type == ct) %>%
# filter(row_number() == 1) %>%
# select(group_o)
#
# plot(
# resurv_fit_predict_m,
# granularity = "output",
# color_par = "#a71429",
# title_par = "COX: Accident Month 36 Claim Type 0",
# group_code = 71
# )
#
#
#
# ct <- 1
# ap <- 7
#
# resurv_fit_predict_m$long_triangle_format_out$output_granularity %>%
# filter(AP_o == ap & claim_type == ct) %>%
# filter(row_number() == 1) %>%
# select(group_o)
#
# plot(
# resurv_fit_predict_m,
# granularity = "output",
# color_par = "#a71429",
# title_par = "COX: Accident Month 7 Claim Type 1",
# group_code = 14
# )
#
#
# ct <- 0
# ap <- 2
#
# resurv_fit_predict_y$long_triangle_format_out$output_granularity %>%
# filter(AP_o == ap & claim_type == ct) %>%
# filter(row_number() == 1) %>%
# select(group_o)
#
# plot(
# resurv_fit_predict_y,
# granularity = "output",
# color_par = "#FF6A7A",
# title_par = "COX: Accident Year 2 Claim Type 0",
# group_code = 3
# )
#
#
# ct <- 1
# ap <- 3
#
# resurv_fit_predict_y$long_triangle_format_out$output_granularity %>%
# filter(AP_o == ap & claim_type == ct) %>%
# filter(row_number() == 1) %>%
# select(group_o)
#
#
# plot(
# resurv_fit_predict_y,
# granularity = "output",
# color_par = "#FF6A7A",
# title_par = "COX: Accident Year 3 Claim Type 1",
# group_code = 6
# )
## ----eval=FALSE, include=TRUE-------------------------------------------------
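# # Scoring: absolute relative error on the total predicted counts (ARE TOT) and on the
# # calendar-period (cash-flow) diagonals (ARE CAL) for the Cox, XGB and NN fits, plus a chain-ladder benchmark.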
#
# conversion_factor <- individual_data$conversion_factor
#
# max_dp_i <- 1440
#
# true_output <- resurv_fit_cox$IndividualDataPP$full.data %>%
# mutate(
# DP_rev_o = floor(max_dp_i * conversion_factor) -
# ceiling(
# DP_i * conversion_factor +
# ((AP_i - 1) %% (
# 1 / conversion_factor
# )) * conversion_factor) + 1,
# AP_o = ceiling(AP_i * conversion_factor),
# TR_o = AP_o - 1
# ) %>%
# filter(DP_rev_o <= TR_o) %>%
# group_by(claim_type, AP_o, DP_rev_o) %>%
# mutate(claim_type = as.character(claim_type)) %>%
# summarize(I = sum(I), .groups = "drop") %>%
# filter(DP_rev_o > 0)
#
# out_list <- resurv_fit_predict_q$long_triangle_format_out
# out <- out_list$output_granularity
#
# #Total output
# score_total <- out[, c("claim_type", "AP_o", "DP_o", "expected_counts")] %>%
# mutate(DP_rev_o = 16 - DP_o + 1) %>%
# inner_join(true_output, by = c("claim_type", "AP_o", "DP_rev_o")) %>%
# mutate(ave = I - expected_counts, abs_ave = abs(ave)) %>%
# # from here on, aggregate over claim types to compute ARE TOT
# ungroup() %>%
# group_by(AP_o, DP_rev_o) %>%
# reframe(abs_ave = abs(sum(ave)), I = sum(I))
#
# are_tot <- sum(score_total$abs_ave) / sum(score_total$I)
#
#
# dfs_output <- out %>%
# mutate(DP_rev_o = 16 - DP_o + 1) %>%
# select(AP_o, claim_type, DP_rev_o, f_o) %>%
# mutate(DP_rev_o = DP_rev_o) %>%
# distinct()
#
# # Cash flow on the output scale, i.e. quarterly cash-flow development
# score_diagonal <- resurv_fit_cox$IndividualDataPP$full.data %>%
# mutate(
# DP_rev_o = floor(max_dp_i * conversion_factor) -
# ceiling(
# DP_i * conversion_factor +
# ((AP_i - 1) %% (
# 1 / conversion_factor
# )) * conversion_factor) + 1,
# AP_o = ceiling(AP_i * conversion_factor)
# ) %>%
# group_by(claim_type, AP_o, DP_rev_o) %>%
# mutate(claim_type = as.character(claim_type)) %>%
# summarize(I = sum(I), .groups = "drop") %>%
# group_by(claim_type, AP_o) %>%
# arrange(desc(DP_rev_o)) %>%
# mutate(I_cum = cumsum(I)) %>%
# mutate(I_cum_lag = lag(I_cum, default = 0)) %>%
# left_join(dfs_output, by = c("AP_o", "claim_type", "DP_rev_o")) %>%
# mutate(I_cum_hat = I_cum_lag * f_o,
# RP_o = max(DP_rev_o) - DP_rev_o + AP_o) %>%
# inner_join(true_output[, c("AP_o", "DP_rev_o")] %>% distinct()
# , by = c("AP_o", "DP_rev_o")) %>%
# group_by(AP_o, DP_rev_o) %>%
# reframe(abs_ave2_diag = abs(sum(I_cum_hat) - sum(I_cum)), I = sum(I))
#
# are_cal_q <- sum(score_diagonal$abs_ave2_diag) / sum(score_diagonal$I)
#
#
# # scoring XGB ----
#
# out_xgb <- resurv_predict_xgb$long_triangle_format_out$output_granularity
#
# score_total_xgb <- out_xgb[, c("claim_type",
# "AP_o",
# "DP_o",
# "expected_counts")] %>%
# mutate(DP_rev_o = 16 - DP_o + 1) %>%
# inner_join(true_output, by = c("claim_type", "AP_o", "DP_rev_o")) %>%
# mutate(ave = I - expected_counts, abs_ave = abs(ave)) %>%
# # from here on, aggregate over claim types to compute ARE TOT
# ungroup() %>%
# group_by(AP_o, DP_rev_o) %>%
# reframe(abs_ave = abs(sum(ave)), I = sum(I))
#
# are_tot_xgb <- sum(score_total_xgb$abs_ave) / sum(score_total_xgb$I)
#
#
# dfs_output_xgb <- out_xgb %>%
# mutate(DP_rev_o = 16 - DP_o + 1) %>%
# select(AP_o, claim_type, DP_rev_o, f_o) %>%
# mutate(DP_rev_o = DP_rev_o) %>%
# distinct()
#
# # Cash flow on the output scale, i.e. quarterly cash-flow development
# score_diagonal_xgb <- resurv_fit_cox$IndividualDataPP$full.data %>%
# mutate(
# DP_rev_o = floor(max_dp_i * conversion_factor) -
# ceiling(
# DP_i * conversion_factor +
# ((AP_i - 1) %% (
# 1 / conversion_factor
# )) * conversion_factor) + 1,
# AP_o = ceiling(AP_i * conversion_factor)
# ) %>%
# group_by(claim_type, AP_o, DP_rev_o) %>%
# mutate(claim_type = as.character(claim_type)) %>%
# summarize(I = sum(I), .groups = "drop") %>%
# group_by(claim_type, AP_o) %>%
# arrange(desc(DP_rev_o)) %>%
# mutate(I_cum = cumsum(I)) %>%
# mutate(I_cum_lag = lag(I_cum, default = 0)) %>%
# left_join(dfs_output_xgb, by = c("AP_o", "claim_type", "DP_rev_o")) %>%
# mutate(I_cum_hat = I_cum_lag * f_o,
# RP_o = max(DP_rev_o) - DP_rev_o + AP_o) %>%
# inner_join(true_output[, c("AP_o", "DP_rev_o")] %>% distinct()
# , by = c("AP_o", "DP_rev_o")) %>%
# group_by(AP_o, DP_rev_o) %>%
# reframe(abs_ave2_diag = abs(sum(I_cum_hat) - sum(I_cum)), I = sum(I))
#
# are_cal_q_xgb <- sum(score_diagonal_xgb$abs_ave2_diag) / sum(score_diagonal_xgb$I)
#
# # scoring NN ----
#
# out_nn <- resurv_predict_nn$long_triangle_format_out$output_granularity
#
# score_total_nn <- out_nn[, c("claim_type",
# "AP_o",
# "DP_o",
# "expected_counts")] %>%
# mutate(DP_rev_o = 16 - DP_o + 1) %>%
# inner_join(true_output, by = c("claim_type", "AP_o", "DP_rev_o")) %>%
# mutate(ave = I - expected_counts, abs_ave = abs(ave)) %>%
# # from here on, aggregate over claim types to compute ARE TOT
# ungroup() %>%
# group_by(AP_o, DP_rev_o) %>%
# reframe(abs_ave = abs(sum(ave)), I = sum(I))
#
# are_tot_nn <- sum(score_total_nn$abs_ave) / sum(score_total_nn$I)
#
#
# dfs_output_nn <- out_nn %>%
# mutate(DP_rev_o = 16 - DP_o + 1) %>%
# select(AP_o, claim_type, DP_rev_o, f_o) %>%
# mutate(DP_rev_o = DP_rev_o) %>%
# distinct()
#
# # Cash flow on the output scale, i.e. quarterly cash-flow development
# score_diagonal_nn <- resurv_fit_cox$IndividualDataPP$full.data %>%
# mutate(
# DP_rev_o = floor(max_dp_i * conversion_factor) -
# ceiling(
# DP_i * conversion_factor +
# ((AP_i - 1) %% (
# 1 / conversion_factor
# )) * conversion_factor) + 1,
# AP_o = ceiling(AP_i * conversion_factor)
# ) %>%
# group_by(claim_type, AP_o, DP_rev_o) %>%
# mutate(claim_type = as.character(claim_type)) %>%
# summarize(I = sum(I), .groups = "drop") %>%
# group_by(claim_type, AP_o) %>%
# arrange(desc(DP_rev_o)) %>%
# mutate(I_cum = cumsum(I)) %>%
# mutate(I_cum_lag = lag(I_cum, default = 0)) %>%
# left_join(dfs_output_nn, by = c("AP_o", "claim_type", "DP_rev_o")) %>%
# mutate(I_cum_hat = I_cum_lag * f_o,
# RP_o = max(DP_rev_o) - DP_rev_o + AP_o) %>%
# inner_join(true_output[, c("AP_o", "DP_rev_o")] %>% distinct()
# , by = c("AP_o", "DP_rev_o")) %>%
# group_by(AP_o, DP_rev_o) %>%
# reframe(abs_ave2_diag = abs(sum(I_cum_hat) - sum(I_cum)), I = sum(I))
#
# are_cal_q_nn <- sum(score_diagonal_nn$abs_ave2_diag) / sum(score_diagonal_nn$I)
#
# # Scoring Chain-Ladder ----
#
# true_output_cl <- individual_data$full.data %>%
# mutate(
# DP_rev_o = floor(max_dp_i * conversion_factor) -
# ceiling(
# DP_i * conversion_factor +
# ((AP_i - 1) %% (
# 1 / conversion_factor
# )) * conversion_factor) + 1,
# AP_o = ceiling(AP_i * conversion_factor)
# ) %>%
# filter(DP_rev_o <= TR_o) %>%
# mutate(DP_o = max(individual_data$training.data$DP_rev_o) - DP_rev_o + 1) %>%
# group_by(AP_o, DP_o, DP_rev_o) %>%
# summarize(I = sum(I), .groups = "drop") %>%
# filter(DP_rev_o > 0)
# latest_observed <- individual_data$training.data %>%
# filter(DP_rev_o >= TR_o) %>%
# mutate(DP_o = max(individual_data$training.data$DP_rev_o) - DP_rev_o + 1) %>%
# group_by(AP_o) %>%
# mutate(DP_max = max(DP_o)) %>%
# group_by(AP_o, DP_max) %>%
# summarize(I = sum(I), .groups = "drop")
#
# clmodel <- individual_data$training.data %>%
# mutate(DP_o = max(individual_data$training.data$DP_rev_o) - DP_rev_o + 1) %>%
# group_by(AP_o, DP_o) %>%
# summarize(I = sum(I), .groups = "drop") %>%
# group_by(AP_o) %>%
# arrange(DP_o) %>%
# mutate(I_cum = cumsum(I), I_cum_lag = lag(I_cum, default = 0)) %>%
# ungroup() %>%
# group_by(DP_o) %>%
# reframe(df = sum(I_cum * (
# AP_o <= max(individual_data$training.data$AP_o) - DP_o + 1
# )) /
# sum(I_cum_lag * (
# AP_o <= max(individual_data$training.data$AP_o) - DP_o + 1
# )), I = sum(I * (
# AP_o <= max(individual_data$training.data$AP_o) - DP_o
# ))) %>%
# mutate(DP_o_join = DP_o) %>%
# mutate(DP_rev_o = max(DP_o) - DP_o + 1)
#
# predictions <- expand.grid(AP_o = latest_observed$AP_o,
# DP_o = clmodel$DP_o_join) %>%
# left_join(clmodel[, c("DP_o_join", "df")], by = c("DP_o" = "DP_o_join")) %>%
# left_join(latest_observed, by = "AP_o") %>%
# rowwise() %>%
# filter(DP_o > DP_max) %>%
# ungroup() %>%
# group_by(AP_o) %>%
# arrange(DP_o) %>%
# mutate(df_cum = cumprod(df)) %>%
# mutate(I_expected = I * df_cum - I * lag(df_cum, default = 1)) %>%
# select(DP_o, AP_o, I_expected)
#
# conversion_factor <- individual_data$conversion_factor
# max_dp_i <- 1440
# score_total <- predictions %>%
# inner_join(true_output_cl, by = c("AP_o", "DP_o")) %>%
# mutate(ave = I - I_expected, abs_ave = abs(ave)) %>%
# # from here on, aggregate to compute ARE TOT
# ungroup() %>%
# group_by(AP_o, DP_rev_o) %>%
# reframe(abs_ave = abs(sum(ave)), I = sum(I))
#
# are_tot <- sum(score_total$abs_ave) / sum(score_total$I)
#
# score_diagonal <- individual_data$full.data %>%
# mutate(
# DP_rev_o = floor(max_dp_i * conversion_factor) -
# ceiling(
# DP_i * conversion_factor +
# ((AP_i - 1) %% (
# 1 / conversion_factor
# )) * conversion_factor) + 1,
# AP_o = ceiling(AP_i * conversion_factor)
# ) %>%
# group_by(claim_type, AP_o, DP_rev_o) %>%
# mutate(claim_type = as.character(claim_type)) %>%
# summarize(I = sum(I), .groups = "drop") %>%
# group_by(claim_type, AP_o) %>%
# arrange(desc(DP_rev_o)) %>%
# mutate(I_cum = cumsum(I)) %>%
# mutate(I_cum_lag = lag(I_cum, default = 0)) %>%
# mutate(DP_o = max(individual_data$training.data$DP_rev_o) - DP_rev_o + 1) %>%
# left_join(clmodel[, c("DP_o", "df")], by = c("DP_o")) %>%
# mutate(I_cum_hat = I_cum_lag * df,
# RP_o = max(DP_rev_o) - DP_rev_o + AP_o) %>%
# inner_join(true_output_cl[, c("AP_o", "DP_rev_o")] %>% distinct()
# , by = c("AP_o", "DP_rev_o")) %>%
# group_by(AP_o, DP_rev_o) %>%
# reframe(abs_ave2_diag = abs(sum(I_cum_hat) - sum(I_cum)), I = sum(I))
#
# are_cal_q <- sum(score_diagonal$abs_ave2_diag) / sum(score_diagonal$I)
#