In this vignette, we provide the supplementary material to replicate the results of the manuscript A machine learning approach based on survival analysis for IBNR frequencies in non-life reserving.
We remark that the real data that we used to obtain the results that we included in the manuscript are private and will not be shared.
With reference to the code related to the simulation case study, we wanted to make this vignette quickly reproducible and drop extensive computations. The full code that we implemented can be found at the GitHub repository resurv-replication-code.
For optimising the hyperparameters included in this vignette, we used the procedure shown in the Hyperparameters Tuning Vignette .
library(ReSurv) reticulate::use_virtualenv("pyresurv") library(data.table) library(ggplot2)
We first fit simulate and pre-process some data from Scenario Delta.
input_data <- data_generator(random_seed = 7, scenario = 3, time_unit = 1 / 360, years = 4, period_exposure = 200) individual_data <- IndividualDataPP(input_data, id = "claim_number", continuous_features = "AP_i", categorical_features = "claim_type", accident_period = "AP", calendar_period = "RP", input_time_granularity = "days", output_time_granularity = "quarters", years = 4, continuous_features_spline = NULL, calendar_period_extrapolation = FALSE)
We fit a XGB model with optimal hyperparameters.
hparameters <- list(params = list(booster = "gbtree", eta = 0.2234094, subsample = 0.8916594, alpha = 12.44775, lambda = 5.714286, min_child_weight = 4.211996, max_depth = 2), print_every_n = 0, nrounds = 3000, verbose = FALSE, early_stopping_rounds = 500) resurv_fit <- ReSurv(individual_data, hazard_model = "XGB", hparameters = hparameters)
We predict for different time granularities.
resurv_fit_predict_q <- predict(resurv_fit, grouping_method = "probability") individual_data_y <- IndividualDataPP(input_data, id = "claim_number", continuous_features = "AP_i", categorical_features = "claim_type", accident_period = "AP", calendar_period = "RP", input_time_granularity = "days", output_time_granularity = "years", years = 4, continuous_features_spline = NULL, calendar_period_extrapolation = FALSE) resurv_fit_predict_y <- predict(resurv_fit, newdata = individual_data_y, grouping_method = "probability") individual_data_m <- IndividualDataPP(input_data, id = "claim_number", continuous_features = "AP_i", categorical_features = "claim_type", accident_period = "AP", calendar_period = "RP", input_time_granularity = "days", output_time_granularity = "months", years = 4, continuous_features_spline = NULL, calendar_period_extrapolation = FALSE) resurv_fit_predict_m <- predict(resurv_fit, newdata = individual_data_m, grouping_method = "probability")
We also plot the Chain-Ladder (CL) development factors for Monthly and Quarterly granularities
ticks_at <- seq(1, 48, 4) labels_as <- as.character(ticks_at) cl_months <- individual_data_m$training.data %>% mutate(DP_o = 48 - DP_rev_o + 1) %>% group_by(AP_o, DP_o) %>% summarize(I = sum(I), .groups = "drop") %>% group_by(AP_o) %>% arrange(DP_o) %>% mutate(I_cum = cumsum(I), I_cum_lag = lag(I_cum, default = 0)) %>% ungroup() %>% group_by(DP_o) %>% reframe(df_o = sum(I_cum * ( AP_o <= max(individual_data_m$training.data$AP_o) - DP_o + 1 )) / sum(I_cum_lag * ( AP_o <= max(individual_data_m$training.data$AP_o) - DP_o + 1 )), I = sum(I * ( AP_o <= max(individual_data_m$training.data$AP_o) - DP_o ))) %>% mutate(DP_o_join = DP_o - 1) %>% as.data.frame() cl_months %>% filter(DP_o > 1) %>% ggplot(aes(x = DP_o, y = df_o)) + geom_line(linewidth = 2.5, color = "#454555") + labs(title = "Chain ladder", x = "Development month", y = "Development factor") + ylim(1, 2.5 + .01) + scale_x_continuous(breaks = ticks_at, labels = labels_as) + theme_bw(base_size = rel(5)) + theme(plot.title = element_text(size = 20))
cl_years <- resurv_fit$IndividualDataPP$training.data %>% mutate(DP_o = 16 - DP_rev_o + 1) %>% group_by(AP_o, DP_o) %>% summarize(I = sum(I), .groups = "drop") %>% group_by(AP_o) %>% arrange(DP_o) %>% mutate(I_cum = cumsum(I), I_cum_lag = lag(I_cum, default = 0)) %>% ungroup() %>% group_by(DP_o) %>% reframe(df_o = sum(I_cum * ( AP_o <= max(resurv_fit$IndividualDataPP$training.data$AP_o) - DP_o + 1 )) / sum(I_cum_lag * ( AP_o <= max(resurv_fit$IndividualDataPP$training.data$AP_o) - DP_o + 1 )), I = sum(I * ( AP_o <= max(resurv_fit$IndividualDataPP$training.data$AP_o) - DP_o ))) %>% mutate(DP_o_join = DP_o - 1) %>% as.data.frame() cl_years %>% filter(DP_o > 1) %>% ggplot(aes(x = DP_o, y = df_o)) + geom_line(linewidth = 2.5, color = "#454555") + labs(title = "Chain ladder", x = "Development quarter", y = "Development factor") + ylim(1, 4 + .01) + theme_bw(base_size = rel(5)) + theme(plot.title = element_text(size = 20)) ticks_at <- seq(1, 16, by = 2) labels_as <- as.character(ticks_at)
For different granularities and feature combinations we plot the XGB feature dependent development factors. `
ap <- 15 ct <- 1 resurv_fit_predict_q$long_triangle_format_out$output_granularity %>% filter(AP_o == 15 & claim_type == 1) %>% filter(row_number() == 1) %>% select(group_o) plot( resurv_fit_predict_q, granularity = "output", title_par = "XGB: Accident Quarter 15 Claim Type 1", x_text_par = "Development Quarter", group_code = 30 )
ct <- 0 ap <- 15 resurv_fit_predict_q$long_triangle_format_out$output_granularity %>% filter(AP_o == ap & claim_type == ct) %>% filter(row_number() == 1) %>% select(group_o) plot( resurv_fit_predict_q, granularity = "output", title_par = "XGB: Accident Quarter 15 Claim Type 0", x_text_par = "Development Quarter", ylim_par = 4, group_code = 29 )
ct <- 0 ap <- 16 resurv_fit_predict_q$long_triangle_format_out$output_granularity %>% filter(AP_o == ap & claim_type == ct) %>% filter(row_number() == 1) %>% select(group_o) plot( resurv_fit_predict_q, granularity = "output", title_par = "XGB: Accident Quarter 16 Claim Type 0", x_text_par = "Development Quarter", ylim_par = 4, group_code = 31 )
ct <- 1 ap <- 7 resurv_fit_predict_m$long_triangle_format_out$output_granularity %>% filter(AP_o == ap & claim_type == ct) %>% filter(row_number() == 1) %>% select(group_o) plot( resurv_fit_predict_m, granularity = "output", title_par = "XGB: Accident Month 7 Claim Type 1", x_text_par = "Development Month", color_par = "#a71429", ylim_par = 10, group_code = 14 )
ct <- 0 ap <- 9 resurv_fit_predict_m$long_triangle_format_out$output_granularity %>% filter(AP_o == ap & claim_type == ct) %>% filter(row_number() == 1) %>% select(group_o) plot( resurv_fit_predict_m, granularity = "output", title_par = "XGB: Accident Month 9 Claim Type 0", x_text_par = "Development Month", color_par = "#a71429", ylim_par = 2.5, group_code = 17 )
ct <- 0 ap <- 36 resurv_fit_predict_m$long_triangle_format_out$output_granularity %>% filter(AP_o == ap & claim_type == ct) %>% filter(row_number() == 1) %>% select(group_o) plot( resurv_fit_predict_m, granularity = "output", title_par = "XGB: Accident Month 36 Claim Type 0", color_par = "#a71429", x_text_par = "Development Month", ylim_par = 2.5, group_code = 71 )
The models likelihood can be extracted using the following commands. Table 2 shows the average negative log-likelihood for each scenario over the different simulations, here we show the results for XGB, scenario Delta and simulation number 7.
resurv_fit$is_lkh resurv_fit$os_lkh
We show how to plot the fitted survival function for scenario Alpha and scenario Delta against the true survival function.
We find the true Survival Function for scenario Alpha, claim type 1.
beta <- 2 * 30 lambda <- 0.1 k <- 1 b <- 1440 alpha <- 0.5 beta0 <- 1.15129 beta1 <- 1.95601 f_correct_s0 <- function(t, alpha, beta, lambda, k, b, beta_coef) { inner <- lambda * exp(beta_coef) ^ (1 / (alpha * k)) element_one <- -beta ^ alpha * (inner) ^ (alpha * k) element_two <- (t ^ (-alpha * k) - b ^ (-alpha * k)) exp(element_one * element_two) } c_correct_grouped <- c() for (i in 0:(b - 1)) { t <- seq(i, i + 1, by = 0.001) n_t <- length(t) calculation <- f_correct_s0(t, alpha, beta, lambda, k, b, beta0) c_correct_grouped[i + 1] <- sum(1 - calculation) / n_t } c_correct_grouped <- c(1, c_correct_grouped[1:(b - 1)]) c_correct_grouped1 <- c() for (i in 0:(b - 1)) { t <- seq(i, i + 1, by = 0.001) n_t <- length(t) calculation <- f_correct_s0(t, alpha, beta, lambda, k, b, beta1) c_correct_grouped1[i + 1] <- sum(1 - calculation) / n_t } c_correct_grouped1 <- c(1, c_correct_grouped1[1:(b - 1)]) true_curve <- data.table( "DP_rev_i" = (b - 1) - seq(0, (b - 1), by = 1), "S_i" = 1 - (c_correct_grouped1), "model_label" = "TRUE" )
We generate the data for Scenario Alpha.
input_data <- data_generator( random_seed = 1, scenario = 0, time_unit = 1 / 360, years = 4, period_exposure = 200 ) individual_data <- IndividualDataPP( input_data, id = "claim_number", continuous_features = "AP_i", categorical_features = "claim_type", accident_period = "AP", calendar_period = "RP", input_time_granularity = "days", output_time_granularity = "quarters", years = 4, continuous_features_spline = NULL, calendar_period_extrapolation = FALSE )
Here the optimal hyperparameters for XGB and NN for the generated data set.
hparameters_xgb_01 <- list( params = list( booster = "gbtree", eta = 0.9887265, subsample = 0.7924135, alpha = 10.85342, lambda = 6.213317, min_child_weight = 3.042204, max_depth = 1 ), print_every_n = 0, nrounds = 3000, verbose = FALSE, early_stopping_rounds = 500 ) hparameters_nn_01 <- list( num_layers = 2, early_stopping = TRUE, patience = 350, verbose = FALSE, network_structure = NULL, num_nodes = 10, activation = "SELU", optim = "SGD", lr = 0.2741031, xi = 0.3829451, epsilon = 0, batch_size = as.integer(5000), epochs = as.integer(5500), num_workers = 0, tie = "Efron" )
We fit COX, NN and XGB on the simulated data.
resurv_fit_cox_01 <- ReSurv(individual_data, hazard_model = "COX") resurv_fit_nn_01 <- ReSurv(individual_data, hazard_model = "NN", hparameters = hparameters_nn_01) resurv_fit_xgb_01 <- ReSurv(individual_data, hazard_model = "XGB", hparameters = hparameters_xgb_01)
Extract the fitted survival curve.
hazard_frame_updated_cox <- resurv_fit_cox_01$hazard_frame hazard_frame_updated_nn <- resurv_fit_nn_01$hazard_frame hazard_frame_updated_xgb <- resurv_fit_xgb_01$hazard_frame cond_1 <- hazard_frame_updated_cox$AP_i == 13 & hazard_frame_updated_cox$claim_type == 1 estimated_cox <- hazard_frame_updated_cox[, c("S_i", "DP_rev_i")] estimated_cox <- as.data.table(estimated_cox)[, model_label := "COX"] cond_2 <- hazard_frame_updated_nn$AP_i == 13 & hazard_frame_updated_nn$claim_type == 1 estimated_nn <- hazard_frame_updated_nn[cond_2, c("S_i", "DP_rev_i")] estimated_nn <- as.data.table(estimated_nn)[, model_label := "NN"] cond_3 <- hazard_frame_updated_xgb$AP_i == 13 & hazard_frame_updated_xgb$claim_type == 1 estimated_xgb <- hazard_frame_updated_xgb[, c("S_i", "DP_rev_i")] estimated_xgb <- as.data.table(estimated_xgb)[cond3, model_label := "XGB"] dt <- rbind(estimated_cox, estimated_nn, estimated_xgb)
Plot the fitted survival curve for the three.
ggplot(data = dt, aes(x = DP_rev_i, y = S_i, color = model_label)) + geom_line(linewidth = 1) + facet_grid(~ model_label) + annotate( geom = "line", x = true_curve$DP_rev_i, y = true_curve$S_i, lty = 2, linewidth = 1 ) + scale_x_continuous( expand = c(0, 0), breaks = c(0, 440, 940), labels = c("1440", "1000", "500") ) + scale_y_continuous(expand = c(0, .001)) + xlab("Development time") + ylab("Survival function") + scale_color_manual(name = "Model", values = c("#AAAABC", "#a71429", "#4169E1")) + theme_bw() + theme( legend.position = "none", text = element_text(size = 20), axis.text.x = element_text( angle = 90, vjust = 0.5, hjust = 1 ) )
We find the true Survival Function for scenario Delta, accident day 691 and claim type 1.
my_ap = 691 period_function <- function(x) { " Add monthly seasonal effect starting from daily input. " tmp <- floor((x - 1) / 30) if ((tmp %% 12) %in% (c(2, 3, 4))) { return(-0.3) } if ((tmp %% 12) %in% (c(5, 6, 7))) { return(0.4) } if ((tmp %% 12) %in% (c(8, 9, 10))) { return(-0.7) } if ((tmp %% 12) %in% (c(11, 0, 1))) { #0 instead of 12 return(0.1) } } beta <- 2 * 30 lambda <- 0.1 k <- 1 b <- 1440 alpha <- 0.5 beta0 <- 1.15129 beta1 <- 1.95601 + period_function(my_ap) f_correct_s0 <- function(t, alpha, beta, lambda, k, b, beta_coef) { element_1 <- -beta ^ alpha element_2 <- lambda * exp(beta_coef) ^ (1 / (alpha * k)) element_ 3 <- t ^ (-alpha * k) - b ^ (-alpha * k) exp(element_1 * (element_2) ^ (alpha * k) * ()) } c_correct_grouped <- c() for (i in 0:(b - 1)) { t <- seq(i, i + 1, by = 0.001) n_t <- length(t) calculation <- f_correct_s0(t, alpha, beta, lambda, k, b, beta0) c_correct_grouped[i + 1] <- sum(1 - calculation) / n_t } c_correct_grouped <- c(1, c_correct_grouped[1:(b - 1)]) c_correct_grouped1 <- c() for (i in 0:(b - 1)) { t <- seq(i, i + 1, by = 0.001) n_t <- length(t) calculation <- f_correct_s0(t, alpha, beta, lambda, k, b, beta1) c_correct_grouped1[i + 1] <- sum(1 - calculation) / n_t } c_correct_grouped1 <- c(1, c_correct_grouped1[1:(b - 1)]) true_curve <- data.table( "DP_rev_i" = (b - 1) - seq(0, (b - 1), by = 1), "S_i" = 1 - (c_correct_grouped1), "model_label" = "TRUE" )
We generate a data set from scenario Delta.
input_data <- data_generator( random_seed = 1, scenario = 3, time_unit = 1 / 360, years = 4, period_exposure = 200 ) individual_data <- IndividualDataPP( input_data, id = "claim_number", continuous_features = "AP_i", categorical_features = "claim_type", accident_period = "AP", calendar_period = "RP", input_time_granularity = "days", output_time_granularity = "quarters", years = 4, continuous_features_spline = NULL, calendar_period_extrapolation = F )
Here the optimal parameters for the data set above for XGB and NN.
hparameters_xgb_31 <- list( params = list( booster = "gbtree", eta = 0.1801517, subsample = 0.8768306, alpha = 0.6620562, lambda = 1.379897, min_child_weight = 15.61339, max_depth = 2 ), print_every_n = 0, nrounds = 3000, verbose = FALSE, early_stopping_rounds = 500 ) hparameters_nn_31 <- list( num_layers = 2, early_stopping = TRUE, patience = 350, verbose = FALSE, network_structure = NULL, num_nodes = 2, activation = "LeakyReLU", optim = "Adam", lr = 0.3542422, xi = 0.1803953, epsilon = 0, batch_size = as.integer(5000), epochs = as.integer(5500), num_workers = 0, tie = "Efron" )
We fit COX, NN and XGB.
resurv_fit_cox_31 <- ReSurv(individual_data, hazard_model = "COX") resurv_fit_nn_31 <- ReSurv(individual_data, hazard_model = "NN", hparameters = hparameters_nn_31) resurv_fit_xgb_31 <- ReSurv(individual_data, hazard_model = "XGB", hparameters = hparameters_xgb_31)
Extract the fitted survival curve.
hazard_frame_updated_cox <- resurv_fit_cox_31$hazard_frame hazard_frame_updated_nn <- resurv_fit_nn_31$hazard_frame hazard_frame_updated_xgb <- resurv_fit_xgb_31$hazard_frame cond_1 <- hazard_frame_updated_cox$AP_i == my_ap & hazard_frame_updated_cox$claim_type == 1 estimated_cox <- hazard_frame_updated_cox[cond_1, c("S_i", "DP_rev_i")] estimated_cox <- as.data.table(estimated_cox)[, model_label := 'COX'] cond_2 <- hazard_frame_updated_nn$AP_i == my_ap & hazard_frame_updated_nn$claim_type == 1 estimated_nn <- hazard_frame_updated_nn[cond_2, c("S_i", "DP_rev_i")] estimated_nn <- as.data.table(estimated_nn)[, model_label := 'NN'] cond_3 <- hazard_frame_updated_xgb$AP_i == my_ap & hazard_frame_updated_xgb$claim_type == 1 estimated_xgb <- hazard_frame_updated_xgb[cond_3, c("S_i", "DP_rev_i")] estimated_xgb <- as.data.table(estimated_xgb)[, model_label := 'XGB'] dt <- rbind(estimated_cox, estimated_nn, estimated_xgb)
Plot the fitted survival curve for the three models.
ggplot(data = dt, aes(x = DP_rev_i, y = S_i, color = model_label)) + geom_line(linewidth = 1) + facet_grid( ~ model_label) + annotate( geom = 'line', x = true_curve$DP_rev_i, y = true_curve$S_i, lty = 2, linewidth = 1 ) + scale_x_continuous( expand = c(0, 0), breaks = c(0, 440, 940), labels = c("1440", "1000", "500") ) + scale_y_continuous(expand = c(0, .001)) + xlab("Development time") + ylab("Survival function") + scale_color_manual(name = "Model", values = c("#AAAABC", "#a71429", "#4169E1")) + theme_bw() + theme( legend.position = "none", text = element_text(size = 20), axis.text.x = element_text( angle = 90, vjust = 0.5, hjust = 1 ) )
In this Section we show how we fitted and scored our models. While we only show an example for COX, the other models were fitted and scored in a similar fashion.
seed = 1 scenario = 0 input_data <- data_generator( random_seed = seed, scenario = scenario, time_unit = 1 / 360, years = 4, period_exposure = 200 ) individual_data <- IndividualDataPP( input_data, id = "claim_number", continuous_features = "AP_i", categorical_features = "claim_type", accident_period = "AP", calendar_period = "RP", input_time_granularity = "days", output_time_granularity = "quarters", years = 4, continuous_features_spline = NULL, calendar_period_extrapolation = FALSE )
Measuring computation time.
start <- Sys.time() resurv_fit <- ReSurv(individual_data, hazard_model = "COX") resurv_fit_predict <- predict(resurv_fit, grouping_method = "probability") time <- Sys.time() - start
Total absolute relative error and total absolute error by calendar time on a quarterly grid.
conversion_factor <- individual_data$conversion_factor max_dp_i <- 1440 # Compute the continuously Ranked Probability Score (CRPS) ---- crps_dt <- ReSurv::survival_crps(resurv_fit) crps_result <- mean(crps_dt$crps) # Compute the ARE tot ---- true_output <- resurv_fit$IndividualDataPP$full.data %>% mutate( DP_rev_o = floor(max_dp_i * conversion_factor) - ceiling(DP_i * conversion_factor + ((AP_i - 1) %% ( 1 / conversion_factor )) * conversion_factor) + 1, AP_o = ceiling(AP_i * conversion_factor), TR_o = AP_o - 1 ) %>% filter(DP_rev_o <= TR_o) %>% group_by(claim_type, AP_o, DP_rev_o) %>% mutate(claim_type = as.character(claim_type)) %>% summarize(I = sum(I), .groups = "drop") %>% filter(DP_rev_o > 0) out_list <- resurv_fit_predict$long_triangle_format_out out <- out_list$output_granularity #Total output score_total <- out[, c("claim_type", "AP_o", "DP_o", "expected_counts")] %>% mutate(DP_rev_o = 16 - DP_o + 1) %>% inner_join(true_output, by = c("claim_type", "AP_o", "DP_rev_o")) %>% mutate(ave = I - expected_counts, abs_ave = abs(ave)) %>% # from here it is reformulated for the are tot ungroup() %>% group_by(AP_o, DP_rev_o) %>% reframe(abs_ave = abs(sum(ave)), I = sum(I)) are_tot <- sum(score_total$abs_ave) / sum(score_total$I) dfs_output <- out %>% mutate(DP_rev_o = 16 - DP_o + 1) %>% select(AP_o, claim_type, DP_rev_o, f_o) %>% mutate(DP_rev_o = DP_rev_o) %>% distinct() #Cashflow on output scale.Etc quarterly cashflow development score_diagonal <- resurv_fit$IndividualDataPP$full.data %>% mutate( DP_rev_o = floor(max_dp_i * conversion_factor) - ceiling(DP_i * conversion_factor + ((AP_i - 1) %% ( 1 / conversion_factor )) * conversion_factor) + 1, AP_o = ceiling(AP_i * conversion_factor) ) %>% group_by(claim_type, AP_o, DP_rev_o) %>% mutate(claim_type = as.character(claim_type)) %>% summarize(I = sum(I), .groups = "drop") %>% group_by(claim_type, AP_o) %>% arrange(desc(DP_rev_o)) %>% mutate(I_cum = cumsum(I)) %>% mutate(I_cum_lag = lag(I_cum, default = 0)) %>% left_join(dfs_output, by = c("AP_o", "claim_type", "DP_rev_o")) %>% mutate(I_cum_hat = I_cum_lag * f_o, RP_o = max(DP_rev_o) - DP_rev_o + AP_o) %>% inner_join(true_output[, c("AP_o", "DP_rev_o")] %>% distinct() , by = c("AP_o", "DP_rev_o")) %>% group_by(AP_o, DP_rev_o) %>% reframe(abs_ave2_diag = abs(sum(I_cum_hat) - sum(I_cum)), I = sum(I)) are_cal_q <- sum(score_diagonal$abs_ave2_diag) / sum(score_diagonal$I)
Scoring on an yearly grid.
individual_data2 <- IndividualDataPP( input_data, id = "claim_number", continuous_features = "AP_i", categorical_features = "claim_type", accident_period = "AP", calendar_period = "RP", input_time_granularity = "days", output_time_granularity = "years", years = 4, continuous_features_spline = NULL, calendar_period_extrapolation = F ) resurv_predict_yearly <- predict(resurv_fit, newdata = individual_data2, grouping_method = "probability") conversion_factor <- individual_data2$conversion_factor max_dp_i <- 1440 true_output <- individual_data2$full.data %>% mutate( DP_rev_o = floor(max_dp_i * conversion_factor) - ceiling(DP_i * conversion_factor + ((AP_i - 1) %% ( 1 / conversion_factor )) * conversion_factor) + 1, AP_o = ceiling(AP_i * conversion_factor), TR_o = AP_o - 1 ) %>% filter(DP_rev_o <= TR_o) %>% group_by(claim_type, AP_o, DP_rev_o) %>% mutate(claim_type = as.character(claim_type)) %>% summarize(I = sum(I), .groups = "drop") %>% filter(DP_rev_o > 0) out_list_yearly <- resurv_predict_yearly$long_triangle_format_out out_yearly <- out_list_yearly$output_granularity dfs_output <- out_yearly %>% mutate(DP_rev_o = 4 - DP_o + 1) %>% select(AP_o, claim_type, DP_rev_o, f_o) %>% mutate(DP_rev_o = DP_rev_o) %>% distinct() score_diagonal_yearly <- individual_data2$full.data %>% mutate( DP_rev_o = floor(max_dp_i * conversion_factor) - ceiling(DP_i * conversion_factor + ((AP_i - 1) %% ( 1 / conversion_factor )) * conversion_factor) + 1, AP_o = ceiling(AP_i * conversion_factor) ) %>% group_by(claim_type, AP_o, DP_rev_o) %>% mutate(claim_type = as.character(claim_type)) %>% summarize(I = sum(I), .groups = "drop") %>% group_by(claim_type, AP_o) %>% arrange(desc(DP_rev_o)) %>% mutate(I_cum = cumsum(I)) %>% mutate(I_cum_lag = lag(I_cum, default = 0)) %>% left_join(dfs_output, by = c("AP_o", "claim_type", "DP_rev_o")) %>% mutate(I_cum_hat = I_cum_lag * f_o, RP_o = max(DP_rev_o) - DP_rev_o + AP_o) %>% inner_join(true_output[, c("AP_o", "DP_rev_o")] %>% distinct() , by = c("AP_o", "DP_rev_o")) %>% group_by(AP_o, DP_rev_o) %>% reframe(abs_ave2_diag = abs(sum(I_cum_hat) - sum(I_cum)), I = sum(I)) are_cal_y = sum(score_diagonal_yearly$abs_ave2_diag) / sum(score_diagonal_yearly$I)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.