## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
## ----eval=FALSE, include=TRUE-------------------------------------------------
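# # Attach ReSurv, point reticulate to the "pyresurv" virtual environment, and load ggplot2 and dplyr for the plots and data manipulation below.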
# library(ReSurv)
# library(reticulate)
# use_virtualenv("pyresurv")
# library(ggplot2)
# library(dplyr)
## ----eval=FALSE, include=TRUE-------------------------------------------------
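# # Install the development version of ReSurv from GitHub and check the installed package version.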
# library(devtools)
# devtools::install_github("edhofman/resurv")
# library(ReSurv)
# packageVersion("ReSurv")
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
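# # Install the Python dependencies required by ReSurv into a dedicated virtual environment.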
# ReSurv::install_pyresurv()
## ----eval=FALSE, include=TRUE-------------------------------------------------
# library(ReSurv)
# reticulate::use_virtualenv("pyresurv")
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
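# # Alternatively, install the Python dependencies into a custom environment location ("./venv"), which can be shared with other packages (here pysparklyr's PySpark installation).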
# envname <- "./venv"
# ReSurv::install_pyresurv(envname = envname)
# pysparklyr::install_pyspark(envname = envname)
## ----eval=FALSE, include=TRUE-------------------------------------------------
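# # Simulate individual claim data: scenario "alpha", daily time unit (1/360), 4 accident years, 200 exposures per period.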
# input_data <- data_generator(random_seed = 7,
#                              scenario = "alpha",
#                              time_unit = 1 / 360,
#                              years = 4,
#                              period_exposure = 200)
## ----eval=FALSE, include=TRUE-------------------------------------------------
# str(input_data)
## ----eval=FALSE, include=TRUE-------------------------------------------------
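# # Pre-process the simulated data: daily input granularity, quarterly output granularity, claim_type as a categorical feature and the accident period as a continuous feature.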
# individual_data <- IndividualDataPP(data = input_data,
#                                     categorical_features = "claim_type",
#                                     continuous_features = "AP",
#                                     accident_period = "AP",
#                                     calendar_period = "RP",
#                                     input_time_granularity = "days",
#                                     output_time_granularity = "quarters",
#                                     years = 4)
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
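# # 5-fold cross-validation of the XGBoost hazard model for a single hyper-parameter combination, run in parallel on 2 cores.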
#
# resurv_cv_xgboost <- ReSurvCV(IndividualDataPP = individual_data,
#                               model = "XGB",
#                               hparameters_grid = list(booster = "gbtree",
#                                                       eta = c(.001),
#                                                       max_depth = c(3),
#                                                       subsample = c(1),
#                                                       alpha = c(0),
#                                                       lambda = c(0),
#                                                       min_child_weight = c(.5)),
#                               print_every_n = 1L,
#                               nrounds = 1,
#                               verbose = FALSE,
#                               verbose.cv = TRUE,
#                               early_stopping_rounds = 1,
#                               folds = 5,
#                               parallel = TRUE,
#                               ncores = 2,
#                               random_seed = 1)
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
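# # 3-fold cross-validation of the neural-network hazard model over a small grid of layer and node counts.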
# resurv_cv_nn <- ReSurvCV(IndividualDataPP = individual_data,
#                          model = "NN",
#                          hparameters_grid = list(num_layers = c(1, 2),
#                                                  num_nodes = c(2, 4),
#                                                  optim = "Adam",
#                                                  activation = "ReLU",
#                                                  lr = .5,
#                                                  xi = .5,
#                                                  eps = .5,
#                                                  tie = "Efron",
#                                                  batch_size = as.integer(5000),
#                                                  early_stopping = "TRUE",
#                                                  patience = 20),
#                          epochs = as.integer(300),
#                          num_workers = 0,
#                          verbose = FALSE,
#                          verbose.cv = TRUE,
#                          folds = 3,
#                          parallel = FALSE,
#                          random_seed = as.integer(Sys.time()))
## ----eval=FALSE, include=TRUE-------------------------------------------------
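# # Bayesian hyper-parameter search for the XGBoost model; bayesOpt() is assumed to come from the
# # ParBayesianOptimization package (not attached above). The score is minus the out-of-sample likelihood returned by ReSurvCV.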
#
# bounds <- list(eta = c(0, 1),
#                max_depth = c(1L, 25L),
#                min_child_weight = c(0, 50),
#                subsample = c(0.51, 1),
#                lambda = c(0, 15),
#                alpha = c(0, 15))
#
#
# obj_func <- function(eta,
#                      max_depth,
#                      min_child_weight,
#                      subsample,
#                      lambda,
#                      alpha) {
#
#   xgbcv <- ReSurvCV(
#     IndividualDataPP = individual_data,
#     model = "XGB",
#     hparameters_grid = list(
#       booster = "gbtree",
#       eta = eta,
#       max_depth = max_depth,
#       subsample = subsample,
#       alpha = alpha,
#       lambda = lambda,
#       min_child_weight = min_child_weight
#     ),
#     print_every_n = 1L,
#     nrounds = 500,
#     verbose = FALSE,
#     verbose.cv = TRUE,
#     early_stopping_rounds = 30,
#     folds = 3,
#     parallel = FALSE,
#     random_seed = as.integer(Sys.time())
#   )
#
#   lst <- list(Score = -xgbcv$out.cv.best.oos$test.lkh,
#               train.lkh = xgbcv$out.cv.best.oos$train.lkh)
#
#   return(lst)
# }
#
#
#
# bayes_out <- bayesOpt(FUN = obj_func,
#                       bounds = bounds,
#                       initPoints = 50,
#                       iters.n = 1000,
#                       iters.k = 50,
#                       otherHalting = list(timeLimit = 18000))
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
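# # Analogous Bayesian search for the neural network; the integer-coded optimiser and activation choices are mapped back to their labels inside the objective function.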
# bounds <- list(num_layers = c(2L, 10L),
#                num_nodes = c(2L, 10L),
#                optim = c(1L, 2L),
#                activation = c(1L, 2L),
#                lr = c(0.005, 0.5),
#                xi = c(0, 0.5),
#                eps = c(0, 0.5))
#
#
# obj_func <- function(num_layers,
#                      num_nodes,
#                      optim,
#                      activation,
#                      lr,
#                      xi,
#                      eps) {
#
#   optim <- switch(optim,
#                   "Adam",
#                   "SGD")
#
#   activation <- switch(activation, "LeakyReLU", "SELU")
#   batch_size <- 5000L
#   num_layers <- as.integer(num_layers)
#   num_nodes <- as.integer(num_nodes)
#
#   deepsurv_cv <- ReSurvCV(IndividualDataPP = individual_data,
#                           model = "NN",
#                           hparameters_grid = list(num_layers = num_layers,
#                                                   num_nodes = num_nodes,
#                                                   optim = optim,
#                                                   activation = activation,
#                                                   lr = lr,
#                                                   xi = xi,
#                                                   eps = eps,
#                                                   tie = "Efron",
#                                                   batch_size = batch_size,
#                                                   early_stopping = "TRUE",
#                                                   patience = 20),
#                           epochs = as.integer(300),
#                           num_workers = 0,
#                           verbose = FALSE,
#                           verbose.cv = TRUE,
#                           folds = 3,
#                           parallel = FALSE,
#                           random_seed = as.integer(Sys.time()))
#
#   lst <- list(Score = -deepsurv_cv$out.cv.best.oos$test.lkh,
#               train.lkh = deepsurv_cv$out.cv.best.oos$train.lkh)
#
#   return(lst)
# }
#
# bayes_out <- bayesOpt(FUN = obj_func,
#                       bounds = bounds,
#                       initPoints = 50,
#                       iters.n = 1000,
#                       iters.k = 50,
#                       otherHalting = list(timeLimit = 18000))
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
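# # Fit the Cox proportional hazards model on the pre-processed data.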
#
# resurv_fit_cox <- ReSurv(individual_data,
#                          hazard_model = "COX")
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
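# # Fit the XGBoost hazard model with a fixed set of tuned hyper-parameters.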
# hparameters_xgb <- list(
#   params = list(
#     booster = "gbtree",
#     eta = 0.9611239,
#     subsample = 0.62851,
#     alpha = 5.836211,
#     lambda = 15,
#     min_child_weight = 29.18158,
#     max_depth = 1
#   ),
#   print_every_n = 0,
#   nrounds = 3000,
#   verbose = FALSE,
#   early_stopping_rounds = 500
# )
#
#
# resurv_fit_xgb <- ReSurv(individual_data,
#                          hazard_model = "XGB",
#                          hparameters = hparameters_xgb)
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
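# # Fit the neural-network hazard model with a fixed set of tuned hyper-parameters.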
#
# hparameters_nn <- list(num_layers = 2,
#                        early_stopping = TRUE,
#                        patience = 350,
#                        verbose = FALSE,
#                        network_structure = NULL,
#                        num_nodes = 10,
#                        activation = "LeakyReLU",
#                        optim = "SGD",
#                        lr = 0.02226655,
#                        xi = 0.4678993,
#                        epsilon = 0,
#                        batch_size = 5000L,
#                        epochs = 5500L,
#                        num_workers = 0,
#                        tie = "Efron")
#
#
# resurv_fit_nn <- ReSurv(individual_data,
#                         hazard_model = "NN",
#                         hparameters = hparameters_nn)
#
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
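# # Predict from the Cox fit at quarterly (default), yearly and monthly output granularity; the coarser granularities reuse the daily fit via new IndividualDataPP objects and probability-based grouping.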
# resurv_fit_predict_q <- predict(resurv_fit_cox)
#
# individual_data_y <- IndividualDataPP(input_data,
#                                       id = "claim_number",
#                                       continuous_features = "AP_i",
#                                       categorical_features = "claim_type",
#                                       accident_period = "AP",
#                                       calendar_period = "RP",
#                                       input_time_granularity = "days",
#                                       output_time_granularity = "years",
#                                       years = 4,
#                                       continuous_features_spline = NULL,
#                                       calendar_period_extrapolation = FALSE)
#
# resurv_fit_predict_y <- predict(resurv_fit_cox,
#                                 newdata = individual_data_y,
#                                 grouping_method = "probability")
#
# individual_data_m <- IndividualDataPP(input_data,
#                                       id = "claim_number",
#                                       continuous_features = "AP_i",
#                                       categorical_features = "claim_type",
#                                       accident_period = "AP",
#                                       calendar_period = "RP",
#                                       input_time_granularity = "days",
#                                       output_time_granularity = "months",
#                                       years = 4,
#                                       continuous_features_spline = NULL,
#                                       calendar_period_extrapolation = FALSE)
#
# resurv_fit_predict_m <- predict(resurv_fit_cox,
#                                 newdata = individual_data_m,
#                                 grouping_method = "probability")
#
#
#
# model_s <- summary(resurv_fit_predict_y)
# print(model_s)
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
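# # Predictions from the XGBoost and neural-network fits at the default (quarterly) output granularity.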
# resurv_predict_xgb <- predict(resurv_fit_xgb)
# resurv_predict_nn <- predict(resurv_fit_nn)
## ----eval=FALSE, include=TRUE-------------------------------------------------
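# # Look up the group_o code for accident quarter 15, claim type 1, then plot the corresponding group (group_code = 30).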
#
# resurv_fit_predict_q$long_triangle_format_out$output_granularity %>%
#   filter(AP_o == 15 & claim_type == 1) %>%
#   filter(row_number() == 1) %>%
#   select(group_o)
#
# plot(resurv_fit_predict_q,
#      granularity = "output",
#      title_par = "COX: Accident Quarter 15 Claim Type 1",
#      group_code = 30)
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
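# # Continuous ranked probability score (CRPS) of the Cox fit, averaged over the individual scores.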
# crps <- survival_crps(resurv_fit_cox)
# m_crps <- mean(crps$crps)
# m_crps
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
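# # Exploratory plots: empirical CDF of the notification delay (RT - AT) by claim type, and a bar chart of claim counts per claim type.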
# p1 <- input_data %>%
# as.data.frame() %>%
# mutate(claim_type = as.factor(claim_type)) %>%
# ggplot(aes(x = RT - AT, color = claim_type)) +
# stat_ecdf(size = 1) +
# labs(title = "", x = "Notification delay (in days)", y = "ECDF") +
# xlim(0.01, 1500) +
# scale_color_manual(
# values = c("royalblue", "#a71429"),
# labels = c("Claim type 0", "Claim type 1")
# ) +
# scale_linetype_manual(values = c(1, 3),
# labels = c("Claim type 0", "Claim type 1")) +
# guides(
# color = guide_legend(
# title = "Claim type",
# override.aes = list(
# color = c("royalblue", "#a71429"),
# linewidth = 2
# )
# ),
# linetype = guide_legend(
# title = "Claim type",
# override.aes = list(linetype = c(1, 3), linewidth = 0.7)
# )
# ) +
# theme_bw() +
# theme(
# axis.text = element_text(size = 20),
# axis.title.y = element_text(size = 20),
# axis.title.x = element_text(size = 20),
# legend.text = element_text(size = 20)
# )
# p1
#
#
# p2 <- input_data %>%
# as.data.frame() %>%
# mutate(claim_type = as.factor(claim_type)) %>%
# ggplot(aes(x = claim_type, fill = claim_type)) +
# geom_bar() +
# scale_fill_manual(
# values = c("royalblue", "#a71429"),
# labels = c("Claim type 0", "Claim type 1")
# ) +
# guides(fill = guide_legend(title = "Claim type")) +
# theme_bw() +
# labs(title = "", x = "Claim Type", y = "") +
# theme(
# axis.text = element_text(size = 20),
# axis.title.y = element_text(size = 20),
# axis.title.x = element_text(size = 20),
# legend.text = element_text(size = 20)
# )
# p2
#
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
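# # Chain-ladder development factors computed from the observed (aggregated) data at quarterly, monthly and yearly output granularity, plotted by development period.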
#
# clmodel <- resurv_fit_cox$IndividualDataPP$training.data %>%
# mutate(DP_o = 16 -
# DP_rev_o + 1) %>%
# group_by(AP_o, DP_o) %>%
# summarize(I = sum(I), .groups = "drop") %>%
# group_by(AP_o) %>%
# arrange(DP_o) %>%
# mutate(I_cum = cumsum(I), I_cum_lag = lag(I_cum, default = 0)) %>%
# ungroup() %>%
# group_by(DP_o) %>%
# reframe(df_o = sum(I_cum * (
# AP_o <= max(resurv_fit_cox$IndividualDataPP$training.data$AP_o) - DP_o +
# 1
# )) /
# sum(I_cum_lag * (
# AP_o <= max(resurv_fit_cox$IndividualDataPP$training.data$AP_o) - DP_o +
# 1
# )),
# I = sum(I * (
# AP_o <= max(resurv_fit_cox$IndividualDataPP$training.data$AP_o) - DP_o
# ))) %>%
# mutate(DP_o_join = DP_o - 1) %>%
# as.data.frame()
#
#
# clmodel %>%
# filter(DP_o > 1) %>%
# ggplot(aes(x = DP_o, y = df_o)) +
# geom_line(linewidth = 2.5,
# color = "#454555") +
# labs(title = "Chain ladder",
# x = "Development quarter",
# y = "Development factor") +
# ylim(1, 3. + .01) +
# theme_bw(base_size = rel(5)) +
# theme(plot.title = element_text(size = 20))
#
# ##
#
# clmodel_months <- individual_data_m$training.data %>%
# mutate(DP_o = 48 -
# DP_rev_o + 1) %>%
# group_by(AP_o, DP_o) %>%
# summarize(I = sum(I), .groups = "drop") %>%
# group_by(AP_o) %>%
# arrange(DP_o) %>%
# mutate(I_cum = cumsum(I), I_cum_lag = lag(I_cum, default = 0)) %>%
# ungroup() %>%
# group_by(DP_o) %>%
# reframe(df_o = sum(I_cum * (
# AP_o <= max(individual_data_m$training.data$AP_o) - DP_o + 1
# )) /
# sum(I_cum_lag * (
# AP_o <= max(individual_data_m$training.data$AP_o) - DP_o + 1
# )),
# I = sum(I * (
# AP_o <= max(individual_data_m$training.data$AP_o) - DP_o
# ))) %>%
# mutate(DP_o_join = DP_o - 1) %>%
# as.data.frame()
#
# ticks_at <- seq(1, 48, 4)
# labels_as <- as.character(ticks_at)
#
# clmodel_months %>%
# filter(DP_o > 1) %>%
# ggplot(aes(x = DP_o,
# y = df_o)) +
# geom_line(linewidth = 2.5,
# color = "#454555") +
# labs(title = "Chain ladder",
# x = "Development month",
# y = "Development factor") +
# ylim(1, 2.5 + .01) +
# scale_x_continuous(breaks = ticks_at, labels = labels_as) +
# theme_bw(base_size = rel(5)) +
# theme(plot.title = element_text(size = 20))
#
#
# ##
#
# clmodel_years <- individual_data_y$training.data %>%
# mutate(DP_o = 4 -
# DP_rev_o + 1) %>%
# group_by(AP_o, DP_o) %>%
# summarize(I = sum(I), .groups = "drop") %>%
# group_by(AP_o) %>%
# arrange(DP_o) %>%
# mutate(I_cum = cumsum(I), I_cum_lag = lag(I_cum, default = 0)) %>%
# ungroup() %>%
# group_by(DP_o) %>%
# reframe(df_o = sum(I_cum * (
# AP_o <= max(individual_data_y$training.data$AP_o) - DP_o + 1
# )) /
# sum(I_cum_lag * (
# AP_o <= max(individual_data_y$training.data$AP_o) - DP_o + 1
# )),
# I = sum(I * (
# AP_o <= max(individual_data_y$training.data$AP_o) - DP_o
# ))) %>%
# mutate(DP_o_join = DP_o - 1) %>%
# as.data.frame()
#
# ticks_at <- seq(1, 4, 1)
# labels_as <- as.character(ticks_at)
#
# clmodel_years %>%
# filter(DP_o > 1) %>%
# ggplot(aes(x = DP_o,
# y = df_o)) +
# geom_line(linewidth = 2.5,
# color = "#454555") +
# labs(title = "Chain ladder",
# x = "Development year",
# y = "Development factor") +
# ylim(1, 2.5 + .01) +
# scale_x_continuous(breaks = ticks_at, labels = labels_as) +
# theme_bw(base_size = rel(5)) +
# theme(plot.title = element_text(size = 20))
#
## ----eval=FALSE, include=TRUE-------------------------------------------------
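# # Development plots for selected accident periods and claim types at quarterly, monthly and yearly granularity; each block first looks up the relevant group_o code.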
# ct <- 0
# ap <- 12
#
# resurv_fit_predict_q$long_triangle_format_out$output_granularity %>%
# filter(AP_o == ap & claim_type == ct) %>%
# filter(row_number() == 1) %>%
# select(group_o)
#
# plot(
# resurv_fit_predict_q,
# granularity = "output",
# title_par = "COX: Accident Quarter 12 Claim Type 0",
# group_code = 23
# )
#
#
# ct <- 0
# ap <- 36
#
# resurv_fit_predict_m$long_triangle_format_out$output_granularity %>%
# filter(AP_o == ap & claim_type == ct) %>%
# filter(row_number() == 1) %>%
# select(group_o)
#
# plot(
# resurv_fit_predict_m,
# granularity = "output",
# color_par = "#a71429",
# title_par = "COX: Accident Month 36 Claim Type 0",
# group_code = 71
# )
#
#
#
# ct <- 1
# ap <- 7
#
# resurv_fit_predict_m$long_triangle_format_out$output_granularity %>%
# filter(AP_o == ap & claim_type == ct) %>%
# filter(row_number() == 1) %>%
# select(group_o)
#
# plot(
# resurv_fit_predict_m,
# granularity = "output",
# color_par = "#a71429",
# title_par = "COX: Accident Month 7 Claim Type 1",
# group_code = 14
# )
#
#
# ct <- 0
# ap <- 2
#
# resurv_fit_predict_y$long_triangle_format_out$output_granularity %>%
# filter(AP_o == ap & claim_type == ct) %>%
# filter(row_number() == 1) %>%
# select(group_o)
#
# plot(
# resurv_fit_predict_y,
# granularity = "output",
# color_par = "#FF6A7A",
# title_par = "COX: Accident Year 2 Claim Type 0",
# group_code = 3
# )
#
#
# ct <- 1
# ap <- 3
#
# resurv_fit_predict_y$long_triangle_format_out$output_granularity %>%
# filter(AP_o == ap & claim_type == ct) %>%
# filter(row_number() == 1) %>%
# select(group_o)
#
#
# plot(
# resurv_fit_predict_y,
# granularity = "output",
# color_par = "#FF6A7A",
# title_par = "COX: Accident Year 3 Claim Type 1",
# group_code = 6
# )
## ----eval=FALSE, include=TRUE-------------------------------------------------
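# # Scoring: absolute relative error on the total predicted counts (ARE TOT) and on the
# # calendar-period (cash-flow) diagonals (ARE CAL) for the Cox, XGB and NN fits, plus a chain-ladder benchmark.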
#
# conversion_factor <- individual_data$conversion_factor
#
# max_dp_i <- 1440
#
# true_output <- resurv_fit_cox$IndividualDataPP$full.data %>%
# mutate(
# DP_rev_o = floor(max_dp_i * conversion_factor) -
# ceiling(
# DP_i * conversion_factor +
# ((AP_i - 1) %% (
# 1 / conversion_factor
# )) * conversion_factor) + 1,
# AP_o = ceiling(AP_i * conversion_factor),
# TR_o = AP_o - 1
# ) %>%
# filter(DP_rev_o <= TR_o) %>%
# group_by(claim_type, AP_o, DP_rev_o) %>%
# mutate(claim_type = as.character(claim_type)) %>%
# summarize(I = sum(I), .groups = "drop") %>%
# filter(DP_rev_o > 0)
#
# out_list <- resurv_fit_predict_q$long_triangle_format_out
# out <- out_list$output_granularity
#
# #Total output
# score_total <- out[, c("claim_type", "AP_o", "DP_o", "expected_counts")] %>%
# mutate(DP_rev_o = 16 - DP_o + 1) %>%
# inner_join(true_output, by = c("claim_type", "AP_o", "DP_rev_o")) %>%
# mutate(ave = I - expected_counts, abs_ave = abs(ave)) %>%
# # from here on, aggregate over claim types to compute ARE TOT
# ungroup() %>%
# group_by(AP_o, DP_rev_o) %>%
# reframe(abs_ave = abs(sum(ave)), I = sum(I))
#
# are_tot <- sum(score_total$abs_ave) / sum(score_total$I)
#
#
# dfs_output <- out %>%
# mutate(DP_rev_o = 16 - DP_o + 1) %>%
# select(AP_o, claim_type, DP_rev_o, f_o) %>%
# mutate(DP_rev_o = DP_rev_o) %>%
# distinct()
#
# # Cash flow on the output scale, i.e. quarterly cash-flow development
# score_diagonal <- resurv_fit_cox$IndividualDataPP$full.data %>%
# mutate(
# DP_rev_o = floor(max_dp_i * conversion_factor) -
# ceiling(
# DP_i * conversion_factor +
# ((AP_i - 1) %% (
# 1 / conversion_factor
# )) * conversion_factor) + 1,
# AP_o = ceiling(AP_i * conversion_factor)
# ) %>%
# group_by(claim_type, AP_o, DP_rev_o) %>%
# mutate(claim_type = as.character(claim_type)) %>%
# summarize(I = sum(I), .groups = "drop") %>%
# group_by(claim_type, AP_o) %>%
# arrange(desc(DP_rev_o)) %>%
# mutate(I_cum = cumsum(I)) %>%
# mutate(I_cum_lag = lag(I_cum, default = 0)) %>%
# left_join(dfs_output, by = c("AP_o", "claim_type", "DP_rev_o")) %>%
# mutate(I_cum_hat = I_cum_lag * f_o,
# RP_o = max(DP_rev_o) - DP_rev_o + AP_o) %>%
# inner_join(true_output[, c("AP_o", "DP_rev_o")] %>% distinct()
# , by = c("AP_o", "DP_rev_o")) %>%
# group_by(AP_o, DP_rev_o) %>%
# reframe(abs_ave2_diag = abs(sum(I_cum_hat) - sum(I_cum)), I = sum(I))
#
# are_cal_q <- sum(score_diagonal$abs_ave2_diag) / sum(score_diagonal$I)
#
#
# # scoring XGB ----
#
# out_xgb <- resurv_predict_xgb$long_triangle_format_out$output_granularity
#
# score_total_xgb <- out_xgb[, c("claim_type",
# "AP_o",
# "DP_o",
# "expected_counts")] %>%
# mutate(DP_rev_o = 16 - DP_o + 1) %>%
# inner_join(true_output, by = c("claim_type", "AP_o", "DP_rev_o")) %>%
# mutate(ave = I - expected_counts, abs_ave = abs(ave)) %>%
# # from here on, aggregate over claim types to compute ARE TOT
# ungroup() %>%
# group_by(AP_o, DP_rev_o) %>%
# reframe(abs_ave = abs(sum(ave)), I = sum(I))
#
# are_tot_xgb <- sum(score_total_xgb$abs_ave) / sum(score_total_xgb$I)
#
#
# dfs_output_xgb <- out_xgb %>%
# mutate(DP_rev_o = 16 - DP_o + 1) %>%
# select(AP_o, claim_type, DP_rev_o, f_o) %>%
# mutate(DP_rev_o = DP_rev_o) %>%
# distinct()
#
# # Cash flow on the output scale, i.e. quarterly cash-flow development
# score_diagonal_xgb <- resurv_fit_cox$IndividualDataPP$full.data %>%
# mutate(
# DP_rev_o = floor(max_dp_i * conversion_factor) -
# ceiling(
# DP_i * conversion_factor +
# ((AP_i - 1) %% (
# 1 / conversion_factor
# )) * conversion_factor) + 1,
# AP_o = ceiling(AP_i * conversion_factor)
# ) %>%
# group_by(claim_type, AP_o, DP_rev_o) %>%
# mutate(claim_type = as.character(claim_type)) %>%
# summarize(I = sum(I), .groups = "drop") %>%
# group_by(claim_type, AP_o) %>%
# arrange(desc(DP_rev_o)) %>%
# mutate(I_cum = cumsum(I)) %>%
# mutate(I_cum_lag = lag(I_cum, default = 0)) %>%
# left_join(dfs_output_xgb, by = c("AP_o", "claim_type", "DP_rev_o")) %>%
# mutate(I_cum_hat = I_cum_lag * f_o,
# RP_o = max(DP_rev_o) - DP_rev_o + AP_o) %>%
# inner_join(true_output[, c("AP_o", "DP_rev_o")] %>% distinct()
# , by = c("AP_o", "DP_rev_o")) %>%
# group_by(AP_o, DP_rev_o) %>%
# reframe(abs_ave2_diag = abs(sum(I_cum_hat) - sum(I_cum)), I = sum(I))
#
# are_cal_q_xgb <- sum(score_diagonal_xgb$abs_ave2_diag) / sum(score_diagonal_xgb$I)
#
# # scoring NN ----
#
# out_nn <- resurv_predict_nn$long_triangle_format_out$output_granularity
#
# score_total_nn <- out_nn[, c("claim_type",
# "AP_o",
# "DP_o",
# "expected_counts")] %>%
# mutate(DP_rev_o = 16 - DP_o + 1) %>%
# inner_join(true_output, by = c("claim_type", "AP_o", "DP_rev_o")) %>%
# mutate(ave = I - expected_counts, abs_ave = abs(ave)) %>%
# # from here on, aggregate over claim types to compute ARE TOT
# ungroup() %>%
# group_by(AP_o, DP_rev_o) %>%
# reframe(abs_ave = abs(sum(ave)), I = sum(I))
#
# are_tot_nn <- sum(score_total_nn$abs_ave) / sum(score_total_nn$I)
#
#
# dfs_output_nn <- out_nn %>%
# mutate(DP_rev_o = 16 - DP_o + 1) %>%
# select(AP_o, claim_type, DP_rev_o, f_o) %>%
# mutate(DP_rev_o = DP_rev_o) %>%
# distinct()
#
# # Cash flow on the output scale, i.e. quarterly cash-flow development
# score_diagonal_nn <- resurv_fit_cox$IndividualDataPP$full.data %>%
# mutate(
# DP_rev_o = floor(max_dp_i * conversion_factor) -
# ceiling(
# DP_i * conversion_factor +
# ((AP_i - 1) %% (
# 1 / conversion_factor
# )) * conversion_factor) + 1,
# AP_o = ceiling(AP_i * conversion_factor)
# ) %>%
# group_by(claim_type, AP_o, DP_rev_o) %>%
# mutate(claim_type = as.character(claim_type)) %>%
# summarize(I = sum(I), .groups = "drop") %>%
# group_by(claim_type, AP_o) %>%
# arrange(desc(DP_rev_o)) %>%
# mutate(I_cum = cumsum(I)) %>%
# mutate(I_cum_lag = lag(I_cum, default = 0)) %>%
# left_join(dfs_output_nn, by = c("AP_o", "claim_type", "DP_rev_o")) %>%
# mutate(I_cum_hat = I_cum_lag * f_o,
# RP_o = max(DP_rev_o) - DP_rev_o + AP_o) %>%
# inner_join(true_output[, c("AP_o", "DP_rev_o")] %>% distinct()
# , by = c("AP_o", "DP_rev_o")) %>%
# group_by(AP_o, DP_rev_o) %>%
# reframe(abs_ave2_diag = abs(sum(I_cum_hat) - sum(I_cum)), I = sum(I))
#
# are_cal_q_nn <- sum(score_diagonal_nn$abs_ave2_diag) / sum(score_diagonal_nn$I)
#
# # Scoring Chain-Ladder ----
#
# true_output_cl <- individual_data$full.data %>%
# mutate(
# DP_rev_o = floor(max_dp_i * conversion_factor) -
# ceiling(
# DP_i * conversion_factor +
# ((AP_i - 1) %% (
# 1 / conversion_factor
# )) * conversion_factor) + 1,
# AP_o = ceiling(AP_i * conversion_factor)
# ) %>%
# filter(DP_rev_o <= TR_o) %>%
# mutate(DP_o = max(individual_data$training.data$DP_rev_o) - DP_rev_o + 1) %>%
# group_by(AP_o, DP_o, DP_rev_o) %>%
# summarize(I = sum(I), .groups = "drop") %>%
# filter(DP_rev_o > 0)
# latest_observed <- individual_data$training.data %>%
# filter(DP_rev_o >= TR_o) %>%
# mutate(DP_o = max(individual_data$training.data$DP_rev_o) - DP_rev_o + 1) %>%
# group_by(AP_o) %>%
# mutate(DP_max = max(DP_o)) %>%
# group_by(AP_o, DP_max) %>%
# summarize(I = sum(I), .groups = "drop")
#
# clmodel <- individual_data$training.data %>%
# mutate(DP_o = max(individual_data$training.data$DP_rev_o) - DP_rev_o + 1) %>%
# group_by(AP_o, DP_o) %>%
# summarize(I = sum(I), .groups = "drop") %>%
# group_by(AP_o) %>%
# arrange(DP_o) %>%
# mutate(I_cum = cumsum(I), I_cum_lag = lag(I_cum, default = 0)) %>%
# ungroup() %>%
# group_by(DP_o) %>%
# reframe(df = sum(I_cum * (
# AP_o <= max(individual_data$training.data$AP_o) - DP_o + 1
# )) /
# sum(I_cum_lag * (
# AP_o <= max(individual_data$training.data$AP_o) - DP_o + 1
# )), I = sum(I * (
# AP_o <= max(individual_data$training.data$AP_o) - DP_o
# ))) %>%
# mutate(DP_o_join = DP_o) %>%
# mutate(DP_rev_o = max(DP_o) - DP_o + 1)
#
# predictions <- expand.grid(AP_o = latest_observed$AP_o,
# DP_o = clmodel$DP_o_join) %>%
# left_join(clmodel[, c("DP_o_join", "df")], by = c("DP_o" = "DP_o_join")) %>%
# left_join(latest_observed, by = "AP_o") %>%
# rowwise() %>%
# filter(DP_o > DP_max) %>%
# ungroup() %>%
# group_by(AP_o) %>%
# arrange(DP_o) %>%
# mutate(df_cum = cumprod(df)) %>%
# mutate(I_expected = I * df_cum - I * lag(df_cum, default = 1)) %>%
# select(DP_o, AP_o, I_expected)
#
# conversion_factor <- individual_data$conversion_factor
# max_dp_i <- 1440
# score_total <- predictions %>%
# inner_join(true_output_cl, by = c("AP_o", "DP_o")) %>%
# mutate(ave = I - I_expected, abs_ave = abs(ave)) %>%
# # from here on, aggregate to compute ARE TOT
# ungroup() %>%
# group_by(AP_o, DP_rev_o) %>%
# reframe(abs_ave = abs(sum(ave)), I = sum(I))
#
# are_tot <- sum(score_total$abs_ave) / sum(score_total$I)
#
# score_diagonal <- individual_data$full.data %>%
# mutate(
# DP_rev_o = floor(max_dp_i * conversion_factor) -
# ceiling(
# DP_i * conversion_factor +
# ((AP_i - 1) %% (
# 1 / conversion_factor
# )) * conversion_factor) + 1,
# AP_o = ceiling(AP_i * conversion_factor)
# ) %>%
# group_by(claim_type, AP_o, DP_rev_o) %>%
# mutate(claim_type = as.character(claim_type)) %>%
# summarize(I = sum(I), .groups = "drop") %>%
# group_by(claim_type, AP_o) %>%
# arrange(desc(DP_rev_o)) %>%
# mutate(I_cum = cumsum(I)) %>%
# mutate(I_cum_lag = lag(I_cum, default = 0)) %>%
# mutate(DP_o = max(individual_data$training.data$DP_rev_o) - DP_rev_o + 1) %>%
# left_join(clmodel[, c("DP_o", "df")], by = c("DP_o")) %>%
# mutate(I_cum_hat = I_cum_lag * df,
# RP_o = max(DP_rev_o) - DP_rev_o + AP_o) %>%
# inner_join(true_output_cl[, c("AP_o", "DP_rev_o")] %>% distinct()
# , by = c("AP_o", "DP_rev_o")) %>%
# group_by(AP_o, DP_rev_o) %>%
# reframe(abs_ave2_diag = abs(sum(I_cum_hat) - sum(I_cum)), I = sum(I))
#
# are_cal_q <- sum(score_diagonal$abs_ave2_diag) / sum(score_diagonal$I)
#