R/glmReserve.R
In ChainLadder: Statistical Methods and Models for Claims Reserving in General Insurance

Documented in glmReserve plot.glmReserve print.glmReserve resid.glmReserve summary.glmReserve

###################################################
##    glm-based insurance loss reserving 
##            Wayne Zhang 
##     actuary_zhang@hotmail.com
###################################################

glmReserve <- function(triangle, var.power = 1, link.power = 0,
                       cum = TRUE, mse.method = c("formula", "bootstrap"), 
                       nsim = 1000, nb = FALSE, ...){
  origin <- NULL
  call <- match.call()
  mse.method <- match.arg(mse.method)
  
  if (!("triangle") %in% class(triangle))
    stop("triangle must be of class 'triangle'")
  if ("offset" %in% names(list(...)))
    stop("'offset' should be passed using the 
         'exposure' attribute of the triangle!")
  if ("weight" %in% names(list(...)))
    stop("'weight' should not be used")  
  
  # convert to incremental losses if needed    
  tr.incr <- if (cum) cum2incr(triangle) else triangle    
  
  # create family
  fam <- tweedie(ifelse(!is.null(var.power), var.power, 1.5), link.power)
  
  # convert to long format
  lda <-  as.data.frame(tr.incr, origin = names(dimnames(tr.incr))[1], 
                        dev = names(dimnames(tr.incr))[2])
  names(lda)[1:3] <- c("origin", "dev", "value")
  lda <- transform(lda, origin = factor(origin, levels = dimnames(triangle)[[1]]))
  
  # create offset
  if (is.null(attr(tr.incr, "exposure"))) {
    lda$offset <-  rep(0, nrow(lda))
  } else {
    # Allow exposures to be expanded to the long data.frame by matching
    #   names(exposure) with triangle's origin values, or by the arithmetic
    #   formula that has always been there, if triangles origin values are
    #   convertible from their character representations to numeric.
    expo <- attr(tr.incr, "exposure")
    if (is.null(names(expo))) names(expo) <- NA
    if (all(names(expo) %in% lda$origin)) lda$offset <- 
        fam$linkfun(attr(tr.incr, "exposure")[lda$origin])
    else {
      numorig <- suppressWarnings(as.numeric(lda$origin))
      if (any(is.na(numorig))) stop(
        "Unnamed exposures incompatible when triangle's origin values are not convertible from character to numeric."
      )
      lda$offset <- 
        fam$linkfun(attr(tr.incr, "exposure")[numorig - min (numorig) + 1])
    }
  }
  
  # divide data 
  ldaFit <- subset(lda, !is.na(lda$value)) #warnings?  
  ldaOut <- subset(lda, is.na(lda$value)) 
   
  # fit the model
  if (nb){
    ldaFit$value <- round(ldaFit$value)  #warning
    glmFit <- glm.nb(value ~ factor(origin) + factor(dev), 
                  data = ldaFit, ...)
    phi <- 1.0
    fam <- glmFit$family
  } else {
    if (!is.null(var.power)){
      glmFit <- glm(value ~ factor(origin) + factor(dev), family = fam,
                data = ldaFit, offset = offset, ...)
      phi <- with(glmFit, sum(weights * residuals^2) / df.residual)
    } else{ 
      glmFit <- cpglm(value ~ factor(origin) + factor(dev),
                    link = link.power, data = ldaFit, offset = offset, ...)
      phi <- glmFit$phi
      # update fam
      fam <- tweedie(glmFit$p, link.power)
    }
  }
  ################################
  ## calculate reserves 
  ################################
  
  # prediction for each cell              
  yp <- predict(glmFit, newdata = ldaOut, type = "response")
  eta <- as.numeric(predict(glmFit, newdata = ldaOut, type = "link"))
                
  # sum to get reserve by year
  resMeanAy <- tapply(yp, factor(ldaOut$origin), sum)
  resMeanTot <- sum(resMeanAy)

  ################################
  ## calculate prediction errs 
  ################################                
                
  if (mse.method == "formula"){    
    
    # process variance 
    ypv <- fam$variance(yp)
    mseProcAy <-  phi * tapply(ypv, factor(ldaOut$origin), sum)
    mseProcTot <-  phi * sum(ypv) 
    
    # estimation variance                
    
    # get design matrix for prediction
    Terms <- delete.response(terms(glmFit))
    if (class(glmFit)[1] == "cpglm"){
      xlevels <- .getXlevels(Terms, glmFit$model.frame)
    } else { 
      xlevels <- glmFit$xlevels
    }
    X <- model.matrix(Terms, ldaOut, xlev = xlevels)
    
    # var(eta)
    v <- X %*% vcov(glmFit) %*% t(X)  # vcov has already been scaled
    muEta <- fam$mu.eta(eta)
    mseEstAy <- sapply(sort(unique(ldaOut$origin)), function(x){
  				        id <- ldaOut$origin == x	
                  # d(mu)/d(eta) from the delta method
  				        tmp <- diag(muEta[id], nrow = length(eta[id]))
  				        sum(tmp %*% v[id, id] %*% t(tmp))    				        
              })
    mseEstTot <-  sum(diag(muEta) %*% v %*% diag(muEta))
    
    # pred errs
    S.E <- sqrt(c(mseProcAy, mseProcTot) + c(mseEstAy, mseEstTot))          
  }
                       
  if (mse.method == "bootstrap"){
    nO <- nrow(ldaFit)
    nB <- length(coef(glmFit))
    # residual inflation factor
    bias <- sqrt(nO/df.residual(glmFit))
    
    resMeanAyB <- resPredAyB <- matrix(0, nsim, length(resMeanAy))    
    sims.par <- matrix(0, nsim, nB + 2)
    mu <- fitted(glmFit)
    mup <- sqrt(fam$variance(mu))
    
    # loop nsim times 
    for (i in 1:nsim){
      # while loop to avoid negative generated incremental loss
      ybad <- 1 
      while (ybad){
        rn <- bias * sample(resid(glmFit, type = "pearson"), nO, replace = TRUE)
        yB <- rn * mup + mu
        if (all(yB >= 0) || (!is.null(var.power) && var.power == 0))
          ybad <- 0  # Normal 
      }
      
      # fit model on new data
      if (nb){
        yB <- round(yB)
        glmFitB <- glm.nb(yB ~ factor(origin) + factor(dev), 
                         data = ldaFit)
        phi <- 1.0
        cf <- c(coef(glmFitB), 1.0, glmFitB$theta)
      } else {
        if (!is.null(var.power)){
          glmFitB <- glm(yB ~ factor(origin) + factor(dev),
                          family = fam, data = ldaFit, offset = offset, ...)
          phi <- with(glmFitB, sum(weights * residuals^2) / df.residual)
          cf <- c(coef(glmFitB), phi, var.power)
        } else{ 
          glmFitB <- cpglm(yB ~ factor(origin) + factor(dev),
                          link = link.power, data = ldaFit, offset = offset, ...)
          cf <- c(coef(glmFitB), glmFitB$phi, glmFitB$p)
        }      
      }
      # mean and prediction
      ymB <- predict(glmFitB, newdata = ldaOut, type = "response")
      if (nb){
        ypB <- rnbinom(length(ymB), size = glmFitB$theta, mu = ymB)
      } else {
        ypB <- rtweedie(length(ymB), mu = ymB, phi = cf[nB + 1], power = cf[nB + 2])
      }
      # save simulations
      resMeanAyB[i, ] <- as.numeric(tapply(ymB, factor(ldaOut$origin), sum))      
      resPredAyB[i, ] <- as.numeric(tapply(ypB, factor(ldaOut$origin), sum))      
      sims.par[i, ] <- unname(cf)
    }
    # get names
    dimnames(sims.par)[[2]] <- c(names(coef(glmFit)), "phi", "p")
    dimnames(resMeanAyB)[[2]] <- dimnames(resPredAyB)[[2]] <- 
            as.character(sort(unique(ldaOut$origin)))
    # pred errs
    S.E <- c(apply(resPredAyB, 2, sd), sd(rowSums(resPredAyB)))  
  }
  
  # compile results
  IBNR <- round(c(resMeanAy, resMeanTot))
  CV <- S.E / IBNR
  Latest <- getLatestCumulative(incr2cum(tr.incr))
  Latest <- Latest[-(1:(length(Latest) - length(unique(ldaOut$origin))))]
  Latest <- c(Latest, total = sum(Latest))
  Ultimate <- Latest + IBNR
  resDf <- data.frame(Latest = Latest, Dev.To.Date = Latest/Ultimate,
                      Ultimate = Ultimate, IBNR = IBNR,
                      S.E = S.E, CV = CV)
	row.names(resDf) <- names(Latest)
  
  # produce fullly projected triangle
  ldaOut$value <- round(yp)
  FullTriangle <- as.triangle(rbind(ldaFit, ldaOut))
  if (cum) FullTriangle <- incr2cum(FullTriangle)
  
  # output
  out <- list(call = call, summary = resDf,
                Triangle = triangle,
                FullTriangle = FullTriangle,
                model = glmFit, 
                sims.par =  matrix(0),
                sims.reserve.mean =  matrix(0),
                sims.reserve.pred =  matrix(0))
  
  if (mse.method == "bootstrap"){
    out$sims.par <- sims.par 
    out$sims.reserve.mean <- resMeanAyB
    out$sims.reserve.pred <- resPredAyB
  }
  class(out) <- "glmReserve"
  return(out)  
}

# summary and print method for glmReserve
summary.glmReserve <- function(object, type = c("triangle", "model"), ...){
  type <- match.arg(type)
  if (type == "triangle")
    print(object$summary)
  if (type == "model")
    summary(object$model)
}

print.glmReserve <- function(x, ...) {
  summary(x)
  
  invisible(x)
}

# return pearson residuals (scaled)
residuals.glmReserve <- function(object, ...){
  m <- object$model
  r <- residuals(m, type = "pearson")
  if (class(m)[1] == "glm") {
    phi <- sum((m$weights * m$residuals^2)[m$weights > 0])/m$df.residual
  } else if (class(m)[1] == "glm"){
    phi <- 1
  } else {
    phi <- m$phi
  }
  return(r/sqrt(phi))
}

resid.glmReserve <- function(object, ...)
  residuals(object)

# 1 Original triangle 
# 2 Full triangle 
# 3 Reserve distribution
# 4 Residual plot
# 5 QQnorm
plot.glmReserve <- function(x, which = 1, ...){
  model <- x$model
  if (which == 1){
    plot(x$Triangle, ...)  
  } else 
  if (which == 2){
    plot(x$FullTriangle, ...)  
  } else 
  if (which == 3){
    sim <- x$sims.reserve.pred
    if (ncol(sim) == 1)
      stop('No predictions found, try bootstrap?')
    sim <- cbind(sim, total = rowSums(sim))
    nm <- colnames(sim)
    sim <- data.frame(group = factor(rep(nm, rep(nrow(sim), ncol(sim))), levels = nm),
                                   prediction = as.vector(sim))
    prediction <- NULL
    gg <- ggplot(sim, aes(prediction))
    gg <- gg + geom_density() + 
      facet_wrap(~group, nrow = 2, scales = "free") +
      scale_x_continuous("predicted reserve")
    print(gg) 
  } else 
  if (which == 4){
    plot(resid(x) ~ fitted(model), ...)
    abline(h = 0, col = "#B3B3B3")
  } else  
  if (which == 5){
    qqnorm(resid(x), ...)
    qqline(resid(x))
  }   
}

Any scripts or data that you put into this service are public.

ChainLadder documentation built on July 9, 2023, 5:12 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

ChainLadder
Statistical Methods and Models for Claims Reserving in General Insurance

R/glmReserve.R
In ChainLadder: Statistical Methods and Models for Claims Reserving in General Insurance

Defines functions plot.glmReserve resid.glmReserve residuals.glmReserve print.glmReserve summary.glmReserve glmReserve

Documented in glmReserve plot.glmReserve print.glmReserve resid.glmReserve summary.glmReserve

Try the ChainLadder package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ChainLadder Statistical Methods and Models for Claims Reserving in General Insurance

R/glmReserve.R In ChainLadder: Statistical Methods and Models for Claims Reserving in General Insurance

Defines functions plot.glmReserve resid.glmReserve residuals.glmReserve print.glmReserve summary.glmReserve glmReserve

Documented in glmReserve plot.glmReserve print.glmReserve resid.glmReserve summary.glmReserve

Try the ChainLadder package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ChainLadder
Statistical Methods and Models for Claims Reserving in General Insurance

R/glmReserve.R
In ChainLadder: Statistical Methods and Models for Claims Reserving in General Insurance