####
#### The main function should fit any distribution, DISTR, as long as dDISTR and pDISTR are defined.
### dependencies
# actuar
### the code below checks and installs dependencies
# Dependencies <- c("actuar", "maxLik", "dplyr", "ggplot2", "cowplot", "rjags")
# packagelist <- rownames(installed.packages())
# for (dep in Dependencies ) if(dep %in% packagelist == FALSE) {install.packages(dep)}
# library(actuar)
# library(maxLik)
# library(dplyr)
# library(ggplot2)
# library(cowplot)
# library(rjags)
#### sourcing other modules only works if this main module is sourced
#source(paste(dirname(sys.frame(1)$ofile), "/Truncation Functions.R", sep=""))
#source(paste(dirname(sys.frame(1)$ofile), "/Visual diagnosis of fits.R", sep=""))
#' this is the generic log-likelihood function; it calls the density and distribution functions of various distributions
#' @param parm vector of parameter values, stacked in the order given by parnames
#' @param y the observed amount, subject to censoring and net of the attachment (GU amount minus Att)
#' @param FUN distribution name
#' @param Att the left truncation point, frequently used in the severity modeling context
#' @param Lmt right censoring point
#' @param rTrunc the right truncation point; this is more applicable to reporting lag modeling for CM policies
#' @param parnames parameter names, such as c('shape', 'scale') for gamma
#' @param fixedpar parameter values that are fixed
#' @param fixedparn names of fixed parameters
#' @param link link function
#' @param Xmat x matrix of predictors/independent variables
#' @param modelpar which parameter is modeled
#' @param ll_sign changes the sign of the returned log-likelihood
#' @param ... additional variables passed to distribution functions
#' Xmat needs to be converted to matrix form using model.matrix
#' @export
#' @examples
#' FitDistLoglik(parm = c(0.5, 0.5, 3, 12, 3, 1.7), y = y, FUN='mixlnorm2', Att = 1e4, Lmt = Inf, parnames = c('ws', 'meanlogs', 'sdlogs'))
FitDistLoglik <- function(parm, y, FUN="gamma", Att, Lmt, rTrunc=Inf, parnames=c("shape", "scale"),
fixedpar=NULL, fixedparn=NULL, link = "identity", Xmat = NULL,
modelpar = "scale", ll_sign = 1, ...) {
if ((length(parm) %% length(parnames) != 0) & ((length(parm) + 1) %% length(parnames) != 0) ) {
    print('number of parameters is not a multiple of the number of parameter names')
return(NULL)
}
vec_len <- length(parm) %/% length(parnames)
parm_list <- lapply(1:length(parnames),
function(x) parm[((x-1) * vec_len + 1):(x * vec_len) ])
parm_list <- setNames(parm_list, parnames)
## this generates both the link function and inverse link function
if (!is.null(Xmat) & !is.null(modelpar)) {
Lk <- switch(link,
'log' = log,
"identity" = identity,
"inverse" = function(x) 1/x)
iLk <- switch(link,
'log' = exp,
"identity" = identity,
"inverse" = function(x) 1/x)
# x <- x[y>0, ]
# y <- y[y>0]
distparm <- lapply(rep(NA, length(parnames)), identity)
distparm[[which(parnames == modelpar)]] <- iLk(Xmat %*% parm[1:ncol(Xmat)])
fixedplst <- which(parnames != modelpar)
for (i in 1:length(fixedplst)) distparm[[fixedplst[i]]] <- parm[ncol(Xmat)+i]
} else {
distparm <- lapply(parm_list, identity)
}
### likelihood of uncensored observations, truncation considered in the dtrunc function
if (is.null(fixedpar)) args_fit <- setNames(append(distparm, list( FUN, Att, rTrunc)),
c(parnames, "FUN", "Att", "rTrunc")) else
args_fit <- setNames(append(append(distparm, list( FUN, Att, rTrunc)),
#lapply(fixedpar, identity)),
list(fixedpar)
),
c(parnames, "FUN", "Att", "rTrunc", fixedparn))
f <- do.call(dtrunc, append(list(x = y + Att), args_fit))
### likelihood of censored observations, truncation considered in the ptrunc function
S <- 1 - do.call(ptrunc, append(list(x = y + Att), args_fit))
l <- ifelse(y >= Lmt, S, f) ### will calculate likelihood even if observation exceeds limit due to ALAE
ll <- log(l)
return(sum(ll) * ll_sign)
}
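### A minimal sketch of calling FitDistLoglik directly (hedged: it assumes dtrunc and
### ptrunc from "Truncation Functions.R" are already sourced). It evaluates the
### log-likelihood of a censored gamma sample at the true parameters and at a clearly
### worse candidate; the first value should be larger. Un-comment to test.
# set.seed(1)
# y_sim <- pmin(rgamma(1000, shape = 2, scale = 5), 20)
# FitDistLoglik(parm = c(2, 5), y = y_sim, FUN = "gamma", Att = 0, Lmt = 20,
#               parnames = c("shape", "scale"))
# FitDistLoglik(parm = c(1, 1), y = y_sim, FUN = "gamma", Att = 0, Lmt = 20,
#               parnames = c("shape", "scale"))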
#' this is the fitting function
#' @param y the observations net of the truncation point; for GU observations, y should be the GU amount minus Att
#' @param Att the truncation (attachment) points
#' @param Lmt the censoring limit
#' @param rTrunc the right truncation point
#' @param FUN determines the distribution function, e.g. "gamma", "lnorm", "pareto"
#' @param ini the initial parameter values
#' @param mtd the optimization method passed to optimx; the default is "nlminb"
#' @param parnames parameter names
#' @param fixedpar fixed parameter values
#' @param fixedparn names of fixed parameters
#' @param covardf data frame of covariates
#' @param formula model formula
#' @param modelpar which severity parameter is influenced by the covariates
#' @param link link function applied to the modeled parameter
#' @param gn_ll the gradient function of the log-likelihood; this should be NULL unless you are sure that the supplied gradient function is appropriate
#' @param gn_ll_mtd when gn_ll is NULL, the gradient is generated by numerical differentiation using this method; if set to NULL, no gradient function is supplied to the optimizer. For other options, refer to the methods of numDeriv::grad
#' @param prior (BETA) priors for the parameters; currently only normal priors are supported and all parameters must be specified
#' @param ll_scale (BETA) constant that rescales the log-likelihood to a more normalized range, defaults to 0
#' @param lower lower bound of the parameters, see optimx for more detail
#' @param upper upper bound of the parameters, see optimx for more detail
#' @param ... additional distributional parameters passed to the distribution functions
#' For an untruncated fit, leave out Att and rTrunc.
#' For an uncensored fit, leave out Lmt.
#' @examples
#' n = 5000
#' x <- rgamma(n, shape=2, scale = 5)
#' Att <- sample(seq(0, 1, 0.2), n, replace = TRUE)
#' Lmt <- sample(seq(5, 20, 1), n, replace = TRUE)
#' x1 <- pmin(pmax(x - Att, 0), Lmt)
#' dat <- data.frame(cbind(x, Att, Lmt, x1))
#' dat1 <- dat[dat$x1 > 0, ]
#' (gammafit <- sevfit(y=dat1$x1, Att=dat1$Att, Lmt = dat1$Lmt, FUN="gamma", ini=c(2, 2), mtd="Nelder-Mead",
#' parnames=c("shape", "scale")))
#' AIC(gammafit)
#' @export
sevfit <- function(y, Att=0, Lmt=Inf, rTrunc=Inf, FUN, ini, mtd = "nlminb", parnames,
fixedpar=NULL, fixedparn=NULL, covardf = NULL, formula = ~ 1,
modelpar = NULL, link = "identity", gn_ll = NULL, gn_ll_mtd = "Richardson",
prior = NULL, ll_scale = 0, lower = -Inf, upper = Inf,
...) {
if (length(ini) %% length(parnames) != 0) {
    print('number of parameters is not a multiple of the number of parameter names')
return(NULL)
}
vec_len <- length(ini) %/% length(parnames)
parnames_og <- parnames
parnames <- unlist(lapply(parnames,
function(x) paste0(rep(x, vec_len), seq(1, vec_len, 1)) ))
  Num.Exceed.Lmt <- length(y[y > Lmt]) ### this calculates the number of observations exceeding the censoring limit
  if (Num.Exceed.Lmt > 0)
    print(paste("Warning: There are", Num.Exceed.Lmt,
                "observations greater than the censoring limit. These have been censored at their observed values rather than at the censoring limit during the likelihood calculation"))
Lk <- switch(link,
'log' = log,
"identity" = identity,
"inverse" = function(x) 1/x)
iLk <- switch(link,
'log' = exp,
"identity" = identity,
"inverse" = function(x) 1/x)
if (is.null(covardf)) {
x <- NULL
ncolx <- 0
nofa <- length(parnames)
} else {
x <- model.matrix(formula, covardf)
ncolx <- ncol(x)
nofa <- length(parnames) - 1
}
  ### this initializes parameters; very useful for regression applications
if (is.null(ini) | length(ini) != (ncolx + nofa)) {
ini <- rep(0.03, (ncolx + 1))
if (!is.null(modelpar)){ if (modelpar =="scale" ) ini[1] <- Lk(mean(y)) }
ini[(ncolx + 1):(ncolx + nofa)] <- 1
  }
# if (is.null(modelpar)) names(ini) <- parnames else
# names(ini) <- c(names(as.data.frame(x)), parnames[parnames!=modelpar])
if (is.null(ll_scale)) {
ll_scale <- do.call(FitDistLoglik,
args = list(parm = ini, y = y, FUN=FUN,
Att = Att, Lmt = Lmt, rTrunc=rTrunc, parnames = parnames_og,
fixedpar=fixedpar, fixedparn=fixedparn,
modelpar = modelpar,link=link, ll_sign = 1
)
)
}
fn_ll_local <- function(pars) {
args_ll <- list(parm = pars, y = y, FUN=FUN,
Att = Att, Lmt = Lmt, rTrunc=rTrunc, parnames = parnames_og,
fixedpar=fixedpar, fixedparn=fixedparn,
modelpar = modelpar,link=link, ll_sign = 1
)
do.call(FitDistLoglik, args = args_ll) - ll_scale
}
fn_ll_local_w_pr <- function(pars) {
vec_len <- length(pars) %/% length(parnames_og)
parm_list <- lapply(1:length(parnames_og),
function(x) pars[((x-1) * vec_len + 1):(x * vec_len) ])
parm_list <- setNames(parm_list, parnames_og)
args_ll <- list(parm = pars, y = y, FUN=FUN,
Att = Att, Lmt = Lmt, rTrunc=rTrunc, parnames = parnames_og,
fixedpar=fixedpar, fixedparn=fixedparn,
modelpar = modelpar,link=link, ll_sign = 1
)
do.call(FitDistLoglik, args = args_ll) + gen_prior_like(prior, parm_list)- ll_scale
}
if (is.null(gn_ll)) {
if(is.null(gn_ll_mtd)) {
gn_ll_local <- NULL
gn_ll_local_w_pr <- NULL
} else{
gn_ll_local <- function(pars) {
numDeriv::grad(fn_ll_local, x = pars, method=gn_ll_mtd
, method.args=list(r=4, d = 0.0001)
)
}
gn_ll_local_w_pr <- function(pars) {
numDeriv::grad(fn_ll_local_w_pr, x = pars, method=gn_ll_mtd
, method.args=list(r=4, d = 0.0001)
)
}
}
} else {
gn_ll_local <- function(pars) {
gn_ll(parm = pars, y = y, FUN=FUN,
Att = Att, Lmt = Lmt, rTrunc=rTrunc, parnames_og,
fixedpar=fixedpar, fixedparn=fixedparn,
modelpar = modelpar,link=link, ll_sign = 1)
}
}
if (is.null(prior)) {
maxfit <- optimx::optimx(par = ini, fn = fn_ll_local, gr = gn_ll_local ,
lower = lower, upper = upper,
method = mtd, control= list(maximize = T,
kkt = F))
} else {
maxfit <- optimx::optimx(par = ini, fn = fn_ll_local_w_pr, gr = gn_ll_local_w_pr,
lower = lower, upper = upper,
method = mtd, control= list(maximize = T,
kkt = F))
}
fitted_pars <- as.vector(t(maxfit[1, 1:length(ini)] ))
fitted_parm_list <- lapply(1:length(parnames_og),
function(x) fitted_pars[((x-1) * vec_len + 1):(x * vec_len) ])
fitted_parm_list <- setNames(fitted_parm_list, parnames_og)
if (!is.null(fixedpar))
fitted_parm_list <- append(fitted_parm_list, setNames(list(fixedpar), fixedparn ))
structure(list(fit = maxfit, parameters = fitted_parm_list
, distr = FUN
), class = "Ironfit")
}
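### A hedged sketch of two optional features (assumes dat1 from the @examples above
### exists): fixing the gamma shape at 2 via fixedpar/fixedparn so that only the
### scale is estimated, with a lower box constraint keeping the estimate positive
### under the bounded "L-BFGS-B" method. Un-comment to test.
# (gammafit_fixed <- sevfit(y = dat1$x1, Att = dat1$Att, Lmt = dat1$Lmt, FUN = "gamma",
#                           ini = c(2), mtd = "L-BFGS-B", parnames = c("scale"),
#                           fixedpar = 2, fixedparn = "shape", lower = 1e-6))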
#' calculates the raw log-likelihood (without prior penalization) from the fitted parameters
#' @param obj an Ironfit object returned by the sevfit function
#' @param y the observed amounts gross of the attachment (GU)
#' @param Att the left truncation point
#' @param Lmt the censoring limit
#' @param rTrunc the right truncation point
#' @export
logLik.Ironfit <- function(obj, y, Att = 0, Lmt = Inf, rTrunc = Inf) {
do.call(FitDistLoglik, args = list(y = y - Att,
Att = Att,
Lmt = Lmt,
rTrunc = rTrunc,
parm = as.vector(unlist(obj$parameters)),
parnames = names(obj$parameters),
FUN = obj$distr)
)
}
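### A minimal usage sketch (hedged: assumes gammafit and dat1 from the sevfit
### @examples exist). logLik.Ironfit expects amounts gross of the attachment,
### so Att is added back to the net observations before calling it.
# logLik(gammafit, y = dat1$x1 + dat1$Att, Att = dat1$Att, Lmt = dat1$Lmt)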
#' calculates expected quantiles for QQ plots from a fitted Ironfit object
#' @param fit object from sevfit
#' @param act_ps the actual percentiles, typically (seq(1, n, 1) - 0.5) / n
#' @param Att the left truncation point
#' @export
qq_calc <- function(fit, act_ps, Att = 0) {
var_qs <- append(list(p = act_ps, Att = Att, FUN = fit$distr), fit$parameters )
time <- system.time(exp_ys <- do.call(qfunc, var_qs ))
return(list(exp_ys= exp_ys, time = time))
}
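### A hedged usage sketch (assumes gammafit from the sevfit @examples and qfunc from
### "Truncation Functions.R"): expected quantiles at 100 evenly spaced percentiles.
# qq_calc(fit = gammafit, act_ps = (seq(1, 100, 1) - 0.5) / 100, Att = 0)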
#' prepares data frames suitable for QQ and PP plots
#' @param fit object from sevfit
#' @param y the GU loss amounts
#' @param ... other parameters passed to the distribution functions, such as Att and rTrunc
#' @export
qpPlot_prepare <- function(fit, y, ...) {
vars_fit <- append(list(FUN = fit$distr), fit$parameters )
add_args <- list(...)
act_ps <- (seq(1, length(y), 1) - 0.5) / length(y)
  rlt <- do.call(qq_calc, args = append(list(fit = fit, act_ps = act_ps), add_args ))
exp_ys <- rlt$exp_ys
exp_ps <- do.call(ptrunc, args = append(append(list(x = sort(y)), vars_fit), add_args ))
qq_df = tibble::tibble(exp_ys = exp_ys, act_ys = sort(y))
pp_df = tibble::tibble(exp_ps = exp_ps, act_ps = act_ps)
qq_plot <- qq_df %>%
ggplot(mapping = aes(x = exp_ys, y = act_ys)) +
geom_point()+geom_abline(slope = 1) +
    theme_bw() + scale_y_continuous(labels = scales::comma, limits = c(0, NA)) +
    scale_x_continuous(labels = scales::comma, limits = c(0, NA))
pp_plot <- pp_df %>%
ggplot(mapping = aes(x = exp_ps, y = act_ps)) +
geom_point()+geom_abline(slope = 1) +
theme_bw()
structure(list(pp_df = pp_df, qq_df = qq_df
, qq_plot = qq_plot, pp_plot = pp_plot
), class = "Ironfit_plot")
}
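### A hedged, self-contained sketch of the QQ/PP diagnostics: fitting with a single
### common attachment of 3 keeps the sorted losses aligned with the scalar Att
### passed through to ptrunc/qfunc. Un-comment to test.
# set.seed(2)
# gu <- rgamma(2000, shape = 2, scale = 5)
# y0 <- gu[gu > 3] - 3
# fit0 <- sevfit(y = y0, Att = 3, Lmt = Inf, FUN = "gamma", ini = c(2, 2),
#                parnames = c("shape", "scale"))
# qp <- qpPlot_prepare(fit = fit0, y = y0 + 3, Att = 3)
# plot(qp, type = "qq")
# plot(qp, type = "pp")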
#' plot method for Ironfit_plot objects
#' @param obj object from qpPlot_prepare
#' @param type type of plot, "qq" or "pp"
#' @export
plot.Ironfit_plot <- function(obj, type, ...) {
plot_Ironfit <- switch(type,
"pp" = obj$pp_plot
, "qq" = obj$qq_plot
)
return(plot_Ironfit)
}
#' print method of Ironfit object
#' @param x an Ironfit object
#' @export
print.Ironfit <- function(x) {
return(print(x$fit))
}
### generic survival function: 1 - pFx(x, ...)
S <- function(x, Fx, ...) {
p <- get(paste("p", Fx, sep = ""))
1 - p(x, ...)
}
### survival function taking a set of parameter values and their names via parvars/parnames
Sx <- function(x, Fx, parnames, parvars) {
args_Sx <- setNames(append(list(x, Fx), lapply(parvars, identity)),
c("x", "Fx", parnames))
do.call(S, args = args_Sx)
}
### generic LEV (limited expected value) function: integrates the survival function from l to u
LEVcalc <- function(l, u, Fx, parnames, parvars) {
args_inte <- setNames(append(list(S, l, u, Fx), lapply(parvars, identity)),
c("f", "lower", "upper", "Fx", parnames))
do.call(integrate, args = args_inte)$value
}
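### A quick, hedged sanity check for S and LEVcalc: for a gamma with shape = 1
### (i.e. an exponential), the limited expected value E[min(X, u)] has the closed
### form scale * (1 - exp(-u / scale)). Un-comment to test.
# LEVcalc(l = 0, u = 10, Fx = "gamma", parnames = c("shape", "scale"), parvars = c(1, 5))
# 5 * (1 - exp(-10 / 5))   ### closed-form value for comparison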
### this function allows the user to calculate LEVs using a fitted model
### at this point it would probably make sense to create a dedicated object class for this truncated and censored fit
predict.FitTrunc <- function(coefs, parnames, covardf = NULL, formula = ~ 1, Fx,
modelpar = NULL, link = "identity", Att=0, Lmt=Inf) {
#coefs <- coef(fit)
Lk <- switch(link,
'log' = log,
"identity" = identity,
"inverse" = function(x) 1/x)
iLk <- switch(link,
'log' = exp,
"identity" = identity,
"inverse" = function(x) 1/x)
non_model_parname <- parnames[!parnames %in% modelpar]
non_model_pars <- coefs[names(coefs) %in% non_model_parname]
coefs <- coefs[!names(coefs) %in% non_model_parname]
model_par_v <- iLk(model.matrix(formula, covardf) %*% coefs)
#print(summary(model_par_v))
parvars_lst <- lapply(model_par_v, function(x) c(x, non_model_pars))
list(LEV = mapply(LEVcalc, l = Att, u = Att + Lmt, parvars = parvars_lst
, MoreArgs = list(parnames = parnames, Fx = Fx))
, Surv = mapply(Sx, x = Att, parvars = parvars_lst
, MoreArgs = list(parnames = parnames, Fx = Fx))
)
}
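### A hedged sketch of calling predict.FitTrunc directly (it is not dispatched from
### the Ironfit class). coefs mixes the regression coefficients of the modeled
### parameter with the remaining distribution parameters, matched by name; note that
### parvars are assembled with the modeled parameter first, so parnames is passed in
### that order here. The coefficient values below are illustrative only.
# newdat <- data.frame(x1 = c(-1, 0, 1), x2 = c(0.5, 0, -0.5))
# cf <- c("(Intercept)" = 7.8, x1 = 0.1, x2 = -0.15, shape = 0.5)
# predict.FitTrunc(coefs = cf, parnames = c("scale", "shape"), covardf = newdat,
#                  formula = ~ x1 + x2, Fx = "gamma", modelpar = "scale",
#                  link = "log", Att = 0, Lmt = 1e4)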
#### examples for several distributions; un-comment the code below to test
#
# (paretofit <- sevfit(y=dat1$x1, Att=dat1$Att, Lmt = dat1$Lmt, FUN="pareto", ini=c(2, 2), mtd="Nelder-Mead",
#                      parnames=c("shape", "scale")))
# AIC(paretofit)
# (lnormfit <- sevfit(y=dat1$x1, Att=dat1$Att, Lmt = dat1$Lmt, FUN="lnorm", ini=c(2, 2), mtd="Nelder-Mead",
#                     parnames=c("meanlog", "sdlog")))
# AIC(lnormfit)
#
# QQ1 <- transQQplot(y=dat1$x1, FUN="lnorm", Att=dat1$Att, Lmt = dat1$Lmt, rTrunc=Inf,
#                    parnames=c("meanlog", "sdlog"), parm=lnormfit$estimate)
#
# QQ2 <- transQQplot(y=dat1$x1, FUN="gamma", Att=dat1$Att, Lmt = dat1$Lmt, rTrunc=Inf,
# parnames=c("shape", "scale"), parm=gammafit$estimate)
#
#
# QQ3 <- transQQplot(y=dat1$x1, FUN="pareto", Att=dat1$Att, Lmt = dat1$Lmt, rTrunc=Inf,
# parnames=c("shape", "scale"), parm=paretofit$estimate)
#
# k <- plot_grid(QQ1[[1]], QQ1[[2]], QQ1[[3]], align='v', ncol =1)
# plot_grid(QQ1[[3]], QQ2[[3]], QQ3[[3]], align='v', ncol =1)
# plot_grid(QQ1[[1]], QQ2[[1]], QQ3[[1]], align='v', ncol =1)
### these are regression examples
#n <- 2500
#set.seed(10003)
#dat <- data.frame(x1 = rnorm(n),
# x2 = rnorm(n))
#dat$y <- unlist(lapply(exp(dat$x1*0.1 + dat$x2*-0.15 + 7.824046),
# function(x) rgamma(1, shape = 0.5, scale = x)))
#dat$Att <- sample(seq(0, 6.499199e+02, length.out = 4), n, replace = TRUE)
#dat$Lmt <- sample(seq(1.644087e+04/3, 1.644087e+04/1.5, length.out = 4), n, replace = TRUE)
#dat$y1 <- pmin(pmax(dat$y - dat$Att, 0), dat$Lmt)
#dat1 <- dat[dat$y1 > 0, ]
#system.time(gammafit <- sevfit(y=dat1$y1, Att=dat1$Att, Lmt = dat1$Lmt, FUN="gamma", ini=c(2, 2),
# mtd="NM",
# parnames=c("shape", "scale"), covardf = dat1, formula = y ~ x1 + x2,
# modelpar = "scale",
# link = "log"))
#system.time(gammafit2 <- sevfit(y=dat1$y1, Att=dat1$Att, Lmt = dat1$Lmt, FUN="gamma", ini=c(2, 2),
# mtd="NR",
# parnames=c("shape", "scale"), covardf = dat1, formula = y ~ x1 + x2,
# modelpar = "scale",
# link = "log"))
#summary(gammafit2)
#summary(gammafit)