Nothing
## Copyright (C) 2012 Marius Hofert and Valerie Chavez-Demoulin
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
## FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.
## *G*eneralized *A*dditive *M*odeling for *E*xtremes
##
## Remarks:
## 1) Notation:
## paper | Valerie's thesis | Valerie's former code | Coles' book + ismev
## xi -kappa kappa xi
## beta sigma sigma sigma
## nu nu eta --
## 2) GPD(xi in IR, beta > 0) distribution function (same as in ismev + EKM; up to notation):
## G_{xi,beta}(x) = 1-(1+xi*x/beta)^(-1/xi) if xi!=0
## = 1-exp(-x/beta) if xi =0
## x>=0 when xi>=0 and x in [0,-beta/xi] when xi<0
## Note: in EKM, x ~> (x-nu)/beta (with the same meaning of beta)
## 3) GPD(xi, beta) density for xi>0:
## g_{xi,beta}(x) = (1+xi*x/beta)^(-(1+1/xi))/beta if x>0
## 4) We need xi > -1 in order for beta (= exp(nu) / (1+xi)) > 0. In comparison
## to Chavez-Demoulin, Embrechts, Hofert (2016), the below code was updated
## in 2021 ('QRM' version 0.4-33) to also include a reparameterization in xi
## (= exp(eta) - 1).
### Auxiliary functions ########################################################
##' Reparameterized log-likelihood l^r(eta, nu; y_1,..,y_n) = l(xi, beta;
##' y_1,..,y_n) = l(exp(eta)-1, exp(nu)/(1+xi); y_1,..,y_n) = l(exp(eta)-1, exp(nu-eta);
##' y_1,..,y_n), where l(xi, beta; y_1,..,y_n) denotes the log-likelihood of a
##' GPD(xi, beta) distribution
##'
##' @title Reparameterized log-Likelihood l^r
##' @param y vector of excesses (over a high threshold u)
##' @param eta vector of GPD(xi, beta) parameters eta (= log(1+xi))
##' @param nu vector of GPD(xi, beta) parameters nu (= log(beta*(1+xi)) = log(beta)+eta);
##' orthogonal to eta in the Fisher information metric: mixed derivative
##' (w.r.t. eta and nu) of the reparameterized log-likelihood is 0
##' @return reparametrized log-likelihood l^r
##' @author Marius Hofert
rlogL <- function(y, eta, nu, verbose = TRUE)
{
stopifnot((n <- length(y)) > 0, length(eta)==n, length(nu)==n)
## Before reparametrization of xi
## ii <- xi <= -1
## if(all(ii)) stop("Can't adjust xi <= -1 since there are no xi > -1") # beta = exp(nu)/(1+xi)
## if(any(ii)) { # not a valid reparameterization either (for all xi <= -1) but we make it one
## perc <- 100*sum(ii)/length(ii) # percentage of xi <= -1
## if(verbose) warning(round(perc,2), "% of all xi are <= -1 and are adjusted to be > -1")
## xi[ii] <- mean(xi[!ii])
## }
xi <- expm1(eta)
res <- rep(-Inf, n)
ii <- (xi < 0 & (0 <= y & y < -exp(nu)/(xi*(1+xi)))) | (xi > 0 & y >= 0)
if(any(ii)) {
xi. <- xi[ii]
nu. <- nu[ii]
res[ii] <- log1p(xi.)-nu.-(1+1/xi.)*log1p(xi.*(1+xi.)*exp(-nu.)*y[ii])
}
ii <- xi == 0
if(any(ii)) res[ii] <- -(nu[ii]+exp(-nu[ii])*y[ii])
sum(res)
}
##' @title Function to Adjust Derivatives
##' @param x vector of values (derivatives)
##' @param order order of the derivatives to be adjusted (1, 2)
##' @param verbose logical indicating whether warnings about adjustments of
##' the derivatives are printed
##' @return adjusted derivatives
##' @author Marius Hofert
##' Note: This is an auxiliary function of DrlogL()
adjustD <- function(x, order, verbose = TRUE)
{
stopifnot(order==1 || order==2)
## order == 1
ii <- is.finite(x) # indices of value which are fine (not NA, NaN, Inf, or -Inf)
res <- x
if(any(!ii)) {
if(sum(ii)==0) stop("Can't adjust derivatives, there are no finite values")
if(verbose) warning(sum(!ii)," (",
round(100*sum(!ii)/length(res), 2),
"%) non-finite derivatives adjusted")
res[!ii] <- mean(res[ii])
}
## order == 2 => additionally check that 2nd order derivatives are negative
ii <- res < 0
if(order == 2 && any(!ii)) {
if(sum(ii)==0) stop("Can't adjust second-order derivatives, there are no negative values")
if(verbose) warning(sum(!ii)," (",
round(100*sum(!ii)/length(res), 2),
"%) non-negative second-order derivatives adjusted")
res[!ii] <- mean(res[ii])
}
## return
res
}
##' @title Compute Derivatives of the Reparameterized log-Likelihood
##' @param y vector of excesses (over a high threshold u)
##' @param eta vector of GPD(xi, beta) parameters eta (= log(1+xi))
##' @param nu vector of GPD(xi, beta) parameters nu (= log(beta*(1+xi)) = log(beta)+eta)
##' @param adjust logical indicating whether non-real values of the derivatives are adjusted
##' @param verbose logical indicating whether modified arguments are printed
##' @return (n x 4) matrix containing the partial derivatives of the
##' reparameterized log-likelihood l^r where
##' column 1: derivative of the reparameterized log-likelihood w.r.t. eta
##' column 2: derivative of the reparameterized log-likelihood w.r.t. nu
##' column 3: 2nd derivative of the reparameterized log-likelihood w.r.t. eta
##' column 4: 2nd derivative of the reparameterized log-likelihood w.r.t. nu
##' @author Marius Hofert
##' Note: Column 3 and 4 have different sign than in the old code
DrlogL <- function(y, eta, nu, adjust = TRUE, verbose = TRUE)
{
stopifnot((n <- length(y)) > 0, length(eta)==n, length(nu)==n)
## Main
res <- matrix(0, nrow = n, ncol = 4,
dimnames = list(NULL, c("rl.eta", "rl.nu", "rl.etaeta", "rl.nunu")))
xi <- expm1(eta)
ii <- (xi < 0 & (0 <= y & y < -exp(nu)/(xi*(1+xi)))) | (xi > 0 & y >= 0)
if(any(ii)) {
## auxiliary terms
eta. <- eta[ii]
xi. <- xi[ii]
nu. <- nu[ii]
y. <- y[ii]
a <- 1+xi.*(1+xi.)*exp(-nu.)*y.
la <- log1p(xi.*(1+xi.)*exp(-nu.)*y.)
a. <- exp(-nu.)*y.*(1+2*xi.)
## main
res[ii,"rl.eta"] <- exp(eta.)*(1/(1+xi.) + la/xi.^2 - (1+1/xi.)*a./a) # formerly: 1/(1+xi.) + la/xi.^2 - (1+1/xi.)*a./a
res[ii,"rl.etaeta"] <- exp(eta.)*(-1/(1+xi.)^2 - 2*la/xi.^3 + 2*a./(a*xi.^2) -
(1+1/xi.)*exp(-nu.)*y. * (2*a - (1+2*xi.)^2 * exp(-nu.)*y.) / a^2) +
exp(eta.)*(1/(1+xi.) + la/xi.^2 - (1+1/xi.)*a./a) # formerly: -1/(1+xi.)^2 - 2*la/xi.^3 + 2*a./(a*xi.^2) - (1+1/xi.)*exp(-nu.)*y. * (2*a - (1+2*xi.)^2 * exp(-nu.)*y.) / a^2
res[ii,"rl.nu"] <- (-1+(1+xi.)*exp(-nu.)*y.) / a
res[ii,"rl.nunu"] <- -(1+xi.)^2*y.*exp(-nu.)/a^2
}
ii <- xi == 0
if(any(ii)) {
## auxiliary terms
ynu <- y[ii]*exp(-nu[ii])
res[ii,"rl.eta"] <- 0
res[ii,"rl.etaeta"] <- 0
res[ii,"rl.nu"] <- -1+ynu
res[ii,"rl.nunu"] <- -ynu
}
## replace non-finite derivatives by mean of all finite ones
if(adjust) {
res[,"rl.eta"] <- adjustD(res[,"rl.eta"], order = 1, verbose = verbose)
res[,"rl.etaeta"] <- adjustD(res[,"rl.etaeta"], order = 2, verbose = verbose)
res[,"rl.nu"] <- adjustD(res[,"rl.nu"], order = 1, verbose = verbose)
res[,"rl.nunu"] <- adjustD(res[,"rl.nunu"], order = 2, verbose = verbose)
}
## return
res
}
##' @title Compute Update (one Iteration) in gamGPDfit()
##' @param y data.frame containing the excesses over the threshold in a column
##' labeled yname
##' @param eta.nu 2-column matrix of GPD parameters (eta, nu) to be updated
##' @param etaFrhs right-hand side of the formula for eta in the gam() call
##' for fitting eta
##' @param nuFrhs right-hand side of the formula for nu in the gam() call
##' for fitting nu
##' @param yname string containing the name of the column of y which contains
##' the excesses
##' @param adjust logical indicating whether non-real values of the derivatives are adjusted
##' @param verbose logical indicating whether warnings about adjustments of
##' the derivatives, wrong arguments in DrlogL() and failed gam() calls are printed
##' @param ... additional arguments passed to gam()
##' @return a list of length four containing
##' element 1 (eta): object of class gamObject for eta as returned by mgcv::gam()
##' element 2 (nu): object of class gamObject for nu as returned by mgcv::gam()
##' element 3 (eta.weights): weights associated with eta
##' element 4 (nu.weights): weights associated with nu
##' or list() (in case gam() or the Newton step failed)
##' @author Marius Hofert
##' Note: That's a helper function of gamGPDfit()
gamGPDfitUp <- function(y, eta.nu, etaFrhs, nuFrhs, yname, adjust=TRUE, verbose=TRUE, ...)
{
stopifnot(is.data.frame(y), (dim. <- dim(y))[2] >= 1,
length(which(colnames(y)==yname))==1,
(n <- dim.[1]) > 0, dim(eta.nu)==c(n, 2))
## pick out eta and nu (for readability)
eta <- eta.nu[,1]
nu <- eta.nu[,2]
## compute one Newton step in xi
DrLL <- tryCatch(DrlogL(y[,yname], eta = eta, nu = nu, adjust = adjust, verbose = verbose), # (n1,4) matrix
error = function(e) e)
if(is(DrLL, "simpleError")) return(list())
rl.eta <- DrLL[,"rl.eta"] # score in eta
rl.etaeta. <- DrLL[,"rl.etaeta"] # -weight
Newton.eta <- eta - rl.eta / rl.etaeta. # Newton step
## concatenate Newton.eta and rl.etaeta. to y, build formula, and estimate eta
if("Newton.eta" %in% colnames(y))
stop("y is not allowed to have a column named 'Newton.eta'")
y. <- cbind(y, Newton.eta = Newton.eta, rl.etaeta. = rl.etaeta.)
eta.formula <- update(etaFrhs, Newton.eta~.) # build formula Newton.eta ~ etaFrhs
## note: the following tryCatch() was used to avoid the error
## "no valid set of coefficients has been found: please supply starting values"
## when using gamGPDboot() is called with a small sample size
eta.obj <- tryCatch(gam(eta.formula, data = y., weights = -rl.etaeta., ...),
error = function(e) e) # updated eta object of type gamObject
## Update on 2020-07-07: eta.formula's 'by' argument needs to be a factor! Otherwise:
## Error in smoothCon(split$smooth.spec[[i]], data, knots, absorb.cons, scale.penalty = scale.penalty, : Can't find by variable
## => see also https://stackoverflow.com/questions/45832928/gam-model-error
if(is(eta.obj, "simpleError")) return(list())
## warning("gam() produced the error:", conditionMessage(eta.obj), " when fitting eta; propagate list() as result")
## build fitted (eta) object and check
eta.fit <- fitted(eta.obj)
## compute one Newton step in nu (for given new eta)
DrLL <- tryCatch(DrlogL(y[,yname], eta = eta.fit, nu = nu, adjust = adjust, verbose = verbose), # (n1,4) matrix
error = function(e) e)
if(is(DrLL, "simpleError")) return(list())
rl.nu <- DrLL[,"rl.nu"] # score in nu
rl.nunu. <- DrLL[,"rl.nunu"] # -weight
Newton.nu <- nu - rl.nu / rl.nunu. # Newton step
## concatenate Newton.nu and rl.nunu. to y, build formula, and estimate nu
if("Newton.nu" %in% colnames(y))
stop("y is not allowed to have a column named 'Newton.nu'")
y. <- cbind(y, Newton.nu=Newton.nu, rl.nunu.=rl.nunu.)
nu.formula <- update(nuFrhs, Newton.nu~.) # build formula Newton.nu ~ nuFrhs
nu.obj <- tryCatch(gam(nu.formula, data = y., weights = -rl.nunu., ...),
error = function(e) e) # updated nu object of type gamObject
if(is(nu.obj, "simpleError")) return(list())
## warning("gam() produced the error:", conditionMessage(nu.obj), " when fitting nu; propagate list() as result")
## return list of two gamObject objects (for eta, nu)
list(eta = eta.obj, nu = nu.obj, eta.weights = -rl.etaeta., nu.weights = -rl.nunu.)
## naming (eta, nu) not ideal but guarantees colnames(param.old) = colnames(param.new) in gamGPDfit()
}
### gamGPDfit() ################################################################
##' @title Semi-parametric Estimation of GPD Parameters via Penalized
##' Maximum Likelihood Estimation Based on Reweighted Least Squares (Backfitting)
##' @param x data.frame containing the losses (all other columns are treated
##' as covariates)
##' @param threshold POT threshold above which losses are considered
##' @param nextremes number of excesses
##' @param datvar name of the data column which contains the data to be modeled,
##' for example, the losses
##' @param etaFrhs right-hand side of the formula for eta in the gam() call
##' for fitting eta
##' @param nuFrhs right-hand side of the formula for nu in the gam() call
##' for fitting nu
##' @param init bivariate vector containing initial values for (xi, beta)
##' @param niter maximal number of iterations in the backfitting algorithm
##' @param include.updates logical indicating whether updates for eta and nu are
##' returned as well
##' @param eps.eta epsilon for stopping criterion for eta
##' @param eps.nu epsilon for stopping criterion for nu
##' @param progress logical indicating whether progress information is displayed
##' @param adjust logical indicating whether non-real values of the derivatives are adjusted
##' @param verbose logical passed to gamGPDfitUp() (thus to DrlogL() and
##' adjustD())
##' @param ... additional arguments passed to gam() (called by gamGPDfitUp())
##' @return a list (see below) or list() (in case gam() or the Newton step in gamGPDfitUp() failed)
##' @author Marius Hofert
gamGPDfit <- function(x, threshold, nextremes = NULL, datvar, etaFrhs, nuFrhs,
init = fit.GPD(x[,datvar], threshold=threshold, type="pwm", verbose=FALSE)$par.ests,
niter = 32, include.updates = FALSE, eps.eta = 1e-5, eps.nu = 1e-5,
progress = TRUE, adjust = TRUE, verbose = FALSE, ...)
{
## checks
stopifnot(is.data.frame(x), length(init) == 2, niter >= 1, eps.eta > 0, eps.nu > 0)
has.threshold <- !missing(threshold)
has.nextr <- !is.null(nextremes)
if(has.threshold && has.nextr)
warning("Only one of 'threshold' and 'nextremes' is allowed -- will take 'threshold'") # both threshold and nextremes given
if(!has.threshold && !has.nextr)
stop("Provide either 'threshold' or 'nextremes'") # none of threshold or nextremes given
dim. <- dim(x) # dimension of x
stopifnot((n <- dim.[1])>=1, dim.[2]>=2) # there should at least be one observation (actually, quite a bit more) and one column of covariates
if(has.nextr) { # nextremes given but no threshold
stopifnot(0 < nextremes, nextremes <= n)
threshold <- quantile(x[,datvar], probs=1-nextremes/n, names=FALSE)
} # => now we can work with threshold
## determine excesses
y. <- x[x[,datvar]>threshold,] # pick out excesses; note: y. still contains covariates
y.[,datvar] <- y.[,datvar] - threshold # replace excesses by excess sizes
n.ex <- nrow(y.) # number of excesses
## initial values for xi, eta, beta, and nu (nu = reparameterization of beta)
xi.init <- init[1]
beta.init <- init[2]
nu.init <- log((1+xi.init) * beta.init)
eta.init <- log(1+xi.init)
## iteration ###############################################################
iter <- 1 # iteration number
updates <- list() # (empty) list of update (gamGPDfitUp) objects
while(TRUE){
## update/fit parameters (eta, nu)
param.old <- if(iter==1)
matrix(rep(c(eta.init, nu.init), each=n.ex), ncol=2,
dimnames=list(rownames(y.), c("eta", "nu"))) # (n.ex,2)-matrix with cols "eta" and "nu"
else param.new
updates[[iter]] <- gamGPDfitUp(y., eta.nu = param.old,
etaFrhs = etaFrhs, nuFrhs = nuFrhs,
yname = datvar, adjust = adjust, verbose = verbose, ...) # returns a list of two gam() objects containing the fitted (eta, nu) (or list() in case a gam() call fails)
## check whether gam() calls failed
if(!length(updates[[iter]])) {
warning("gam() call(s) failed, will return list()")
return(list()) # one or both gam() calls in gamGPDfitUp() failed
}
## check convergence status of gam() in gamGPDfitUp()
conv <- sapply(updates[[iter]][c("eta", "nu")], function(x) x$converged)
if(any(!conv))
warning("gam() in gamGPDfitUp() did not converge for ", paste(c("eta","nu")[!conv], collapse=", "))
## check parameter dimensions (param.old and param.new have same rownames/colnames)
param.new <- sapply(updates[[iter]][c("eta", "nu")], fitted)
if(any(dim(param.new)!=dim(param.old)))
stop("gamGPDfitUp() returned an updated gamObject object of wrong dimension")
## tracing
MRD.. <- colMeans(abs((param.old-param.new)/param.old)) # mean relative distance for (eta, nu)
MRD. <- c(iter=iter, MRD..[1], MRD..[2])
MRD <- if(iter==1) MRD. else rbind(MRD, MRD., deparse.level=0)
if(progress){
cat("Mean relative differences in iteration ", iter,
" (eta, nu): (", sprintf("%g", MRD..[1]), ", ",
sprintf("%g", MRD..[2]), ")\n", sep="")
}
## check for "convergence"
conv <- c(MRD..[1] <= eps.eta, MRD..[2] <= eps.nu)
if(all(conv)) break
if(iter >= niter){
if(progress) warning("Reached 'niter' without the required precision for ",
paste(c("eta","nu")[!conv], collapse=", "))
break
}
iter <- iter + 1 # update iteration
}
## finish and return #######################################################
## fitted parameters
eta <- param.new[,"eta"] # fitted eta's
xi <- expm1(eta)
nu <- param.new[,"nu"] # fitted nu's
beta <- exp(nu)/(1+xi) # corresponding (fitted) beta (= exp(nu-eta))
## check beta for being a valid reparameterization (otherwise pGPD() fails)
## note: nu can be too small (=> beta = 0) => set corresponding beta to NA
ii <- is.finite(beta) & beta > 0
beta[!ii] <- NA
## compute the residuals
resi <- mapply(function(x, xi, beta) if(!is.na(beta)) -log1p(-pGPD(x, xi=xi, beta=beta)) else NA,
x=y.[,datvar], xi=xi, beta=beta)
## log-likelihood
logL <- rlogL(y=y.[,datvar], eta=eta, nu=nu) # reparameterized log-likelihood (reparameterization doesn't matter, it's the same *value* as the original log-likelihood)
## fitted gamObjects for eta and nu and standard errors
## (contained in updates but for convenience we treat this additionally)
etaObj <- updates[[iter]]$eta
nuObj <- updates[[iter]]$nu
se.eta <- updates[[iter]]$eta.weights
se.nu <- updates[[iter]]$nu.weights
## determine *all* combinations of the provided covariates
## eta
eta.covars <- names(etaObj$var.summary)
x.eta <- x[,eta.covars, drop = FALSE] # all cols with covariates used for fitting eta
all.covars.eta <- lapply(seq_len(ncol(x.eta)), function(j) sort(unique(x.eta[,j]))) # determine levels
## => we only can determine those which appear at least in one column
names(all.covars.eta) <- colnames(x.eta) # put in names
## nu
nu.covars <- names(nuObj$var.summary)
x.nu <- x[,nu.covars, drop=FALSE] # all cols with covariates used for fitting nu
all.covars.nu <- lapply(seq_len(ncol(x.nu)), function(j) sort(unique(x.nu[,j]))) # determine levels
names(all.covars.nu) <- colnames(x.nu) # put in names
## build list
res <- list(xi=xi, # estimated xi
beta=beta, # estimated beta
eta=eta, # estimated eta
nu=nu, # estimated nu
se.eta=se.eta, # standard error for eta
se.nu=se.nu, # standard error for nu
eta.covar=all.covars.eta, # (unique) covariates for eta
nu.covar=all.covars.nu, # (unique) covariates for nu
covar=y.[,union(eta.covars, nu.covars), drop=FALSE], # *available* (not necessarily all) covariate combinations used for fitting beta (= eta *and* nu)
y=y.[,datvar], # excesses
res=resi, # residuals
MRD=MRD, # mean relative distances between old/new (eta, nu) for all iterations
logL=logL, # log-likelihood at the estimated parameters
etaObj=etaObj, # gamObject for estimated eta (return object of mgcv::gam())
nuObj=nuObj) # gamObject for estimated nu (return object of mgcv::gam())
if(include.updates) res <- c(res, etaUpdates=lapply(updates, `[[`, "eta"), # updates for eta for each iteration (list of gamObject objects); contains etaObj as last element
nuUpdates=lapply(updates, `[[`, "nu")) # updates for nu for each iteration (list of gamObject objects); contains nuObj as last element
## return
res
}
### gamGPDboot() ###############################################################
##' @title Post-blackend Bootstrap of Chavez-Demoulin and Davison (2005) for gamGPDfit()
##' @param x see gamGPDfit()
##' @param B number of bootstrap replications
##' @param threshold see gamGPDfit()
##' @param nextremes see gamGPDfit()
##' @param datvar see gamGPDfit()
##' @param etaFrhs see gamGPDfit()
##' @param nuFrhs see gamGPDfit()
##' @param init see gamGPDfit()
##' @param niter see gamGPDfit()
##' @param include.updates see gamGPDfit()
##' @param eps.eta see gamGPDfit()
##' @param eps.nu see gamGPDfit()
##' @param boot.progress logical indicating whether progress information is displayed
##' @param progress see gamGPDfit() (only used if progress==TRUE)
##' @param adjust logical indicating whether non-real values of the derivatives are adjusted
##' @param verbose see gamGPDfit() (only used if progress==TRUE)
##' @param debug logical indicating whether initial fit is saved
##' @param ... see gamGPDfit()
##' @return a list of length B+1, the first component being the fitted object
##' as returned by gamGPDfit(); the other components contain similar
##' objects based on the B bootstrap replications, but can be list() in case
##' gamGPDfit() reports it (see there)
##' @author Marius Hofert
gamGPDboot <- function(x, B, threshold, nextremes=NULL, datvar, etaFrhs, nuFrhs,
init=fit.GPD(x[,datvar], threshold=threshold, type="pwm", verbose=FALSE)$par.ests,
niter=32, include.updates=FALSE, eps.eta=1e-5, eps.nu=1e-5,
boot.progress=TRUE, progress=FALSE, adjust=TRUE, verbose=FALSE,
debug=FALSE, ...)
{
## progress
if(boot.progress) {
if(progress) cat("\nStarting initial fit:\n") else {
pb <- txtProgressBar(max=B+1, style=if(isatty(stdout())) 3 else 1) # setup progress bar
on.exit(close(pb)) # close progress bar
}
}
## (major) fit using gamGPDfit()
fit <- gamGPDfit(x=x, threshold=threshold, nextremes=nextremes, datvar=datvar,
etaFrhs=etaFrhs, nuFrhs=nuFrhs, init=init, niter=niter,
include.updates=include.updates, eps.eta=eps.eta, eps.nu=eps.nu,
progress=if(!boot.progress) FALSE else progress, adjust=adjust,
verbose=if(!boot.progress) FALSE else verbose, ...)
## => can be list() (if gam() in gamGPDfitUp() failed)
if(!length(fit))
stop("gamGPDfit() reported that gam() in gamGPDfitUp() failed in the major fit (already)")
## progress
if(boot.progress) if(progress) cat("\n") else setTxtProgressBar(pb, 1)
## pick out fitted values
eta <- fit$eta # fitted eta
nu <- fit$nu # fitted nu
xi <- expm1(eta) # fitted xi
beta <- exp(nu)/(1+xi) # fitted beta
## for debugging
if(debug) save(fit, file="gamGPDboot_debug.rda")
## post-blackened bootstrap; see Chavez-Demoulin and Davison (2005)
rnum <- 1 # iteration number
bfit <- lapply(1:B, function(b){
## resample residuals within each group of (same) covariates
## => in particular, the *number* of excesses remains the same
## for each covariate combination
rr <- ave(fit$res, fit$covar,
FUN=function(r) if(length(r)==1) r else
sample(r, size=length(r), replace=TRUE))
## compute corresponding excesses
y. <- mapply(qGPD, p=-expm1(-rr), xi=xi, beta=beta) # reconstruct excesses from residuals
x. <- data.frame(fit$covar, y=y.) # add excesses/covariates
## progress
if(boot.progress && progress) cat("Starting fit in bootstrap run ",
b, " of ", B, ":\n", sep="")
## call gamGPDfit()
## note: threshold=0 (since the data is x.!)
## => we discard those excesses which are equal to 0
bfitobj <- gamGPDfit(x=x., threshold=0, nextremes=nextremes, datvar="y",
etaFrhs=etaFrhs, nuFrhs=nuFrhs,
init=fit.GPD(x.[,"y"], threshold=0, type="pwm", verbose=FALSE)$par.ests,
niter=niter, include.updates=include.updates,
eps.eta=eps.eta, eps.nu=eps.nu,
progress=if(!boot.progress) FALSE else progress,
adjust=adjust,
verbose=if(!boot.progress) FALSE else verbose, ...)
## => can be list() (if gam() in gamGPDfitUp() failed)
## progress
if(boot.progress) if(progress) cat("\n") else setTxtProgressBar(pb, b+1)
## return fit
bfitobj
})
## return list of length B+1 containing all the gamGPDfit() objects
c(fit=list(fit), bfit=bfit)
}
### Functions to extract fitted results and for predicting #####################
##' @title Extract Fits in a minimalized form
##' @param x object as returned by gam()
##' @return a list with components
##' covar: containing the ('minimalized') covariate combinations
##' fit: corresponding fitted values
##' @author Marius Hofert
get.gam.fit <- function(x)
{
## check x
stopifnot(inherits(x, "gam"))
## determine the minimal grid which contains all combinations of covars used for fitting the model
covar.nms <- if(length(x$var.summary)>0) names(x$var.summary) else "1" # names of covariates used for fitting (right-hand side of the formula in gam()); NULL if no covariates are used
covars <- if(length(x$var.summary)>0) x$model[,covar.nms, drop=FALSE] else NULL # build 'minimal' grid of covariates used for fitting
## determine fitted values for each covariate combination in (the 'minimalized') covars
## => Only combinations of the covariates used for fitting are given and thus
## there are no fitted values returned for each excess.
y <- cbind(covars, value=x$fitted.values) # value = lambda or rho
frml <- as.formula(paste("value ~", paste(rev(covar.nms), collapse=" + ")))
covar.hat <- aggregate(frml, data=y, function(z) z[1]) # pick out only first value (they are equal anyways)
covar.hat <- covar.hat[,rev(names(covar.hat)), drop=FALSE] # revert again to keep column order [now value is in the *first* column]
## return
list(covar = if(ncol(covar.hat) > 1) covar.hat[,-1] else NULL, # covariate combinations used for fitting
fit = covar.hat[,1]) # fitted value
}
##' @title Compute Predicted lambda or rho and Pointwise Asymptotic Two-Sided 1-alpha Confidence Intervals
##' @param x object as returned by gam()
##' @param newdata 'newdata' object as required by predict(); named data.frame
##' of type expand.grid(covar1=, covar2=) with at least the covariates
##' used for fitting with gam(); if more are provided, predict() returns
##' values which are equal uniformly over all of these additional
##' covariates. Each covariate which appears when fitting with gam() can
##' have more values than were actually used in gam() (for example,
##' half-years). In this case predict() 'interpolates' correctly with the
##' fitted model.
##' @param alpha significance level
##' @return a list with components
##' covar: containing the covariate combinations as provided by newdata
##' predict: the predicted lambda or rho
##' CI.low: lower CI [based on *predicted* values]
##' CI.up: upper CI [based on *predicted* values]
##' @author Marius Hofert
gam.predict <- function(x, newdata=NULL, alpha=0.05, value=c("lambda", "rho"))
{
## check x
stopifnot(inherits(x, "gam"))
value <- match.arg(value)
## default for newdata
if(is.null(newdata)) { # choose useful default (exactly the covariates used for fitting but *all* combis of such)
if(length(x$var.summary) > 0) {
covar.nms <- names(x$var.summary) # names of covariates used for fitting (right-hand side of the formula in gam())
## determine the minimal grid which contains all combinations of covars used for fitting the model
covars <- x$model[,covar.nms, drop=FALSE] # build 'minimal' grid of covariates used for fitting
covars. <- lapply(1:ncol(covars), function(j) sort(unique(covars[,j]))) # determine levels
names(covars.) <- colnames(covars) # put in names
newdata <- expand.grid(rev(covars.))[,rev(seq_len(length(covars.))), drop=FALSE] # expand grid with reverted covariates (and reverting back) to guarantee the same sorting order of rows as covars
## check newdata
if(!all(covar.nms %in% colnames(newdata)))
stop("'newdata' requires at least the covariates used for fitting ", value, " via gam()")
} else {
newdata <- data.frame(dummy.covar=1)
}
}
## predict (note: can contain half-years etc.)
pred <- predict(x, newdata=newdata, se.fit=TRUE) # predict object
pred. <- as.numeric(pred$fit) # predicted values (lambda/rho)
se. <- as.numeric(pred$se.fit) # standard error
qa2 <- qnorm(1-alpha/2) # 1-alpha/2 quantile of N(0,1)
## return (exp() is due to family=poisson used for lambda;
## rho comes from a logistic regression => not needed there)
list(covar = if(length(x$var.summary) > 0) newdata else NULL, # covariate combinations as specified by newdata
predict = if(value=="lambda") exp(pred.) else pred., # predicted lambda/rho
CI.low = if(value=="lambda") exp(pred.-qa2*se.) else pred.-qa2*se., # lower CI [based on *predicted* values]
CI.up = if(value=="lambda") exp(pred.+qa2*se.) else pred.+qa2*se.) # upper CI [based on *predicted* values]
}
##' @title Compute Fitted GPD Parameters xi and beta and Bootstrapped Pointwise Two-Sided
##' 1-alpha Confidence Intervals
##' @param x object as returned by gamGPDboot()
##' @param alpha significance level
##' @return a list with components
##' xi: a list with components
##' covar: a data.frame (possibly data.frame()) containing the 'minimal'
##' covariate combinations for the covariates used for fitting xi
##' fit: corresponding fitted xi's
##' CI.low: corresponding lower CIs
##' CI.up: corresponding upper CIs
##' boot: a matrix containing the corresponding bootstrapped xi's
##' beta: same as xi, just for beta.
##' @author Marius Hofert
##' Note: Standard errors as for lambda would be available via predict(..., se.fit=TRUE),
##' but only for xi and nu, not for beta. That's why we need bootstrapped values
##' here (and thus only have CIs for the fitted values)
get.GPD.fit <- function(x, alpha=0.05)
{
if(length(x[[1]])==0)
stop("length(x[[1]]) must be > 0; otherwise, already the fitting step in gamGPDboot() failed")
## => x[[1]] serves as a role model (for the minimized grid of covariate combinations)
x. <- x[sapply(x, length) > 0] # pick out non-empty (= non-failed gam()) sublists
l. <- length(x.)
if(l. == 0) stop("no non-failed fitted object available")
l <- length(x)
if(l. != l)
warning("found ", l-l.," list() in x (failed fits); those (resampled) cases will be discarded")
## => guarantees that xi.mat. etc. are indeed matrices
## Note: 1) The matrices xi.mat. and beta.mat. below typically have a huge number of rows nr.
## For each combination of covariates, they contain the same fitted value for
## each excess. This is 'overhead' we don't want. We therefore now pick
## out the fitted values for each unique combination of covariates which was
## originally used for fitting.
## 2) We first consider beta, since that consists of the largest number of covars
## (by having those of xi *and* nu)
## determine the minimal grid which only contains all *available* combinations of
## covars used for fitting xi and nu (and thus *beta*) [in particular not for each excess]
## x.[[1]] is the role model; this is the 'long' version (covariate combination for each excesses)
covars. <- x.[[1]]$covar
if(length(covars.) == 0) { # neither xi nor nu (thus beta) depend on covariates
covar.index <- 1
covars <- data.frame()
} else { # at least one of xi or nu has covariates => minimize to unique covariate combinations
covar.nms <- names(covars.) # names of covariates used for fitting xi and nu (thus beta)
y <- cbind(x.[[1]]$covar, index=seq_len(nrow(covars.))) # append index ('long' version)
frml <- as.formula(paste("index ~", paste(rev(covar.nms), collapse=" + "))) # formula [with reverted covariates to guarantee the same sorting order of rows (cols are then reverted but we don't care here)]
covar.index <- aggregate(frml, data=y, FUN=function(z) z[1])[,"index"] # pick out only first value (they are equal anyways)
covars <- y[covar.index, covar.nms, drop=FALSE] # minimal version
rownames(covars) <- NULL
}
## now consider the eta covariates (and thus those of xi)
xi.covars. <- x.[[1]]$eta.covar
if(length(xi.covars.) == 0) {
xi.covar.index <- 1
xi.covars <- data.frame()
} else { # xi has at least one covariate => minimize to unique covariate combinations
xi.covar.nms <- names(xi.covars.) # names of covariates used for fitting xi/eta
xi.frml <- as.formula(paste("index ~", paste(rev(xi.covar.nms), collapse=" + ")))
xi.covar.index <- aggregate(xi.frml, data=y, FUN=function(z) z[1])[,"index"] # pick out only first value (they are equal anyways)
xi.covars <- y[xi.covar.index, xi.covar.nms, drop=FALSE] # minimal version
rownames(xi.covars) <- NULL
}
## determine fitted values for each covariate combination in (the 'minimalized') covars
## pick out all (max. B+1) vectors of estimated xi/beta; (nr, <= B+1) matrix
xi.mat. <- sapply(x., `[[`, "xi")
xi.mat <- xi.mat.[xi.covar.index, , drop=FALSE]
rownames(xi.mat) <- NULL; colnames(xi.mat) <- NULL
beta.mat. <- sapply(x., `[[`, "beta")
beta.mat <- beta.mat.[covar.index, , drop=FALSE]
rownames(beta.mat) <- NULL; colnames(beta.mat) <- NULL
## compute CIs (derived from fitted values + bootstrapped ones)
## 'na.rm=TRUE' is for those covariate combinations where there is no data
## (=> the whole row is NA)
xi.CI <- t(apply(xi.mat, 1, quantile, probs=c(alpha/2, 1-alpha/2), na.rm=TRUE, names=FALSE)) # CIs (low, up) for xi; (nrow(xi.mat), 2) matrix
beta.CI <- t(apply(beta.mat, 1, quantile, probs=c(alpha/2, 1-alpha/2), na.rm=TRUE, names=FALSE)) # CIs (low, up) for beta; (nrow(beta.mat), 2) matrix
## result (note: beta depends on xi and nu => on all covariates)
list(xi = list(covar = xi.covars, # covariate combinations used for fitting (or data.frame())
fit = xi.mat[,1], # fitted xi
CI.low = xi.CI[,1], # lower CI
CI.up = xi.CI[,2], # upper CI
boot = if(ncol(xi.mat) > 1) xi.mat[,-1] else NULL), # bootstrapped xi's
beta = list(covar = covars, # covariate combinations used for fitting (or data.frame())
fit = beta.mat[,1], # fitted beta
CI.low = beta.CI[,1], # lower CI
CI.up = beta.CI[,2], # upper CI
boot = if(ncol(beta.mat) > 1) beta.mat[,-1] else NULL)) # bootstrapped beta's
}
##' @title Compute Predicted GPD Parameters xi and beta
##' @param x object as returned by gamGPDboot()
##' @param xi.newdata 'newdata' object for xi as required by predict(); named data.frame
##' of type expand.grid(covar1=, covar2=) with at least the covariates
##' used for fitting xi with gam(); if more are provided, predict() returns
##' values which are equal uniformly over all of these additional
##' covariates. Each covariate which appears when fitting with gam() can
##' have more values than were actually used in gam() (for example,
##' half-years). In this case predict() 'interpolates' correctly with the
##' fitted model.
##' @param beta.newdata similar to xi.newdata. Since beta is estimated based on xi
##' (and nu), beta.newdata must contain at least the covariates of xi.newdata.
##' @return a list with components
##' xi: a list with components
##' covar: a data.frame containing the covariate combinations as provided by xi.newdata
##' predict: the predicted xi's
##' beta: same as xi, just for beta; predicted values are based on beta.newdata
##' @author Marius Hofert
GPD.predict <- function(x, xi.newdata = NULL, beta.newdata = NULL)
{
## default for xi.newdata and beta.newdata
if(is.null(xi.newdata)) { # choose useful default (covariates used for fitting but *all* combis of such)
xi.newdata <- if(length(x[[1]]$eta.covar)>0)
expand.grid(rev(x[[1]]$eta.covar))[,rev(seq_len(length(x[[1]]$eta.covar))), drop=FALSE]
else data.frame(xi.dummy.covar=1)
}
if(is.null(beta.newdata)) {
fulllist <- c(x[[1]]$eta.covar, x[[1]]$nu.covar) # some may be duplicate
sublist <- fulllist[unique(names(fulllist))] # pick out maximal unique subset
beta.newdata <- if(length(x[[1]]$eta.covar)>0 || length(x[[1]]$nu.covar)>0)
expand.grid(rev(sublist))[,rev(seq_len(length(sublist))), drop=FALSE] # build grid
else data.frame(beta.dummy.covar=1)
}
## check xi.newdata and beta.newdata (true for the default)
if(!all(names(x[[1]]$eta.covar) %in% colnames(xi.newdata)))
stop("'xi.newdata' requires at least the covariates used for fitting xi via gamGPDfit()")
if(!all(names(x[[1]]$covar) %in% colnames(beta.newdata)))
stop("'beta.newdata' requires at least the covariates used for fitting beta via gamGPDfit()")
## => implicitly also checks that beta.newdata contains the covariates of both eta and nu
## predict
## note: - *.newdata can be of different length than fitted values
## (can contain half-years etc.)
## - eta.newdata and beta.newdata can be of different length
## (depending on the covariates used for fitting, for example)
eta.pred <- as.numeric(predict(x[[1]]$etaObj, newdata=xi.newdata)) # predict eta on xi.newdata
eta.pred.beta <- as.numeric(predict(x[[1]]$etaObj, newdata=beta.newdata)) # predict eta on beta.newdata
nu.pred.beta <- as.numeric(predict(x[[1]]$nuObj, newdata=beta.newdata)) # predict nu on beta.newdata
## note: - there is no betaObj so we can't predict beta directly (only through nu)
## - we predict eta and nu both on beta.newdata since that's what we need
## to predict beta (see below)
## result
list(xi = list(covar = if(length(x[[1]]$eta.covar)>0) xi.newdata else NULL, # covariate combinations as specified by newdata
predict = expm1(eta.pred)), # predicted xi
beta = list(covar = if(length(x[[1]]$eta.covar)>0 || length(x[[1]]$nu.covar)>0)
beta.newdata else NULL, # covariate combinations as specified by newdata
predict = exp(nu.pred.beta - eta.pred.beta))) # predicted beta
}
### Computing risk measures ####################################################
##' @title Compute Value-at-Risk or Expected Shortfall for a GPD
##' @param x matrix with three columns containing rho (estimate for \bar{F}(u)
##' depending on covariates; obtained via logistic regression),
##' corresponding xi and beta
##' @param alpha confidence level
##' @param u threshold
##' @param method either "VaR" for Value-at-Risk or "ES" for expected shortfall
##' @return Value-at-Risk or expected shortfall
risk.measure <- function(x, alpha, u, method = c("VaR", "ES"))
{
if(!is.matrix(x)) x <- rbind(x, deparse.level=0L)
stopifnot(ncol(x)==3)
rho <- x[,1] # rho (estimate for \bar{F}(u) depending on covariates)
xi <- x[,2] # corresponding xi
beta <- x[,3] # corresponding beta
method <- match.arg(method)
switch(method,
"VaR"={
## The theory:
## The number of exceedances until t is Poi(Lambda(t)) distributed,
## hence N_t ~ Poi(Lambda(t)); in our case N_{x, t} ~
## Poi(Lambda(x, t)). The number of exceedances in [t, t+1]
## (1y here) is thus Poi(Lambda(x, [t,t+1])) distributed, with
## Lambda(x, [t,t+1]) = \int_t^{t+1} lambda(x, s) ds. Since our
## lambda(x, .) is constant in [t,t+1] (only depending on
## covariates x other than time), we obtain Lambda(x, [t,t+1]) =
## lambda(x, t) * (t+1 - t) = lambda(x, t). So the number of
## exceedances in [t,t+1] is Poi(lambda(x, t)) distributed and
## thus the expected number of exceedances in [t,t+1] is
## lambda(x, t); we do this additional approximation of
## considering the *expected* number to bring lambda into play
## and thus obtain a parametric estimators (depending on covariates).
## The denominator in the formula for VaR has to be
## \bar{F}(u) ~= N_u/n (s. QRM book, p. 283) ~= E[N_u]/n (our
## additional approximation) ~= lambda(x, t)/n_{x, t}, where
## n_{x, t} is the number of overall losses for covariate x in
## [t, t+1].
##
## The problems:
## 1) lambda(x, t)/n_{x, t} does not have to be in [0,1]
## (but it is for large enough thresholds u; if this is not
## the case (lambda [= expected number of exceedances] > n_{x, t}
## [= total number of events]), this clearly indicates that u
## needs to be larger).
## 2) If t is in the future, how would we predict n_{x, t}?
##
## The/A solution:
## We directly approximate \bar{F}(u) by a *logistic* regression
## => predict() available
u + (beta/xi) * (( (1-alpha) / rho )^(-xi) - 1)
},
"ES"={
ES <- (risk.measure(x, alpha=alpha, u=u, method="VaR")+beta-xi*u)/(1-xi)
## adjust to be Inf if xi >= 1 (i.e., ES < 0)
## that's a convention, see p. 79 Coles (2001)
if(any(xi >= 1)) ES[xi >= 1] <- Inf
ES
},
stop("wrong method"))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.