# R/taildep.R In mev: Modelling of Extreme Values

#### Defines functions plot.mev_taildep

#' Coefficient of tail correlation and tail dependence
#'
#' For data with unit Pareto margins, the coefficient of tail dependence \eqn{\eta} is defined  via \deqn{\Pr(\min(X) > x) = L(x)x^{-1/\eta},}
#' where \eqn{L(x)} is a slowly varying function. Ignoring the latter, several estimators of \eqn{\eta} can be defined. In unit Pareto margins, \eqn{\eta} is a nonnegative shape parameter that can be estimated by fitting a generalized Pareto distribution above a high threshold. In exponential margins, \eqn{\eta} is a scale parameter and the maximum likelihood estimator of the latter is the Hill estimator. Both methods are based on peaks-over-threshold and the user can choose between pointwise confidence confint obtained through a likelihood ratio test statistic (\code{"lrt"}) or the Wald statistic (\code{"wald"}).
#'
#' The most common approach for estimation is the empirical survival copula, by evaluating the proportion of sample minima with uniform margins that exceed a given \eqn{x}. An alternative estimator uses a smoothed estimator of the survival copula using Bernstein polynomial, resulting in the so-called \code{betacop} estimator. Approximate pointwise confidence confint for the latter are obtained by assuming the proportion of points is binomial.
#'
#' The coefficient of tail correlation \eqn{\chi} is
#' \deqn{\chi = \lim_{u \to 1} \frac{\Pr(F_1(X_1)>u, \ldots, F_D(X_D)>u)}{1-u}.}
#' Asymptotically independent vectors have \eqn{\chi = 0}. The estimator uses an estimator of the survival copula
#' @note As of version 1.15, the percentiles used are from the minimum variable. This ensures that, regardless of the number of variables,
#' there is no error message returned because the quantile levels are too low for there to be observations
#' @export
#' @param data an \eqn{n} by \eqn{d} matrix of multivariate observations
#' @param u vector of percentiles between 0 and 1
#' @param nq number of quantiles of the structural variable at which to form a grid; only used if \code{u = NULL}.
#' @param qlim limits for the sequence \code{u} of the structural variable
#' @param depmeas dependence measure, either of \code{"eta"} or \code{"chi"}
#' @param confint string indicating the type of confidence interval for \eqn{\eta}, one of \code{"wald"} or \code{"lrt"}
#' @param level the confidence level required (default to 0.95).
#' @param trunc logical indicating whether the estimates and confidence intervals should be truncated in \eqn{[0,1]}
#' @param ties.method string indicating the type of method for \code{rank}; see \code{\link[base]{rank}} for a list of options. Default to \code{"random"}
#' @param empirical.transformation logical indicating whether observations should be transformed to pseudo-uniform scale (default to \code{TRUE}); otherwise, they are assumed to be uniform
#' @param method named list giving the estimation method for \code{eta} and \code{chi}. Default to \code{"emp"} for both.
#' @param plot logical; should graphs be plotted?
#' @param ... additional arguments passed to \code{plot}; current support for \code{main}, \code{xlab}, \code{ylab}, \code{add} and further \code{pch}, \code{lty}, \code{type}, \code{col} for points; additional arguments for confidence intervals are handled via \code{cipch}, \code{cilty}, \code{citype}, \code{cicol}.
#' @return a named list with elements
#' \itemize{
#' \item \code{u}: a \code{K} vector of percentile levels
#' \item \code{eta}: a \code{K} by 3 matrix with point estimates, lower and upper confidence intervals
#' \item \code{chi}: a \code{K} by 3 matrix with point estimates, lower and upper confidence intervals
#' }
#' @seealso \code{\link[evd]{chiplot}} for bivariate empirical estimates of \eqn{\chi}{chi} and \eqn{\bar{\chi}}{chibar}.
#' @examples
#' \dontrun{
#' set.seed(765)
#' # Max-stable model
#' dat <- rmev(n = 1000, d = 4, param = 0.7, model = "log")
#' taildep(dat, confint = 'wald')
#' }
#' @importFrom "utils" "combn"
taildep <- function (data,
u = NULL,
nq = 40,
qlim = c(0.8, 0.99),
depmeas = c("eta","chi"),
method = list(eta = c("emp","betacop", "gpd", "hill"),
chi = c("emp","betacop")),
confint = c("wald","lrt"),
level = 0.95,
trunc = TRUE,
empirical.transformation = TRUE,
ties.method = "random",
plot = TRUE, ...) {
if(is.character(depmeas)){
depmeas <- which(c("eta", "chi") %in% match.arg(depmeas, c("eta", "chi"), several.ok = TRUE))
}
depmeas_names <- c("eta", "chi")[depmeas]
if((! length(depmeas) %in% c(1L, 2L)) || !all(depmeas  %in% c(1L, 2L))){
stop("Invalid argument for \"depmeas\": must be either \"eta\",\"chi\" or both.")
}
if(! (is.list(method) && isTRUE(all(depmeas_names %in% names(method))))){
stop(paste0("\"method\" should be a list with components ",
paste(depmeas_names[depmeas], collapse = " and "),"."))
}
ties.method <- match.arg(ties.method, c("average", "first", "last", "random", "max", "min"))
stopifnot(is.logical(empirical.transformation))
data <- as.matrix(data)
data <- na.omit(data)
n <- nrow(data)
D <- ncol(data)
stopifnot(D > 1)
if(1 %in% depmeas){
methodeta <- match.arg(method$eta[1], choices = c("emp","betacop", "gpd", "hill")) } else{ methodeta <- c() } if(2 %in% depmeas){ methodchi <- match.arg(method$chi[1], choices = c("emp","betacop"))
} else{
methodchi <- c()
}
method <- c(methodeta, methodchi)
if(empirical.transformation){
# map observations to uniform scale
if("betacop" %in% method){
if(ties.method != "random"){
warning("Beta copula does not allow for ties; switching to ties.method = \"random\"")
ties.method <- "random"
}
}
datarank <- apply(data, 2, rank, ties.method = ties.method)
rowmin <- apply(datarank, 1, min)/(n+1)
} else{
if("betacop" %in% method){
warning("Beta copula is rank-based; ignoring argument \"empirical.transformation\".")
datarank <- apply(data, 2, rank, ties.method = ties.method)
}
rowmin <- apply(data, 1, min)
stopifnot(min(rowmin) > 0,
max(rowmin) < 1)
}
eps <- .Machine$double.eps^0.5 qlim2 <- c(min(rowmin) + eps, max(rowmin) - eps) if(is.null(u)){ if (!is.null(qlim)) { qlim <- sort(qlim) stopifnot(length(qlim) == 2L, qlim[1] > 1/(n+1), qlim[2] < n/(n+1)) } else{ stop("Invalid input: neither \"qlim\" nor \"u\" is provided.") } # Define u in terms of quantiles of the structure variable min Y qlims <- quantile(x = rowmin, probs = c(qlim[1], qlim[2])) u <- seq(from = qlims[1], to = qlims[2], length = nq) } else{ u <- sort(u) if(min(u) < qlim2[1] || max(u) > qlim2[2]){ warning("upper quantile limit is too high or lower quantile limit is too low") } #u <- u[u < qlim2[2] & u > qlim2[1]] nq <- length(u) } confint <- match.arg(confint) cnst <- qnorm((1 + level)/2) if("emp" %in% method){ rmin <- rowmin cbaru <- sapply(u, function(ui){sum(rmin > ui)})/n if(1 %in% depmeas && methodeta == "emp"){ etau <- log(1 - u) / log(cbaru) seeta <- sqrt((((log(1 - u)^2)/(log(cbaru)^4 * cbaru)) * (1 - cbaru)) / n) est_eta <- cbind(eta = etau, etalow = etau - cnst * seeta, etaupp = etau + cnst * seeta) } if(2 %in% depmeas && methodchi == "emp"){ chiu <- cbaru / (1 - u) sechi <- sqrt(cbaru * (1 - cbaru)/(1 - u)^2/n) est_chi <- cbind(chi = chiu, chilow = chiu - cnst * sechi, chiupp = chiu + cnst * sechi) } } if (1 %in% depmeas && methodeta == "gpd"){ ps <- 1/(1-rowmin) if(confint == "lrt"){ est_eta <- t(sapply(1/(1-u), function(th){ # At least 15 exceedances for model fit if(sum(ps>th) > 15){ fitu <- suppressWarnings(fit.gpd(ps, threshold = th)) prof <- try(suppressWarnings(gpd.pll(param = "shape", dat = ps, threshold = th, mod = "prof", mle = fitu$estimate, plot = FALSE)))
co <- try(suppressWarnings(confint(prof, prob = c((1-level)/2, (1+level)/2), print = FALSE)))
if(inherits(x = co, what = "try-error")){
return(c(fitu$estimate[2], rep(NA, 2))) } else{ return(co) } } else{ return(rep(NA, 3)) } })) } else{ est_eta <- t(sapply(1/(1-u), function(th){ if(sum(ps>th) > 15){ fitu <- suppressWarnings(fit.gpd(ps, threshold = th)) if(fitu$estimate[2] < -0.5){
c(fitu$estimate[2], NA, NA) } else{ c(fitu$estimate[2], fitu$estimate[2] + c(-1,1)*fitu$std.err[2]*qnorm((1+level)/2))
}
} else{
return(rep(NA, 3))
}
}))
}
} else if (1 %in% depmeas && methodeta == "hill"){
expll <- function(x, il){
sum(-log(il) - x/il)
}
es <- -log(1-rowmin)
est_eta <- t(sapply(-log(1-u), function(th){
samp <- es[es>th] - th
mle <- mean(samp)
maxll <- expll(samp, mle)
if(confint == "lrt"){
low <- try(uniroot(f = function(il){ 2*(expll(samp, il) - maxll) + qchisq(level, 1)}, interval = c(1e-8, mle)))
upp <- try(uniroot(f = function(il){ 2*(expll(samp, il) - maxll) + qchisq(level, 1)}, interval = c(mle, 5)))
return(c(mle,
ifelse(inherits(low, what = "try-error"), NA, low),
ifelse(inherits(upp, what = "try-error"), NA, upp)))
} else if(confint == "wald"){
return(c(mle, mle + c(-1,1)*mle/sqrt(length(samp))*qnorm((1+level)/2)))
}
}))
}
if("betacop" %in% method){
# This uses the ranks, not the observations
cbaru <- numeric(nq)
for(i in seq_len(nq)){
# Beta copula
# Evaluate quantiles of beta at u
# Model defined in terms of the survival copula Pr(V_i > u, i = 1,...,d)
# Flip arguments u -> 1-u
# and ranks (via n:1 instead of 1:n)
Fu <- sapply(rev(seq_len(n)), function(r){
suppressWarnings(pbeta(1 - u[i], r, n+1-r, log.p = TRUE))
})
cbaru[i] <- mean(exp(rowSums(matrix(Fu[datarank], nrow = n))))
}
cbaru <- pmax(cbaru, 0)
if(2 %in% depmeas && methodchi == "betacop"){
chiu <- cbaru / (1 - u)
sechi <- sqrt(cbaru * (1 - cbaru)/(1 - u)^2/n)
est_chi <- cbind(chiu, chiu - cnst * sechi, chiu + cnst * sechi)
}
if(1 %in% depmeas && methodeta == "betacop"){
etau <- log(1 - u) / log(cbaru)
seeta <- sqrt(((((log(1 - u))^2)/(log(cbaru)^4 * cbaru)) * (1 - cbaru)) / n)
est_eta <- cbind(etau, etau - cnst * seeta, etau + cnst * seeta)
}
}
if (trunc) {
if(1 %in% depmeas){
est_eta[est_eta > 1] <- 1
est_eta[est_eta < 0] <- 0
}
if(2 %in% depmeas){
est_chi[est_chi > 1] <- 1
est_chi[est_chi < 0] <- 0
}
}
out <- list(u = u)
if(1 %in% depmeas){
colnames(est_eta) <- c("coef", "lowerci", "upperci")
out$eta <- est_eta out$eta_method <- methodeta
out$eta_confint_method <- confint } if(2 %in% depmeas){ colnames(est_chi) <- c("coef", "lowerci", "upperci") out$chi <- est_chi
out$chi_method <- methodchi } out$dim <- dim(data)
class(out) <- "mev_taildep"
if(plot){
plot(out, ...)
}
invisible(out)
}

plot.mev_taildep <- function(x, ...){
if(length(x$u) < 3){ return(invisible(NULL)) } ellips <- list(...) depmeas <- c(!is.null(x$eta), !is.null(x$chi)) if(sum(depmeas) == 0){ stop("Invalid argument") } else if(sum(depmeas) == 2){ if(!is.null(ellips$which)){
if(is.character(ellips$which)){ show <- c("eta", "chi") %in% match.arg(ellips$which, choices = c("eta", "chi"), several.ok = TRUE)
} else if(is.numeric(ellips$which)){ show <- 1:2 %in% ellips$which
} else if(is.logical(ellips$which)){ show <- ellips$which
}
} else{
show <- rep(TRUE, 2)
}
} else{
show <- depmeas
}

if(!is.null(ellips$add)){ if(!isTRUE(ellips$add)){
if(sum(show) == 2){
par(mfrow = c(1,2))
}
on.exit(par(old.par))
}
} else{
if(sum(show) == 2){
par(mfrow = c(1,2))
}
on.exit(par(old.par))
}
if (is.null(ellips$main)) { main <- rep("",2)[show] } else{ if(length(ellips$main) != sum(show)){
stop("Invalid input: \"main\" must be of the same length as \"which\"")
}
main <- ellips$main } if (is.null(ellips$xlab)) {
xlab <- rep("u", 2)
} else{
xlab <- rep(ellips$xlab, length.out = 2) } if (is.null(ellips$ylab)) {
ylab <- expression(eta, chi)
} else{
if(length(ellips$ylab) != sum(show)){ stop("Invalid input: \"ylab\" must be of the same length as \"which\"") } ylab <- rep(ellips$ylab, length.out = 2)
}
if (is.null(ellips$col)) { col <- rep(1, 2) } else{ col <- rep(ellips$col, length.out = 2)
}
if (is.null(ellips$lty)) { lty <- rep(1, 2) } else{ lty <- rep(ellips$lty, length.out = 2)
}
if (is.null(ellips$pch)) { pch <- rep(20, 2) } else{ pch <- rep(ellips$pch, length.out = 2)
}
if (is.null(ellips$type)) { type <- rep("p", 2) } else{ type <- rep(ellips$type, length.out = 2)
}
#Arguments for confidence lines, with ci

if (is.null(ellips$cicol)) { cicol <- rep("grey", 2) } else{ cicol <- rep(ellips$cicol, length.out = 2)
}
if (is.null(ellips$cilty)) { cilty <- rep(2, 2) } else{ cilty <- rep(ellips$cilty, length.out = 2)
}
if (is.null(ellips$cipch)) { cipch <- rep(95, 2) #corresponds to - sign } else{ cipch <- rep(ellips$cipch, length.out = 2)
}
if (is.null(ellips$citype)) { citype <- rep("p", 2) } else{ citype <- rep(ellips$citype, length.out = 2)
}
D <- x$dim if (show[1]) { matplot(x$u, x$eta, type = c(type[1], rep(citype[1], 2)), pch = c(pch[1], rep(cipch[1], 2)), lty = c(lty[1], rep(cilty[1], 2)), col = c(col[1], rep(cicol[1], 2)), xlim = c(min(x$u) - diff(range(x$u))/100, pmin(1, max(x$u) + diff(range(x$u))/100)), ylim = c(0, 1), xaxs = "i", yaxs ="i", main = main[1], xlab = xlab[1], ylab = ylab[1], bty = "l", panel.first = { segments(x$u[1], 0, 1, 0, lty = 5, col = "grey")
segments(x$u[1], 1, 1, 1, lty = 5, col = "grey") segments(x$u[1], 1/D, 1, 1/D, lty = 5, col = "grey")
})
}
if (show[2]) {
matplot(x$u, x$chi, type = c(type[2], rep(citype[2], 2)), pch = c(pch[2], rep(cipch[2], 2)),
lty = c(lty[2], rep(cilty[2], 2)),
col = c(col[2], rep(cicol[2], 2)),
xlim = c(min(x$u) - diff(range(x$u))/100, pmin(1, max(x$u) + diff(range(x$u))/100)),
ylim = c(0, 1),
xaxs = "i",
yaxs ="i",
main = main[2],
bty = "l",
xlab = xlab[2],
ylab = ylab[2],
panel.first = {
segments(x$u[1], 0, 1, 0, lty = 5, col = "grey") segments(x$u[1], 1, 1, 1, lty = 5, col = "grey")
})
}

}


## Try the mev package in your browser

Any scripts or data that you put into this service are public.

mev documentation built on April 20, 2023, 5:10 p.m.