# R/extcoef.R In mev: Modelling of Extreme Values

#### Documented in extcoef

#' Estimators of the extremal coefficient
#'
#' These functions estimate the extremal coefficient using an approximate sample
#' from the Frechet distribution.
#'
#' The \bold{Smith} estimator: suppose \eqn{Z(x)} is simple max-stable vector
#' (i.e., with unit Frechet marginals). Then
#'  \eqn{1/Z} is unit exponential and \eqn{1/\max(Z(s_1), Z(s_2))} is exponential
#'  with rate \eqn{\theta = \max\{Z(s_1), Z(s_2)\}}.
#'  The extremal index for the pair can therefore be calculated using the reciprocal mean.
#'
#' The \bold{Schlather and Tawn} estimator: the likelihood of the naive estimator for a pair
#' of two sites \eqn{A} is
#' \deqn{ \mathrm{card}\left\{ j: \max_{i \in A} X_i^{(j)}\bar{X}_i)>z \right\}
#' \log(\theta_A) - \theta_A \sum_{j=1}^n \left[ \max \left\{z, \max_{i \in A}
#' (X_i^{(j)}\bar{X}_i)\right\}\right]^{-1},}
#' where \eqn{\bar{X}_i = n^{-1} \sum_{j=1}^n 1/X_i^{(j)}} is the harmonic mean and \eqn{z}
#' is a threshold on the unit Frechet scale.
#' The search for the maximum likelihood estimate for every pair \eqn{A}
#'  is restricted to the interval \eqn{[1,3]}. A binned version of the extremal coefficient cloud is also returned.
#' The Schlather estimator is not self-consistent. The Schlather and Tawn estimator includes as special case
#' the Smith estimator if we do not censor the data (\code{p = 0}) and do not standardize observations by their harmonic mean.
#'
#'
#' The \bold{F-madogram} estimator is a non-parametric estimate based on a stationary process
#' \eqn{Z}; the extremal coefficient satisfies
#' \deqn{\theta(h)=\frac{1+2\nu(h)}{1-2\nu(h)},}
#' where
#' \deqn{\nu(h) = \frac{1}{2} \mathsf{E}[|F(Z(s+h)-F(Z(s))|]}
#' The implementation only uses complete pairs to calculate the relative ranks.
#'
#' All estimators are coded in plain R and computations are not optimized. The estimation
#' time can therefore be significant for large data sets. If there are no missing observations,
#' the routine \code{fmadogram} from the \code{SpatialExtremes} package should be prefered as it is
#' noticeably faster.
#'
#' The data will typically consist of max-stable vectors or block maxima.
#' Both of the Smith and the Schlather--Tawn estimators require unit Frechet margins; the margins will be standardized
#' to the unit Frechet scale, either parametrically or nonparametrically unless \code{standardize = FALSE}.
#' If \code{method = "parametric"}, a parametric GEV model is fitted to each column of \code{dat} using maximum likelihood
#'  estimation and transformed back using the probability integral transform. If \code{method = "nonparametric"},
#'  using the empirical distribution function. The latter is the default, as it is appreciably faster.
#' @export
#' @importFrom grDevices rgb
#' @references Schlather, M. and J. Tawn (2003). A dependence measure for multivariate and spatial extremes, \emph{Biometrika}, \bold{90}(1), pp.139--156.
#' @references Cooley, D., P. Naveau and P. Poncet (2006). Variograms for spatial max-stable random fields,  In: Bertail P., Soulier P., Doukhan P. (eds) \emph{Dependence in Probability and Statistics}. Lecture Notes in Statistics, vol. 187. Springer, New York, NY
#' @references R. J. Erhardt, R. L. Smith (2012), Approximate Bayesian computing for spatial extremes, \emph{Computational Statistics and Data Analysis}, \bold{56}, pp.1468--1481.
#' @param dat an \code{n} by \code{D} matrix of unit Frechet observations
#' @param coord an optional \code{d} by \code{D} matrix of location coordinates
#' @param estimator string indicating which model estimates to compute, one of \code{smith}, \code{schlather} or \code{fmado}.
#' @param method string indicating which method to use to transform the margins. See \bold{Details}
#' @param standardize logical; should observations be transformed to unit Frechet scale? Default is to transform
#' @param prob probability of not exceeding threshold \code{thresh}
#' @param thresh threshold parameter (default is to keep all data if \code{prob = 0}).
#' @param plot logical; should cloud of pairwise empirical estimates be plotted? Default to \code{TRUE}.
#' @param ... additional parameters passed to the function, currently ignored.
#' @return an invisible list with vectors \code{dist} if \code{coord} is non-null or else a matrix of pairwise indices \code{ind},
#'  \code{extcoef} and the supplied \code{estimator}, \code{fmado} and \code{binned}. If \code{estimator == "schlather"}, an additional matrix with 2 columns containing the binned distance \code{binned} with the \code{h} and the binned extremal coefficient.
#' @examples
#' \dontrun{
#' coord <- 10*cbind(runif(50), runif(50))
#' di <- as.matrix(dist(coord))
#' dat <- rmev(n = 1000, d = 100, param = 3, sigma = exp(-di/2), model = 'xstud')
#' res <- extcoef(dat = dat, coord = coord)
#' # Extremal Student extremal coefficient function
#'
#' XT.extcoeffun <- function(h, nu, corrfun, ...){
#'   if(!is.function(corrfun)){
#'     stop('Invalid function \"corrfun\".')
#'   }
#'   h <- unique(as.vector(h))
#'   rhoh <- sapply(h, corrfun, ...)
#'   cbind(h = h, extcoef = 2*pt(sqrt((nu+1)*(1-rhoh)/(1+rhoh)), nu+1))
#' }
#' #This time, only one graph with theoretical extremal coef
#' plot(res$dist, res$extcoef, ylim = c(1,2), pch = 20); abline(v = 2, col = 'gray')
#' extcoefxt <- XT.extcoeffun(seq(0, 10, by = 0.1), nu = 3,
#'                             corrfun = function(x){exp(-x/2)})
#' lines(extcoefxt[,'h'], extcoefxt[,'extcoef'], type = 'l', col = 'blue', lwd = 2)
#' # Brown--Resnick extremal coefficient function
#' BR.extcoeffun <- function(h, vario, ...){
#'   if(!is.function(vario)){
#'     stop('Invalid function \"vario\".')
#'   }
#'   h <- unique(as.vector(h))
#'   gammah <- sapply(h, vario, ...)
#'   cbind(h = h, extcoef = 2*pnorm(sqrt(gammah/4)))
#' }
#' extcoefbr<- BR.extcoeffun(seq(0, 20, by = 0.25), vario = function(x){2*x^0.7})
#' lines(extcoefbr[,'h'], extcoefbr[,'extcoef'], type = 'l', col = 'orange', lwd = 2)
#'
#' coord <- 10*cbind(runif(20), runif(20))
#' di <- as.matrix(dist(coord))
#' dat <- rmev(n = 1000, d = 20, param = 3, sigma = exp(-di/2), model = 'xstud')
#' res <- extcoef(dat = dat, coord = coord, estimator = "smith")
#' }
extcoef <- function(dat,
coord = NULL,
thresh = NULL,
standardize = TRUE,
method = c("nonparametric", "parametric"),
prob = 0,
plot = TRUE,
...) {
if(is.null(coord) && !is.null(list(...)$loc)){ coord <- list(...)$loc
}
stopifnot(is.matrix(dat),
ncol(dat) >= 2,
nrow(coord) == ncol(dat))
estimator <- match.arg(estimator)
fr <- dat
#Transform margins to unit Frechet
method <- match.arg(method)
if(method == "nonparametric"){
for(j in 1:ncol(dat)){
nj <- sum(!is.na(dat[, j])) + 1L
fr[,j] <- -1/log(rank(dat[,j],
na.last = "keep",
ties.method = "random")/nj)
}
} else{ #parametric
for(j in 1:ncol(dat)){
fr[,j] <- -1/log(
do.call(what = mev::pgev,
c(list(q = dat[,j]),
fit.gev(dat[,j])$estimate))) } } } if(estimator == "schlather"){ if (is.null(thresh)) { thresh <- -1/log(prob) #threshold on unit Frechet scale } # Negative log-likelihood function nll <- function(theta, X, thresh) { -sum(X > thresh) * log(theta) + theta * sum(1/pmax(thresh, X)) } #Compute harmonic mean, reweight estimators harmo_mean <- apply(1/fr, 2, mean, na.rm = TRUE) transfo_fr <- sweep(x = fr, MARGIN = 2, STATS = harmo_mean, FUN = "/") } N <- ncol(dat) * (ncol(dat) - 1)/2 theta_vals <- rep(0, N) if(!is.null(coord)){ dist_vals <- rep(0, N) } else{ ind_vals <- matrix(0L, nrow = N, ncol = 2) } if(estimator == "schlather"){ overlap_size <- rep(0, N) } k <- 0L for (i in 1L:(ncol(dat) - 1L)) { for (j in (i + 1L):ncol(dat)) { k <- k + 1L if(!is.null(coord)){ dist_vals[k] <- dist(coord[c(i, j), ]) } else{ ind_vals[k,] <- c(i, j) } if (estimator == "schlather") { X <- as.vector(apply((na.omit(transfo_fr[, c(i, j)])), 1, max)) if (length(X) > 1) { theta_vals[k] <- sum(X > thresh)/sum(1/pmax(thresh, X)) } else{ theta_vals[k] <- NA } overlap_size[k] <- sqrt(length(X)) #sqrt of size of overlapping set } else if (estimator == "fmado") { # F-madogram estimator Y <- apply(na.omit(dat[, c(i, j)]), 2, rank, ties.method = "random") if(is.null(dim(Y))){ next() } else{ nu <- sum(abs(Y[, 1] - Y[, 2]))/(2 * nrow(Y)^2) theta_vals[k] <- (1 + 2 * nu)/(1 - 2 * nu) } } else if(estimator == "smith"){ theta_vals[k] <- 1/mean(1/apply(na.omit(fr[,c(i, j)]), 1, max)) } } } reslist <- list() # Reorder theta observations by distance (sorted) if(!is.null(coord)){ theta_vals <- theta_vals[order(dist_vals)] if(estimator == "schlather"){ if(any(theta_vals > 3)){ theta_vals[theta_vals > 3] <- 3 } else if(any(theta_vals < 1)){ theta_vals[theta_vals < 1] <- 1 } overlap_size <- overlap_size[order(dist_vals)] } dist_vals <- sort(dist_vals) if (estimator == "schlather") { # na.omit will strip columns with NA if either method not calculated d_max <- 0.75 * dist_vals[length(dist_vals)] # Keep fraction only of data and create bins del <- d_max/sqrt(N) h <- seq(del/2, d_max, by = del) # Bin thetas by interval, with halfwidth eps = del/2 bin <- findInterval(dist_vals, c(0, h + del/2)) ubin <- unique(bin) binned_theta <- as.vector(by(data = cbind(theta_vals, overlap_size), INDICES = bin, FUN = function(xmat) { weighted.mean(x = xmat[, 1], w = xmat[, 2]) }, simplify = TRUE)) reslist$binned <- na.omit(cbind(h = h[ubin], binned.extcoef = binned_theta))

}
reslist$dist = dist_vals class(reslist) <- "mev_extcoef" } else{ reslist$ind <- ind_vals
}
reslist$estimator <- estimator reslist$extcoef <- theta_vals
if(plot && !is.null(coord)){
plot(reslist)
}
return(invisible(reslist))
}

#' @export
plot.mev_extcoef <- function(x, ...){
ellipsis <- list(...)
nellips <- names(ellipsis)
if("tikz" %in% nellips){
tikz <- ellipsis$tikz stopifnot(is.logical(tikz)) } else{ tikz <- FALSE } ymax <- 1.1*min(c(max(c(2.1,x$extcoef),
na.rm = TRUE),
3), na.rm = TRUE)
xmax <- 1.02*max(x$dist, na.rm = TRUE) if (x$estimator == "schlather"){
graphics::plot(x = x$dist, y = x$extcoef,
xlab = ifelse(tikz, "$h$", "h"),
ylab = ifelse(tikz, "$\\theta$", expression(theta(h))),
bty = "l",
cex = 0.8,
col = grDevices::rgb(0, 0, 0, alpha = 0.5),
ylim = c(1, ymax),
yaxs = "i",
xaxs = "i",
xlim = c(0, xmax))
lines(x$binned[, 1], x$binned[, 2], col = 2, lwd = 2)
} else{
graphics::plot(x = x$dist, y = x$extcoef,
xlab = ifelse(tikz, "$h$", "h"),
ylab = ifelse(tikz, "$\\theta$", expression(theta(h))),
bty = "l",
cex = 0.8,
col = grDevices::rgb(0, 0, 0, alpha = 0.5),
ylim = c(1, ymax),
yaxs = "i", xaxs = "i",
xlim = c(0, xmax))

}
abline(h = 2, col = "grey")
title(main = "Extremal coefficient", sub = switch(x\$estimator,
schlather = "Schlather and Tawn estimator",