Nothing
#' Estimators of the extremal coefficient
#'
#' These functions estimate the extremal coefficient using an approximate sample
#' from the Frechet distribution.
#'
#' The \bold{Smith} estimator: suppose \eqn{Z(x)} is simple max-stable vector
#' (i.e., with unit Frechet marginals). Then
#' \eqn{1/Z} is unit exponential and \eqn{1/\max(Z(s_1), Z(s_2))} is exponential
#' with rate \eqn{\theta = \max\{Z(s_1), Z(s_2)\}}.
#' The extremal index for the pair can therefore be calculated using the reciprocal mean.
#'
#' The \bold{Schlather and Tawn} estimator: the likelihood of the naive estimator for a pair
#' of two sites \eqn{A} is
#' \deqn{ \mathrm{card}\left\{ j: \max_{i \in A} X_i^{(j)}\bar{X}_i)>z \right\}
#' \log(\theta_A) - \theta_A \sum_{j=1}^n \left[ \max \left\{z, \max_{i \in A}
#' (X_i^{(j)}\bar{X}_i)\right\}\right]^{-1},}
#' where \eqn{\bar{X}_i = n^{-1} \sum_{j=1}^n 1/X_i^{(j)}} is the harmonic mean and \eqn{z}
#' is a threshold on the unit Frechet scale.
#' The search for the maximum likelihood estimate for every pair \eqn{A}
#' is restricted to the interval \eqn{[1,3]}. A binned version of the extremal coefficient cloud is also returned.
#' The Schlather estimator is not self-consistent. The Schlather and Tawn estimator includes as special case
#' the Smith estimator if we do not censor the data (\code{p = 0}) and do not standardize observations by their harmonic mean.
#'
#'
#' The \bold{F-madogram} estimator is a non-parametric estimate based on a stationary process
#' \eqn{Z}; the extremal coefficient satisfies
#' \deqn{\theta(h)=\frac{1+2\nu(h)}{1-2\nu(h)},}
#' where
#' \deqn{\nu(h) = \frac{1}{2} \mathsf{E}[|F(Z(s+h)-F(Z(s))|]}
#' The implementation only uses complete pairs to calculate the relative ranks.
#'
#' All estimators are coded in plain R and computations are not optimized. The estimation
#' time can therefore be significant for large data sets. If there are no missing observations,
#' the routine \code{fmadogram} from the \code{SpatialExtremes} package should be prefered as it is
#' noticeably faster.
#'
#' The data will typically consist of max-stable vectors or block maxima.
#' Both of the Smith and the Schlather--Tawn estimators require unit Frechet margins; the margins will be standardized
#' to the unit Frechet scale, either parametrically or nonparametrically unless \code{standardize = FALSE}.
#' If \code{method = "parametric"}, a parametric GEV model is fitted to each column of \code{dat} using maximum likelihood
#' estimation and transformed back using the probability integral transform. If \code{method = "nonparametric"},
#' using the empirical distribution function. The latter is the default, as it is appreciably faster.
#' @export
#' @importFrom grDevices rgb
#' @references Schlather, M. and J. Tawn (2003). A dependence measure for multivariate and spatial extremes, \emph{Biometrika}, \bold{90}(1), pp.139--156.
#' @references Cooley, D., P. Naveau and P. Poncet (2006). Variograms for spatial max-stable random fields, In: Bertail P., Soulier P., Doukhan P. (eds) \emph{Dependence in Probability and Statistics}. Lecture Notes in Statistics, vol. 187. Springer, New York, NY
#' @references R. J. Erhardt, R. L. Smith (2012), Approximate Bayesian computing for spatial extremes, \emph{Computational Statistics and Data Analysis}, \bold{56}, pp.1468--1481.
#' @param dat an \code{n} by \code{D} matrix of unit Frechet observations
#' @param coord an optional \code{d} by \code{D} matrix of location coordinates
#' @param estimator string indicating which model estimates to compute, one of \code{smith}, \code{schlather} or \code{fmado}.
#' @param method string indicating which method to use to transform the margins. See \bold{Details}
#' @param standardize logical; should observations be transformed to unit Frechet scale? Default is to transform
#' @param prob probability of not exceeding threshold \code{thresh}
#' @param thresh threshold parameter (default is to keep all data if \code{prob = 0}).
#' @param plot logical; should cloud of pairwise empirical estimates be plotted? Default to \code{TRUE}.
#' @param ... additional parameters passed to the function, currently ignored.
#' @return an invisible list with vectors \code{dist} if \code{coord} is non-null or else a matrix of pairwise indices \code{ind},
#' \code{extcoef} and the supplied \code{estimator}, \code{fmado} and \code{binned}. If \code{estimator == "schlather"}, an additional matrix with 2 columns containing the binned distance \code{binned} with the \code{h} and the binned extremal coefficient.
#' @examples
#' \dontrun{
#' coord <- 10*cbind(runif(50), runif(50))
#' di <- as.matrix(dist(coord))
#' dat <- rmev(n = 1000, d = 100, param = 3, sigma = exp(-di/2), model = 'xstud')
#' res <- extcoef(dat = dat, coord = coord)
#' # Extremal Student extremal coefficient function
#'
#' XT.extcoeffun <- function(h, nu, corrfun, ...){
#' if(!is.function(corrfun)){
#' stop('Invalid function \"corrfun\".')
#' }
#' h <- unique(as.vector(h))
#' rhoh <- sapply(h, corrfun, ...)
#' cbind(h = h, extcoef = 2*pt(sqrt((nu+1)*(1-rhoh)/(1+rhoh)), nu+1))
#' }
#' #This time, only one graph with theoretical extremal coef
#' plot(res$dist, res$extcoef, ylim = c(1,2), pch = 20); abline(v = 2, col = 'gray')
#' extcoefxt <- XT.extcoeffun(seq(0, 10, by = 0.1), nu = 3,
#' corrfun = function(x){exp(-x/2)})
#' lines(extcoefxt[,'h'], extcoefxt[,'extcoef'], type = 'l', col = 'blue', lwd = 2)
#' # Brown--Resnick extremal coefficient function
#' BR.extcoeffun <- function(h, vario, ...){
#' if(!is.function(vario)){
#' stop('Invalid function \"vario\".')
#' }
#' h <- unique(as.vector(h))
#' gammah <- sapply(h, vario, ...)
#' cbind(h = h, extcoef = 2*pnorm(sqrt(gammah/4)))
#' }
#' extcoefbr<- BR.extcoeffun(seq(0, 20, by = 0.25), vario = function(x){2*x^0.7})
#' lines(extcoefbr[,'h'], extcoefbr[,'extcoef'], type = 'l', col = 'orange', lwd = 2)
#'
#' coord <- 10*cbind(runif(20), runif(20))
#' di <- as.matrix(dist(coord))
#' dat <- rmev(n = 1000, d = 20, param = 3, sigma = exp(-di/2), model = 'xstud')
#' res <- extcoef(dat = dat, coord = coord, estimator = "smith")
#' }
extcoef <- function(dat,
coord = NULL,
thresh = NULL,
estimator = c("schlather", "smith", "fmado"),
standardize = TRUE,
method = c("nonparametric", "parametric"),
prob = 0,
plot = TRUE,
...) {
if(is.null(coord) && !is.null(list(...)$loc)){
coord <- list(...)$loc
}
stopifnot(is.matrix(dat),
ncol(dat) >= 2,
nrow(coord) == ncol(dat))
estimator <- match.arg(estimator)
fr <- dat
#Transform margins to unit Frechet
if(standardize && estimator != "fmado"){
method <- match.arg(method)
if(method == "nonparametric"){
for(j in 1:ncol(dat)){
nj <- sum(!is.na(dat[, j])) + 1L
fr[,j] <- -1/log(rank(dat[,j],
na.last = "keep",
ties.method = "random")/nj)
}
} else{ #parametric
for(j in 1:ncol(dat)){
fr[,j] <- -1/log(
do.call(what = mev::pgev,
c(list(q = dat[,j]),
fit.gev(dat[,j])$estimate)))
}
}
}
if(estimator == "schlather"){
if (is.null(thresh)) {
thresh <- -1/log(prob) #threshold on unit Frechet scale
}
# Negative log-likelihood function
nll <- function(theta, X, thresh) {
-sum(X > thresh) * log(theta) + theta * sum(1/pmax(thresh, X))
}
#Compute harmonic mean, reweight estimators
harmo_mean <- apply(1/fr, 2, mean, na.rm = TRUE)
transfo_fr <- sweep(x = fr,
MARGIN = 2,
STATS = harmo_mean,
FUN = "/")
}
N <- ncol(dat) * (ncol(dat) - 1)/2
theta_vals <- rep(0, N)
if(!is.null(coord)){
dist_vals <- rep(0, N)
} else{
ind_vals <- matrix(0L, nrow = N, ncol = 2)
}
if(estimator == "schlather"){
overlap_size <- rep(0, N)
}
k <- 0L
for (i in 1L:(ncol(dat) - 1L)) {
for (j in (i + 1L):ncol(dat)) {
k <- k + 1L
if(!is.null(coord)){
dist_vals[k] <- dist(coord[c(i, j), ])
} else{
ind_vals[k,] <- c(i, j)
}
if (estimator == "schlather") {
X <- as.vector(apply((na.omit(transfo_fr[, c(i, j)])), 1, max))
if (length(X) > 1) {
theta_vals[k] <- sum(X > thresh)/sum(1/pmax(thresh, X))
} else{
theta_vals[k] <- NA
}
overlap_size[k] <- sqrt(length(X)) #sqrt of size of overlapping set
} else if (estimator == "fmado") {
# F-madogram estimator
Y <- apply(na.omit(dat[, c(i, j)]), 2,
rank, ties.method = "random")
if(is.null(dim(Y))){
next()
} else{
nu <- sum(abs(Y[, 1] - Y[, 2]))/(2 * nrow(Y)^2)
theta_vals[k] <- (1 + 2 * nu)/(1 - 2 * nu)
}
} else if(estimator == "smith"){
theta_vals[k] <- 1/mean(1/apply(na.omit(fr[,c(i, j)]), 1, max))
}
}
}
reslist <- list()
# Reorder theta observations by distance (sorted)
if(!is.null(coord)){
theta_vals <- theta_vals[order(dist_vals)]
if(estimator == "schlather"){
if(any(theta_vals > 3)){
theta_vals[theta_vals > 3] <- 3
} else if(any(theta_vals < 1)){
theta_vals[theta_vals < 1] <- 1
}
overlap_size <- overlap_size[order(dist_vals)]
}
dist_vals <- sort(dist_vals)
if (estimator == "schlather") {
# na.omit will strip columns with NA if either method not calculated
d_max <- 0.75 * dist_vals[length(dist_vals)]
# Keep fraction only of data and create bins
del <- d_max/sqrt(N)
h <- seq(del/2, d_max, by = del)
# Bin thetas by interval, with halfwidth eps = del/2
bin <- findInterval(dist_vals, c(0, h + del/2))
ubin <- unique(bin)
binned_theta <- as.vector(by(data = cbind(theta_vals, overlap_size),
INDICES = bin, FUN = function(xmat) {
weighted.mean(x = xmat[, 1], w = xmat[, 2])
}, simplify = TRUE))
reslist$binned <- na.omit(cbind(h = h[ubin], binned.extcoef = binned_theta))
}
reslist$dist = dist_vals
class(reslist) <- "mev_extcoef"
} else{
reslist$ind <- ind_vals
}
reslist$estimator <- estimator
reslist$extcoef <- theta_vals
if(plot && !is.null(coord)){
plot(reslist)
}
return(invisible(reslist))
}
#' @export
plot.mev_extcoef <- function(x, ...){
ellipsis <- list(...)
nellips <- names(ellipsis)
if("tikz" %in% nellips){
tikz <- ellipsis$tikz
stopifnot(is.logical(tikz))
} else{
tikz <- FALSE
}
ymax <- 1.1*min(c(max(c(2.1,x$extcoef),
na.rm = TRUE),
3), na.rm = TRUE)
xmax <- 1.02*max(x$dist, na.rm = TRUE)
if (x$estimator == "schlather"){
graphics::plot(x = x$dist,
y = x$extcoef,
xlab = ifelse(tikz, "$h$", "h"),
ylab = ifelse(tikz, "$\\theta$", expression(theta(h))),
bty = "l",
cex = 0.8,
col = grDevices::rgb(0, 0, 0, alpha = 0.5),
ylim = c(1, ymax),
yaxs = "i",
xaxs = "i",
xlim = c(0, xmax))
lines(x$binned[, 1], x$binned[, 2], col = 2, lwd = 2)
} else{
graphics::plot(x = x$dist,
y = x$extcoef,
xlab = ifelse(tikz, "$h$", "h"),
ylab = ifelse(tikz, "$\\theta$", expression(theta(h))),
bty = "l",
cex = 0.8,
col = grDevices::rgb(0, 0, 0, alpha = 0.5),
ylim = c(1, ymax),
yaxs = "i", xaxs = "i",
xlim = c(0, xmax))
}
abline(h = 2, col = "grey")
title(main = "Extremal coefficient", sub = switch(x$estimator,
schlather = "Schlather and Tawn estimator",
fmado = "F-madogram estimator",
smith = "Smith estimator"))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.