#' Generalized Pareto distribution
#'
#' Likelihood, score function and information matrix, bias,
#' approximate ancillary statistics and sample space derivative
#' for the generalized Pareto distribution
#'
#' @author Leo Belzile
#' @name gpd
#' @param par vector of \code{scale} and \code{shape}
#' @param dat sample vector
#' @param tol numerical tolerance for the exponential model
#' @param method string indicating whether to use the expected (\code{'exp'}) or the observed (\code{'obs'} - the default) information matrix.
#' @param V vector calculated by \code{gpd.Vfun}
#' @param n sample size
#' @section Usage: \preformatted{gpd.ll(par, dat, tol=1e-5)
#' gpd.ll.optim(par, dat, tol=1e-5)
#' gpd.score(par, dat)
#' gpd.infomat(par, dat, method = c('obs','exp'))
#' gpd.bias(par, n)
#' gpd.Fscore(par, dat, method = c('obs','exp'))
#' gpd.Vfun(par, dat)
#' gpd.phi(par, dat, V)
#' gpd.dphi(par, dat, V)}
#'
#' @section Functions:
#'
#' \itemize{
#' \item{\code{gpd.ll}:} {log likelihood}
#' \item{\code{gpd.ll.optim}:} {negative log likelihood parametrized in terms of \code{log(scale)} and shape
#' in order to perform unconstrained optimization}
#' \item{\code{gpd.score}:} {score vector}
#' \item{\code{gpd.infomat}:} {observed or expected information matrix}
#' \item{\code{gpd.bias}:} {Cox-Snell first order bias}
#' \item{\code{gpd.Fscore}:} {Firth's modified score equation}
#' \item{\code{gpd.Vfun}:} {vector implementing conditioning on approximate ancillary statistics for the TEM}
#' \item{\code{gpd.phi}:} {canonical parameter in the local exponential family approximation}
#' \item{\code{gpd.dphi}:} {derivative matrix of the canonical parameter in the local
#' exponential family approximation}
#' }
#' @references Firth, D. (1993). Bias reduction of maximum likelihood estimates, \emph{Biometrika}, \strong{80}(1), 27--38.
#' @references Coles, S. (2001). \emph{An Introduction to Statistical Modeling of Extreme Values}, Springer, 209 p.
#' @references Cox, D. R. and E. J. Snell (1968). A general definition of residuals, \emph{Journal of the Royal Statistical Society: Series B (Methodological)}, \strong{30}, 248--275.
#' @references Cordeiro, G. M. and R. Klein (1994). Bias correction in ARMA models, \emph{Statistics and Probability Letters}, \strong{19}(3), 169--176.
#' @references Giles, D. E., Feng, H. and R. T. Godwin (2016). Bias-corrected maximum likelihood estimation of the parameters of the generalized Pareto distribution, \emph{Communications in Statistics - Theory and Methods}, \strong{45}(8), 2465--2483.
#'
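#' @examples
#' ## Small usage sketch (assumes the package namespace providing these functions
#' ## is attached); data are simulated by inverting the GP distribution function.
#' set.seed(123)
#' sigma <- 2; xi <- 0.1
#' dat <- sigma * (runif(100)^(-xi) - 1) / xi
#' gpd.ll(c(sigma, xi), dat)                      # log likelihood at the true parameters
#' gpd.score(c(sigma, xi), dat)                   # score vector
#' gpd.infomat(c(sigma, xi), dat, method = "obs") # observed information matrix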
NULL
#' @title Generalized extreme value distribution
#'
#' @description Likelihood, score function and information matrix, bias,
#' approximate ancillary statistics and sample space derivative
#' for the generalized extreme value distribution
#'
#' @name gev
#' @param par vector of \code{loc}, \code{scale} and \code{shape}
#' @param dat sample vector
#' @param method string indicating whether to use the expected (\code{'exp'}) or the observed (\code{'obs'} - the default) information matrix.
#' @param V vector calculated by \code{gev.Vfun}
#' @param n sample size
#' @param p vector of probabilities
#' @section Usage: \preformatted{gev.ll(par, dat)
#' gev.ll.optim(par, dat)
#' gev.score(par, dat)
#' gev.infomat(par, dat, method = c('obs','exp'))
#' gev.retlev(par, p)
#' gev.bias(par, n)
#' gev.Fscore(par, dat, method=c('obs','exp'))
#' gev.Vfun(par, dat)
#' gev.phi(par, dat, V)
#' gev.dphi(par, dat, V)}
#'
#'
#' @section Functions:
#' \itemize{
#' \item{\code{gev.ll}:} {log likelihood}
#' \item{\code{gev.ll.optim}:} {negative log likelihood parametrized in terms of location, \code{log(scale)} and shape
#' in order to perform unconstrained optimization}
#' \item{\code{gev.score}:} {score vector}
#' \item{\code{gev.infomat}:} {observed or expected information matrix}
#' \item{\code{gev.retlev}:} {return level, corresponding to the \eqn{(1-p)}th quantile}
#' \item{\code{gev.bias}:} {Cox-Snell first order bias}
#' \item{\code{gev.Fscore}:} {Firth's modified score equation}
#' \item{\code{gev.Vfun}:} {vector implementing conditioning on approximate ancillary statistics for the TEM}
#' \item{\code{gev.phi}:} {canonical parameter in the local exponential family approximation}
#' \item{\code{gev.dphi}:} {derivative matrix of the canonical parameter in the local exponential family approximation}
#' }
#' @references Firth, D. (1993). Bias reduction of maximum likelihood estimates, \emph{Biometrika}, \strong{80}(1), 27--38.
#' @references Coles, S. (2001). \emph{An Introduction to Statistical Modeling of Extreme Values}, Springer, 209 p.
#' @references Cox, D. R. and E. J. Snell (1968). A general definition of residuals, \emph{Journal of the Royal Statistical Society: Series B (Methodological)}, \strong{30}, 248--275.
#' @references Cordeiro, G. M. and R. Klein (1994). Bias correction in ARMA models, \emph{Statistics and Probability Letters}, \strong{19}(3), 169--176.
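#' @examples
#' ## Small usage sketch (assumes the package namespace providing these functions
#' ## is attached); data are simulated by inverting the GEV distribution function.
#' set.seed(123)
#' mu <- 0; sigma <- 1; xi <- 0.1
#' dat <- mu + sigma * ((-log(runif(100)))^(-xi) - 1) / xi
#' gev.ll(c(mu, sigma, xi), dat)                  # log likelihood at the true parameters
#' gev.score(c(mu, sigma, xi), dat)               # score vector
#' gev.infomat(c(mu, sigma, xi), dat, method = "exp")
#' gev.retlev(c(mu, sigma, xi), p = 0.01)         # level exceeded with probability 0.01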
NULL
#' Log likelihood for the generalized Pareto distribution
#'
#' Function returning the log likelihood of a sample of size \code{n} from the GP distribution.
#' \code{gpd.ll.optim} returns the negative log likelihood parametrized in terms of \code{log(scale)} and shape
#' in order to perform unconstrained optimization.
#' @seealso \code{\link{gpd}}
#' @inheritParams gpd
#' @export
#' @keywords internal
gpd.ll <- function(par, dat, tol = 1e-05) {
  sigma = par[1]
  if (sigma < 0) {
    return(-Inf)
  }
  xi = par[2]
  n <- length(dat)
  if (abs(xi) > tol) {
    if (xi > -1) {
      # Most scenarios
      as.vector(-n * log(sigma) - (1 + 1/xi) * sum(log(pmax(1 + xi/sigma * dat, 0))))
    } else if (isTRUE(all.equal(xi, -1, check.attributes = FALSE))) {
      -n * log(max(dat))
    } else {
      -Inf
    }
  } else {
    as.vector(-n * log(sigma) - sum(dat)/sigma)
  }
}
#' @rdname gpd.ll
#' @inheritParams gpd
#' @export
gpd.ll.optim <- function(par, dat, tol = 1e-05) {
  sigma = exp(par[1])
  xi = par[2]
  if (abs(xi) > tol) {
    length(dat) * log(sigma) + (1 + 1/xi) * sum(log(pmax(1 + xi/sigma * dat, 0)))
  } else {
    length(dat) * log(sigma) + sum(dat)/sigma
  }
}
#' Score vector for the generalized Pareto distribution
#'
#' @seealso \code{\link{gpd}}
#' @inheritParams gpd
#' @export
#' @keywords internal
gpd.score <- function(par, dat) {
sigma = par[1]
xi = as.vector(par[2])
if (!isTRUE(all.equal(0, xi, check.attributes = FALSE))) {
c(sum(dat * (xi + 1)/(sigma^2 * (dat * xi/sigma + 1)) - 1/sigma), sum(-dat * (1/xi + 1)/(sigma *
(dat * xi/sigma + 1)) + log(pmax(dat * xi/sigma + 1, 0))/xi^2))
} else {
c(sum((dat - sigma)/sigma^2), sum(1/2 * (dat - 2 * sigma) * dat/sigma^2))
}
}
#' Information matrix for the generalized Pareto distribution
#'
#' The function returns the expected or observed information matrix.
#' @seealso \code{\link{gpd}}
#' @inheritParams gpd
#' @param nobs number of observations
#' @export
#' @keywords internal
gpd.infomat <- function(par, dat, method = c("obs", "exp"), nobs = length(dat)) {
method <- match.arg(method)
dat <- as.vector(dat)
sigma <- as.vector(par[1])
xi <- as.vector(par[2])
if (method == "obs") {
if (any((1 + xi * dat/sigma) < 0)) {
stop("Data outside of range specified by parameter, yielding a zero likelihood")
}
c11 <- (length(dat) - (1 + xi) * sum(dat * (2 * sigma + xi * dat)/(sigma + xi * dat)^2))/(sigma^2)
if (!isTRUE(all.equal(xi, 0, check.attributes = FALSE))) {
c12 <- (1 + xi) * sum(dat/(sigma + xi * dat)^2)/xi - sum(dat/(sigma + xi * dat))/(sigma *
xi)
c22 <- (2/xi^2) * sum(dat/(sigma + xi * dat)) - 2 * sum(log(pmax(dat * xi/sigma + 1, 0)))/(xi^3) +
(1 + 1/xi) * sum(dat^2/(sigma + xi * dat)^2)
} else {
c12 <- sum(-(dat^2 - dat * sigma)/sigma^3)
c22 <- sum(-1/3 * (2 * dat - 3 * sigma) * dat^2/sigma^3)
}
infomat <- -matrix(c(c11, c12, c12, c22), nrow = 2, ncol = 2, byrow = TRUE)
} else if (method == "exp") {
k22 <- -2/((1 + xi) * (1 + 2 * xi))
k11 <- -1/(sigma^2 * (1 + 2 * xi))
k12 <- -1/(sigma * (1 + xi) * (1 + 2 * xi))
infomat <- -nobs * cbind(c(k11, k12), c(k12, k22)) #fixed 12-10-2016
}
colnames(infomat) <- rownames(infomat) <- c("scale","shape")
return(infomat)
}
#' Tangent exponential model statistics for the generalized Pareto distribution
#'
#' Matrix of approximate ancillary statistics, sample space derivative of the
#' log likelihood and mixed derivative for the generalized Pareto distribution.
#' @seealso \code{\link{gpd}}
#' @inheritParams gpd
#' @export
#' @name gpd.temstat
#' @keywords internal
gpd.Vfun <- function(par, dat) {
sigma = par[1]
xi = par[2]
cbind(dat/sigma, sigma * (dat * xi/sigma + 1)^(-1/xi) * (log(dat * xi/sigma + 1)/xi^2 - dat/(sigma *
(dat * xi/sigma + 1) * xi))/(dat * xi/sigma + 1)^(-1/xi - 1))
}
#' @inheritParams gpd
#' @rdname gpd.temstat
#' @export
gpd.phi <- function(par, dat, V) {
sigma = par[1]
xi = par[2]
rbind(-(xi + 1)/(sigma * (dat * xi/sigma + 1))) %*% V
}
#' @inheritParams gpd
#' @export
#' @rdname gpd.temstat
gpd.dphi <- function(par, dat, V) {
sigma = par[1]
xi = as.vector(par[2])
if (!isTRUE(all.equal(xi, 0, check.attributes = FALSE))) {
rbind(xi * (1/xi + 1)/(sigma^2 * (dat * xi/sigma + 1)) - dat * xi^2 * (1/xi + 1)/(sigma^3 * (dat *
xi/sigma + 1)^2), -(1/xi + 1)/(sigma * (dat * xi/sigma + 1)) + dat * (xi + 1)/(sigma^2 *
(dat * xi/sigma + 1)^2) + 1/(sigma * (dat * xi/sigma + 1) * xi)) %*% V
} else {
rbind(rep(sigma^(-2), length(dat)), (dat - sigma)/sigma^2) %*% V
}
}
#' Log likelihood for the generalized extreme value distribution
#'
#' Function returning the log likelihood of a sample of size \code{n} from the GEV distribution.
#'
#' \code{gev.ll.optim} returns the negative log likelihood parametrized in terms of location, \code{log(scale)} and shape in order to perform unconstrained optimization.
#'
#' @inheritParams gev
#' @export
#' @keywords internal
#' @seealso \code{\link{gev}}
gev.ll <- function(par, dat) {
  dat <- as.vector(dat)
  par <- as.vector(par)
  tx <- pmax(1 + par[3]/par[2] * (dat - par[1]), 0)
  if (!isTRUE(all.equal(as.vector(par[3]), 0, tolerance = 1e-07, check.attributes = FALSE))) {
    sum(-log(par[2]) - (1/par[3] + 1) * log(tx) - tx^(-1/par[3]))
  } else {
    sum(-log(par[2]) - (dat - par[1])/par[2] - exp(-(dat - par[1])/par[2]))
  }
}
#' @rdname gev.ll
#' @inheritParams gev
#' @export
#' @keywords internal
gev.ll.optim <- function(par, dat) {
  tpar = par
  tpar[2] = exp(par[2])
  # scale parameter is passed on the log scale
  nll = -gev.ll(tpar, dat)
  return(nll)
}
#' Score vector for the generalized extreme value distribution
#'
#' @inheritParams gev
#' @export
#' @keywords internal
gev.score <- function(par, dat) {
mu = par[1]
sigma = par[2]
xi = as.vector(par[3])
if (!isTRUE(all.equal(xi, 0, tolerance = 1e-10, check.attributes = FALSE))) {
c(sum(-(pmax(0, -(mu - dat) * xi/sigma + 1))^(-1/xi - 1)/sigma - xi * (1/xi + 1)/(sigma * ((mu -
dat) * xi/sigma - 1))),
sum(-(dat - mu) * ((dat - mu) * xi/sigma + 1)^(-1/xi - 1)/sigma^2 +
(dat - mu) * (xi + 1)/(sigma^2 * ((dat - mu) * xi/sigma + 1)) - 1/sigma),
sum(-(mu - dat) * (1/xi + 1)/(sigma * ((mu - dat) * xi/sigma - 1)) -
(log1p(-(mu - dat) * xi/sigma)/xi^2 - (mu - dat)/(sigma * ((mu - dat) * xi/sigma - 1) * xi))/
(-(mu - dat) * xi/sigma + 1)^(1/xi) + log1p(-(mu - dat) * xi/sigma)/xi^2))
} else {
c(sum(-exp(mu/sigma - dat/sigma)/sigma + 1/sigma),
sum(mu * exp(mu/sigma - dat/sigma)/sigma^2 -
dat * exp(mu/sigma - dat/sigma)/sigma^2 - mu/sigma^2 - 1/sigma + dat/sigma^2),
sum((exp(-(dat/sigma)) *
(dat - mu) * (exp(mu/sigma) * (mu - dat) + exp(dat/sigma) * (dat - mu - 2 * sigma)))/(2 * sigma^2)))
}
}
#' Information matrix for the generalized extreme value distribution
#'
#' The function returns the expected or observed information matrix.
#' @inheritParams gev
#' @param nobs number of observations
#' @export
#' @keywords internal
gev.infomat <- function(par, dat, method = c("obs", "exp"), nobs = length(dat)) {
method <- match.arg(method)
par <- as.vector(par)
if (method == "exp") {
# (Expected) Fisher information does not depend on location parameter
if (length(par) == 3) {
sigma = par[2]
xi = as.vector(par[3])
} else {
sigma = par[1]
xi = as.vector(par[2])
}
if(xi < -0.5){
stop("The Fisher information is not defined if the shape parameter of the GEV is less than -0.5")
}
# Limiting case when xi = 0: fix to the value at zero
if (abs(xi) < 0.001) {
return(nobs * cbind(c(1/sigma^2, -0.422784335098467/sigma^2, 0.41184033042644/sigma), c(-0.422784335098467/sigma^2,
1.82368066085288/sigma^2, 0.332484907160274/sigma), c(0.41184033042644/sigma, 0.332484907160274/sigma,
2.42360605517703)))
}
p = (1 + xi)^2 * gamma(1 + 2 * xi)
q = gamma(2 + xi) * (digamma(1 + xi) + (1 + xi)/xi)
infomat <- nobs * cbind(c(p/sigma^2, -(p - gamma(2 + xi))/(sigma^2 * xi), (p/xi - q)/(sigma *
xi)), c(-(p - gamma(2 + xi))/(sigma^2 * xi), (1 - 2 * gamma(2 + xi) + p)/(sigma^2 * xi^2),
-(1 + digamma(1) + (1 - gamma(2 + xi))/xi - q + p/xi)/(sigma * xi^2)), c((p/xi - q)/(sigma *
xi), -(1 + digamma(1) + (1 - gamma(2 + xi))/xi - q + p/xi)/(sigma * xi^2), (pi^2/6 + (1 +
digamma(1) + 1/xi)^2 - 2 * q/xi + p/xi^2)/xi^2))
if (xi < 0.003 & xi > 0) {
# Linearization because numerically unstable in this region
y2 <- (pi^2/6 + (1 + digamma(1) + 1/0.003)^2 - 2 * (gamma(2 + 0.003) * (digamma(1 + 0.003) +
(1 + 0.003)/0.003))/0.003 + (1 + 0.003)^2 * gamma(1 + 2 * 0.003)/0.003^2)/0.003^2
y1 <- 2.42360605517703
infomat[3, 3] <- nobs * ((y2 - y1)/0.003 * xi + y1)
} else if (xi > -0.003 & xi < 0) {
y2 <- (pi^2/6 + (1 + digamma(1) - 1/0.003)^2 + 2 * (gamma(2 - 0.003) * (digamma(1 - 0.003) -
(1 - 0.003)/0.003))/0.003 + (1 - 0.003)^2 * gamma(1 - 2 * 0.003)/0.003^2)/0.003^2
y1 <- 2.42360605517703
infomat[3, 3] <- nobs * (-(y2 - y1)/0.003 * xi + y1)
}
colnames(infomat) <- rownames(infomat) <- c("loc","scale","shape")
return(infomat)
} else if (method == "obs") {
if (length(par) != 3) {
stop("Invalid parameter vector")
}
mu = par[1]
sigma = par[2]
xi = as.vector(par[3])
if(xi < -0.5){
stop("The observed information matrix is not defined if the shape parameter of the GEV is less than -0.5")
}
# Bug fixed 21-10-2016 (parameters were defined after they were used).
if (any((1 + xi * (dat - mu)/sigma) < 0)) {
stop("Data outside of range specified by parameter, yielding a zero likelihood")
}
infomat <- matrix(0, ncol = 3, nrow = 3)
if (!isTRUE(all.equal(xi, 0, check.attributes = FALSE))) {
infomat[1, 1] <- sum(-((dat - mu) * xi/sigma + 1)^(-1/xi - 2) * (xi + 1)/sigma^2 + xi^2 *
(1/xi + 1)/(sigma^2 * ((dat - mu) * xi/sigma + 1)^2))
infomat[1, 2] <- infomat[2, 1] <- sum(-(dat - mu) * ((dat - mu) * xi/sigma + 1)^(-1/xi -
2) * (xi + 1)/sigma^3 + ((dat - mu) * xi/sigma + 1)^(-1/xi - 1)/sigma^2 - xi * (1/xi +
1)/(sigma^2 * ((dat - mu) * xi/sigma + 1)) + (dat - mu) * xi^2 * (1/xi + 1)/(sigma^3 *
((dat - mu) * xi/sigma + 1)^2))
infomat[1, 3] <- infomat[3, 1] <- sum((-(mu - dat) * xi/sigma + 1)^(-1/xi - 1) * ((mu - dat) *
(1/xi + 1)/(sigma * ((mu - dat) * xi/sigma - 1)) - log1p(-(mu - dat) * xi/sigma)/xi^2)/sigma -
(1/xi + 1)/(sigma * ((mu - dat) * xi/sigma - 1)) + (mu - dat) * (xi + 1)/(sigma^2 * ((mu -
dat) * xi/sigma - 1)^2) + 1/(sigma * ((mu - dat) * xi/sigma - 1) * xi))
infomat[2, 3] <- infomat[3, 2] <- sum((mu - dat) * (-(mu - dat) * xi/sigma + 1)^(-1/xi -
1) * (log1p(-(mu - dat) * xi/sigma)/xi^2 - (mu - dat)/(sigma * ((mu - dat) * xi/sigma -
1) * xi))/sigma^2 + (mu - dat) * (1/xi + 1)/(sigma^2 * ((mu - dat) * xi/sigma - 1)) -
(mu - dat)^2 * (xi + 1)/(sigma^3 * ((mu - dat) * xi/sigma - 1)^2) - (mu - dat)/(sigma^2 *
((mu - dat) * xi/sigma - 1) * xi) + (mu - dat)^2/(sigma^3 * (-(mu - dat) * xi/sigma +
1)^(1/xi) * ((mu - dat) * xi/sigma - 1)^2))
infomat[3, 3] <- sum(-(log1p(-(mu - dat) * xi/sigma)/xi^2 - (mu - dat)/(sigma * ((mu -
dat) * xi/sigma - 1) * xi))^2/(-(mu - dat) * xi/sigma + 1)^(1/xi) + (2 * log1p(-(mu - dat) *
xi/sigma)/xi^3 - 2 * (mu - dat)/(sigma * ((mu - dat) * xi/sigma - 1) * xi^2) - (mu -
dat)^2/(sigma^2 * ((mu - dat) * xi/sigma - 1)^2 * xi))/(-(mu - dat) * xi/sigma + 1)^(1/xi) +
(mu - dat)^2 * (1/xi + 1)/(sigma^2 * ((mu - dat) * xi/sigma - 1)^2) - 2 * log1p(-(mu -
dat) * xi/sigma)/xi^3 + 2 * (mu - dat)/(sigma * ((mu - dat) * xi/sigma - 1) * xi^2))
infomat[2, 2] <- sum(-(mu - dat)^2 * (-(mu - dat) * xi/sigma + 1)^(-1/xi - 2) * (xi + 1)/sigma^4 -
2 * (mu - dat) * (-(mu - dat) * xi/sigma + 1)^(-1/xi - 1)/sigma^3 - 2 * (mu - dat) *
(xi + 1)/(sigma^3 * ((mu - dat) * xi/sigma - 1)) + (mu - dat)^2 * xi^2 * (1/xi + 1)/(sigma^4 *
((mu - dat) * xi/sigma - 1)^2) + 1/sigma^2)
} else {
infomat[1, 1] <- sum(-exp(mu/sigma - dat/sigma)/sigma^2)
infomat[1, 2] <- infomat[2, 1] <- sum(((mu + sigma) * exp(mu/sigma) - dat * exp(mu/sigma) -
sigma * exp(dat/sigma)) * exp(-dat/sigma)/sigma^3)
infomat[2, 2] <- sum(-2 * mu * exp(mu/sigma - dat/sigma)/sigma^3 + 2 * dat * exp(mu/sigma -
dat/sigma)/sigma^3 + 2 * mu/sigma^3 + 1/sigma^2 - 2 * dat/sigma^3 - (mu^2 * exp(mu/sigma) -
2 * mu * dat * exp(mu/sigma) + dat^2 * exp(mu/sigma)) * exp(-dat/sigma)/sigma^4)
infomat[1, 3] <- infomat[3, 1] <- sum(-1/2 * (mu^2 * exp(mu/sigma) + 2 * mu * sigma * exp(mu/sigma) -
2 * mu * dat * exp(mu/sigma) - 2 * sigma * dat * exp(mu/sigma) + dat^2 * exp(mu/sigma) -
2 * mu * sigma * exp(dat/sigma) - 2 * sigma^2 * exp(dat/sigma) + 2 * sigma * dat * exp(dat/sigma)) *
exp(-dat/sigma)/sigma^3)
infomat[2, 3] <- infomat[3, 2] <- sum(1/2 * (mu^2 * exp(mu/sigma) + 2 * mu * sigma * exp(mu/sigma) -
2 * mu * dat * exp(mu/sigma) - 2 * sigma * dat * exp(mu/sigma) + dat^2 * exp(mu/sigma) -
2 * mu * sigma * exp(dat/sigma) - 2 * sigma^2 * exp(dat/sigma) + 2 * sigma * dat * exp(dat/sigma)) *
(mu - dat) * exp(-dat/sigma)/sigma^4)
infomat[3, 3] <- sum(exp(-(dat/sigma))*(4*exp(dat/sigma)* sigma*(2*mu + 3*sigma - 2*dat) -
exp(mu/sigma)*(3*mu + 8*sigma - 3*dat)*(mu - dat))*(mu - dat)^2)/(12*sigma^4)
}
colnames(infomat) <- rownames(infomat) <- c("loc","scale","shape")
return(-infomat)
}
}
#' Tangent exponential model statistics for the generalized extreme value distribution
#'
#' Matrix of approximate ancillary statistics, sample space derivative of the
#' log likelihood and mixed derivative for the generalized extreme value distribution.
#' @seealso \code{\link{gev}}
#' @inheritParams gev
#' @export
#' @name gev.temstat
#' @keywords internal
gev.Vfun <- function(par, dat) {
cbind(1, (dat - par[1])/par[2], par[2] * (-(par[1] - dat) * par[3]/par[2] + 1)^(-1/par[3]) * (log1p(-(par[1] -
dat) * par[3]/par[2] )/par[3]^2 - (par[1] - dat)/(par[2] * ((par[1] - dat) * par[3]/par[2] -
1) * par[3]))/(-(par[1] - dat) * par[3]/par[2] + 1)^(-1/par[3] - 1))
}
#' @rdname gev.temstat
#' @inheritParams gev
#' @export
#' @keywords internal
gev.phi <- function(par, dat, V) {
mu = par[1]
sigma = par[2]
xi = as.vector(par[3])
if (!isTRUE(all.equal(xi, 0))) {
t(((dat - mu) * xi/sigma + 1)^(-1/xi - 1)/sigma + xi * (1/xi + 1)/(sigma * ((mu - dat) * xi/sigma -
1))) %*% V
} else {
t(exp((mu - dat)/sigma)/sigma - 1/sigma) %*% V
}
}
#' @rdname gev.temstat
#' @inheritParams gev
#' @export
#' @keywords internal
gev.dphi <- function(par, dat, V) {
mu = par[1]
sigma = par[2]
xi = as.vector(par[3])
if (!isTRUE(all.equal(xi, 0))) {
rbind((-(mu - dat) * xi/sigma + 1)^(-1/xi - 2) * (xi + 1)/sigma^2 - xi^2 * (1/xi + 1)/(sigma^2 *
((mu - dat) * xi/sigma - 1)^2), -(mu - dat) * (-(mu - dat) * xi/sigma + 1)^(-1/xi - 2) *
(xi + 1)/sigma^3 - (-(mu - dat) * xi/sigma + 1)^(-1/xi - 1)/sigma^2 - xi * (1/xi + 1)/(sigma^2 *
((mu - dat) * xi/sigma - 1)) + (mu - dat) * xi^2 * (1/xi + 1)/(sigma^3 * ((mu - dat) * xi/sigma -
1)^2), -(-(mu - dat) * xi/sigma + 1)^(-1/xi - 1) * ((mu - dat) * (1/xi + 1)/(sigma * ((mu -
dat) * xi/sigma - 1)) - log1p(-(mu - dat) * xi/sigma)/xi^2)/sigma + (1/xi + 1)/(sigma *
((mu - dat) * xi/sigma - 1)) - (mu - dat) * (xi + 1)/(sigma^2 * ((mu - dat) * xi/sigma -
1)^2) - 1/(sigma * ((mu - dat) * xi/sigma - 1) * xi)) %*% V
} else {
rbind(exp(-dat/sigma + mu/sigma)/sigma^2, (sigma * exp(dat/sigma) + (dat - mu - sigma) * exp(mu/sigma)) *
exp(-dat/sigma)/sigma^3, 1/2 * (2 * dat * sigma * exp(dat/sigma) - 2 * mu * sigma * exp(dat/sigma) -
2 * sigma^2 * exp(dat/sigma) + dat^2 * exp(mu/sigma) - 2 * dat * mu * exp(mu/sigma) + mu^2 *
exp(mu/sigma) - 2 * dat * sigma * exp(mu/sigma) + 2 * mu * sigma * exp(mu/sigma)) * exp(-dat/sigma)/sigma^3) %*%
V
}
}
#' @title Generalized Pareto distribution (expected shortfall parametrization)
#'
#' @description Likelihood, score function and information matrix,
#' approximate ancillary statistics and sample space derivative
#' for the generalized Pareto distribution parametrized in terms of expected shortfall.
#'
#' The parameter \code{m} corresponds to \eqn{\zeta_u/(1-\alpha)}, where \eqn{\zeta_u} is the rate of exceedance over the threshold
#' \code{u} and \eqn{\alpha} is the percentile of the expected shortfall.
#' Note that the actual parametrization is in terms of the excess expected shortfall, meaning the expected shortfall minus the threshold.
#'
#' @details The observed information matrix was calculated from the Hessian using symbolic calculus in Sage.
#'
#' @author Leo Belzile
#' @name gpde
#' @param par vector of length 2 containing \eqn{e_m} and \eqn{\xi}, respectively the expected shortfall at probability \eqn{1/(1-\alpha)} and the shape parameter.
#' @param dat sample vector
#' @param m number of observations of interest for return levels. See \strong{Details}
#' @param tol numerical tolerance for the exponential model
#' @param method string indicating whether to use the expected (\code{'exp'}) or the observed (\code{'obs'} - the default) information matrix.
#' @param nobs number of observations
#' @param V vector calculated by \code{gpde.Vfun}
#'
#' @section Usage: \preformatted{gpde.ll(par, dat, m, tol=1e-5)
#' gpde.ll.optim(par, dat, m, tol=1e-5)
#' gpde.score(par, dat, m)
#' gpde.infomat(par, dat, m, method = c('obs', 'exp'), nobs = length(dat))
#' gpde.Vfun(par, dat, m)
#' gpde.phi(par, dat, V, m)
#' gpde.dphi(par, dat, V, m)}
#'
#' @section Functions:
#'
#' \itemize{
#' \item{\code{gpde.ll}:} {log likelihood}
#' \item{\code{gpde.ll.optim}:} {negative log likelihood parametrized in terms of log expected
#' shortfall and shape in order to perform unconstrained optimization}
#' \item{\code{gpde.score}:} {score vector}
#' \item{\code{gpde.infomat}:} {observed information matrix for the GPD parametrized in terms of expected shortfall and shape}
#' \item{\code{gpde.Vfun}:} {vector implementing conditioning on approximate ancillary statistics for the TEM}
#' \item{\code{gpde.phi}:} {canonical parameter in the local exponential family approximation}
#' \item{\code{gpde.dphi}:} {derivative matrix of the canonical parameter in the local exponential family approximation}
#' }
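#' @examples
#' ## Small usage sketch (assumes the package namespace providing these functions is attached).
#' ## The excess expected shortfall is obtained by inverting the mapping used internally,
#' ## sigma = es * (1 - xi) * xi / (m^xi - 1 + xi).
#' set.seed(123)
#' sigma <- 1; xi <- 0.1; m <- 100
#' dat <- sigma * (runif(50)^(-xi) - 1) / xi
#' es <- sigma * (m^xi - 1 + xi) / ((1 - xi) * xi)
#' gpde.ll(c(es, xi), dat, m = m)                 # equals gpd.ll(c(sigma, xi), dat)
#' gpde.score(c(es, xi), dat, m = m)
#' gpde.infomat(c(es, xi), dat, m = m, method = "obs")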
NULL
#' @title Generalized Pareto distribution (return level parametrization)
#'
#' @description Likelihood, score function and information matrix,
#' approximate ancillary statistics and sample space derivative
#' for the generalized Pareto distribution parametrized in terms of return levels.
#'
#' @details The observed information matrix was calculated from the Hessian using symbolic calculus in Sage.
#' @details The interpretation for \code{m} is as follows: if there are on average \eqn{m_y} observations per year above the threshold, then \eqn{m=Tm_y} corresponds to the \eqn{T}-year return level.
#'
#' @author Leo Belzile
#' @name gpdr
#' @param par vector of length 2 containing \eqn{y_m} and \eqn{\xi}, respectively the \eqn{m}-year return level and the shape parameter.
#' @param dat sample vector
#' @param m number of observations of interest for return levels. See \strong{Details}
#' @param tol numerical tolerance for the exponential model
#' @param method string indicating whether to use the expected (\code{'exp'}) or the observed (\code{'obs'} - the default) information matrix.
#' @param nobs number of observations
#' @param V vector calculated by \code{gpdr.Vfun}
#'
#' @section Usage: \preformatted{gpdr.ll(par, dat, m, tol=1e-5)
#' gpdr.ll.optim(par, dat, m, tol=1e-5)
#' gpdr.score(par, dat, m)
#' gpdr.infomat(par, dat, m, method = c('obs', 'exp'), nobs = length(dat))
#' gpdr.Vfun(par, dat, m)
#' gpdr.phi(par, V, dat, m)
#' gpdr.dphi(par, V, dat, m)}
#' @section Functions:
#'
#' \itemize{
#' \item{\code{gpdr.ll}:} {log likelihood}
#' \item{\code{gpdr.ll.optim}:} {negative log likelihood parametrized in terms of \code{log(scale)} and shape
#' in order to perform unconstrained optimization}
#' \item{\code{gpdr.score}:} {score vector}
#' \item{\code{gpdr.infomat}:} {observed information matrix for the GPD parametrized in terms of the \eqn{m}-year return level and shape}
#' \item{\code{gpdr.Vfun}:} {vector implementing conditioning on approximate ancillary statistics for the TEM}
#' \item{\code{gpdr.phi}:} {canonical parameter in the local exponential family approximation}
#' \item{\code{gpdr.dphi}:} {derivative matrix of the canonical parameter in the local exponential family approximation}
#' }
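#' @examples
#' ## Small usage sketch (assumes the package namespace providing these functions is attached).
#' ## The return level is obtained by inverting the mapping used internally,
#' ## sigma = ym * xi / (m^xi - 1).
#' set.seed(123)
#' sigma <- 1; xi <- 0.1; m <- 100
#' dat <- sigma * (runif(50)^(-xi) - 1) / xi
#' ym <- sigma * (m^xi - 1) / xi
#' gpdr.ll(c(ym, xi), dat, m = m)                 # equals gpd.ll(c(sigma, xi), dat)
#' gpdr.score(c(ym, xi), dat, m = m)
#' gpdr.infomat(c(ym, xi), dat, m = m, method = "obs")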
NULL
#' @title Generalized extreme value distribution (return level parametrization)
#'
#' @description Likelihood, score function and information matrix,
#' approximate ancillary statistics and sample space derivative
#' for the generalized extreme value distribution parametrized in terms of the return level \eqn{z}, scale and shape.
#'
#' @author Leo Belzile
#' @name gevr
#' @param par vector of \code{retlev}, \code{scale} and \code{shape}
#' @param dat sample vector
#' @param p tail probability, corresponding to \eqn{(1-p)}th quantile for \eqn{z}
#' @param method string indicating whether to use the expected (\code{'exp'}) or the observed (\code{'obs'} - the default) information matrix.
#' @param nobs number of observations
#' @param V vector calculated by \code{gevr.Vfun}
#'
#' @section Usage: \preformatted{gevr.ll(par, dat, p)
#' gevr.ll.optim(par, dat, p)
#' gevr.score(par, dat, p)
#' gevr.infomat(par, dat, p, method = c('obs', 'exp'), nobs = length(dat))
#' gevr.Vfun(par, dat, p)
#' gevr.phi(par, dat, p, V)
#' gevr.dphi(par, dat, p, V)}
#'
#' @section Functions:
#' \itemize{
#' \item{\code{gevr.ll}:} {log likelihood}
#' \item{\code{gevr.ll.optim}:} {negative log likelihood parametrized in terms of return levels, \code{log(scale)} and shape in order to perform unconstrained optimization}
#' \item{\code{gevr.score}:} {score vector}
#' \item{\code{gevr.infomat}:} {observed information matrix}
#' \item{\code{gevr.Vfun}:} {vector implementing conditioning on approximate ancillary statistics for the TEM}
#' \item{\code{gevr.phi}:} {canonical parameter in the local exponential family approximation}
#' \item{\code{gevr.dphi}:} {derivative matrix of the canonical parameter in the local exponential family approximation}
#' }
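#' @examples
#' ## Small usage sketch (assumes the package namespace providing these functions is attached).
#' ## The return level z is the (1-p)th quantile of the GEV, here computed with gev.retlev.
#' set.seed(123)
#' mu <- 0; sigma <- 1; xi <- 0.1; p <- 0.01
#' dat <- mu + sigma * ((-log(runif(100)))^(-xi) - 1) / xi
#' z <- gev.retlev(c(mu, sigma, xi), p)
#' gevr.ll(c(z, sigma, xi), dat, p = p)           # equals gev.ll(c(mu, sigma, xi), dat)
#' gevr.score(c(z, sigma, xi), dat, p = p)
#' gevr.infomat(c(z, sigma, xi), dat, p = p, method = "obs")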
NULL
#' Log likelihood of the generalized Pareto distribution (expected shortfall)
#'
#' @seealso \code{\link{gpde}}
#' @inheritParams gpde
#' @keywords internal
#' @export
gpde.ll <- function(par, dat, m) {
  es = par[1]
  xi = as.vector(par[2])
  if (any(xi > 1, es < 0, min(1 + (m^xi - 1 + xi)/((1 - xi) * es) * dat) < 0)) {
    return(-1e+10)
  }
  xizero <- abs(xi) < 1e-9
  sigmae = ifelse(!xizero, es * (1 - xi) * xi/(m^xi - 1 + xi), es/(log(m)+1))
  return(gpd.ll(par = c(sigmae, xi), dat = dat))
}
#' Negative log likelihood of the generalized Pareto distribution (expected shortfall), for optimization
#'
#' The negative log likelihood is parametrized in terms of log expected shortfall and shape in order to perform unconstrained optimization.
#' @rdname gpde.ll
#' @inheritParams gpde
#' @keywords internal
#' @export
gpde.ll.optim <- function(par, dat, m) {
  es = exp(par[1])
  xi = par[2]
  if (xi > 1) {
    return(1e+10)
  }
  -sum(-log(xi * (1 - xi)/(m^xi - 1 + xi)) - log(es) -
         (1 + 1/xi) * log1p((m^xi - 1 + xi)/((1 - xi) * es) * dat))
}
#' Score vector for the GP distribution (expected shortfall)
#' @seealso \code{\link{gpde}}
#' @inheritParams gpde
#' @keywords internal
#' @export
gpde.score <- function(par, dat, m) {
es = par[1]
xi = par[2]
if(missing(m)){
stop("User must provide \"m\" parameter in \"gpde.score\".")
}
xizero <- abs(xi) < 1e-4
if(!xizero){
c(sum(-1/es + dat * (m^xi + xi - 1) * (1/xi + 1)/(es^2 * (xi - 1) * (dat * (m^xi + xi - 1)/(es *
(xi - 1)) - 1))), sum(-((m^xi * log(m) + 1) * dat/(es * (xi - 1)) - dat * (m^xi + xi - 1)/(es *
(xi - 1)^2)) * (1/xi + 1)/(dat * (m^xi + xi - 1)/(es * (xi - 1)) - 1) + (m^xi + xi - 1) * ((m^xi *
log(m) + 1) * (xi - 1) * xi/(m^xi + xi - 1)^2 - (xi - 1)/(m^xi + xi - 1) - xi/(m^xi + xi - 1))/((xi -
1) * xi) + log1p(-dat * (m^xi + xi - 1)/(es * (xi - 1)))/xi^2))
} else{
c(sum((dat*log(m) + dat - es)/es^2),
sum(1/2*((dat^2 - dat*es)*log(m)^3 + (3*dat^2 - 5*dat*es + es^2)*log(m)^2 + dat^2 -
4*dat*es + 2*es^2 + (3*dat^2 - 8*dat*es + 2*es^2)*log(m))/(es^2*log(m) + es^2)))
}
}
#' Observed information matrix for the GP distribution (expected shortfall)
#'
#' The information matrix is parametrized in terms of excess expected shortfall and shape
#' @seealso \code{\link{gpde}}
#' @inheritParams gpde
#' @keywords internal
#' @export
gpde.infomat <- function(par, dat, m, method = c("obs", "exp"), nobs = length(dat)) {
method <- match.arg(method) #default to observed information
es = as.vector(par[1])
xi = as.vector(par[2])
if(missing(m)){
stop("User must provide \"m\" parameter in \"gpde.infomat\".")
}
if (xi < -0.5) {
return(matrix(NA, 2, 2))
}
xizero <- abs(xi) < 1e-5
logm <- log(m)
if (method == "obs") {
if(!xizero){
k11 = sum(-1/es^2 + 2 * dat * (m^xi + xi - 1) * (1/xi + 1)/(es^3 * (xi - 1) * (dat * (m^xi +
xi - 1)/(es * (xi - 1)) - 1)) - dat^2 * (m^xi + xi - 1)^2 * (1/xi + 1)/(es^4 * (xi - 1)^2 *
(dat * (m^xi + xi - 1)/(es * (xi - 1)) - 1)^2))
k22 = sum(-((m^xi * logm + 1) * dat/(es * (xi - 1)) - dat * (m^xi + xi - 1)/(es * (xi - 1)^2))^2 *
(1/xi + 1)/(dat * (m^xi + xi - 1)/(es * (xi - 1)) - 1)^2 + (dat * m^xi * logm^2/(es * (xi -
1)) - 2 * (m^xi * logm + 1) * dat/(es * (xi - 1)^2) + 2 * dat * (m^xi + xi - 1)/(es * (xi -
1)^3)) * (1/xi + 1)/(dat * (m^xi + xi - 1)/(es * (xi - 1)) - 1) - (m^xi * (xi - 1) * xi *
logm^2/(m^xi + xi - 1)^2 - 2 * (m^xi * logm + 1)^2 * (xi - 1) * xi/(m^xi + xi - 1)^3 +
2 * (m^xi * logm + 1) * (xi - 1)/(m^xi + xi - 1)^2 + 2 * (m^xi * logm + 1) * xi/(m^xi +
xi - 1)^2 - 2/(m^xi + xi - 1)) * (m^xi + xi - 1)/((xi - 1) * xi) - (m^xi * logm + 1) *
((m^xi * logm + 1) * (xi - 1) * xi/(m^xi + xi - 1)^2 - (xi - 1)/(m^xi + xi - 1) - xi/(m^xi +
xi - 1))/((xi - 1) * xi) + (m^xi + xi - 1) * ((m^xi * logm + 1) * (xi - 1) * xi/(m^xi +
xi - 1)^2 - (xi - 1)/(m^xi + xi - 1) - xi/(m^xi + xi - 1))/((xi - 1) * xi^2) + (m^xi + xi -
1) * ((m^xi * logm + 1) * (xi - 1) * xi/(m^xi + xi - 1)^2 - (xi - 1)/(m^xi + xi - 1) -
xi/(m^xi + xi - 1))/((xi - 1)^2 * xi) - 2 * ((m^xi * logm + 1) * dat/(es * (xi - 1)) -
dat * (m^xi + xi - 1)/(es * (xi - 1)^2))/(xi^2 * (dat * (m^xi + xi - 1)/(es * (xi - 1)) -
1)) + 2 * log1p(-dat * (m^xi + xi - 1)/(es * (xi - 1)))/xi^3)
k12 = sum(-((m^xi * logm + 1) * dat/(es^2 * (xi - 1)) - dat * (m^xi + xi - 1)/(es^2 * (xi -
1)^2)) * (1/xi + 1)/(dat * (m^xi + xi - 1)/(es * (xi - 1)) - 1) + dat * (m^xi + xi - 1) *
((m^xi * logm + 1) * dat/(es * (xi - 1)) - dat * (m^xi + xi - 1)/(es * (xi - 1)^2)) * (1/xi +
1)/(es^2 * (xi - 1) * (dat * (m^xi + xi - 1)/(es * (xi - 1)) - 1)^2) + dat * (m^xi + xi -
1)/(es^2 * (xi - 1) * xi^2 * (dat * (m^xi + xi - 1)/(es * (xi - 1)) - 1)))
} else{
k11 <- sum(2*dat*es*log(m) + 2*dat*es - es^2)/es^4
k12 <- 0.5*sum((2*dat*log(m)^2 - es*log(m)^2 + 4*dat*log(m) - 4*es*log(m) + 2*dat - 4*es)*dat)/es^3
k22 <- sum(1/12*(4*(2*dat^3 - 3*dat^2*es + dat*es^2)*log(m)^5 + (40*dat^3 - 72*dat^2*es + 32*dat*es^2 - es^3)*log(m)^4 +
4*(20*dat^3 - 45*dat^2*es + 25*dat*es^2 - es^3)*log(m)^3 + 8*dat^3 - 36*dat^2*es + 48*dat*es^2 -
12*es^3 + 4*(20*dat^3 - 57*dat^2*es + 42*dat*es^2 - 3*es^3)*log(m)^2 +
8*(5*dat^3 - 18*dat^2*es + 18*dat*es^2 - 3*es^3)*log(m))/(es^3*log(m)^2 + 2*es^3*log(m) + es^3))
}
infomat <- cbind(c(k11, k12), c(k12, k22))
} else if (method == "exp") {
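# Expected information via the chain rule: map (es, xi) to the usual (scale, shape)
# parametrization and sandwich gpd.infomat between the Jacobian of that map.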
sigmae = ifelse(!xizero, es * (1 - xi) * xi/(m^xi - 1 + xi), es/(log(m)+1))
if(!xizero){
Jac <- rbind(c((1 - xi) * xi/(m^xi - 1 + xi), (m^xi * logm + 1) * es * (xi - 1) * xi/(m^xi +
xi - 1)^2 - es * (xi - 1)/(m^xi + xi - 1) - es * xi/(m^xi + xi - 1)), c(0, 1))
} else{
Jac <- rbind(c(1/(logm+1), -1/2*(log(m)^2 + 2*log(m) + 2)*es/(log(m)^2 + 2*log(m) + 1)), c(0, 1))
}
infomat <- t(Jac) %*% gpd.infomat(par = c(sigmae, xi), dat = dat, method = "exp", nobs = nobs) %*%
Jac
}
colnames(infomat) <- rownames(infomat) <- c("es","shape")
return(infomat)
}
#' Tangent exponential model statistics for the generalized Pareto distribution (expected shortfall)
#'
#' Vector implementing conditioning on approximate ancillary statistics for the TEM
#' @seealso \code{\link{gpde}}
#' @name gpde.temstat
#' @inheritParams gpde
#' @keywords internal
#' @export
gpde.Vfun <- function(par, dat, m) {
es = par[1]
xi = par[2]
cbind(dat/es, es * (xi - 1) * xi * (-dat * (m^xi + xi - 1)/(es * (xi - 1)) + 1)^(-1/xi) * (((m^xi *
log(m) + 1) * dat/(es * (xi - 1)) - dat * (m^xi + xi - 1)/(es * (xi - 1)^2))/(xi * (dat * (m^xi +
xi - 1)/(es * (xi - 1)) - 1)) - log1p(-dat * (m^xi + xi - 1)/(es * (xi - 1)))/xi^2)/((m^xi +
xi - 1) * (-dat * (m^xi + xi - 1)/(es * (xi - 1)) + 1)^(-1/xi - 1)))
}
#' Canonical parameter in the local exponential family approximation
#'
#' @inheritParams gpde
#' @rdname gpde.temstat
#' @keywords internal
#' @export
gpde.phi <- function(par, dat, V, m) {
es = par[1]
xi = par[2]
rbind(-(m^xi + xi - 1) * (1/xi + 1)/(es * (xi - 1) * (dat * (m^xi + xi - 1)/(es * (xi - 1)) - 1))) %*%
V
}
#' Derivative matrix of the canonical parameter in the local exponential family approximation
#' @inheritParams gpde
#' @rdname gpde.temstat
#' @keywords internal
#' @export
gpde.dphi <- function(par, dat, V, m) {
es = par[1]
xi = par[2]
rbind((m^xi + xi - 1) * (1/xi + 1)/(es^2 * (xi - 1) * (dat * (m^xi + xi - 1)/(es * (xi - 1)) - 1)) -
dat * (m^xi + xi - 1)^2 * (1/xi + 1)/(es^3 * (xi - 1)^2 * (dat * (m^xi + xi - 1)/(es * (xi -
1)) - 1)^2), (m^xi + xi - 1) * ((m^xi * log(m) + 1) * dat/(es * (xi - 1)) - dat * (m^xi +
xi - 1)/(es * (xi - 1)^2)) * (1/xi + 1)/(es * (xi - 1) * (dat * (m^xi + xi - 1)/(es * (xi - 1)) -
1)^2) - (m^xi * log(m) + 1) * (1/xi + 1)/(es * (xi - 1) * (dat * (m^xi + xi - 1)/(es * (xi -
1)) - 1)) + (m^xi + xi - 1) * (1/xi + 1)/(es * (xi - 1)^2 * (dat * (m^xi + xi - 1)/(es * (xi -
1)) - 1)) + (m^xi + xi - 1)/(es * (xi - 1) * xi^2 * (dat * (m^xi + xi - 1)/(es * (xi - 1)) -
1))) %*% V
}
#' Log likelihood of the generalized Pareto distribution (return levels)
#'
#' @seealso \code{\link{gpdr}}
#' @inheritParams gpdr
#' @keywords internal
#' @export
gpdr.ll <- function(par, dat, m) {
  if (missing(m)) {
    stop("Need to specify the reciprocal tail probability \"m\"")
  }
  ym = par[1]
  xi = par[2]
  if (par[1] < 0) {
    return(-1e+15)
  }
  nn = length(dat)
  pr <- m^xi - 1
  sigmar <- ifelse(abs(xi) > 1e-7, ym * xi/(m^xi - 1), ym/log(m))
  return(gpd.ll(par = c(sigmar, xi), dat = dat))
  # if (abs(xi) > 1e-7) {
  #   -nn * log(xi/pr) - nn * log(ym) - (1 + 1/xi) * sum(log(pmax(1 + exp(log(dat) - log(ym)) * pr, 0)))
  # } else {
  #   nn * log(log(m)) - nn * log(ym) + sum(-dat * log(m)/ym)
  # }
}
#' Negative log likelihood parametrized in terms of log return level and shape in order to perform unconstrained optimization
#' @inheritParams gpdr
#' @keywords internal
#' @rdname gpdr.ll
#' @export
gpdr.ll.optim <- function(par, dat, m) {
  tol <- 1e-05
  nn = length(dat)
  ym = exp(par[1])
  xi = par[2]
  p <- m^xi - 1
  if (abs(xi) > tol) {
    -(-nn * log(xi/p) - nn * log(ym) - (1 + 1/xi) * sum(log(pmax(1 + exp(log(dat) - log(ym)) * p, 0))))
  } else {
    -(nn * log(log(m)) - nn * log(ym) + sum(-dat * log(m)/ym))
  }
}
#' Score vector for the GP distribution (return levels parametrization)
#' @seealso \code{\link{gpdr}}
#' @inheritParams gpdr
#' @keywords internal
#' @export
gpdr.score <- function(par, dat, m) {
if(missing(m)){
stop("Need to specify the reciprocal tail probability \"m\"")
}
nn = length(dat)
xi = par[2]
ym = par[1]
p <- (m)^xi - 1
xizero <- abs(xi) < 1e-6
if(!xizero){
as.vector(c(-nn/ym + (1 + 1/xi) * sum(dat * p/(ym + dat * p))/ym, -nn/xi + exp(log(nn) + xi * log(m) + log(log(m)))/p +
sum(log1p(dat/ym * p))/xi^2 - (1 + 1/xi) * sum(exp(log(dat) + log(log(m)) + xi * log(m) - log(ym +
dat * p)))))
} else{
as.vector(c(sum((dat*log(m) - ym))/ym^2,
0.5*sum((dat^2*log(m)^2 + ym^2*log(m) - (dat*log(m)^2 + 2*dat*log(m))*ym))/ym^2))
}
}
#' Observed information matrix for GP distribution (return levels)
#'
#' The information matrix is parametrized in terms of the \code{m}-year return level and shape
#' @seealso \code{\link{gpdr}}
#' @inheritParams gpdr
#' @keywords internal
#' @export
gpdr.infomat <- function(par, dat, m, method = c("obs", "exp"), nobs = length(dat)) {
if(missing(m)){
stop("Need to specify the reciprocal tail probability \"m\"")
}
xi = as.vector(par[2])
r = as.vector(par[1])
method <- method[1]
if (xi < -0.5) {
return(matrix(NA, 2, 2))
}
xizero <- abs(xi) < 1e-5
logm <- log(m)
if (method == "obs") {
if(!xizero){
info <- matrix(ncol = 2, nrow = 2)
info[1, 1] <- sum(dat^2 * (m^xi - 1)^2 * (1/xi + 1)/((dat * (m^xi - 1)/r + 1)^2 * r^4) - 2 *
dat * (m^xi - 1) * (1/xi + 1)/((dat * (m^xi - 1)/r + 1) * r^3) + 1/r^2)
info[2, 2] <- sum(dat^2 * (m^xi)^2 * (1/xi + 1) * logm^2/((dat * (m^xi - 1)/r + 1)^2 * r^2) -
dat * m^xi * (1/xi + 1) * logm^2/((dat * (m^xi - 1)/r + 1) * r) + m^xi * (m^xi * xi * logm/(m^xi -
1)^2 - 1/(m^xi - 1)) * logm/xi + (m^xi * xi * logm^2/(m^xi - 1)^2 - 2 * (m^xi)^2 * xi *
logm^2/(m^xi - 1)^3 + 2 * m^xi * logm/(m^xi - 1)^2) * (m^xi - 1)/xi - (m^xi - 1) * (m^xi *
xi * logm/(m^xi - 1)^2 - 1/(m^xi - 1))/xi^2 + 2 * dat * m^xi * logm/((dat * (m^xi - 1)/r +
1) * r * xi^2) - 2 * log1p(dat * (m^xi - 1)/r)/xi^3)
info[2, 1] <- info[1, 2] <- sum(-dat^2 * (m^xi - 1) * m^xi * (1/xi + 1) * logm/((dat * (m^xi -
1)/r + 1)^2 * r^3) + dat * m^xi * (1/xi + 1) * logm/((dat * (m^xi - 1)/r + 1) * r^2) -
dat * (m^xi - 1)/((dat * (m^xi - 1)/r + 1) * r^2 * xi^2))
infomat <- -info
} else{
k11 <- sum((2*dat*r*log(m) - r^2))/r^4
k12 <- 0.5*sum((2*dat*log(m) - r*log(m) - 2*r)*dat)*log(m)/r^3
k22 <- sum((8*dat^3*log(m)^3 - r^3*log(m)^2 + 4*(dat*log(m)^3 + 3*dat*log(m)^2)*r^2 - 12*(dat^2*log(m)^3 + dat^2*log(m)^2)*r)/(12*r^3))
infomat <- matrix(c(k11, k12, k12, k22), byrow = TRUE, ncol = 2, nrow = 2)
}
} else if (method == "exp") {
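# Expected information via the chain rule: map (ym, xi) to the usual (scale, shape)
# parametrization and sandwich gpd.infomat between the Jacobian of that map.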
sigmar <- ifelse(!xizero, r * xi/(m^xi - 1), r/logm)
if(!xizero){
Jac <- rbind(c(xi/(m^xi - 1), -m^xi * r * xi * logm/(m^xi - 1)^2 + r/(m^xi - 1)), c(0, 1))
} else{
Jac <- rbind(c(1/logm, -0.5*r), c(0, 1))
}
infomat <- t(Jac) %*% gpd.infomat(par = c(sigmar, xi), method = "exp", dat = dat, nobs = length(dat)) %*% Jac
}
colnames(infomat) <- rownames(infomat) <- c("retlev","shape")
return(infomat)
}
#' Tangent exponential model statistics for the generalized Pareto distribution (return level)
#'
#' Vector implementing conditioning on approximate ancillary statistics for the TEM
#' @seealso \code{\link{gpdr}}
#' @name gpdr.temstat
#' @inheritParams gpdr
#' @keywords internal
#' @export
gpdr.Vfun <- function(par, dat, m) {
xi = par[2]
ym = par[1]
p <- m^xi - 1
cbind(dat/ym, -(ym + dat * p)/p * ((m)^xi * log(m)/(p + ym/dat) - log1p(dat/ym * p)/xi))
}
#' Canonical parameter in the local exponential family approximation
#' @seealso \code{\link{gpdr}}
#' @rdname gpdr.temstat
#' @inheritParams gpdr
#' @keywords internal
#' @export
gpdr.phi <- function(par, dat, V, m) {
xi = par[2]
r = par[1]
matrix(-(1 + 1/xi) * (m^xi - 1)/(r + dat * (m^xi - 1)), nrow = 1) %*% V
}
#' Derivative of the canonical parameter \eqn{\phi(\theta)} in the local exponential family approximation
#' @rdname gpdr.temstat
#' @inheritParams gpdr
#' @keywords internal
#' @export
gpdr.dphi <- function(par, dat, V, m) {
xi = par[2]
r = par[1]
p = m^xi - 1
rbind((1 + 1/xi) * p/(r + dat * p)^2, p/(xi^2 * (r + dat * p)) - (1 + 1/xi) * log(m) * m^xi * r/(r +
dat * p)^2) %*% V
}
####################################### GEV in terms of return levels ###
#' Return level for the generalized extreme value distribution
#'
#' This function returns the \eqn{(1-p)}th quantile of the GEV.
#' @inheritParams gev
#' @seealso \code{\link{gev}}
#' @keywords internal
#' @export
gev.retlev <- function(par, p) {
  mu = par[1]
  sigma = par[2]
  xi = as.vector(par[3])
  if (!isTRUE(all.equal(xi, 0))) {
    mu - sigma/xi * (1 - (-log(1 - p))^(-xi))
  } else {
    mu - sigma * log(-log(-p + 1))
  }
}
#' This function returns the mean of the maximum of \code{N} observations from the GEV.
#' @inheritParams gevN
#' @seealso \code{\link{gevN}}
#' @keywords internal
#' @export
gevN.mean <- function(par, N) {
  mu = par[1]
  sigma = par[2]
  xi = as.vector(par[3])
  if (!isTRUE(all.equal(xi, 0))) {
    mu - sigma/xi * (1 - N^xi * gamma(1 - xi))
  } else {
    mu + sigma * (log(N) - psigamma(1))
  }
}
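# Quantile of the maximum of N independent GEV observations: solves F(x)^N = q,
# giving mu - sigma/xi * (1 - (N/log(1/q))^xi), with the usual limit as xi -> 0.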
gevN.quant <- function(par, N, q) {
  mu = par[1]
  sigma = par[2]
  xi = as.vector(par[3])
  if (!isTRUE(all.equal(xi, 0))) {
    mu - sigma/xi * (1 - (N/log(1/q))^xi)
  } else {
    mu + sigma * (log(N) - log(log(1/q)))
  }
}
#' This function returns the mean of the maximum of \code{N} exceedances from the GP.
#' @inheritParams gpdN
#' @seealso \code{\link{gpdN}}
#' @keywords internal
#' @export
gpdN.mean <- function(par, N) {
  sigma = par[1]
  xi = as.vector(par[2])
  if (!isTRUE(all.equal(xi, 0))) {
    (exp(lgamma(N + 1) + lgamma(1 - xi) - lgamma(N + 1 - xi)) - 1) * sigma/xi
  } else {
    (-psigamma(1) + psigamma(N + 1)) * sigma
  }
}
#' This function returns the \code{q}th quantile of the maximum of \code{N} exceedances from the GP.
#' @inheritParams gpdN
#' @seealso \code{\link{gpdN}}
#' @keywords internal
#' @export
gpdN.quant <- function(par, q, N) {
  sigma = par[1]
  xi = as.vector(par[2])
  if (!isTRUE(all.equal(xi, 0))) {
    sigma/xi * ((1 - q^(1/N))^(-xi) - 1)
  } else {
    sigma * (-log(-q^(1/N) + 1))
  }
}
#' Log likelihood of the generalized extreme value distribution (return levels)
#'
#' @seealso \code{\link{gevr}}
#' @inheritParams gevr
#' @keywords internal
#' @export
gevr.ll <- function(par, dat, p) {
  z = par[1]
  sigma = par[2]
  xi = as.vector(par[3])
  muf = ifelse(!isTRUE(all.equal(xi, 0)),
               z + sigma/xi * (1 - (-log(1 - p))^(-xi)),
               sigma * log(-log(1 - p)) + z)
  if (!isTRUE(all.equal(xi, 0, tolerance = 1e-08))) {
    sum(-log(sigma) - (1/xi + 1) * log(pmax(1 + xi * (dat - muf)/sigma, 0)) -
          pmax(1 + xi * (dat - muf)/sigma, 0)^(-1/xi))
  } else {
    sum((muf - dat)/sigma - exp((muf - dat)/sigma) - log(sigma))
  }
}
#' Negative log likelihood parametrized in terms of return level, \code{log(scale)} and shape in order to perform unconstrained optimization
#' @rdname gevr.ll
#' @inheritParams gevr
#' @keywords internal
#' @export
gevr.ll.optim <- function(par, dat, p) {
  tpar = par
  tpar[2] = exp(par[2])
  # if(tpar[3] <= -1){return(1e20)}
  -gevr.ll(tpar, dat, p)
}
#' Score of the log likelihood for the GEV distribution (return levels)
#' @seealso \code{\link{gevr}}
#' @inheritParams gevr
#' @keywords internal
#' @export
gevr.score <- function(par, dat, p) {
z = par[1]
sigma = par[2]
xi = par[3]
log1mp <- log(1 - p)
if(abs(xi) > 1e-4){
c(sum((((dat - z) * xi/sigma + 1/(-log1mp)^xi)^(-1/xi - 2) * (xi + 1) * exp(-1/((dat - z) *
xi/sigma + 1/(-log1mp)^xi)^(1/xi))/sigma^2 - ((dat - z) * xi/sigma + 1/(-log1mp)^xi)^(-2/xi -
2) * exp(-1/((dat - z) * xi/sigma + 1/(-log1mp)^xi)^(1/xi))/sigma^2) * sigma * ((dat - z) *
xi/sigma + 1/(-log1mp)^xi)^(1/xi + 1) * exp(1/(((dat - z) * xi/sigma + 1/(-log1mp)^xi)^(1/xi)))),
sum(-(dat * (-log1mp)^xi - z * (-log1mp)^xi - (dat * (-log1mp)^xi - z * (-log1mp)^xi -
sigma) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma *
(-log1mp)^xi))^(1/xi))/((sigma * dat * xi * (-log1mp)^xi - sigma * xi * z * (-log1mp)^xi
+ sigma^2) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma *
(-log1mp)^xi))^(1/xi))), sum(-(xi * z * (-log1mp)^xi - (dat * (-log1mp)^xi -
sigma * log(-log1mp)) * xi + ((dat * (-log1mp)^xi - sigma * log(-log1mp)) *
xi^2 + (dat * (-log1mp)^xi - sigma * log(-log1mp)) * xi - (xi^2 * (-log1mp)^xi +
xi * (-log1mp)^xi) * z) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi +
sigma)/(sigma * (-log1mp)^xi))^(1/xi) + (dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi
- (dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma) * ((dat * xi *
(-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi) +
sigma) * log((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma *
(-log1mp)^xi)))/((dat * xi^3 * (-log1mp)^xi - xi^3 * z * (-log1mp)^xi + sigma *
xi^2) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi))))
} else{
as.vector(c(sum(
(1 + exp((-dat + z)/sigma)*log1mp)/sigma),
sum((-sigma + dat - z + exp((-dat + z)/sigma)*(dat - z)*log1mp))/sigma^2,
sum((1/(2*sigma^2))*(( exp(z/sigma)*(-dat + z)*log1mp* (-dat + z + 2*sigma*log(-log1mp)) + exp(dat/sigma)*((dat - z)*(-2*sigma + dat - z) + 2*sigma*(sigma - dat + z)*log(-log1mp)))/ exp(dat/sigma)))
))
}
}
#' Observed information matrix for GEV distribution (return levels)
#'
#' The information matrix is parametrized in terms of the return level (the \eqn{(1-p)}th quantile), scale and shape.
#' @seealso \code{\link{gevr}}
#' @inheritParams gevr
#' @param nobs integer number of observations
#' @keywords internal
#' @export
gevr.infomat <- function(par, dat, method = c("obs", "exp"), p, nobs = length(dat)) {
method <- match.arg(method)
z = par[1]
sigma = par[2]
xi = par[3]
log1mp <- log(1 - p)
logmlog1mp <- log(-log1mp)
if (method == "obs") {
info <- matrix(0, ncol = 3, nrow = 3)
if(abs(xi) > 1e-4){
info[1, 1] <- sum(-(xi * (-log1mp)^(2 * xi) - (xi^2 * (-log1mp)^(2 * xi) + xi *
(-log1mp)^(2 * xi)) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi +
sigma)/(sigma * (-log1mp)^xi))^(1/xi) + (-log1mp)^(2 * xi))/((dat^2 * xi^2 * (-log1mp)^(2 * xi) +
xi^2 * z^2 * (-log1mp)^(2 * xi) + 2 * sigma * dat * xi * (-log1mp)^xi +
sigma^2 - 2 * (dat * xi^2 * (-log1mp)^(2 * xi) + sigma * xi * (-log1mp)^xi) * z) *
((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi)))
info[1, 2] <- info[2, 1] <- sum(-(dat * (-log1mp)^(2 * xi) - z * (-log1mp)^(2 *
xi) - sigma * (-log1mp)^xi + (sigma * xi * (-log1mp)^xi + sigma * (-log1mp)^xi) *
((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi))/
((sigma * dat^2 * xi^2 * (-log1mp)^(2 * xi) + sigma * xi^2 * z^2 *
(-log1mp)^(2 * xi) + 2 * sigma^2 * dat * xi * (-log1mp)^xi + sigma^3 - 2 * (sigma *
dat * xi^2 * (-log1mp)^(2 * xi) + sigma^2 * xi * (-log1mp)^xi) * z) * ((dat * xi *
(-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi)))
info[1, 3] <- info[3, 1] <- sum(-((sigma * (-log1mp)^xi * logmlog1mp - dat *
(-log1mp)^(2 * xi)) * xi^2 + (sigma * (-log1mp)^xi * logmlog1mp - dat *
(-log1mp)^(2 * xi)) * xi + (xi^2 * (-log1mp)^(2 * xi) + xi * (-log1mp)^(2 *
xi)) * z - (sigma * xi^3 * (-log1mp)^xi * logmlog1mp + xi^2 * z *
(-log1mp)^(2 * xi) + (sigma * (-log1mp)^xi * (logmlog1mp + 1) - dat * (-log1mp)^(2 *
xi)) * xi^2) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma *
(-log1mp)^xi))^(1/xi) + (dat * xi * (-log1mp)^(2 * xi) - xi * z * (-log1mp)^(2 *
xi) + sigma * (-log1mp)^xi) * log((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/
(sigma * (-log1mp)^xi)))/((dat^2 * xi^4 * (-log1mp)^(2 * xi) +
xi^4 * z^2 * (-log1mp)^(2 * xi) + 2 * sigma * dat * xi^3 * (-log1mp)^xi + sigma^2 *
xi^2 - 2 * (dat * xi^4 * (-log1mp)^(2 * xi) + sigma * xi^3 * (-log1mp)^xi) * z) *
((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi)))
info[2, 2] <- sum((dat^2 * xi * (-log1mp)^(2 * xi) + (xi * (-log1mp)^(2 * xi) -
(-log1mp)^(2 * xi)) * z^2 - dat^2 * (-log1mp)^(2 * xi) + 2 * sigma * dat * (-log1mp)^xi -
2 * (dat * xi * (-log1mp)^(2 * xi) - dat * (-log1mp)^(2 * xi) + sigma *
(-log1mp)^xi) * z - (dat^2 * xi * (-log1mp)^(2 * xi) + xi * z^2 * (-log1mp)^(2 *
xi) + 2 * sigma * dat * (-log1mp)^xi - sigma^2 - 2 * (dat * xi * (-log1mp)^(2 *
xi) + sigma * (-log1mp)^xi) * z) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/
(sigma * (-log1mp)^xi))^(1/xi))/((sigma^2 * dat^2 * xi^2 *
(-log1mp)^(2 * xi) + sigma^2 * xi^2 * z^2 * (-log1mp)^(2 * xi) + 2 * sigma^3 * dat * xi *
(-log1mp)^xi + sigma^4 - 2 * (sigma^2 * dat * xi^2 * (-log1mp)^(2 * xi) + sigma^3 *
xi * (-log1mp)^xi) * z) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi +
sigma)/(sigma * (-log1mp)^xi))^(1/xi)))
info[3, 2] <- info[2, 3] <- sum(-((sigma * dat * (-log1mp)^xi * logmlog1mp -
dat^2 * (-log1mp)^(2 * xi)) * xi^2 - (xi^2 * (-log1mp)^(2 * xi) + xi *
(-log1mp)^(2 * xi)) * z^2 + (sigma * dat * (-log1mp)^xi * logmlog1mp - dat^2 *
(-log1mp)^(2 * xi)) * xi - ((sigma * (-log1mp)^xi * logmlog1mp - 2 * dat * (-log(-p +
1))^(2 * xi)) * xi^2 + (sigma * (-log1mp)^xi * logmlog1mp - 2 * dat *
(-log1mp)^(2 * xi)) * xi) * z - (sigma * dat * xi^3 * (-log1mp)^xi * logmlog1mp - xi^2 *
z^2 * (-log1mp)^(2 * xi) + (sigma * dat * (-log1mp)^xi * (logmlog1mp + 1) -
dat^2 * (-log1mp)^(2 * xi)) * xi^2 - (sigma * xi^3 * (-log1mp)^xi * logmlog1mp +
(sigma * (-log1mp)^xi * (logmlog1mp + 1) - 2 * dat * (-log1mp)^(2 *
xi)) * xi^2) * z) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma *
(-log1mp)^xi))^(1/xi) + (dat^2 * xi * (-log1mp)^(2 * xi) + xi * z^2 *
(-log1mp)^(2 * xi) + sigma * dat * (-log1mp)^xi - (2 * dat * xi * (-log1mp)^(2 * xi) +
sigma * (-log1mp)^xi) * z) * log((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/
(sigma * (-log1mp)^xi)))/((sigma * dat^2 * xi^4 * (-log1mp)^(2 *
xi) + sigma * xi^4 * z^2 * (-log1mp)^(2 * xi) + 2 * sigma^2 * dat * xi^3 *
(-log1mp)^xi + sigma^3 * xi^2 - 2 * (sigma * dat * xi^4 * (-log1mp)^(2 * xi) + sigma^2 * xi^3 *
(-log1mp)^xi) * z) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma *
(-log1mp)^xi))^(1/xi)))
info[3, 3] <- sum((sigma * dat * xi^4 * (-log1mp)^xi * logmlog1mp^2 + (4 * sigma *
dat * (-log1mp)^xi * logmlog1mp - 3 * dat^2 * (-log1mp)^(2 * xi)) * xi^3 +
(2 * sigma * dat * (-log1mp)^xi * (logmlog1mp - 1) - (logmlog1mp^2 - 2 *
logmlog1mp) * sigma^2 - dat^2 * (-log1mp)^(2 * xi)) * xi^2 - (3 * xi^3 *
(-log1mp)^(2 * xi) + xi^2 * (-log1mp)^(2 * xi)) * z^2 - (dat^2 * xi^2 *
(-log1mp)^(2 * xi) + xi^2 * z^2 * (-log1mp)^(2 * xi) + 2 * sigma * dat * xi * (-log1mp)^xi +
sigma^2 - 2 * (dat * xi^2 * (-log1mp)^(2 * xi) + sigma * xi * (-log1mp)^xi) * z) *
log((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma *
(-log1mp)^xi))^2 - (sigma * xi^4 * (-log1mp)^xi * logmlog1mp^2 + 2 * (2 * sigma *
(-log1mp)^xi * logmlog1mp - 3 * dat * (-log1mp)^(2 * xi)) * xi^3 + 2 * (sigma *
(-log1mp)^xi * (logmlog1mp - 1) - dat * (-log1mp)^(2 * xi)) * xi^2) * z -
(sigma * dat * xi^5 * (-log1mp)^xi * logmlog1mp^2 + ((logmlog1mp^2 + 2 *
logmlog1mp) * sigma * dat * (-log1mp)^xi - dat^2 * (-log1mp)^(2 * xi)) *
xi^4 + (4 * sigma * dat * (-log1mp)^xi * logmlog1mp - 3 * dat^2 *
(-log1mp)^(2 * xi)) * xi^3 - 2 * (sigma * dat * (-log1mp)^xi - sigma^2 * logmlog1mp) *
xi^2 - (xi^4 * (-log1mp)^(2 * xi) + 3 * xi^3 * (-log1mp)^(2 * xi)) *
z^2 - (sigma * xi^5 * (-log1mp)^xi * logmlog1mp^2 + ((logmlog1mp^2 +
2 * logmlog1mp) * sigma * (-log1mp)^xi - 2 * dat * (-log1mp)^(2 * xi)) *
xi^4 + 2 * (2 * sigma * (-log1mp)^xi * logmlog1mp - 3 * dat * (-log1mp)^(2 * xi)) *
xi^3 - 2 * sigma * xi^2 * (-log1mp)^xi) * z) * ((dat * xi * (-log1mp)^xi -
xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi) + 2 *
(dat^2 * xi^3 * (-log1mp)^(2 * xi) - (sigma * dat * (-log1mp)^xi * (log(-log(-p +
1)) - 2) - dat^2 * (-log1mp)^(2 * xi)) * xi^2 + (xi^3 * (-log1mp)^(2 * xi) +
xi^2 * (-log1mp)^(2 * xi)) * z^2 + (sigma * dat * (-log1mp)^xi - sigma^2 *
(logmlog1mp - 1)) * xi - (2 * dat * xi^3 * (-log1mp)^(2 * xi) - (sigma *
(-log1mp)^xi * (logmlog1mp - 2) - 2 * dat * (-log1mp)^(2 * xi)) * xi^2 +
sigma * xi * (-log1mp)^xi) * z - (dat^2 * xi^3 * (-log1mp)^(2 * xi) + xi^3 *
z^2 * (-log1mp)^(2 * xi) + 2 * sigma * dat * xi^2 * (-log1mp)^xi + sigma^2 *
xi - 2 * (dat * xi^3 * (-log1mp)^(2 * xi) + sigma * xi^2 * (-log1mp)^xi) *
z) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma *
(-log1mp)^xi))^(1/xi)) * log((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma *
(-log1mp)^xi)))/((dat^2 * xi^6 * (-log1mp)^(2 * xi) + xi^6 * z^2 * (-log1mp)^(2 *
xi) + 2 * sigma * dat * xi^5 * (-log1mp)^xi + sigma^2 * xi^4 - 2 * (dat * xi^6 *
(-log1mp)^(2 * xi) + sigma * xi^5 * (-log1mp)^xi) * z) * ((dat * xi * (-log1mp)^xi -
xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi)))
} else{ #xi numerically zero
info[1, 1] <- sum(( exp((-dat + z)/sigma)*log1mp))/sigma^2
info[1, 2] <- sum(-((sigma + exp((-dat + z)/sigma)*(sigma - dat + z)*log1mp)))/sigma^3
info[2, 1] <- info[1, 2]
info[1, 3] <- sum((1/(2*sigma^3))*((2* exp(dat/sigma)* sigma*(sigma - dat + z + sigma*logmlog1mp) + exp(z/sigma)* log1mp*((dat - z)*(-2*sigma + dat - z) + 2*sigma*(sigma - dat + z)* logmlog1mp))/ exp(dat/sigma)))
info[3, 1] <- info[1, 3]
info[2, 2] <- sum(sigma*(sigma - 2*dat + 2*z) + exp((-dat + z)/sigma)*(dat - z)*(-2*sigma + dat - z) * log1mp)/sigma^4
info[2, 3] <- sum((1/(2*sigma^4))*(((dat - z)*(2*exp(dat/sigma)*sigma*(sigma - dat + z + sigma*logmlog1mp) + exp(z/sigma)* log1mp*((dat - z)*(-2*sigma + dat - z) + 2*sigma*(sigma - dat + z)* logmlog1mp)))/exp(dat/sigma)))
info[3, 2] <- info[2, 3]
info[3, 3] <- sum((1/(12*sigma^4))*((dat - z)*(4*sigma*((dat - z)*(3*sigma - 2*dat + 2*z) - 3*sigma*logmlog1mp*(2*(sigma - dat + z) + sigma*logmlog1mp)) + exp((-dat + z)/sigma)*log1mp*((-8*sigma + 3*dat - 3*z)*(dat - z)^2 + 12*sigma*logmlog1mp*((dat - z)*(2*sigma - dat + z) - sigma*(sigma - dat + z)*logmlog1mp)))))
}
infomat <- -info
# else expected information matrix
} else {
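# Expected information via the chain rule: recover the location parameter from the
# return level and sandwich gev.infomat between the Jacobian of that map.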
muf = z + sigma/xi * (1 - (-log(1 - p))^(-xi))
if (!isTRUE(all.equal(xi, 0, tolerance = 1e-07, check.attributes = FALSE))) {
Jac <- rbind(c(1, -((-log1mp)^(-xi) - 1)/xi, sigma * (-log1mp)^(-xi) * log(-log1mp)/xi +
sigma * ((-log1mp)^(-xi) - 1)/xi^2), c(0, 1, 0), c(0, 0, 1))
} else {
Jac <- rbind(c(1, log(-log1mp), -sigma * log(-log1mp)^2 + 1/2 * sigma * (-log1mp)^(-xi)
* log(-log1mp)^2), c(0, 1, 0), c(0, 0, 1))
}
infomat <- t(Jac) %*% gev.infomat(dat = dat, method = "exp", par = c(muf, sigma, xi)) %*% Jac
}
colnames(infomat) <- rownames(infomat) <- c("retlev","scale","shape")
return(infomat)
}
#' Tangent exponential model statistics for the GEV distribution (return level)
#'
#' Vector implementing conditioning on approximate ancillary statistics for the TEM
#' @seealso \code{\link{gevr}}
#' @name gevr.temstat
#' @inheritParams gevr
#' @keywords internal
#' @export
gevr.Vfun <- function(par, dat, p) {
z = par[1]
sigma = par[2]
xi = par[3]
log1mp <- log(1-p)
cbind(1, (dat - z)/sigma, sigma * ((dat - z) * xi/sigma + (-log1mp)^(-xi))^(-1/xi) * (((-log1mp)^(-xi) * log(-log1mp) - (dat - z)/sigma)/(((dat - z) * xi/sigma + (-log1mp)^(-xi)) *
xi) + log((dat - z) * xi/sigma + (-log1mp)^(-xi))/xi^2)/((dat - z) * xi/sigma + (-log1mp)^(-xi))^(-1/xi - 1))
}
#' Canonical parameter in the local exponential family approximation
#' @rdname gevr.temstat
#' @inheritParams gevr
#' @keywords internal
#' @export
gevr.phi <- function(par, dat, p, V) {
z = par[1]
sigma = par[2]
xi = par[3]
log1mp <- log(1-p)
t(-((xi * (-log1mp)^xi + (-log1mp)^xi) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi) - (-log1mp)^xi)/((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi))) %*% V
}
#' Derivative of the canonical parameter \eqn{\phi(\theta)} in the local exponential family approximation
#' @rdname gevr.temstat
#' @inheritParams gevr
#' @keywords internal
#' @export
gevr.dphi <- function(par, dat, p, V) {
z = par[1]
sigma = par[2]
xi = par[3]
log1mp <- log(1 - p)
rbind((xi * (-log1mp)^(2 * xi) - (xi^2 * (-log1mp)^(2 * xi) + xi * (-log1mp)^(2 *
xi)) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi) + (-log1mp)^(2 * xi))/((dat^2 * xi^2 * (-log1mp)^(2 * xi) + xi^2 *
z^2 * (-log1mp)^(2 * xi) + 2 * sigma * dat * xi * (-log1mp)^xi + sigma^2 - 2 * (dat *
xi^2 * (-log1mp)^(2 * xi) + sigma * xi * (-log1mp)^xi) * z) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi)), (dat * (-log1mp)^(2 * xi) - z * (-log1mp)^(2 * xi) - sigma * (-log1mp)^xi + (sigma * xi * (-log1mp)^xi + sigma * (-log1mp)^xi) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi +
sigma)/(sigma * (-log1mp)^xi))^(1/xi))/((sigma * dat^2 * xi^2 * (-log1mp)^(2 * xi) +
sigma * xi^2 * z^2 * (-log1mp)^(2 * xi) + 2 * sigma^2 * dat * xi * (-log1mp)^xi + sigma^3 -
2 * (sigma * dat * xi^2 * (-log1mp)^(2 * xi) + sigma^2 * xi * (-log1mp)^xi) * z) *
((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi)),
((sigma * (-log1mp)^xi * log(-log1mp) - dat * (-log1mp)^(2 * xi)) * xi^2 + (sigma *
(-log1mp)^xi * log(-log1mp) - dat * (-log1mp)^(2 * xi)) * xi + (xi^2 * (-log1mp)^(2 * xi) + xi *
(-log1mp)^(2 * xi)) * z - (sigma * xi^3 * (-log1mp)^xi * log(-log1mp) + xi^2 * z * (-log1mp)^(2 * xi) + (sigma * (-log1mp)^xi * (log(-log1mp) +
1) - dat * (-log1mp)^(2 * xi)) * xi^2) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi) + (dat * xi * (-log1mp)^(2 * xi) -
xi * z * (-log1mp)^(2 * xi) + sigma * (-log1mp)^xi) * log((dat * xi * (-log1mp)^xi - xi * z *
(-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi)))/((dat^2 * xi^4 *
(-log1mp)^(2 * xi) + xi^4 * z^2 * (-log1mp)^(2 * xi) + 2 * sigma * dat * xi^3 *
(-log1mp)^xi + sigma^2 * xi^2 - 2 * (dat * xi^4 * (-log1mp)^(2 * xi) + sigma *
xi^3 * (-log1mp)^xi) * z) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi +
sigma)/(sigma * (-log1mp)^xi))^(1/xi))) %*% V
}
#' @title Generalized Pareto distribution (mean of maximum of N exceedances parametrization)
#'
#' @description Likelihood, score function and information matrix,
#' approximate ancillary statistics and sample space derivative
#' for the generalized Pareto distribution parametrized in terms of the mean of the maximum of \code{N} exceedances.
#'
#' The parameter \code{N} corresponds to the number of threshold exceedances over which the maximum is taken,
#' and \eqn{z} is the expected value of this maximum.
#' Note that the parametrization is in terms of the expected excess, that is, the expected maximum minus the threshold; see the example below.
#'
#' @details The observed information matrix was calculated from the Hessian using symbolic calculus in Sage.
#'
#' @author Leo Belzile
#' @name gpdN
#' @param par vector of length 2 containing \eqn{z} and \eqn{\xi}, respectively the mean excess of the maximum of \code{N} exceedances above the threshold and the shape parameter.
#' @param dat sample vector
#' @param N block size for threshold exceedances.
#' @param tol numerical tolerance for the exponential model
#' @param V vector calculated by \code{gpdN.Vfun}
#' @param method string indicating whether to use the expected (\code{'exp'}) or the observed (\code{'obs'} - the default) information matrix.
#' @param nobs number of observations
#' @section Usage: \preformatted{gpdN.ll(par, dat, N, tol=1e-5)
#' gpdN.score(par, dat, N)
#' gpdN.infomat(par, dat, N, method = c('obs', 'exp'), nobs = length(dat))
#' gpdN.Vfun(par, dat, N)
#' gpdN.phi(par, dat, N, V)
#' gpdN.dphi(par, dat, N, V)}
#'
#' @section Functions:
#'
#' \itemize{
#' \item{\code{gpdN.ll}:} {log likelihood}
#' \item{\code{gpdN.score}:} {score vector}
#' \item{\code{gpdN.infomat}:} {observed information matrix for GP parametrized in terms of mean of the maximum of \code{N} exceedances and shape}
#' \item{\code{gpdN.Vfun}:} {vector implementing conditioning on approximate ancillary statistics for the TEM}
#' \item{\code{gpdN.phi}:} {canonical parameter in the local exponential family approximation}
#' \item{\code{gpdN.dphi}:} {derivative matrix of the canonical parameter in the local exponential family approximation}
#' }
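#'
#' @examples
#' # Minimal sketch of the reparametrization (illustrative values): for a mean
#' # excess z of the maximum of N exceedances and shape xi, the implied GP scale is
#' # sigma = z * xi / (Gamma(N + 1) * Gamma(1 - xi) / Gamma(N + 1 - xi) - 1).
#' N <- 10; z <- 2; xi <- 0.1
#' sigma <- z * xi / (exp(lgamma(N + 1) + lgamma(1 - xi) - lgamma(N + 1 - xi)) - 1)
#' dat <- sigma * ((1 - runif(50))^(-xi) - 1) / xi  # GP(sigma, xi) sample via inverse cdf
#' gpdN.ll(par = c(z, xi), dat = dat, N = N)        # matches gpd.ll(c(sigma, xi), dat)
#' gpdN.score(par = c(z, xi), dat = dat, N = N)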
NULL
#' Log likelihood of the generalized Pareto distribution (mean of maximum of N exceedances parametrization)
#'
#' @seealso \code{\link{gpdN}}
#' @inheritParams gpdN
#' @keywords internal
#' @export
gpdN.ll <- function(par, dat, N) {
xi = par[2]
z = par[1]
euler_gamma = 0.57721566490153231044
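# GP scale implied by the mean excess z of the maximum of N exceedances:
# sigma = z * xi / (Gamma(N + 1) * Gamma(1 - xi) / Gamma(N + 1 - xi) - 1),
# with limit z / (Euler-Mascheroni constant + digamma(N + 1)) as xi -> 0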
sigma <- if (abs(xi) > 1e-8) {
z * xi/(exp(lgamma(N + 1) + lgamma(-xi + 1) - lgamma(N - xi + 1)) - 1)
} else {
z / (euler_gamma + psigamma(N + 1))
}
gpd.ll(par = c(sigma, xi), dat = dat)
}
#' Score vector of the generalized Pareto distribution (mean of maximum of N exceedances parametrization)
#' @seealso \code{\link{gpdN}}
#' @inheritParams gpdN
#' @keywords internal
#' @export
gpdN.score <- function(par, dat, N) {
z = par[1]
xi = par[2]
cst <- exp(lgamma(N + 1) + lgamma(1 - xi) - lgamma(N + 1 - xi))
xizero <- abs(xi) < 1e-6
if(!xizero){
as.vector(c(sum(dat * (cst - 1) * (1/xi + 1)/(z^2 * (dat * (cst - 1)/z + 1)) - 1/z), sum(-(psigamma(N - xi +
1) * cst - psigamma(-xi + 1) * cst) * dat * (1/xi + 1)/(z * (dat * (cst - 1)/z + 1)) + ((psigamma(N -
xi + 1) * cst - psigamma(-xi + 1) * cst) * xi * z/(cst - 1)^2 - z/(cst - 1)) * (cst - 1)/(xi *z) +
log(dat * (cst - 1)/z + 1)/xi^2)))
} else{
euler_gamma <- 0.57721566490153231044
psi1pN <- psigamma(1 + N)
psip1pN <- psigamma(1 + N, deriv = 1)
as.vector(c( sum(dat*euler_gamma - z + dat*psi1pN)/z^2,
sum((6*dat^2*euler_gamma^3 - 12*dat*euler_gamma^2*z - 6*dat*euler_gamma^3*z -
dat*euler_gamma*pi^2*z + 6*euler_gamma^2*z^2 + pi^2*z^2 +
6*(3*dat^2*euler_gamma - dat*(2 + 3*euler_gamma)*z + z^2)*
psigamma(1 + N)^2 +
6*dat*(dat - z)*psigamma(1 + N)^3 + 6*(dat*euler_gamma - z)*z*
psigamma(deriv = 1, 1+N) + psigamma(1 + N)*(18*dat^2*euler_gamma^2 -
dat*(24*euler_gamma + 18*euler_gamma^2 + pi^2)*z + 12*euler_gamma*z^2 +
6*dat*z*psigamma(deriv = 1, 1+N)))/(12*z^2*(euler_gamma + psigamma(1 + N))))))
}
}
#' Information matrix of the generalized Pareto distribution (mean of maximum of N exceedances parametrization)
#' @seealso \code{\link{gpdN}}
#' @inheritParams gpdN
#' @keywords internal
#' @export
gpdN.infomat <- function(par, dat, N, method = c("obs", "exp"), nobs = length(dat)) {
z = as.vector(par[1])
xi = as.vector(par[2])
if (xi < -0.5) {
return(matrix(NA, 2, 2))
}
method <- method[1] # the default corresponds to the observed information rather than the expected (Fisher) information
xizero <- abs(xi) < 1e-4
# constant appearing in the mapping from (z, xi) to (sigma, xi)
cst <- exp(lgamma(N + 1) + lgamma(1 - xi) - lgamma(N + 1 - xi))
if (method == "exp") {
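# Expected information via the chain rule: I(z, xi) = t(J) I(sigma, xi) J,
# where J is the Jacobian of the map from (z, xi) to (sigma, xi)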
if(!xizero){
sigmaf <- z * xi/(cst - 1)
Jac <- rbind(c(xi/(cst - 1),
-(digamma(N - xi + 1) * cst - digamma(-xi + 1) * cst) * xi * z/(cst - 1)^2 + z/(cst - 1)), c(0, 1))
} else{
euler_gamma <- 0.57721566490153231044
psi1pN <- psigamma(1 + N)
sigmaf <- z/(euler_gamma + psi1pN)
Jac <- rbind(c(1/(euler_gamma + psi1pN),
-(z*(6*euler_gamma^2 + pi^2 + 12*euler_gamma*psigamma(1 + N) +
6*psigamma(1 + N)^2 - 6*psigamma(1 + N, deriv = 1)))/
(12*(euler_gamma + psigamma(1 + N))^2)), c(0,1))
}
infomat <- t(Jac) %*% gpd.infomat(par = c(sigmaf, xi), dat = dat, method = "exp") %*% Jac
} else if (method == "obs") {
# Observed information
if(!xizero){
k11 <- sum(2 * dat * (cst - 1) * (1/xi + 1)/(z^3 * (dat * (cst - 1)/z + 1)) - dat^2 * (cst -
1)^2 * (1/xi + 1)/(z^4 * (dat * (cst - 1)/z + 1)^2) - 1/z^2)
k12 <- sum(-(digamma(N - xi + 1) * cst - digamma(-xi + 1) * cst) * dat * (1/xi + 1)/(z^2 * (dat *
(cst - 1)/z + 1)) + (digamma(N - xi + 1) * cst - digamma(-xi + 1) * cst) * dat^2 * (cst -
1) * (1/xi + 1)/(z^3 * (dat * (cst - 1)/z + 1)^2) + dat * (cst - 1)/(xi^2 * z^2 * (dat *
(cst - 1)/z + 1)))
k22 <- sum(-(digamma(N - xi + 1) * cst - digamma(-xi + 1) * cst)^2 * dat^2 * (1/xi + 1)/(z^2 *
(dat * (cst - 1)/z + 1)^2) + (digamma(N - xi + 1)^2 * cst - 2 * digamma(N - xi + 1) * digamma(-xi +
1) * cst + digamma(-xi + 1)^2 * cst - psigamma(deriv = 1, N - xi + 1) * cst + psigamma(deriv = 1,
-xi + 1) * cst) * dat * (1/xi + 1)/(z * (dat * (cst - 1)/z + 1)) - (digamma(N - xi + 1) *
cst - digamma(-xi + 1) * cst) * ((digamma(N - xi + 1) * cst - digamma(-xi + 1) * cst) * xi *
z/(cst - 1)^2 - z/(cst - 1))/(xi * z) + (2 * (digamma(N - xi + 1) * cst - digamma(-xi + 1) *
cst)^2 * xi * z/(cst - 1)^3 - (digamma(N - xi + 1)^2 * cst - 2 * digamma(N - xi + 1) * digamma(-xi +
1) * cst + digamma(-xi + 1)^2 * cst - psigamma(deriv = 1, N - xi + 1) * cst + psigamma(deriv = 1,
-xi + 1) * cst) * xi * z/(cst - 1)^2 - 2 * (digamma(N - xi + 1) * cst - digamma(-xi + 1) *
cst) * z/(cst - 1)^2) * (cst - 1)/(xi * z) + ((digamma(N - xi + 1) * cst - digamma(-xi +
1) * cst) * xi * z/(cst - 1)^2 - z/(cst - 1)) * (cst - 1)/(xi^2 * z) - 2 * (digamma(N - xi +
1) * cst - digamma(-xi + 1) * cst) * dat/(xi^2 * z * (dat * (cst - 1)/z + 1)) + 2 * log(dat *
(cst - 1)/z + 1)/xi^3)
} else{
euler_gamma <- 0.57721566490153231044
psigamma1pN <- psigamma(1 + N)
zeta3 <- 1.2020569031595944587
psigammap1pN <- psigamma(1 + N, deriv = 1)
psigammapp1pN <- psigamma(1 + N, deriv = 2)
k11 <- -sum(-2*dat*euler_gamma + z - 2*dat*psigamma1pN)/z^3
k12 <- -sum((1/(12*z^3))*(dat*(-12*dat*euler_gamma^2 + (6*euler_gamma*(2 + euler_gamma) + pi^2)*z +
12*(-2*dat*euler_gamma + z + euler_gamma*z)*psigamma1pN +
6*(-2*dat + z)*psigamma1pN^2 - 6*z*psigammap1pN)))
k22 <- -sum((-96*dat^3*euler_gamma^5 + 24*dat^2*euler_gamma^3*(6*euler_gamma*(1 + euler_gamma) + pi^2)*z -
24*dat*euler_gamma^2*(2*euler_gamma^2*(3 + euler_gamma) + (1 + euler_gamma)*pi^2)*
z^2 + (12*euler_gamma^4 + 12*euler_gamma^2*pi^2 - pi^4)*z^3 -
12*((40*dat^3*euler_gamma + 4*dat*(3 + 5*euler_gamma)*z^2 - z^3 -
12*dat^2*(z + 5*euler_gamma*z))*psigamma1pN^4 +
4*dat*(2*dat^2 - 3*dat*z + z^2)*psigamma1pN^5 +
2*psigamma1pN^3*(40*dat^3*euler_gamma^2 - dat^2*(12*euler_gamma*(2 + 5*euler_gamma) + pi^2)*z +
dat*(4*euler_gamma*(6 + 5*euler_gamma) + pi^2)*z^2 -2*euler_gamma*z^3 + 6*dat*(dat - z)*z*psigammap1pN) +
z*((12*dat^2*euler_gamma^3 - 12*dat*euler_gamma^2*(1 + euler_gamma)*z + (6*euler_gamma^2 - pi^2)*z^2)*
psigammap1pN + 3*z^2*psigammap1pN^2 + 4*euler_gamma*(dat*euler_gamma - z)*z*
(psigammapp1pN + 2*zeta3)) + psigamma1pN^2*(80*dat^3*euler_gamma^3 -
6*dat^2*euler_gamma*(4*euler_gamma*(3 + 5*euler_gamma) + pi^2)*z + 2*dat*(4*euler_gamma^2*(9 + 5*euler_gamma) +
(1 + 3*euler_gamma)*pi^2)*z^2 - (6*euler_gamma^2 + pi^2)*z^3 +6*z*(6*dat^2*euler_gamma - 2*dat*(1 + 3*euler_gamma)*z + z^2)*
psigammap1pN + 4*dat*z^2*(psigammapp1pN + 2*zeta3)) +2*psigamma1pN*(euler_gamma*(20*dat^3*euler_gamma^3 -
3*dat^2*euler_gamma*(2*euler_gamma*(4 + 5*euler_gamma) + pi^2)*z + dat*(2*euler_gamma^2*(12 + 5*euler_gamma) +
(2 + 3*euler_gamma)*pi^2)*z^2 - (2*euler_gamma^2 + pi^2)*z^3) +6*euler_gamma*z*(3*dat^2*euler_gamma - dat*(2 + 3*euler_gamma)*z + z^2)*
psigammap1pN - 2*z^2*(-2*dat*euler_gamma + z)*(psigammapp1pN + 2*zeta3))))/(144*z^3*(psigamma1pN + euler_gamma)^2))
}
infomat <- cbind(c(k11, k12), c(k12, k22))
}
colnames(infomat) <- rownames(infomat) <- c("Nmean","shape")
return(infomat)
}
#' Tangent exponential model statistics for the generalized Pareto distribution (mean of maximum of N exceedances parametrization)
#'
#' Vector implementing conditioning on approximate ancillary statistics for the TEM
#' @seealso \code{\link{gpdN}}
#' @name gpdN.temstat
#' @inheritParams gpdN
#' @keywords internal
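#' @examples
#' # Minimal sketch (illustrative values): TEM quantities on simulated exceedances.
#' N <- 10; z <- 2; xi <- 0.1
#' sigma <- z * xi / (exp(lgamma(N + 1) + lgamma(1 - xi) - lgamma(N + 1 - xi)) - 1)
#' dat <- sigma * ((1 - runif(40))^(-xi) - 1) / xi  # GP(sigma, xi) sample
#' V <- gpdN.Vfun(par = c(z, xi), dat = dat, N = N) # 40 x 2 matrix
#' gpdN.phi(par = c(z, xi), dat = dat, N = N, V = V)
#' gpdN.dphi(par = c(z, xi), dat = dat, N = N, V = V)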
#' @export
gpdN.Vfun <- function(par, dat, N) {
xi = par[2]
z = par[1]
cst <- exp(lgamma(N + 1) + lgamma(-xi + 1) - lgamma(N - xi + 1))
cbind(dat/z, -xi * z * (dat * (cst - 1)/z + 1)^(-1/xi) * ((psigamma(N - xi + 1) * cst - psigamma(-xi +
1) * cst) * dat/(xi * z * (dat * (cst - 1)/z + 1)) - log(dat * (cst - 1)/z + 1)/xi^2)/((dat *
(cst - 1)/z + 1)^(-1/xi - 1) * (cst - 1)))
}
#' Canonical parameter in the local exponential family approximation
#'
#' @inheritParams gpdN
#' @rdname gpdN.temstat
#' @keywords internal
#' @export
gpdN.phi <- function(par, dat, N, V) {
xi = par[2]
z = par[1]
cst <- exp(lgamma(N + 1) + lgamma(-xi + 1) - lgamma(N - xi + 1))
matrix(-(cst - 1) * (1/xi + 1)/(z * (dat * (cst - 1)/z + 1)), nrow = 1) %*% V
}
#' Derivative matrix of the canonical parameter in the local exponential family approximation
#' @inheritParams gpdN
#' @rdname gpdN.temstat
#' @keywords internal
#' @export
gpdN.dphi <- function(par, dat, N, V) {
xi = par[2]
z = par[1]
cst <- exp(lgamma(N + 1) + lgamma(-xi + 1) - lgamma(N - xi + 1))
rbind((cst - 1) * (1/xi + 1)/(z^2 * (dat * (cst - 1)/z + 1)) - dat * (cst - 1)^2 * (1/xi + 1)/(z^3 *
(dat * (cst - 1)/z + 1)^2), -(psigamma(N - xi + 1) * cst - psigamma(-xi + 1) * cst) * (1/xi +
1)/(z * (dat * (cst - 1)/z + 1)) + (psigamma(N - xi + 1) * cst - psigamma(-xi + 1) * cst) * dat *
(cst - 1) * (1/xi + 1)/(z^2 * (dat * (cst - 1)/z + 1)^2) + (cst - 1)/(xi^2 * z * (dat * (cst -
1)/z + 1))) %*% V
}
############################################################################################
############################################################################################
#' @title Generalized extreme value distribution (quantile/mean of N-block maxima parametrization)
#'
#' @description Likelihood, score function and information matrix,
#' approximate ancillary statistics and sample space derivative
#' for the generalized extreme value distribution parametrized in terms of the
#' quantile or mean of the \code{N}-block maximum \eqn{z}, the scale and the shape.
#'
#' @author Leo Belzile
#' @name gevN
#' @param par vector containing \code{loc}, the quantile or mean of the \code{N}-block maximum and \code{shape}
#' @param dat sample vector
#' @param V vector calculated by \code{gevN.Vfun}
#' @param q probability, corresponding to the \eqn{q}th quantile of the \code{N}-block maximum
#' @param qty string indicating whether to calculate the \code{q} quantile or the mean
#' @param N block size (number of observations over which the maximum is taken)
#' @param method string indicating whether to use the expected (\code{'exp'}) or the observed (\code{'obs'} - the default) information matrix.
#' @param nobs number of observations
#' @section Usage: \preformatted{gevN.ll(par, dat, N, q, qty = c('mean', 'quantile'))
#' gevN.ll.optim(par, dat, N, q = 0.5, qty = c('mean', 'quantile'))
#' gevN.score(par, dat, N, q = 0.5, qty = c('mean', 'quantile'))
#' gevN.infomat(par, dat, qty = c('mean', 'quantile'), method = c('obs', 'exp'), N, q = 0.5, nobs = length(dat))
#' gevN.Vfun(par, dat, N, q = 0.5, qty = c('mean', 'quantile'))
#' gevN.phi(par, dat, N, q = 0.5, qty = c('mean', 'quantile'), V)
#' gevN.dphi(par, dat, N, q = 0.5, qty = c('mean', 'quantile'), V)}
#'
#' @section Functions:
#' \itemize{
#' \item{\code{gevN.ll}:} {log likelihood}
#' \item{\code{gevN.score}:} {score vector}
#' \item{\code{gevN.infomat}:} {expected and observed information matrix}
#' \item{\code{gevN.Vfun}:} {vector implementing conditioning on approximate ancillary statistics for the TEM}
#' \item{\code{gevN.phi}:} {canonical parameter in the local exponential family approximation}
#' \item{\code{gevN.dphi}:} {derivative matrix of the canonical parameter in the local exponential family approximation}
#' }
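#'
#' @examples
#' # Minimal sketch (illustrative values), assuming z is the q-quantile of the
#' # maximum of N independent GEV(loc, scale, shape) variables.
#' mu <- 0; sigma <- 1; xi <- 0.1; N <- 50; q <- 0.5
#' z <- mu + sigma * (N^xi * (-log(q))^(-xi) - 1) / xi
#' dat <- mu + sigma * ((-log(runif(30)))^(-xi) - 1) / xi  # GEV(mu, sigma, xi) sample
#' gevN.ll(par = c(mu, z, xi), dat = dat, N = N, q = q, qty = "quantile")
#' gev.ll(par = c(mu, sigma, xi), dat = dat)               # same value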
NULL
#' Log likelihood of the generalized extreme value distribution (quantile/mean of N-block maxima parametrization)
#'
#' @seealso \code{\link{gevN}}
#' @inheritParams gevN
#' @keywords internal
#' @export
gevN.ll <- function(par, dat, N, q = 0.5, qty = c("mean", "quantile")) {
qty <- match.arg(qty)
mu = par[1]
z = par[2]
xi = as.vector(par[3])
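# GEV scale implied by z, the q-quantile (qty = "quantile") or the mean (qty = "mean")
# of the N-block maximum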
if (!isTRUE(all.equal(xi, 0, check.attributes = FALSE))) {
sigma <- switch(qty,
quantile = (z - mu) * xi/(N^xi * (log(1/q))^(-xi) - 1),
mean = (z - mu) * xi/(N^xi * gamma(1 - xi) - 1))
} else {
sigma <- switch(qty,
quantile = (z - mu)/(log(N) - log(-log(q))),
mean = (z - mu)/(-psigamma(1) + log(N)))
}
gev.ll(par = c(mu, sigma, xi), dat = dat)
}
#' Score of the generalized extreme value distribution (quantile/mean of N-block maxima parametrization)
#'
#' @seealso \code{\link{gevN}}
#' @inheritParams gevN
#' @keywords internal
#' @export
gevN.score <- function(par, dat, N, q = 0.5, qty = c("mean", "quantile")) {
qty <- match.arg(qty)
mu = par[1]
z = par[2]
xi = par[3]
log1q = log(1/q)
if (qty == "quantile") {
# quantiles at prob. q
if(abs(xi) > 1e-4){
as.vector(c(sum((-(N^xi * log1q^(-xi) - 1) * (dat - mu)/(mu - z) + 1)^(-1/xi - 1) * ((N^xi * log1q^(-xi) -
1) * (dat - mu)/(mu - z)^2 + (N^xi * log1q^(-xi) - 1)/(mu - z))/xi + ((N^xi * log1q^(-xi) -
1) * (dat - mu)/(mu - z)^2 + (N^xi * log1q^(-xi) - 1)/(mu - z)) * (1/xi + 1)/((N^xi *
log1q^(-xi) - 1) * (dat - mu)/(mu - z) - 1) - 1/(mu - z)), sum(-(N^xi * log1q^(-xi) -
1) * (dat - mu) * (-(N^xi * log1q^(-xi) - 1) * (dat - mu)/(mu - z) + 1)^(-1/xi - 1)/((mu -
z)^2 * xi) - (N^xi * log1q^(-xi) - 1) * (dat - mu) * (1/xi + 1)/(((N^xi * log1q^(-xi) -
1) * (dat - mu)/(mu - z) - 1) * (mu - z)^2) + 1/(mu - z)), sum((-(N^xi * log1q^(-xi) -
1) * (dat - mu)/(mu - z) + 1)^(-1/xi) * ((N^xi * log1q^(-xi) * log(N) - N^xi * log1q^(-xi) *
log(log1q)) * (dat - mu)/(((N^xi * log1q^(-xi) - 1) * (dat - mu)/(mu - z) - 1) * (mu -
z) * xi) - log(-(N^xi * log1q^(-xi) - 1) * (dat - mu)/(mu - z) + 1)/xi^2) - (N^xi * log1q^(-xi) *
log(N) - N^xi * log1q^(-xi) * log(log1q)) * (dat - mu) * (1/xi + 1)/(((N^xi * log1q^(-xi) -
1) * (dat - mu)/(mu - z) - 1) * (mu - z)) + (N^xi * log1q^(-xi) - 1) * ((N^xi * log1q^(-xi) *
log(N) - N^xi * log1q^(-xi) * log(log1q)) * (mu - z) * xi/(N^xi * log1q^(-xi) -
1)^2 - (mu - z)/(N^xi * log1q^(-xi) - 1))/((mu - z) * xi) + log1p(-(N^xi * log1q^(-xi) -
1) * (dat - mu)/(mu - z))/xi^2)))
} else{
logN <- log(N)
loglog1q <- log(log1q)
as.vector(c(
sum((-mu + z + (-1 + exp(((mu - dat)*(-logN + loglog1q))/(mu - z)))*(dat - z)*(logN - loglog1q))/(mu - z)^2),
sum((mu - z + (mu - dat)*(-1 + N^((-mu + dat)/(mu - z))*log1q^((mu - dat)/(mu - z)))*(logN - loglog1q))/
(mu - z)^2),
sum((1/(2*(mu - z)^2))*(((-(mu - z))*(mu - 2*dat + z) + (mu - dat)*(dat - z)* (-1 + N^((-mu + dat)/(mu - z))*
log1q^((mu - dat)/(mu - z)))*(log( N) - loglog1q))*(logN - loglog1q)))
))
}
} else {
# Mean
if(abs(xi) > 1e-4){
as.vector(c(sum((-(N^xi * gamma(-xi + 1) - 1) * (dat - mu)/(mu - z) + 1)^(-1/xi - 1) * ((N^xi * gamma(-xi +
1) - 1) * (dat - mu)/(mu - z)^2 + (N^xi * gamma(-xi + 1) - 1)/(mu - z))/xi + ((N^xi * gamma(-xi +
1) - 1) * (dat - mu)/(mu - z)^2 + (N^xi * gamma(-xi + 1) - 1)/(mu - z)) * (1/xi + 1)/((N^xi *
gamma(-xi + 1) - 1) * (dat - mu)/(mu - z) - 1) - 1/(mu - z)), sum(-(N^xi * gamma(-xi + 1) -
1) * (dat - mu) * (-(N^xi * gamma(-xi + 1) - 1) * (dat - mu)/(mu - z) + 1)^(-1/xi - 1)/((mu -
z)^2 * xi) - (N^xi * gamma(-xi + 1) - 1) * (dat - mu) * (1/xi + 1)/(((N^xi * gamma(-xi +
1) - 1) * (dat - mu)/(mu - z) - 1) * (mu - z)^2) + 1/(mu - z)), sum((-(N^xi * gamma(-xi +
1) - 1) * (dat - mu)/(mu - z) + 1)^(-1/xi) * ((N^xi * log(N) * gamma(-xi + 1) - N^xi * psigamma(-xi +
1) * gamma(-xi + 1)) * (dat - mu)/(((N^xi * gamma(-xi + 1) - 1) * (dat - mu)/(mu - z) - 1) *
(mu - z) * xi) - log1p(-(N^xi * gamma(-xi + 1) - 1) * (dat - mu)/(mu - z))/xi^2) - (N^xi *
log(N) * gamma(-xi + 1) - N^xi * psigamma(-xi + 1) * gamma(-xi + 1)) * (dat - mu) * (1/xi +
1)/(((N^xi * gamma(-xi + 1) - 1) * (dat - mu)/(mu - z) - 1) * (mu - z)) + (N^xi * gamma(-xi +
1) - 1) * ((N^xi * log(N) * gamma(-xi + 1) - N^xi * psigamma(-xi + 1) * gamma(-xi + 1)) *
(mu - z) * xi/(N^xi * gamma(-xi + 1) - 1)^2 - (mu - z)/(N^xi * gamma(-xi + 1) - 1))/((mu -
z) * xi) + log1p(-(N^xi * gamma(-xi + 1) - 1) * (dat - mu)/(mu - z))/xi^2)))
} else{
logN <- log(N)
euler_gamma <- 0.57721566490153231044
as.vector(c(
sum((1/(mu - z)^2)*(-mu - euler_gamma*dat + z + euler_gamma*z - dat*log(N) +
z*log(N) +
exp(((mu - dat)*(euler_gamma + log(N)))/(-mu + z))*(dat -
z)*(euler_gamma + log(N)))),
sum((1/(mu - z)^2)*(mu - euler_gamma*mu + euler_gamma*dat - z - mu*log(N) +
dat*log(N) +
exp(((mu - dat)*(euler_gamma + log(N)))/(-mu + z))*(mu -
dat)*(euler_gamma + log(N)))),
sum((exp((mu*(euler_gamma + log(N)))/(-mu + z))*
(exp((dat*(euler_gamma + log(N)))/(mu - z))*(mu - dat)*(euler_gamma + log(N))*
(pi^2*(mu - z) + 6*euler_gamma^2*(dat - z) + 6*(dat - z)*log(N)*
(2*euler_gamma + log(N))) + exp((mu*(euler_gamma + log(N)))/(mu - z))*
((-euler_gamma)*pi^2*(mu - dat)*(mu - z) + pi^2*(mu - z)^2 - 6*euler_gamma^3*(mu - dat)*(dat - z) -
6*euler_gamma^2*(mu - z)*(mu - 2*dat + z) + log(N)*((-pi^2)*(mu - dat)*(mu - z) -
18*euler_gamma^2*(mu - dat)*(dat - z) - 12*euler_gamma*(mu - z)*(mu - 2*dat + z) +
6*log(N)*(-mu^2 + 3*euler_gamma*dat*(dat - z) + z*(-2*dat + z) +
mu*((2 - 3*euler_gamma)*dat + 3*euler_gamma*z) - (mu - dat)*(dat - z)*log(N))))))/
(12*(mu - z)^2*(euler_gamma + log(N))))
))
}
}
}
#' Information matrix of the generalized extreme value distribution (quantile/mean of N-block maxima parametrization)
#' @seealso \code{\link{gevN}}
#' @inheritParams gevN
#' @keywords internal
#' @export
gevN.infomat <- function(par, dat, method = c("obs", "exp"), qty = c("mean", "quantile"), N, q = 0.5, nobs = length(dat)) {
method <- match.arg(method)
par <- as.vector(par)
mu = par[1]
z = par[2]
xi = par[3]
logN = log(N)
log1q = log(1/q)
loglog1q = log(log1q)
qty <- match.arg(qty)
xizero <- isTRUE(all.equal(xi, 0, tolerance = 1e-05, check.attributes = FALSE))
euler_gamma <- 0.57721566490153231044
zeta3 <- 1.2020569031595944587
if (qty == "mean") {
if (!xizero) {
# z = mu + sigma/xi*(N^xi*gamma(1-xi)-1)
sigmaq <- (z - mu) * xi/(N^xi * gamma(1 - xi) - 1)
} else {
# z = mu + sigma*(log(N) + euler_gamma)
sigmaq <- (z - mu)/(logN + euler_gamma)
}
if (method == "obs") {
if(!xizero){
k11 <- sum(((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1)^(-1/xi - 2) * ((N^xi *
gamma(-xi + 1) - 1) * (mu - dat)/(mu - z)^2 - (N^xi * gamma(-xi + 1) - 1)/(mu - z))^2 *
(1/xi + 1)/xi - 2 * ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1)^(-1/xi -
1) * ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z)^3 - (N^xi * gamma(-xi + 1) -
1)/(mu - z)^2)/xi - ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z)^2 - (N^xi * gamma(-xi +
1) - 1)/(mu - z))^2 * (1/xi + 1)/((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) +
1)^2 + 2 * ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z)^3 - (N^xi * gamma(-xi +
1) - 1)/(mu - z)^2) * (1/xi + 1)/((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) +
1) - 1/(mu - z)^2)
k12 <- sum(-(N^xi * gamma(-xi + 1) - 1) * ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu -
z) + 1)^(-1/xi - 2) * (mu - dat) * ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z)^2 -
(N^xi * gamma(-xi + 1) - 1)/(mu - z)) * (1/xi + 1)/((mu - z)^2 * xi) + ((N^xi * gamma(-xi +
1) - 1) * (mu - dat)/(mu - z) + 1)^(-1/xi - 1) * (2 * (N^xi * gamma(-xi + 1) - 1) * (mu -
dat)/(mu - z)^3 - (N^xi * gamma(-xi + 1) - 1)/(mu - z)^2)/xi - (2 * (N^xi * gamma(-xi +
1) - 1) * (mu - dat)/(mu - z)^3 - (N^xi * gamma(-xi + 1) - 1)/(mu - z)^2) * (1/xi + 1)/((N^xi *
gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1) + (N^xi * gamma(-xi + 1) - 1) * (mu -
dat) * ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z)^2 - (N^xi * gamma(-xi + 1) -
1)/(mu - z)) * (1/xi + 1)/(((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1)^2 *
(mu - z)^2) + 1/(mu - z)^2)
k13 <- sum(-((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1)^(-1/xi - 1) * ((N^xi *
logN * gamma(-xi + 1) - N^xi * psigamma(-xi + 1) * gamma(-xi + 1)) * (mu - dat) * (1/xi +
1)/(((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1) * (mu - z)) - log1p((N^xi *
gamma(-xi + 1) - 1) * (mu - dat)/(mu - z))/xi^2) * ((N^xi * gamma(-xi + 1) - 1) *
(mu - dat)/(mu - z)^2 - (N^xi * gamma(-xi + 1) - 1)/(mu - z))/xi + ((N^xi * gamma(-xi +
1) - 1) * (mu - dat)/(mu - z) + 1)^(-1/xi - 1) * ((N^xi * logN * gamma(-xi + 1) - N^xi *
psigamma(-xi + 1) * gamma(-xi + 1)) * (mu - dat)/(mu - z)^2 - (N^xi * logN * gamma(-xi +
1) - N^xi * psigamma(-xi + 1) * gamma(-xi + 1))/(mu - z))/xi - ((N^xi * logN * gamma(-xi +
1) - N^xi * psigamma(-xi + 1) * gamma(-xi + 1)) * (mu - dat)/(mu - z)^2 - (N^xi * logN *
gamma(-xi + 1) - N^xi * psigamma(-xi + 1) * gamma(-xi + 1))/(mu - z)) * (1/xi + 1)/((N^xi *
gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1) + (N^xi * logN * gamma(-xi + 1) - N^xi *
psigamma(-xi + 1) * gamma(-xi + 1)) * (mu - dat) * ((N^xi * gamma(-xi + 1) - 1) * (mu -
dat)/(mu - z)^2 - (N^xi * gamma(-xi + 1) - 1)/(mu - z)) * (1/xi + 1)/(((N^xi * gamma(-xi +
1) - 1) * (mu - dat)/(mu - z) + 1)^2 * (mu - z)) - ((N^xi * gamma(-xi + 1) - 1) * (mu -
dat)/(mu - z) + 1)^(-1/xi - 1) * ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z)^2 -
(N^xi * gamma(-xi + 1) - 1)/(mu - z))/xi^2 + ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu -
z)^2 - (N^xi * gamma(-xi + 1) - 1)/(mu - z))/(((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu -
z) + 1) * xi^2))
k22 <- sum((N^xi * gamma(-xi + 1) - 1)^2 * ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu -
z) + 1)^(-1/xi - 2) * (mu - dat)^2 * (1/xi + 1)/((mu - z)^4 * xi) - 2 * (N^xi * gamma(-xi +
1) - 1) * ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1)^(-1/xi - 1) * (mu -
dat)/((mu - z)^3 * xi) - (N^xi * gamma(-xi + 1) - 1)^2 * (mu - dat)^2 * (1/xi + 1)/(((N^xi *
gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1)^2 * (mu - z)^4) + 2 * (N^xi * gamma(-xi +
1) - 1) * (mu - dat) * (1/xi + 1)/(((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) +
1) * (mu - z)^3) - 1/(mu - z)^2)
k23 <- sum((N^xi * gamma(-xi + 1) - 1) * ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu -
z) + 1)^(-1/xi - 1) * (mu - dat) * ((N^xi * logN * gamma(-xi + 1) - N^xi * psigamma(-xi +
1) * gamma(-xi + 1)) * (mu - dat) * (1/xi + 1)/(((N^xi * gamma(-xi + 1) - 1) * (mu -
dat)/(mu - z) + 1) * (mu - z)) - log1p((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z))/
xi^2)/((mu - z)^2 * xi) - (N^xi * logN * gamma(-xi + 1) - N^xi * psigamma(-xi +
1) * gamma(-xi + 1)) * ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1)^(-1/xi -
1) * (mu - dat)/((mu - z)^2 * xi) - (N^xi * logN * gamma(-xi + 1) - N^xi * psigamma(-xi +
1) * gamma(-xi + 1)) * (N^xi * gamma(-xi + 1) - 1) * (mu - dat)^2 * (1/xi + 1)/(((N^xi *
gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1)^2 * (mu - z)^3) + (N^xi * logN * gamma(-xi +
1) - N^xi * psigamma(-xi + 1) * gamma(-xi + 1)) * (mu - dat) * (1/xi + 1)/(((N^xi * gamma(-xi +
1) - 1) * (mu - dat)/(mu - z) + 1) * (mu - z)^2) + (N^xi * gamma(-xi + 1) - 1) * ((N^xi *
gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1)^(-1/xi - 1) * (mu - dat)/((mu - z)^2 *
xi^2) - (N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(((N^xi * gamma(-xi + 1) - 1) * (mu -
dat)/(mu - z) + 1) * (mu - z)^2 * xi^2))
k33 <- sum(((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1)^(-1/xi) * ((N^xi * logN *
gamma(-xi + 1) - N^xi * psigamma(-xi + 1) * gamma(-xi + 1)) * (mu - dat)/(((N^xi * gamma(-xi +
1) - 1) * (mu - dat)/(mu - z) + 1) * (mu - z) * xi) - log1p((N^xi * gamma(-xi + 1) - 1) *
(mu - dat)/(mu - z))/xi^2)^2 + ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) +
1)^(-1/xi) * ((N^xi * logN * gamma(-xi + 1) - N^xi * psigamma(-xi + 1) * gamma(-xi +
1))^2 * (mu - dat)^2/(((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1)^2 * (mu -
z)^2 * xi) - (N^xi * logN^2 * gamma(-xi + 1) - 2 * N^xi * logN * psigamma(-xi + 1) *
gamma(-xi + 1) + N^xi * psigamma(-xi + 1)^2 * gamma(-xi + 1) + N^xi * psigamma(1, -xi +
1) * gamma(-xi + 1)) * (mu - dat)/(((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) +
1) * (mu - z) * xi) + 2 * (N^xi * logN * gamma(-xi + 1) - N^xi * psigamma(-xi + 1) *
gamma(-xi + 1)) * (mu - dat)/(((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1) *
(mu - z) * xi^2) - 2 * log1p((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z))/xi^3) -
(N^xi * logN * gamma(-xi + 1) - N^xi * psigamma(-xi + 1) * gamma(-xi + 1))^2 * (mu -
dat)^2 * (1/xi + 1)/(((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1)^2 * (mu -
z)^2) + (N^xi * logN^2 * gamma(-xi + 1) - 2 * N^xi * logN * psigamma(-xi + 1) *
gamma(-xi + 1) + N^xi * psigamma(-xi + 1)^2 * gamma(-xi + 1) + N^xi * psigamma(1, -xi +
1) * gamma(-xi + 1)) * (mu - dat) * (1/xi + 1)/(((N^xi * gamma(-xi + 1) - 1) * (mu -
dat)/(mu - z) + 1) * (mu - z)) + (N^xi * gamma(-xi + 1) - 1) * (2 * (N^xi * logN *
gamma(-xi + 1) - N^xi * psigamma(-xi + 1) * gamma(-xi + 1))^2 * (mu - z) * xi/(N^xi *
gamma(-xi + 1) - 1)^3 - (N^xi * logN^2 * gamma(-xi + 1) - 2 * N^xi * logN * psigamma(-xi +
1) * gamma(-xi + 1) + N^xi * psigamma(-xi + 1)^2 * gamma(-xi + 1) + N^xi * psigamma(1,
-xi + 1) * gamma(-xi + 1)) * (mu - z) * xi/(N^xi * gamma(-xi + 1) - 1)^2 - 2 * (N^xi *
logN * gamma(-xi + 1) - N^xi * psigamma(-xi + 1) * gamma(-xi + 1)) * (mu - z)/(N^xi *
gamma(-xi + 1) - 1)^2)/((mu - z) * xi) - (N^xi * logN * gamma(-xi + 1) - N^xi * psigamma(-xi +
1) * gamma(-xi + 1)) * ((N^xi * logN * gamma(-xi + 1) - N^xi * psigamma(-xi + 1) *
gamma(-xi + 1)) * (mu - z) * xi/(N^xi * gamma(-xi + 1) - 1)^2 - (mu - z)/(N^xi * gamma(-xi +
1) - 1))/((mu - z) * xi) + (N^xi * gamma(-xi + 1) - 1) * ((N^xi * logN *