R/univdist.R

Defines functions pp.infomat pp.score pp.ll rlarg.infomat rlarg.score rlarg.ll rrlarg gevN.dphi gevN.phi gevN.Vfun gevN.infomat gevN.score gevN.ll gpdN.dphi gpdN.phi gpdN.Vfun gpdN.infomat gpdN.score gpdN.ll gevr.dphi gevr.phi gevr.Vfun gevr.infomat gevr.score gevr.ll.optim gevr.ll gpdN.quant gpdN.mean gevN.quant gevN.mean gev.retlev gpdr.dphi gpdr.phi gpdr.Vfun gpdr.infomat gpdr.score gpdr.ll.optim gpdr.ll gpde.dphi gpde.phi gpde.Vfun gpde.infomat gpde.score gpde.ll.optim gpde.ll gev.dphi gev.phi gev.Vfun gev.infomat gev.score gev.ll.optim gev.ll gpd.dphi gpd.phi gpd.Vfun gpd.infomat gpd.score gpd.ll.optim gpd.ll

Documented in gev.dphi gev.infomat gev.ll gev.ll.optim gevN.dphi gevN.infomat gevN.ll gevN.mean gevN.phi gevN.score gevN.Vfun gev.phi gevr.dphi gev.retlev gevr.infomat gevr.ll gevr.ll.optim gevr.phi gevr.score gevr.Vfun gev.score gev.Vfun gpd.dphi gpde.dphi gpde.infomat gpde.ll gpde.ll.optim gpde.phi gpde.score gpde.Vfun gpd.infomat gpd.ll gpd.ll.optim gpdN.dphi gpdN.infomat gpdN.ll gpdN.mean gpdN.phi gpdN.quant gpdN.score gpdN.Vfun gpd.phi gpdr.dphi gpdr.infomat gpdr.ll gpdr.ll.optim gpdr.phi gpdr.score gpdr.Vfun gpd.score gpd.Vfun pp.infomat pp.ll pp.score rlarg.infomat rlarg.ll rlarg.score rrlarg

#' Generalized Pareto distribution
#'
#' Likelihood, score function and information matrix, bias,
#' approximate ancillary statistics and sample space derivative
#' for the generalized Pareto distribution
#'
#' @author Leo Belzile
#' @name gpd
#' @param par vector of \code{scale} and \code{shape}
#' @param dat sample vector
#' @param tol numerical tolerance for the exponential model
#' @param method string indicating whether to use the expected  (\code{'exp'}) or the observed (\code{'obs'} - the default) information matrix.
#' @param V vector calculated by \code{gpd.Vfun}
#' @param n sample size
#' @section Usage: \preformatted{gpd.ll(par, dat, tol=1e-5)
#' gpd.ll.optim(par, dat, tol=1e-5)
#' gpd.score(par, dat)
#' gpd.infomat(par, dat, method = c('obs','exp'))
#' gpd.bias(par, n)
#' gpd.Fscore(par, dat, method = c('obs','exp'))
#' gpd.Vfun(par, dat)
#' gpd.phi(par, dat, V)
#' gpd.dphi(par, dat, V)}
#'
#' @section Functions:
#'
#' \itemize{
#' \item{\code{gpd.ll}:} {log likelihood}
#' \item{\code{gpd.ll.optim}:} {negative log likelihood parametrized in terms of \code{log(scale)} and shape
#' in order to perform unconstrained optimization}
#' \item{\code{gpd.score}:} {score vector}
#' \item{\code{gpd.infomat}:} {observed or expected information matrix}
#' \item{\code{gpd.bias}:} {Cox-Snell first order bias}
#' \item{\code{gpd.Fscore}:} {Firth's modified score equation}
#' \item{\code{gpd.Vfun}:} {vector implementing conditioning on approximate ancillary statistics for the TEM}
#' \item{\code{gpd.phi}:} {canonical parameter in the local exponential family approximation}
#' \item{\code{gpd.dphi}:} {derivative matrix of the canonical parameter in the local
#' exponential family approximation}
#' }
#' @references Firth, D. (1993). Bias reduction of maximum likelihood estimates, \emph{Biometrika}, \strong{80}(1), 27--38.
#' @references Coles, S. (2001). \emph{An Introduction to Statistical Modeling of Extreme Values}, Springer, 209 p.
#' @references Cox, D. R. and E. J. Snell (1968). A general definition of residuals, \emph{Journal of the Royal Statistical Society: Series B (Methodological)}, \strong{30}, 248--275.
#' @references Cordeiro, G. M. and R. Klein (1994). Bias correction in ARMA models, \emph{Statistics and Probability Letters}, \strong{19}(3), 169--176.
#' @references Giles, D. E., Feng, H. and R. T. Godwin (2016).  Bias-corrected maximum likelihood estimation of the  parameters of the generalized Pareto distribution, \emph{Communications in Statistics - Theory and Methods}, \strong{45}(8), 2465--2483.
#'
NULL



#' @title Generalized extreme value distribution
#'
#' @description Likelihood, score function and information matrix, bias,
#' approximate ancillary statistics and sample space derivative
#' for the generalized extreme value distribution
#'
#' @name gev
#' @param par vector of \code{loc}, \code{scale} and \code{shape}
#' @param dat sample vector
#' @param method string indicating whether to use the expected  (\code{'exp'}) or the observed (\code{'obs'} - the default) information matrix.
#' @param V vector calculated by \code{gev.Vfun}
#' @param n sample size
#' @param p vector of probabilities
#' @section Usage: \preformatted{gev.ll(par, dat)
#' gev.ll.optim(par, dat)
#' gev.score(par, dat)
#' gev.infomat(par, dat, method = c('obs','exp'))
#' gev.retlev(par, p)
#' gev.bias(par, n)
#' gev.Fscore(par, dat, method=c('obs','exp'))
#' gev.Vfun(par, dat)
#' gev.phi(par, dat, V)
#' gev.dphi(par, dat, V)}
#'
#'
#' @section Functions:
#' \itemize{
#' \item{\code{gev.ll}:} {log likelihood}
#' \item{\code{gev.ll.optim}:} {negative log likelihood parametrized in terms of location, \code{log(scale)} and shape
#' in order to perform unconstrained optimization}
#' \item{\code{gev.score}:} {score vector}
#' \item{\code{gev.infomat}:} {observed or expected information matrix}
#' \item{\code{gev.retlev}:} {return level, corresponding to the \eqn{(1-p)}th quantile}
#' \item{\code{gev.bias}:} {Cox-Snell first order bias}
#' \item{\code{gev.Fscore}:} {Firth's modified score equation}
#' \item{\code{gev.Vfun}:} {vector implementing conditioning on approximate ancillary statistics for the TEM}
#' \item{\code{gev.phi}:} {canonical parameter in the local exponential family approximation}
#' \item{\code{gev.dphi}:} {derivative matrix of the canonical parameter in the local exponential family approximation}
#' }
#' @references Firth, D. (1993). Bias reduction of maximum likelihood estimates, \emph{Biometrika}, \strong{80}(1), 27--38.
#' @references Coles, S. (2001). \emph{An Introduction to Statistical Modeling of Extreme Values}, Springer, 209 p.
#' @references Cox, D. R. and E. J. Snell (1968). A general definition of residuals, \emph{Journal of the Royal Statistical Society: Series B (Methodological)}, \strong{30}, 248--275.
#' @references Cordeiro, G. M. and R. Klein (1994). Bias correction in ARMA models, \emph{Statistics and Probability Letters}, \strong{19}(3), 169--176.
NULL


#' Log likelihood for the generalized Pareto distribution
#'
#' Function returning the density of an \code{n} sample from the GP distribution.
#' \code{gpd.ll.optim} returns the negative log likelihood parametrized in terms of \code{log(scale)} and shape
#' in order to perform unconstrained optimization
#' @seealso \code{\link{gpd}}
#' @inheritParams gpd
#' @export
#' @keywords internal
gpd.ll <- function(par, dat, tol = 1e-05) {
    sigma = par[1]
    if(sigma < 0){
      return(-Inf)
    }
    xi = par[2]
    n <- length(dat)
    if (abs(xi) > tol) {
      if(xi > -1){
        #Most scenarios
        as.vector(-n * log(sigma) - (1 + 1/xi) * sum(log(pmax(1 + xi/sigma * dat, 0))))
      } else if(isTRUE(all.equal(xi, -1, check.attributes = FALSE))){
        -n*log(max(dat))
      } else{
       -Inf
      }
    } else {
      as.vector(-n * log(sigma) - sum(dat)/sigma)
    }
}



#' @rdname gpd.ll
#' @inheritParams gpd
#' @export
gpd.ll.optim <- function(par, dat, tol = 1e-05) {
    sigma = exp(par[1])
    xi = par[2]
    if (abs(xi) > tol) {
        length(dat) * log(sigma) + (1 + 1/xi) * sum(log(pmax(1 + xi/sigma * dat, 0)))
    } else {
        length(dat) * log(sigma) + sum(dat)/sigma
    }
}
#' Score vector for the generalized Pareto distribution
#'
#' @seealso \code{\link{gpd}}
#' @inheritParams gpd
#' @export
#' @keywords internal
gpd.score <- function(par, dat) {
    sigma = par[1]
    xi = as.vector(par[2])
    if (!isTRUE(all.equal(0, xi, check.attributes = FALSE))) {
        c(sum(dat * (xi + 1)/(sigma^2 * (dat * xi/sigma + 1)) - 1/sigma), sum(-dat * (1/xi + 1)/(sigma *
            (dat * xi/sigma + 1)) + log(pmax(dat * xi/sigma + 1, 0))/xi^2))
    } else {
        c(sum((dat - sigma)/sigma^2), sum(1/2 * (dat - 2 * sigma) * dat/sigma^2))
    }
}

#' Information matrix for the generalized Pareto distribution
#'
#' The function returns the expected or observed information matrix.
#' @seealso \code{\link{gpd}}
#' @inheritParams gpd
#' @param nobs number of observations
#' @export
#' @keywords internal
gpd.infomat <- function(par, dat, method = c("obs", "exp"), nobs = length(dat)) {
    method <- match.arg(method)
    dat <- as.vector(dat)
    sigma <- as.vector(par[1])
    xi <- as.vector(par[2])
    if (method == "obs") {
        if (any((1 + xi * dat/sigma) < 0)) {
            stop("Data outside of range specified by parameter, yielding a zero likelihood")
        }
        c11 <- (length(dat) - (1 + xi) * sum(dat * (2 * sigma + xi * dat)/(sigma + xi * dat)^2))/(sigma^2)
        if (!isTRUE(all.equal(xi, 0, check.attributes = FALSE))) {
            c12 <- (1 + xi) * sum(dat/(sigma + xi * dat)^2)/xi - sum(dat/(sigma + xi * dat))/(sigma *
                xi)
            c22 <- (2/xi^2) * sum(dat/(sigma + xi * dat)) - 2 * sum(log(pmax(dat * xi/sigma + 1, 0)))/(xi^3) +
                (1 + 1/xi) * sum(dat^2/(sigma + xi * dat)^2)
        } else {
            c12 <- sum(-(dat^2 - dat * sigma)/sigma^3)
            c22 <- sum(-1/3 * (2 * dat - 3 * sigma) * dat^2/sigma^3)
        }
        infomat <- -matrix(c(c11, c12, c12, c22), nrow = 2, ncol = 2, byrow = TRUE)
    } else if (method == "exp") {
        k22 <- -2/((1 + xi) * (1 + 2 * xi))
        k11 <- -1/(sigma^2 * (1 + 2 * xi))
        k12 <- -1/(sigma * (1 + xi) * (1 + 2 * xi))
        infomat <- -nobs * cbind(c(k11, k12), c(k12, k22))  #fixed 12-10-2016
    }
    colnames(infomat) <- rownames(infomat) <- c("scale","shape")
    return(infomat)
}
#' Tangent exponential model statistics for the generalized Pareto distribution
#'
#' Matrix of approximate ancillary statistics, sample space derivative of the
#' log likelihood and mixed derivative for the generalized Pareto distribution.
#' @seealso \code{\link{gpd}}
#' @inheritParams gpd
#' @export
#' @name gpd.temstat
#' @keywords internal
gpd.Vfun <- function(par, dat) {
    sigma = par[1]
    xi = par[2]
    cbind(dat/sigma, sigma * (dat * xi/sigma + 1)^(-1/xi) * (log(dat * xi/sigma + 1)/xi^2 - dat/(sigma *
        (dat * xi/sigma + 1) * xi))/(dat * xi/sigma + 1)^(-1/xi - 1))
}

#' @inheritParams gpd
#' @rdname gpd.temstat
#' @export
gpd.phi <- function(par, dat, V) {
    sigma = par[1]
    xi = par[2]
    rbind(-(xi + 1)/(sigma * (dat * xi/sigma + 1))) %*% V
}

#' @inheritParams gpd
#' @export
#' @rdname gpd.temstat
gpd.dphi <- function(par, dat, V) {
    sigma = par[1]
    xi = as.vector(par[2])
    if (!isTRUE(all.equal(xi, 0, check.attributes = FALSE))) {
        rbind(xi * (1/xi + 1)/(sigma^2 * (dat * xi/sigma + 1)) - dat * xi^2 * (1/xi + 1)/(sigma^3 * (dat *
            xi/sigma + 1)^2), -(1/xi + 1)/(sigma * (dat * xi/sigma + 1)) + dat * (xi + 1)/(sigma^2 *
            (dat * xi/sigma + 1)^2) + 1/(sigma * (dat * xi/sigma + 1) * xi)) %*% V
    } else {
        rbind(rep(sigma^(-2), length(dat)), (dat - sigma)/sigma^2) %*% V
    }
}


#' log likelihood for the generalized extreme value distribution
#'
#' Function returning the density of an \code{n} sample from the GEV distribution.
#'
#' \code{gev.ll.optim} returns the negative log likelihood parametrized in terms of location, \code{log(scale)} and shape in order to perform unconstrained optimization
#'
#' @inheritParams gev
#' @export
#' @keywords internal
#' @seealso \code{\link{gev}}
gev.ll <- function(par, dat) {
    dat <- as.vector(dat)
    par <- as.vector(par)
    tx <- pmax(1 + par[3]/par[2] * (dat - par[1]), 0)
    if (!isTRUE(all.equal(as.vector(par[3]), 0, tolerance = 1e-07, check.attributes = FALSE))) {
        sum(-log(par[2]) - (1/par[3] + 1) * log(tx) - tx^(-1/par[3]))
    } else {
        sum(-log(par[2]) - (dat - par[1])/par[2] - exp(-(dat - par[1])/par[2]))
    }
}

#' @rdname gev.ll
#' @inheritParams gev
#' @export
#' @keywords internal
gev.ll.optim <- function(par, dat) {
    tpar = par
    tpar[2] = exp(par[2])
    # parameters with log-scale
    nll = -gev.ll(tpar, dat)
    return(nll)
}

#' Score vector for the generalized extreme value distribution
#'
#' @inheritParams gev
#' @export
#' @keywords internal
gev.score <- function(par, dat) {
    mu = par[1]
    sigma = par[2]
    xi = as.vector(par[3])
    if (!isTRUE(all.equal(xi, 0, tolerance = 1e-10, check.attributes = FALSE))) {
        c(sum(-(pmax(0, -(mu - dat) * xi/sigma + 1))^(-1/xi - 1)/sigma - xi * (1/xi + 1)/(sigma * ((mu -
            dat) * xi/sigma - 1))),
          sum(-(dat - mu) * ((dat - mu) * xi/sigma + 1)^(-1/xi - 1)/sigma^2 +
            (dat - mu) * (xi + 1)/(sigma^2 * ((dat - mu) * xi/sigma + 1)) - 1/sigma),
          sum(-(mu - dat) * (1/xi + 1)/(sigma * ((mu - dat) * xi/sigma - 1)) -
                (log1p(-(mu - dat) * xi/sigma)/xi^2 - (mu - dat)/(sigma * ((mu - dat) * xi/sigma - 1) * xi))/
                (-(mu - dat) * xi/sigma + 1)^(1/xi) + log1p(-(mu - dat) * xi/sigma)/xi^2))
    } else {
        c(sum(-exp(mu/sigma - dat/sigma)/sigma + 1/sigma),
          sum(mu * exp(mu/sigma - dat/sigma)/sigma^2 -
            dat * exp(mu/sigma - dat/sigma)/sigma^2 - mu/sigma^2 - 1/sigma + dat/sigma^2),
          sum((exp(-(dat/sigma)) *
            (dat - mu) * (exp(mu/sigma) * (mu - dat) + exp(dat/sigma) * (dat - mu - 2 * sigma)))/(2 * sigma^2)))
    }

}

#' Information matrix for the generalized extreme value distribution
#'
#' The function returns the expected or observed information matrix.
#' @inheritParams gev
#' @param nobs number of observations
#' @export
#' @keywords internal
gev.infomat <- function(par, dat, method = c("obs", "exp"), nobs = length(dat)) {
    method <- match.arg(method)
    par <- as.vector(par)
    if (method == "exp") {
        # (Expected) Fisher information does not depend on location parameter
        if (length(par) == 3) {
            sigma = par[2]
            xi = as.vector(par[3])
        } else {
            sigma = par[1]
            xi = as.vector(par[2])
        }
      if(xi < -0.5){
        stop("The Fisher information is not defined if the shape parameter of the GEV is less than -0.5")
      }
        # Limiting case when xi=0 Fix to value at zero
        if (abs(xi) < 0.001) {
            return(nobs * cbind(c(1/sigma^2, -0.422784335098467/sigma^2, 0.41184033042644/sigma), c(-0.422784335098467/sigma^2,
                1.82368066085288/sigma^2, 0.332484907160274/sigma), c(0.41184033042644/sigma, 0.332484907160274/sigma,
                2.42360605517703)))
        }
        p = (1 + xi)^2 * gamma(1 + 2 * xi)
        q = gamma(2 + xi) * (digamma(1 + xi) + (1 + xi)/xi)
        infomat <- nobs * cbind(c(p/sigma^2, -(p - gamma(2 + xi))/(sigma^2 * xi), (p/xi - q)/(sigma *
            xi)), c(-(p - gamma(2 + xi))/(sigma^2 * xi), (1 - 2 * gamma(2 + xi) + p)/(sigma^2 * xi^2),
            -(1 + digamma(1) + (1 - gamma(2 + xi))/xi - q + p/xi)/(sigma * xi^2)), c((p/xi - q)/(sigma *
            xi), -(1 + digamma(1) + (1 - gamma(2 + xi))/xi - q + p/xi)/(sigma * xi^2), (pi^2/6 + (1 +
            digamma(1) + 1/xi)^2 - 2 * q/xi + p/xi^2)/xi^2))
        if (xi < 0.003 & xi > 0) {
            # Linearization because numerically unstable in this region
            y2 <- (pi^2/6 + (1 + digamma(1) + 1/0.003)^2 - 2 * (gamma(2 + 0.003) * (digamma(1 + 0.003) +
                (1 + 0.003)/0.003))/0.003 + (1 + 0.003)^2 * gamma(1 + 2 * 0.003)/0.003^2)/0.003^2
            y1 <- 2.42360605517703
            infomat[3, 3] <- nobs * ((y2 - y1)/0.003 * xi + y1)
        } else if (xi > -0.003 & xi < 0) {
            y2 <- (pi^2/6 + (1 + digamma(1) - 1/0.003)^2 + 2 * (gamma(2 - 0.003) * (digamma(1 - 0.003) -
                (1 - 0.003)/0.003))/0.003 + (1 - 0.003)^2 * gamma(1 - 2 * 0.003)/0.003^2)/0.003^2
            y1 <- 2.42360605517703
            infomat[3, 3] <- nobs * (-(y2 - y1)/0.003 * xi + y1)
        }
        colnames(infomat) <- rownames(infomat) <- c("loc","scale","shape")
        return(infomat)
    } else if (method == "obs") {
        if (length(par) != 3) {
            stop("Invalid parameter vector")
        }
        mu = par[1]
        sigma = par[2]
        xi = as.vector(par[3])
        if(xi < -0.5){
          stop("The observed information matrix is not defined if the shape parameter of the GEV is less than -0.5")
        }
        # Bug fixed 21-10-2016 (parameter were defined after they were used).
        if (any((1 + xi * (dat - mu)/sigma) < 0)) {
            stop("Data outside of range specified by parameter, yielding a zero likelihood")
        }
        infomat <- matrix(0, ncol = 3, nrow = 3)
        if (!isTRUE(all.equal(xi, 0, check.attributes = FALSE))) {
            infomat[1, 1] <- sum(-((dat - mu) * xi/sigma + 1)^(-1/xi - 2) * (xi + 1)/sigma^2 + xi^2 *
                (1/xi + 1)/(sigma^2 * ((dat - mu) * xi/sigma + 1)^2))
            infomat[1, 2] <- infomat[2, 1] <- sum(-(dat - mu) * ((dat - mu) * xi/sigma + 1)^(-1/xi -
                2) * (xi + 1)/sigma^3 + ((dat - mu) * xi/sigma + 1)^(-1/xi - 1)/sigma^2 - xi * (1/xi +
                1)/(sigma^2 * ((dat - mu) * xi/sigma + 1)) + (dat - mu) * xi^2 * (1/xi + 1)/(sigma^3 *
                ((dat - mu) * xi/sigma + 1)^2))
            infomat[1, 3] <- infomat[3, 1] <- sum((-(mu - dat) * xi/sigma + 1)^(-1/xi - 1) * ((mu - dat) *
                (1/xi + 1)/(sigma * ((mu - dat) * xi/sigma - 1)) - log1p(-(mu - dat) * xi/sigma)/xi^2)/sigma -
                (1/xi + 1)/(sigma * ((mu - dat) * xi/sigma - 1)) + (mu - dat) * (xi + 1)/(sigma^2 * ((mu -
                dat) * xi/sigma - 1)^2) + 1/(sigma * ((mu - dat) * xi/sigma - 1) * xi))
            infomat[2, 3] <- infomat[3, 2] <- sum((mu - dat) * (-(mu - dat) * xi/sigma + 1)^(-1/xi -
                1) * (log1p(-(mu - dat) * xi/sigma)/xi^2 - (mu - dat)/(sigma * ((mu - dat) * xi/sigma -
                1) * xi))/sigma^2 + (mu - dat) * (1/xi + 1)/(sigma^2 * ((mu - dat) * xi/sigma - 1)) -
                (mu - dat)^2 * (xi + 1)/(sigma^3 * ((mu - dat) * xi/sigma - 1)^2) - (mu - dat)/(sigma^2 *
                ((mu - dat) * xi/sigma - 1) * xi) + (mu - dat)^2/(sigma^3 * (-(mu - dat) * xi/sigma +
                1)^(1/xi) * ((mu - dat) * xi/sigma - 1)^2))
            infomat[3, 3] <- sum(-(log1p(-(mu - dat) * xi/sigma)/xi^2 - (mu - dat)/(sigma * ((mu -
                dat) * xi/sigma - 1) * xi))^2/(-(mu - dat) * xi/sigma + 1)^(1/xi) + (2 * log1p(-(mu - dat) *
                xi/sigma)/xi^3 - 2 * (mu - dat)/(sigma * ((mu - dat) * xi/sigma - 1) * xi^2) - (mu -
                dat)^2/(sigma^2 * ((mu - dat) * xi/sigma - 1)^2 * xi))/(-(mu - dat) * xi/sigma + 1)^(1/xi) +
                (mu - dat)^2 * (1/xi + 1)/(sigma^2 * ((mu - dat) * xi/sigma - 1)^2) - 2 * log1p(-(mu -
                dat) * xi/sigma)/xi^3 + 2 * (mu - dat)/(sigma * ((mu - dat) * xi/sigma - 1) * xi^2))
            infomat[2, 2] <- sum(-(mu - dat)^2 * (-(mu - dat) * xi/sigma + 1)^(-1/xi - 2) * (xi + 1)/sigma^4 -
                2 * (mu - dat) * (-(mu - dat) * xi/sigma + 1)^(-1/xi - 1)/sigma^3 - 2 * (mu - dat) *
                (xi + 1)/(sigma^3 * ((mu - dat) * xi/sigma - 1)) + (mu - dat)^2 * xi^2 * (1/xi + 1)/(sigma^4 *
                ((mu - dat) * xi/sigma - 1)^2) + 1/sigma^2)
        } else {
            infomat[1, 1] <- sum(-exp(mu/sigma - dat/sigma)/sigma^2)
            infomat[1, 2] <- infomat[2, 1] <- sum(((mu + sigma) * exp(mu/sigma) - dat * exp(mu/sigma) -
                sigma * exp(dat/sigma)) * exp(-dat/sigma)/sigma^3)
            infomat[2, 2] <- sum(-2 * mu * exp(mu/sigma - dat/sigma)/sigma^3 + 2 * dat * exp(mu/sigma -
                dat/sigma)/sigma^3 + 2 * mu/sigma^3 + 1/sigma^2 - 2 * dat/sigma^3 - (mu^2 * exp(mu/sigma) -
                2 * mu * dat * exp(mu/sigma) + dat^2 * exp(mu/sigma)) * exp(-dat/sigma)/sigma^4)
            infomat[1, 3] <- infomat[3, 1] <- sum(-1/2 * (mu^2 * exp(mu/sigma) + 2 * mu * sigma * exp(mu/sigma) -
                2 * mu * dat * exp(mu/sigma) - 2 * sigma * dat * exp(mu/sigma) + dat^2 * exp(mu/sigma) -
                2 * mu * sigma * exp(dat/sigma) - 2 * sigma^2 * exp(dat/sigma) + 2 * sigma * dat * exp(dat/sigma)) *
                exp(-dat/sigma)/sigma^3)
            infomat[2, 3] <- infomat[3, 2] <- sum(1/2 * (mu^2 * exp(mu/sigma) + 2 * mu * sigma * exp(mu/sigma) -
                2 * mu * dat * exp(mu/sigma) - 2 * sigma * dat * exp(mu/sigma) + dat^2 * exp(mu/sigma) -
                2 * mu * sigma * exp(dat/sigma) - 2 * sigma^2 * exp(dat/sigma) + 2 * sigma * dat * exp(dat/sigma)) *
                (mu - dat) * exp(-dat/sigma)/sigma^4)
            infomat[3, 3] <- sum(exp(-(dat/sigma))*(4*exp(dat/sigma)* sigma*(2*mu + 3*sigma - 2*dat) -
                                 exp(mu/sigma)*(3*mu + 8*sigma - 3*dat)*(mu - dat))*(mu - dat)^2)/(12*sigma^4)
        }
        colnames(infomat) <- rownames(infomat) <- c("loc","scale","shape")
        return(-infomat)
    }
}


#' Tangent exponential model statistics for the generalized extreme value distribution
#'
#' Matrix of approximate ancillary statistics, sample space derivative of the
#' log likelihood and mixed derivative for the generalized extreme value distribution.
#' @seealso \code{\link{gev}}
#' @inheritParams gev
#' @export
#' @name gev.temstat
#' @keywords internal
gev.Vfun <- function(par, dat) {
    cbind(1, (dat - par[1])/par[2], par[2] * (-(par[1] - dat) * par[3]/par[2] + 1)^(-1/par[3]) * (log1p(-(par[1] -
        dat) * par[3]/par[2] )/par[3]^2 - (par[1] - dat)/(par[2] * ((par[1] - dat) * par[3]/par[2] -
        1) * par[3]))/(-(par[1] - dat) * par[3]/par[2] + 1)^(-1/par[3] - 1))
}

#' @rdname gev.temstat
#' @inheritParams gev
#' @export
#' @keywords internal
gev.phi <- function(par, dat, V) {
    mu = par[1]
    sigma = par[2]
    xi = as.vector(par[3])
    if (!isTRUE(all.equal(xi, 0))) {
        t(((dat - mu) * xi/sigma + 1)^(-1/xi - 1)/sigma + xi * (1/xi + 1)/(sigma * ((mu - dat) * xi/sigma -
            1))) %*% V
    } else {
        t(exp((mu - dat)/sigma)/sigma - 1/sigma) %*% V

    }
}


#' @rdname gev.temstat
#' @inheritParams gev
#' @export
#' @keywords internal
gev.dphi <- function(par, dat, V) {
    mu = par[1]
    sigma = par[2]
    xi = as.vector(par[3])
    if (!isTRUE(all.equal(xi, 0))) {
        rbind((-(mu - dat) * xi/sigma + 1)^(-1/xi - 2) * (xi + 1)/sigma^2 - xi^2 * (1/xi + 1)/(sigma^2 *
            ((mu - dat) * xi/sigma - 1)^2), -(mu - dat) * (-(mu - dat) * xi/sigma + 1)^(-1/xi - 2) *
            (xi + 1)/sigma^3 - (-(mu - dat) * xi/sigma + 1)^(-1/xi - 1)/sigma^2 - xi * (1/xi + 1)/(sigma^2 *
            ((mu - dat) * xi/sigma - 1)) + (mu - dat) * xi^2 * (1/xi + 1)/(sigma^3 * ((mu - dat) * xi/sigma -
            1)^2), -(-(mu - dat) * xi/sigma + 1)^(-1/xi - 1) * ((mu - dat) * (1/xi + 1)/(sigma * ((mu -
            dat) * xi/sigma - 1)) - log1p(-(mu - dat) * xi/sigma)/xi^2)/sigma + (1/xi + 1)/(sigma *
            ((mu - dat) * xi/sigma - 1)) - (mu - dat) * (xi + 1)/(sigma^2 * ((mu - dat) * xi/sigma -
            1)^2) - 1/(sigma * ((mu - dat) * xi/sigma - 1) * xi)) %*% V
    } else {
        rbind(exp(-dat/sigma + mu/sigma)/sigma^2, (sigma * exp(dat/sigma) + (dat - mu - sigma) * exp(mu/sigma)) *
            exp(-dat/sigma)/sigma^3, 1/2 * (2 * dat * sigma * exp(dat/sigma) - 2 * mu * sigma * exp(dat/sigma) -
            2 * sigma^2 * exp(dat/sigma) + dat^2 * exp(mu/sigma) - 2 * dat * mu * exp(mu/sigma) + mu^2 *
            exp(mu/sigma) - 2 * dat * sigma * exp(mu/sigma) + 2 * mu * sigma * exp(mu/sigma)) * exp(-dat/sigma)/sigma^3) %*%
            V
    }
}

#' @title Generalized Pareto distribution (expected shortfall parametrization)
#'
#' @description Likelihood, score function and information matrix,
#' approximate ancillary statistics and sample space derivative
#' for the generalized Pareto distribution parametrized in terms of expected shortfall.
#'
#' The parameter \code{m} corresponds to \eqn{\zeta_u}/(1-\eqn{\alpha}), where \eqn{\zeta_u} is the rate of exceedance over the threshold
#' \code{u} and \eqn{\alpha} is the percentile of the expected shortfall.
#' Note that the actual parametrization is in terms of excess expected shortfall, meaning expected shortfall minus threshold.
#'
#' @details The observed information matrix was calculated from the Hessian using symbolic calculus in Sage.
#'
#' @author Leo Belzile
#' @name gpde
#' @param par vector of length 2 containing \eqn{e_m} and \eqn{\xi}, respectively the expected shortfall at probability 1/(1-\eqn{\alpha}) and the shape parameter.
#' @param dat sample vector
#' @param m number of observations of interest for return levels. See \strong{Details}
#' @param tol numerical tolerance for the exponential model
#' @param method string indicating whether to use the expected  (\code{'exp'}) or the observed (\code{'obs'} - the default) information matrix.
#' @param nobs number of observations
#' @param V vector calculated by \code{gpde.Vfun}
#'
#' @section Usage: \preformatted{gpde.ll(par, dat, m, tol=1e-5)
#' gpde.ll.optim(par, dat, m, tol=1e-5)
#' gpde.score(par, dat, m)
#' gpde.infomat(par, dat, m, method = c('obs', 'exp'), nobs = length(dat))
#' gpde.Vfun(par, dat, m)
#' gpde.phi(par, dat, V, m)
#' gpde.dphi(par, dat, V, m)}
#'
#' @section Functions:
#'
#' \itemize{
#' \item{\code{gpde.ll}:} {log likelihood}
#' \item{\code{gpde.ll.optim}:} {negative log likelihood parametrized in terms of log expected
#' shortfall and shape in order to perform unconstrained optimization}
#' \item{\code{gpde.score}:} {score vector}
#' \item{\code{gpde.infomat}:} {observed information matrix for GPD parametrized in terms of rate of expected shortfall and shape}
#' \item{\code{gpde.Vfun}:} {vector implementing conditioning on approximate ancillary statistics for the TEM}
#' \item{\code{gpde.phi}:} {canonical parameter in the local exponential family approximation}
#' \item{\code{gpde.dphi}:} {derivative matrix of the canonical parameter in the local exponential family approximation}
#' }
NULL

#' @title Generalized Pareto distribution (return level parametrization)
#'
#' @description Likelihood, score function and information matrix,
#' approximate ancillary statistics and sample space derivative
#' for the generalized Pareto distribution parametrized in terms of return levels.
#'
#' @details The observed information matrix was calculated from the Hessian using symbolic calculus in Sage.
#' @details The interpretation for \code{m} is as follows: if there are on average \eqn{m_y} observations per year above the threshold, then  \eqn{m=Tm_y} corresponds to \eqn{T}-year return level.
#'
#' @author Leo Belzile
#' @name gpdr
#' @param par vector of length 2 containing \eqn{y_m} and \eqn{\xi}, respectively the \eqn{m}-year return level and the shape parameter.
#' @param dat sample vector
#' @param m number of observations of interest for return levels. See \strong{Details}
#' @param tol numerical tolerance for the exponential model
#' @param method string indicating whether to use the expected  (\code{'exp'}) or the observed (\code{'obs'} - the default) information matrix.
#' @param nobs number of observations
#' @param V vector calculated by \code{gpdr.Vfun}
#'
#' @section Usage: \preformatted{gpdr.ll(par, dat, m, tol=1e-5)
#' gpdr.ll.optim(par, dat, m, tol=1e-5)
#' gpdr.score(par, dat, m)
#' gpdr.infomat(par, dat, m, method = c('obs', 'exp'), nobs = length(dat))
#' gpdr.Vfun(par, dat, m)
#' gpdr.phi(par, V, dat, m)
#' gpdr.dphi(par, V, dat, m)}
#' @section Functions:
#'
#' \itemize{
#' \item{\code{gpdr.ll}:} {log likelihood}
#' \item{\code{gpdr.ll.optim}:} {negative log likelihood parametrized in terms of \code{log(scale)} and shape
#' in order to perform unconstrained optimization}
#' \item{\code{gpdr.score}:} {score vector}
#' \item{\code{gpdr.infomat}:} {observed information matrix for GPD parametrized in terms of rate of \eqn{m}-year return level and shape}
#' \item{\code{gpdr.Vfun}:} {vector implementing conditioning on approximate ancillary statistics for the TEM}
#' \item{\code{gpdr.phi}:} {canonical parameter in the local exponential family approximation}
#' \item{\code{gpdr.dphi}:} {derivative matrix of the canonical parameter in the local exponential family approximation}
#' }
NULL

#' @title Generalized extreme value distribution (return level parametrization)
#'
#' @description Likelihood, score function and information matrix,
#' approximate ancillary statistics and sample space derivative
#' for the generalized extreme value distribution  parametrized in terms of the return level \eqn{z}, scale and shape.
#'
#' @author Leo Belzile
#' @name gevr
#' @param par vector of \code{retlev}, \code{scale} and \code{shape}
#' @param dat sample vector
#' @param p tail probability, corresponding to \eqn{(1-p)}th quantile for \eqn{z}
#' @param method string indicating whether to use the expected  (\code{'exp'}) or the observed (\code{'obs'} - the default) information matrix.
#' @param nobs number of observations
#' @param V vector calculated by \code{gevr.Vfun}
#'
#' @section Usage: \preformatted{gevr.ll(par, dat, p)
#' gevr.ll.optim(par, dat, p)
#' gevr.score(par, dat, p)
#' gevr.infomat(par, dat, p, method = c('obs', 'exp'), nobs = length(dat))
#' gevr.Vfun(par, dat, p)
#' gevr.phi(par, dat, p, V)
#' gevr.dphi(par, dat, p, V)}
#'
#' @section Functions:
#' \itemize{
#' \item{\code{gevr.ll}:} {log likelihood}
#' \item{\code{gevr.ll.optim}:} {negative log likelihood parametrized in terms of return levels, \code{log(scale)} and shape in order to perform unconstrained optimization}
#' \item{\code{gevr.score}:} {score vector}
#' \item{\code{gevr.infomat}:} {observed information matrix}
#' \item{\code{gevr.Vfun}:} {vector implementing conditioning on approximate ancillary statistics for the TEM}
#' \item{\code{gevr.phi}:} {canonical parameter in the local exponential family approximation}
#' \item{\code{gevr.dphi}:} {derivative matrix of the canonical parameter in the local exponential family approximation}
#' }
NULL

#' Negative log likelihood of the generalized Pareto distribution (expected shortfall)
#'
#' @seealso \code{\link{gpde}}
#' @inheritParams gpde
#' @keywords internal
#' @export
gpde.ll <- function(par, dat, m) {
    es = par[1]
    xi = as.vector(par[2])
    if (any(xi > 1, es < 0, min(1 + (m^xi - 1 + xi)/((1 - xi) * es) * dat) < 0)) {
        return(-1e+10)
    }
    xizero <- abs(xi) < 1e-9
    sigmae = ifelse(!xizero, es * (1 - xi) * xi/(m^xi - 1 + xi), es/(log(m)+1))
    return(gpd.ll(par = c(sigmae, xi), dat = dat))
}

#' Negative log likelihood of the generalized Pareto distribution (expected shortfall) - optimization
#' The negative log likelihood is parametrized in terms of log expected shortfall and shape in order to perform unconstrained optimization
#' @rdname gpde.ll
#' @inheritParams gpde
#' @keywords internal
#' @export
gpde.ll.optim <- function(par, dat, m) {
    es = exp(par[1])
    xi = par[2]
    if (xi > 1) {
        return(1e+10)
    }
    -sum(-log(xi * (1 - xi)/(m^xi - 1 + xi)) - log(es) - (1 + 1/xi) * log1p((m^xi - 1 + xi)/((1 - xi) *
        es) * dat))
}
#' Score vector for the GP distribution (expected shortfall)
#' @seealso \code{\link{gpde}}
#' @inheritParams gpde
#' @keywords internal
#' @export
gpde.score <- function(par, dat, m) {
    es = par[1]
    xi = par[2]
    if(missing(m)){
      stop("User must provide \"m\" parameter in \"gpde.score\".")
    }
    xizero <- abs(xi) < 1e-4
    if(!xizero){
    c(sum(-1/es + dat * (m^xi + xi - 1) * (1/xi + 1)/(es^2 * (xi - 1) * (dat * (m^xi + xi - 1)/(es *
        (xi - 1)) - 1))), sum(-((m^xi * log(m) + 1) * dat/(es * (xi - 1)) - dat * (m^xi + xi - 1)/(es *
        (xi - 1)^2)) * (1/xi + 1)/(dat * (m^xi + xi - 1)/(es * (xi - 1)) - 1) + (m^xi + xi - 1) * ((m^xi *
        log(m) + 1) * (xi - 1) * xi/(m^xi + xi - 1)^2 - (xi - 1)/(m^xi + xi - 1) - xi/(m^xi + xi - 1))/((xi -
        1) * xi) + log1p(-dat * (m^xi + xi - 1)/(es * (xi - 1)))/xi^2))
    } else{
      c(sum((dat*log(m) + dat - es)/es^2),
        sum(1/2*((dat^2 - dat*es)*log(m)^3 + (3*dat^2 - 5*dat*es + es^2)*log(m)^2 + dat^2 -
                   4*dat*es + 2*es^2 + (3*dat^2 - 8*dat*es + 2*es^2)*log(m))/(es^2*log(m) + es^2)))
    }
}

#' Observed information matrix for the GP distribution (expected shortfall)
#'
#' The information matrix is parametrized in terms of excess expected shortfall and shape
#' @seealso \code{\link{gpde}}
#' @inheritParams gpde
#' @keywords internal
#' @export
gpde.infomat <- function(par, dat, m, method = c("obs", "exp"), nobs = length(dat)) {
    method <- match.arg(method)  #default to observed information
    es = as.vector(par[1])
    xi = as.vector(par[2])
    if(missing(m)){
      stop("User must provide \"m\" parameter in \"gpde.infomat\".")
    }
    if (xi < -0.5) {
        return(matrix(NA, 2, 2))
    }
    xizero <- abs(xi) < 1e-5
    logm <- log(m)
    if (method == "obs") {
      if(!xizero){
        k11 = sum(-1/es^2 + 2 * dat * (m^xi + xi - 1) * (1/xi + 1)/(es^3 * (xi - 1) * (dat * (m^xi +
            xi - 1)/(es * (xi - 1)) - 1)) - dat^2 * (m^xi + xi - 1)^2 * (1/xi + 1)/(es^4 * (xi - 1)^2 *
            (dat * (m^xi + xi - 1)/(es * (xi - 1)) - 1)^2))
        k22 = sum(-((m^xi * logm + 1) * dat/(es * (xi - 1)) - dat * (m^xi + xi - 1)/(es * (xi - 1)^2))^2 *
            (1/xi + 1)/(dat * (m^xi + xi - 1)/(es * (xi - 1)) - 1)^2 + (dat * m^xi * logm^2/(es * (xi -
            1)) - 2 * (m^xi * logm + 1) * dat/(es * (xi - 1)^2) + 2 * dat * (m^xi + xi - 1)/(es * (xi -
            1)^3)) * (1/xi + 1)/(dat * (m^xi + xi - 1)/(es * (xi - 1)) - 1) - (m^xi * (xi - 1) * xi *
            logm^2/(m^xi + xi - 1)^2 - 2 * (m^xi * logm + 1)^2 * (xi - 1) * xi/(m^xi + xi - 1)^3 +
            2 * (m^xi * logm + 1) * (xi - 1)/(m^xi + xi - 1)^2 + 2 * (m^xi * logm + 1) * xi/(m^xi +
            xi - 1)^2 - 2/(m^xi + xi - 1)) * (m^xi + xi - 1)/((xi - 1) * xi) - (m^xi * logm + 1) *
            ((m^xi * logm + 1) * (xi - 1) * xi/(m^xi + xi - 1)^2 - (xi - 1)/(m^xi + xi - 1) - xi/(m^xi +
                xi - 1))/((xi - 1) * xi) + (m^xi + xi - 1) * ((m^xi * logm + 1) * (xi - 1) * xi/(m^xi +
            xi - 1)^2 - (xi - 1)/(m^xi + xi - 1) - xi/(m^xi + xi - 1))/((xi - 1) * xi^2) + (m^xi + xi -
            1) * ((m^xi * logm + 1) * (xi - 1) * xi/(m^xi + xi - 1)^2 - (xi - 1)/(m^xi + xi - 1) -
            xi/(m^xi + xi - 1))/((xi - 1)^2 * xi) - 2 * ((m^xi * logm + 1) * dat/(es * (xi - 1)) -
            dat * (m^xi + xi - 1)/(es * (xi - 1)^2))/(xi^2 * (dat * (m^xi + xi - 1)/(es * (xi - 1)) -
            1)) + 2 * log1p(-dat * (m^xi + xi - 1)/(es * (xi - 1)))/xi^3)
        k12 = sum(-((m^xi * logm + 1) * dat/(es^2 * (xi - 1)) - dat * (m^xi + xi - 1)/(es^2 * (xi -
            1)^2)) * (1/xi + 1)/(dat * (m^xi + xi - 1)/(es * (xi - 1)) - 1) + dat * (m^xi + xi - 1) *
            ((m^xi * logm + 1) * dat/(es * (xi - 1)) - dat * (m^xi + xi - 1)/(es * (xi - 1)^2)) * (1/xi +
            1)/(es^2 * (xi - 1) * (dat * (m^xi + xi - 1)/(es * (xi - 1)) - 1)^2) + dat * (m^xi + xi -
            1)/(es^2 * (xi - 1) * xi^2 * (dat * (m^xi + xi - 1)/(es * (xi - 1)) - 1)))
      } else{
        k11 <- sum(2*dat*es*log(m) + 2*dat*es - es^2)/es^4
        k12 <- 0.5*sum((2*dat*log(m)^2 - es*log(m)^2 + 4*dat*log(m) - 4*es*log(m) + 2*dat - 4*es)*dat)/es^3
        k22 <- sum(1/12*(4*(2*dat^3 - 3*dat^2*es + dat*es^2)*log(m)^5 + (40*dat^3 - 72*dat^2*es + 32*dat*es^2 - es^3)*log(m)^4 +
                           4*(20*dat^3 - 45*dat^2*es + 25*dat*es^2 - es^3)*log(m)^3 + 8*dat^3 - 36*dat^2*es + 48*dat*es^2 -
                           12*es^3 + 4*(20*dat^3 - 57*dat^2*es + 42*dat*es^2 - 3*es^3)*log(m)^2 +
                           8*(5*dat^3 - 18*dat^2*es + 18*dat*es^2 - 3*es^3)*log(m))/(es^3*log(m)^2 + 2*es^3*log(m) + es^3))
      }
        infomat <- cbind(c(k11, k12), c(k12, k22))
    } else if (method == "exp") {
        sigmae = ifelse(!xizero, es * (1 - xi) * xi/(m^xi - 1 + xi), es/(log(m)+1))
        if(!xizero){
          Jac <- rbind(c((1 - xi) * xi/(m^xi - 1 + xi), (m^xi * logm + 1) * es * (xi - 1) * xi/(m^xi +
            xi - 1)^2 - es * (xi - 1)/(m^xi + xi - 1) - es * xi/(m^xi + xi - 1)), c(0, 1))
        } else{
         Jac <-  rbind(c(1/(logm+1), -1/2*(log(m)^2 + 2*log(m) + 2)*es/(log(m)^2 + 2*log(m) + 1)), c(0, 1))
        }
        infomat <- t(Jac) %*% gpd.infomat(par = c(sigmae, xi), dat = dat, method = "exp", nobs = nobs) %*%
            Jac
    }
    colnames(infomat) <- rownames(infomat) <- c("es","shape")
    return(infomat)
}
#' Tangent exponential model statistics for the generalized Pareto distribution (expected shortfall)
#'
#' Vector implementing conditioning on approximate ancillary statistics for the TEM
#' @seealso \code{\link{gpde}}
#' @name gpde.temstat
#' @inheritParams gpde
#' @keywords internal
#' @export
gpde.Vfun <- function(par, dat, m) {
    es = par[1]
    xi = par[2]
    cbind(dat/es, es * (xi - 1) * xi * (-dat * (m^xi + xi - 1)/(es * (xi - 1)) + 1)^(-1/xi) * (((m^xi *
        log(m) + 1) * dat/(es * (xi - 1)) - dat * (m^xi + xi - 1)/(es * (xi - 1)^2))/(xi * (dat * (m^xi +
        xi - 1)/(es * (xi - 1)) - 1)) - log1p(-dat * (m^xi + xi - 1)/(es * (xi - 1)))/xi^2)/((m^xi +
        xi - 1) * (-dat * (m^xi + xi - 1)/(es * (xi - 1)) + 1)^(-1/xi - 1)))
}

#' Canonical parameter in the local exponential family approximation
#'
#' @inheritParams gpde
#' @rdname gpde.temstat
#' @keywords internal
#' @export
gpde.phi <- function(par, dat, V, m) {
    es = par[1]
    xi = par[2]
    rbind(-(m^xi + xi - 1) * (1/xi + 1)/(es * (xi - 1) * (dat * (m^xi + xi - 1)/(es * (xi - 1)) - 1))) %*%
        V
}

## Derivative matrix of the canonical parameter in the local exponential family approximation
#' @inheritParams gpde
#' @rdname gpde.temstat
#' @keywords internal
#' @export
gpde.dphi <- function(par, dat, V, m) {
    es = par[1]
    xi = par[2]
    rbind((m^xi + xi - 1) * (1/xi + 1)/(es^2 * (xi - 1) * (dat * (m^xi + xi - 1)/(es * (xi - 1)) - 1)) -
        dat * (m^xi + xi - 1)^2 * (1/xi + 1)/(es^3 * (xi - 1)^2 * (dat * (m^xi + xi - 1)/(es * (xi -
            1)) - 1)^2), (m^xi + xi - 1) * ((m^xi * log(m) + 1) * dat/(es * (xi - 1)) - dat * (m^xi +
        xi - 1)/(es * (xi - 1)^2)) * (1/xi + 1)/(es * (xi - 1) * (dat * (m^xi + xi - 1)/(es * (xi - 1)) -
        1)^2) - (m^xi * log(m) + 1) * (1/xi + 1)/(es * (xi - 1) * (dat * (m^xi + xi - 1)/(es * (xi -
        1)) - 1)) + (m^xi + xi - 1) * (1/xi + 1)/(es * (xi - 1)^2 * (dat * (m^xi + xi - 1)/(es * (xi -
        1)) - 1)) + (m^xi + xi - 1)/(es * (xi - 1) * xi^2 * (dat * (m^xi + xi - 1)/(es * (xi - 1)) -
        1))) %*% V
}

#' Negative log likelihood of the generalized Pareto distribution (return levels)
#'
#' @seealso \code{\link{gpdr}}
#' @inheritParams gpdr
#' @keywords internal
#' @export
gpdr.ll <- function(par, dat, m) {
  if(missing(m)){
    stop("Need to specify the reciprocal tail probability \"m\"")
  }
    ym = par[1]
    xi = par[2]
    if (par[1] < 0) {
        return(-1e+15)
    }
    nn = length(dat)
    pr <- m^xi - 1
    sigmar <- ifelse(abs(xi) > 1e-7, ym * xi/(m^xi - 1), ym/log(m))
    return(gpd.ll(par = c(sigmar, xi), dat = dat))
    # if (abs(xi) > 1e-7) {
    #   -nn * log(xi/pr) - nn * log(ym) - (1 + 1/xi) * sum(log(pmax(1 + exp(log(dat) - log(ym)) * pr,0)))
    # } else {
    #   nn * log(log(m)) - nn * log(ym) + sum(-dat * log(m)/ym)
    # }
}

#' Negative log likelihood parametrized in terms of log return level and shape in order to perform unconstrained optimization
#' @inheritParams gpdr
#' @keywords internal
#' @rdname gpdr.ll
#' @export
gpdr.ll.optim <- function(par, dat, m) {
    tol <- 1e-05
    nn = length(dat)
    ym = exp(par[1])
    xi = par[2]
    p <- m^xi - 1
    if (abs(xi) > tol) {
        -(-nn * log(xi/p) - nn * log(ym) - (1 + 1/xi) * sum(log(pmax(1 + exp(log(dat) - log(ym)) * p,
            0))))
    } else {
        -(nn * log(log(m)) - nn * log(ym) + sum(-dat * log(m)/ym))
    }
}

#' Score of the profile log likelihood for the GP distribution (return levels parametrization)
#' @seealso \code{\link{gpdr}}
#' @inheritParams gpdr
#' @keywords internal
#' @export
gpdr.score <- function(par, dat, m) {
  if(missing(m)){
    stop("Need to specify the reciprocal tail probability \"m\"")
  }
    nn = length(dat)
    xi = par[2]
    ym = par[1]
    p <- (m)^xi - 1
    xizero <- abs(xi) < 1e-6
    if(!xizero){
    as.vector(c(-nn/ym + (1 + 1/xi) * sum(dat * p/(ym + dat * p))/ym, -nn/xi + exp(log(nn) + xi * log(m) + log(log(m)))/p +
        sum(log1p(dat/ym * p))/xi^2 - (1 + 1/xi) * sum(exp(log(dat) + log(log(m)) + xi * log(m) - log(ym +
        dat * p)))))
    } else{
      as.vector(c(sum((dat*log(m) - ym))/ym^2,
        0.5*sum((dat^2*log(m)^2 + ym^2*log(m) - (dat*log(m)^2 + 2*dat*log(m))*ym))/ym^2))
    }
}

#' Observed information matrix for GP distribution (return levels)
#'
#'The information matrix is parametrized in terms of rate of \code{m}-year return level and shape
#' @seealso \code{\link{gpdr}}
#' @inheritParams gpdr
#' @keywords internal
#' @export
gpdr.infomat <- function(par, dat, m, method = c("obs", "exp"), nobs = length(dat)) {
  if(missing(m)){
    stop("Need to specify the reciprocal tail probability \"m\"")
  }
    xi = as.vector(par[2])
    r = as.vector(par[1])
    method <- method[1]
    if (xi < -0.5) {
        return(matrix(NA, 2, 2))
    }
    xizero <- abs(xi) < 1e-5
    logm <- log(m)
    if (method == "obs") {
      if(!xizero){
        info <- matrix(ncol = 2, nrow = 2)
        info[1, 1] <- sum(dat^2 * (m^xi - 1)^2 * (1/xi + 1)/((dat * (m^xi - 1)/r + 1)^2 * r^4) - 2 *
            dat * (m^xi - 1) * (1/xi + 1)/((dat * (m^xi - 1)/r + 1) * r^3) + 1/r^2)
        info[2, 2] <- sum(dat^2 * (m^xi)^2 * (1/xi + 1) * logm^2/((dat * (m^xi - 1)/r + 1)^2 * r^2) -
            dat * m^xi * (1/xi + 1) * logm^2/((dat * (m^xi - 1)/r + 1) * r) + m^xi * (m^xi * xi * logm/(m^xi -
            1)^2 - 1/(m^xi - 1)) * logm/xi + (m^xi * xi * logm^2/(m^xi - 1)^2 - 2 * (m^xi)^2 * xi *
            logm^2/(m^xi - 1)^3 + 2 * m^xi * logm/(m^xi - 1)^2) * (m^xi - 1)/xi - (m^xi - 1) * (m^xi *
            xi * logm/(m^xi - 1)^2 - 1/(m^xi - 1))/xi^2 + 2 * dat * m^xi * logm/((dat * (m^xi - 1)/r +
            1) * r * xi^2) - 2 * log1p(dat * (m^xi - 1)/r)/xi^3)
        info[2, 1] <- info[1, 2] <- sum(-dat^2 * (m^xi - 1) * m^xi * (1/xi + 1) * logm/((dat * (m^xi -
            1)/r + 1)^2 * r^3) + dat * m^xi * (1/xi + 1) * logm/((dat * (m^xi - 1)/r + 1) * r^2) -
            dat * (m^xi - 1)/((dat * (m^xi - 1)/r + 1) * r^2 * xi^2))
        infomat <- -info
      } else{
        k11 <- sum((2*dat*r*log(m) - r^2))/r^4
        k12 <- 0.5*sum((2*dat*log(m) - r*log(m) - 2*r)*dat)*log(m)/r^3
        k22 <- sum((8*dat^3*log(m)^3 - r^3*log(m)^2 + 4*(dat*log(m)^3 + 3*dat*log(m)^2)*r^2 - 12*(dat^2*log(m)^3 + dat^2*log(m)^2)*r)/(12*r^3))
        infomat <- matrix(c(k11, k12, k12, k22), byrow = TRUE, ncol = 2, nrow = 2)
      }
    } else if (method == "exp") {
        sigmar <- ifelse(!xizero, r * xi/(m^xi - 1), r/logm)
        if(!xizero){
          Jac <- rbind(c(xi/(m^xi - 1), -m^xi * r * xi * logm/(m^xi - 1)^2 + r/(m^xi - 1)), c(0, 1))
        } else{
          Jac <-  rbind(c(1/logm, -0.5*r), c(0, 1))
        }
        infomat <- t(Jac) %*% gpd.infomat(par = c(sigmar, xi), method = "exp", dat = dat, nobs = length(dat)) %*% Jac
    }
    colnames(infomat) <- rownames(infomat) <- c("retlev","shape")
    return(infomat)
}

#' Tangent exponential model statistics for the generalized Pareto distribution (return level)
#'
#' Vector implementing conditioning on approximate ancillary statistics for the TEM
#' @seealso \code{\link{gpdr}}
#' @name gpdr.temstat
#' @inheritParams gpdr
#' @keywords internal
#' @export
gpdr.Vfun <- function(par, dat, m) {
    xi = par[2]
    ym = par[1]
    p <- m^xi - 1
    cbind(dat/ym, -(ym + dat * p)/p * ((m)^xi * log(m)/(p + ym/dat) - log1p(dat/ym * p)/xi))
}

#' Canonical parameter in the local exponential family approximation
#' @seealso \code{\link{gpdr}}
#' @rdname gpdr.temstat
#' @inheritParams gpdr
#' @keywords internal
#' @export
gpdr.phi <- function(par, dat, V, m) {
    xi = par[2]
    r = par[1]
    matrix(-(1 + 1/xi) * (m^xi - 1)/(r + dat * (m^xi - 1)), nrow = 1) %*% V
}

#' Derivative of the canonical parameter \eqn{\phi(\theta)} in the local exponential family approximation
#' @rdname gpdr.temstat
#' @inheritParams gpdr
#' @keywords internal
#' @export
gpdr.dphi <- function(par, dat, V, m) {
    xi = par[2]
    r = par[1]
    p = m^xi - 1
    rbind((1 + 1/xi) * p/(r + dat * p)^2, p/(xi^2 * (r + dat * p)) - (1 + 1/xi) * log(m) * m^xi * r/(r +
        dat * p)^2) %*% V

}
####################################### GEV in terms of return levels ###


#' Return level for the generalized extreme value distribution
#'
#' This function returns the \eqn{1-p}th quantile of the GEV.
#' @inheritParams gev
#' @seealso \code{\link{gev}}
#' @keywords internal
#' @export
gev.retlev <- function(par, p) {
    mu = par[1]
    sigma = par[2]
    xi = as.vector(par[3])
    if (!isTRUE(all.equal(xi, 0))) {
        mu - sigma/xi * (1 - (-log(1 - p))^(-xi))
    } else {
        mu - sigma * log(-log(-p + 1))
    }
}

#' This function returns the mean of N observations from the GEV.
#' @inheritParams gevN
#' @seealso \code{\link{gevN}}
#' @keywords internal
#' @export
gevN.mean <- function(par, N) {
    mu = par[1]
    sigma = par[2]
    xi = as.vector(par[3])
    if (!isTRUE(all.equal(xi, 0))) {
        mu - sigma/xi * (1 - N^xi * gamma(1 - xi))
    } else {
        mu + sigma * (log(N) - psigamma(1))
    }
}

gevN.quant <- function(par, N, q) {
    mu = par[1]
    sigma = par[2]
    xi = as.vector(par[3])
    if (!isTRUE(all.equal(xi, 0))) {
        mu - sigma/xi * (1 - (N/log(1/q))^xi)
    } else {
        mu + sigma * (log(N) - log(log(1/q)))
    }
}

#' This function returns the mean of N maxima from the GP.
#' @inheritParams gpdN
#' @seealso \code{\link{gpdN}}
#' @keywords internal
#' @export
gpdN.mean <- function(par, N) {
    sigma = par[1]
    xi = as.vector(par[2])
    if (!isTRUE(all.equal(xi, 0))) {
        (exp(lgamma(N + 1) + lgamma(1 - xi) - lgamma(N + 1 - xi)) - 1) * sigma/xi
    } else {
        (-psigamma(1) + psigamma(N + 1)) * sigma
    }
}

#' This function returns the qth percentile of N maxima from the GP.
#' @inheritParams gpdN
#' @seealso \code{\link{gpdN}}
#' @keywords internal
#' @export
gpdN.quant <- function(par, q, N) {
    sigma = par[1]
    xi = as.vector(par[2])
    if (!isTRUE(all.equal(xi, 0))) {
        sigma/xi * ((1 - q^(1/N))^(-xi) - 1)
    } else {
        sigma * (-log(-q^(1/N) + 1))
    }
}


#' Negative log likelihood of the generalized extreme value distribution (return levels)
#'
#' @seealso \code{\link{gevr}}
#' @inheritParams gevr
#' @keywords internal
#' @export
gevr.ll <- function(par, dat, p) {
    z = par[1]
    sigma = par[2]
    xi = as.vector(par[3])
    muf = ifelse(!isTRUE(all.equal(xi, 0)),
                 z + sigma/xi * (1 - (-log(1 - p))^(-xi)),
                 sigma * log(-log(1 - p)) + z)
    if (!isTRUE(all.equal(xi, 0, tolerance = 1e-08))) {
        sum(-log(sigma) - (1/xi + 1) * log(pmax(1 + xi * (dat - muf)/sigma, 0)) - pmax(1 + xi * (dat -
            muf)/sigma, 0)^(-1/xi))
    } else {
        sum((muf - dat)/sigma - exp((muf - dat)/sigma) - log(sigma))
    }

}

#' Negative log likelihood parametrized in terms of location, log return level and shape in order to perform unconstrained optimization
#' @rdname gevr.ll
#' @inheritParams gevr
#' @keywords internal
#' @export
gevr.ll.optim <- function(par, dat, p) {
    tpar = par
    tpar[2] = exp(par[2])
    # if(tpar[3] <= -1){return(1e20)}
    -gevr.ll(tpar, dat, p)
}

#' Score of the log likelihood for the GEV distribution (return levels)
#' @seealso \code{\link{gevr}}
#' @inheritParams gevr
#' @keywords internal
#' @export
gevr.score <- function(par, dat, p) {
    z = par[1]
    sigma = par[2]
    xi = par[3]
    log1mp <- log(1 - p)
    if(abs(xi) > 1e-4){
    c(sum((((dat - z) * xi/sigma + 1/(-log1mp)^xi)^(-1/xi - 2) * (xi + 1) * exp(-1/((dat - z) *
        xi/sigma + 1/(-log1mp)^xi)^(1/xi))/sigma^2 - ((dat - z) * xi/sigma + 1/(-log1mp)^xi)^(-2/xi -
        2) * exp(-1/((dat - z) * xi/sigma + 1/(-log1mp)^xi)^(1/xi))/sigma^2) * sigma * ((dat - z) *
        xi/sigma + 1/(-log1mp)^xi)^(1/xi + 1) * exp(1/(((dat - z) * xi/sigma + 1/(-log1mp)^xi)^(1/xi)))),
        sum(-(dat * (-log1mp)^xi - z * (-log1mp)^xi - (dat * (-log1mp)^xi - z * (-log1mp)^xi -
            sigma) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma *
            (-log1mp)^xi))^(1/xi))/((sigma * dat * xi * (-log1mp)^xi - sigma * xi * z * (-log1mp)^xi
                                     + sigma^2) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma *
            (-log1mp)^xi))^(1/xi))), sum(-(xi * z * (-log1mp)^xi - (dat * (-log1mp)^xi -
            sigma * log(-log1mp)) * xi + ((dat * (-log1mp)^xi - sigma * log(-log1mp)) *
            xi^2 + (dat * (-log1mp)^xi - sigma * log(-log1mp)) * xi - (xi^2 * (-log1mp)^xi +
            xi * (-log1mp)^xi) * z) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi +
            sigma)/(sigma * (-log1mp)^xi))^(1/xi) + (dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi
                                                     - (dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma) * ((dat * xi *
            (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi) +
            sigma) * log((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma *
            (-log1mp)^xi)))/((dat * xi^3 * (-log1mp)^xi - xi^3 * z * (-log1mp)^xi + sigma *
            xi^2) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi))))
    } else{
     as.vector(c(sum(
       (1 +  exp((-dat + z)/sigma)*log1mp)/sigma),
       sum((-sigma + dat - z + exp((-dat + z)/sigma)*(dat - z)*log1mp))/sigma^2,
       sum((1/(2*sigma^2))*(( exp(z/sigma)*(-dat + z)*log1mp* (-dat + z + 2*sigma*log(-log1mp)) +  exp(dat/sigma)*((dat - z)*(-2*sigma + dat - z) + 2*sigma*(sigma - dat + z)*log(-log1mp)))/ exp(dat/sigma)))
     ))
    }
}

#' Observed information matrix for GEV distribution (return levels)
#'
#'The information matrix is parametrized in terms of return level (\eqn{(1-p)}th quantile), scale and shape.
#' @seealso \code{\link{gevr}}
#' @inheritParams gevr
#' @param nobs integer number of observations
#' @keywords internal
#' @export
gevr.infomat <- function(par, dat, method = c("obs", "exp"), p, nobs = length(dat)) {
    method <- match.arg(method)
    z = par[1]
    sigma = par[2]
    xi = par[3]
    log1mp <- log(1 - p)
    logmlog1mp <- log(-log1mp)
    if (method == "obs") {
        info <- matrix(0, ncol = 3, nrow = 3)
        if(abs(xi) > 1e-4){
        info[1, 1] <- sum(-(xi * (-log1mp)^(2 * xi) - (xi^2 * (-log1mp)^(2 * xi) + xi *
            (-log1mp)^(2 * xi)) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi +
            sigma)/(sigma * (-log1mp)^xi))^(1/xi) + (-log1mp)^(2 * xi))/((dat^2 * xi^2 * (-log1mp)^(2 * xi) +
             xi^2 * z^2 * (-log1mp)^(2 * xi) + 2 * sigma * dat * xi * (-log1mp)^xi +
            sigma^2 - 2 * (dat * xi^2 * (-log1mp)^(2 * xi) + sigma * xi * (-log1mp)^xi) * z) *
            ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi)))

        info[1, 2] <- info[2, 1] <- sum(-(dat * (-log1mp)^(2 * xi) - z * (-log1mp)^(2 *
            xi) - sigma * (-log1mp)^xi + (sigma * xi * (-log1mp)^xi + sigma * (-log1mp)^xi) *
            ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi))/
              ((sigma * dat^2 * xi^2 * (-log1mp)^(2 * xi) + sigma * xi^2 * z^2 *
            (-log1mp)^(2 * xi) + 2 * sigma^2 * dat * xi * (-log1mp)^xi + sigma^3 - 2 * (sigma *
            dat * xi^2 * (-log1mp)^(2 * xi) + sigma^2 * xi * (-log1mp)^xi) * z) * ((dat * xi *
            (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi)))
        info[1, 3] <- info[3, 1] <- sum(-((sigma * (-log1mp)^xi * logmlog1mp - dat *
            (-log1mp)^(2 * xi)) * xi^2 + (sigma * (-log1mp)^xi * logmlog1mp - dat *
            (-log1mp)^(2 * xi)) * xi + (xi^2 * (-log1mp)^(2 * xi) + xi * (-log1mp)^(2 *
            xi)) * z - (sigma * xi^3 * (-log1mp)^xi * logmlog1mp + xi^2 * z *
            (-log1mp)^(2 * xi) + (sigma * (-log1mp)^xi * (logmlog1mp + 1) - dat * (-log1mp)^(2 *
            xi)) * xi^2) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma *
            (-log1mp)^xi))^(1/xi) + (dat * xi * (-log1mp)^(2 * xi) - xi * z * (-log1mp)^(2 *
            xi) + sigma * (-log1mp)^xi) * log((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/
                                                (sigma * (-log1mp)^xi)))/((dat^2 * xi^4 * (-log1mp)^(2 * xi) +
            xi^4 * z^2 * (-log1mp)^(2 * xi) + 2 * sigma * dat * xi^3 * (-log1mp)^xi + sigma^2 *
            xi^2 - 2 * (dat * xi^4 * (-log1mp)^(2 * xi) + sigma * xi^3 * (-log1mp)^xi) * z) *
            ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi)))
        info[2, 2] <- sum((dat^2 * xi * (-log1mp)^(2 * xi) + (xi * (-log1mp)^(2 * xi) -
            (-log1mp)^(2 * xi)) * z^2 - dat^2 * (-log1mp)^(2 * xi) + 2 * sigma * dat * (-log1mp)^xi -
              2 * (dat * xi * (-log1mp)^(2 * xi) - dat * (-log1mp)^(2 * xi) + sigma *
            (-log1mp)^xi) * z - (dat^2 * xi * (-log1mp)^(2 * xi) + xi * z^2 * (-log1mp)^(2 *
            xi) + 2 * sigma * dat * (-log1mp)^xi - sigma^2 - 2 * (dat * xi * (-log1mp)^(2 *
            xi) + sigma * (-log1mp)^xi) * z) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/
                                                  (sigma * (-log1mp)^xi))^(1/xi))/((sigma^2 * dat^2 * xi^2 *
             (-log1mp)^(2 * xi) + sigma^2 * xi^2 * z^2 * (-log1mp)^(2 * xi) + 2 * sigma^3 * dat * xi *
            (-log1mp)^xi + sigma^4 - 2 * (sigma^2 * dat * xi^2 * (-log1mp)^(2 * xi) + sigma^3 *
            xi * (-log1mp)^xi) * z) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi +
            sigma)/(sigma * (-log1mp)^xi))^(1/xi)))
        info[3, 2] <- info[2, 3] <- sum(-((sigma * dat * (-log1mp)^xi * logmlog1mp -
            dat^2 * (-log1mp)^(2 * xi)) * xi^2 - (xi^2 * (-log1mp)^(2 * xi) + xi *
            (-log1mp)^(2 * xi)) * z^2 + (sigma * dat * (-log1mp)^xi * logmlog1mp - dat^2 *
            (-log1mp)^(2 * xi)) * xi - ((sigma * (-log1mp)^xi * logmlog1mp - 2 * dat * (-log(-p +
            1))^(2 * xi)) * xi^2 + (sigma * (-log1mp)^xi * logmlog1mp - 2 * dat *
            (-log1mp)^(2 * xi)) * xi) * z - (sigma * dat * xi^3 * (-log1mp)^xi * logmlog1mp - xi^2 *
            z^2 * (-log1mp)^(2 * xi) + (sigma * dat * (-log1mp)^xi * (logmlog1mp + 1) -
            dat^2 * (-log1mp)^(2 * xi)) * xi^2 - (sigma * xi^3 * (-log1mp)^xi * logmlog1mp +
            (sigma * (-log1mp)^xi * (logmlog1mp + 1) - 2 * dat * (-log1mp)^(2 *
            xi)) * xi^2) * z) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma *
            (-log1mp)^xi))^(1/xi) + (dat^2 * xi * (-log1mp)^(2 * xi) + xi * z^2 *
             (-log1mp)^(2 * xi) + sigma * dat * (-log1mp)^xi - (2 * dat * xi * (-log1mp)^(2 * xi) +
            sigma * (-log1mp)^xi) * z) * log((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/
                                               (sigma * (-log1mp)^xi)))/((sigma * dat^2 * xi^4 * (-log1mp)^(2 *
            xi) + sigma * xi^4 * z^2 * (-log1mp)^(2 * xi) + 2 * sigma^2 * dat * xi^3 *
              (-log1mp)^xi + sigma^3 * xi^2 - 2 * (sigma * dat * xi^4 * (-log1mp)^(2 * xi) + sigma^2 * xi^3 *
            (-log1mp)^xi) * z) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma *
            (-log1mp)^xi))^(1/xi)))
        info[3, 3] <- sum((sigma * dat * xi^4 * (-log1mp)^xi * logmlog1mp^2 + (4 * sigma *
            dat * (-log1mp)^xi * logmlog1mp - 3 * dat^2 * (-log1mp)^(2 * xi)) * xi^3 +
            (2 * sigma * dat * (-log1mp)^xi * (logmlog1mp - 1) - (logmlog1mp^2 - 2 *
                logmlog1mp) * sigma^2 - dat^2 * (-log1mp)^(2 * xi)) * xi^2 - (3 * xi^3 *
            (-log1mp)^(2 * xi) + xi^2 * (-log1mp)^(2 * xi)) * z^2 - (dat^2 * xi^2 *
            (-log1mp)^(2 * xi) + xi^2 * z^2 * (-log1mp)^(2 * xi) + 2 * sigma * dat * xi * (-log1mp)^xi +
            sigma^2 - 2 * (dat * xi^2 * (-log1mp)^(2 * xi) + sigma * xi * (-log1mp)^xi) * z) *
            log((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma *
            (-log1mp)^xi))^2 - (sigma * xi^4 * (-log1mp)^xi * logmlog1mp^2 + 2 * (2 * sigma *
            (-log1mp)^xi * logmlog1mp - 3 * dat * (-log1mp)^(2 * xi)) * xi^3 + 2 * (sigma *
            (-log1mp)^xi * (logmlog1mp - 1) - dat * (-log1mp)^(2 * xi)) * xi^2) * z -
            (sigma * dat * xi^5 * (-log1mp)^xi * logmlog1mp^2 + ((logmlog1mp^2 + 2 *
                logmlog1mp) * sigma * dat * (-log1mp)^xi - dat^2 * (-log1mp)^(2 * xi)) *
                xi^4 + (4 * sigma * dat * (-log1mp)^xi * logmlog1mp - 3 * dat^2 *
                          (-log1mp)^(2 * xi)) * xi^3 - 2 * (sigma * dat * (-log1mp)^xi - sigma^2 * logmlog1mp) *
               xi^2 - (xi^4 * (-log1mp)^(2 * xi) + 3 * xi^3 * (-log1mp)^(2 * xi)) *
                z^2 - (sigma * xi^5 * (-log1mp)^xi * logmlog1mp^2 + ((logmlog1mp^2 +
                2 * logmlog1mp) * sigma * (-log1mp)^xi - 2 * dat * (-log1mp)^(2 * xi)) *
                xi^4 + 2 * (2 * sigma * (-log1mp)^xi * logmlog1mp - 3 * dat * (-log1mp)^(2 * xi)) *
                  xi^3 - 2 * sigma * xi^2 * (-log1mp)^xi) * z) * ((dat * xi * (-log1mp)^xi -
                  xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi) + 2 *
            (dat^2 * xi^3 * (-log1mp)^(2 * xi) - (sigma * dat * (-log1mp)^xi * (log(-log(-p +
                1)) - 2) - dat^2 * (-log1mp)^(2 * xi)) * xi^2 + (xi^3 * (-log1mp)^(2 * xi) +
                xi^2 * (-log1mp)^(2 * xi)) * z^2 + (sigma * dat * (-log1mp)^xi - sigma^2 *
                (logmlog1mp - 1)) * xi - (2 * dat * xi^3 * (-log1mp)^(2 * xi) - (sigma *
                (-log1mp)^xi * (logmlog1mp - 2) - 2 * dat * (-log1mp)^(2 * xi)) * xi^2 +
                sigma * xi * (-log1mp)^xi) * z - (dat^2 * xi^3 * (-log1mp)^(2 * xi) + xi^3 *
                z^2 * (-log1mp)^(2 * xi) + 2 * sigma * dat * xi^2 * (-log1mp)^xi + sigma^2 *
                xi - 2 * (dat * xi^3 * (-log1mp)^(2 * xi) + sigma * xi^2 * (-log1mp)^xi) *
                z) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma *
                (-log1mp)^xi))^(1/xi)) * log((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma *
            (-log1mp)^xi)))/((dat^2 * xi^6 * (-log1mp)^(2 * xi) + xi^6 * z^2 * (-log1mp)^(2 *
            xi) + 2 * sigma * dat * xi^5 * (-log1mp)^xi + sigma^2 * xi^4 - 2 * (dat * xi^6 *
           (-log1mp)^(2 * xi) + sigma * xi^5 * (-log1mp)^xi) * z) * ((dat * xi * (-log1mp)^xi -
            xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi)))
        } else{ #xi numerically zero
          info[1, 1] <- sum(( exp((-dat + z)/sigma)*log1mp))/sigma^2
          info[1, 2] <- sum(-((sigma + exp((-dat + z)/sigma)*(sigma - dat + z)*log1mp)))/sigma^3
          info[2, 1] <- info[1, 2]
          info[1, 3] <- sum((1/(2*sigma^3))*((2* exp(dat/sigma)* sigma*(sigma - dat + z + sigma*logmlog1mp) +  exp(z/sigma)* log1mp*((dat - z)*(-2*sigma + dat - z) + 2*sigma*(sigma - dat + z)* logmlog1mp))/ exp(dat/sigma)))
          info[3, 1] <- info[1, 3]
          info[2, 2] <- sum(sigma*(sigma - 2*dat + 2*z) + exp((-dat + z)/sigma)*(dat - z)*(-2*sigma + dat - z) * log1mp)/sigma^4
          info[2, 3] <- sum((1/(2*sigma^4))*(((dat - z)*(2*exp(dat/sigma)*sigma*(sigma - dat + z + sigma*logmlog1mp) +   exp(z/sigma)*  log1mp*((dat - z)*(-2*sigma + dat - z) +   2*sigma*(sigma - dat + z)* logmlog1mp)))/exp(dat/sigma)))
          info[3, 2] <- info[2, 3]
          info[3, 3] <- sum((1/(12*sigma^4))*((dat - z)*(4*sigma*((dat - z)*(3*sigma - 2*dat + 2*z) - 3*sigma*logmlog1mp*(2*(sigma - dat + z) + sigma*logmlog1mp)) +  exp((-dat + z)/sigma)*log1mp*((-8*sigma + 3*dat - 3*z)*(dat - z)^2 + 12*sigma*logmlog1mp*((dat - z)*(2*sigma - dat + z) - sigma*(sigma - dat + z)*logmlog1mp)))))
        }
        infomat <- -info
        # else expected information matrix
    } else {
        muf = z + sigma/xi * (1 - (-log(1 - p))^(-xi))
        if (!isTRUE(all.equal(xi, 0, tolerance = 1e-07, check.attributes = FALSE))) {
            Jac <- rbind(c(1, -((-log1mp)^(-xi) - 1)/xi, sigma * (-log1mp)^(-xi) * log(-log1mp)/xi +
                             sigma * ((-log1mp)^(-xi) - 1)/xi^2), c(0, 1, 0), c(0, 0, 1))
        } else {
            Jac <- rbind(c(1, log(-log1mp), -sigma * log(-log1mp)^2 + 1/2 * sigma * (-log1mp)^(-xi)
                           * log(-log1mp)^2), c(0, 1, 0), c(0, 0, 1))
        }
        infomat <- t(Jac) %*% gev.infomat(dat = dat, method = "exp", par = c(muf, sigma, xi)) %*% Jac
    }
    colnames(infomat) <- rownames(infomat) <- c("retlev","scale","shape")
    return(infomat)
}


#' Tangent exponential model statistics for the GEV distribution (return level)
#'
#' Vector implementing conditioning on approximate ancillary statistics for the TEM
#' @seealso \code{\link{gevr}}
#' @name gevr.temstat
#' @inheritParams gevr
#' @keywords internal
#' @export
gevr.Vfun <- function(par, dat, p) {
    z = par[1]
    sigma = par[2]
    xi = par[3]
    log1mp <- log(1-p)
    cbind(1, (dat - z)/sigma, sigma * ((dat - z) * xi/sigma + (-log1mp)^(-xi))^(-1/xi) * (((-log1mp)^(-xi) * log(-log1mp) - (dat - z)/sigma)/(((dat - z) * xi/sigma + (-log1mp)^(-xi)) *
        xi) + log((dat - z) * xi/sigma + (-log1mp)^(-xi))/xi^2)/((dat - z) * xi/sigma + (-log1mp)^(-xi))^(-1/xi - 1))
}

#' Canonical parameter in the local exponential family approximation
#' @rdname gevr.temstat
#' @inheritParams gevr
#' @keywords internal
#' @export
gevr.phi <- function(par, dat, p, V) {
    z = par[1]
    sigma = par[2]
    xi = par[3]
    log1mp <- log(1-p)
    t(-((xi * (-log1mp)^xi + (-log1mp)^xi) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi) - (-log1mp)^xi)/((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi))) %*% V
}

#' Derivative of the canonical parameter \eqn{\phi(\theta)} in the local exponential family approximation
#' @rdname gevr.temstat
#' @inheritParams gevr
#' @keywords internal
#' @export
gevr.dphi <- function(par, dat, p, V) {
    z = par[1]
    sigma = par[2]
    xi = par[3]
    log1mp <- log(1 - p)
    rbind((xi * (-log1mp)^(2 * xi) - (xi^2 * (-log1mp)^(2 * xi) + xi * (-log1mp)^(2 *
        xi)) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi) + (-log1mp)^(2 * xi))/((dat^2 * xi^2 * (-log1mp)^(2 * xi) + xi^2 *
        z^2 * (-log1mp)^(2 * xi) + 2 * sigma * dat * xi * (-log1mp)^xi + sigma^2 - 2 * (dat *
        xi^2 * (-log1mp)^(2 * xi) + sigma * xi * (-log1mp)^xi) * z) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi)), (dat * (-log1mp)^(2 * xi) - z * (-log1mp)^(2 * xi) - sigma * (-log1mp)^xi + (sigma * xi * (-log1mp)^xi + sigma * (-log1mp)^xi) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi +
        sigma)/(sigma * (-log1mp)^xi))^(1/xi))/((sigma * dat^2 * xi^2 * (-log1mp)^(2 * xi) +
        sigma * xi^2 * z^2 * (-log1mp)^(2 * xi) + 2 * sigma^2 * dat * xi * (-log1mp)^xi + sigma^3 -
        2 * (sigma * dat * xi^2 * (-log1mp)^(2 * xi) + sigma^2 * xi * (-log1mp)^xi) * z) *
        ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi)),
        ((sigma * (-log1mp)^xi * log(-log1mp) - dat * (-log1mp)^(2 * xi)) * xi^2 + (sigma *
            (-log1mp)^xi * log(-log1mp) - dat * (-log1mp)^(2 * xi)) * xi + (xi^2 * (-log1mp)^(2 * xi) + xi *
        (-log1mp)^(2 * xi)) * z - (sigma * xi^3 * (-log1mp)^xi * log(-log1mp) + xi^2 * z * (-log1mp)^(2 * xi) + (sigma * (-log1mp)^xi * (log(-log1mp) +
            1) - dat * (-log1mp)^(2 * xi)) * xi^2) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi))^(1/xi) + (dat * xi * (-log1mp)^(2 * xi) -
            xi * z * (-log1mp)^(2 * xi) + sigma * (-log1mp)^xi) * log((dat * xi * (-log1mp)^xi - xi * z *
        (-log1mp)^xi + sigma)/(sigma * (-log1mp)^xi)))/((dat^2 * xi^4 *
            (-log1mp)^(2 * xi) + xi^4 * z^2 * (-log1mp)^(2 * xi) + 2 * sigma * dat * xi^3 *
            (-log1mp)^xi + sigma^2 * xi^2 - 2 * (dat * xi^4 * (-log1mp)^(2 * xi) + sigma *
            xi^3 * (-log1mp)^xi) * z) * ((dat * xi * (-log1mp)^xi - xi * z * (-log1mp)^xi +
            sigma)/(sigma * (-log1mp)^xi))^(1/xi))) %*% V
}


#' @title Generalized Pareto distribution (mean of maximum of N exceedances parametrization)
#'
#' @description Likelihood, score function and information matrix,
#' approximate ancillary statistics and sample space derivative
#' for the generalized Pareto distribution parametrized in terms of average maximum of \code{N} exceedances.
#'
#' The parameter \code{N} corresponds to the number of threshold exceedances of interest over which the maxima is taken.
#' \eqn{z} is the corresponding expected value of this block maxima.
#' Note that the actual parametrization is in terms of excess expected mean, meaning expected mean minus threshold.
#'
#' @details The observed information matrix was calculated from the Hessian using symbolic calculus in Sage.
#'
#' @author Leo Belzile
#' @name gpdN
#' @param par vector of length 2 containing \eqn{z} and \eqn{\xi}, respectively the mean excess of the maxima of N exceedances above the threshold and the shape parameter.
#' @param dat sample vector
#' @param N block size for threshold exceedances.
#' @param tol numerical tolerance for the exponential model
#' @param V vector calculated by \code{gpdN.Vfun}
#' @section Usage: \preformatted{gpdN.ll(par, dat, N, tol=1e-5)
#' gpdN.score(par, dat, N)
#' gpdN.infomat(par, dat, N, method = c('obs', 'exp'), nobs = length(dat))
#' gpdN.Vfun(par, dat, N)
#' gpdN.phi(par, dat, N, V)
#' gpdN.dphi(par, dat, N, V)}
#'
#' @section Functions:
#'
#' \itemize{
#' \item{\code{gpdN.ll}:} {log likelihood}
#' \item{\code{gpdN.score}:} {score vector}
#' \item{\code{gpdN.infomat}:} {observed information matrix for GP parametrized in terms of mean of the maximum of \code{N} exceedances and shape}
#' \item{\code{gpdN.Vfun}:} {vector implementing conditioning on approximate ancillary statistics for the TEM}
#' \item{\code{gpdN.phi}:} {canonical parameter in the local exponential family approximation}
#' \item{\code{gpdN.dphi}:} {derivative matrix of the canonical parameter in the local exponential family approximation}
#' }
NULL

#' Negative log likelihood of the generalized Pareto distribution (mean of maximum of N exceedances parametrization)
#'
#' @seealso \code{\link{gpdN}}
#' @inheritParams gpdN
#' @keywords internal
#' @export
gpdN.ll <- function(par, dat, N) {
    xi = par[2]
    z = par[1]
    euler_gamma = 0.57721566490153231044
    sigma = ifelse(abs(xi) > 1e-8,
                   z * xi/(exp(lgamma(N + 1) + lgamma(-xi + 1) - lgamma(N - xi + 1)) - 1),
                   z / (euler_gamma + psigamma(N + 1)))
    gpd.ll(par = c(sigma, xi), dat = dat)
}

#' Score vector of the generalized Pareto distribution (mean of maximum of N exceedances parametrization)
#' @seealso \code{\link{gpdN}}
#' @inheritParams gpdN
#' @keywords internal
#' @export
gpdN.score <- function(par, dat, N) {
    z = par[1]
    xi = par[2]
    cst <- exp(lgamma(N + 1) + lgamma(1 - xi) - lgamma(N + 1 - xi))
    xizero <- abs(xi) < 1e-6
    if(!xizero){
      as.vector(c(sum(dat * (cst - 1) * (1/xi + 1)/(z^2 * (dat * (cst - 1)/z + 1)) - 1/z), sum(-(psigamma(N - xi +
          1) * cst - psigamma(-xi + 1) * cst) * dat * (1/xi + 1)/(z * (dat * (cst - 1)/z + 1)) + ((psigamma(N -
          xi + 1) * cst - psigamma(-xi + 1) * cst) * xi * z/(cst - 1)^2 - z/(cst - 1)) * (cst - 1)/(xi *z) +
            log(dat * (cst - 1)/z + 1)/xi^2)))
    } else{
      euler_gamma <- 0.57721566490153231044
      psi1pN <- psigamma(1 + N)
      psip1pN <- psigamma(1 + N, deriv = 1)
      as.vector(c( sum(dat*euler_gamma - z + dat*psi1pN)/z^2,
       sum((6*dat^2*euler_gamma^3 - 12*dat*euler_gamma^2*z - 6*dat*euler_gamma^3*z -
          dat*euler_gamma*pi^2*z + 6*euler_gamma^2*z^2 + pi^2*z^2 +
          6*(3*dat^2*euler_gamma - dat*(2 + 3*euler_gamma)*z + z^2)*
          psigamma(1 + N)^2 +
          6*dat*(dat - z)*psigamma(1 + N)^3 + 6*(dat*euler_gamma - z)*z*
          psigamma(deriv = 1, 1+N) + psigamma(1 + N)*(18*dat^2*euler_gamma^2 -
          dat*(24*euler_gamma + 18*euler_gamma^2 + pi^2)*z + 12*euler_gamma*z^2 +
          6*dat*z*psigamma(deriv = 1, 1+N)))/(12*z^2*(euler_gamma + psigamma(1 + N))))))
    }
}

#' Information matrix of the generalized Pareto distribution (mean of maximum of N exceedances parametrization)
#' @seealso \code{\link{gpdN}}
#' @inheritParams gpdN
#' @keywords internal
#' @export
gpdN.infomat <- function(par, dat, N, method = c("obs", "exp"), nobs = length(dat)) {
    z = as.vector(par[1])
    xi = as.vector(par[2])
    if (xi < -0.5) {
        return(matrix(NA, 2, 2))
    }
    method <- method[1]  #default corresponds to observed information rather than Fisher information
    xizero <- abs(xi) < 1e-4
    # Fisher information matrix
    cst <- exp(lgamma(N + 1) + lgamma(1 - xi) - lgamma(N + 1 - xi))
    if (method == "exp") {
      if(!xizero){
        sigmaf <- z * xi/(cst - 1)
        Jac <- rbind(c(xi/(cst - 1),
                       -(digamma(N - xi + 1) * cst - digamma(-xi + 1) * cst) * xi * z/(cst - 1)^2 + z/(cst - 1)), c(0, 1))
      } else{
        euler_gamma <- 0.57721566490153231044
        psi1pN <- psigamma(1 + N)
        sigmaf <- z/(euler_gamma + psi1pN)
        Jac <- rbind(c(1/(euler_gamma + psi1pN),
                       -(z*(6*euler_gamma^2 + pi^2 + 12*euler_gamma*psigamma(1 + N) +
                               6*psigamma(1 + N)^2 - 6*psigamma(1 + N, deriv = 1)))/
                           (12*(euler_gamma + psigamma(1 + N))^2)), c(0,1))
      }
        infomat <- t(Jac) %*% gpd.infomat(par = c(sigmaf, xi), dat = dat, method = "exp") %*% Jac
    } else if (method == "obs") {
        # Observed information
        if(!xizero){
        k11 <- sum(2 * dat * (cst - 1) * (1/xi + 1)/(z^3 * (dat * (cst - 1)/z + 1)) - dat^2 * (cst -
            1)^2 * (1/xi + 1)/(z^4 * (dat * (cst - 1)/z + 1)^2) - 1/z^2)
        k12 <- sum(-(digamma(N - xi + 1) * cst - digamma(-xi + 1) * cst) * dat * (1/xi + 1)/(z^2 * (dat *
            (cst - 1)/z + 1)) + (digamma(N - xi + 1) * cst - digamma(-xi + 1) * cst) * dat^2 * (cst -
            1) * (1/xi + 1)/(z^3 * (dat * (cst - 1)/z + 1)^2) + dat * (cst - 1)/(xi^2 * z^2 * (dat *
            (cst - 1)/z + 1)))
        k22 <- sum(-(digamma(N - xi + 1) * cst - digamma(-xi + 1) * cst)^2 * dat^2 * (1/xi + 1)/(z^2 *
            (dat * (cst - 1)/z + 1)^2) + (digamma(N - xi + 1)^2 * cst - 2 * digamma(N - xi + 1) * digamma(-xi +
            1) * cst + digamma(-xi + 1)^2 * cst - psigamma(deriv = 1, N - xi + 1) * cst + psigamma(deriv = 1,
            -xi + 1) * cst) * dat * (1/xi + 1)/(z * (dat * (cst - 1)/z + 1)) - (digamma(N - xi + 1) *
            cst - digamma(-xi + 1) * cst) * ((digamma(N - xi + 1) * cst - digamma(-xi + 1) * cst) * xi *
            z/(cst - 1)^2 - z/(cst - 1))/(xi * z) + (2 * (digamma(N - xi + 1) * cst - digamma(-xi + 1) *
            cst)^2 * xi * z/(cst - 1)^3 - (digamma(N - xi + 1)^2 * cst - 2 * digamma(N - xi + 1) * digamma(-xi +
            1) * cst + digamma(-xi + 1)^2 * cst - psigamma(deriv = 1, N - xi + 1) * cst + psigamma(deriv = 1,
            -xi + 1) * cst) * xi * z/(cst - 1)^2 - 2 * (digamma(N - xi + 1) * cst - digamma(-xi + 1) *
            cst) * z/(cst - 1)^2) * (cst - 1)/(xi * z) + ((digamma(N - xi + 1) * cst - digamma(-xi +
            1) * cst) * xi * z/(cst - 1)^2 - z/(cst - 1)) * (cst - 1)/(xi^2 * z) - 2 * (digamma(N - xi +
            1) * cst - digamma(-xi + 1) * cst) * dat/(xi^2 * z * (dat * (cst - 1)/z + 1)) + 2 * log(dat *
            (cst - 1)/z + 1)/xi^3)
        } else{
          euler_gamma <- 0.57721566490153231044
          psigamma1pN <- psigamma(1 + N)
          zeta3 <- 1.2020569031595944587
          psigammap1pN <- psigamma(1 + N, deriv = 1)
          psigammapp1pN <-  psigamma(1 + N, deriv = 2)
          k11 <- -sum(-2*dat*euler_gamma + z - 2*dat*psigamma1pN)/z^3
          k12 <- -sum((1/(12*z^3))*(dat*(-12*dat*euler_gamma^2 + (6*euler_gamma*(2 + euler_gamma) + pi^2)*z +
                     12*(-2*dat*euler_gamma + z + euler_gamma*z)*psigamma1pN +
                     6*(-2*dat + z)*psigamma1pN^2 -  6*z*psigammap1pN)))
          k22 <- -sum((-96*dat^3*euler_gamma^5 + 24*dat^2*euler_gamma^3*(6*euler_gamma*(1 + euler_gamma) + pi^2)*z -
                   24*dat*euler_gamma^2*(2*euler_gamma^2*(3 + euler_gamma) + (1 + euler_gamma)*pi^2)*
                   z^2 + (12*euler_gamma^4 + 12*euler_gamma^2*pi^2 - pi^4)*z^3 -
                   12*((40*dat^3*euler_gamma + 4*dat*(3 + 5*euler_gamma)*z^2 - z^3 -
                   12*dat^2*(z + 5*euler_gamma*z))*psigamma1pN^4 +
                   4*dat*(2*dat^2 - 3*dat*z + z^2)*psigamma1pN^5 +
                   2*psigamma1pN^3*(40*dat^3*euler_gamma^2 -  dat^2*(12*euler_gamma*(2 + 5*euler_gamma) + pi^2)*z +
                   dat*(4*euler_gamma*(6 + 5*euler_gamma) + pi^2)*z^2 -2*euler_gamma*z^3 + 6*dat*(dat - z)*z*psigammap1pN) +
                   z*((12*dat^2*euler_gamma^3 - 12*dat*euler_gamma^2*(1 + euler_gamma)*z + (6*euler_gamma^2 - pi^2)*z^2)*
                   psigammap1pN + 3*z^2*psigammap1pN^2 + 4*euler_gamma*(dat*euler_gamma - z)*z*
                   (psigammapp1pN + 2*zeta3)) + psigamma1pN^2*(80*dat^3*euler_gamma^3 -
                    6*dat^2*euler_gamma*(4*euler_gamma*(3 + 5*euler_gamma) + pi^2)*z + 2*dat*(4*euler_gamma^2*(9 + 5*euler_gamma) +
                     (1 + 3*euler_gamma)*pi^2)*z^2 - (6*euler_gamma^2 + pi^2)*z^3 +6*z*(6*dat^2*euler_gamma - 2*dat*(1 + 3*euler_gamma)*z + z^2)*
                     psigammap1pN + 4*dat*z^2*(psigammapp1pN + 2*zeta3)) +2*psigamma1pN*(euler_gamma*(20*dat^3*euler_gamma^3 -
                     3*dat^2*euler_gamma*(2*euler_gamma*(4 + 5*euler_gamma) + pi^2)*z + dat*(2*euler_gamma^2*(12 + 5*euler_gamma) +
                    (2 + 3*euler_gamma)*pi^2)*z^2 - (2*euler_gamma^2 + pi^2)*z^3) +6*euler_gamma*z*(3*dat^2*euler_gamma - dat*(2 + 3*euler_gamma)*z + z^2)*
                    psigammap1pN - 2*z^2*(-2*dat*euler_gamma + z)*(psigammapp1pN + 2*zeta3))))/(144*z^3*(psigamma1pN + euler_gamma)^2))
        }
        infomat <- cbind(c(k11, k12), c(k12, k22))
    }
    colnames(infomat) <- rownames(infomat) <- c("Nmean","shape")
    return(infomat)
}

#' Tangent exponential model statistics for the generalized Pareto distribution (mean of maximum of N exceedances parametrization)
#'
#' Vector implementing conditioning on approximate ancillary statistics for the TEM
#' @seealso \code{\link{gpdN}}
#' @name gpdN.temstat
#' @inheritParams gpdN
#' @keywords internal
#' @export
gpdN.Vfun <- function(par, dat, N) {
    xi = par[2]
    z = par[1]
    cst <- exp(lgamma(N + 1) + lgamma(-xi + 1) - lgamma(N - xi + 1))
    cbind(dat/z, -xi * z * (dat * (cst - 1)/z + 1)^(-1/xi) * ((psigamma(N - xi + 1) * cst - psigamma(-xi +
        1) * cst) * dat/(xi * z * (dat * (cst - 1)/z + 1)) - log(dat * (cst - 1)/z + 1)/xi^2)/((dat *
        (cst - 1)/z + 1)^(-1/xi - 1) * (cst - 1)))
}

#' Canonical parameter in the local exponential family approximation
#'
#' @inheritParams gpdN
#' @rdname gpdN.temstat
#' @keywords internal
#' @export
gpdN.phi <- function(par, dat, N, V) {
    xi = par[2]
    z = par[1]
    cst <- exp(lgamma(N + 1) + lgamma(-xi + 1) - lgamma(N - xi + 1))
    matrix(-(cst - 1) * (1/xi + 1)/(z * (dat * (cst - 1)/z + 1)), nrow = 1) %*% V
}

## Derivative matrix of the canonical parameter in the local exponential family approximation
#' @inheritParams gpdN
#' @rdname gpdN.temstat
#' @keywords internal
#' @export
gpdN.dphi <- function(par, dat, N, V) {
    xi = par[2]
    z = par[1]
    cst <- exp(lgamma(N + 1) + lgamma(-xi + 1) - lgamma(N - xi + 1))
    rbind((cst - 1) * (1/xi + 1)/(z^2 * (dat * (cst - 1)/z + 1)) - dat * (cst - 1)^2 * (1/xi + 1)/(z^3 *
        (dat * (cst - 1)/z + 1)^2), -(psigamma(N - xi + 1) * cst - psigamma(-xi + 1) * cst) * (1/xi +
        1)/(z * (dat * (cst - 1)/z + 1)) + (psigamma(N - xi + 1) * cst - psigamma(-xi + 1) * cst) * dat *
        (cst - 1) * (1/xi + 1)/(z^2 * (dat * (cst - 1)/z + 1)^2) + (cst - 1)/(xi^2 * z * (dat * (cst -
        1)/z + 1))) %*% V
}



############################################################################################

############################################################################################

#' @title Generalized extreme value distribution (quantile/mean of N-block maxima parametrization)
#'
#' @description Likelihood, score function and information matrix,
#' approximate ancillary statistics and sample space derivative
#' for the generalized extreme value distribution  parametrized in terms of the
#' quantiles/mean of N-block maxima parametrization \eqn{z}, scale and shape.
#'
#' @author Leo Belzile
#' @name gevN
#' @param par vector of \code{loc}, quantile/mean of N-block maximum and \code{shape}
#' @param dat sample vector
#' @param V vector calculated by \code{gevN.Vfun}
#' @param q probability, corresponding to \eqn{q}th quantile of the \code{N}-block maximum
#' @param qty string indicating whether to calculate the \code{q} quantile or the mean
#' @section Usage: \preformatted{gevN.ll(par, dat, N, q, qty = c('mean', 'quantile'))
#' gevN.ll.optim(par, dat, N, q = 0.5, qty = c('mean', 'quantile'))
#' gevN.score(par, dat, N, q = 0.5, qty = c('mean', 'quantile'))
#' gevN.infomat(par, dat, qty = c('mean', 'quantile'), method = c('obs', 'exp'), N, q = 0.5, nobs = length(dat))
#' gevN.Vfun(par, dat, N, q = 0.5, qty = c('mean', 'quantile'))
#' gevN.phi(par, dat, N, q = 0.5, qty = c('mean', 'quantile'), V)
#' gevN.dphi(par, dat, N, q = 0.5, qty = c('mean', 'quantile'), V)}
#'
#' @section Functions:
#' \itemize{
#' \item{\code{gevN.ll}:} {log likelihood}
#' \item{\code{gevN.score}:} {score vector}
#' \item{\code{gevN.infomat}:} {expected and observed information matrix}
#' \item{\code{gevN.Vfun}:} {vector implementing conditioning on approximate ancillary statistics for the TEM}
#' \item{\code{gevN.phi}:} {canonical parameter in the local exponential family approximation}
#' \item{\code{gevN.dphi}:} {derivative matrix of the canonical parameter in the local exponential family approximation}
#' }
NULL

#' Negative log likelihood of the generalized extreme value distribution (quantile/mean of N-block maxima parametrization)
#'
#' @seealso \code{\link{gevN}}
#' @inheritParams gevN
#' @keywords internal
#' @export
gevN.ll <- function(par, dat, N, q = 0.5, qty = c("mean", "quantile")) {
    qty <- match.arg(qty)
    mu = par[1]
    z = par[2]
    xi = as.vector(par[3])
    if (!isTRUE(all.equal(xi, 0, check.attributes = FALSE))) {
        sigma <- switch(qty,
                        quantile = (z - mu) * xi/(N^xi * (log(1/q))^(-xi) - 1),
                        mean = (z - mu) * xi/(N^xi * gamma(1 - xi) - 1))
    } else {
        sigma <- switch(qty,
                        quantile = (z - mu)/(log(N) - log(-log(q))),
                        mean = (z - mu)/(-psigamma(1) + log(N)))
    }
    gev.ll(par = c(mu, sigma, xi), dat = dat)
}

#' Score of the generalized extreme value distribution (quantile/mean of N-block maxima parametrization)
#'
#' @seealso \code{\link{gevN}}
#' @inheritParams gevN
#' @keywords internal
#' @export
gevN.score <- function(par, dat, N, q = 0.5, qty = c("mean", "quantile")) {
    qty <- match.arg(qty)
    mu = par[1]
    z = par[2]
    xi = par[3]
    log1q = log(1/q)
    if (qty == "quantile") {
        # quantiles at prob. q
        if(abs(xi) > 1e-4){
        as.vector(c(sum((-(N^xi * log1q^(-xi) - 1) * (dat - mu)/(mu - z) + 1)^(-1/xi - 1) * ((N^xi * log1q^(-xi) -
            1) * (dat - mu)/(mu - z)^2 + (N^xi * log1q^(-xi) - 1)/(mu - z))/xi + ((N^xi * log1q^(-xi) -
            1) * (dat - mu)/(mu - z)^2 + (N^xi * log1q^(-xi) - 1)/(mu - z)) * (1/xi + 1)/((N^xi *
            log1q^(-xi) - 1) * (dat - mu)/(mu - z) - 1) - 1/(mu - z)), sum(-(N^xi * log1q^(-xi) -
            1) * (dat - mu) * (-(N^xi * log1q^(-xi) - 1) * (dat - mu)/(mu - z) + 1)^(-1/xi - 1)/((mu -
            z)^2 * xi) - (N^xi * log1q^(-xi) - 1) * (dat - mu) * (1/xi + 1)/(((N^xi * log1q^(-xi) -
            1) * (dat - mu)/(mu - z) - 1) * (mu - z)^2) + 1/(mu - z)), sum((-(N^xi * log1q^(-xi) -
            1) * (dat - mu)/(mu - z) + 1)^(-1/xi) * ((N^xi * log1q^(-xi) * log(N) - N^xi * log1q^(-xi) *
            log(log1q)) * (dat - mu)/(((N^xi * log1q^(-xi) - 1) * (dat - mu)/(mu - z) - 1) * (mu -
            z) * xi) - log(-(N^xi * log1q^(-xi) - 1) * (dat - mu)/(mu - z) + 1)/xi^2) - (N^xi * log1q^(-xi) *
            log(N) - N^xi * log1q^(-xi) * log(log1q)) * (dat - mu) * (1/xi + 1)/(((N^xi * log1q^(-xi) -
            1) * (dat - mu)/(mu - z) - 1) * (mu - z)) + (N^xi * log1q^(-xi) - 1) * ((N^xi * log1q^(-xi) *
            log(N) - N^xi * log1q^(-xi) * log(log1q)) * (mu - z) * xi/(N^xi * log1q^(-xi) -
            1)^2 - (mu - z)/(N^xi * log1q^(-xi) - 1))/((mu - z) * xi) + log1p(-(N^xi * log1q^(-xi) -
            1) * (dat - mu)/(mu - z))/xi^2)))
    } else{
      logN <- log(N)
      loglog1q <- log(log1q)
      as.vector(c(
        sum((-mu + z + (-1 + exp(((mu - dat)*(-logN + loglog1q))/(mu - z)))*(dat - z)*(logN - loglog1q))/(mu - z)^2),
        sum((mu - z + (mu - dat)*(-1 +  N^((-mu + dat)/(mu - z))*log1q^((mu - dat)/(mu - z)))*(logN -  loglog1q))/
              (mu - z)^2),
        sum((1/(2*(mu - z)^2))*(((-(mu - z))*(mu - 2*dat + z) + (mu - dat)*(dat - z)* (-1 +  N^((-mu + dat)/(mu - z))*
            log1q^((mu - dat)/(mu - z)))*(log( N) - loglog1q))*(logN - loglog1q)))
      ))
      }
    } else {
        # Mean
      if(abs(xi) > 1e-4){
        as.vector(c(sum((-(N^xi * gamma(-xi + 1) - 1) * (dat - mu)/(mu - z) + 1)^(-1/xi - 1) * ((N^xi * gamma(-xi +
            1) - 1) * (dat - mu)/(mu - z)^2 + (N^xi * gamma(-xi + 1) - 1)/(mu - z))/xi + ((N^xi * gamma(-xi +
            1) - 1) * (dat - mu)/(mu - z)^2 + (N^xi * gamma(-xi + 1) - 1)/(mu - z)) * (1/xi + 1)/((N^xi *
            gamma(-xi + 1) - 1) * (dat - mu)/(mu - z) - 1) - 1/(mu - z)), sum(-(N^xi * gamma(-xi + 1) -
            1) * (dat - mu) * (-(N^xi * gamma(-xi + 1) - 1) * (dat - mu)/(mu - z) + 1)^(-1/xi - 1)/((mu -
            z)^2 * xi) - (N^xi * gamma(-xi + 1) - 1) * (dat - mu) * (1/xi + 1)/(((N^xi * gamma(-xi +
            1) - 1) * (dat - mu)/(mu - z) - 1) * (mu - z)^2) + 1/(mu - z)), sum((-(N^xi * gamma(-xi +
            1) - 1) * (dat - mu)/(mu - z) + 1)^(-1/xi) * ((N^xi * log(N) * gamma(-xi + 1) - N^xi * psigamma(-xi +
            1) * gamma(-xi + 1)) * (dat - mu)/(((N^xi * gamma(-xi + 1) - 1) * (dat - mu)/(mu - z) - 1) *
            (mu - z) * xi) - log1p(-(N^xi * gamma(-xi + 1) - 1) * (dat - mu)/(mu - z))/xi^2) - (N^xi *
            log(N) * gamma(-xi + 1) - N^xi * psigamma(-xi + 1) * gamma(-xi + 1)) * (dat - mu) * (1/xi +
            1)/(((N^xi * gamma(-xi + 1) - 1) * (dat - mu)/(mu - z) - 1) * (mu - z)) + (N^xi * gamma(-xi +
            1) - 1) * ((N^xi * log(N) * gamma(-xi + 1) - N^xi * psigamma(-xi + 1) * gamma(-xi + 1)) *
            (mu - z) * xi/(N^xi * gamma(-xi + 1) - 1)^2 - (mu - z)/(N^xi * gamma(-xi + 1) - 1))/((mu -
            z) * xi) + log1p(-(N^xi * gamma(-xi + 1) - 1) * (dat - mu)/(mu - z))/xi^2)))
      } else{
        logN <- log(N)
        euler_gamma <- 0.57721566490153231044
        as.vector(c(
         sum((1/(mu - z)^2)*(-mu - euler_gamma*dat + z + euler_gamma*z - dat*log(N) +
                               z*log(N) +
                               exp(((mu - dat)*(euler_gamma + log(N)))/(-mu + z))*(dat -
                                                                                  z)*(euler_gamma + log(N)))),
         sum((1/(mu - z)^2)*(mu - euler_gamma*mu + euler_gamma*dat - z - mu*log(N) +
                               dat*log(N) +
                               exp(((mu - dat)*(euler_gamma + log(N)))/(-mu + z))*(mu -
                                                                                  dat)*(euler_gamma + log(N)))),
         sum((exp((mu*(euler_gamma + log(N)))/(-mu + z))*
                (exp((dat*(euler_gamma + log(N)))/(mu - z))*(mu - dat)*(euler_gamma + log(N))*
                   (pi^2*(mu - z) + 6*euler_gamma^2*(dat - z) + 6*(dat - z)*log(N)*
                      (2*euler_gamma + log(N))) + exp((mu*(euler_gamma + log(N)))/(mu - z))*
                   ((-euler_gamma)*pi^2*(mu - dat)*(mu - z) + pi^2*(mu - z)^2 - 6*euler_gamma^3*(mu - dat)*(dat - z) -
                      6*euler_gamma^2*(mu - z)*(mu - 2*dat + z) + log(N)*((-pi^2)*(mu - dat)*(mu - z) -
                      18*euler_gamma^2*(mu - dat)*(dat - z) - 12*euler_gamma*(mu - z)*(mu - 2*dat + z) +
               6*log(N)*(-mu^2 + 3*euler_gamma*dat*(dat - z) + z*(-2*dat + z) +
               mu*((2 - 3*euler_gamma)*dat + 3*euler_gamma*z) - (mu - dat)*(dat - z)*log(N))))))/
               (12*(mu - z)^2*(euler_gamma + log(N))))
        ))
      }
    }
}

#' Information matrix of the generalized extreme value distribution (quantile/mean of N-block maxima parametrization)
#' @seealso \code{\link{gevN}}
#' @inheritParams gevN
#' @keywords internal
#' @export
gevN.infomat <- function(par, dat, method = c("obs", "exp"), qty = c("mean", "quantile"), N, q = 0.5,  nobs = length(dat)) {
    method <- match.arg(method)
    par <- as.vector(par)
    mu = par[1]
    z = par[2]
    xi = par[3]
    logN = log(N)
    log1q = log(1/q)
    loglog1q = log(log1q)
    qty <- match.arg(qty)
    xizero <- isTRUE(all.equal(xi, 0, tolerance = 1e-05, check.attributes = FALSE))
    euler_gamma <- 0.57721566490153231044
    zeta3 <- 1.2020569031595944587
    if (qty == "mean") {
        if (!xizero) {
            # z = mu + sigma/xi*(N^xi*gamma(1-xi)-1)
            sigmaq <- (z - mu) * xi/(N^xi * gamma(1 - xi) - 1)
        } else {
            # z = mu + sigma*(log(N) + euler_gamma)
            sigmaq <- (z - mu)/(logN + euler_gamma)
        }

        if (method == "obs") {
          if(!xizero){
            k11 <- sum(((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1)^(-1/xi - 2) * ((N^xi *
                gamma(-xi + 1) - 1) * (mu - dat)/(mu - z)^2 - (N^xi * gamma(-xi + 1) - 1)/(mu - z))^2 *
                (1/xi + 1)/xi - 2 * ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1)^(-1/xi -
                1) * ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z)^3 - (N^xi * gamma(-xi + 1) -
                1)/(mu - z)^2)/xi - ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z)^2 - (N^xi * gamma(-xi +
                1) - 1)/(mu - z))^2 * (1/xi + 1)/((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) +
                1)^2 + 2 * ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z)^3 - (N^xi * gamma(-xi +
                1) - 1)/(mu - z)^2) * (1/xi + 1)/((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) +
                1) - 1/(mu - z)^2)
            k12 <- sum(-(N^xi * gamma(-xi + 1) - 1) * ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu -
                z) + 1)^(-1/xi - 2) * (mu - dat) * ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z)^2 -
                (N^xi * gamma(-xi + 1) - 1)/(mu - z)) * (1/xi + 1)/((mu - z)^2 * xi) + ((N^xi * gamma(-xi +
                1) - 1) * (mu - dat)/(mu - z) + 1)^(-1/xi - 1) * (2 * (N^xi * gamma(-xi + 1) - 1) * (mu -
                dat)/(mu - z)^3 - (N^xi * gamma(-xi + 1) - 1)/(mu - z)^2)/xi - (2 * (N^xi * gamma(-xi +
                1) - 1) * (mu - dat)/(mu - z)^3 - (N^xi * gamma(-xi + 1) - 1)/(mu - z)^2) * (1/xi + 1)/((N^xi *
                gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1) + (N^xi * gamma(-xi + 1) - 1) * (mu -
                dat) * ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z)^2 - (N^xi * gamma(-xi + 1) -
                1)/(mu - z)) * (1/xi + 1)/(((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1)^2 *
                (mu - z)^2) + 1/(mu - z)^2)
            k13 <- sum(-((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1)^(-1/xi - 1) * ((N^xi *
                logN * gamma(-xi + 1) - N^xi * psigamma(-xi + 1) * gamma(-xi + 1)) * (mu - dat) * (1/xi +
                1)/(((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1) * (mu - z)) - log1p((N^xi *
                gamma(-xi + 1) - 1) * (mu - dat)/(mu - z))/xi^2) * ((N^xi * gamma(-xi + 1) - 1) *
                (mu - dat)/(mu - z)^2 - (N^xi * gamma(-xi + 1) - 1)/(mu - z))/xi + ((N^xi * gamma(-xi +
                1) - 1) * (mu - dat)/(mu - z) + 1)^(-1/xi - 1) * ((N^xi * logN * gamma(-xi + 1) - N^xi *
                psigamma(-xi + 1) * gamma(-xi + 1)) * (mu - dat)/(mu - z)^2 - (N^xi * logN * gamma(-xi +
                1) - N^xi * psigamma(-xi + 1) * gamma(-xi + 1))/(mu - z))/xi - ((N^xi * logN * gamma(-xi +
                1) - N^xi * psigamma(-xi + 1) * gamma(-xi + 1)) * (mu - dat)/(mu - z)^2 - (N^xi * logN *
                gamma(-xi + 1) - N^xi * psigamma(-xi + 1) * gamma(-xi + 1))/(mu - z)) * (1/xi + 1)/((N^xi *
                gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1) + (N^xi * logN * gamma(-xi + 1) - N^xi *
                psigamma(-xi + 1) * gamma(-xi + 1)) * (mu - dat) * ((N^xi * gamma(-xi + 1) - 1) * (mu -
                dat)/(mu - z)^2 - (N^xi * gamma(-xi + 1) - 1)/(mu - z)) * (1/xi + 1)/(((N^xi * gamma(-xi +
                1) - 1) * (mu - dat)/(mu - z) + 1)^2 * (mu - z)) - ((N^xi * gamma(-xi + 1) - 1) * (mu -
                dat)/(mu - z) + 1)^(-1/xi - 1) * ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z)^2 -
                (N^xi * gamma(-xi + 1) - 1)/(mu - z))/xi^2 + ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu -
                z)^2 - (N^xi * gamma(-xi + 1) - 1)/(mu - z))/(((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu -
                z) + 1) * xi^2))
            k22 <- sum((N^xi * gamma(-xi + 1) - 1)^2 * ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu -
                z) + 1)^(-1/xi - 2) * (mu - dat)^2 * (1/xi + 1)/((mu - z)^4 * xi) - 2 * (N^xi * gamma(-xi +
                1) - 1) * ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1)^(-1/xi - 1) * (mu -
                dat)/((mu - z)^3 * xi) - (N^xi * gamma(-xi + 1) - 1)^2 * (mu - dat)^2 * (1/xi + 1)/(((N^xi *
                gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1)^2 * (mu - z)^4) + 2 * (N^xi * gamma(-xi +
                1) - 1) * (mu - dat) * (1/xi + 1)/(((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) +
                1) * (mu - z)^3) - 1/(mu - z)^2)
            k23 <- sum((N^xi * gamma(-xi + 1) - 1) * ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu -
                z) + 1)^(-1/xi - 1) * (mu - dat) * ((N^xi * logN * gamma(-xi + 1) - N^xi * psigamma(-xi +
                1) * gamma(-xi + 1)) * (mu - dat) * (1/xi + 1)/(((N^xi * gamma(-xi + 1) - 1) * (mu -
                dat)/(mu - z) + 1) * (mu - z)) - log1p((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z))/
                  xi^2)/((mu - z)^2 * xi) - (N^xi * logN * gamma(-xi + 1) - N^xi * psigamma(-xi +
                1) * gamma(-xi + 1)) * ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1)^(-1/xi -
                1) * (mu - dat)/((mu - z)^2 * xi) - (N^xi * logN * gamma(-xi + 1) - N^xi * psigamma(-xi +
                1) * gamma(-xi + 1)) * (N^xi * gamma(-xi + 1) - 1) * (mu - dat)^2 * (1/xi + 1)/(((N^xi *
                gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1)^2 * (mu - z)^3) + (N^xi * logN * gamma(-xi +
                1) - N^xi * psigamma(-xi + 1) * gamma(-xi + 1)) * (mu - dat) * (1/xi + 1)/(((N^xi * gamma(-xi +
                1) - 1) * (mu - dat)/(mu - z) + 1) * (mu - z)^2) + (N^xi * gamma(-xi + 1) - 1) * ((N^xi *
                gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1)^(-1/xi - 1) * (mu - dat)/((mu - z)^2 *
                xi^2) - (N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(((N^xi * gamma(-xi + 1) - 1) * (mu -
                dat)/(mu - z) + 1) * (mu - z)^2 * xi^2))
            k33 <- sum(((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1)^(-1/xi) * ((N^xi * logN *
                gamma(-xi + 1) - N^xi * psigamma(-xi + 1) * gamma(-xi + 1)) * (mu - dat)/(((N^xi * gamma(-xi +
                1) - 1) * (mu - dat)/(mu - z) + 1) * (mu - z) * xi) - log1p((N^xi * gamma(-xi + 1) - 1) *
                (mu - dat)/(mu - z))/xi^2)^2 + ((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) +
                1)^(-1/xi) * ((N^xi * logN * gamma(-xi + 1) - N^xi * psigamma(-xi + 1) * gamma(-xi +
                1))^2 * (mu - dat)^2/(((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1)^2 * (mu -
                z)^2 * xi) - (N^xi * logN^2 * gamma(-xi + 1) - 2 * N^xi * logN * psigamma(-xi + 1) *
                gamma(-xi + 1) + N^xi * psigamma(-xi + 1)^2 * gamma(-xi + 1) + N^xi * psigamma(1, -xi +
                1) * gamma(-xi + 1)) * (mu - dat)/(((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) +
                1) * (mu - z) * xi) + 2 * (N^xi * logN * gamma(-xi + 1) - N^xi * psigamma(-xi + 1) *
                gamma(-xi + 1)) * (mu - dat)/(((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1) *
                (mu - z) * xi^2) - 2 * log1p((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z))/xi^3) -
                (N^xi * logN * gamma(-xi + 1) - N^xi * psigamma(-xi + 1) * gamma(-xi + 1))^2 * (mu -
                  dat)^2 * (1/xi + 1)/(((N^xi * gamma(-xi + 1) - 1) * (mu - dat)/(mu - z) + 1)^2 * (mu -
                  z)^2) + (N^xi * logN^2 * gamma(-xi + 1) - 2 * N^xi * logN * psigamma(-xi + 1) *
                gamma(-xi + 1) + N^xi * psigamma(-xi + 1)^2 * gamma(-xi + 1) + N^xi * psigamma(1, -xi +
                1) * gamma(-xi + 1)) * (mu - dat) * (1/xi + 1)/(((N^xi * gamma(-xi + 1) - 1) * (mu -
                dat)/(mu - z) + 1) * (mu - z)) + (N^xi * gamma(-xi + 1) - 1) * (2 * (N^xi * logN *
                gamma(-xi + 1) - N^xi * psigamma(-xi + 1) * gamma(-xi + 1))^2 * (mu - z) * xi/(N^xi *
                gamma(-xi + 1) - 1)^3 - (N^xi * logN^2 * gamma(-xi + 1) - 2 * N^xi * logN * psigamma(-xi +
                1) * gamma(-xi + 1) + N^xi * psigamma(-xi + 1)^2 * gamma(-xi + 1) + N^xi * psigamma(1,
                -xi + 1) * gamma(-xi + 1)) * (mu - z) * xi/(N^xi * gamma(-xi + 1) - 1)^2 - 2 * (N^xi *
                logN * gamma(-xi + 1) - N^xi * psigamma(-xi + 1) * gamma(-xi + 1)) * (mu - z)/(N^xi *
                gamma(-xi + 1) - 1)^2)/((mu - z) * xi) - (N^xi * logN * gamma(-xi + 1) - N^xi * psigamma(-xi +
                1) * gamma(-xi + 1)) * ((N^xi * logN * gamma(-xi + 1) - N^xi * psigamma(-xi + 1) *
                gamma(-xi + 1)) * (mu - z) * xi/(N^xi * gamma(-xi + 1) - 1)^2 - (mu - z)/(N^xi * gamma(-xi +
                1) - 1))/((mu - z) * xi) + (N^xi * gamma(-xi + 1) - 1) * ((N^xi * logN *