Nothing
#' Quantile function for generalized pareto
#' @noRd
qgeneralized_pareto <- function(p, mu = 0, sigma = 1, k = 0, lower.tail = TRUE, log.p = FALSE) {
stopifnot(length(mu) == 1 && length(sigma) == 1 && length(k) == 1)
if (is.na(sigma) || sigma <= 0) {
return(rep(NaN, length(p)))
}
if (log.p) {
p <- exp(p)
}
if (!lower.tail) {
p <- 1 - p
}
if (k == 0) {
q <- mu - sigma * log1p(-p)
} else {
q <- mu + sigma * expm1(-k * log1p(-p)) / k
}
q
}
#' Estimate parameters of the Generalized Pareto distribution
#'
#' Given a sample \eqn{x}, Estimate the parameters \eqn{k} and \eqn{\sigma} of
#' the generalized Pareto distribution (GPD), assuming the location parameter is
#' 0. By default the fit uses a prior for \eqn{k} (this is in addition to the prior described by Zhang and Stephens, 2009), which will stabilize
#' estimates for very small sample sizes (and low effective sample sizes in the
#' case of MCMC samples). The weakly informative prior is a Gaussian prior
#' centered at 0.5 (see details in Vehtari et al., 2022).
#'
#' @param x A numeric vector. The sample from which to estimate the parameters.
#' @param wip Logical indicating whether to adjust \eqn{k} based on a weakly
#' informative Gaussian prior centered on 0.5. Defaults to `TRUE`.
#' @param min_grid_pts The minimum number of grid points used in the fitting
#' algorithm. The actual number used is `min_grid_pts + floor(sqrt(length(x)))`.
#' @param sort_x If `TRUE` (the default), the first step in the fitting
#' algorithm is to sort the elements of `x`. If `x` is already
#' sorted in ascending order then `sort_x` can be set to `FALSE` to
#' skip the initial sorting step.
#' @return A named list with components `k` and `sigma`.
#'
#' @details Here the parameter \eqn{k} is the negative of \eqn{k} in Zhang &
#' Stephens (2009).
#'
#'
#' @references
#' Zhang, J., and Stephens, M. A. (2009). A new and efficient estimation method
#' for the generalized Pareto distribution. *Technometrics* **51**, 316-325.
#'
#' @noRd
gpdfit <- function(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE) {
# see section 4 of Zhang and Stephens (2009)
if (sort_x) {
x <- sort.int(x)
}
N <- length(x)
prior <- 3
M <- min_grid_pts + floor(sqrt(N))
jj <- seq_len(M)
xstar <- x[floor(N / 4 + 0.5)] # first quartile of sample
theta <- 1 / x[N] + (1 - sqrt(M / (jj - 0.5))) / prior / xstar
k <- matrixStats::rowMeans2(log1p(-theta %o% x))
l_theta <- N * (log(-theta / k) - k - 1) # profile log-lik
w_theta <- exp(l_theta - matrixStats::logSumExp(l_theta)) # normalize
theta_hat <- sum(theta * w_theta)
k_hat <- mean.default(log1p(-theta_hat * x))
sigma_hat <- -k_hat / theta_hat
# adjust k_hat based on weakly informative prior, Gaussian centered on 0.5.
# this stabilizes estimates for very small Monte Carlo sample sizes and low neff (see Vehtari et al., 2022 for details)
if (wip) {
k_hat <- (k_hat * N + 0.5 * 10) / (N + 10)
}
if (is.na(k_hat)) {
k_hat <- Inf
sigma_hat <- NaN
}
list(k = k_hat, sigma = sigma_hat)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.