R/extgp.R

Defines functions egp.pll regp qegp degp pegp qegp.G7 qegp.G6 qegp.G5 qegp.G4 qegp.G3 qegp.G2 qegp.G1 degp.G7 degp.G6 degp.G5 degp.G4 degp.G3 degp.G2 degp.G1 pegp.G7 pegp.G6 pegp.G5 pegp.G4 pegp.G3 pegp.G2 pegp.G1 tstab.egp egp.fitrange print.mev_egp fit.egp egp.retlev egp.ll egp.fit

Documented in egp.fit egp.fitrange egp.ll egp.retlev fit.egp tstab.egp

#' Extended generalised Pareto families
#'
#' @description This function provides the log-likelihood and quantiles for the three different families presented
#' in Papastathopoulos and Tawn (2013) and the two proposals of Gamet and Jalbert (2022), plus exponential tilting. All of the models contain an additional parameter, \eqn{\kappa \ge 0}.
#' All families share the same tail index as the generalized Pareto distribution, while allowing for lower thresholds.
#' For most models, the distribution reduce to the generalised Pareto when \eqn{\kappa=1} (for models \code{gj-tnorm} and \code{logist}, on the boundary of the parameter space when \eqn{kappa \to 0}.
#'
#' @references Papastathopoulos, I. and J. Tawn (2013). Extended generalised Pareto models for tail estimation, \emph{Journal of Statistical Planning and Inference} \bold{143}(3), 131--143, <doi:10.1016/j.jspi.2012.07.001>.
#' @references Gamet, P. and Jalbert, J. (2022). A flexible extended generalized Pareto distribution for tail estimation. \emph{Environmetrics}, \bold{33}(6), <doi:10.1002/env.2744>.
#' @section Usage:
#' \code{egp.ll(xdat, thresh, model, par)}
#' @section Usage:
#' \code{egp.retlev(xdat, thresh, par, model, p, plot=TRUE)}
#' @description \code{egp.retlev} gives the return levels for the extended generalised Pareto distributions
#' @name egp
#' @param xdat vector of observations, greater than the threshold
#' @param thresh threshold value
#' @param par parameter vector (\eqn{\kappa}, \eqn{\sigma}, \eqn{\xi}).
#' @param model a string indicating which extended family to fit
#' @param show logical; if \code{TRUE}, print the results of the optimization
#' @param p extreme event probability; \code{p} must be greater than the rate of exceedance for the calculation to make sense. See \bold{Details}.
#' @param plot logical; if \code{TRUE}, a plot of the return levels
#' @importFrom grDevices rainbow
#'
#' @details
#'
#' For return levels, the \code{p} argument can be related to \eqn{T} year exceedances as follows:
#' if there are \eqn{n_y} observations per year, than take \code{p}
#' to equal \eqn{1/(Tn_y)} to obtain the \eqn{T}-years return level.
#' @author Leo Belzile
#' @return \code{egp.ll} returns the log-likelihood value.
#' @return \code{egp.retlev} returns a plot of the return levels if \code{plot=TRUE} and a matrix of return levels.
#' @examples
#' set.seed(123)
#' xdat <- mev::rgp(1000, loc = 0, scale = 2, shape = 0.5)
#' par <- fit.egp(xdat, thresh = 0, model = 'gj-beta')$par
#' p <- c(1/1000, 1/1500, 1/2000)
#' # With multiple thresholds
#' th <- c(0, 0.1, 0.2, 1)
#' opt <- tstab.egp(xdat, th, model = 'gj-beta')
#' egp.retlev(xdat, opt$thresh, opt$par, 'gj-beta', p = p)
#' opt <- tstab.egp(xdat, th, model = 'pt-power', plots = NA)
#' egp.retlev(xdat, opt$thresh, opt$par, 'pt-power', p = p)
NULL

#' Fit of extended GP models and parameter stability plots
#'
#' This function is an alias of \code{\link{fit.egp}}.
#'
#' Supported for backward compatibility
#'
#' @export
#' @keywords internal
egp.fit <- function(
  xdat,
  thresh,
  model = c(
    "pt-beta",
    "pt-gamma",
    "pt-power",
    "gj-tnorm",
    "gj-beta",
    "exptilt",
    "logist"
  ),
  init,
  show = FALSE
) {
  fit.egp(
    xdat = xdat,
    thresh = thresh,
    model = model,
    init = init,
    show = show
  )
}

#' Extended generalised Pareto log likelihood
#'
#' This function provides the log-likelihood and quantiles for the different extended generalized Pareto distributions. All families share the same tail index as the generalized Pareto, and reduce to the latter when \code{kappa=1}.
#' @export
#' @inheritParams egp
#' @name egp-function
#' @keywords internal
egp.ll <- function(
  xdat,
  thresh,
  par,
  model = c(
    "pt-beta",
    "pt-gamma",
    "pt-power",
    "gj-tnorm",
    "gj-beta",
    "exptilt",
    "logist"
  )
) {
  if (model %in% c("egp1", "egp2", "egp3") & length(model) == 1) {
    model <- switch(
      model,
      egp1 = "pt-beta",
      egp2 = "pt-gamma",
      egp3 = "pt-power"
    )
  }
  par <- as.numeric(par) # strip names
  model <- match.arg(model)
  if (isTRUE(any(xdat < thresh))) {
    xdat = xdat[xdat > thresh] - thresh
  }
  kappa = par[1]
  sigma = par[2]
  xi = par[3]
  if (sigma < 0 || kappa < 0) {
    return(-Inf)
  }
  args = pmax(0, (1 + xi * xdat / sigma))
  if (abs(xi) > 1e-08) {
    sum(degp(
      x = xdat,
      scale = sigma,
      shape = xi,
      kappa = kappa,
      model = model,
      log = TRUE
    ))
  } else {
    # if xi=0
    if (model %in% c("pt-beta", "pt-gamma")) {
      sum(dgamma(x = xdat, shape = kappa, scale = sigma, log = TRUE))
    } else {
      sum(degp(
        x = xdat,
        scale = sigma,
        shape = xi,
        kappa = kappa,
        model = model,
        log = TRUE
      ))
    }
  }
}

#' @name egp-function
#' @export
#' @inheritParams egp
#' @keywords internal
#' @importFrom grDevices hcl.colors
egp.retlev <- function(
  xdat,
  thresh,
  par,
  model = c(
    "pt-beta",
    "pt-gamma",
    "pt-power",
    "gj-tnorm",
    "gj-beta",
    "exptilt",
    "logist"
  ),
  p,
  plot = TRUE
) {
  if (model %in% c("egp1", "egp2", "egp3") & length(model) == 1) {
    model <- switch(
      model,
      egp1 = "pt-beta",
      egp2 = "pt-gamma",
      egp3 = "pt-power"
    )
  }
  model <- match.arg(model)
  if (length(par) %% 3 != 0) {
    stop("Invalid parameter input")
  }
  if (!inherits(par, "matrix")) {
    par = matrix(c(par), ncol = 3)
  }
  rate <- sapply(thresh, function(u) {
    length(xdat[xdat > u]) / length(xdat)
  })
  if (!isTRUE(all.equal(length(rate), length(thresh), nrow(par)))) {
    stop("Input dimension does not match")
  }
  retlev <- matrix(0, nrow = length(thresh), ncol = length(p))
  if (
    any(sapply(rate, function(zeta) {
      zeta < p
    }))
  ) {
    warning(
      "Some probabilities \"p\" are higher than the exceedance rate. Evaluate those empirically."
    )
  }
  p <- sort(p)
  pq <- rev(1 / p)
  np <- length(p)
  for (i in 1:length(thresh)) {
    for (j in 1:np) {
      pl = 1 - p[j] / rate[i]
      retlev[i, np - j + 1] <- thresh[i] +
        qegp(p = pl, scale = par[i, 2], shape = par[i, 3], kappa = par[i, 1])
    }
  }
  if (plot) {
    if (length(thresh) > 1L) {
      cols <- hcl.colors(
        n = length(thresh),
        palette = "viridis"
      )
    } else {
      cols <- "black"
    }
    matplot(
      pq,
      t(retlev),
      pch = 20,
      type = "b",
      lty = rep(1, length(thresh)),
      col = cols,
      xlab = "return period",
      ylab = "return level",
      bty = "l"
    )
    text <- switch(
      model,
      "pt-beta" = "Papastathopoulos-Tawn's EGP 1",
      "pt-gamma" = "Papastathopoulos-Tawn's EGP 2",
      "pt-power" = "Papastathopoulos-Tawn's EGP 3 (power)",
      "gj-tnorm" = "Gamet-Jonathan's truncated normal EGP",
      "gj-beta" = "Gamet-Jonathan's beta EGP",
      "logist" = "logistic EGP",
      "exptilt" = "exponential tilting EGP"
    )
    mtext(text, side = 3, cex = 0.9, line = 0.5, adj = 0)
    if (length(thresh) > 1) {
      legend(
        x = "bottomright",
        inset = c(0, 1),
        cex = 0.8,
        xpd = TRUE,
        horiz = TRUE,
        bty = "n",
        legend = thresh,
        col = cols,
        pch = 20
      )
    }
  }
  res <- retlev
  colnames(res) <- pq
  rownames(res) <- thresh
  return(invisible(res))
}

#' Parameter stability plot and maximum likelihood routine for extended GP models
#'
#' \code{fit.egp} is a numerical optimization routine to fit the extended generalised Pareto models of Papastathopoulos and Tawn (2013),
#' using maximum likelihood estimation.
#'
#' @references Papastathopoulos, I. and J. Tawn (2013). Extended generalised Pareto models for tail estimation, \emph{Journal of Statistical Planning and Inference} \bold{143}(3), 131--143.
#' @inheritParams egp
#' @author Leo Belzile
#' @param init vector of initial values, with \eqn{\log(\kappa)}{log(\kappa)} and \eqn{\log(\sigma)}{log(\sigma)}; can be omitted.
#' @return \code{fit.egp} outputs the list returned by \link[stats]{optim}, which contains the parameter values, the hessian and in addition the standard errors
#' @name fit.egp
#' @description The function \code{tstab.egp} provides classical threshold stability plot for (\eqn{\kappa}, \eqn{\sigma}, \eqn{\xi}).
#' The fitted parameter values are displayed with pointwise normal 95\% confidence intervals.
#' The function returns an invisible list with parameter estimates and standard errors, and p-values for the Wald test that \eqn{\kappa=1}.
#'  The plot is for the modified scale (as in the generalised Pareto model) and as such it is possible that the modified scale be negative.
#' \code{tstab.egp} can also be used to fit the model to multiple thresholds.
#' @param plots vector of integers specifying which parameter stability to plot (if any); passing \code{NA} results in no plots
#' @inheritParams egp
#' @param umin optional minimum value considered for threshold (if \code{thresh} is not provided)
#' @param umax optional maximum value considered for threshold (if \code{thresh} is not provided)
#' @param nint optional integer number specifying the number of thresholds to test.
#' @param changepar logical; if \code{TRUE}, the graphical parameters (via a call to \code{par}) are modified.
#' @return \code{tstab.egp} returns a plot(s) of the parameters fit over the range of provided thresholds, with pointwise normal confidence intervals; the function also returns an invisible list containing notably the matrix of point estimates (\code{par}) and standard errors (\code{se}).
#' @importFrom graphics arrows points polygon title
#' @export
#' @examples
#' xdat <- mev::rgp(
#'   n = 100,
#'   loc = 0,
#'   scale = 1,
#'   shape = 0.5)
#' fitted <- fit.egp(
#'   xdat = xdat,
#'   thresh = 1,
#'   model = "egp2",
#'   show = TRUE)
#' thresh <- mev::qgp(seq(0.1, 0.5, by = 0.05), 0, 1, 0.5)
#' tstab.egp(
#'    xdat = xdat,
#'    thresh = thresh,
#'    model = "egp2",
#'    plots = 1:3)
fit.egp <- function(
  xdat,
  thresh,
  model = c(
    "pt-beta",
    "pt-gamma",
    "pt-power",
    "gj-tnorm",
    "gj-beta",
    "exptilt",
    "logist"
  ),
  init,
  show = FALSE
) {
  if (model %in% c("egp1", "egp2", "egp3") & length(model) == 1) {
    model <- switch(
      model,
      egp1 = "pt-beta",
      egp2 = "pt-gamma",
      egp3 = "pt-power"
    )
  }
  model <- match.arg(model)
  if (length(thresh) > 1) {
    warning(
      "Length of threshold vector greater than one. Selecting first component."
    )
    thresh <- thresh[1]
  }
  if (sum(xdat > thresh[1]) < 4L) {
    stop("Not enough observations to fit an extended generalized Pareto model.")
  }
  # If no initial values are provided, fit a GP distribution to obtain them
  changinit <- missing(init)
  if (!changinit) {
    if (!isTRUE(any(init[1:2] < 0))) {
      changinit <- TRUE
    }
  }
  gpfit <- fit.gpd(xdat, threshold = thresh[1], show = FALSE)
  if (changinit) {
    init <- c(
      kappa = ifelse(model %in% c("gj-tnorm", "logist"), 0.01, 1.01),
      suppressWarnings(gpfit$est)
    )
    # Change starting values for boundary cases, otherwise the optimization stalls
    if (init[3] < -0.99) {
      init[3] <- -0.9
    }
  }
  if (
    !is.finite(egp.ll(par = init, xdat = xdat, thresh = thresh, model = model))
  ) {
    stop("Invalid starting parameters.")
  }
  # Keep exceedances only
  xdata <- xdat[xdat > thresh] - thresh
  xmax <- max(xdata)
  mle <- alabama::auglag(
    par = init,
    fn = function(par, xdat, thresh, model) {
      -egp.ll(par = par, xdat = xdata, thresh = thresh, model = model)
    },
    hin = function(par, ...) {
      c(
        par[1] - 1e-10,
        par[2] - 1e-10,
        par[3] + 1,
        ifelse(par[3] < 0, thresh - par[2] / par[3] - xmax, 1)
      )
    },
    xdat = xdata,
    thresh = 0,
    model = model,
    control.outer = list(trace = FALSE, method = "BFGS"),
    control.optim = list(maxit = 500, reltol = 1e-10)
  )
  browser()
  boundary <- FALSE
  if (model %in% c("gj-tnorm", "logist")) {
    if (isTRUE(mle$value <= gpfit$nllh)) {
      boundary <- TRUE
      mle$par <- c(0, coef(gpfit))
      mle$value <- gpfit$nllh
      # Compute standard errors by hand!
      if (model == "gj-tnorm") {
        info_kappa <- function(par) {
          sigma <- par[1]
          xi <- par[2]
          c(
            length(xdata) / 45,
            sum(xdata * (xdata * xi / sigma + 1)^(-2 / xi - 1) / sigma^2),
            -sum(
              (-log(xdata * xi / sigma + 1) /
                xi^2 +
                xdata / (sigma * (xdata * xi / sigma + 1) * xi)) /
                (xdata * xi / sigma + 1)^(2 / xi)
            )
          )
        }
        info_coef <- info_kappa(coef(gpfit))
        mle$hessian <- rbind(
          kappa = info_coef,
          cbind(info_coef[-1], solve(gpfit$vcov))
        )
      }
    }
  }
  fitted <- list()
  fitted$estimate <- fitted$param <- mle$par
  fitted$deviance <- 2 * mle$value
  fitted$nllh <- mle$value
  if (mle$convergence == 0) {
    fitted$convergence <- "successful"
    fitted$vcov <- try(solve(mle$hessian))
    fitted$std.err <- try(sqrt(diag(fitted$vcov)))
    if (inherits(fitted$std.err, what = "try-error") || mle$par[3] < -0.5) {
      fitted$vcov <- NULL
      fitted$se <- rep(NA, 3)
    }
  } else {
    fitted$convergence <- mle$convergence
    warning(
      "Maximization routine may have failed; check output and try providing better starting values"
    )
  }
  names(fitted$estimate) <- names(fitted$std.err) <- c(
    "kappa",
    "scale",
    "shape"
  )
  if (!is.null(fitted$vcov)) {
    colnames(fitted$vcov) <- rownames(fitted$vcov) <- c(
      "kappa",
      "scale",
      "shape"
    )
  }
  fitted$counts <- mle$counts
  fitted$threshold <- thresh
  fitted$nat <- length(xdata)
  fitted$pat <- length(xdata) / length(xdat)
  fitted$exceedances <- xdata
  fitted$hessian <- mle$hessian
  fitted$method <- "copt"
  fitted$model <- model
  class(fitted) <- c("mev_egp")
  if (show) {
    print(fitted)
  }
  return(invisible(fitted))
}

# @param x A fitted object of class \code{mev_gpd}.
# @param digits Number of digits to display in \code{print} call.
# @param ... Additional argument passed to \code{print}.
#' @export
print.mev_egp <- function(x, digits = max(3, getOption("digits") - 3), ...) {
  text <- switch(
    x$model,
    "pt-beta" = "Papastathopoulos-Tawn's EGP 1",
    "pt-gamma" = "Papastathopoulos-Tawn's EGP 2",
    "pt-power" = "Papastathopoulos-Tawn's EGP 3 (power)",
    "gj-tnorm" = "Gamet-Jonathan's truncated normal EGP",
    "gj-beta" = "Gamet-Jonathan's beta EGP",
    "logist" = "logistic EGP",
    "exptilt" = "exponential tilting EGP"
  )
  cat("Model:", text, "\n")
  cat("Deviance:", round(x$deviance, digits), "\n")

  cat("\nThreshold:", round(x$threshold, digits), "\n")
  cat("Number Above:", x$nat, "\n")
  cat("Proportion Above:", round(x$pat, digits), "\n")

  cat("\nEstimates\n")
  print.default(
    format(x$estimate, digits = digits),
    print.gap = 2,
    quote = FALSE,
    ...
  )
  if (!is.na(x$std.err[1]) && x$estimate[3] > -0.5) {
    cat("\nStandard Errors\n")
    print.default(
      format(x$std.err, digits = digits),
      print.gap = 2,
      quote = FALSE,
      ...
    )
  }
  cat("\nOptimization Information\n")
  cat("  Convergence:", x$convergence, "\n")
  cat("  Function Evaluations:", x$counts["function"], "\n")
  cat("  Gradient Evaluations:", x$counts["gradient"], "\n")
  cat("\n")
  invisible(x)
}

#' Deprecated function for parameter stability plots
#'
#' @export
#' @keywords internal
egp.fitrange <-
  function(
    xdat,
    thresh,
    model = c("egp1", "egp2", "egp3"),
    plots = 1:3,
    umin,
    umax,
    nint
  ) {
    tstab.egp(
      xdat = xdat,
      thresh = thresh,
      model = model,
      plots = plots,
      umin = umin,
      umax = umax,
      nint = nint
    )
  }

#' @inheritParams egp
#' @param ... additional arguments for the plot function, currently ignored
#' @rdname fit.egp
#' @export
tstab.egp <- function(
  xdat,
  thresh,
  model = c(
    "pt-beta",
    "pt-gamma",
    "pt-power",
    "gj-tnorm",
    "gj-beta",
    "exptilt",
    "logist"
  ),
  plots = 1:3,
  type = c("wald", "profile"),
  umin,
  umax,
  nint,
  changepar = TRUE,
  ...
) {
  if (model %in% c("egp1", "egp2", "egp3") & length(model) == 1) {
    model <- switch(
      model,
      egp1 = "pt-beta",
      egp2 = "pt-gamma",
      egp3 = "pt-power"
    )
  }
  model <- match.arg(model)
  type <- match.arg(type)
  if (missing(thresh) && isTRUE(any(c(missing(umin), missing(umax))))) {
    stop(
      "Must provide either minimum and maximum threshold values, or a vector of threshold \"thresh\"."
    )
  } else if (missing(thresh)) {
    stopifnot(
      inherits(umin, c("integer", "numeric")),
      inherits(umax, c("integer", "numeric")),
      length(umin) == 1,
      length(umax) == 1,
      umin < umax
    )
    thresh <- seq(umin, umax, length = nint)
  } else if (length(thresh) <= 1) {
    stop(
      "Invalid argument\"thresh\" provided;\n please use a vector of threshold candidates of length at least 2"
    )
  }
  pe <- se <- matrix(NA, ncol = 4, nrow = length(thresh))
  conv <- rep(0, length(thresh))
  fit <- try(suppressWarnings(fit.egp(
    xdat = xdat,
    thresh = thresh[1],
    model = model
  )))
  if (!inherits(fit, "try-error")) {
    pe[1, -4] <- fit$param
    colnames(pe) <- colnames(se) <- c(names(fit$param), "modif scale")
    se[1, -4] <- fit$std.err
    conv[1] <- ifelse(is.character(fit$convergence), 0, fit$convergence)
    se[1, 4] <- sqrt(
      cbind(1, -thresh[1]) %*%
        solve(fit$hessian[-1, -1]) %*%
        rbind(1, -thresh[1])
    )[1]
  }
  for (i in 2:length(thresh)) {
    fit <- try(
      suppressWarnings(
        fit.egp(
          xdat = xdat,
          thresh = thresh[i],
          model = model,
          init = pe[i - 1, -4]
        )
      ),
      silent = TRUE
    )
    if (inherits(fit, "try-error")) {
      fit <- try(
        suppressWarnings(
          fit.egp(
            xdat = xdat,
            thresh = thresh[i],
            model = model
          )
        ),
        silent = TRUE
      )
    }
    if (!inherits(fit, "try-error")) {
      pe[i, -4] <- fit$param
      se[i, -4] <- fit$std.err
      conv[i] <- ifelse(is.character(fit$convergence), 0, fit$convergence)
      # Standard error for the modified scale via the delta-method
      se[i, 4] <- sqrt(
        cbind(1, -thresh[i]) %*%
          solve(fit$hessian[-1, -1]) %*%
          rbind(1, -thresh[i])
      )[1]
      # Modify point estimates for the modif scale (all at once)
      pe[, 4] <- pe[, 2] - pe[, 3] * thresh
    }
  }
  # Graphics
  plots <- plots[is.finite(plots)]
  if (!isTRUE(all(conv == 0))) {
    warning(paste("Convergence failed for", sum(conv != 0), "thresholds"))
    plots <- NULL
  }
  if (length(plots) > 0 & !isTRUE(all(is.na(pe)))) {
    plots <- sort(unique(plots))
    if (!isTRUE(all(plots %in% 1:3))) {
      stop(
        "Invalid plot selection. Must be a vector of integers containing indices 1, 2 or 3."
      )
    }
    if (changepar) {
      old.par <- par(no.readonly = TRUE)
      on.exit(par(old.par))
      par(mfrow = c(1, length(plots)), mar = c(4.5, 4.5, 3.1, 0.1))
    }
    for (i in plots) {
      if (i == 2) {
        i <- 4
      } #Get modified scale
      # Plotting devices limits
      ylims = c(
        min(pe[, i]) - qnorm(0.975) * max(se[, i]),
        max(pe[, i]) + qnorm(0.975) * max(se[, i])
      )
      plot(
        x = thresh,
        y = pe[, i],
        pch = 20,
        xlab = "threshold",
        bty = "l",
        ylab = switch(
          i,
          expression(kappa),
          expression(sigma),
          expression(xi),
          expression(tilde(sigma))
        ),
        ylim = ylims,
        type = "n"
      )
      polygon(
        x = c(thresh, rev(thresh)),
        y = c(
          pe[, i] - qnorm(0.975) * se[, i],
          rev(pe[, i] + qnorm(0.975) * se[, i])
        ),
        col = "gray95",
        border = FALSE
      )
      # if (i == min(plots)) {
      # title(
      #   paste0("Parameter stability plots for EGP", substr(model, 4, 4), ""),
      #   outer = FALSE
      # )
      # }
      if (i == max(plots)) {
        text <- switch(
          model,
          "pt-beta" = "Papastathopoulos-Tawn's EGP 1",
          "pt-gamma" = "Papastathopoulos-Tawn's EGP 2",
          "pt-power" = "Papastathopoulos-Tawn's EGP 3 (power)",
          "gj-tnorm" = "Gamet-Jonathan's truncated normal EGP",
          "gj-beta" = "Gamet-Jonathan's beta EGP",
          "logist" = "logistic EGP",
          "exptilt" = "exponential tilting EGP"
        )
        mtext(text, side = 3, cex = 0.9, line = 0.5, adj = 1)
      }
      if (i == 1) {
        abline(h = 1, lwd = 0.5, col = "gray20", lty = 2)
      }
      arrows(
        x0 = thresh,
        y0 = pe[, i] - qnorm(0.975) * se[, i],
        y1 = pe[, i] + qnorm(0.975) * se[, i],
        length = 0.05,
        angle = 90,
        code = 3
      )
      points(x = thresh, y = pe[, i], type = "p", pch = 20)
    }
  }
  pval <- 2 * pnorm(abs(pe[, 1] - 1) / se[, 1], lower.tail = FALSE)
  return(invisible(list(
    par = pe[, -4],
    se = se[, -4],
    model = model,
    pval = pval,
    conv = conv,
    thresh = thresh
  )))
}


pegp.G1 <- function(x, kappa, shape) {
  x <- pmin(1, pmax(0, x))
  stopifnot(length(kappa) == 1L, kappa > 0)
  pbeta(1 - (1 - x)^(abs(shape)), kappa, 1 / abs(shape))
}

pegp.G2 <- function(x, kappa) {
  x <- pmin(1, pmax(0, x))
  stopifnot(length(kappa) == 1L, kappa > 0)
  pgamma(-log(1 - x), shape = kappa) # INCORRECT FORMULA
}

pegp.G3 <- function(x, kappa) {
  x <- pmin(1, pmax(0, x))
  stopifnot(length(kappa) == 1L, kappa > 0)
  x^kappa
}

pegp.G4 <- function(x, kappa, a = 1 / 32) {
  x <- pmin(1, pmax(0, x))
  stopifnot(length(a) == 1L, a > 0, a < 0.5, length(kappa) == 1L, kappa > 0)
  cst <- (pbeta(0.5, kappa, kappa) - pbeta(a, kappa, kappa))
  (pbeta((0.5 - a) * x + a, shape1 = kappa, shape2 = kappa) -
    pbeta(a, kappa, kappa)) /
    cst
}

pegp.G5 <- function(x, kappa) {
  x <- pmin(1, pmax(0, x))
  if (kappa > 0) {
    (pnorm(q = x, mean = 1, sd = 1 / sqrt(kappa)) -
      pnorm(q = 0, mean = 1, sd = 1 / sqrt(kappa))) /
      (0.5 - pnorm(-sqrt(kappa)))
  } else {
    x
  }
}

pegp.G6 <- function(x, kappa) {
  x <- pmin(1, pmax(0, x))
  stopifnot(length(kappa) == 1L, kappa > 0)
  if (!isTRUE(all.equal(as.numeric(kappa), 1))) {
    (kappa^x - 1) / (kappa - 1)
  } else {
    x
  }
}

pegp.G7 <- function(x, kappa) {
  x <- pmin(1, pmax(0, x))
  stopifnot(length(kappa) == 1L, kappa >= 0)
  if (kappa == 0) {
    x
  } else {
    cst <- plogis(0, 1, 1 / kappa)
    (plogis(x, location = 1, scale = 1 / kappa) - cst) / (0.5 - cst)
  }
}

degp.G1 <- function(x, kappa, shape, log = FALSE) {
  # problems with zero and 1
  logdens <- rep(-Inf, length(x))
  logdens[is.na(x)] <- NA
  xval <- which(x < 1 & x > 0)
  if (kappa != 1) {
    logdens[xval] <- log(abs(shape)) +
      (abs(shape) - 1) * log(1 - x[xval]) +
      dbeta(1 - (1 - x[xval])^(abs(shape)), kappa, 1 / abs(shape), log = TRUE)
  } else {
    logdens[xval] <- 0
  }
  if (isTRUE(log)) {
    return(logdens)
  } else {
    return(exp(logdens))
  } # TODO limit when shape = 0
}

degp.G2 <- function(x, kappa, log = FALSE) {
  logdens <- rep(-Inf, length(x))
  logdens[is.na(x)] <- NA
  xval <- which(x < 1 & x >= 0)
  logdens[xval] <- dgamma(-log(1 - x[xval]), shape = kappa, log = TRUE) -
    log(1 - x[xval])
  if (isTRUE(log)) {
    return(logdens)
  } else {
    return(exp(logdens))
  }
}

degp.G3 <- function(x, kappa, log = FALSE) {
  logdens <- rep(-Inf, length(x))
  logdens[is.na(x)] <- NA
  xval <- which(x <= 1 & x >= 0)
  if (kappa != 1) {
    logdens[xval] <- log(kappa) + (kappa - 1) * log(x)
  } else {
    logdens[xval] <- 0
  }
  if (isTRUE(log)) {
    return(logdens)
  } else {
    return(exp(logdens))
  }
}

degp.G4 <- function(x, kappa, a = 1 / 32, log = FALSE) {
  logdens <- rep(-Inf, length(x))
  logdens[is.na(x)] <- NA
  xval <- which(x <= 1 & x >= 0)
  stopifnot(
    length(a) == 1L,
    a > 0,
    a < 0.5,
    length(kappa) == 1L,
    kappa > 0
  )
  logdens[xval] <- log(0.5 - a) +
    dbeta((0.5 - a) * x[xval] + a, shape1 = kappa, shape2 = kappa, log = TRUE) -
    log(pbeta(0.5, kappa, kappa) - pbeta(a, kappa, kappa))
  if (isTRUE(log)) {
    return(logdens)
  } else {
    return(exp(logdens))
  }
}

degp.G5 <- function(x, kappa, log = FALSE) {
  logdens <- rep(-Inf, length(x))
  logdens[is.na(x)] <- NA
  xval <- which(x <= 1 & x >= 0)
  stopifnot(kappa >= 0)
  if (kappa > 0) {
    logdens[xval] <- dnorm(
      x = x[xval],
      mean = 1,
      sd = 1 / sqrt(kappa),
      log = TRUE
    ) -
      log(0.5 - pnorm(-sqrt(kappa)))
  } else if (kappa == 0) {
    logdens[xval] <- 0
  }
  if (isTRUE(log)) {
    return(logdens)
  } else {
    return(exp(logdens))
  }
}

degp.G6 <- function(x, kappa, log = FALSE) {
  logdens <- rep(-Inf, length(x))
  logdens[is.na(x)] <- NA
  xval <- which(x <= 1 & x >= 0)
  stopifnot(length(kappa) == 1L, kappa > 0)
  if (!isTRUE(all.equal(as.numeric(kappa), 1))) {
    logdens[xval] <- x[xval] *
      log(kappa) +
      log(abs(log(kappa))) -
      log(abs(kappa - 1))
  } else {
    logdens[xval] <- 0
  }
  if (isTRUE(log)) {
    return(logdens)
  } else {
    return(exp(logdens))
  }
}

degp.G7 <- function(x, kappa, log = FALSE) {
  logdens <- rep(-Inf, length(x))
  logdens[is.na(x)] <- NA
  xval <- which(x <= 1 & x >= 0)
  if (kappa > 0) {
    logdens[xval] <- dlogis(x, location = 1, scale = 1 / kappa, log = TRUE) -
      log(0.5 - plogis(0, location = 1, scale = 1 / kappa))
  } else if (kappa == 0) {
    logdens[xval] <- 0
  }
  if (isTRUE(log)) {
    return(logdens)
  } else {
    return(exp(logdens))
  }
}


qegp.G1 <- function(x, kappa, shape) {
  x <- pmin(1, pmax(0, x))
  1 - (1 - qbeta(x, kappa, 1 / abs(shape)))^(1 / (abs(shape)))
}

qegp.G2 <- function(x, kappa) {
  x <- pmin(1, pmax(0, x))
  1 - exp(-qgamma(x, shape = kappa))
}

qegp.G3 <- function(x, kappa) {
  x <- pmin(1, pmax(0, x))
  x^(1 / kappa)
}

qegp.G4 <- function(x, kappa, a = 1 / 32) {
  x <- pmin(1, pmax(0, x))
  cst <- pbeta(0.5, kappa, kappa) - pbeta(a, kappa, kappa)
  (qbeta(x * cst + pbeta(a, kappa, kappa), kappa, kappa) - a) / (0.5 - a)
}

qegp.G5 <- function(x, kappa, a = 1 / 32) {
  x <- pmin(1, pmax(0, x))
  cst <- 0.5 - pnorm(-sqrt(kappa))
  qnorm(
    x * cst + pnorm(q = 0, mean = 1, sd = 1 / sqrt(kappa)),
    mean = 1,
    sd = 1 / sqrt(kappa)
  )
}

qegp.G6 <- function(x, kappa) {
  x <- pmin(1, pmax(0, x))
  if (kappa != 1) {
    log1p(x * (kappa - 1)) / log(kappa)
  } else {
    x
  }
}

qegp.G7 <- function(x, kappa, a = 1 / 32) {
  x <- pmin(1, pmax(0, x))
  stopifnot(length(kappa) == 1L, kappa >= 0)
  if (kappa > 0) {
    cst <- plogis(0, 1, scale = 1 / kappa)
    qlogis(x * (0.5 - cst) + cst, location = 1, scale = 1 / kappa)
  } else if (kappa == 0) {
    x
  }
}


pegp <- function(
  q,
  scale,
  shape,
  kappa,
  model = c(
    "pt-beta",
    "pt-gamma",
    "pt-power",
    "gj-tnorm",
    "gj-beta",
    "exptilt",
    "logist"
  ),
  lower.tail = TRUE,
  log.p = FALSE
) {
  model <- match.arg(model)
  stopifnot(
    length(kappa) == 1L,
    length(scale) == 1L,
    length(shape) == 1L,
    kappa > 0,
    scale > 0
  )
  if (model %in% c("pt-beta", "pt-gamma") & abs(shape) < 1e-8) {
    return(pgamma(
      q = q,
      scale = scale,
      shape = kappa,
      lower.tail = lower.tail,
      log.p = log.p
    ))
  }
  p <- pgp(q, loc = 0, scale = scale, shape = shape, lower.tail = lower.tail)
  pg <- switch(
    model,
    "pt-beta" = pegp.G1(pg, kappa = kappa, shape = shape),
    "pt-gamma" = pegp.G2(pg, kappa = kappa),
    "pt-power" = pegp.G3(pg, kappa = kappa),
    "gj-tnorm" = pegp.G5(pg, kappa = kappa),
    "gj-beta" = pegp.G4(pg, kappa = kappa),
    "exptilt" = pegp.G6(pg, kappa = kappa),
    "logist" = pegp.G7(pg, kappa = kappa)
  )
  if (!isTRUE(log.p)) {
    return(pg)
  } else {
    return(log(pg))
  }
}

degp <- function(
  x,
  scale,
  shape,
  kappa,
  model = c(
    "pt-beta",
    "pt-gamma",
    "pt-power",
    "gj-tnorm",
    "gj-beta",
    "exptilt",
    "logist"
  ),
  log = FALSE
) {
  stopifnot(
    length(kappa) == 1L,
    length(scale) == 1L,
    length(shape) == 1L,
    kappa >= 0,
    scale > 0
  )
  model <- match.arg(model)
  pg <- pgp(q = x, scale = scale, shape = shape)
  logdens <- switch(
    model,
    "pt-beta" = degp.G1(pg, kappa = kappa, shape = shape, log = TRUE),
    "pt-gamma" = degp.G2(pg, kappa = kappa, log = TRUE),
    "pt-power" = degp.G3(pg, kappa = kappa, log = TRUE),
    "gj-tnorm" = degp.G5(pg, kappa = kappa, log = TRUE),
    "gj-beta" = degp.G4(pg, kappa = kappa, log = TRUE),
    "exptilt" = degp.G6(pg, kappa = kappa, log = TRUE),
    "logist" = degp.G7(pg, kappa = kappa, log = TRUE)
  ) +
    dgp(x = x, scale = scale, shape = shape, log = TRUE)
  if (isTRUE(log)) {
    return(logdens)
  } else {
    return(exp(logdens))
  }
}

qegp <- function(
  p,
  scale,
  shape,
  kappa,
  model = c(
    "pt-beta",
    "pt-gamma",
    "pt-power",
    "gj-tnorm",
    "gj-beta",
    "exptilt",
    "logist"
  ),
  lower.tail = TRUE
) {
  stopifnot(
    length(kappa) == 1L,
    length(scale) == 1L,
    length(shape) == 1L,
    kappa >= 0,
    scale > 0
  )
  model <- match.arg(model)
  if (!isTRUE(lower.tail)) {
    p <- 1 - p
  }
  qu <- rep(NA, length(p))
  vals <- is.finite(p) & p >= 0 & p <= 1
  if (model %in% c("pt-beta", "pt-gamma") & abs(shape) < 1e-8) {
    qu[vals] <- qgamma(
      p = p[vals],
      scale = scale,
      shape = kappa,
      lower.tail = lower.tail
    )
  } else {
    qu[vals] <- qgp(
      switch(
        model,
        "pt-beta" = qegp.G1(p[vals], kappa = kappa, shape = shape),
        "pt-gamma" = qegp.G2(p[vals], kappa = kappa),
        "pt-power" = qegp.G3(p[vals], kappa = kappa),
        "gj-tnorm" = qegp.G5(p[vals], kappa = kappa),
        "gj-beta" = qegp.G4(p[vals], kappa = kappa),
        "exptilt" = qegp.G6(p[vals], kappa = kappa),
        "logist" = qegp.G7(p[vals], kappa = kappa)
      ),
      scale = scale,
      shape = shape
    )
  }
  return(qu)
}


regp <- function(
  n,
  scale,
  shape,
  kappa,
  model = c(
    "pt-beta",
    "pt-gamma",
    "pt-power",
    "gj-tnorm",
    "gj-beta",
    "exptilt",
    "logist"
  )
) {
  stopifnot(
    length(kappa) == 1L,
    length(scale) == 1L,
    length(shape) == 1L,
    kappa >= 0,
    scale > 0
  )
  model <- match.arg(model)
  pg <- switch(
    model,
    "pt-beta" = qegp.G1(runif(n), kappa = kappa, shape = shape),
    "pt-gamma" = qegp.G2(runif(n), kappa = kappa),
    "pt-power" = qegp.G3(runif(n), kappa = kappa),
    "gj-tnorm" = qegp.G5(runif(n), kappa = kappa),
    "gj-beta" = qegp.G4(runif(n), kappa = kappa),
    "exptilt" = qegp.G6(runif(n), kappa = kappa),
    "logist" = qegp.G7(runif(n), kappa = kappa)
  )
}


egp.pll <- function(
  psi,
  model = c(
    "pt-beta",
    "pt-gamma",
    "pt-power",
    "gj-tnorm",
    "gj-beta",
    "exptilt",
    "logist"
  ),
  param = c("shape", "kappa"),
  mle = NULL,
  xdat,
  threshold = NULL,
  plot = FALSE
) {
  param <- match.arg(param)
  if (is.null(threshold)) {
    threshold <- 0
  } else {
    stopifnot(
      is.numeric(threshold),
      length(threshold) == 1L
    )
    stopifnot(threshold >= 0)
    xdat <- xdat[xdat > threshold] - threshold
  }
  if (is.null(mle)) {
    mle <- try(
      fit.egp(xdat = xdat, thresh = threshold, model = model),
      silent = TRUE
    )
    if (inherits(mle, "try-error")) {
      stop("Could not find maximum likelihood estimator.")
    }
  } else {
    stopifnot(inherits(mle, what = "mev_egp"))
  }
  ind <- switch(param, kappa = 1L, shape = 3L)
  coef_mle <- coef(mle)[ind]
  se_mle <- sqrt(diag(vcov(mle))[ind])
  if (missing(psi)) {
    if (is.numeric(se_mle) & is.finite(se_mle)) {
      psi <- seq(-3 * se_mle, 3 * se_mle, length.out = 55) + coef_mle
    } else {
      stop("Could not determine a suitable sequence of values for profiling.")
    }
  } else {
    psi <- sort(unique(c(psi, coef_mle)))
  }
  if (param == "kappa") {
    psi <- psi[psi > 0]
  } else if (param == "shape") {
    psi <- psi[psi > -1]
  }
  mid <- which(psi == coef_mle)
  # should be 28 for seq of psi if no zero values
  pars <- matrix(nrow = length(psi), ncol = 3L)
  pll <- numeric(length(psi))
  pll[mid] <- -mle$nllh
  pars[mid, ] <- coef(mle)
  xmax <- max(xdat)
  if (param == "kappa") {
    nll_egp_kappa <- function(eta, psi, xdat, model) {
      -egp.ll(
        xdat = xdat,
        thresh = 0,
        par = c(psi, eta),
        model = model
      )
    }
    for (i in seq_len(mid - 1)) {
      fit_const <- alabama::auglag(
        par = pars[mid - i + 1, -1],
        fn = nll_egp_kappa,
        hin = function(par, ...) {
          c(
            par[1] - 1e-10,
            par[2] + 1,
            ifelse(par[2] < 0, -par[1] / par[2] - xmax, 1)
          )
        },
        xdat = xdat,
        psi = psi[mid - i],
        model = model,
        control.outer = list(trace = FALSE, method = "nlminb")
      )
      pars[mid - i, ] <- c(psi[mid - i], fit_const$par)
      pll[mid - i] <- -fit_const$value
    }
    for (i in seq_len(length(psi) - mid)) {
      fit_const <- alabama::auglag(
        par = pars[mid + i - 1, -1],
        fn = nll_egp_kappa,
        hin = function(par, ...) {
          c(
            par[1] - 1e-10,
            par[2] + 1,
            ifelse(par[2] < 0, -par[1] / par[2] - xmax, 1)
          )
        },
        xdat = xdat,
        psi = psi[mid + i],
        model = model,
        control.outer = list(trace = FALSE, method = "nlminb")
      )
      pars[mid + i, ] <- c(psi[mid + i], fit_const$par)
      pll[mid + i] <- -fit_const$value
    }
  } else if (param == "shape") {
    nll_egp_shape <- function(eta, psi, xdat, model) {
      -egp.ll(
        xdat = xdat,
        thresh = 0,
        par = c(exp(eta), psi),
        model = model
      )
    }
    for (i in seq_len(mid - 1)) {
      fit_const <- alabama::auglag(
        par = log(pars[mid - i + 1, -3]),
        fn = nll_egp_shape,
        hin = function(par, psi, ...) {
          # TODO check if this is correct
          c(ifelse(psi < 0, -exp(par[2]) / psi - xmax, 1))
        },
        xdat = xdat,
        psi = psi[mid - i],
        model = model,
        control.outer = list(trace = FALSE, method = "nlminb")
      )
      pars[mid - i, ] <- c(exp(fit_const$par), psi[mid - i])
      pll[mid - i] <- -fit_const$value
    }
    for (i in seq_len(length(psi) - mid)) {
      fit_const <- alabama::auglag(
        par = log(pars[mid + i - 1, -3]),
        fn = nll_egp_shape,
        hin = function(par, psi, ...) {
          c(ifelse(psi < 0, -exp(par[2]) / psi - xmax, 1))
        },
        xdat = xdat,
        psi = psi[mid + i],
        model = model,
        control.outer = list(trace = FALSE, method = "nlminb")
      )
      pars[mid + i, ] <- c(exp(fit_const$par), psi[mid + i])
      pll[mid + i] <- -fit_const$value
    }
  }
  ans <- list(
    mle = coef(mle),
    psi.max = coef_mle,
    param = param,
    std.error = se_mle,
    psi = psi,
    pll = pll,
    maxpll = -mle$nllh,
    family = "egp",
    threshold = threshold
  )
  ans$r <- sign(coef_mle - psi) * sqrt(2 * (ans$maxpll - ans$pll))
  ans$normal <- c(ans$psi.max, ans$std.error)
  class(ans) <- "eprof"
  if (isTRUE(plot)) {
    plot(ans)
  }
  return(invisible(ans))
}
lbelzile/mev documentation built on June 14, 2025, 6:40 p.m.