#########################################################
### Authors: Carlotta Pacifici and Simone Padoan ###
### Emails: simone.padoan@unibocconi.it ###
### carlotta.pacifici@unibocconi.it ###
### Institution: Department of Decision Sciences, ###
###              Bocconi University, Milan, Italy,     ###
### File name: Prediction.r ###
### Description: ###
### This file contains the functions to produce ###
### the same type of statistical analysis that ###
### is introduced by the article: ###
### "Predicting hazards of climate extremes: ###
### a statistical perspective" ###
### Last change: 08/10/2025 ###
### by Carlotta Pacifici ###
#########################################################
#########################################################
### START
### Sub-routines (HIDDEN FUNCTIONS)
###
#########################################################
###########################################
######################## STATIONARY SETTING
###########################################
# Density of truncated Student-t (location-scale version)
dtt <- function(
x,
location = 0,
scale = 1,
df = 1,
left = -Inf,
right = Inf,
log = FALSE
) {
stopifnot(length(left) == 1L, length(right) == 1L, left < right)
xl <- is.finite(x) & x >= left & x <= right
dens <- rep(0, length.out = length(x))
dens[xl] <- dt((x[xl] - location) / scale, df = df, log = FALSE) /
(pt(q = (right - location) / scale, df = df) -
pt(q = (left - location) / scale, df = df)) /
scale
dens[is.na(x)] <- NA
if (isTRUE(log)) {
return(log(dens))
} else {
dens
}
}
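# Illustrative sketch (not run at load time): the truncated density vanishes
# outside [left, right] and integrates to one over that interval. The inputs
# below are arbitrary illustrative choices, not values used elsewhere here.
if (FALSE) {
  # half-Cauchy-type density on (-1, Inf), as used for the shape prior below
  dtt(c(-2, 0, 1.5), df = 1, left = -1, right = Inf)
  # numerical check that the truncated density integrates to one
  integrate(dtt, lower = -1, upper = Inf, df = 1, left = -1, right = Inf)
}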
# Negative log likelihood for discrete generalized Pareto distribution
nll_dgpd <- function(par, excess, constraint = TRUE) {
scale <- ifelse(isTRUE(constraint), exp(par[1]), par[1])
shape <- par[2]
out <- evd::pgpd(q = excess + 1, scale = scale, shape = shape) -
evd::pgpd(q = excess, scale = scale, shape = shape)
return(-sum(log(out)))
}
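# Illustrative sketch (not run at load time): the discrete GP assigns mass
# P(X = x) = F(x + 1) - F(x), with F the continuous GP distribution function,
# so the negative log-likelihood of integer exceedances is evaluated directly.
# With constraint = TRUE the scale is passed on the log scale.
if (FALSE) {
  excess <- rpois(200, lambda = 2) # hypothetical integer exceedances
  nll_dgpd(par = c(log(1.5), 0.1), excess = excess, constraint = TRUE)
}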
# Internal function to adjust the proposal scale sigma (theta = log(sigma)) at each iteration
updateSig <- function(sig, acc, d, p, alpha, i) {
c <- ((1 - 1 / d) *
sqrt(2 * pi) *
exp(alpha^2 / 2) /
(2 * alpha) +
1 / (d * p * (1 - p))) # Eq(7) of Garthwaite, Fan & Sisson (2016)
Theta <- log(sig)
# Theta = Theta + c * (acc-p) / max(200, i/d)
Theta <- Theta + c * (acc - p) / i
return(exp(Theta))
}
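# Illustrative sketch (not run at load time): one Robbins-Monro step of the
# adaptive scaling. An acceptance ratio above the target p inflates sigma,
# one below deflates it, with a step size that decays as 1/i.
if (FALSE) {
  p <- 0.234 # the usual target acceptance rate for random-walk proposals
  updateSig(sig = 0.5, acc = 0.9, d = 2, p = p, alpha = -qnorm(p / 2), i = 200)
}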
# Internal function to update the covariance matrix
updateCov <- function(sigMat, i, thetaM, theta, d) {
epsilon <- 1 / i
thetaM2 <- ((thetaM * i) + theta) / (i + 1)
sigMat <- (i - 1) /
i *
sigMat +
thetaM %*% t(thetaM) -
(i + 1) / i * thetaM2 %*% t(thetaM2) +
1 / i * theta %*% t(theta) +
epsilon * diag(d)
return(list(sigMat = sigMat, thetaM = thetaM2))
}
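# Illustrative sketch (not run at load time): the recursion updates the
# running mean and covariance of the chain one state at a time, adding an
# epsilon * diag(d) jitter that keeps the proposal covariance positive
# definite. Values below are arbitrary illustrative choices.
if (FALSE) {
  th <- matrix(rnorm(20), ncol = 2) # hypothetical chain of 10 states
  tmp <- updateCov(sigMat = cov(th[1:5, ]), i = 5,
                   thetaM = colMeans(th[1:5, ]), theta = th[6, ], d = 2)
  tmp$sigMat # updated proposal covariance
}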
# Random Walk Metropolis Hastings algorithm for the Bayesian estimation of continuous GP parameters
cRWMH.POT <- function(data, start, p, sig0, nsim, mle, prior, verbose = TRUE) {
# A) INITIALIZATION
alpha <- -qnorm(p / 2)
d <- length(start) # Dimension of the vector of parameters
  if (d != 2) {
    stop("Wrong length of parameter vector")
  }
sig <- sig0 # Initial value of sigma
sig.vec <- sig
  sigMat <- diag(d) # Initial proposal covariance matrix
acc.vec <- rep(NaN, nsim) # Vector of acceptances
accepted <- rep(0, nsim)
straight.reject <- rep(0, nsim) # Monitor the number of proposed parameters that don't respect the constraints
sig.start <- sig
sig.restart <- sig
n0 <- round(5 / (p * (1 - p)))
iMax <- 100 # iMax is the max number of iterations before the last restart
Numbig <- 0
Numsmall <- 0
param <- start
msg <- "none"
# sample size
k <- length(data)
# B) DEFINITION OF SUBROUTINES
# B.1) GPD-LOG-LIKELIHOOD CONCERNING POT
gpd_llik_fun <- function(param) {
# likelihood function
res <- sum(evd::dgpd(data, 0, param[1], param[2], log = TRUE))
if (is.infinite(res)) {
res <- -1e300
}
return(res)
}
  # C) LIKELIHOOD DEFINITION, PARAMETER CONSTRAINTS AND ACCEPTANCE PROBABILITY DEFINITION
llikfun <- function(para) return(gpd_llik_fun(para))
parcheck <- function(para) {
res <- any(para[1] <= 0, (para[2] < 0 & !(max(data) <= -para[1] / para[2])))
return(res)
}
if (prior == "uniform") {
accept_prob <- function(par, par_new) {
res <- min(
exp(llikfun(par_new) - llikfun(par) + log(par[1]) - log(par_new[1])),
1
)
return(res)
}
}
if (prior == "empirical") {
accept_prob <- function(par, par_new) {
res <- min(
exp(
llikfun(par_new) -
llikfun(par) +
dgamma(par_new[1], 1, scale = mle[1], log = TRUE) -
dgamma(par[1], 1, scale = mle[1], log = TRUE) +
dtt(par_new[2], df = 1, left = -1, right = Inf, log = TRUE) -
dtt(par[2], df = 1, left = -1, right = Inf, log = TRUE)
),
1
)
return(res)
}
}
# D) START ALGORITHM
# set the chains
sparam <- param
# create a progress bar
if (isTRUE(verbose)) {
pb <- txtProgressBar(
min = 0,
max = nsim,
initial = 0,
style = 3
)
}
# main algorithm loop
for (i in 1:nsim) {
# simulation from the proposal (MVN):
s <- try(eigen(sig^2 * sigMat, symmetric = TRUE), silent = TRUE)
if (inherits(s, "try-error")) {
msg <- "The algorithm has failed to converge."
return(list(
param_post = sparam,
accepted = accepted,
straight.reject = straight.reject,
nsim = nsim,
sig.vec = sig.vec,
sig.restart = sig.restart,
acc.vec = acc.vec,
msg = msg
))
}
param_new <- as.vector(mvtnorm::rmvnorm(
1,
mean = param,
sigma = sig^2 * sigMat
))
if (any(is.na(param_new))) {
straight.reject[i] <- 1
acc.vec[i] <- 0
} else {
# check basic condition
if (parcheck(param_new)) {
straight.reject[i] <- 1
acc.vec[i] <- 0
} else {
# compute the acceptance probability
ratio <- accept_prob(param, param_new)
if (is.na(ratio)) {
straight.reject[i] <- 1
acc.vec[i] <- 0
} else {
acc.vec[i] <- ratio
u <- runif(1)
if (u < ratio) {
param <- param_new
accepted[i] <- 1
}
}
}
}
sparam <- rbind(sparam, param)
# update covariance matrix with adaptive MCMC
if (i > 100) {
if (i == 101) {
sigMat <- cov(sparam)
thetaM <- apply(sparam, 2, mean)
} else {
tmp <- updateCov(
sigMat = sigMat,
i = i,
thetaM = thetaM,
theta = param,
d = d
)
sigMat <- tmp$sigMat
thetaM <- tmp$thetaM
}
}
# Update sigma
if (i > n0) {
sig <- updateSig(
sig = sig,
        acc = acc.vec[i], # always defined, unlike ratio after a straight rejection
d = d,
p = p,
alpha = alpha,
i = i
)
sig.vec <- c(sig.vec, sig)
if ((i <= (iMax + n0)) && (Numbig < 5 || Numsmall < 5)) {
Toobig <- (sig > (3 * sig.start))
Toosmall <- (sig < (sig.start / 3)) #
if (Toobig || Toosmall) {
#restart the algorithm
message("restart the program at", i, "th iteration", "\n")
sig.restart <- c(sig.restart, sig)
Numbig <- Numbig + Toobig
Numsmall <- Numsmall + Toosmall
i <- n0
sig.start <- sig
}
} #end iMax
}
if (isTRUE(verbose)) {
print.i <- seq(0, nsim, by = 100)
if (i %in% print.i) setTxtProgressBar(pb, i)
}
}
return(list(
param_post = sparam,
accepted = accepted,
straight.reject = straight.reject,
nsim = nsim,
sig.vec = sig.vec,
sig.restart = sig.restart,
acc.vec = acc.vec,
msg = msg
))
}
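# Illustrative sketch (not run at load time) of calling the sampler directly
# on simulated threshold exceedances; in normal use it is invoked through
# cPOT()/estPOT(). All values below are arbitrary illustrative choices.
if (FALSE) {
  set.seed(1)
  exc <- evd::rgpd(200, loc = 0, scale = 2, shape = 0.3)
  mle <- evd::fpot(exc, threshold = 0, std.err = FALSE)$estimate
  fit <- cRWMH.POT(data = exc, start = mle, p = 0.234, sig0 = 0.1,
                   nsim = 2000, mle = mle, prior = "empirical")
  colMeans(fit$param_post[-(1:500), ]) # posterior means after burn-in
}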
# Bayesian or frequentist estimation of the continuous GP distribution parameters
cPOT <- function(
data,
k,
pn,
method,
prior,
start,
sig0,
nsim,
burn,
p,
optim.method,
control,
...
) {
# A) CHECK INPUT
if (missing(data)) {
stop("Missing data input")
}
if (method != "bayesian" && method != "frequentist") {
stop("Method should be `bayesian` or `frequentist` ")
}
if (!is.vector(data)) {
stop("Data must be a vector")
}
if (is.null(pn)) {
stop(
"Must specify the probability associated with the quantile(s) to extrapolate"
)
}
if (is.null(start)) {
stop("Need to specify start parameter")
} # start is the starting value for both methods
  nparam <- length(start) # number of input parameters
if (nparam != 2) {
stop("Wrong length of initial parameter vector")
}
#
# B) INITIALIZATION
# B.1) IMPORTANT VARIABLES
n <- length(data) # set sample size
nQuant <- length(pn) # set the number of quantiles to extrapolate
I <- VarCov <- NaN # set the expected information matrix
msg <- "none" # output messagge
conv <- 0 # convergence parameter for the maximazing optimization routine
#
# B.2) DEFINE THRESHOLD EXCEEDANCES
sdata <- sort(data, decreasing = TRUE)
# BAYESIAN INFERENCE
if (method == "bayesian") {
# check the minimal required parameters
    if (is.null(sig0) || is.null(nsim) || is.null(p)) {
      stop("Missing initial values")
    }
#
# ML estimates of GPD's parameters
#
mle <- evd::fpot(
data,
sdata[k + 1],
"gpd",
start,
std.err = FALSE
)$estimate
# MCMC ALGORITHM
mcmc <- cRWMH.POT(
data = (sdata[1:k] - sdata[k + 1]),
start = unlist(start),
p = p,
sig0 = sig0,
nsim = nsim,
mle = mle,
prior = prior
)
meanacc <- rep(NA, length(mcmc$acc.vec))
for (j in c(1:length(mcmc$acc.vec))) {
meanacc[j] <- mean(mcmc$acc.vec[round(j / 2):j])
}
index <- c(1:2000, seq(2001, length(mcmc$sig.vec), by = 100))
post_sample <- mcmc$param_post[(burn + 1):nsim, ]
sig.ps <- post_sample[, 1]
gam.ps <- post_sample[, 2]
# derive approximate samples from the quantile posterior distribution
Q.est <- matrix(NaN, nrow = nrow(post_sample), ncol = nQuant)
R.est <- matrix(NaN, nrow = nrow(post_sample), ncol = nQuant)
for (j in 1:nQuant) {
Q.est[, j] <- sdata[k + 1] +
sig.ps * ((k / (n * pn[j]))^gam.ps - 1) / gam.ps
}
return(list(
Q.est = Q.est,
post_sample = post_sample,
burn = burn,
straight.reject = mcmc$straight.reject[-c(1:(burn - 1))],
sig.vec = mcmc$sig.vec,
accept.prob = cbind(index, meanacc[index]),
msg = mcmc$msg,
mle = mle,
t = sdata[k + 1]
))
# FREQUENTIST INFERENCE
} else if (method == "frequentist") {
if (is.null(optim.method)) {
stop("Need to specify an optimisation method in frequentist setting")
}
if (!("control" %in% names(sys.call()))) {
control <- list(fnscale = -1, maxit = 8e+5)
} else {
if (!("fnscale" %in% names(control))) {
control[["fnscale"]] <- -1
control[["maxit"]] <- 8e+5
}
}
# COMPUTE MAXIMUM LIKELIHOOD ESTIMATES
est.para <- evd::fpot(data, sdata[k + 1], "gpd", start, std.err = FALSE)
# compute the expected information matrix at the ML estimates
sigma <- est.para$estimate[1]
gam <- est.para$estimate[2]
# COMPUTE VARIANCE-COVARIANCE MATRIX
VarCov <- diag(2)
VarCov[1, 1] <- (1 + gam)^2
VarCov[1, 2] <- VarCov[2, 1] <- -(1 + gam)
VarCov[2, 2] <- 1 + (1 + gam)^2
# VarCov <- diag(c(sigma, sigma, 1)) %*% VarCov %*% diag(c(sigma, sigma, 1))
VarCov <- diag(c(1, sigma)) %*% VarCov %*% diag(c(1, sigma))
# compute extreme quantile
Q.est <- array(NaN, c(1, nQuant))
for (j in 1:nQuant) {
Q.est[j] <- sdata[k + 1] + sigma * ((k / (n * pn[j]))^gam - 1) / gam
}
Q.gra <- c(
((-(n / k) * log(1 - pn))^(-gam) - 1) / gam,
-sigma *
((-(n / k) * log(1 - pn))^(-gam) - 1) /
gam^2 -
(sigma / gam) *
((-(n / k) * log(1 - pn))^(-gam)) *
log(-(n / k) * log(1 - pn))
)
Q.VC <- (array(Q.gra, c(1, 2)) %*% VarCov %*% array(Q.gra, c(2, 1))) / k
VarCov <- VarCov / k
return(list(
est = est.para$estimate,
t = sdata[k + 1],
Q.est = Q.est,
VarCov = VarCov,
Q.VC = Q.VC,
msg = est.para$convergence
))
}
}
# Random Walk Metropolis Hastings algorithm for the Bayesian estimation of d-GP parameters
dRWMH.POT <- function(
data,
start,
p,
sig0,
nsim,
mle,
prior,
verbose = TRUE
) {
# A) INITIALIZATION
alpha <- -qnorm(p / 2)
d <- length(start) # Dimension of the vector of parameters
  if (d != 2) {
    stop("Wrong length of parameter vector")
  }
sig <- sig0 # Initial value of sigma
sig.vec <- sig
  sigMat <- diag(d) # Initial proposal covariance matrix
acc.vec <- rep(NaN, nsim) # Vector of acceptances
accepted <- rep(0, nsim)
straight.reject <- rep(0, nsim) # Monitor the number of proposed parameters that don't respect the constraints
sig.start <- sig
sig.restart <- sig
n0 <- round(5 / (p * (1 - p)))
iMax <- 100 # iMax is the max number of iterations before the last restart
Numbig <- 0
Numsmall <- 0
param <- start
msg <- "none"
# sample size
k <- length(data)
# B) DEFINITION OF SUBROUTINES
# discrete GP likelihood
dgpd_llik_fun <- function(par) {
scale <- par[1]
shape <- par[2]
out <- (1 + shape / scale * data)^(-1 / shape) -
(1 + shape / scale * (data + 1))^(-1 / shape)
res <- sum(log(out))
if (is.infinite(res)) {
res <- -1e300
}
return(res)
}
  # C) LIKELIHOOD DEFINITION, PARAMETER CONSTRAINTS AND ACCEPTANCE PROBABILITY DEFINITION
llikfun <- function(para) return(dgpd_llik_fun(para))
parcheck <- function(para) {
res <- any(para[1] <= 0, (para[2] < 0 & !(max(data) <= -para[1] / para[2])))
return(res)
}
if (prior == "uniform") {
accept_prob <- function(par, par_new) {
res <- min(
exp(llikfun(par_new) - llikfun(par) + log(par[1]) - log(par_new[1])),
1
)
return(res)
}
}
if (prior == "empirical") {
accept_prob <- function(par, par_new) {
res <- min(
exp(
llikfun(as.numeric(par_new)) -
llikfun(as.numeric(par)) +
dgamma(par_new[1], 1, scale = mle[1], log = TRUE) -
dgamma(par[1], 1, scale = mle[1], log = TRUE) +
dtt(par_new[2], df = 1, left = -1, right = Inf, log = TRUE) -
dtt(par[2], df = 1, left = -1, right = Inf, log = TRUE)
),
1
)
return(res)
}
}
# D) START ALGORITHM
# set the chains
sparam <- as.numeric(param)
# create a progress bar
if (isTRUE(verbose)) {
pb <- txtProgressBar(
min = 0,
max = nsim,
initial = 0,
style = 3
)
}
# main algorithm loop
for (i in 1:nsim) {
# simulation from the proposal (MVN):
s <- try(eigen(sig^2 * sigMat, symmetric = TRUE), silent = TRUE)
if (inherits(s, "try-error")) {
msg <- "The algorithm has failed."
return(list(
param_post = sparam,
accepted = accepted,
straight.reject = straight.reject,
nsim = nsim,
sig.vec = sig.vec,
sig.restart = sig.restart,
acc.vec = acc.vec,
msg = msg
))
}
param_new <- as.vector(mvtnorm::rmvnorm(
1,
mean = as.numeric(param),
sigma = sig^2 * sigMat
))
if (any(is.na(param_new))) {
straight.reject[i] <- 1
acc.vec[i] <- 0
} else {
# check basic condition
if (parcheck(param_new)) {
straight.reject[i] <- 1
acc.vec[i] <- 0
} else {
# compute the acceptance probability
ratio <- accept_prob(as.numeric(param), param_new)
if (is.na(ratio)) {
straight.reject[i] <- 1
acc.vec[i] <- 0
} else {
acc.vec[i] <- ratio
u <- runif(1)
if (u < ratio) {
param <- param_new
accepted[i] <- 1
}
}
}
}
sparam <- rbind(sparam, param)
# update covariance matrix with adaptive MCMC
if (i > 100) {
if (i == 101) {
sigMat <- cov(sparam)
thetaM <- apply(sparam, 2, mean)
} else {
tmp <- updateCov(
sigMat = sigMat,
i = i,
thetaM = thetaM,
theta = param,
d = d
)
sigMat <- tmp$sigMat
thetaM <- tmp$thetaM
}
}
# Update sigma
if (i > n0) {
sig <- updateSig(
sig = sig,
        acc = acc.vec[i], # always defined, unlike ratio after a straight rejection
d = d,
p = p,
alpha = alpha,
i = i
)
sig.vec <- c(sig.vec, sig)
if ((i <= (iMax + n0)) && (Numbig < 5 || Numsmall < 5)) {
Toobig <- (sig > (3 * sig.start))
Toosmall <- (sig < (sig.start / 3)) #
if (Toobig || Toosmall) {
#restart the algorithm
message("restart the program at", i, "th iteration", "\n")
sig.restart <- c(sig.restart, sig)
Numbig <- Numbig + Toobig
Numsmall <- Numsmall + Toosmall
i <- n0
sig.start <- sig
}
} #end iMax
}
if (isTRUE(verbose)) {
print.i <- seq(0, nsim, by = 100)
if (i %in% print.i) setTxtProgressBar(pb, i)
}
}
# results
ret <- list(
param_post = sparam,
accepted = accepted,
straight.reject = straight.reject,
nsim = nsim,
sig.vec = sig.vec,
sig.restart = sig.restart,
acc.vec = acc.vec,
msg = msg
)
class(ret) <- "RWMH"
return(invisible(ret))
}
# Bayesian or frequentist estimation of the discrete GP distribution parameters
dPOT <- function(
data,
k,
pn,
method = c("bayesian", "frequentist"),
prior,
start,
sig0,
nsim,
burn,
p,
optim.method,
control,
verbose = FALSE,
...
) {
# A) CHECK INPUT
if (missing(data)) {
stop("Missing data input")
}
  method <- match.arg(method)
if (!is.vector(data)) {
stop("Data must be a vector")
}
if (is.null(pn)) {
stop(
"Must specify the probability associated with the quantile(s) to extrapolate"
)
}
# Define exceedances
n <- length(data) # set sample size
sdata <- sort(data, decreasing = TRUE)
excess <- sdata[1:k] - sdata[k]
if (is.null(start)) {
start <- evd::fpot(
x = excess,
threshold = 0,
model = "gpd",
std.err = FALSE
)
start <- list(
scale = as.numeric(start$estimate[1]),
shape = as.numeric(start$estimate[2])
)
} else {
# start is the starting value for both methods
if (length(start) != 2) {
stop("Wrong length of initial parameter vector")
}
if (!isTRUE(all(c("scale", "shape") %in% names(start)))) {
stop(
"Invalid list for \"start\": must contain arguments \"scale\" and \"shape\"."
)
}
if (
!is.finite(nll_dgpd(
par = c(start$scale, start$shape),
excess = excess,
constraint = FALSE
))
) {
stop("Invalid arguments")
}
}
#
# B) INITIALIZATION
# B.1) IMPORTANT VARIABLES
nQuant <- length(pn) # set the number of quantiles to extrapolate
I <- VarCov <- NaN # set the expected information matrix
msg <- "none" # output message
  conv <- 0 # convergence flag for the maximizing optimization routine
# B.2) DEFINE THRESHOLD EXCEEDANCES
# BAYESIAN INFERENCE
if (method == "bayesian") {
# check the minimal required parameters
    if (is.null(sig0) || is.null(nsim) || is.null(p)) {
      stop("Missing initial values")
    }
#
# ML estimates of GPD's parameters
mle <- unlist(start)
# MCMC ALGORITHM
mcmc <- dRWMH.POT(
data = excess,
start = c(start$scale, start$shape),
p = p,
sig0 = sig0,
nsim = nsim,
mle = mle,
prior = prior,
verbose = verbose
)
meanacc <- rep(NA, length(mcmc$acc.vec))
for (j in c(1:length(mcmc$acc.vec))) {
meanacc[j] <- mean(mcmc$acc.vec[round(j / 2):j])
}
index <- c(1:2000, seq(2001, length(mcmc$sig.vec), by = 100))
post_sample <- mcmc$param_post[(burn + 1):nsim, ]
sig.ps <- post_sample[, 1]
gam.ps <- post_sample[, 2]
# derive approximate samples from the quantile posterior distribution
Q.est <- matrix(NaN, nrow = nrow(post_sample), ncol = nQuant)
for (j in 1:nQuant) {
Q.est[, j] <- floor(
sdata[k] + sig.ps * ((k / (n * pn[j]))^gam.ps - 1) / gam.ps
) - 1
}
return(list(
Q.est = Q.est,
post_sample = post_sample,
burn = burn,
straight.reject = mcmc$straight.reject[-c(1:(burn - 1))],
sig.vec = mcmc$sig.vec,
accept.prob = cbind(index, meanacc[index]),
msg = mcmc$msg,
mle = mle,
t = sdata[k]
))
# FREQUENTIST INFERENCE
} else if (method == "frequentist") {
if (is.null(optim.method)) {
stop("Need to specify an optimisation method in frequentist setting")
}
if (!("control" %in% names(sys.call()))) {
control <- list(fnscale = -1, maxit = 8e+5)
} else {
if (!("fnscale" %in% names(control))) {
control[["fnscale"]] <- -1
control[["maxit"]] <- 8e+5
}
}
# COMPUTE MAXIMUM LIKELIHOOD ESTIMATES
est.para <- fitdGPD(excess)
# compute the expected information matrix at the ML estimates
sigma <- est.para$mle[1]
gam <- est.para$mle[2]
# COMPUTE VARIANCE-COVARIANCE MATRIX
VarCov <- diag(2)
VarCov[1, 1] <- (1 + gam)^2
VarCov[1, 2] <- VarCov[2, 1] <- -(1 + gam)
VarCov[2, 2] <- 1 + (1 + gam)^2
# VarCov <- diag(c(sigma, sigma, 1)) %*% VarCov %*% diag(c(sigma, sigma, 1))
VarCov <- diag(c(1, sigma)) %*% VarCov %*% diag(c(1, sigma))
# compute extreme quantile
Q.est <- array(NaN, c(1, nQuant))
for (j in 1:nQuant) {
      Q.est[j] <- sdata[k] + sigma * ((k / (n * pn[j]))^gam - 1) / gam
}
Q.gra <- c(
((-(n / k) * log(1 - pn))^(-gam) - 1) / gam,
-sigma *
((-(n / k) * log(1 - pn))^(-gam) - 1) /
gam^2 -
(sigma / gam) *
((-(n / k) * log(1 - pn))^(-gam)) *
log(-(n / k) * log(1 - pn))
)
Q.VC <- (array(Q.gra, c(1, 2)) %*% VarCov %*% array(Q.gra, c(2, 1))) / k
VarCov <- VarCov / k
    return(list(
      est = est.para$mle,
      t = sdata[k],
      Q.est = Q.est,
      VarCov = VarCov,
      Q.VC = Q.VC,
      msg = msg
    ))
}
}
########## prediction
# Density for continuous GP
Fdgpd_c <- function(par, x) {
res <- evd::dgpd(x, par[1], par[2], par[3])
if (is.na(res)) {
res <- 0
}
return(res)
}
# Density for discrete GP
Fdgpd_d <- function(par, x) {
loc <- par[1]
scale <- par[2]
shape <- par[3]
z1 <- pmax(0, x - loc) / scale
z2 <- pmax(0, x + 1 - loc) / scale
out <- (1 + shape * z1)^(-1 / shape) - (1 + shape * z2)^(-1 / shape)
if (is.na(out)) {
out <- 0
}
return(out)
}
# Frequentist continuous GP distribution
Fpgpd_c <- function(par, x) {
res <- evd::pgpd(x, par[1], par[2], par[3])
if (is.na(res)) {
res <- 0
}
return(res)
}
# Bayesian predictive distribution - continuous GP
Bpgpd_c <- function(x, post) {
res <- apply(post, 1, Fpgpd_c, x = x)
return(mean(res, na.rm = TRUE))
}
#' Bayesian predictive quantile for generalized Pareto distribution
#'
#' @description WARNING: this function will not be exported in a future version of the package.
#'
#' @export
#' @keywords internal
#' @param p probability levels
#' @param post matrix of posterior samples
#' @param lower lower bound for the search for the quantile
#' @param upper upper bound for the region over which to search for the quantile
#' @return a vector of quantiles of the same length as \code{p}
Bqgpd_c <- function(p, post, lower, upper) {
myf <- function(x, p, post) return(p - Bpgpd_c(x, post))
res <- uniroot(
myf,
lower = lower,
upper = upper,
p = p,
post = post,
extendInt = "yes"
)$root
return(res)
}
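# Illustrative sketch (not run at load time): the predictive quantile is
# obtained by root search on the averaged distribution function. `post` has
# columns (location, scale, shape); the bounds only need to bracket the root
# loosely since uniroot() is called with extendInt = "yes". Values below are
# arbitrary illustrative choices.
if (FALSE) {
  post <- cbind(rep(10, 500), rgamma(500, 2), rnorm(500, 0.2, 0.05))
  Bqgpd_c(p = 0.9, post = post, lower = 10, upper = 100)
}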
# Frequentist discrete GP distribution
Fpgpd_d <- function(par, x) {
z <- pmax(0, x + 1 - par[1]) / par[2]
res <- 1 - (1 + par[3] * z)^(-1 / par[3])
if (is.na(res)) {
res <- 0
}
return(res)
}
# Bayesian predictive distribution - discrete GP
Bpgpd_d <- function(x, post) {
res <- apply(post, 1, Fpgpd_d, x = x)
return(mean(res, na.rm = TRUE))
}
#' Bayesian predictive quantile for discrete generalized Pareto distribution
#'
#' @description WARNING: this function will not be exported in a future version of the package.
#'
#' @export
#' @keywords internal
#' @param p probability levels
#' @param post matrix of posterior samples
#' @param lower lower bound for the search for the quantile
#' @param upper upper bound for the region over which to search for the quantile
#' @return a vector of quantiles of the same length as \code{p}
Bqgpd_d <- function(p, post, lower, upper) {
myf <- function(x, p, post) return(p - Bpgpd_d(x, post))
res <- uniroot(
myf,
lower = lower,
upper = upper,
p = p,
post = post,
extendInt = "yes"
)$root
return(floor(res))
}
# Internal functions for tail homogeneity test from the evt0 package
mop.RBMOP <- function (osx, k, p){
n = length(osx)
losx = log(osx)
dk = length(k)
dp = length(p)
est = matrix(NA, dk, dp)
H = mop.MOP(osx, k, p)
H = H$EVI
rhoest = mop.rho(osx)
betaest = mop.beta(losx, rhoest)
  for (j in 1:dp)
    est[, j] = H[, j] * (1 - ((betaest * (1 - p[j] * H[, j])) /
      (1 - rhoest - p[j] * H[, j])) * (n/k)^(rhoest))
return(list(EVI = est, rho = rhoest, beta = betaest))
}
mop.MOP <- function (osx, k, p){
n = length(osx)
dk = length(k)
dp = length(p)
km = max(k)
nk = km + 1
est = matrix(NA, dk, dp)
for (j in 1:dp) if (p[j] == 0) {
losx = rev(log(osx))
losx = losx[1:nk]
tmp = cumsum(losx)/(1:nk)
estc = tmp[-nk] - losx[-1]
est[, j] = estc[k]
}
else {
tosx = rev(osx)
tosx = (tosx[1:nk])^p[j]
tmp = cumsum(tosx)/(1:nk)
estc = tmp[-nk]/tosx[-1]
estc = (1 - estc^-1)/p[j]
est[, j] = estc[k]
}
return(list(EVI = est))
}
mop.rho <- function (osx)
{
losx = log(osx)
n = length(losx)
krho = floor(n^(0.995)):floor(n^(0.999))
krhom = max(krho)
nkrho = krhom + 1
M = matrix(NA, length(krho), 3)
tau0 = numeric(length(krho))
tau1 = numeric(length(krho))
W0 = numeric(length(krho))
W1 = numeric(length(krho))
losx = rev(losx)
losx = losx[1:nkrho]
estc1 = mop.MOP(osx, krho, p = 0)
M[, 1] = estc1$EVI
c12 = cumsum(losx^2)/(1:nkrho)
c22 = (losx)^2
c32 = cumsum(losx)/(1:nkrho)
estc2 = c12[-nkrho] + c22[-1] - 2 * c32[-nkrho] * losx[-1]
M[, 2] = estc2[krho]
c13 = cumsum(losx^3)/(1:nkrho)
c23 = (losx)^3
  estc3 = c13[-nkrho] - c23[-1] -
    3 * (losx[-1] * c12[-nkrho] - c32[-nkrho] * c22[-1])
M[, 3] = estc3[krho]
  W0 = (log(M[, 1]) - (1/2) * log(M[, 2]/2)) /
    ((1/2) * log(M[, 2]/2) - (1/3) * log(M[, 3]/6))
  W1 = (M[, 1] - (M[, 2]/2)^(1/2)) /
    ((M[, 2]/2)^(1/2) - (M[, 3]/6)^(1/3))
tau0 = -abs(3 * (W0 - 1)/(W0 - 3))
tau1 = -abs(3 * (W1 - 1)/(W1 - 3))
tau0median = median(tau0)
tau1median = median(tau1)
sumtau0 = as.numeric((tau0 - tau0median) %*% (tau0 - tau0median))
sumtau1 = as.numeric((tau1 - tau1median) %*% (tau1 - tau1median))
  if (sumtau0 <= sumtau1) {
    return(tau0[length(krho)])
  }
  else {
    return(tau1[length(krho)])
  }
}
mop.beta <- function (losx, rhoest)
{
n = length(losx)
k1 = floor(n^(0.999))
v = c(0, rhoest, 2 * rhoest)
D = numeric(length(v))
c1 = ((1:k1)/k1)^(-v[1])
c2 = ((1:k1)/k1)^(-v[2])
c3 = ((1:k1)/k1)^(-v[3])
  c = (1:k1) * (losx[n:(n - k1 + 1)] - losx[(n - 1):(n - k1)])
D[1] = sum(c1 * c)/k1
D[2] = sum(c2 * c)/k1
D[3] = sum(c3 * c)/k1
d = sum(c2)/k1
betaest = (k1/n)^(v[2]) * (d * D[1] - D[2])/(d * D[2] - D[3])
return(betaest)
}
mop <- function (x, k, p, method = c("MOP", "RBMOP"))
{
n = length(x)
if (n < 2) {
stop("Data vector x with at least two sample points required.")
}
if (is.null(p) || any(is.na(p))) {
stop("p is not specified")
}
if (is.null(k) || any(is.na(k))) {
stop("k is not specified")
}
if (any(k < 1) || any(k > n) || any(k == n) || !is.numeric(k) ||
isTRUE(any(k != as.integer(k)))) {
stop("Each k must be integer and greater than or equal to 1 and less than sample size.")
}
if (!is.numeric(x)) {
stop("Some of the data points are not real.")
}
if (!is.numeric(p)) {
stop("p must be a real.")
}
osx <- sort(x[x > 0])
method = match.arg(method)
if (method == "MOP") {
EVI.est = mop.MOP(osx, k, p)
}
else if (method == "RBMOP") {
EVI.est = mop.RBMOP(osx, k, p)
}
colnames(EVI.est$EVI) = p
rownames(EVI.est$EVI) = k
return(EVI.est)
}
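# Illustrative sketch (not run at load time): tail-index estimation on
# heavy-tailed data. p = 0 gives the Hill-type MOP estimator and method
# "RBMOP" its reduced-bias version; k is a vector of intermediate levels.
if (FALSE) {
  set.seed(1)
  x <- evd::rfrechet(1000, shape = 2) # true tail index 1/2
  mop(x, k = 50:60, p = 0, method = "RBMOP")$EVI
}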
gammaest <- function(x,k){
n <- length(x)
sop <- mop(x, k, 0, "RBMOP")
sop$lambda <- sqrt(k) * sop$EVI * sop$beta * (n/k)^sop$rho
return(c(sop$EVI, sop$rho, sop$beta, sop$lambda))
}
# Tail homogeneity test (Daouia, A., S.A. Padoan and G. Stupfler, 2024)
PooledTailIndex <- function(gamHat, rhoHat, betaHat, lambdaHat, m, n, k, type="AMSE",
bias=TRUE, poolSO=FALSE, etailCop=NULL, test="DPS",
Vmat=NULL, alpha=0.05)
{
  # DEFINE IMPORTANT QUANTITIES FOR THE SUBSEQUENT COMPUTATIONS
# compute the total effective sample size
K <- sum(k)
# compute the total sample size
N <- sum(n)
# compute the scaling matrix
D <- sqrt(K) * diag(sqrt(1/k))
# critical value for the hypothesis testing
critVal <- qchisq(1-alpha, m-1)
# define the vector of ones
ones <- array(1, c(m,1))
  # 1) IF SECOND-ORDER PARAMETERS ARE GIVEN THEN THEY ARE POOLED TOGETHER
if(poolSO){
rhoHat <- rep(sum(n * rhoHat / N), m)
betaHat <- rep(sum(n * betaHat / N), m)
lambdaHat <- sqrt(k) * rep(sum(k * gamHat / K), m) * betaHat * (n/k)^rhoHat
}
  # 2) COMPUTE A DESIGN VECTOR THAT IS USEFUL FOR THE COMPUTATION OF THE ASYMPTOTIC BIAS TERMS
B <- D %*% array(lambdaHat / (1 - rhoHat), c(m,1))
  # 3) COMPUTE A DESIGN MATRIX THAT IS USEFUL FOR THE COMPUTATION OF THE
  # ASYMPTOTIC VARIANCE
# define the V matrix (diagonal) in the case of the distributed inference
VargamHat <- (sum(k * gamHat / K))^2
V <- diag(VargamHat, nrow=m, ncol=m)
# off diagonal elements
if(!is.null(etailCop)){
# define the V matrix (full) in the case of the non-distributed inference
VargamHat <- (mean(gamHat))^2
V <- diag(VargamHat, nrow=m, ncol=m)
V[lower.tri(V)] <- VargamHat * etailCop
V <- t(V)
V[lower.tri(V)] <- VargamHat * etailCop
}
if(all(!is.null(Vmat), is.matrix(Vmat))) V <- Vmat
# define the matrix called in the paper as V_c
V <- D %*% V %*% D
# compute inverse of the V_c matrix
Vinv <- try(solve(V), silent = TRUE)
if(!is.matrix(Vinv)) Vinv <- diag(NaN, nrow=m, ncol=m)
  # 4) COMPUTE THE OPTIMAL WEIGHTS
switch(type,
# ASYMPTOTIC VARIANCE
AVAR={
      # compute numerator
VRowSums <- Vinv %*% ones
# compute denominator
VTotSum <- c(t(ones) %*% VRowSums)
# optimal weights
w <- c(VRowSums) / VTotSum
},
    # ASYMPTOTIC BIAS
    ABIAS={
      VRowSums <- Vinv %*% ones
      VTotSum <- c(t(ones) %*% VRowSums)
      S <- Vinv %*% B
      ScolSums <- c(t(ones) %*% S)
      Q <- c(t(B) %*% S)
      # compute numerator
      num <- c(Q * VRowSums) - c(ScolSums * S)
      # compute denominator
      den <- c(Q * VTotSum) - ScolSums^2
      # optimal weights
      w <- num/den
    },
    # ASYMPTOTIC MEAN-SQUARED-ERROR
    AMSE={
      VRowSums <- Vinv %*% ones
      VTotSum <- c(t(ones) %*% VRowSums)
      S <- Vinv %*% B
      ScolSums <- c(t(ones) %*% S)
      Q <- c(t(B) %*% S)
      # compute numerator
      num <- c((1 + Q) * VRowSums) - c(ScolSums * S)
      # compute denominator
      den <- c((1 + Q) * VTotSum) - ScolSums^2
      # optimal weights
      w <- num/den
    },
# NAIVE APPROACH
NOWEI={
w <- rep(1/m, m)
}
)
  # 5) COMPUTE THE POOLED ESTIMATE
gamHatP <- sum(w * gamHat)
  # 6) COMPUTE AN ESTIMATE OF THE ASYMPTOTIC VARIANCE OF THE POOLED ESTIMATOR
VarGamHatP <- c(t(w) %*% V %*% w) / K
  # 7) DEFINE THE ASYMPTOTIC BIAS TERM OF THE POOLED ESTIMATOR
BiasGamHatP <- 0
if(bias) BiasGamHatP <- sum(w * B) / sqrt(K)
  # 8) DEFINE AN APPROXIMATE (1-alpha)100% CONFIDENCE INTERVAL FOR THE TAIL INDEX
  ME <- sqrt(VarGamHatP) * qnorm(1-alpha/2)
CIGamHatP <- c(gamHatP-BiasGamHatP-ME, gamHatP-BiasGamHatP+ME)
  # 9) PERFORM HYPOTHESIS TESTING FOR EQUALITY OF TAIL INDICES
switch(test,
KINS={
gamHDelta <- array(gamHat, c(m,1)) - gamHatP*ones
# COMPUTE THE TEST STATISTIC
logLikR <- K * c(t(gamHDelta) %*% Vinv %*% (gamHDelta))
},
DPS={
Vtest <- diag(gamHat^2)
if(!is.null(etailCop)){
Vtest <- gamHat%*%t(gamHat)
Vtest[lower.tri(Vtest)] <- Vtest[lower.tri(Vtest)] * etailCop
Vtest <- t(Vtest)
Vtest[lower.tri(Vtest)] <- Vtest[lower.tri(Vtest)] * etailCop
}
Vtest <- D %*% Vtest %*% D
Vtestinv <- try(solve(Vtest), silent = TRUE)
if(!is.matrix(Vtestinv)) Vtestinv <- diag(NaN, nrow=m, ncol=m)
gamHatPtest <- c(t(ones) %*% Vtestinv %*% gamHat) / c(t(ones) %*% Vtestinv %*% ones)
gamHDelta <- array(gamHat, c(m,1)) - gamHatPtest*ones
# COMPUTE THE TEST STATISTIC
logLikR <- K * c(t(gamHDelta) %*% Vtestinv %*% (gamHDelta))
}
)
# compute the p-value
PVal <- 1 - pchisq(q=logLikR, df=m-1)
return(list(gamHatP=gamHatP-BiasGamHatP,
VarGamHatP=VarGamHatP,
CIGamHatP=CIGamHatP,
BiasGamHatP=BiasGamHatP,
logLikR=logLikR,
critVal=critVal,
PVal=PVal))
}
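# Illustrative sketch (not run at load time): pooling tail-index estimates
# from m = 3 independent samples and testing tail homogeneity; gammaest()
# supplies (gamma, rho, beta, lambda) for each sample. All values below are
# arbitrary illustrative choices.
if (FALSE) {
  set.seed(1)
  m <- 3
  n <- rep(1000, m)
  k <- rep(60, m)
  ests <- sapply(1:m, function(j) gammaest(evd::rfrechet(n[j], shape = 2), k[j]))
  PooledTailIndex(gamHat = ests[1, ], rhoHat = ests[2, ], betaHat = ests[3, ],
                  lambdaHat = ests[4, ], m = m, n = n, k = k,
                  type = "AMSE", test = "DPS")
}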
################################################
######################## NONSTATIONARY SETTING
################################################
# Generate Dirichlet
rdirichlet <- function(n = 1, alpha) {
Gam <- matrix(0, n, length(alpha))
for (i in 1:length(alpha)) Gam[, i] <- rgamma(n, shape = alpha[i])
Gam / rowSums(Gam)
}
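# Illustrative sketch (not run at load time): each row of the output is one
# Dirichlet draw and sums to one.
if (FALSE) {
  w <- rdirichlet(n = 5, alpha = c(2, 3, 5))
  rowSums(w) # all equal to 1
}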
# Extreme quantile with asymmetric credibility intervals - continuous GP
QAhat_c <- function(cx, threshold, sig.ps, gam.ps, n, qlev, k, alpha = 0.05) {
Q <- threshold + sig.ps * (((n * qlev) / (k * cx))^(-gam.ps) - 1) / gam.ps
return(c(quantile(Q, alpha / 2), mean(Q), quantile(Q, 1 - alpha / 2)))
}
# Extreme quantile with symmetric credibility intervals - continuous GP
QShat_c <- function(cx, threshold, sig.ps, gam.ps, n, qlev, k, alpha = 0.05) {
Q <- threshold + sig.ps * (((n * qlev) / (k * cx))^(-gam.ps) - 1) / gam.ps
Qm <- mean(Q)
Qs <- sd(Q)
return(c(Qm - qnorm(1 - alpha / 2) * Qs, Qm, Qm + qnorm(1 - alpha / 2) * Qs))
}
# Extreme quantile with asymmetric credibility intervals - discrete GP
QAhat_d <- function(cx, threshold, sig.ps, gam.ps, n, qlev, k, alpha = 0.05) {
Q <- floor(
threshold + sig.ps * (((n * qlev) / (k * cx))^(-gam.ps) - 1) / gam.ps - 1
)
return(c(
floor(quantile(Q, alpha / 2)),
mean(Q),
floor(quantile(Q, 1 - alpha / 2))
))
}
# Extreme quantile with symmetric credibility intervals - discrete GP
QShat_d <- function(cx, threshold, sig.ps, gam.ps, n, qlev, k, alpha = 0.05) {
Q <- floor(
threshold + sig.ps * (((n * qlev) / (k * cx))^(-gam.ps) - 1) / gam.ps - 1
)
Qm <- mean(Q)
Qs <- sd(Q)
return(c(
floor(Qm - qnorm(1 - alpha / 2) * Qs),
Qm,
floor(Qm + qnorm(1 - alpha / 2) * Qs)
))
}
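# Illustrative sketch (not run at load time): point estimate and 95%
# credibility intervals of an extreme quantile from a hypothetical posterior
# sample; cx is the local value of the scedasis function. QAhat_* uses
# empirical posterior quantiles, QShat_* a normal approximation.
if (FALSE) {
  sig.ps <- rgamma(500, 2)
  gam.ps <- rnorm(500, 0.2, 0.05)
  QAhat_c(cx = 1, threshold = 10, sig.ps = sig.ps, gam.ps = gam.ps,
          n = 1000, qlev = 0.001, k = 50)
  QShat_c(cx = 1, threshold = 10, sig.ps = sig.ps, gam.ps = gam.ps,
          n = 1000, qlev = 0.001, k = 50)
}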
# Continuous GP density
h.gpd <- function(x, gam, eps = 1e-6) {
if (gam > eps) {
res <- ifelse(x > 0, (1 + gam * x)^(-1 / gam - 1), 0)
}
if (gam < -eps) {
res <- ifelse(
x > 0,
ifelse(x < -1 / gam, (1 + gam * x)^(-1 / gam - 1), 0),
0
)
}
if (abs(gam) <= eps) {
res <- ifelse(x > 0, exp(-x), 0)
}
return(res)
}
# Continuous GP density at the intermediate level
preddens1_c <- function(theta, y, t) {
loc <- t + theta[1] * (theta[3]^theta[2] - 1) / theta[2]
scale <- theta[1] * theta[3]^theta[2]
z <- (y - loc) / scale
res <- h.gpd(z, theta[2])
return(res)
}
# Continuous GP density at the extreme level
preddens2_c <- function(theta, p, y, t, k, n) {
loc <- t + theta[1] * ((theta[3] * 1 / p * k / n)^theta[2] - 1) / theta[2]
scale <- theta[1] * (theta[3] * 1 / p * k / n)^theta[2]
z <- (y - loc) / scale
res <- h.gpd(z, theta[2])
return(res)
}
# Discrete GP density at the intermediate level
preddens1_d <- function(theta, y, t) {
loc <- floor(t + theta[1] * (theta[3]^theta[2] - 1) / theta[2])
scale <- theta[1] * theta[3]^theta[2]
z1 <- pmax(0, (y - loc) / scale)
z2 <- pmax(0, (y + 1 - loc) / scale)
res <- (1 + theta[2] * z1)^(-1 / theta[2]) -
(1 + theta[2] * z2)^(-1 / theta[2])
return(res)
}
# Discrete GP density at the extreme level
preddens2_d <- function(theta, p, y, t, k, n) {
loc <- floor(
t + theta[1] * ((theta[3] * 1 / p * k / n)^theta[2] - 1) / theta[2]
)
scale <- theta[1] * (theta[3] * 1 / p * k / n)^theta[2]
z1 <- pmax(0, (y - loc) / scale)
z2 <- pmax(0, (y + 1 - loc) / scale)
res <- (1 + theta[2] * z1)^(-1 / theta[2]) -
(1 + theta[2] * z2)^(-1 / theta[2])
return(res)
}
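# Illustrative sketch (not run at load time): evaluating the continuous
# predictive density components on a grid, for a hypothetical parameter
# vector theta = (scale, shape, c(x)) and threshold t.
if (FALSE) {
  theta <- c(2, 0.3, 1.2)
  yg <- seq(10, 30, by = 0.5)
  preddens1_c(theta, y = yg, t = 10) # intermediate level
  preddens2_c(theta, p = 0.001, y = yg, t = 10, k = 50, n = 1000) # extreme level
}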
################################################################################
### START
### Main-routines (VISIBLE FUNCTIONS)
###
################################################################################
###########################################
######################## STATIONARY SETTING
###########################################
#' Maximum likelihood estimation of the parameters of the discrete generalized Pareto distribution
#'
#' Given a sample of exceedances, estimate the parameters via maximum likelihood along with \eqn{1-\alpha} level confidence intervals.
#'
#' @param excess vector of positive exceedances, i.e., \eqn{Y - t \mid Y > t}, with \eqn{t} being the threshold
#' @param alpha level for confidence interval of scale and shape parameters. Default: 0.05, giving 95\% confidence intervals
#' @return a list with elements
#' \itemize{
#' \item \code{mle} vector of dimension 2 containing estimated scale and shape parameters
#' \item \code{CI} matrix of dimension 2 by 2 containing the \eqn{1-\alpha} level confidence intervals for scale and shape
#' }
#' @references
#' Hitz, A.S., G. Samorodnitsky and R.A. Davis (2024). Discrete Extremes, \emph{Journal of Data Science}, 22(\bold{4}), pp. 524-536.
#' @export
#' @examples
#' fitdGPD(rpois(1000,2))
fitdGPD <- function(excess, alpha = 0.05) {
k <- length(excess)
ntries <- 100
lb <- c(log(0.01), 0.001)
ub <- c(log(3), 1)
start <- matrix(NA, ntries, 2)
nllk <- numeric()
# initialization
for (t in 1:ntries) {
d <- runif(1)
start[t, ] <- lb * (1 - d) + ub * d
nllk[t] <- nll_dgpd(start[t, ], excess)
}
  initpar <- start[which.min(nllk), ] # which.min avoids returning several rows on ties
# estimation
est <- optim(
initpar,
fn = nll_dgpd,
control = list(maxit = 1e6),
hessian = FALSE,
excess = excess
)
  mle <- c(exp(est$par[1]), est$par[2]) # back-transform the scale to the natural scale
if (est$convergence > 0) {
warning("Optimization algorithm did not converge.")
}
# extract hessian
  est2 <- optim(
    mle,
fn = nll_dgpd,
control = list(maxit = 1),
hessian = TRUE,
excess = excess,
constraint = FALSE
)
# confidence intervals
vcov <- solve(est2$hessian)
se <- sqrt(diag(vcov))
lower <- mle + qnorm(alpha / 2) * se
upper <- mle + qnorm(1 - alpha / 2) * se
CI <- rbind(lower, upper)
names(mle) <- colnames(CI) <- c("scale", "shape")
list("mle" = mle, "CI" = CI)
}
#' @title Estimation of generalized Pareto distributions
#' @description Bayesian or frequentist estimation of the scale and shape parameters of the continuous or discrete generalized Pareto distribution.
#' @param data numeric vector of length n containing complete data values (under and above threshold)
#' @param k double indicating the effective sample size. Default: 10
#' @param pn numeric vector containing one or more percentile levels at which extreme quantiles are estimated, with \eqn{p < k/n}. Default: NULL
#' @param type string indicating distribution types. Default: \code{c('continuous','discrete')}
#' @param method string indicating estimation methods. Default: \code{c('bayesian', 'frequentist')}
#' @param prior string indicating prior distribution (uniform or empirical). Default: \code{'empirical'}
#' @param start list of 2 containing starting values for scale and shape parameters. Default: \code{NULL}
#' @param sig0 double indicating the initial value for the update of the variance in the MCMC algorithm. Default: \code{NULL}
#' @param nsim double indicating the total number of iterations of the MCMC algorithm in the Bayesian estimation case. Default: 5000L
#' @param burn double indicating the number of iterations to exclude in the MCMC algorithm of the Bayesian estimation case. Default: 1000L
#' @param p double indicating the desired overall sampler acceptance probability. Default: 0.234
#' @param optim.method string indicating the optimization method in the frequentist estimation case. Default: \code{'Nelder-Mead'}
#' @param control list containing additional parameters for the minimization function \link[stats]{optim}. Default: \code{NULL}
#' @param ... other arguments passed to the function
#' @return a list with the following elements
#' \itemize{
#' \item Bayesian estimation case
#' \itemize{
#' \item \code{Q.est} matrix with \code{nsim-burn} rows and \code{length(pn)} columns containing the posterior sample of the
#' extreme quantile estimated at level given in pn
#' \item \code{post_sample} matrix with \code{nsim-burn} rows and 2 columns containing the posterior sample of the scale and
#' shape parameters for the continuous or discrete generalized Pareto distribution
#' \item \code{burn} double indicating the number of iterations excluded in the MCMC algorithm
#' \item \code{straight.reject} vector of length \code{nsim-burn+1} indicating the iterations in which the proposed parameters do not respect basic constraints
#' \item \code{sig.vec} vector of length \code{nsim}\eqn{-\lfloor(5 / (p (1 - p)))\rfloor+1} containing the values of the variance updated at each iteration of the MCMC algorithm
#' \item \code{accept.prob} matrix containing the values of acceptance probability (second column) corresponding to specific iterations (first column)
#' \item \code{msg} character string containing an output message on the result of the Bayesian estimation procedure
#' \item \code{mle} vector of length 2 containing the maximum likelihood estimates of the scale and shape parameters of the continuous or discrete generalized Pareto distribution
#' \item \code{t} double indicating the threshold for the generalized Pareto model, corresponding to the \eqn{n-k}th order statistic of the sample
#' }
#' \item Frequentist estimation case
#' \itemize{
#' \item \code{est} vector of length 2 containing the maximum likelihood estimates of the scale and shape parameters of the continuous or discrete generalized Pareto distribution
#' \item \code{t} double indicating the threshold
#' \item \code{Q.est} vector of dimension \code{length(pn)} containing the estimates of the extreme quantile at level given in pn
#' \item \code{VarCov} \eqn{2 \times 2} variance-covariance matrix of (gamma, sigma)
#' \item \code{Q.VC} variance of Q.est
#' \item \code{msg} character string containing an output message on the result of the frequentist estimation procedure
#' }
#' }
#' @seealso
#' \code{\link[evd]{fpot}}
#' @references
#' Dombry, C., S. Padoan and S. Rizzelli (2025). Asymptotic theory for Bayesian inference and prediction: from the ordinary to a conditional Peaks-Over-Threshold method, arXiv:2310.06720v2.
#' @export
#' @examples
#' \dontrun{
#' # generate data
#' set.seed(1234)
#' n <- 500
#' samp <- evd::rfrechet(n,0,3,4)
#' # set effective sample size and threshold
#' k <- 50
#' threshold <- sort(samp,decreasing = TRUE)[k+1]
#' # preliminary mle estimates of scale and shape parameters
#' mlest <- evd::fpot(samp, threshold)
#' # empirical bayes procedure
#' proc <- estPOT(
#' samp,
#' k = k,
#' pn = c(0.01, 0.005),
#' type = "continuous",
#' method = "bayesian",
#' prior = "empirical",
#' start = as.list(mlest$estimate),
#' sig0 = 0.1)
#' }
#' @importFrom evd fpot
#' @importFrom crch dtt
estPOT <- function(
data,
k = 10L,
pn = NULL,
type = c("continuous", "discrete"),
method = c("bayesian", "frequentist"),
prior = "empirical",
start = NULL,
sig0 = NULL,
nsim = 5000L,
burn = 1000L,
p = 0.234,
optim.method = "Nelder-Mead",
control = NULL,
...
) {
  type <- match.arg(type)
  method <- match.arg(method)
if (type == "continuous") {
res <- cPOT(
data,
k,
pn,
method,
prior,
start,
sig0,
nsim,
burn,
p,
optim.method,
control,
verbose = TRUE
)
} else {
res <- dPOT(
data,
k,
pn,
method,
prior,
start,
sig0,
nsim,
burn,
p,
optim.method,
control,
verbose = TRUE
)
}
  return(res)
}
#' Bayesian extreme quantile
#'
#' Given posterior samples for the parameters of the continuous or discrete generalized Pareto distribution,
#' return the posterior mean and \eqn{1-\alpha} level credibility intervals of the extreme quantile
#'
#' @param threshold threshold for the generalized Pareto model, corresponding to the \eqn{n-k}th order statistic of the sample
#' @param postsamp a \code{m} by \code{2} matrix with columns containing the posterior samples of scale and shape parameters of the generalized Pareto distribution, respectively
#' @param k integer, number of exceedances for the generalized Pareto
#' @param n integer, number of observations in the full sample. Must be greater than \code{k}
#' @param retp double indicating the value of the return period
#' @param alpha level for credibility interval. Default: 0.05 giving 95\% credibility intervals
#' @param type string indicating distribution types. Default: \code{c('continuous','discrete')}
#' @return a list with components
#' \itemize{
#' \item \code{mQ} posterior mean of the extreme quantile
#' \item \code{ciQ} vector of dimension 2 returning the \eqn{\alpha/2} and \eqn{1-\alpha/2} empirical quantiles of the posterior distribution of the extreme quantile
#' }
#' @export
#' @examples
#' \dontrun{
#' # generate data
#' set.seed(1234)
#' n <- 500
#' samp <- evd::rfrechet(500,0,3,4)
#' # set effective sample size and threshold
#' k <- 50
#' threshold <- sort(samp,decreasing = TRUE)[k+1]
#' # preliminary mle estimates of scale and shape parameters
#' mlest <- evd::fpot(samp, threshold)
#' # empirical bayes procedure
#' proc <- estPOT(
#' samp,
#' k = k,
#' pn = c(0.01, 0.005),
#' type = "continuous",
#' method = "bayesian",
#' prior = "empirical",
#' start = as.list(mlest$estimate),
#' sig0 = 0.1)
#' # extreme quantile corresponding to a return period of 100
#' extBQuant(
#' proc$t,proc$post_sample,
#' k,
#' n,
#' 100,
#' 0.05,
#' type = "continuous")
#' }
extBQuant <- function(
threshold,
postsamp,
k,
n,
retp,
alpha = 0.05,
type = c("continuous", "discrete")
) {
if (type == "continuous") {
out <- threshold +
postsamp[, 1] * ((k / (n * 1 / retp))^postsamp[, 2] - 1) / postsamp[, 2]
mout <- mean(out)
qout <- quantile(out, c(alpha / 2, (1 - alpha / 2)))
} else {
out <- threshold +
postsamp[, 1] * ((k / (n * 1 / retp))^postsamp[, 2] - 1) / postsamp[, 2] -
1
mout <- floor(mean(out))
qout <- floor(quantile(out, c(alpha / 2, (1 - alpha / 2))))
}
return(list("mQ" = mout, "ciQ" = qout))
}
#' Predictive posterior density of peak-over-threshold models
#'
#' Given posterior samples for the parameters of the continuous or discrete generalized Pareto distribution, return the predictive posterior density
#' of a peak above an intermediate or extreme threshold using the threshold stability property.
#'
#' @param x vector of length \code{r} containing the points at which to evaluate the density
#' @param postsamp a \code{m} by \code{2} matrix with columns containing the posterior samples of scale and shape parameters of the generalized Pareto distribution, respectively
#' @param threshold threshold for the generalized Pareto model, corresponding to the \eqn{n-k}th order statistic of the sample
#' @param type data type, either \code{"continuous"} or \code{"discrete"} for count data.
#' @param extrapolation logical; if \code{TRUE}, extrapolate using the threshold stability property
#' @param k integer, number of exceedances for the generalized Pareto (only used if \code{extrapolation=TRUE})
#' @param n integer, number of observations in the full sample. Must be greater than \code{k} (only used if \code{extrapolation=TRUE})
#' @param p scalar tail probability for the extrapolation. Must be smaller than \eqn{k/n} (only used if \code{extrapolation=TRUE})
#' @return a vector of length \code{r} of posterior predictive density values associated to \code{x}
#' @export
#' @examples
#' \dontrun{
#' # generate data
#' set.seed(1234)
#' n <- 500
#' samp <- evd::rfrechet(n,0,3,4)
#' # set effective sample size and threshold
#' k <- 50
#' threshold <- sort(samp,decreasing = TRUE)[k+1]
#' # preliminary mle estimates of scale and shape parameters
#' mlest <- evd::fpot(samp, threshold)
#' # empirical bayes procedure
#' proc <- estPOT(
#' samp,
#' k = k,
#' pn = c(0.01, 0.005),
#' type = "continuous",
#' method = "bayesian",
#' prior = "empirical",
#' start = as.list(mlest$estimate),
#' sig0 = 0.1)
#' # predictive density estimation
#' yg <- seq(0, 50, by = 2)
#' nyg <- length(yg)
#' predDens_int <- predDens(
#' yg,
#' proc$post_sample,
#' proc$t,
#' "continuous",
#' extrapolation = FALSE)
#' predDens_ext <- predDens(
#' yg,
#' proc$post_sample,
#' proc$t,
#' "continuous",
#' extrapolation = TRUE,
#' p = 0.001,
#' k = k,
#' n = n)
#' # plot
#' plot(
#' x = yg,
#' y = predDens_int,
#' type = "l",
#' lwd = 2,
#' col = "dodgerblue",
#' ylab = "",
#' main = "Predictive posterior density")
#' lines(
#' x = yg,
#' y = predDens_ext,
#' lwd = 2,
#' col = "violet")
#' legend(
#' "topright",
#' legend = c("Intermediate threshold", "Extreme threshold"),
#' lwd = 2,
#' col = c("dodgerblue", "violet"))
#' }
predDens <- function(
x,
postsamp,
threshold,
type = c("continuous", "discrete"),
extrapolation = FALSE,
p,
k,
n
) {
  type <- match.arg(type)
  extrapolation <- isTRUE(extrapolation)
if (extrapolation) {
stopifnot(
length(k) == 1L,
length(n) == 1L,
length(p) == 1L
)
stopifnot(
k <= n,
p < 0.5,
p > 0
)
}
nx <- length(x)
out <- numeric(nx)
ns <- nrow(postsamp)
if (type == "continuous" & extrapolation) {
locExt <- threshold +
postsamp[, 1] * ((n / k * p)^(-postsamp[, 2]) - 1) / postsamp[, 2]
scaleExt <- postsamp[, 1] * (n / k * p)^(-postsamp[, 2])
for (i in 1:nx) {
out[i] <- mean(apply(
cbind(locExt, scaleExt, postsamp[, 2]),
1,
Fdgpd_c,
x = x[i]
))
}
} else if (type == "continuous" & !extrapolation) {
for (i in 1:nx) {
out[i] <- mean(apply(
cbind(rep(threshold, ns), postsamp),
1,
Fdgpd_c,
x = x[i]
))
}
} else if (type == "discrete" & extrapolation) {
locExt <- threshold +
floor(postsamp[, 1] * ((n / k * p)^(-postsamp[, 2]) - 1) / postsamp[, 2])
scaleExt <- postsamp[, 1] * (n / k * p)^(-postsamp[, 2])
for (i in 1:nx) {
out[i] <- mean(apply(
cbind(locExt, scaleExt, postsamp[, 2]),
1,
Fdgpd_d,
x = x[i]
))
}
} else {
for (i in 1:nx) {
out[i] <- mean(apply(
cbind(rep(threshold, ns), postsamp),
1,
Fdgpd_d,
x = x[i]
))
}
}
return(out)
}
#' Predictive quantile based on the generalized Pareto model
#'
#' Bayesian generalized Pareto-based predictive quantile for continuous and discrete predictive distributions conditioned on intermediate and extreme levels.
#'
#' @param qlev double, quantile level
#' @param postsamp a \code{m} by \code{2} matrix with columns containing the posterior samples of scale and shape parameters of the generalized Pareto distribution, respectively
#' @param threshold threshold for the generalized Pareto model, corresponding to the \eqn{n-k}th order statistic of the sample
#' @param lb double, the lower bound of the admissible region for the quantile value
#' @param ub double, the upper bound of the admissible region for the quantile value
#' @param type data type, either \code{"continuous"} or \code{"discrete"} for count data.
#' @param extrapolation logical; if \code{TRUE}, extrapolate using the threshold stability property
#' @param k integer, number of exceedances for the generalized Pareto (only used if \code{extrapolation=TRUE})
#' @param n integer, number of observations in the full sample. Must be greater than \code{k} (only used if \code{extrapolation=TRUE})
#' @param p scalar tail probability for the extrapolation. Must be smaller than \eqn{k/n} (only used if \code{extrapolation=TRUE})
#' @return a double indicating the value of the quantile
#' @examples
#' \dontrun{
#' # generate data
#' set.seed(1234)
#' n <- 500
#' samp <- evd::rfrechet(n,0,3,4)
#' # set effective sample size and threshold
#' k <- 50
#' threshold <- sort(samp,decreasing = TRUE)[k+1]
#' # preliminary mle estimates of scale and shape parameters
#' mlest <- evd::fpot(samp, threshold)
#' # empirical bayes procedure
#' proc <- estPOT(
#' samp,
#' k = k,
#' pn = c(0.01, 0.005),
#' type = "continuous",
#' method = "bayesian",
#' prior = "empirical",
#' start = as.list(mlest$estimate),
#' sig0 = 0.1)
#' # predictive density estimation
#' yg <- seq(0, 50, by = 2)
#' nyg <- length(yg)
#' predDens_int <- predDens(
#' yg,
#' proc$post_sample,proc$t,
#' "continuous",
#' extrapolation=FALSE)
#' predQuant_int <- predQuant(
#' 0.5,
#' proc$post_sample,
#' proc$t,
#' proc$t + 0.01,
#' 50,
#' "continuous",
#' extrapolation = FALSE)
#' predDens_ext <- predDens(
#' yg,
#' proc$post_sample,
#' proc$t,
#' "continuous",
#' extrapolation = TRUE,
#' p = 0.001,
#' k = k,
#' n = n)
#' predQuant_ext <- predQuant(
#' 0.5,
#' proc$post_sample,
#' proc$t,
#' proc$t + 0.01,
#' 100,
#' "continuous",
#' extrapolation = TRUE,
#' p = 0.005,
#' k = k,
#' n = n)
#' }
#' @export
predQuant <- function(
qlev,
postsamp,
threshold,
lb,
ub,
type = c("continuous", "discrete"),
extrapolation = FALSE,
p,
k,
n
) {
  type <- match.arg(type)
  extrapolation <- isTRUE(extrapolation)
if (extrapolation) {
stopifnot(
length(k) == 1L,
length(n) == 1L,
length(p) == 1L
)
stopifnot(
k <= n,
p < 0.5,
p > 0
)
}
ns <- nrow(postsamp)
if (type == "continuous" & extrapolation) {
locExt <- threshold +
postsamp[, 1] * ((n / k * p)^(-postsamp[, 2]) - 1) / postsamp[, 2]
scaleExt <- postsamp[, 1] * (n / k * p)^(-postsamp[, 2])
out <- Bqgpd_c(
qlev,
post = cbind(locExt, scaleExt, postsamp[, 2]),
lower = lb,
upper = ub
)
} else if (type == "continuous" & !extrapolation) {
out <- Bqgpd_c(
qlev,
post = cbind(rep(threshold, ns), postsamp),
lower = lb,
upper = ub
)
} else if (type == "discrete" & extrapolation) {
locExt <- threshold +
floor(postsamp[, 1] * ((n / k * p)^(-postsamp[, 2]) - 1) / postsamp[, 2])
scaleExt <- postsamp[, 1] * (n / k * p)^(-postsamp[, 2])
out <- Bqgpd_d(
qlev,
post = cbind(locExt, scaleExt, postsamp[, 2]),
lower = lb,
upper = ub
)
} else {
out <- Bqgpd_d(
qlev,
post = cbind(rep(threshold, ns), postsamp),
lower = lb,
upper = ub
)
}
return(out)
}
#' Test on the effect of concomitant covariate on the extremes of the response variable
#'
#' Given observed data, perform a Kolmogorov-Smirnov type test comparing the cumulative distribution function of the concomitant covariate, defined as \eqn{X \mid Y > t}, with \eqn{t} being the threshold,
#' against the cumulative distribution function of the random vector of covariate.
#'
#' @param data design matrix of dimension \code{n} by \code{2} containing the complete data for the dependent variable (first column) and covariate (second column) in [0,1]
#' @param k integer, number of exceedances for the generalized Pareto
#' @param M integer, number of samples to draw from the posterior distribution of the law of the concomitant covariate. Default: 1000
#' @param ng length of covariate grid
#' @param xg vector of covariate grid of dimension \code{ng} by \code{1} containing a sequence between zero and the last value of the corresponding covariate
#' @param bayes logical indicating the bootstrap method. If \code{FALSE}, a frequentist bootstrap on the empirical cumulative distribution function of the concomitant covariate is performed. Default to \code{TRUE}
#' @param C integer, hyperparameter entering the posterior distribution of the law of the concomitant covariate. Default: 5
#' @param alpha double, significance level for the critical value of the test, computed as the \eqn{(1-alpha)} level empirical quantile of the sample of distances between the empirical cumulative distribution function of the concomitant and complete covariate. Default: 0.05
#' @return a list with components
#' \itemize{
#' \item \code{Delta} maximum observed distance between the empirical distribution functions of the concomitant and complete covariate
#' \item \code{DeltaM} vector of length M containing the sample of maximum distances between the empirical distribution functions of the concomitant and complete covariate
#' \item \code{critical} double, critical value for the test statistic, computed as the \eqn{(1-alpha)} level empirical quantile of DeltaM
#' \item \code{pval} double, p-value
#' }
#' @examples
#' \dontrun{
#' # generate data
#' set.seed(1234)
#' n <- 500
#' samp <- evd::rfrechet(n,0,1:n,4)
#' # set effective sample size and threshold
#' k <- 50
#' threshold <- sort(samp,decreasing = TRUE)[k+1]
#' # preliminary mle estimates of scale and shape parameters
#' mlest <- evd::fpot(samp,
#' threshold,
#' control=list(maxit = 500))
#' # empirical bayes procedure
#' proc <- estPOT(
#' samp,
#' k = k,
#' pn = c(0.01, 0.005),
#' type = "continuous",
#' method = "bayesian",
#' prior = "empirical",
#' start = as.list(mlest$estimate),
#' sig0 = 0.1)
#' # conditional predictive density estimation
#' yg <- seq(0, 50, by = 2)
#' nyg <- length(yg)
#' # estimation of scedasis function
#' # setting
#' M <- 1e3
#' C <- 5
#' alpha <- 0.05
#' bw <- .5
#' nsim <- 5000
#' burn <- 1000
#' # create covariate
#' # in sample obs
#' n_in = n
#' # number of years ahead
#' nY = 1
#' n_out = 365 * nY
#' # total obs
#' n_tot = n_in + n_out
#' # total covariate (in+out sample period)
#' x <- seq(0, 1, length = n_tot)
#' # in sample grid dimension for covariate
#' ng_in <- 150
#' xg <- seq(0, x[n_in], length = ng_in)
#' # in+out of sample grid
#' xg <- c(xg,
#' seq(x[n_in + 1],
#' x[(n_tot)],
#' length = ng_in))
#' # in+out sample grid dimension
#' nxg <- length(xg)
#' xg <- array(xg, c(nxg, 1))
#' # in sample observations
#' samp_in <- samp[1:n_in]
#' ssamp_in <- sort(samp_in, decreasing = TRUE, index.return = TRUE)
#' x_in <- x[1:n_in] # in sample covariate
#' xs <- x_in[ssamp_in$ix[1:k]] # in sample concomitant covariate
#' # test on covariate effect
#' test <- scedastic.test(
#' cbind(samp, x[1:n]),
#' k,
#' M,
#' array(xg[1:ng_in], c(ng_in, 1)),
#' ng_in,
#' TRUE,
#' C,
#' 0.05
#' )
#' }
#' @export
#' @references
#' Dombry, C., S. Padoan and S. Rizzelli (2025). Asymptotic theory for Bayesian inference and prediction: from the ordinary to a conditional Peaks-Over-Threshold method, arXiv:2310.06720v2.
#'
scedastic.test <- function(
data,
k,
M = 1000L,
xg,
ng,
bayes = TRUE,
C = 5L,
alpha = 0.05
) {
# SORT DATA
sdata <- sort(data[, 1], decreasing = TRUE, index.return = TRUE)
xs <- data[sdata$ix[1:k], 2]
# COMPUTE THE EMPIRICAL DISTRIBUTION RELATIVE TO P*
Fsk <- ecdf(xs)
# COMPUTE THE EMPIRICAL DISTRIBUTION RELATIVE TO P
Fn <- ecdf(data[, 2])
# COMPUTE MAX DISTANCE
Delta <- sqrt(k) * max(abs(Fsk(xg) - Fn(xg)))
### BOOTSTRAP STEP:
fboot <- function(xs, k, xg) {
# weights generation
w <- rexp(k)
w <- w / sum(w) - 1 / k
myf <- function(data, xg, w) return(sum(w * (data <= xg)))
return(sqrt(k) * max(abs(apply(xg, 2, myf, data = xs, w = w))))
}
### BAYESIAN BOOTSTRAP:
bboot <- function(shape1, shape2, k) {
theta <- rdirichlet(n = 1, shape1 + shape2)
Fsb <- cumsum(theta)
Fn <- 1 / k * cumsum(shape2)
return(sqrt(k) * max(abs(Fsb - Fn)))
}
if (isTRUE(bayes)) {
npoints <- function(I, xs) return(sum((xs >= I[1]) & (xs <= I[2])))
shape1 <- C * diff(xg)
shape2 <- apply(cbind(xg[1:(ng - 1)], xg[2:ng]), 1, npoints, xs = xs)
DeltaM <- replicate(M, bboot(shape1, shape2, k))
} else {
DeltaM <- replicate(M, fboot(xs, k, xg))
}
return(list(
Delta = Delta,
DeltaM = DeltaM,
critical = quantile(DeltaM, 1 - alpha),
pval = mean(Delta < DeltaM)
))
}
#' Test on the effect of concomitant covariate on the extremes of the response variable
#'
#' Given observed data, perform a Kolmogorov-Smirnov type test comparing the cumulative distribution function of the concomitant covariate, defined as \eqn{X \mid Y > t}, with \eqn{t} being the threshold,
#' against the cumulative distribution function of the random vector of covariate.
#'
#' @param data design matrix of dimension \code{n} by \code{2} containing the complete data for the dependent variable (first column) and covariate (second column) in [0,1]
#' @param k integer, number of exceedances for the generalized Pareto
#' @param M integer, number of samples to draw from the posterior distribution of the law of the concomitant covariate. Default: 1000
#' @param ng length of covariate grid
#' @param xg vector of covariate grid of dimension \code{ng} by \code{1} containing a sequence between zero and the last value of the corresponding covariate
#' @param bayes logical indicating the bootstrap method. If \code{FALSE}, a frequentist bootstrap on the empirical cumulative distribution function of the concomitant covariate is performed. Default to \code{TRUE}
#' @param C integer, hypermparameter entering the posterior distributyion of the law of the concomitant covariate. Default: 5
#' @param alpha double, significance level for the critical value of the test, computed as the \eqn{(1-alpha)} level empirical quantile of the sample of distances between the empirical cumulative distribution function of the concomitant and complete covariate. Default: 0.05
#' @return a list with components
#' \itemize{
#' \item \code{Delta} maximum observed distance between the empirical distribution functions of the concomitant and complete covariate
#' \item \code{DeltaM} vector of length \code{M} containing the bootstrap sample of maximum distances between the empirical distribution functions of the concomitant and complete covariate
#' \item \code{critical} double, critical value for the test statistic, computed as the \eqn{(1-alpha)} level empirical quantile of DeltaM
#' \item \code{pval} double, p-value
#' }
#' @keywords internal
#' @export
schedastic.test <- function(
  data,
  k,
  M = 1000L,
  xg,
  ng,
  bayes = TRUE,
  C = 5L,
  alpha = 0.05
) {
  .Deprecated(new = "scedastic.test")
  scedastic.test(
    data = data,
    k = k,
    M = M,
    xg = xg,
    ng = ng,
    bayes = bayes,
    C = C,
    alpha = alpha
  )
}
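# Migration note: the misspelled alias above warns via .Deprecated() and then
# forwards all arguments unchanged to scedastic.test(); existing scripts keep
# working but should switch to the new spelling.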
#' Estimation of the scedasis function
#'
#' Kernel-based method for the estimation of the scedasis function. Given the values of the complete and concomitant covariate, defined as \eqn{X \mid Y > t}, with \eqn{t} being the threshold, it returns
#' a matrix containing a posterior sample of the scedasis function for each covariate value.
#'
#' @param N integer, number of posterior samples of the scedasis function to draw
#' @param x one-dimensional vector of in-sample covariate in [0,1]
#' @param xs one-dimensional vector of concomitant covariate
#' @param xg one-dimensional vector of length m containing the grid of in-sample and possibly out-sample covariate in [0,1]
#' @param bw double, bandwidth for the computation of the kernel
#' @param k integer, number of exceedances for the generalized Pareto
#' @param C integer, hyperparameter entering the posterior distribution of the law of the concomitant covariate. Default: 5
#' @return an \code{N} by \code{m} matrix containing the values of the posterior samples of the scedasis function (rows) for each value of \code{xg} (columns)
#' @export
#' @examples
#' \dontrun{
#' # generate data
#' set.seed(1234)
#' n <- 500
#' samp <- evd::rfrechet(n, 0, 1:n, 4)
#' # set effective sample size and threshold
#' k <- 50
#' threshold <- sort(samp, decreasing = TRUE)[k+1]
#' # preliminary mle estimates of scale and shape parameters
#' mlest <- evd::fpot(
#' samp,
#' threshold,
#' control = list(maxit = 500))
#' # empirical bayes procedure
#' proc <- estPOT(
#' samp,
#' k = k,
#' pn = c(0.01, 0.005),
#' type = "continuous",
#' method = "bayesian",
#' prior = "empirical",
#' start = as.list(mlest$estimate),
#' sig0 = 0.1)
#' # conditional predictive density estimation
#' yg <- seq(0, 50, by = 2)
#' nyg <- length(yg)
#' # estimation of scedasis function
#' # setting
#' M <- 1e3
#' C <- 5
#' alpha <- 0.05
#' bw <- .5
#' nsim <- 5000
#' burn <- 1000
#' # create covariate
#' # in sample obs
#' n_in <- n
#' # number of years ahead
#' nY <- 1
#' n_out <- 365 * nY
#' # total obs
#' n_tot <- n_in + n_out
#' # total covariate (in+out sample period)
#' x <- seq(0, 1, length = n_tot)
#' # in sample grid dimension for covariate
#' ng_in <- 150
#' xg <- seq(0, x[n_in], length = ng_in)
#' # in+out of sample grid
#' xg <- c(xg, seq(x[n_in + 1], x[(n_tot)], length = ng_in))
#' # in+out sample grid dimension
#' nxg <- length(xg)
#' xg <- array(xg, c(nxg, 1))
#' # in sample observations
#' samp_in <- samp[1:n_in]
#' ssamp_in <- sort(samp_in, decreasing = TRUE, index.return = TRUE)
#' x_in <- x[1:n_in] # in sample covariate
#' xs <- x_in[ssamp_in$ix[1:k]] # in sample concomitant covariate
#' # estimate scedasis function over the in and out of sample period
#' res_stat <- apply(
#' xg,
#' 1,
#' cpost_stat,
#' N = nsim - burn,
#' x = x_in,
#' xs = xs,
#' bw = bw,
#' k = k,
#' C = C
#' )
#' }
cpost_stat <- function(N, x, xs, xg, bw, k, C = 5L) {
  # number of concomitant covariate values within the bw-window centred at xg
  sdist <- sum(abs(xs - xg) <= bw)
  # posterior Beta parameters: Beta(C * bw, C * (1 - bw)) prior updated with
  # the window counts among the k exceedances
  a <- C * bw + sdist
  b <- C * (1 - bw) + k - sdist
  # empirical frequency of the complete covariate in the same window
  den <- mean(abs(x - xg) <= bw)
  return(rbeta(N, a, b) / den)
}
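# A minimal sketch of a single-point evaluation (toy values, kept commented):
# the draws are Beta(a, b) samples rescaled by the empirical frequency of the
# complete covariate in the same bandwidth window, giving a posterior sample
# of the scedasis function at xg:
# set.seed(1)
# x_toy <- runif(400) # full in-sample covariate
# xs_toy <- runif(40) # concomitant covariate of the k = 40 exceedances
# draws <- cpost_stat(N = 1000, x = x_toy, xs = xs_toy, xg = 0.5,
#                     bw = 0.1, k = 40, C = 5L)
# c(mean = mean(draws), quantile(draws, c(0.025, 0.975)))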
#' Conditional Bayesian extreme quantile
#'
#' Given posterior samples for the parameters of the continuous or discrete generalized Pareto distribution and scedasis function for a set of covariates,
#' return the posterior mean and \eqn{1-\alpha} level credibility intervals of the extreme quantile for each value of the scedasis function
#'
#' @param cx an \code{m} by \code{p} matrix, obtained by evaluating the scedasis function at each of the \code{p} covariate values for every one of the \code{m} posterior draws
#' @param postsamp an \code{m} by \code{2} matrix whose columns contain the posterior samples of the scale and shape parameters of the generalized Pareto distribution, respectively
#' @param threshold threshold for the generalized Pareto model, corresponding to the \eqn{n-k}th order statistic of the sample
#' @param k integer, number of exceedances for the generalized Pareto
#' @param n integer, number of observations in the full sample. Must be greater than \code{k}
#' @param qlev double indicating the percentile level at which the extreme quantile is estimated. Must be smaller than \eqn{k/n}
#' @param type string indicating the distribution type. Default: \code{c('continuous', 'discrete')}
#' @param confint type of credible interval. Default: \code{c('asymmetric', 'symmetric')}
#' @param alpha level for the credible interval. Default: 0.05, giving 95\% credible intervals
#' @param ... additional arguments, for backward compatibility
#' @return a list with components
#' \itemize{
#' \item \code{mQ} posterior mean of the extreme quantile
#' \item \code{ciQ} vector of dimension 2 returning the \eqn{\alpha/2} and \eqn{1-\alpha/2} empirical quantiles of the posterior distribution of the extreme quantile
#' }
#' @export
#' @examples
#' \dontrun{
#' # generate data
#' set.seed(1234)
#' n <- 500
#' samp <- evd::rfrechet(n, 0, 1:n, 4)
#' # set effective sample size and threshold
#' k <- 50
#' threshold <- sort(samp, decreasing = TRUE)[k+1]
#' # preliminary mle estimates of scale and shape parameters
#' mlest <- evd::fpot(
#' samp,
#' threshold,
#' control = list(maxit = 500))
#' # empirical bayes procedure
#' proc <- estPOT(
#' samp,
#' k = k,
#' pn = c(0.01, 0.005),
#' type = "continuous",
#' method = "bayesian",
#' prior = "empirical",
#' start = as.list(mlest$estimate),
#' sig0 = 0.1)
#' # conditional predictive density estimation
#' yg <- seq(0, 50, by = 2)
#' nyg <- length(yg)
#' # estimation of scedasis function
#' # setting
#' M <- 1e3
#' C <- 5
#' alpha <- 0.05
#' bw <- .5
#' nsim <- 5000
#' burn <- 1000
#' # create covariate
#' # in sample obs
#' n_in <- n
#' # number of years ahead
#' nY <- 1
#' n_out <- 365 * nY
#' # total obs
#' n_tot <- n_in + n_out
#' # total covariate (in+out sample period)
#' x <- seq(0, 1, length = n_tot)
#' # in sample grid dimension for covariate
#' ng_in <- 150
#' xg <- seq(0, x[n_in], length = ng_in)
#' # in+out of sample grid
#' xg <- c(xg, seq(x[n_in + 1], x[(n_tot)], length = ng_in))
#' # in+out sample grid dimension
#' nxg <- length(xg)
#' xg <- array(xg, c(nxg, 1))
#' # in sample observations
#' samp_in <- samp[1:n_in]
#' ssamp_in <- sort(samp_in, decreasing = TRUE, index.return = TRUE)
#' x_in <- x[1:n_in] # in sample covariate
#' xs <- x_in[ssamp_in$ix[1:k]] # in sample concomitant covariate
#' # estimate scedasis function over the in and out of sample period
#' res_stat <- apply(
#' xg,
#' 1,
#' cpost_stat,
#' N = nsim - burn,
#' x = x_in,
#' xs = xs,
#' bw = bw,
#' k = k,
#' C = C
#' )
#' # conditional predictive posterior density
#' probq <- 1 - 0.99
#' res_AQ <- extBQuantx(
#' cx = res_stat,
#' postsamp = proc$post_sample,
#' threshold = proc$t,
#' n = n,
#' qlev = probq,
#' k = k,
#' type = "continuous",
#' confint = "asymmetric")
#' }
extBQuantx <- function(
  cx,
  postsamp,
  threshold,
  n,
  qlev,
  k,
  type = c("continuous", "discrete"),
  confint = c("asymmetric", "symmetric"),
  alpha = 0.05,
  ...
) {
  args <- list(...)
  sig.ps <- postsamp[, 1]
  gam.ps <- postsamp[, 2]
  # honour the old misspelled argument name for backward compatibility
  if (!is.null(args$confinty)) {
    confint <- args$confinty
  }
  type <- match.arg(type)
  confint <- match.arg(confint, choices = c("asymmetric", "symmetric"))
  # select the internal quantile routine matching the data type and the
  # requested credible-interval shape
  qfun <- if (type == "continuous") {
    if (confint == "asymmetric") QAhat_c else QShat_c
  } else {
    if (confint == "asymmetric") QAhat_d else QShat_d
  }
  out <- apply(
    X = cx,
    MARGIN = 2,
    FUN = qfun,
    threshold = threshold,
    sig.ps = sig.ps,
    gam.ps = gam.ps,
    n = n,
    qlev = qlev,
    k = k,
    alpha = alpha
  )
  return(out)
}
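# Backward-compatibility note: scripts that pass the old misspelled argument
# `confinty =` still work, since it is caught from `...` and overrides
# `confint`; new code should prefer `confint =`, e.g. (sketch, with objects
# as in the roxygen example above):
# extBQuantx(cx = res_stat, postsamp = proc$post_sample, threshold = proc$t,
#            n = n, qlev = probq, k = k, type = "continuous",
#            confint = "symmetric")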
#' Conditional predictive posterior density of peaks-over-threshold models
#'
#' Given posterior samples for the parameters of the continuous or discrete generalized Pareto distribution, together with the scedasis function evaluated at a set of covariate values for each posterior draw,
#' return the predictive posterior density of a peak above an intermediate or extreme threshold, the latter reached through the threshold stability property.
#'
#' @param x vector of length \code{r} containing the points at which to evaluate the density
#' @param postsamp an \code{m} by \code{2} matrix whose columns contain the posterior samples of the scale and shape parameters of the generalized Pareto distribution, respectively
#' @param scedasis an \code{m} by \code{p} matrix, obtained by evaluating the scedasis function at each of the \code{p} covariate values for every one of the \code{m} posterior draws
#' @param threshold threshold for the generalized Pareto model, corresponding to the \eqn{n-k}th order statistic of the sample
#' @param type data type, either \code{"continuous"} or \code{"discrete"} for count data.
#' @param extrapolation logical; if \code{TRUE}, extrapolate using the threshold stability property
#' @param k integer, number of exceedances for the generalized Pareto (only used if \code{extrapolation=TRUE})
#' @param n integer, number of observations in the full sample. Must be greater than \code{k} (only used if \code{extrapolation=TRUE})
#' @param p scalar tail probability for the extrapolation. Must be smaller than \eqn{k/n} (only used if \code{extrapolation=TRUE})
#' @return a list with components
#' \itemize{
#' \item \code{x} the vector at which the posterior density is evaluated
#' \item \code{preddens} an \code{r} by \code{p} matrix of predictive density values corresponding to each combination of \code{x} and scedasis value
#' }
#' @export
#' @examples
#' \dontrun{
#' # generate data
#' set.seed(1234)
#' n <- 500
#' samp <- evd::rfrechet(n, 0, 1:n, 4)
#' # set effective sample size and threshold
#' k <- 50
#' threshold <- sort(samp, decreasing = TRUE)[k+1]
#' # preliminary mle estimates of scale and shape parameters
#' mlest <- evd::fpot(samp, threshold, control = list(maxit = 500))
#' # empirical bayes procedure
#' proc <- estPOT(
#' samp,
#' k = k,
#' pn = c(0.01, 0.005),
#' type = "continuous",
#' method = "bayesian",
#' prior = "empirical",
#' start = as.list(mlest$estimate),
#' sig0 = 0.1)
#' # conditional predictive density estimation
#' yg <- seq(0, 50, by = 2)
#' nyg <- length(yg)
#' # estimation of scedasis function
#' # setting
#' M <- 1e3
#' C <- 5
#' alpha <- 0.05
#' bw <- .5
#' nsim <- 5000
#' burn <- 1000
#' # create covariate
#' # in sample obs
#' n_in <- n
#' # number of years ahead
#' nY <- 1
#' n_out = 365 * nY
#' # total obs
#' n_tot <- n_in + n_out
#' # total covariate (in+out sample period)
#' x <- seq(0, 1, length = n_tot)
#' # in sample grid dimension for covariate
#' ng_in <- 150
#' xg <- seq(0, x[n_in], length = ng_in)
#' # in+out of sample grid
#' xg <- c(xg, seq(x[n_in + 1], x[(n_tot)], length = ng_in))
#' # in+out sample grid dimension
#' nxg <- length(xg)
#' xg <- array(xg, c(nxg, 1))
#' # in sample observations
#' samp_in <- samp[1:n_in]
#' ssamp_in <- sort(samp_in, decreasing = TRUE, index.return = TRUE)
#' x_in <- x[1:n_in] # in sample covariate
#' xs <- x_in[ssamp_in$ix[1:k]] # in sample concomitant covariate
#' # estimate scedasis function over the in and out of sample period
#' res_stat <- apply(
#' xg,
#' 1,
#' cpost_stat,
#' N = nsim - burn,
#' x = x_in,
#' xs = xs,
#' bw = bw,
#' k = k,
#' C = C
#' )
#' # conditional predictive posterior density
#' yg <- seq(500, 5000, by = 100)
#' nyg <- length(yg)
#' # intermediate threshold
#' predDens_intx <- predDensx(
#' x = yg,
#' postsamp = proc$post_sample,
#' scedasis = res_stat,
#' threshold = proc$t,
#' "continuous",
#' extrapolation = FALSE)
#' # extreme threshold
#' predDens_extx <- predDensx(
#' x = yg,
#' postsamp = proc$post_sample,
#' scedasis = res_stat,
#' threshold = proc$t,
#' "continuous",
#' extrapolation = TRUE,
#' p = 0.001,
#' k = k,
#' n = n)
#' # plot intermediate and extreme density conditional on a specific value of scedasis function
#' # disclaimer: to speed up the procedure, we specify a coarse grid
#' plot(
#' x = predDens_intx$x,
#' y = predDens_intx$preddens[,20],
#' type = "l",
#' lwd = 2,
#' col="dodgerblue",
#' ylab = "",
#' xlab = "yg",
#' main = "Conditional predictive posterior density")
#' lines(
#' x = predDens_extx$x,
#' y = predDens_extx$preddens[,20],
#' lwd = 2,
#' col = "violet")
#' legend("topright",
#' legend = c("Intermediate threshold","Extreme threshold"),
#' lwd = 2,
#' col = c("dodgerblue", "violet"))
#' }
predDensx <- function(
  x,
  postsamp,
  scedasis,
  threshold,
  type = c("continuous", "discrete"),
  extrapolation = FALSE,
  p,
  k,
  n
) {
  extrapolation <- isTRUE(extrapolation)
  if (missing(k) || missing(n) || missing(p)) {
    extrapolation <- FALSE
  }
  if (extrapolation) {
    stopifnot(
      length(k) == 1L,
      length(n) == 1L,
      length(p) == 1L
    )
    stopifnot(
      k <= n,
      p < 0.5,
      p > 0
    )
  }
  m <- nrow(postsamp)
  ps <- ncol(scedasis)
  r <- length(x)
  stopifnot(
    nrow(scedasis) == m,
    ncol(postsamp) == 2L,
    length(threshold) == 1L
  )
  out <- matrix(nrow = r, ncol = ps)
  type <- match.arg(type)
  # select the internal predictive density routine: the *2 variants
  # extrapolate beyond the threshold, the *1 variants do not
  dfun <- if (type == "continuous") {
    if (extrapolation) preddens2_c else preddens1_c
  } else {
    if (extrapolation) preddens2_d else preddens1_d
  }
  for (i in seq_len(ps)) {
    tmp <- apply(
      X = cbind(postsamp, scedasis[, i]),
      MARGIN = 1,
      FUN = function(pars) {
        if (extrapolation) {
          dfun(theta = pars, p = p, y = x, t = threshold, k = k, n = n)
        } else {
          dfun(theta = pars, y = x, t = threshold)
        }
      }
    )
    # average the conditional density over the posterior draws
    out[, i] <- rowMeans(tmp)
  }
  list(x = x, preddens = out)
}
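# A minimal shape-check sketch (synthetic posterior draws, purely
# illustrative; kept commented). It relies on the file's hidden preddens1_c
# routine via predDensx. With m fake posterior draws of (scale, shape) and a
# scedasis matrix for p = 3 covariate values, the result is an r x p matrix
# with one predictive density column per covariate value:
# set.seed(1)
# m <- 200
# postsamp_toy <- cbind(rgamma(m, 50, 50), rnorm(m, 0.25, 0.05))
# sced_toy <- matrix(rgamma(m * 3, 20, 20), m, 3)
# yg <- seq(1.01, 50, length.out = 200)
# pd <- predDensx(x = yg, postsamp = postsamp_toy, scedasis = sced_toy,
#                 threshold = 1, type = "continuous", extrapolation = FALSE)
# dim(pd$preddens) # 200 x 3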
#' Predictive quantile of peaks-over-threshold models
#'
#' Given an estimated predictive density evaluated at a grid of points, return the smallest grid point at which the predictive distribution
#' function reaches a given probability level. Namely, \eqn{Q(p) = \inf\{ x \in \mathbb{R}: p \leq F(x)\}}, where \eqn{F(x)} is the
#' predictive distribution function of a peaks-over-threshold object.
#' WARNING: this function will not be exported in a future version of the package.
#'
#' @param x vector of grid of points at which the predictive density is evaluated
#' @param y vector containing estimates of the predictive density, e.g., the posterior predictive density
#' @param p double indicating the probability level corresponding to the quantile
#' @return double containing the value of the quantile
#' @export
#' @keywords internal
#' @examples
#' \dontrun{
#' # generate data
#' set.seed(1234)
#' n <- 500
#' samp <- evd::rfrechet(n, 0, 1:n, 4)
#' # set effective sample size and threshold
#' k <- 50
#' threshold <- sort(samp, decreasing = TRUE)[k+1]
#' # preliminary mle estimates of scale and shape parameters
#' mlest <- evd::fpot(samp, threshold, control = list(maxit = 500))
#' # empirical bayes procedure
#' proc <- estPOT(
#' samp,
#' k = k,
#' pn = c(0.01, 0.005),
#' type = "continuous",
#' method = "bayesian",
#' prior = "empirical",
#' start = as.list(mlest$estimate),
#' sig0 = 0.1)
#' # conditional predictive density estimation
#' yg <- seq(0, 50, by = 2)
#' nyg <- length(yg)
#' # estimation of scedasis function
#' # setting
#' M <- 1e3
#' C <- 5
#' alpha <- 0.05
#' bw <- .5
#' nsim <- 5000
#' burn <- 1000
#' # create covariate
#' # in sample obs
#' n_in <- n
#' # number of years ahead
#' nY <- 1
#' n_out <- 365 * nY
#' # total obs
#' n_tot <- n_in + n_out
#' # total covariate (in+out sample period)
#' x <- seq(0, 1, length = n_tot)
#' # in sample grid dimension for covariate
#' ng_in <- 150
#' xg <- seq(0, x[n_in], length = ng_in)
#' # in+out of sample grid
#' xg <- c(xg, seq(x[n_in + 1], x[(n_tot)], length = ng_in))
#' # in+out sample grid dimension
#' nxg <- length(xg)
#' xg <- array(xg, c(nxg, 1))
#' # in sample observations
#' samp_in <- samp[1:n_in]
#' ssamp_in <- sort(samp_in, decreasing = TRUE, index.return = TRUE)
#' x_in <- x[1:n_in] # in sample covariate
#' xs <- x_in[ssamp_in$ix[1:k]] # in sample concomitant covariate
#' # estimate scedasis function over the in and out of sample period
#' res_stat <- apply(
#' xg,
#' 1,
#' cpost_stat,
#' N = nsim - burn,
#' x = x_in,
#' xs = xs,
#' bw = bw,
#' k = k,
#' C = C
#' )
#' # conditional predictive posterior density
#' yg <- seq(500, 5000, by = 100)
#' nyg <- length(yg)
#' # intermediate threshold
#' predDens_intx <- predDensx(
#' x = yg,
#' postsamp = proc$post_sample,
#' scedasis = res_stat,
#' threshold = proc$t,
#' "continuous",
#' extrapolation = FALSE)
#' # extreme threshold
#' predDens_extx <- predDensx(
#' x = yg,
#' postsamp = proc$post_sample,
#' scedasis = res_stat,
#' threshold = proc$t,
#' "continuous",
#' extrapolation = TRUE,
#' p = 0.001,
#' k = k,
#' n = n)
#' # predictive conditional quantiles
#' predQuant_intxL <- predQuant_intxU <- predQuant_extxL <- predQuant_extxU <- numeric()
#' for (i in 1:nxg) {
#'   predQuant_intxU[i] <- quantF(yg, predDens_intx$preddens[, i], p = 0.975)
#'   predQuant_intxL[i] <- quantF(yg, predDens_intx$preddens[, i], p = 0.025)
#'   predQuant_extxU[i] <- quantF(yg, predDens_extx$preddens[, i], p = 0.975)
#'   predQuant_extxL[i] <- quantF(yg, predDens_extx$preddens[, i], p = 0.025)
#' }
#' }
quantF <- function(x, y, p) {
  # normalised cumulative sum of the density values approximates the
  # predictive cdf on the grid
  csy <- cumsum(y)
  cdfy <- csy / csy[length(y)]
  # smallest grid point at which the cdf reaches level p
  indx <- which(cdfy >= p)[1]
  return(x[indx])
}
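# A self-contained numerical check (kept commented): on a fine grid the
# cumulative-sum approximation recovers the quantiles of a known density up
# to the grid resolution, e.g. for the standard exponential:
# xg_toy <- seq(0, 20, by = 0.01)
# quantF(xg_toy, dexp(xg_toy), p = 0.5) # close to qexp(0.5) = 0.6931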
#' Test on tail homogeneity
#'
#' Given observed samples and effective sample size, return the results for a likelihood ratio-type test on tail homogeneity.
#'
#' @param y list containing the samples on which the test is to be performed
#' @param k integer, number of exceedances for the generalized Pareto
#' @param alpha double indicating the significance level for the test. Default: 0.05
#' @return a list containing
#' \itemize{
#' \item \code{gamHatP} the pooled tail index
#' \item \code{VarGamHatP} the variance of \code{gamHatP}
#' \item \code{CIGamHatP} \eqn{(1-\alpha)} level confidence interval for \code{gamHatP}
#' \item \code{BiasGamHatP} bias term of \code{gamHatP}
#' \item \code{logLikR} value of the likelihood ratio-type test statistic
#' \item \code{PVal} p-value of the test
#' }
#' @references
#' Daouia, A., S.A. Padoan and G. Stupfler (2024). Optimal weighted pooling for inference about
#' the tail index and extreme quantiles, \emph{Bernoulli}, 30(\bold{2}), pp. 1287–1312.
#' @export
#' @examples
#' \dontrun{
#' # generate two samples of data
#' set.seed(1234)
#' y1 <- evd::rgpd(500, 0, 1, 0.2)
#' y2 <- evd::rgpd(700, 0, 2, 0.7)
#' y <- list(y1 = y1, y2 = y2)
#' # set effective sample size
#' k <- 50
#' # perform test
#' test <- testTailHomo(y, k)
#' }
testTailHomo <- function(y, k, alpha = 0.05) {
  m <- length(y)
  n <- numeric(m)
  # estimate gamma, rho, beta, lambda for each sample
  gamma <- array(NA, dim = c(m, 4))
  for (i in 1:m) {
    n[i] <- length(y[[i]])
    gamma[i, ] <- gammaest(y[[i]], k)
  }
  # perform test on tail homogeneity
  testout <- PooledTailIndex(
    gamma[, 1],
    gamma[, 2],
    gamma[, 3],
    gamma[, 4],
    m = m,
    n = n,
    k = rep(k, m)
  )
  return(testout)
}
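# Interpretation sketch (kept commented; y and k as in the roxygen example
# above): the two samples have true tail indices 0.2 and 0.7, so the
# likelihood ratio-type statistic should be large and the p-value small:
# testTailHomo(y, k = 50)$PVal < 0.05 # TRUE rejects tail homogeneity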
#' Plot empirical Bayes inference results for continuous and discrete generalized Pareto distribution
#'
#' Given a sample of posterior draws of the scale or shape parameter, plot a histogram of the posterior sample on the density scale. The histogram is overlaid with the theoretical prior density (based on the MLE in the scale case) and a normal approximation to the posterior, and marked with the posterior mean, pointwise Wald-based normal 95\% confidence intervals for the mean of the sample, and pointwise credible intervals (asymmetric by design).
#'
#' @param x a vector of posterior samples
#' @param mle vector of length 2 containing the maximum likelihood estimator for the scale and shape parameters, respectively (only used if \code{param="scale"})
#' @param alpha level for intervals. Default: 0.05, giving 95\% confidence intervals
#' @param param character string indicating the parameter. Default: \code{c("scale", "shape")}
#' @param cols vector of length three containing colors for the posterior mean, confidence intervals, and credible intervals. Default: \code{c("mediumseagreen", "goldenrod", "gold4")}
#' @param ... additional arguments for the plotting function; only \code{xlab} and \code{main} are used
#' @return \code{NULL}, invisibly; the function is called for its side effect of creating a plot
#' @export
#' @examples
#' \dontrun{
#' # generate data
#' set.seed(1234)
#' n <- 500
#' samp <- evd::rfrechet(n, 0, 3, 4)
#' # set effective sample size and threshold
#' k <- 50
#' threshold <- sort(samp, decreasing = TRUE)[k+1]
#' # preliminary mle estimates of scale and shape parameters
#' mlest <- evd::fpot(samp, threshold)
#' # empirical bayes procedure
#' proc <- estPOT(
#' samp,
#' k = k,
#' pn = c(0.01, 0.005),
#' type = "continuous",
#' method = "bayesian",
#' prior = "empirical",
#' start = as.list(mlest$estimate),
#' sig0 = 0.1)
#' plotBayes(
#' proc$post_sample[,1],
#' mlest$estimate,
#' param = "scale")
#' plotBayes(
#' proc$post_sample[,2],
#' param = "shape")
#' }
plotBayes <- function(
  x,
  mle,
  alpha = 0.05,
  param = c("scale", "shape"),
  cols = c("mediumseagreen", "goldenrod", "gold4"),
  ...
) {
  cols <- rep(cols, length.out = 3L)
  postsample <- x
  args <- list(...)
  xlab <- args$xlab
  if (is.null(xlab)) {
    xlab <- ""
  }
  param <- match.arg(param)
  oldpar <- par(no.readonly = TRUE)
  par(mar = c(5, 5, 2, 1))
  on.exit(par(oldpar))
  cax <- 1.8
  caxlab <- 2.2
  alphalev <- 1 - alpha / 2
  # Prior densities
  priordens_fn <- switch(
    param,
    "scale" = function(x) {
      dgamma(x, 1, scale = mle[1])
    },
    "shape" = function(x) {
      dtt(x, df = 1, left = -1)
    }
  )
  # Extract label for main
  if (!is.null(args$main)) {
    label <- args$main
  } else {
    label <- paste(switch(param, scale = "Scale", shape = "Shape"), "parameter")
  }
  ran_y <- range(
    dnorm(postsample, mean(postsample), sd(postsample)),
    priordens_fn(postsample),
    hist(postsample, plot = FALSE)$density
  )
  hist(
    postsample,
    col = "lightgrey",
    border = "white",
    freq = FALSE,
    ylim = ran_y,
    xlab = xlab,
    main = label,
    cex.axis = cax,
    cex.lab = caxlab,
    cex.main = caxlab
  )
  curve(dnorm(x, mean(postsample), sd(postsample)), lwd = 4, add = TRUE)
  curve(priordens_fn, lty = 2, lwd = 4, add = TRUE)
  # Wald-based symmetric confidence interval for the mean of the sample
  CIl <- mean(postsample) -
    qt(alphalev, df = length(postsample) - 1) * sd(postsample)
  CIu <- mean(postsample) +
    qt(alphalev, df = length(postsample) - 1) * sd(postsample)
  points(mean(postsample), 0, col = cols[1], pch = 15, cex = 3)
  points(CIu, 0, col = cols[2], pch = 1, cex = 3, lwd = 3) # symmetric CI
  points(CIl, 0, col = cols[2], pch = 1, cex = 3, lwd = 3)
  points(
    quantile(postsample, alphalev),
    0,
    pch = 2,
    cex = 3,
    col = cols[3],
    lwd = 3
  ) # asymmetric CI
  points(
    quantile(postsample, 1 - alphalev),
    0,
    pch = 2,
    cex = 3,
    col = cols[3],
    lwd = 3
  )
  legend(
    "topright",
    c(
      "Posterior",
      "Prior",
      "PM",
      paste0("S-", round(100 * (1 - alpha), 0), "%-CI"),
      paste0("A-", round(100 * (1 - alpha), 0), "%-CI")
    ),
    lty = c(1, 2, 0, 0, 0),
    bty = "n",
    lwd = c(4, 4, 1, 3, 3),
    pch = c(-1, -1, 15, 1, 2),
    pt.cex = 2,
    col = c(1, 1, cols),
    cex = 1.8,
    x.intersp = 0.5
  )
  return(invisible(NULL))
}