# R/Censoring_PQ.R In ReIns: Functions from "Reinsurance: Actuarial and Statistical Aspects"

#### Documented in cProbcProbGHcProbGPDcQuantcQuantGHcQuantGPDcReturncReturnGHcReturnGPD

```# This file contains the estimators for quantiles, small probabilities and return periods
# suitable for right censored data.

###########################################################################

# Computes estimates of small exceedance probability 1-F(q)
# for a numeric vector of observations
# (data) and as a function of k
#
# Estimates are based on prior estimates gamma1 for the
# extreme value index (cHill)
#
# censored is a vector which is 1 if a data point is censored and 0 otherwise,
# giving censored=0 results indicates that non of the data points is censored
#
# If plot=TRUE then the estimates are plotted as a
# function of k
#
# plot

cProb <- function(data, censored, gamma1, q, plot = FALSE, add = FALSE,
main = "Estimates of small exceedance probability", ...) {

# Check input arguments
.checkInput(data, gamma1)
censored <- .checkCensored(censored, length(data))

if (length(q) > 1) {
stop("q should be a numeric of length 1.")
}

s <- sort(data, index.return = TRUE)
X <- s\$x
sortix <- s\$ix
# delta is 1 if a data point is not censored, we also sort it with X
#delta <- !(censored[sortix]*1)
n <- length(X)
prob <- numeric(n)
K <- 1:(n-1)

# Kaplan-Meier estimator for survival function in X[n-K]
km <- KaplanMeier(X[n-K], data = X, censored = censored[sortix])\$surv

# Weissman estimator for probabilities
prob[K] <- km * (q/X[n-K])^(-1/gamma1[K])
prob[prob < 0 | prob > 1] <- NA

# plots if TRUE

# output list with values of k, corresponding return period estimates
# and the considered large quantile q

}

cReturn <- function(data, censored, gamma1, q, plot = FALSE, add = FALSE,
main = "Estimates of large return period", ...) {

# Check input arguments
.checkInput(data, gamma1)
censored <- .checkCensored(censored, length(data))

if (length(q) > 1) {
stop("q should be a numeric of length 1.")
}

s <- sort(data, index.return = TRUE)
X <- s\$x
sortix <- s\$ix
# delta is 1 if a data point is not censored, we also sort it with X
#delta <- !(censored[sortix]*1)
n <- length(X)
R <- numeric(n)
K <- 1:(n-1)

# Kaplan-Meier estimator for survival function in X[n-K]
km <- KaplanMeier(X[n-K], data = X, censored = censored[sortix])\$surv

# Weissman estimator for probabilities
R[K] <- 1 / ( km * (q/X[n-K])^(-1/gamma1[K]) )
R[R < 1] <- NA

# plots if TRUE

# output list with values of k, corresponding return period estimates
# and the considered large quantile q

}

##########################################################################

# Computes estimates of extreme quantile Q(1-p)  for a numeric vector of observations
# (data) and as a function of k
#
# Estimates are based on prior estimates gamma1 for the
# extreme value index (cHill)
#
# censored is a vector which is 1 if a data point is censored and 0 otherwise,
# giving censored=0 results indicates that non of the data points is censored
#
# If plot=TRUE then the estimates are plotted as a
# function of k
#
# plot

cQuant <- function(data, censored, gamma1, p, plot = FALSE, add = FALSE,
main = "Estimates of extreme quantile", ...) {

# Check input arguments
.checkInput(data, gamma1)
censored <- .checkCensored(censored, length(data))
.checkProb(p)

s <- sort(data, index.return = TRUE)
X <- s\$x
sortix <- s\$ix
# delta is 1 if a data point is not censored, we also sort it with X
#delta <- !(censored[sortix]*1)
n <- length(X)
quant <- numeric(n)
K <- 1:(n-1)

# Kaplan-Meier estimator for CDF
km <- KaplanMeier(X[n-K], data = X, censored = censored[sortix])\$surv

# Weisman estimator for quantiles
quant[K] <- X[n-K] * (km/p)^(gamma1[K])

# plots if TRUE

# output list with values of k, corresponding quantile estimates
# and the considered small tail probability p

}

#################################################################################

#Estimate extreme quantiles using the censored generalised Hill estimator for gamma1
cQuantGH <- function(data, censored, gamma1, p, plot = FALSE, add = FALSE,
main = "Estimates of extreme quantile", ...) {

# Check input arguments
.checkInput(data, gamma1, gammapos=FALSE)
censored <- .checkCensored(censored, length(data))
.checkProb(p)

s <- sort(data, index.return = TRUE)
X <- s\$x
sortix <- s\$ix
# delta is 1 if a data point is not censored, we also sort it with X
delta <- !(censored[sortix]*1)
n <- length(X)
quant <- numeric(n)
K <- 1:length(gamma1)

# Proportion of non-censored
pk <- cumsum(delta[n-K+1])/K

# Hill estimator (non-censored)
H <- Hill(X)\$gamma

# Auxiliary values
S <- Moment(X)\$gamma - Hill(X)\$gamma
a <- X[n-K] * H[K] * (1-S[K]) / pk[K]

# Kaplan-Meier estimator for CDF
km <- KaplanMeier(X[n-K], data = X, censored = censored[sortix])\$surv

# Estimator for extreme quantiles
quant[K] <- X[n-K] + a/gamma1[K] * ( (km/p)^gamma1[K] - 1 )

# plots if TRUE

# output list with values of k, corresponding quantile estimates
# and the considered small tail probability p

}

#Estimate small exceedance probabilities using the censored generalised Hill estimator for gamma1
cProbGH <- function(data, censored, gamma1, q, plot = FALSE, add = FALSE,
main = "Estimates of small exceedance probability", ...) {

# Check input arguments
.checkInput(data, gamma1, gammapos=FALSE)
censored <- .checkCensored(censored, length(data))

if (length(q) > 1) {
stop("q should be a numeric of length 1.")
}

s <- sort(data, index.return = TRUE)
X <- s\$x
sortix <- s\$ix
# delta is 1 if a data point is not censored, we also sort it with X
delta <- !(censored[sortix]*1)
n <- length(X)
prob <- numeric(n)
K <- 1:length(gamma1)

# Proportion of non-censored
pk <- cumsum(delta[n-K+1])/K

# Hill estimator (non-censored)
H <- Hill(X)\$gamma

# Auxiliary value
a <- X[n-K] * H[K] * (1-pmin(gamma1[K], 0)) / pk[K]

# Kaplan-Meier estimator for CDF
km <- KaplanMeier(X[n-K], data = X, censored = censored[sortix])\$surv

prob[K] <- km * (1 + gamma1[K]/a[K]*(q-X[n-K]))^(-1/gamma1[K])
prob[prob < 0 | prob > 1] <- NA

# plots if TRUE

# output list with values of k, corresponding probability estimates
# and the considered large quantile q

}

#Estimate return periods using the censored generalised Hill estimator for gamma1
cReturnGH <- function(data, censored, gamma1, q, plot = FALSE, add = FALSE,
main = "Estimates of large return period", ...) {

# Check input arguments
.checkInput(data, gamma1, gammapos=FALSE)
censored <- .checkCensored(censored, length(data))

if (length(q) > 1) {
stop("q should be a numeric of length 1.")
}

s <- sort(data, index.return = TRUE)
X <- s\$x
sortix <- s\$ix
# delta is 1 if a data point is not censored, we also sort it with X
delta <- !(censored[sortix]*1)
n <- length(X)
R <- numeric(n)
K <- 1:length(gamma1)

# Proportion of non-censored
pk <- cumsum(delta[n-K+1])/K

# Hill estimator (non-censored)
H <- Hill(X)\$gamma

# Auxiliary value
a <- X[n-K] * H[K] * (1-pmin(gamma1[K], 0)) / pk[K]

# Kaplan-Meier estimator for CDF
km <- KaplanMeier(X[n-K], data = X, censored = censored[sortix])\$surv

R[K] <- 1 / ( km * (1 + gamma1[K]/a[K]*(q-X[n-K]))^(-1/gamma1[K]) )
R[R < 1] <- NA

# plots if TRUE

# output list with values of k, corresponding return period estimates
# and the considered large quantile q

}

#Same expressions using the moment estimator
cQuantMOM <- cQuantGH
cProbMOM <- cProbGH
cReturnMOM <- cReturnGH

#Estimate extreme quantiles using the censored POT estimator for gamma1
cQuantGPD <- function(data, censored, gamma1, sigma1, p, plot = FALSE, add = FALSE,
main = "Estimates of extreme quantile", ...) {

# Check input arguments
.checkInput(data, gamma1, scale=sigma1, gammapos=FALSE)
censored <- .checkCensored(censored, length(data))
.checkProb(p)

s <- sort(data, index.return = TRUE)
X <- s\$x
sortix <- s\$ix
# delta is 1 if a data point is not censored, we also sort it with X
delta <- !(censored[sortix]*1)
n <- length(X)
quant <- numeric(n)
K <- 1:(n-1)

# Proportion of non-censored
pk <- cumsum(delta[n-K+1])/K

# Auxiliary value
a <- sigma1[K]/pk

# Kaplan-Meier estimator for CDF
km <- KaplanMeier(X[n-K], data = X, censored = censored[sortix])\$surv

# Estimator for extreme quantiles
quant[K] <- X[n-K] + a/gamma1[K] * ( (km/p)^gamma1[K] - 1 )

# plots if TRUE

# output list with values of k, corresponding quantile estimates
# and the considered small tail probability p

}

#Estimate small exceedance probabilities using the censored POT estimator for gamma1
cProbGPD <- function(data, censored, gamma1, sigma1, q, plot = FALSE, add = FALSE,
main = "Estimates of small exceedance probability", ...) {

# Check input arguments
.checkInput(data, gamma1, scale=sigma1, gammapos=FALSE)
censored <- .checkCensored(censored, length(data))

if (length(q) > 1) {
stop("q should be a numeric of length 1.")
}

s <- sort(data, index.return = TRUE)
X <- s\$x
sortix <- s\$ix
# delta is 1 if a data point is not censored, we also sort it with X
delta <- !(censored[sortix]*1)
n <- length(X)
prob <- numeric(n)
K <- 1:(n-1)

# Proportion of non-censored
pk <- cumsum(delta[n-K+1])/K

# Auxiliary value
a <- sigma1[K]/pk

# Kaplan-Meier estimator for CDF
km <- KaplanMeier(X[n-K], data = X, censored = censored[sortix])\$surv

prob[K] <- km * (1 + gamma1[K]/a[K]*(q-X[n-K]))^(-1/gamma1[K])
prob[prob < 0 | prob > 1] <- NA

# plots if TRUE

# output list with values of k, corresponding probabilities
# and the considered large quantile q

}

#Estimate return period using the censored POT estimator for gamma1
cReturnGPD <- function(data, censored, gamma1, sigma1, q, plot = FALSE, add = FALSE,
main = "Estimates of large return period", ...) {

# Check input arguments
.checkInput(data, gamma1, scale=sigma1, gammapos=FALSE)
censored <- .checkCensored(censored, length(data))

if (length(q) > 1) {
stop("q should be a numeric of length 1.")
}

s <- sort(data, index.return = TRUE)
X <- s\$x
sortix <- s\$ix
# delta is 1 if a data point is not censored, we also sort it with X
delta <- !(censored[sortix]*1)
n <- length(X)
R <- numeric(n)
K <- 1:(n-1)

# Proportion of non-censored
pk <- cumsum(delta[n-K+1])/K

# Auxiliary value
a <- sigma1[K]/pk

# Kaplan-Meier estimator for CDF
km <- KaplanMeier(X[n-K], data = X, censored = censored[sortix])\$surv

R[K] <- 1 / ( km * (1 + gamma1[K]/a[K]*(q-X[n-K]))^(-1/gamma1[K]) )
R[R < 1] <- NA

# plots if TRUE

# output list with values of k, corresponding return period estimates
# and the considered large quantile q