#' @title Maximum Likelihood Fitting for the GLO distribution in the presence of Historical Data
#' @description ML fitting procedures for sample of both systematic and historical data.
#' Function structure inspired by \code{gev.fit} in ismev and similar to \code{\link{gev.hist.fit}}.
#' Arguments naming and likelihood implementation follow Stedinger and Cohn (1986).
#' @param xdat vector of historical and systematic/observed data - the first k elements of the vector should be the historical events
#' @param k number of historical events available. These events should be stored as the first observations of the xdat vector
#' @param h length of years covered by the information of the historical period
#' @param X0 the perception threshold which is exceeded by the historical events. This is most likely different from the lowest historical value. It should indicate a value after which we are confident the event would have been recorded or left traces
#' @param binomialcens a logical value. Indicates whether the actual k values are to be used, or if only the information that the threshold X0 has been exceeded is used.
#' @param mulink the link function for the location parameter - default to identity
#' @param siglink the link function for the scale parameter - default to identity
#' @param shlink the link function for the shape parameter - default to identity
#' @param muinit initial values for the location parameter
#' @param siginit initial values for the scale parameter
#' @param shinit initial values for the shape parameter
#' @param show Logical; if \code{TRUE} (the default), print details of the fit.
#' @param method The optimization method (see \code{optim} for details)
#' @param maxit The maximum number of iterations
#' @param ... Other control parameters for the optimization. These are passed to components of the control argument of optim.
#' @return An object of the class \code{glo.fit}.
#' @seealso \code{\link{gev.hist.fit}}, \code{\link{dglo}}, Vignette on historical data
#' @export
#' @references
#' Hosking, J.R.M. and Wallis, J.R., 2005. Regional frequency analysis: an approach based on L-moments. Cambridge university press.
#'
#' Stedinger, J.R. and Cohn, T.A., 1986. Flood frequency analysis with historical and paleoflood information. Water resources research, 22(5), pp.785-793.
#'
#' Macdonald, N., Kjeldsen, T.R., Prosdocimi, I. and Sangster, H., 2014. Reassessing flood frequency for the Sussex Ouse, Lewes: the inclusion of historical flood information since AD 1650. Natural Hazards and Earth System Sciences, 14(10), pp.2817-2828.
#' @examples
#' set.seed(7821567)
#' xx <- rglo(500, 40, 6, -0.2)
#' xxsist <- xx[471:500]; xxhist <- xx[1:470][xx[1:470] > 80]
#' glo.hist.fit(c(xxhist,xxsist), k = length(xxhist), h = 470, X0 = 80)
#' glo.hist.fit(c(xxhist,xxsist), k = length(xxhist), h = 470, X0 = 80, binomialcens = TRUE)
#' glod.fit(xxsist) ## notice the higher standard errors
glo.hist.fit<-
function(xdat,k=0,h=NULL,X0=NULL,binomialcens=FALSE,
mulink = identity, siglink = identity, shlink = identity,
muinit = NULL, siginit = NULL, shinit = NULL, show = TRUE,
method = "Nelder-Mead", maxit = 10000, ...)
{
#
# obtains mles etc for glo dist when historical data are present
#
### can not actually come with covariates yet
## binomialcens=TRUE if a binomial type of censoring is applied,
## eg you only consider that some extremes happened in the past, not their dimension
## h is the number of past years you are covering with your historical data
## k is the number of event you see in the past
## X0 is the perception threshold
if(k == 0 & !(is.null(h) & is.null(X0))) {warning("no historical data given, dropping X0 and h"); h=0; X0 = NULL}
mul = NULL; sigl = NULL; shl = NULL
npmu <- length(mul) + 1
npsc <- length(sigl) + 1
npsh <- length(shl) + 1
z <- list()
z$trans <- FALSE # if maximization fails, could try
# changing in1 and in2 which are
# initial values for minimization routine
in2 <- sqrt(6 * var(xdat[(k+1):(length(xdat))]))/pi
in1 <- mean(xdat[(k+1):(length(xdat))]) - 0.57722 * in2
if(is.null(mul)) {
mumat <- as.matrix(rep(1, length(xdat)))
if( is.null( muinit)) muinit <- in1
}
else {
z$trans <- TRUE
mumat <- cbind(rep(1, length(xdat)))
if( is.null( muinit)) muinit <- c(in1, rep(0, length(mul)))
}
if(is.null(sigl)) {
sigmat <- as.matrix(rep(1, length(xdat)))
if( is.null( siginit)) siginit <- in2
}
else {
z$trans <- TRUE
sigmat <- cbind(rep(1, length(xdat)))
if( is.null( siginit)) siginit <- c(in2, rep(0, length(sigl)))
}
if(is.null(shl)) {
shmat <- as.matrix(rep(1, length(xdat)))
if( is.null( shinit)) shinit <- 0.1
}
else {
z$trans <- TRUE
shmat <- cbind(rep(1, length(xdat)))
if( is.null( shinit)) shinit <- c(0.1, rep(0, length(shl)))
}
if(is.null(X0)) {
X0<-min(xdat[1:k])
}
z$model <- list(mul, sigl, shl)
z$link <- deparse(substitute(c(mulink, siglink, shlink)))
a<- init <- c(muinit, siginit, shinit)
glo.hist.lik <- function(a,h,k,binomialcens) {
# computes neg log lik of gev model
mu <- mulink(mumat %*% (a[1:npmu]))
sc <- siglink(sigmat %*% (a[seq(npmu + 1, length = npsc)]))
xi <- shlink(shmat %*% (a[seq(npmu + npsc + 1, length = npsh)]))
y <- ((1-xi*(xdat - mu)/sc))
if(any(!y>0) || any(sc <= 0)) return(10^6)
y <- -log(y)/xi;y0<- -log(((1-xi[1]*(X0 - mu[1])/sc[1])))/xi[1]
## shape = 0 case
if(any(abs(xi) <10^-5)) {y <- (xdat - mu[1])/sc[1]; y0<- (X0 - mu[1])/sc[1]}
if(k==0) return(sum(log(sc)) + sum(y*(1-xi)) + 2*sum(log(1+exp(-y))))
if(!binomialcens & !k==0) return(sum(log(sc)) + sum(y*(1-xi)) + 2*sum(log(1+exp(-y)))-log(choose(h,k))-(h-k)*log((1/(1+exp(-y0)))) )
# If only historical exceedances are known, then below
# if(!binomialcens) return(-log(choose(h,k))-(h-k)*log((1/(1+exp(-y0)))) -k*log((1-(1/(1+exp(-y0)))) ))
if(binomialcens & !k==0) return(sum(log(sc[-(1:k)])) + sum(y[-(1:k)]*(1-xi[-(1:k)])) + 2*sum(log(1+exp(-y[-(1:k)]))) -log(choose(h,k))-(h-k)*log((1/(1+exp(-y0)))) -k*log((1-(1/(1+exp(-y0))))) )
}
x <- optim(init, glo.hist.lik, k=k, h=h, binomialcens=binomialcens, hessian = TRUE, method = method,
control = list(maxit = maxit, ...))
z$conv <- x$convergence
mu <- mulink(mumat %*% (x$par[1:npmu]))
sc <- siglink(sigmat %*% (x$par[seq(npmu + 1, length = npsc)]))
xi <- shlink(shmat %*% (x$par[seq(npmu + npsc + 1, length = npsh)]))
z$nllh <- x$value
z$data <- xdat
if(z$trans) {
z$data <- - log(as.vector((1 + (xi * (xdat - mu))/sc)^(
-1/xi)))
}
z$mle <- x$par
z$cov <- solve(x$hessian)
z$se <- sqrt(diag(z$cov))
z$vals <- cbind(mu, sc, xi)
if(show) {
if(z$trans)
print(z[c(2, 3, 4)])
else print(z[4])
if(!z$conv)
print(z[c(5, 7, 9)])
}
z$k<-k;z$h<-h;z$X0<-X0
class(z) <- "glo.fit"
invisible(z)
}
#' @title Maximum Likelihood Fitting for the GEV distribution in the presence of Historical Data
#' @description ML fitting procedures for sample of both systematic and historical data. Function structure inspired by \code{gev.fit} in ismev.
#' Arguments naming and likelihood implementation follow Stedinger and Cohn (1986).
#' @param xdat vector of historical and systematic/observed data - the first k elements of the vector should be the historical events
#' @param k number of historical events available. These events should be stored as the first observations of the xdat vector
#' @param h length of years covered by the information of the historical period
#' @param X0 the perception threshold which is exceeded by the historical events. This is most likely different from the lowest historical value. It should indicate a value after which we are confident the event would have been recorded or left traces
#' @param binomialcens a logical value. Indicates whether the actual k values are to be used, or if only the information that the threshold X0 has been exceeded is used.
#' @param mulink the link function for the location parameter - default to identity
#' @param siglink the link function for the scale parameter - default to identity
#' @param shlink the link function for the shape parameter - default to identity
#' @param muinit initial values for the location parameter
#' @param siginit initial values for the scale parameter
#' @param shinit initial values for the shape parameter
#' @param show Logical; if \code{TRUE} (the default), print details of the fit.
#' @param method The optimization method (see \code{optim} for details)
#' @param maxit The maximum number of iterations
#' @param ... Other control parameters for the optimization. These are passed to components of the control argument of optim.
#' @details The form of the GEV used is that of Coles (2001) Eq (3.2). Specifically, positive values of the shape parameter imply a heavy tail, and negative values imply a bounded upper tail.
#' @return An object of the class \code{gev.fit}, with additional components related to the historical data.
#' @seealso \code{\link{glo.hist.fit}}, \code{\link{dgev}}, Vignette on historical data
#' @references
#' Hosking, J.R.M. and Wallis, J.R., 2005. Regional frequency analysis: an approach based on L-moments. Cambridge university press.
#'
#' Coles, S., 2001. An introduction to statistical modeling of extreme values. London: Springer.
#'
#' Stedinger, J.R. and Cohn, T.A., 1986. Flood frequency analysis with historical and paleoflood information. Water resources research, 22(5), pp.785-793.
#' @export
#' @examples
#' # check that ismev is installed
#' # any(rownames(installed.packages()) == "ismev")
#' set.seed(5416574)
#' xx <- rgev(500, 40, 6, -0.2)
#' xxsist <- xx[471:500]; xxhist <- xx[1:470][xx[1:470] > 80]
#' gev.hist.fit(c(xxhist,xxsist), k = length(xxhist), h = 470, X0 = 80)
#' gev.hist.fit(c(xxhist,xxsist), k = length(xxhist), h = 470, X0 = 80, binomialcens = TRUE)
#' gevd.fit(xxsist) # note the higher standard errors
gev.hist.fit <-
function(xdat, k=0, h=NULL, X0=NULL, binomialcens=FALSE,
mulink = identity, siglink = identity, shlink = identity,
muinit = NULL, siginit = NULL, shinit = NULL, show = TRUE,
method = "Nelder-Mead", maxit = 10000, ...){
#
# obtains mles etc for gev dist when historical data are present
## builds on ismev::gev.fit
### can not actually come with covariates yet
## binomialcens=TRUE if a binomial type of censoring is applied,
## eg you only consider that some extremes happened in the past, not their dimension
## h is the number of past years you are covering with your historical data
## k is the number of event you see in the past
## X0 is the perception threshold
z <- list()
if(k == 0 & !(is.null(h) & is.null(X0))) {warning("no historical data given, dropping X0 and h"); h=0; X0 = NULL}
mul = NULL; sigl = NULL; shl = NULL
npmu <- length(mul) + 1
npsc <- length(sigl) + 1
npsh <- length(shl) + 1
z$trans <- FALSE
in2 <- sqrt(6 * var(xdat[(k+1):(length(xdat))]))/pi
in1 <- mean(xdat[(k+1):(length(xdat))]) - 0.57722 * in2
if (is.null(mul)) {
mumat <- as.matrix(rep(1, length(xdat)))
if (is.null(muinit))
muinit <- in1
}
else {
z$trans <- TRUE
mumat <- cbind(rep(1, length(xdat)))
if (is.null(muinit))
muinit <- c(in1, rep(0, length(mul)))
}
if (is.null(sigl)) {
sigmat <- as.matrix(rep(1, length(xdat)))
if (is.null(siginit))
siginit <- in2
}
else {
z$trans <- TRUE
sigmat <- cbind(rep(1, length(xdat)))
if (is.null(siginit))
siginit <- c(in2, rep(0, length(sigl)))
}
if (is.null(shl)) {
shmat <- as.matrix(rep(1, length(xdat)))
if (is.null(shinit))
shinit <- 0.1
}
else {
z$trans <- TRUE
shmat <- cbind(rep(1, length(xdat)))
if (is.null(shinit))
shinit <- c(0.1, rep(0, length(shl)))
}
z$model <- list(mul, sigl, shl)
z$link <- deparse(substitute(c(mulink, siglink, shlink)))
init <- c(muinit, siginit, shinit)
gev.hist.lik <- function(a,h,k,binomialcens) {
# computes neg log lik of gev model
# uses Coles' formulation of the F(x) and f(x) of a GEV
# the shape parameter is the negative of the one used in Hosking and Wallis
# so it must be interpreted in the opposite way than for the GLO
# this is a mess, I am sorry...
mu <- mulink(mumat %*% (a[1:npmu]))
sc <- siglink(sigmat %*% (a[seq(npmu + 1, length = npsc)]))
xi <- shlink(shmat %*% (a[seq(npmu + npsc + 1, length = npsh)]))
y <- (xdat - mu)/sc
y <- 1 + xi * y
if(any(y <= 0) || any(sc <= 0))
return(10^6)
if(k == 0) return(sum(log(sc)) + sum(y^(-1/xi)) + sum(log(y) * (1/xi + 1)))
y0 <- 1 + xi[1] * (X0 - mu[1])/sc[1]
if(!binomialcens & !k==0) return(sum(log(sc)) + sum(y^(-1/xi)) + sum(log(y) * (1/xi + 1)) -
log(choose(h,k))-(h-k)*(-y0^(-1/xi[1])))
if(binomialcens & !k==0) return(sum(log(sc[-(1:k)])) + sum(y[-(1:k)]^(-1/xi[-(1:k)])) + sum(log(y[-(1:k)]) * (1/xi[-(1:k)] + 1)) -
log(choose(h,k))-(h-k)*(-y0^(-1/xi[1])) - k*log(1-exp(-y0^(-1/xi[1]))))
}
x <- optim(init, gev.hist.lik, k=k, h=h, binomialcens=binomialcens, hessian = TRUE, method = method,
control = list(maxit = maxit, ...))
z$conv <- x$convergence
mu <- mulink(mumat %*% (x$par[1:npmu]))
sc <- siglink(sigmat %*% (x$par[seq(npmu + 1, length = npsc)]))
xi <- shlink(shmat %*% (x$par[seq(npmu + npsc + 1, length = npsh)]))
z$nllh <- x$value
z$data <- xdat
if (z$trans) {
z$data <- -log(as.vector((1 + (xi * (xdat - mu))/sc)^(-1/xi)))
}
z$mle <- x$par
z$cov <- solve(x$hessian)
z$se <- sqrt(diag(z$cov))
z$vals <- cbind(mu, sc, xi)
if (show) {
if (z$trans)
print(z[c(2, 3, 4)])
else print(z[4])
if (!z$conv)
print(z[c(5, 7, 9)])
}
z$k<-k;z$h<-h;z$X0<-X0
class(z) <- "gev.fit"
invisible(z)
}
#' @title L-moment estimation in the presence of Historical Data
#' @description L-moment fitting procedures for sample of both systematic and historical data via Partial Probability Weighted Moments (Wang, 1990).
#' @param xdat vector of historical and systematic/observed data - the first k elements of the vector should be the historical events
#' @param k number of historical events available. These events should be stored as the first observations of the xdat vector
#' @param h length of years covered by the information of the historical period
#' @param X0 the perception threshold which is exceeded by the historical events. This is most likely different from the lowest historical value. It should indicate a value after which we are confident the event would have been recorded or left traces
#' @param nmom the number of moments to estimate
#' @return A vector of L-moment, as lmom::samlmu.
#' @references Wang, Q.J.(1990). Unbiased estimation of probability weighted moments and partial probability weighted moments from systematic and historical flood information and their application. Journal of hydrology, 120, 115--124.
#' @export
#' @examples
#' # library(ismev)
#' set.seed(54165784)
#' xx <- rgev(500, 40, 6, -0.2)
#' xxsist <- xx[471:500]; xxhist <- xx[1:470][xx[1:470] > 80]
#' lmh <- lmom.hist.fit(c(xxhist,xxsist), k = length(xxhist), h = 470, X0 = 80, nmom = 5)
#' lmh
#' lmom::pelgev(lmh)
#' gev.hist.fit(c(xxhist,xxsist), k = length(xxhist), h = 470, X0 = 80, show = FALSE)$mle
#' ### note the different parametrization for the shape parameter
lmom.hist.fit <- function(xdat, k = NULL, h = NULL, X0 = NULL, nmom = 3){
if((is.null(k) + is.null(X0) + is.null(h)) %in% c(1,2) ) stop("I need all information (X0, k, h) on historical records")
if(all(is.null(k), is.null(X0), is.null(h))) {
lmom::samlmu(xdat, nmom = nmom)
}
else {
extSamlmu(wangPPWM(xcont = xdat[(k+1):(length(xdat))], xhist = xdat[1:k],
nbans = h, seuil = X0, nmom = nmom))
}
}
wangPPWM <- function(xcont, xhist, nbans, seuil, nmom = 3){
## naming after the BayesMCMC function (which doesn't make sense)
n <- length(xcont) ## length of systematic record
m <- n + nbans ## length of systematic+historical record
l <- sum(xcont > seuil) + sum(xhist>seuil) ## number of events above threshold
xcont <- sort(xcont)
xhist <- sort(xhist)
F0hat <- 1-l/m
aboveThresh <- sort(c(xcont[xcont > seuil], xhist[xhist > seuil]))
if(any(xhist <= seuil)) stop("all historical events should be above the threshold")
if(l != length(aboveThresh)) stop("something non-matching in length of obs. above threshold")
### the betas
beta.second <- rep(0,nmom)
for(r in 1:nmom){
for(i in 1:n){
beta.second[r] <- beta.second[r] +
choose(i-1, (r-1)) * ifelse(xcont[i] <= seuil, xcont[i], 0)
}
beta.second[r] <- ((n * choose(n-1, (r-1)))^(-1)) * beta.second[r]
}
beta.prime <- rep(0,nmom)
for(r in 1:nmom){
for(i in seq(from = (m-l+1), to = m)){
## i goes to the high number -
## we do not have the values below the threshold for the m historical years
beta.prime[r] <- beta.prime[r] +
choose(i-1, (r-1)) * aboveThresh[i-(m-l)]
}
beta.prime[r] <- ((m * choose(m-1, (r-1)))^(-1)) * beta.prime[r]
}
# print(c(F0hat, 1/(1-F0hat), beta.second,beta.prime))
# beta.prime <- c(1/(1-F0hat),1/(1-F0hat^2), 1/(1-F0hat^3)) * beta.prime
# beta.second <- c(1/(1-F0hat),1/(1-F0hat^2), 1/(1-F0hat^3)) * beta.second
list(beta.prime = beta.prime, beta.second = beta.second,
betas = beta.prime+beta.second)
}
extSamlmu <- function(x) {
firstOut <- lmomcopwm2lmom(x)
out <- c(firstOut$lambdas[1:2],firstOut$ratios[3:length(firstOut$ratios)])
names(out) <- c("l_1","l_2", paste("t",seq(3,length(firstOut$ratios), by = 1),sep = "_"))
out
}
### copy of lmomco::pwm2lmom - avoid having to depend on the package
lmomcopwm2lmom <- function (pwm){
if (is.list(pwm)) {
if (!is.null(pwm$BETA0)) {
z <- list(L1 = NULL, L2 = NULL, TAU3 = NULL, TAU4 = NULL,
TAU5 = NULL, LCV = NULL, L3 = NULL, L4 = NULL,
L5 = NULL)
z$L1 <- pwm$BETA0
z$L2 <- 2 * pwm$BETA1 - pwm$BETA0
z$L3 <- 6 * pwm$BETA2 - 6 * pwm$BETA1 + pwm$BETA0
z$L4 <- 20 * pwm$BETA3 - 30 * pwm$BETA2 + 12 * pwm$BETA1 -
pwm$BETA0
z$L5 <- 70 * pwm$BETA4 - 140 * pwm$BETA3 + 90 * pwm$BETA2 -
20 * pwm$BETA1 + pwm$BETA0
z$LCV <- z$L2/z$L1
z$TAU3 <- z$L3/z$L2
z$TAU4 <- z$L4/z$L2
z$TAU5 <- z$L5/z$L2
return(z)
}
if (!is.null(pwm$betas)) {
pwm <- pwm$betas
}
else {
warning("ambiguous call, do not find Betas for processing")
return(NULL)
}
}
nmom <- length(pwm)
L <- vector(mode = "numeric", length = nmom)
R <- vector(mode = "numeric", length = nmom)
for (i in seq(1, nmom)) {
r <- i - 1
sum <- 0
for (k in seq(0, r)) {
weight <- (-1)^(r - k) * choose(r, k) * choose(r +
k, k)
sum <- sum + weight * pwm[k + 1]
}
L[i] <- sum
}
if (nmom >= 2) {
R[2] <- L[2]/L[1]
}
if (nmom >= 3) {
for (r in seq(3, nmom)) {
R[r] <- L[r]/L[2]
}
}
R[1] <- NA
z <- list(lambdas = L, ratios = R)
return(z)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.