# EVA functions to compute return levels / return periods from nonstationary EVD parameters ----
# (A stray bare symbol `Nothing` previously preceded this header; it made
# sourcing the file fail with "object 'Nothing' not found" and was removed.)
#' tsEvaComputeTimeRP
#'
#' \code{tsEvaComputeTimeRP} is a function that calculates the return period
#' of a given event for GEV and GPD distributions at a given time index.
#'
#' @param params A data frame containing the following parameters:
#' \describe{
#' \item{\code{epsilonGEV}}{Shape parameter for the Generalized Extreme Value
#' (GEV) distribution.}
#' \item{\code{muGEV}}{Location parameter for the GEV distribution.}
#' \item{\code{sigmaGEV}}{Scale parameter for the GEV distribution.}
#' \item{\code{epsilonGPD}}{Shape parameter for the Generalized Pareto
#' (GPD) distribution.}
#' \item{\code{thresholdGPD}}{Threshold parameter for the GPD distribution.}
#' \item{\code{sigmaGPD}}{Scale parameter for the GPD distribution.}
#' \item{\code{nPeaks}}{Number of peaks in the sample time horizon.}
#' \item{\code{SampleTimeHorizon}}{Total number of years in the data sample.}
#' }
#' @param RPiGEV Event magnitude (return level) at which the GEV return
#'   period is evaluated.
#' @param RPiGPD Event magnitude (return level) at which the GPD return
#'   period is evaluated.
#' @return A vector with the calculated return period for GEV and GPD distributions.
#' @export
#' @examples
#'#Parameter vector:
#'params <- t(data.frame(epsilonGEV = 0.2, muGEV = 3, sigmaGEV = 1,
#' epsilonGPD = 0.2, thresholdGPD = 3, sigmaGPD = 1,
#' nPeaks = 70, SampleTimeHorizon = 70))
#'
#'tsEvaComputeTimeRP(params, RPiGEV = 10, RPiGPD = 10)
tsEvaComputeTimeRP <- function(params, RPiGEV, RPiGPD) {
  # Turn the single-column parameter matrix into a one-row data frame so
  # that each parameter can be accessed by name.
  prm <- data.frame(t(params))

  # GEV: exceedance probability of the value RPiGEV, inverted to a period.
  gevExceedProb <- 1 - exp(-(1 + prm$epsilonGEV * (RPiGEV - prm$muGEV) / prm$sigmaGEV)^(-1 / prm$epsilonGEV))
  rpGEV <- 1 / gevExceedProb

  # GPD: exceedance probability above the threshold, rescaled by the mean
  # number of peaks per unit time (nPeaks / SampleTimeHorizon).
  peakRate <- prm$nPeaks / prm$SampleTimeHorizon
  gpdExceedProb <- (1 + prm$epsilonGPD * (RPiGPD - prm$thresholdGPD) / prm$sigmaGPD)^(-1 / prm$epsilonGPD)
  rpGPD <- 1 / (peakRate * gpdExceedProb)

  c(GEV = rpGEV, GPD = rpGPD)
}
#' tsEvaComputeReturnPeriodsGEV
#'
#' \code{tsEvaComputeReturnPeriodsGEV} is a function that computes the return
#' periods of a set of observations (can be Annual maxima or others)
#' for a Generalized Extreme Value (GEV)
#' distribution, given the GEV parameters and their standard error.
#' The return levels represent the values of annual maxima
#' with a certain probability, while the return periods indicate the average
#' time between exceedances of those threshold values.
#'
#' @param epsilon The shape parameter of the GEV distribution.
#' @param sigma The scale parameter of the GEV distribution.
#' @param mu The location parameter of the GEV distribution.
#' @param BlockMax A vector containing the block maxima data.
#'
#' @return A list containing the following components:
#' \describe{
#' \item{\code{GevPseudo}}{A matrix of pseudo observations obtained from
#' the GEV distribution for each annual extreme at every time step.}
#' \item{\code{returnPeriods}}{A matrix of return periods corresponding to
#' the pseudo observations.}
#' \item{\code{PseudoObs}}{The pseudo observation corresponding to
#' the maximum value used in the computation.}
#' }
#'
#' @seealso \code{\link{empdis}}
#' @examples
#'
#' # Example usage with some sample data
#' epsilon <- 0.1
#' sigma <- 2.2
#' mu <- 1.3
#' BlockMax <- c(10, 20, 30, 40, 50)
#'
#' results <- tsEvaComputeReturnPeriodsGEV(epsilon, sigma, mu, BlockMax)
#' head(results$GevPseudo)
#' head(results$returnPeriods)
#' head(results$PseudoObs)
#' @export
tsEvaComputeReturnPeriodsGEV <- function(epsilon, sigma, mu, BlockMax) {
  nYears <- length(BlockMax)

  # Empirical return periods of the block maxima (pseudo observations);
  # rows of the empirical table are matched back to the input order.
  empirical <- empdis(BlockMax, nYears)
  obsRows <- match(BlockMax, empirical$Q)
  PseudoObs <- empirical[obsRows, ]

  # Expand sigma/mu (one value per time step) and the block maxima to
  # conforming npars x nObs matrices: rows index time steps, columns the
  # observations.
  nPar <- length(sigma)
  nObs <- length(BlockMax)
  maxMat <- matrix(BlockMax, nrow = nPar, ncol = nObs, byrow = TRUE)
  sigMat <- matrix(sigma, nrow = nPar, ncol = nObs, byrow = FALSE)
  muMat <- matrix(mu, nrow = nPar, ncol = nObs, byrow = FALSE)

  # GEV exceedance probability of every observation at every time step.
  exceedProb <- 1 - exp(-(1 + epsilon * (maxMat - muMat) / sigMat)^(-1 / epsilon))

  list(
    GevPseudo = exceedProb,
    returnPeriods = 1 / exceedProb,
    PseudoObs = PseudoObs
  )
}
#' tsEvaComputeReturnPeriodsGPD
#'
#' \code{tsEvaComputeReturnPeriodsGPD} is a function that computes the return
#' periods of a set of observations (peaks) for a
#' Generalized Pareto Distribution (GPD), given the GPD parameters,
#' threshold, peaks data, and sample time horizon.
#'
#' @param epsilon The shape parameter of the GPD.
#' @param sigma The scale parameter of the GPD.
#' @param threshold The threshold value for the GPD.
#' @param peaks A vector containing the peak values.
#' @param nPeaks The number of peak values.
#' @param peaksID An identifier for each peak value.
#' @param sampleTimeHorizon The time horizon of the sample.
#'
#' @return A list containing the following components:
#' \describe{
#' \item{\code{GpdPseudo}}{A matrix of pseudo observations obtained from
#' the GPD for each peak value at every time step.}
#' \item{\code{returnPeriods}}{A matrix of return periods
#' corresponding to the pseudo observations.}
#' \item{\code{PseudoObs}}{A data frame containing the pseudo
#' observations and their corresponding identifiers.}
#' }
#'
#' @seealso \code{\link{empdis}}
#' @examples
#' # Example usage with some sample data
#' epsilon <- 0.1
#' sigma <- 2.2
#' threshold <- 1.3
#' peaks <- c(10, 20, 30, 40, 50)
#' nPeaks=5
#' peaksID=c(230,550,999,1540,3012)
#' SampleTimeHorizon = 70
#'
#' results <- tsEvaComputeReturnPeriodsGPD(epsilon, sigma, threshold, peaks,
#' nPeaks, peaksID, SampleTimeHorizon)
#' head(results$GpdPseudo)
#' head(results$returnPeriods)
#' head(results$PseudoObs)
#' @export
tsEvaComputeReturnPeriodsGPD <- function(epsilon, sigma, threshold, peaks, nPeaks, peaksID, sampleTimeHorizon) {
  # Mean number of peaks per unit of time (e.g. events per year).
  X0 <- nPeaks / sampleTimeHorizon
  nyr <- sampleTimeHorizon
  # Jitter the peaks slightly so that ties receive distinct empirical ranks.
  peaksj <- jitter(peaks, 2)
  # IDs reordered to follow ascending jittered peaks, so they can be
  # re-attached to the empirical table (assumes empdis returns rows sorted
  # by value -- TODO confirm against empdis).
  peaksjid <- peaksID[order(peaksj)]
  # (Removed unused local `peakAndId`, which was built but never read.)
  # Compute the empirical return period:
  empval <- empdis(peaksj, nyr)
  PseudoObs <- empval
  PseudoObs$ID <- peaksjid
  PseudoObs <- PseudoObs[order(PseudoObs$ID), ]
  # Uniform dimensions of variables: rows index the (possibly time-varying)
  # parameters, columns the peaks.
  npars <- length(sigma)
  nt <- length(peaks)
  Peakx <- matrix(PseudoObs$Q, nrow = npars, ncol = nt, byrow = TRUE)
  sigma_ <- matrix(sigma, nrow = npars, ncol = nt, byrow = FALSE)
  threshold_ <- matrix(threshold, nrow = npars, ncol = nt, byrow = FALSE)
  # GPD exceedance probability of every peak at every time step.
  qx <- (((1 + epsilon * (Peakx - threshold_) / sigma_)^(-1 / epsilon)))
  GpdPseudo <- qx
  # Clamp negative support values (below-threshold artifacts) to zero.
  GpdPseudo[which(GpdPseudo < 0)] <- 0
  returnPeriods <- 1 / (X0 * qx)
  return(list(GpdPseudo = GpdPseudo, returnPeriods = returnPeriods, PseudoObs = PseudoObs))
}
#' tsEvaComputeReturnLevelsGEV
#'
#' \code{tsEvaComputeReturnLevelsGEV} is a function that calculates the return
#' levels for a Generalized Extreme Value (GEV) distribution given the GEV
#' parameters and their standard errors. The return periods are specified in a
#' time unit that corresponds to the size of the time segments for
#' evaluating the maxima.
#'
#' @param epsilon The shape parameter of the GEV distribution.
#' @param sigma The scale parameter of the GEV distribution.
#' @param mu The location parameter of the GEV distribution.
#' @param epsilonStdErr The standard error of the shape parameter.
#' @param sigmaStdErr The standard error of the scale parameter.
#' @param muStdErr The standard error of the location parameter.
#' @param returnPeriodsInDts The return periods expressed in the
#' corresponding time unit. For example, while working on yearly
#' maxima, returnPeriodsInDts must be expressed in years. For
#' monthly maxima returnPeriodsInDts must be expressed in months.
#'
#' @return A list containing the following components:
#' \describe{
#' \item{\code{returnLevels}}{A matrix of return levels corresponding to the specified return periods.}
#' \item{\code{returnLevelsErr}}{A matrix of standard errors for the return levels.}
#' }
#'
#' @seealso \code{\link{empdis}}
#' @examples
#' # Example usage with some sample data
#' epsilon <- c(0.1)
#' sigma <- c(2.3)
#' mu <- c(1.3)
#' epsilonStdErr <- c(0.01)
#' sigmaStdErr <- c(0.11)
#' muStdErr <- c(0.011)
#' returnPeriodsInDts <- c( 5, 10, 20, 50)
#' results <- tsEvaComputeReturnLevelsGEV(epsilon, sigma, mu, epsilonStdErr,
#' sigmaStdErr, muStdErr, returnPeriodsInDts)
#' head(results$returnLevels)
#' head(results$returnLevelsErr)
#' @export
tsEvaComputeReturnLevelsGEV <- function(epsilon, sigma, mu, epsilonStdErr, sigmaStdErr, muStdErr, returnPeriodsInDts) {
  # Flatten the return periods to a plain vector (they may arrive as a
  # 1 x n matrix); the previous t()/as.vector round-trip was a no-op.
  returnPeriodsInDts <- as.vector(returnPeriodsInDts)
  # reference: Stuart Coles 2001, pag 49.
  yp <- -log(1 - 1 / returnPeriodsInDts)
  # Uniform dimensions of yp, sigma, mu: rows index the (possibly
  # time-varying) parameters, columns the return periods, so sigma/mu must
  # be constant along each row (byrow = FALSE, as in the companion
  # return-period functions). Using byrow = TRUE here scrambled the
  # parameter / return-period pairing whenever length(sigma) > 1.
  npars <- length(sigma)
  nt <- length(returnPeriodsInDts)
  yp <- matrix(yp, nrow = npars, ncol = nt, byrow = TRUE)
  sigma_ <- matrix(sigma, nrow = npars, ncol = nt, byrow = FALSE)
  sigmaStdErr_ <- matrix(sigmaStdErr, nrow = npars, ncol = nt, byrow = FALSE)
  mu_ <- matrix(mu, nrow = npars, ncol = nt, byrow = FALSE)
  muStdErr_ <- matrix(muStdErr, nrow = npars, ncol = nt, byrow = FALSE)
  if (epsilon != 0) {
    # GEV quantile for non-zero shape (Coles 2001, eq. 3.4).
    returnLevels <- mu_ - sigma_ / epsilon * (1 - yp^(-epsilon))
    # Error propagation: partial derivatives of the return level w.r.t.
    # mu, sigma and epsilon, combined in quadrature.
    dxm_mu <- 1
    dxm_sigma <- 1 / epsilon * (1 - yp^(-epsilon))
    dxm_epsilon <- sigma_ / (epsilon^2) * (1 - (yp)^(-epsilon)) - sigma_ / epsilon * log(yp) * yp^(-epsilon)
    returnLevelsErr <- sqrt((dxm_mu * muStdErr_)^2 + (dxm_sigma * sigmaStdErr_)^2 + (dxm_epsilon * epsilonStdErr)^2)
  } else {
    # Gumbel limit (shape == 0).
    returnLevels <- mu_ - sigma_ * log(yp)
    dxm_u <- 1
    dxm_sigma <- log(yp)
    returnLevelsErr <- sqrt((dxm_u * muStdErr_)^2 + (dxm_sigma * sigmaStdErr_)^2)
  }
  return(list(returnLevels = returnLevels, returnLevelsErr = returnLevelsErr))
}
#' tsEvaComputeReturnLevelsGEVFromAnalysisObj
#'
#' \code{tsEvaComputeReturnLevelsGEVFromAnalysisObj} is a function that calculates the return levels for a Generalized Extreme Value (GEV) distribution using the parameters obtained from a non-stationary extreme value analysis. It supports non-stationary analysis by considering different parameters for each time index.
#'
#' @param nonStationaryEvaParams The parameters obtained from a non-stationary extreme value analysis.
#' @param returnPeriodsInYears The return periods expressed in years.
#' @param timeIndex Temporal index corresponding to the time step on which compute the GEV RLs.
#'
#' @return A list containing the following components:
#' \describe{
#' \item{\code{returnLevels}}{A matrix of return levels corresponding to the specified return periods.}
#' \item{\code{returnLevelsErr}}{A matrix of standard errors for the return levels.}
#' \item{\code{returnLevelsErrFit}}{A matrix of standard errors for the return levels obtained from fitting the non-stationary model.}
#' \item{\code{returnLevelsErrTransf}}{A matrix of standard errors for the return levels obtained from the transformed data.}
#' }
#' @examples
#' # Example usage with some sample data
#'nonStationaryEvaParams <- list(list(
#' parameters = list(
#' epsilon = 0.1,
#' sigma = c(2.1, 2.2, 2.3),
#' mu = c(1.1, 1.2, 1.3),
#' timeHorizonStart=as.POSIXct("1951-01-01"),
#' timeHorizonEnd=as.POSIXct("2020-12-31"),
#' nPeaks=90
#'
#' ),
#' paramErr = list(
#' epsilonErr = 0.01,
#' sigmaErr = c(0.11, 0.12, 0.13),
#' muErr = c(0.011, 0.012, 0.013)
#' ),NA
#' )
#')
#' returnPeriodsInYears <- c(1, 5, 10, 20, 50)
#' timeIndex=1
#' results <- tsEvaComputeReturnLevelsGEVFromAnalysisObj(nonStationaryEvaParams, returnPeriodsInYears)
#' head(results$returnLevels)
#' @seealso \code{\link{tsEvaComputeReturnLevelsGEV}}
#' @export
tsEvaComputeReturnLevelsGEVFromAnalysisObj <- function(nonStationaryEvaParams, returnPeriodsInYears, timeIndex=-1) {
  # The GEV fit is the first element of the analysis object.
  epsilon <- nonStationaryEvaParams[[1]]$parameters$epsilon
  epsilonStdErr <- nonStationaryEvaParams[[1]]$paramErr$epsilonErr
  epsilonStdErrFit <- epsilonStdErr
  epsilonStdErrTransf <- 0
  # A non-stationary fit carries the split error components (fit +
  # transformation). Fix: this previously read
  # nonStationaryEvaParams[2]$paramErr -- a single-bracket subset, i.e. a
  # wrapping list -- so it always yielded NULL and nonStationary was
  # always FALSE; it also looked at the GPD element instead of the GEV one.
  nonStationary <- "sigmaErrTransf" %in% names(nonStationaryEvaParams[[1]]$paramErr)
  if (timeIndex > 0) {
    # Parameters and errors at one specific time step.
    sigma <- nonStationaryEvaParams[[1]]$parameters$sigma[timeIndex]
    mu <- nonStationaryEvaParams[[1]]$parameters$mu[timeIndex]
    sigmaStdErr <- nonStationaryEvaParams[[1]]$paramErr$sigmaErr[timeIndex]
    sigmaStdErrFit <- nonStationaryEvaParams[[1]]$paramErr$sigmaErrFit[timeIndex]
    sigmaStdErrTransf <- nonStationaryEvaParams[[1]]$paramErr$sigmaErrTransf[timeIndex]
    muStdErr <- nonStationaryEvaParams[[1]]$paramErr$muErr[timeIndex]
    muStdErrFit <- nonStationaryEvaParams[[1]]$paramErr$muErrFit[timeIndex]
    muStdErrTransf <- nonStationaryEvaParams[[1]]$paramErr$muErrTransf[timeIndex]
  } else {
    # Whole time-varying parameter vectors.
    sigma <- nonStationaryEvaParams[[1]]$parameters$sigma
    mu <- nonStationaryEvaParams[[1]]$parameters$mu
    sigmaStdErr <- nonStationaryEvaParams[[1]]$paramErr$sigmaErr
    muStdErr <- nonStationaryEvaParams[[1]]$paramErr$muErr
    if (nonStationary) {
      sigmaStdErrFit <- nonStationaryEvaParams[[1]]$paramErr$sigmaErrFit
      sigmaStdErrTransf <- nonStationaryEvaParams[[1]]$paramErr$sigmaErrTransf
      muStdErrFit <- nonStationaryEvaParams[[1]]$paramErr$muErrFit
      muStdErrTransf <- nonStationaryEvaParams[[1]]$paramErr$muErrTransf
    }
  }
  # Return levels with the total parameter error.
  returnLevels <- tsEvaComputeReturnLevelsGEV(epsilon, sigma, mu, epsilonStdErr, sigmaStdErr, muStdErr, returnPeriodsInYears)
  returnLevelsErr <- returnLevels$returnLevelsErr
  if (nonStationary) {
    # Error split into the fitting and the transformation components.
    returnLevelsErrFit <- tsEvaComputeReturnLevelsGEV(epsilon, sigma, mu, epsilonStdErrFit, sigmaStdErrFit, muStdErrFit, returnPeriodsInYears)$returnLevelsErr
    returnLevelsErrTransf <- tsEvaComputeReturnLevelsGEV(epsilon, sigma, mu, epsilonStdErrTransf, sigmaStdErrTransf, muStdErrTransf, returnPeriodsInYears)$returnLevelsErr
  } else {
    # Stationary fit: all error is attributed to the fit.
    returnLevelsErrFit <- returnLevelsErr
    returnLevelsErrTransf <- rep(0, length(returnLevelsErr))
  }
  return(list(returnLevels = returnLevels$returnLevels, returnLevelsErr = returnLevelsErr, returnLevelsErrFit = returnLevelsErrFit, returnLevelsErrTransf = returnLevelsErrTransf))
}
#' tsEvaComputeReturnLevelsGPD
#'
#' \code{tsEvaComputeReturnLevelsGPD} is a function that
#' compute the return levels for a Generalized Pareto Distribution (GPD)
#' using the parameters of the distribution and their standard errors.
#'
#' @param epsilon The shape parameter of the GPD.
#' @param sigma The scale parameter of the GPD.
#' @param threshold The threshold parameter of the GPD.
#' @param epsilonStdErr The standard error of the shape parameter.
#' @param sigmaStdErr The standard error of the scale parameter.
#' @param thresholdStdErr The standard error of the threshold parameter.
#' @param nPeaks The number of peaks used to estimate the parameters.
#' @param sampleTimeHorizon The time horizon of the sample in the same units as the return periods (e.g., years).
#' @param returnPeriods The return periods for which to compute the return levels.
#'
#' @return A list containing the following components:
#' \describe{
#' \item{\code{returnLevels}}{A vector of return levels corresponding to the specified return periods.}
#' \item{\code{returnLevelsErr}}{A vector of standard errors for the return levels.}
#' }
#'
#' @details
#' sampleTimeHorizon and returnPeriods must be in the same units, e.g. years
#' @examples
#' # Example usage with some sample data
#' epsilon <- c(0.1)
#' sigma <- c(2.3)
#' threshold <- c(1.3)
#' epsilonStdErr <- c(0.01)
#' sigmaStdErr <- c(0.11)
#' thresholdStdErr <- c(0.011)
#' returnPeriodsInDts <- c( 5, 10, 20, 50)
#' nPeaks=70
#' SampleTimeHorizon=70
#' results <- tsEvaComputeReturnLevelsGPD(epsilon, sigma, threshold, epsilonStdErr,
#' sigmaStdErr, thresholdStdErr, nPeaks, SampleTimeHorizon, returnPeriodsInDts)
#' head(results$returnLevels)
#' head(results$returnLevelsErr)
#' @export
tsEvaComputeReturnLevelsGPD <- function(epsilon, sigma, threshold, epsilonStdErr, sigmaStdErr, thresholdStdErr, nPeaks, sampleTimeHorizon, returnPeriods) {
  # Average exceedance rate; sampleTimeHorizon and returnPeriods must be
  # expressed in the same time unit (e.g. years).
  exceedRate <- nPeaks / sampleTimeHorizon
  eventsPerRP <- exceedRate * returnPeriods

  nPar <- length(sigma)
  nRP <- length(returnPeriods)
  # Align parameters and return periods into conforming vectors: the
  # parameter index varies fastest, the return-period index slowest.
  rpExp <- rep(eventsPerRP, each = nPar)
  sigmaExp <- rep(sigma, times = nRP)
  sigmaErrExp <- rep(sigmaStdErr, times = nRP)
  thrExp <- rep(threshold, times = nRP)
  thrErrExp <- rep(thresholdStdErr, times = nRP)

  if (epsilon != 0) {
    # GPD quantile for non-zero shape.
    levels <- thrExp + sigmaExp / epsilon * ((rpExp)^epsilon - 1)
    # Error propagation in quadrature over threshold, sigma and epsilon.
    # !! ASSUMING NON ZERO ERROR ON THE THRESHOLD AND 0 ERROR ON THE
    # PERCENTILE, although the percentile inherits part of the threshold
    # uncertainty and this is therefore an approximation.
    dThr <- 1
    dSigma <- (rpExp^epsilon - 1) / epsilon
    dEps <- -sigmaExp / epsilon^2 * ((rpExp)^epsilon - 1) + sigmaExp / epsilon * log(rpExp) * rpExp^epsilon
    levelsErr <- sqrt((dThr * thrErrExp)^2 + (dSigma * sigmaErrExp)^2 + (dEps * epsilonStdErr)^2)
  } else {
    # Exponential limit (shape == 0).
    levels <- thrExp + sigmaExp * log(rpExp)
    dThr <- 1
    dSigma <- log(rpExp)
    levelsErr <- sqrt((dThr * thrErrExp)^2 + (dSigma * sigmaErrExp)^2)
  }
  list(returnLevels = levels, returnLevelsErr = levelsErr)
}
#' tsEvaComputeReturnLevelsGPDFromAnalysisObj
#'
#' \code{tsEvaComputeReturnLevelsGPDFromAnalysisObj} is a function that calculates the return levels for a Generalized Pareto Distribution (GPD) using the parameters obtained from an analysis object. It accounts for non-stationarity by considering time-varying parameters and their associated standard errors.
#'
#' @param nonStationaryEvaParams The non-stationary parameters obtained from the analysis object.
#' @param returnPeriodsInYears The return periods for which to compute the return levels, expressed in years.
#' @param timeIndex Temporal index corresponding to the time step on which compute the GEV RLs.
#'
#' @return A list with the following components:
#' \describe{
#' \item{\code{returnLevels}}{A vector of return levels corresponding to the specified return periods.}
#' \item{\code{returnLevelsErrFit}}{A vector of standard errors for the return levels estimated based on the fit.}
#' \item{\code{returnLevelsErrTransf}}{A vector of standard errors for the return levels estimated based on the transformed parameters.}
#' }
#'
#' @seealso \code{\link{tsEvaComputeReturnLevelsGPD}}
#' @examples
#' # Example usage with some sample data
#'nonStationaryEvaParams <- list(NA,list(
#' parameters = list(
#' epsilon = 0.1,
#' sigma = c(2.1, 2.2, 2.3),
#' threshold = c(1.1, 1.2, 1.3),
#' timeHorizonStart=as.POSIXct("1951-01-01"),
#' timeHorizonEnd=as.POSIXct("2020-12-31"),
#' nPeaks=90
#'
#' ),
#' paramErr = list(
#' epsilonErr = 0.01,
#' sigmaErr = c(0.11, 0.12, 0.13),
#' thresholdErr = c(0.011, 0.012, 0.013)
#' )
#')
#')
#'returnPeriodsInYears <- c(1, 5, 10, 20, 50)
#'timeIndex=1
#'results <- tsEvaComputeReturnLevelsGPDFromAnalysisObj(nonStationaryEvaParams, returnPeriodsInYears)
#'head(results$returnLevels)
#' @export
tsEvaComputeReturnLevelsGPDFromAnalysisObj <- function(nonStationaryEvaParams, returnPeriodsInYears, timeIndex=-1) {
  # The GPD fit is the second element of the analysis object.
  epsilon <- nonStationaryEvaParams[[2]]$parameters$epsilon
  epsilonStdErr <- nonStationaryEvaParams[[2]]$paramErr$epsilonErr
  epsilonStdErrFit <- epsilonStdErr
  epsilonStdErrTransf <- 0
  thStart <- nonStationaryEvaParams[[2]]$parameters$timeHorizonStart
  thEnd <- nonStationaryEvaParams[[2]]$parameters$timeHorizonEnd
  # Horizon in years: difftime (days) divided by the mean Gregorian year.
  timeHorizonInYears <- round(as.numeric((thEnd - thStart) / 365.2425))
  nPeaks <- nonStationaryEvaParams[[2]]$parameters$nPeaks
  # Fix: this previously read nonStationaryEvaParams[2]$paramErr -- a
  # single-bracket subset (wrapping list) -- so it always yielded NULL and
  # nonStationary was always FALSE.
  nonStationary <- "sigmaErrTransf" %in% names(nonStationaryEvaParams[[2]]$paramErr)
  if (timeIndex > 0) {
    # Parameters and errors at one specific time step.
    sigma <- nonStationaryEvaParams[[2]]$parameters$sigma[timeIndex]
    threshold <- nonStationaryEvaParams[[2]]$parameters$threshold[timeIndex]
    sigmaStdErr <- nonStationaryEvaParams[[2]]$paramErr$sigmaErr[timeIndex]
    sigmaStdErrFit <- nonStationaryEvaParams[[2]]$paramErr$sigmaErrFit[timeIndex]
    sigmaStdErrTransf <- nonStationaryEvaParams[[2]]$paramErr$sigmaErrTransf[timeIndex]
    thresholdStdErr <- nonStationaryEvaParams[[2]]$paramErr$thresholdErr[timeIndex]
    # The threshold error is attributed entirely to the transformation.
    thresholdStdErrFit <- 0
    thresholdStdErrTransf <- nonStationaryEvaParams[[2]]$paramErr$thresholdErrTransf[timeIndex]
  } else {
    # Whole time-varying parameter vectors.
    sigma <- nonStationaryEvaParams[[2]]$parameters$sigma
    threshold <- nonStationaryEvaParams[[2]]$parameters$threshold
    sigmaStdErr <- nonStationaryEvaParams[[2]]$paramErr$sigmaErr
    if (nonStationary) {
      thresholdStdErr <- nonStationaryEvaParams[[2]]$paramErr$thresholdErr
      sigmaStdErrFit <- nonStationaryEvaParams[[2]]$paramErr$sigmaErrFit
      sigmaStdErrTransf <- nonStationaryEvaParams[[2]]$paramErr$sigmaErrTransf
      thresholdStdErrFit <- nonStationaryEvaParams[[2]]$paramErr$thresholdErrFit
      thresholdStdErrTransf <- nonStationaryEvaParams[[2]]$paramErr$thresholdErrTransf
    } else {
      thresholdStdErr <- 0
    }
  }
  # Return levels with the total parameter error.
  returnLevels <- tsEvaComputeReturnLevelsGPD(epsilon, sigma, threshold, epsilonStdErr, sigmaStdErr, thresholdStdErr, nPeaks, timeHorizonInYears, returnPeriodsInYears)
  returnLevelsErr <- returnLevels$returnLevelsErr
  if (nonStationary) {
    # Error split into fitting and transformation components. Fix: extract
    # $returnLevelsErr -- previously the whole result list was stored,
    # inconsistent with the GEV counterpart.
    returnLevelsErrFit <- tsEvaComputeReturnLevelsGPD(epsilon, sigma, threshold, epsilonStdErrFit, sigmaStdErrFit, rep(0, length(thresholdStdErr)), nPeaks, timeHorizonInYears, returnPeriodsInYears)$returnLevelsErr
    returnLevelsErrTransf <- tsEvaComputeReturnLevelsGPD(epsilon, sigma, threshold, epsilonStdErrTransf, sigmaStdErrTransf, thresholdStdErrTransf, nPeaks, timeHorizonInYears, returnPeriodsInYears)$returnLevelsErr
  } else {
    # Stationary fit: all error is attributed to the fit.
    returnLevelsErrFit <- returnLevelsErr
    returnLevelsErrTransf <- rep(0, length(returnLevelsErr))
  }
  return(list(returnLevels = returnLevels$returnLevels, returnLevelsErr = returnLevelsErr, returnLevelsErrFit = returnLevelsErrFit, returnLevelsErrTransf = returnLevelsErrTransf))
}
#' tsEvaComputeRLsGEVGPD
#'
#' \code{tsEvaComputeRLsGEVGPD} is a function that calculates the return levels
#' and their associated errors for a Generalized Extreme Value (GEV)
#' and Generalized Pareto (GPD) distribution using the parameters obtained
#' from a non-stationary extreme value analysis.
#' It supports non-stationary analysis by considering different parameters
#' for each time index.
#'
#' @param nonStationaryEvaParams The parameters obtained from a non-stationary
#' extreme value analysis.
#' @param RPgoal The target return period for which the return levels are computed.
#' @param timeIndex The index at which the time-varying analysis should be estimated.
#' @param trans A character string indicating the transformation to be applied
#' to the data before fitting the EVD. default value is NA, corresponding
#' to no transformation. Currently only the "rev" for reverse transformation is
#' implemented.
#'
#' @return A list containing the following components:
#' \describe{
#' \item{\code{Fit}}{A character string indicating whether the EVD could be
#' fitted to the data ("No fit") or the EVD was successfully fitted to the data ("Fitted").}
#' \item{\code{ReturnLevels}}{A data frame containing the target
#' return period (`ReturnPeriod`), GEV return level (`GEV`), GPD return level (`GPD`),
#' GEV return level error (`errGEV`), and GPD return level error (`errGPD`) for
#' the specified time index.}
#' \item{\code{Params}}{A list containing the GEV and GPD parameters for the
#' specified time index, including their standard errors.}
#' }
#'
#' @seealso \code{\link{tsEvaComputeReturnLevelsGEV}}, \code{\link{tsEvaComputeReturnLevelsGPD}}
#' @export
tsEvaComputeRLsGEVGPD <- function(nonStationaryEvaParams, RPgoal, timeIndex, trans=NA){
  # GEV parameters at the requested time index (mean() collapses a
  # multi-element timeIndex to a single representative value).
  epsilonGEV <- nonStationaryEvaParams[[1]]$parameters$epsilon
  sigmaGEV <- mean(nonStationaryEvaParams[[1]]$parameters$sigma[timeIndex])
  muGEV <- mean(nonStationaryEvaParams[[1]]$parameters$mu[timeIndex])
  dtSampleYears <- nonStationaryEvaParams[[1]]$parameters$timeDeltaYears
  # GPD parameters at the requested time index.
  epsilonGPD <- nonStationaryEvaParams[[2]]$parameters$epsilon
  sigmaGPD <- mean(nonStationaryEvaParams[[2]]$parameters$sigma[timeIndex])
  thresholdGPD <- mean(nonStationaryEvaParams[[2]]$parameters$threshold[timeIndex])
  nPeaks <- nonStationaryEvaParams[[2]]$parameters$nPeaks
  thStart <- nonStationaryEvaParams[[2]]$parameters$timeHorizonStart
  thEnd <- nonStationaryEvaParams[[2]]$parameters$timeHorizonEnd
  sampleTimeHorizon <- as.numeric((thEnd - thStart)/365.2425)
  # identical() avoids errors when $method is absent (NULL comparison).
  if (identical(nonStationaryEvaParams[[1]]$method, "No fit")){
    # No EVD could be fitted: return the parameters with NA errors.
    message("could not fit EVD to this pixel")
    ParamGEV=c(epsilonGEV,sigmaGEV,muGEV,NA, NA, NA)
    names(ParamGEV)=c("epsilonGEV","sigmaGEV","muGEV","epsilonStdErrGEV","sigmaStdErrGEV","muStdErrGEV")
    ParamGPD=c(epsilonGPD,sigmaGPD,thresholdGPD,NA,NA, NA,nPeaks,sampleTimeHorizon)
    names(ParamGPD)=c("epsilonGPD","sigmaGPD","thresholdGPD","epsilonStdErrGPD","sigmaStdErrGPD","thresholdStdErrGPD","nPeaks","SampleTimeHorizon")
    return(list(Fit="No fit",Params=c(ParamGEV,ParamGPD)))
  }else{
    # GEV standard errors.
    epsilonStdErrGEV <- nonStationaryEvaParams[[1]]$paramErr$epsilonErr
    sigmaStdErrGEV <- mean(nonStationaryEvaParams[[1]]$paramErr$sigmaErr[timeIndex])
    muStdErrGEV <- mean(nonStationaryEvaParams[[1]]$paramErr$muErr[timeIndex])
    # GPD standard errors.
    epsilonStdErrGPD <- nonStationaryEvaParams[[2]]$paramErr$epsilonErr
    sigmaStdErrGPD <- mean(nonStationaryEvaParams[[2]]$paramErr$sigmaErr[timeIndex])
    thresholdStdErrGPD <- mean(nonStationaryEvaParams[[2]]$paramErr$thresholdErr[timeIndex])
    # Fix: `if (trans=="rev")` errored with "missing value where
    # TRUE/FALSE needed" for the documented default trans=NA.
    if (!is.na(trans) && trans=="rev"){
      # Reverse transformation: negate location/scale-related parameters.
      sigmaGEV=-sigmaGEV
      muGEV=-muGEV
      sigmaGPD=-sigmaGPD
      thresholdGPD=-thresholdGPD
    }
    # Return levels and their errors for the target return period.
    returnLevelsGEV <- tsEvaComputeReturnLevelsGEV(epsilonGEV, sigmaGEV, muGEV, epsilonStdErrGEV, sigmaStdErrGEV, muStdErrGEV, RPgoal)
    returnLevelsGPD <- tsEvaComputeReturnLevelsGPD(epsilonGPD, sigmaGPD, thresholdGPD, epsilonStdErrGPD, sigmaStdErrGPD, thresholdStdErrGPD,
                                                   nPeaks, sampleTimeHorizon, RPgoal)
    rlevGEV=returnLevelsGEV$returnLevels
    rlevGPD=returnLevelsGPD$returnLevels
    errGEV=returnLevelsGEV$returnLevelsErr
    errGPD=returnLevelsGPD$returnLevelsErr
    ParamGEV=c(epsilonGEV,sigmaGEV,muGEV,epsilonStdErrGEV, sigmaStdErrGEV, muStdErrGEV)
    names(ParamGEV)=c("epsilonGEV","sigmaGEV","muGEV","epsilonStdErrGEV","sigmaStdErrGEV","muStdErrGEV")
    ParamGPD=c(epsilonGPD,sigmaGPD,thresholdGPD,epsilonStdErrGPD,sigmaStdErrGPD, thresholdStdErrGPD,nPeaks,sampleTimeHorizon)
    names(ParamGPD)=c("epsilonGPD","sigmaGPD","thresholdGPD","epsilonStdErrGPD","sigmaStdErrGPD","thresholdStdErrGPD","nPeaks","SampleTimeHorizon")
    return(list(Fit="Fitted",ReturnLevels=c(ReturnPeriod=RPgoal, GEV=as.numeric(rlevGEV),GPD=as.numeric(rlevGPD),errGEV=as.numeric(errGEV),errGPD=as.numeric(errGPD)),Params=c(ParamGEV,ParamGPD)))
  }
}
#' tsEvaSampleData Function
#'
#' \code{tsEvaSampleData} is a function that calculates various statistics
#' and data for time series evaluation.
#'
#' @param ms A matrix containing the time series data.
#' @param meanEventsPerYear The mean number of events per year.
#' @param minEventsPerYear The minimum number of events per year.
#' @param minPeakDistanceInDays The minimum peak distance in days.
#' @param tail The tail to be studied for POT selection, either 'high' or 'low'.
#'
#' @return A list containing the following elements:
#' \describe{
#' \item{\code{completeSeries}}{The complete time series data.}
#' \item{\code{POT}}{The data for Peaks Over Threshold (POT) analysis.}
#' \item{\code{years}}{The years in the time series data.}
#' \item{\code{Percentiles}}{The desired percentiles and their corresponding values.}
#' \item{\code{annualMax}}{The annual maximum values.}
#' \item{\code{annualMaxDate}}{The dates corresponding to the annual maximum values.}
#' \item{\code{annualMaxIndx}}{The indices of the annual maximum values.}
#' \item{\code{monthlyMax}}{The monthly maximum values.}
#' \item{\code{monthlyMaxDate}}{The dates corresponding to the monthly maximum values.}
#' \item{\code{monthlyMaxIndx}}{The indices of the monthly maximum values.}
#' }
#' @seealso [tsGetPOT()]
#' @import stats
#' @examples
#' # Generate sample data
#' data <- ArdecheStMartin
#' colnames(data) <- c("Date", "Value")
#' #select only the 5 latest years
#' yrs <- as.integer(format(data$Date, "%Y"))
#' tokeep <- which(yrs>=2015)
#' data <- data[tokeep,]
#' timeWindow <- 365 # 1 year
#' # Calculate statistics and data
#' result <- tsEvaSampleData(data, meanEventsPerYear=3, minEventsPerYear=0,
#' minPeakDistanceInDays=7, "high")
#' # View the result
#' print(result)
#' @export
tsEvaSampleData <- function(ms, meanEventsPerYear, minEventsPerYear, minPeakDistanceInDays, tail = NA) {
  # The studied tail must be declared explicitly.
  if (is.na(tail)) stop("tail for POT selection needs to be 'high' or 'low'")
  # Percentiles reported alongside the series statistics.
  pctsDesired <- c(90, 95, 99, 99.9)
  # Candidate threshold percentiles scanned by the POT selection.
  potPercentiles <- c(seq(70, 90, by = 1), seq(91, 99.5, by = 0.5))

  POTData <- tsGetPOT(ms, potPercentiles, meanEventsPerYear, minEventsPerYear, minPeakDistanceInDays, tail)
  pctValues <- quantile(ms[, 2], pctsDesired / 100, na.rm = TRUE)
  annual <- computeAnnualMaxima(ms)
  monthly <- computeMonthlyMaxima(ms)

  # Years of the record shifted to start at zero (+3600 s presumably guards
  # against timestamps just before a year boundary -- TODO confirm).
  yrs <- unique(as.numeric(format(as.Date(ms[, 1] + 3600), "%Y")))
  yrs <- yrs - min(yrs)

  list(
    completeSeries = ms,
    POT = POTData,
    years = seq(min(yrs), max(yrs), 1),
    # Note: "precentiles" key kept as-is for backward compatibility.
    Percentiles = list(precentiles = pctsDesired, values = pctValues),
    annualMax = annual$annualMax,
    annualMaxDate = annual$annualMaxDate,
    annualMaxIndx = annual$annualMaxIndx,
    monthlyMax = monthly$monthlyMax,
    monthlyMaxDate = monthly$monthlyMaxDate,
    monthlyMaxIndx = monthly$monthlyMaxIndx
  )
}
#' tsGetPOT Function
#'
#' \code{tsGetPOT} is a function that calculates the Peaks Over Threshold (POT)
#' for a given time series data.
#'
#' @param ms A matrix containing the time series data with two columns:
#' the first column represents the time and the second column represents the values.
#' @param pcts A numeric vector specifying the percentiles to be used as
#' thresholds for identifying peaks.
#' @param desiredEventsPerYear The desired number of events per year.
#' @param minEventsPerYear The minimum number of events per year.
#' @param minPeakDistanceInDays The minimum distance between two peaks in days.
#' @param tail The tail to be studied for POT selection, either 'high' or 'low'.
#'
#' @return A list containing the following fields:
#' \describe{
#' \item{\code{threshold}}{The threshold value used for identifying peaks}
#' \item{\code{thresholdError}}{The error associated with the threshold value}
#' \item{\code{percentile}}{The percentile value used as the threshold.}
#' \item{\code{peaks}}{The values of the identified peaks.}
#' \item{\code{stpeaks}}{The start indices of the identified peaks.}
#' \item{\code{endpeaks}}{The end indices of the identified peaks.}
#' \item{\code{ipeaks}}{The indices of the identified peaks.}
#' \item{\code{time}}{The time values corresponding to the identified peaks.}
#' \item{\code{pars}}{The parameters of the Generalized Pareto Distribution (GPD)
#' fitted to the peaks.}
#' }
#'
#' @import stats
#' @importFrom POT fitgpd
#' @seealso [tsEvaSampleData()]
#' @examples
#' # Create a sample time series data
#' ms <- ArdecheStMartin
#'
#' # Calculate the POT using the tsGetPOT function
#' pcts <- c(90, 95, 99)
#' desiredEventsPerYear <- 5
#' minEventsPerYear <- 2
#' minPeakDistanceInDays <- 10
#' tail <- "high"
#' POTdata <- tsGetPOT(ms, pcts, desiredEventsPerYear, minEventsPerYear, minPeakDistanceInDays, tail)
#' # Print the results
#' print(POTdata)
#'
#' @export
tsGetPOT <- function(ms, pcts, desiredEventsPerYear, minEventsPerYear, minPeakDistanceInDays, tail) {
  # Select a peaks-over-threshold (POT) sample: scan the candidate percentile
  # thresholds in 'pcts', fit a GPD to each acceptable peak sample, score the
  # fits with a penalised AIC, and return the peaks and fitted GPD parameters
  # for the best threshold.
  #
  # ms   : two-column series (time, value); NAs are sentinel-replaced below.
  # pcts : candidate threshold percentiles. A single value is expanded to a
  #        two-element scan and disables the desired-events acceptance rule.
  # tail : "high" for maxima, "low" for minima (declustered runs).
  if (minPeakDistanceInDays == -1) {
    stop("label parameter 'minPeakDistanceInDays' must be set")
  }
  # Fail fast on an invalid tail: otherwise 'shape_bnd' is never defined and
  # the function errors later with an obscure "object not found" message.
  if (!tail %in% c("high", "low")) {
    stop("tsGetPOT: 'tail' must be either 'high' or 'low'")
  }
  # Sampling time step of the series, converted to days.
  dt1 <- min(diff(ms[, 1]), na.rm = TRUE)
  dt <- as.numeric(dt1)
  tdim <- attributes(dt1)$units
  if (tdim == "hours") dt <- dt / 24
  # NOTE(review): difftime units are reported as "secs", not "seconds", so
  # this branch may never trigger -- verify against the expected inputs.
  if (tdim == "seconds") dt <- dt / 3600
  minPeakDistance <- minPeakDistanceInDays / dt
  minRunDistance <- minPeakDistance
  nyears <- round(as.numeric((max(ms[, 1]) - min(ms[, 1])) / 365.25))
  if (length(pcts) == 1) {
    # With a single requested percentile, scan a small bracket below it and
    # disable the desired-events-per-year acceptance criterion.
    pcts <- c(pcts - 3, pcts)
    desiredEventsPerYear <- -1
  }
  # Per-candidate diagnostics, filled in by the scan below.
  numperyear <- rep(NA, length(pcts))
  minnumperyear <- rep(NA, length(pcts))
  thrsdts <- rep(NA, length(pcts))
  gpp <- rep(NA, length(pcts))    # fitted GPD shape per candidate threshold
  devpp <- rep(NA, length(pcts))  # penalised AIC per candidate threshold
  # NOTE(review): 'dej' is initialised to 0 and never modified, so the
  # 'dej == 0' guards below are always TRUE -- confirm whether it is a stub.
  dej <- 0
  skip <- 0
  trip <- NA
  perfpen <- 0
  for (ipp in seq_along(pcts)) {
    # 'skip' fast-forwards past thresholds that yield far too many peaks.
    if (skip > 0) {
      skip <- skip - 1
    } else {
      if (dej == 0) {
        thrsdt <- quantile(ms[, 2], pcts[ipp] / 100, na.rm = TRUE)
        thrsdts[ipp] <- thrsdt
        # Sentinel-replace NAs so peak finding / declustering can run.
        ms[, 2][which(is.na(ms[, 2]))] <- -9999
        # NOTE(review): this overrides the caller-supplied minEventsPerYear;
        # confirm that is intentional.
        minEventsPerYear <- 1
        if (tail == "high") {
          # Plausible bounds on the GPD shape parameter for the high tail.
          shape_bnd <- c(-0.5, 1)
          pks <- pracma::findpeaks(ms[, 2], minpeakdistance = minPeakDistance, minpeakheight = thrsdt)
        }
        if (tail == "low") {
          pks <- declustpeaks(data = ms[, 2], minpeakdistance = minPeakDistance, minrundistance = minRunDistance, qt = thrsdt)
          shape_bnd <- c(-1.5, 0)
        }
        numperyear[ipp] <- length(pks[, 1]) / nyears
        # Way too many events: jump ahead over several candidate percentiles.
        if (numperyear[ipp] >= 3 * desiredEventsPerYear && ipp < (length(pcts) - 5)) skip <- floor(length(pcts) / 8)
        # Penalise thresholds that leave too few events per year.
        if (numperyear[ipp] < minEventsPerYear) {
          perfpen <- (pcts[ipp] * 100)
        }
        if (numperyear[ipp] < (0.7 * minEventsPerYear)) {
          perfpen <- 9999 * (pcts[ipp] * 100)
        }
        if (numperyear[ipp] <= desiredEventsPerYear + 1 && dej == 0) {
          # Acceptable event rate: fit a GPD to this peak sample and score it.
          fgpd <- suppressWarnings(POT::fitgpd(pks[, 1], threshold = thrsdt, est = "mle", method = "BFGS", std.err.type = "expected"))
          gpdpar <- fgpd$fitted.values
          deviance <- fgpd$deviance
          devpp[ipp] <- AIC(fgpd) + perfpen
          gpp[ipp] <- gpdpar[2]
          nperYear <- tsGetNumberPerYear(ms, pks[, 2])
          minnumperyear[ipp] <- min(nperYear$Freq, na.rm = TRUE)
        }
      }
    }
  }
  # The first (lowest) candidate threshold is never retained.
  devpp[1] <- NA
  if (is.na(trip)) {
    isok <- FALSE
    count <- 0
    devpx <- devpp
    # Pick the best-scoring fit whose shape parameter lies inside the
    # allowed bounds; out-of-bounds fits are progressively penalised.
    while (!isok) {
      trip <- which.min(devpx)
      isok <- dplyr::between(gpp[trip], shape_bnd[1], shape_bnd[2])
      count <- count + 1
      if (!isok) devpx[trip] <- devpx[trip] + 9999
      if (count > (length(devpx) - 1)) {
        # Safety measure for stability of the shape parameter: penalise
        # unstable fits and accept the best remaining candidate anyway.
        dshap <- c(0, diff(gpp))
        # Penalising fits with positive shape parameters for low tail.
        if (tail == "low") devpp[which(gpp >= 0)] <- devpp[which(gpp >= 0)] + 99999
        devpp[which(abs(dshap) > 0.5)] <- devpp[which(abs(dshap) > 0.5)] + 9999
        trip <- which.min(devpp)
        message(paste0("shape outside boudaries: ", round(gpp[trip], 2)))
        isok <- TRUE
      }
    }
  }
  message(paste0("\nmax threshold is: ", pcts[trip], "%"))
  message(paste0("\naverage number of events per year = ", round(numperyear[trip], 1)))
  # The data-driven estimate of diffNPerYear was unconditionally overridden
  # with 1 in the original code; the dead computation has been removed.
  diffNPerYear <- 1
  thresholdError <- -mean(diff(na.omit(thrsdts)) / diffNPerYear) / 2
  indexp <- trip
  if (!is.na(indexp)) {
    thrsd <- quantile(ms[, 2], pcts[indexp] / 100)
    pct <- pcts[indexp]
  } else {
    # Fallback when no acceptable threshold was found.
    thrsd <- 0
    # Bug fix: 'pct' was previously evaluated here without being assigned,
    # which raised "object 'pct' not found" when building the output list.
    pct <- NA
  }
  # Recompute the peaks at the selected threshold.
  if (tail == "high") pks_and_locs <- pracma::findpeaks(ms[, 2], minpeakdistance = minPeakDistance, minpeakheight = thrsd)
  if (tail == "low") pks_and_locs <- declustpeaks(data = ms[, 2], minpeakdistance = minPeakDistance, minrundistance = minRunDistance, qt = thrsd)
  # Split the peak matrix into values, locations and start/end indices.
  pks <- pks_and_locs[, 1]
  locs <- pks_and_locs[, 2]
  st <- pks_and_locs[, 3]
  end <- pks_and_locs[, 4]
  # Assemble the POT description returned to the caller.
  # NOTE(review): 'gpdpar' is only set when at least one GPD fit ran in the
  # scan above; otherwise this errors -- confirm that is acceptable.
  POTdata <- list()
  POTdata[['threshold']] <- thrsd
  POTdata[['thresholdError']] <- thresholdError
  POTdata[['percentile']] <- pct
  POTdata[['peaks']] <- pks
  POTdata[['stpeaks']] <- st
  POTdata[['endpeaks']] <- end
  POTdata[['ipeaks']] <- locs
  POTdata[['time']] <- ms[locs, 1]
  POTdata[['pars']] <- gpdpar
  return(POTdata)
}
#' tsEVstatistics
#'
#' \code{tsEvstatistics} is a function that calculates the Generalized Extreme
#' Value (GEV) and Generalized Pareto Distribution (GPD) statistics
#' and return levels for a given dataset of extreme values.
#'
#' @param pointData A list containing the dataset of extreme values. It should include the following components:
#' \describe{
#' \item{\code{annualMax}}{A vector of annual maximum values}
#' \item{\code{annualMaxDate}}{A vector of dates corresponding to the annual maximum values}
#' \item{\code{monthlyMax}}{A matrix of monthly maximum values}
#' }
#' @param alphaCI The confidence level for the confidence intervals of the parameter estimates. Default is 0.95.
#' @param gevMaxima The type of maxima to use for GEV fitting. Can be either 'annual' or 'monthly'. Default is 'annual'.
#' @param gevType The type of GEV distribution to use. Can be either 'GEV', 'Gumbel'. Default is 'GEV'.
#' @param evdType The types of extreme value distributions to calculate. Can be a combination of 'GEV' and 'GPD'. Default is c('GEV', 'GPD').
#' @param shape_bnd The lower and upper bounds for the shape parameter of the GEV distribution. Default is c(-0.5, 1).
#'
#' @importFrom evd fgev
#' @return A list containing the following components:
#' \describe{
#' \item{\code{EVmeta}}{A list containing metadata about the analysis.
#' It includes Tr, A vector of return periods for which
#' return levels are calculated}
#' \item{\code{EVdata}}{A list containing the calculated statistics and return levels. It includes the following components:}
#' \describe{
#' \item{\code{GEVstat}}{A list containing the GEV statistics and return levels:}
#' \describe{
#' \item{\code{method}}{The method used for fitting the GEV distribution.}
#' \item{\code{values}}{A vector of return levels calculated using the GEV distribution.}
#' \item{\code{parameters}}{A vector of parameter estimates for the GEV distribution.}
#' \item{\code{paramCIs}}{A matrix of confidence intervals for the parameter estimates.}
#' }
#' }
#' \item{\code{GPDstat}}{list containing the GPD statistics and return levels:}
#' \describe{
#' \item{\code{method}}{The method used for fitting the GPD distribution}
#' \item{\code{values}}{A vector of return levels calculated using the GPD distribution}
#' \item{\code{parameters}}{A vector of parameter estimates for the GPD distribution}
#' \item{\code{paramCIs}}{A matrix of confidence intervals for the parameter estimates}
#' }
#' \item{\code{isValid}}{A logical value indicating whether the analysis was performed or not.}
#' }
#'
#' @examples
#'# Create a sample dataset
#'data <- ArdecheStMartin
#'colnames(data) <- c("Date", "Value")
#'yrs <- as.integer(format(data$Date, "%Y"))
#'tokeep <- which(yrs>=2015)
#'data <- data[tokeep,]
#'# Calculate statistics and data
#'pointData <- tsEvaSampleData(data, meanEventsPerYear=3, minEventsPerYear=0,
#'minPeakDistanceInDays=7, "high")
#'# Calculate GEV and GPD statistics
#'result <- tsEVstatistics(pointData)
#'result$EVdata$GEVstat$values
#'result$EVdata$GPDstat$values
#' @export
tsEVstatistics <- function(pointData, alphaCI = 0.95, gevMaxima = 'annual', gevType = 'GEV', evdType = c('GEV', 'GPD'),shape_bnd=c(-0.5,1)) {
# Fits a GEV distribution to the block maxima and a GPD to the POT peaks
# held in 'pointData', and computes return levels for a fixed set of return
# periods. Returns list(EVmeta, EVdata, isValid).
# Create empty data structures
EVmeta <- list()
EVdata <- list()
isValid <- TRUE
# Minimum number of block maxima required to attempt a GEV fit.
minGEVSample <- 7
# Re-apply defaults when callers explicitly pass NULL.
if(is.null(alphaCI)){alphaCI <- .95}
if(is.null(gevMaxima)){gevMaxima <- 'annual'}
if(is.null(gevType)){gevType <- 'GEV'}
if(is.null(evdType)){evdType <- c('GEV', 'GPD')}
# Define Tr vector: return periods (years) for which levels are computed.
Tr <- c(5,10,20,50,100,200,500,1000)
EVmeta$Tr <- Tr
# NOTE(review): 'nyears', 'imethod' and (below) 'ik' are assigned but never
# used afterwards in this function -- possible leftovers.
nyears <- length(pointData$annualMax)[1]
imethod <- 1
methodname <- 'GEVstat'
paramEsts <- numeric(3)
paramCIs <- matrix(NA, nrow = 2, ncol = 3)
rlvls <- numeric(length(Tr))
# GEV statistics
if (('GEV' %in% evdType) && !is.null(pointData$annualMax)) {
# Choose the block-maxima sample to fit.
if (gevMaxima == 'annual') {
tmpmat <- pointData$annualMax
} else if (gevMaxima == 'monthly') {
tmpmat <- pointData$monthlyMax
} else {
stop(paste0('Invalid gevMaxima type: ', gevMaxima))
}
iIN <- length(tmpmat)
# Only fit when the sample is large enough.
if (sum(iIN) >= minGEVSample) {
tmp <- data.frame(yr=year(pointData$annualMaxDate),dt=tmpmat)
# Perform GEV/Gumbel fitting and computation of return levels
if (gevType == "GEV"){
stdfit=TRUE
#try to fit GEV with bounded shape parameters and stderr, reduces the constrains if no fit.
fit <- suppressWarnings(try(evd::fgev(x=tmp$dt,method="L-BFGS-B",lower=c(-Inf,-Inf,shape_bnd[1]),upper=c(Inf,Inf,shape_bnd[2]),std.err = T),TRUE))
if(inherits(fit, "try-error")){
# First fallback: same constrained fit but without standard errors.
stdfit=FALSE
message("Not able to fit GEV stderr")
fit <- suppressWarnings(try(evd::fgev(x=tmp$dt,method="L-BFGS-B",lower=c(-Inf,-Inf,shape_bnd[1]),upper=c(Inf,Inf,shape_bnd[2]),std.err = F),TRUE))
}
if(inherits(fit, "try-error")){
# Last resort: unconstrained fit without standard errors.
stdfit=FALSE
message("Not able to fit GEV with constrained parameters")
fit <- suppressWarnings(try(evd::fgev(x=tmp$dt,std.err = F),TRUE))
}
# Location (mu), scale (sigma) and shape (xi) estimates.
paramEsts <- c(mu=fit$par[1],sigma=fit$par[2],xi=fit$par[3])
alphaCIx=1-alphaCI
if (stdfit==TRUE){
probs <- c(alphaCIx/2, 1-alphaCIx/2)
# Compute the CI for k using a normal distribution for khat.
kci <- try(qnorm(probs, paramEsts[3], fit$std.err[3]),TRUE)
kci[kci < -1] <- -1
# Compute the CI for sigma using a normal approximation for log(sigmahat)
# and transform back to the original scale.
lnsigci <- try(qnorm(probs, log(paramEsts[2]), fit$std.err[2]/paramEsts[2]),silent=T)
muci <- qnorm(probs, paramEsts[1], fit$std.err[1])
paramCIs <- cbind(muci=muci, sigci=exp(lnsigci), kci=kci)
}else{
#not able to generate parameters CI
paramCIs <- cbind(muci=NA, sigci=NA, kci=NA)
}
} else if (gevType == "gumbel"){
# NOTE(review): this branch matches lowercase "gumbel" only, while the
# documentation and the error message below say 'Gumbel' -- verify which
# spelling callers actually pass.
# NOTE(review): 'dt' and 'gumbel' here rely on texmex's non-standard
# evaluation resolving them against 'tmp' / the texmex namespace.
fit <- texmex::evm(y = dt, data = tmp, family = gumbel)
# Gumbel is GEV with shape fixed at 0; scale is stored on the log scale.
paramEsts <- c(fit$par[1], exp(fit$par[2]),0)
alphaCIx=1-alphaCI
probs <- c(alphaCIx/2, 1-alphaCIx/2)
# Compute the CI for k using a normal distribution for khat.
if(is.character(fit$se[1])){
message("Gumbel fitted")
#methodname <- 'No fit'
gevType = "gumbel"
}else{
message("Gumbel fitted")
#gevType = "gumbel"
kci <- c(NA,NA)
# Compute the CI for sigma using a normal approximation for log(sigmahat)
# and transform back to the original scale.
lnsigci <- try(qnorm(probs, log(paramEsts[2]), fit$se[2]))
muci <- qnorm(probs, paramEsts[1], fit$se[1])
paramCIs <- cbind(muci=muci, sigci=exp(lnsigci), kci=kci)
}
}
else {
stop("tsEVstatistics: invalid gevType: ", gevType, ". Can be only GEV, or Gumbel")
}
# Return levels at the requested return periods.
# NOTE(review): 'qgev' is called without a namespace; presumably evd::qgev
# via the package imports -- confirm.
rlvls <- qgev(1-1/Tr, paramEsts[1], paramEsts[2], paramEsts[3])
}else{
# Sample too small for a GEV fit.
message("Could not fit GEV")
methodname <- 'No fit'
rlvls=NA
paramEsts=NA
paramCIs=NA
}
}
EVdata$GEVstat <- list(method=methodname,
values=rlvls,
parameters=paramEsts,
paramCIs = paramCIs)
# Create output structures for GEV statistics
# GPD statistics
imethod <- 2
methodname <- 'GPDstat'
paramEsts <- numeric(6)
paramCIs <- matrix(NA, nrow = 2, ncol = 3)
rlvls <- numeric(length(Tr))
if (('GPD' %in% evdType) && !is.null(pointData$annualMax)) {
# Perform GPD fitting and computation of return levels
message("Fitted GPD")
ik <- 1
th=pointData$POT$threshold
d1 <- pointData$POT$peaks
fit <- suppressWarnings(try(POT::fitgpd(d1, threshold = th, est = "mle",
method="BFGS",std.err.type = "expected"),TRUE))
if(!inherits(fit, "try-error")){
# Shape (ksi) and scale (sgm) estimates from the GPD fit.
ksi <- fit$par[2]
sgm <- fit$par[1]
alphaCIx=1-alphaCI
probs <- c(alphaCIx/2, 1-alphaCIx/2)
kci <- try(qnorm(probs, ksi, fit$std.err[2]),silent=T)
kci[kci < -1] <- -1
# Compute the CI for sigma using a normal approximation for log(sigmahat)
# and transform back to the original scale.
lnsigci <- try(qnorm(probs, log(sgm), fit$std.err[1]/sgm))
paramCIs <- cbind(kci, sigci=exp(lnsigci))
# Create output structures for GPD statistics
# Assign values to paramEstsall
paramEstsall <- c(sgm, ksi, pointData$POT$threshold, length(d1), length(pointData$POT$peaks), pointData$POT$percentile)
# Assign values to rlvls
rlvls <- pointData$POT$threshold + (sgm/ksi) * ((((length(d1)/length(pointData$POT$peaks))*(1/Tr))^(-ksi))-1)
EVdata$GPDstat <- list(method=methodname,
values=rlvls,
parameters=paramEstsall,
paramCIs = paramCIs)
}else {
# Fit failed: fall back to the parameters stored with the POT sample.
# NOTE(review): this branch duplicates the 'GPD not requested' branch
# below -- candidate for a shared helper.
methodname <- 'No fit'
ik <- 1
th=pointData$POT$threshold
d1 <- pointData$POT$peaks
paramEstsall <- c(pointData$POT$pars[1], pointData$POT$pars[2],
pointData$POT$threshold, length(d1),
length(pointData$POT$peaks), pointData$POT$percentile)
EVdata$GPDstat <- list(method=methodname,
values=NA,
parameters=paramEstsall,
paramCIs = NA)
message("could not estimate GPD: bounded distribution")
}
} else {
# GPD not requested (or no annual maxima): report the stored parameters.
methodname <- 'No fit'
ik <- 1
th=pointData$POT$threshold
d1 <- pointData$POT$peaks
paramEstsall <- c(pointData$POT$pars[1], pointData$POT$pars[2],
pointData$POT$threshold, length(d1),
length(pointData$POT$peaks), pointData$POT$percentile)
EVdata$GPDstat <- list(method=methodname,
values=NA,
parameters=paramEstsall,
paramCIs = NA)
message("could not estimate GPD: bounded distribution")
}
# Return outputs
return(list(EVmeta = EVmeta, EVdata = EVdata, isValid = isValid))
}
# Helpers for main functions----------------
#' tsGetNumberPerYear
#'
#' \code{tsGetNumberPerYear} is a function that calculates the number of events per year based on a given time series and a set of locations.
#'
#' @param ms A data frame representing the time series data, where the first column contains the dates of the events.
#' @param locs A vector of indices representing the locations of interest in the time series.
#'
#' @return A data frame with two columns: "year" and "Freq". The "year" column contains the years, and the "Freq" column contains the number of events per year.
#'
#' @importFrom dplyr full_join
#' @importFrom lubridate year
#' @examples
#' # Create a sample time series data frame
#'set.seed(123)
#' ms <- data.frame(date = seq(as.Date("2000-01-01"), as.Date("2022-12-31"), by = "day"),
#' values=rnorm(8401))
#'# Generate random events
#'events <- match(sample(ms$date, 100),ms$date)
#'# Get the number of events per year
#'tsGetNumberPerYear(ms, events)
#' @export
tsGetNumberPerYear <- function(ms, locs) {
  # Count events per calendar year. 'ms' is the full series (first column =
  # dates), 'locs' are the row indices of the events. Years spanned by the
  # series but containing no events get Freq = 0, and years with fewer than
  # 184 observations (under ~half a year of data) are also zeroed.
  #
  # Cleanup: the original computed and immediately discarded two offsets
  # ('sdfull - min(sdfull)', 'ms[,1] - min(ms[,1])') and kept an unused
  # duplicate of table(sdser); that dead code has been removed.
  #
  # Full daily calendar of the series, so that event-free years appear.
  sdfull <- seq(min(ms[, 1]), max(ms[, 1]), by = "days")
  sdfull2p <- data.frame(year = unique(year(sdfull)))
  # Year of every observation (used to assess per-year data coverage).
  sdser <- year(ms[, 1])
  # Year of every event.
  sdp <- year(as.Date(ms[locs, 1]))
  # Events per year, joined against the full year list; years absent from
  # the event table come back with NA Freq, reset to 0 below.
  nperYear <- data.frame(table(sdp))
  nperYear$year <- as.numeric(as.character(nperYear$sdp))
  nperYear <- dplyr::full_join(nperYear, sdfull2p, by = c("year" = "year"))
  nperYear$Freq[which(is.na(nperYear$sdp))] <- 0
  # Observations per year.
  # NOTE(review): this assumes the rows of table(sdser) align one-to-one
  # with the rows of nperYear after the join -- verify for gappy series.
  nperYear$opy <- table(sdser)
  # Years with insufficient coverage are treated as having no events.
  nperYear$Freq[which(nperYear$opy < 184)] <- 0
  return(nperYear)
}
#' findMax
#'
#' \code{findMax} is a function that takes a subset of a vector
#' and returns the index of the maximum value in that subset.
#'
#' @param subIndxs A numeric vector representing the subset of indices to consider.
#' @param srs A vector of numerical data
#' @return The index of the maximum value in the subset.
#' @examples
#' srs <- c(10, 20, 30, 40, 50)
#' findMax(c(1, 3, 5),srs)
#' #result is 5.
#' @export
findMax <- function(subIndxs, srs) {
  # Among the candidate positions 'subIndxs', return the one pointing at the
  # largest value of 'srs'.
  subIndxs[which.max(srs[subIndxs])]
}
#' computeAnnualMaxima
#'
#' \code{computeAnnualMaxima} is a function that computes the annual maxima of
#' a time series.
#'
#' @param timeAndSeries A matrix or data frame containing the time stamps and
#' series values. The first column should contain the time stamps and the
#' second column should contain the series values.
#'
#' @return A list containing the annual maximum values, their corresponding dates,
#' and their indices.
#' \describe{
#' \item{\code{annualMax}}{A numeric vector of annual maximum values.}
#' \item{\code{annualMaxDate}}{A vector of dates corresponding to the annual
#' maximum values.}
#' \item{\code{annualMaxIndx}}{A vector of indices indicating the positions
#' of the annual maximum values in the original time series.}
#' }
#'
#' @examples
#' timeAndSeries <- ArdecheStMartin
#' computeAnnualMaxima(timeAndSeries)
#' @export
computeAnnualMaxima <- function(timeAndSeries) {
  # Compute the annual maxima of a (time, value) series.
  # Returns list(annualMax, annualMaxDate, annualMaxIndx).
  timeStamps <- timeAndSeries[, 1]
  # Series time step: decide whether a +1h shift is needed before converting
  # to dates (applied only to sub-daily series; adding 3600 to a Date-class
  # vector would shift it by 3600 days).
  dt1 <- min(diff(timeStamps), na.rm = TRUE)
  tdim <- attributes(dt1)$units
  tmvec <- as.Date(timeStamps)
  if (tdim != "days") tmvec <- as.Date(timeStamps + 3600)
  srs <- timeAndSeries[, 2]
  years <- year(tmvec)
  # Index of the maximum value within each year.
  annualMaxIndx <- tapply(seq_along(srs), years, function(x) {
    findMax(x, srs)
  })
  annualMaxIndx <- as.vector(unlist(annualMaxIndx))
  # Bug fix: the NA filter used to be assigned to a misspelled variable
  # ('annualMaxInx') and was therefore silently discarded.
  annualMaxIndx <- annualMaxIndx[!is.na(annualMaxIndx)]
  annualMax <- srs[annualMaxIndx]
  annualMaxDate <- timeStamps[annualMaxIndx]
  return(list(annualMax = annualMax, annualMaxDate = annualMaxDate, annualMaxIndx = annualMaxIndx))
}
#' computeMonthlyMaxima
#'
#' \code{computeMonthlyMaxima} is a function that computes the monthly maxima
#' of a time series.
#'
#' @param timeAndSeries A data frame containing the time stamps and series values.
#' The first column should contain the time stamps,
#' and the second column should contain the series values.
#'
#' @return A list containing the monthly maxima, corresponding dates, and indices.
#' \describe{
#' \item{\code{monthlyMax}}{A vector of the monthly maximum values.}
#' \item{\code{monthlyMaxDate}}{A vector of the dates corresponding to the monthly maximum values.}
#' \item{\code{monthlyMaxIndx}}{A vector of the indices of the monthly maximum values in the original series.}
#'}
#' @examples
#' timeAndSeries <- ArdecheStMartin
#' computeMonthlyMaxima(timeAndSeries)
#' @export
computeMonthlyMaxima <- function(timeAndSeries) {
  # Compute the monthly maxima of a (time, value) series.
  # Returns list(monthlyMax, monthlyMaxDate, monthlyMaxIndx).
  timeStamps <- timeAndSeries[, 1]
  srs <- timeAndSeries[, 2]
  # Consistency fix: mirror computeAnnualMaxima and only apply the +1h shift
  # for sub-daily series. The previous unconditional
  # 'as.Date(timeStamps + 3600)' would shift Date-class input by 3600 days.
  dt1 <- min(diff(timeStamps), na.rm = TRUE)
  tdim <- attributes(dt1)$units
  tmvec <- as.Date(timeStamps)
  if (tdim != "days") tmvec <- as.Date(timeStamps + 3600)
  yrs <- as.integer(format(tmvec, "%Y"))
  mnts <- as.integer(format(tmvec, "%m"))
  mnttmvec <- data.frame(yrs, mnts)
  valsIndxs <- seq_along(srs)
  # Index of the maximum value within each (year, month) group.
  monthlyMaxIndx <- aggregate(valsIndxs ~ yrs + mnts, data = mnttmvec, function(x) {
    findMax(x, srs)
  })
  monthlyMaxIndx$valsIndxs <- as.numeric(monthlyMaxIndx$valsIndxs)
  # Reorder chronologically: by year, then by month.
  monthlyMaxIndx <- monthlyMaxIndx[order(monthlyMaxIndx$yrs, monthlyMaxIndx$mnts), "valsIndxs"]
  monthlyMax <- srs[monthlyMaxIndx]
  monthlyMaxDate <- timeStamps[monthlyMaxIndx]
  return(list(monthlyMax = monthlyMax, monthlyMaxDate = monthlyMaxDate, monthlyMaxIndx = monthlyMaxIndx))
}
#' declustpeaks
#'
#' \code{declustpeaks} is a function that takes in a data vector, minimum peak distance, minimum run distance, and a threshold value.
#' It finds peaks in the data vector based on the minimum peak distance and threshold value.
#' It then declusters the peaks based on the minimum run distance and threshold value.
#' The function returns a data frame with information about the peaks, including the peak value,
#' start and end indices, duration, and cluster information.
#'
#' @param data A numeric vector representing the data.
#' @param minpeakdistance An integer specifying the minimum distance between peaks.
#' @param minrundistance An integer specifying the minimum distance between runs.
#' @param qt A numeric value representing the threshold for peak detection.
#' @import texmex stats
#' @return A data frame with information about the peaks, including:
#' \describe{
#' \item{\code{Q}}{the peak value}
#' \item{\code{max}}{max time index}
#' \item{\code{start}}{start time index}
#' \item{\code{end}}{end time index}
#' \item{\code{dur}}{duration}
#' \item{\code{cluster}}{cluster information }
#' }
#' @examples
#' data <- c(1, 2, 3, 4, 5, 4, 3, 2, 1)
#' declustpeaks(data, minpeakdistance = 2, minrundistance = 2, qt = 3)
#'
#' @export
declustpeaks<-function(data,minpeakdistance = 10 ,minrundistance = 7, qt){
# Decluster exceedances of threshold 'qt' into independent events using
# texmex runs declustering and return one row per cluster maximum, with the
# peak value, its time index, the cluster start/end indices and duration.
# NOTE(review): 'pks' is computed here but never used afterwards.
pks <- pracma::findpeaks(data,minpeakdistance = minpeakdistance, minpeakheight = qt)
peakev=texmex::declust(data,threshold=qt, r=minrundistance)
# Exceedance values and cluster bookkeeping from the declustering object.
Qval=peakev$thExceedances
intcl=c(TRUE,peakev$InterCluster)
peakex=data.frame(Qval,intcl, peakev$clusters,peakev$isClusterMax,peakev$exceedanceTimes)
names(peakex)=c("Qs","Istart","clusters","IsClustermax","exceedances")
# Keep only the cluster maxima: one representative event per cluster.
evmax=peakex[which(peakex$IsClustermax==T),]
# Per-cluster duration (index span) and start/end time indices.
# NOTE(review): '$exceedance' resolves via partial matching to the
# 'exceedances' column -- works, but fragile.
ziz=aggregate(peakex$exceedance,
by=list(clust=peakex$clusters), FUN=function(x) c(max(x)-min(x)+1))
st=stats::aggregate(peakex$exceedance,
by=list(clust=peakex$clusters), FUN=function(x) c(min(x)))
end=stats::aggregate(peakex$exceedance,
by=list(clust=peakex$clusters), FUN=function(x) c(max(x)))
evmax$dur=ziz$x
evmax$durx=peakev$sizes
# NOTE(review): after the rename above 'peakex' has no 'date',
# 'exceedanceTimes' or 'thExceedances' columns, so the three assignments
# below evaluate to NULL and are effectively no-ops -- confirm intent.
evmax$stdate=peakex$date[which(evmax$Istart==T)]
evmax$cm = peakex$exceedanceTimes[peakev$isClusterMax]
evmax$Qv = peakex$thExceedances[peakev$isClusterMax]
# Final table: peak value, its index, cluster bounds, duration, cluster id.
peakdt=data.frame(evmax$Qs,evmax$exceedances,st$x,end$x,ziz$x,evmax$clusters)
names(peakdt)=c("Q","max","start","end","dur","cluster")
# Sort events by magnitude, largest first.
peakdt=peakdt[order(peakdt$Q,decreasing = TRUE),]
return(peakdt)
}
#' empdis: Empirical Distribution Function
#'
#' \code{empdis} is a function that calculates the empirical
#'distribution function for a given dataset.
#'
#' @param x A numeric vector representing the dataset.
#' @param nyr An integer representing the number of years in the dataset.
#'
#' @return A data frame containing:
#' \describe{
#' \item{\code{emp.RP}}{empirical return period}
#' \item{\code{haz.RP}}{Hazen return period}
#' \item{\code{cun.RP}}{Cunnane return period}
#' \item{\code{gumbel}}{Gumbel values}
#' \item{\code{emp.f}}{empirical cumulative density}
#' \item{\code{emp.hazen}}{Hazen cumulative density}
#' \item{\code{emp.cunnan}}{Cunnane cumulative density}
#' \item{\code{Q}}{original data}
#' \item{\code{timestamp}}{time component}
#' }
#' @examples
#' x <- c(1, 2, 3, 4, 5)
#' nyr <- 5
#' empdis(x, nyr)
#'
#' @export
empdis <- function(x, nyr) {
  # Build the empirical distribution / return-period table for sample 'x',
  # assuming the sample spans 'nyr' years. Returns a data frame sorted by
  # increasing value, with Weibull, Hazen and Cunnane plotting positions,
  # the corresponding return periods, the Gumbel reduced variate, the sorted
  # values (Q) and the original positions (timestamp).
  #
  # Cleanup: the original computed 'n' (through the fragile auto-generated
  # column name `x[order(x)]`) and 'rpc', neither of which was ever used;
  # that dead code has been removed.
  #
  # Original positions of the observations, reordered with the sorted data.
  ts <- seq_along(x)
  ts <- as.data.frame(ts[order(x)])
  # Sort the sample ascending (order() keeps NAs last by default).
  x <- as.data.frame(x[order(x)])
  # Average number of events per year.
  epyp <- length(x[, 1]) / nyr
  rank <- apply(x, 2, rank, na.last = "keep")
  # Hazen and Cunnane plotting positions.
  hazen <- (rank - 0.5) / length(x[, 1])
  cunnane <- (rank - 0.4) / (length(x[, 1]) + 0.2)
  # Gumbel reduced variate of the Hazen frequencies.
  gumbel <- -log(-log(hazen))
  # Weibull plotting position uses the count of non-missing values.
  nasmp <- apply(x, 2, function(col) sum(!is.na(col)))
  epdp <- rank / rep(nasmp + 1, each = nrow(rank))
  empip <- data.frame(1 / (epyp * (1 - epdp)), 1 / (epyp * (1 - hazen)),
                      1 / (epyp * (1 - cunnane)), gumbel, epdp, hazen, cunnane, x, ts)
  names(empip) <- c("emp.RP", "haz.RP", "cun.RP", "gumbel", "emp.f",
                    "emp.hazen", "emp.cunnane", "Q", "timestamp")
  return(empip)
}
#' Empirical Distribution Function
#'
#' This function calculates the empirical distribution function for a given dataset,
#' with a focus on low values
#'
#' @param x A numeric vector or matrix representing the discharge values.
#' @param nyr An integer representing the number of years.
#'
#' @return A data frame containing the following columns:
#' \describe{
#' \item{\code{emp.RP}}{Empirical return period}
#' \item{\code{haz.RP}}{Hazen return period}
#' \item{\code{gumbel}}{Gumbel frequency}
#' \item{\code{emp.f}}{Empirical frequency}
#' \item{\code{emp.hazen}}{Empirical Hazen frequency}
#' \item{\code{Q}}{Discharge values}
#' }
#'
#' @examples
#' x <- c(10, 20, 30, 40, 50)
#' nyr <- 5
#' empdisl(x, nyr)
#'
#' @export
empdisl <- function(x, nyr) {
  # Empirical distribution table focused on low values: the sample is sorted
  # in decreasing order and ranked on its negation, so the smallest value
  # receives the highest non-exceedance frequency. 'nyr' is the number of
  # years spanned by the sample.
  x <- as.data.frame(x[order(x, decreasing = TRUE)])
  nObs <- length(x[, 1])
  # Average number of events per year.
  epyp <- nObs / nyr
  # Ranks of the negated values; NAs keep NA ranks.
  rnk <- apply(-x, 2, rank, na.last = "keep")
  # Hazen plotting position and its Gumbel reduced variate.
  hazen <- (rnk - 0.5) / nObs
  gumbel <- -log(-log(hazen))
  # Weibull plotting position based on the count of non-missing values.
  validCounts <- apply(x, 2, function(col) sum(!is.na(col)))
  epdp <- rnk / rep(validCounts + 1, each = nrow(rnk))
  out <- data.frame(1 / (epyp * (1 - epdp)), 1 / (epyp * (1 - hazen)),
                    gumbel, epdp, hazen, x)
  names(out) <- c("emp.RP", "haz.RP", "gumbel", "emp.f", "emp.hazen", "Q")
  return(out)
}
# (Removed: rdrr.io website-embedding boilerplate accidentally captured with
# the source; it was not valid R code and is not part of the package.)