R/eva_functions.R

# EVA functions to compute return levels / return periods from non-stationary EVD parameters-----------------


#' tsEvaComputeTimeRP
#'
#' \code{tsEvaComputeTimeRP} is a function that calculates the return period
#' of a given event for GEV and GPD distributions at a given time index.
#'
#' @param params A data frame containing the following parameters:
#' \describe{
#' \item{\code{epsilonGEV}}{Shape parameter for the Generalized Extreme Value
#' (GEV) distribution.}
#' \item{\code{muGEV}}{Location parameter for the GEV distribution.}
#' \item{\code{sigmaGEV}}{Scale parameter for the GEV distribution.}
#' \item{\code{epsilonGPD}}{Shape parameter for the Generalized Pareto
#' (GPD) distribution.}
#' \item{\code{thresholdGPD}}{Threshold parameter for the GPD distribution.}
#' \item{\code{sigmaGPD}}{Scale parameter for the GPD distribution.}
#' \item{\code{nPeaks}}{Number of peaks in the sample time horizon.}
#' \item{\code{SampleTimeHorizon}}{Total number of years in the data sample.}
#' }
#' @param RPiGEV Event value (e.g., a return level) for which the return
#' period under the GEV distribution is computed.
#' @param RPiGPD Event value (e.g., a return level) for which the return
#' period under the GPD distribution is computed.
#' @return A vector with the calculated return period for GEV and GPD distributions.
#' @export
#' @examples
#'#Parameter vector:
#'params <- t(data.frame(epsilonGEV = 0.2, muGEV = 3, sigmaGEV = 1,
#'                   epsilonGPD = 0.2, thresholdGPD = 3, sigmaGPD = 1,
#'                   nPeaks = 70, SampleTimeHorizon = 70))
#'
#'tsEvaComputeTimeRP(params, RPiGEV = 10, RPiGPD = 10)
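#'
#'# Cross-check sketch (assuming the evd package is installed): the same return
#'# periods follow from the GEV/GPD cumulative distribution functions.
#'1 / (1 - evd::pgev(10, loc = 3, scale = 1, shape = 0.2))             # GEV
#'lambda <- 70 / 70  # mean number of peaks per year (nPeaks / SampleTimeHorizon)
#'1 / (lambda * (1 - evd::pgpd(10, loc = 3, scale = 1, shape = 0.2)))  # GPD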

tsEvaComputeTimeRP <- function(params, RPiGEV, RPiGPD){
  paramx <- data.frame(t(params))
  qxV <- 1 - exp(-(1 + paramx$epsilonGEV * (RPiGEV - paramx$muGEV) / paramx$sigmaGEV)^(-1 / paramx$epsilonGEV))
  returnPeriodGEV <- 1 / qxV
  X0 <- paramx$nPeaks/paramx$SampleTimeHorizon
  qxD <- ((1 + paramx$epsilonGPD * (RPiGPD - paramx$thresholdGPD) / paramx$sigmaGPD)^(-1 / paramx$epsilonGPD))
  returnPeriodGPD <- 1 / (X0 * qxD)

  return(c(GEV = returnPeriodGEV, GPD = returnPeriodGPD))
}

#' tsEvaComputeReturnPeriodsGEV
#'
#' \code{tsEvaComputeReturnPeriodsGEV} is a function that computes the return
#' periods of a set of observations (e.g., annual maxima)
#' for a Generalized Extreme Value (GEV)
#' distribution, given the GEV parameters and their standard error.
#' The return levels are the values expected to be exceeded, on average, once
#' every return period, and the return periods indicate the average
#' time between exceedances of those values.
#'
#' @param epsilon The shape parameter of the GEV distribution.
#' @param sigma The scale parameter of the GEV distribution.
#' @param mu The location parameter of the GEV distribution.
#' @param BlockMax A vector containing the block maxima data.
#'
#' @return A list containing the following components:
#' \describe{
#' \item{\code{GevPseudo}}{A matrix of pseudo observations obtained from
#' the GEV distribution for each annual extreme at every time step.}
#' \item{\code{returnPeriods}}{A matrix of return periods corresponding to
#' the pseudo observations.}
#' \item{\code{PseudoObs}}{The pseudo observation corresponding to
#' the maximum value used in the computation.}
#' }
#'
#' @seealso \code{\link{empdis}}
#' @examples
#'
#' # Example usage with some sample data
#' epsilon <- 0.1
#' sigma <- 2.2
#' mu <- 1.3
#' BlockMax <- c(10, 20, 30, 40, 50)
#'
#' results <- tsEvaComputeReturnPeriodsGEV(epsilon, sigma, mu, BlockMax)
#' head(results$GevPseudo)
#' head(results$returnPeriods)
#' head(results$PseudoObs)
#' @export
tsEvaComputeReturnPeriodsGEV <- function(epsilon, sigma, mu, BlockMax) {
  nyr <- length(BlockMax)
  # Compute the empirical return period:
  empval <- empdis(BlockMax, nyr)
  my <- match(BlockMax, empval$Q)
  PseudoObs <- empval[my, ]

  # uniforming dimensions of yp, sigma, mu
  npars <- length(sigma)
  nt <- length(BlockMax)
  Bmax <- matrix(BlockMax, nrow = npars, ncol = nt, byrow = TRUE)
  sigma_ <- matrix(sigma, nrow = npars, ncol = nt, byrow = FALSE)
  mu_ <- matrix(mu, nrow = npars, ncol = nt, byrow = FALSE)

  # estimating the return levels
  qx <- 1 - exp(-(1 + epsilon * (Bmax - mu_) / sigma_)^(-1 / epsilon))
  GevPseudo <- qx
  returnPeriods <- 1 / qx

  return(list(GevPseudo = GevPseudo, returnPeriods = returnPeriods, PseudoObs = PseudoObs))
}

#' tsEvaComputeReturnPeriodsGPD
#'
#' \code{tsEvaComputeReturnPeriodsGPD} is a function that computes the return
#' periods of a set of observations (peaks) for a
#' Generalized Pareto Distribution (GPD), given the GPD parameters,
#' threshold, peaks data, and sample time horizon.
#'
#' @param epsilon The shape parameter of the GPD.
#' @param sigma The scale parameter of the GPD.
#' @param threshold The threshold value for the GPD.
#' @param peaks A vector containing the peak values.
#' @param nPeaks The number of peak values.
#' @param peaksID An identifier for each peak value.
#' @param sampleTimeHorizon The time horizon of the sample.
#'
#' @return A list containing the following components:
#'   \describe{
#'     \item{\code{GpdPseudo}}{A matrix of pseudo observations obtained from
#'      the GPD for each peak value at every time step.}
#'     \item{\code{returnPeriods}}{A matrix of return periods
#'     corresponding to the pseudo observations.}
#'     \item{\code{PseudoObs}}{A data frame containing the pseudo
#'     observations and their corresponding identifiers.}
#'   }
#'
#' @seealso \code{\link{empdis}}
#' @examples
#' # Example usage with some sample data
#' epsilon <- 0.1
#' sigma <- 2.2
#' threshold <- 1.3
#' peaks <- c(10, 20, 30, 40, 50)
#' nPeaks=5
#' peaksID=c(230,550,999,1540,3012)
#' SampleTimeHorizon = 70
#'
#' results <- tsEvaComputeReturnPeriodsGPD(epsilon, sigma, threshold, peaks,
#' nPeaks, peaksID, SampleTimeHorizon)
#' head(results$GpdPseudo)
#' head(results$returnPeriods)
#' head(results$PseudoObs)
#' @export
tsEvaComputeReturnPeriodsGPD <- function(epsilon, sigma, threshold, peaks, nPeaks, peaksID, sampleTimeHorizon) {
  X0 <- nPeaks / sampleTimeHorizon
  nyr <- sampleTimeHorizon
  peaksj <- jitter(peaks, 2)
  peakAndId <- data.frame(peaksID, peaksj)
  peaksjid <- peaksID[order(peaksj)]

  # Compute the empirical return period:
  empval <- empdis(peaksj, nyr)
  PseudoObs <- empval
  PseudoObs$ID <- peaksjid
  PseudoObs <- PseudoObs[order(PseudoObs$ID), ]

  # Uniform dimensions of variables
  npars <- length(sigma)
  nt <- length(peaks)
  Peakx <- matrix(PseudoObs$Q, nrow = npars, ncol = nt, byrow = TRUE)
  sigma_ <- matrix(sigma, nrow = npars, ncol = nt, byrow = FALSE)
  threshold_ <- matrix(threshold, nrow = npars, ncol = nt, byrow = FALSE)

  # Estimate the return levels
  # Here I have all the pseudo observations of every annual extreme for every timestep
  qx <- (((1 + epsilon * (Peakx - threshold_) / sigma_)^(-1 / epsilon)))
  GpdPseudo <- qx
  GpdPseudo[which(GpdPseudo < 0)] <- 0
  returnPeriods <- 1 / (X0 * qx)

  return(list(GpdPseudo = GpdPseudo, returnPeriods = returnPeriods, PseudoObs = PseudoObs))
}

#' tsEvaComputeReturnLevelsGEV
#'
#' \code{tsEvaComputeReturnLevelsGEV} is a function that calculates the return
#' levels for a Generalized Extreme Value (GEV) distribution given the GEV
#' parameters and their standard errors. The return periods are specified in a
#' time unit that corresponds to the size of the time segments for
#' evaluating the maxima.
#'
#' @param epsilon The shape parameter of the GEV distribution.
#' @param sigma The scale parameter of the GEV distribution.
#' @param mu The location parameter of the GEV distribution.
#' @param epsilonStdErr The standard error of the shape parameter.
#' @param sigmaStdErr The standard error of the scale parameter.
#' @param muStdErr The standard error of the location parameter.
#' @param returnPeriodsInDts The return periods expressed in the
#' corresponding time unit. For example, when working on yearly
#' maxima, returnPeriodsInDts must be expressed in years; for
#' monthly maxima, returnPeriodsInDts must be expressed in months.
#'
#' @return A list containing the following components:
#'   \describe{
#'     \item{\code{returnLevels}}{A matrix of return levels corresponding to the specified return periods.}
#'     \item{\code{returnLevelsErr}}{A matrix of standard errors for the return levels.}
#'   }
#'
#' @seealso \code{\link{empdis}}
#' @examples
#' # Example usage with some sample data
#' epsilon <- c(0.1)
#' sigma <- c(2.3)
#' mu <- c(1.3)
#' epsilonStdErr <- c(0.01)
#' sigmaStdErr <- c(0.11)
#' muStdErr <- c(0.011)
#' returnPeriodsInDts <- c( 5, 10, 20, 50)
#'
#' results <- tsEvaComputeReturnLevelsGEV(epsilon, sigma, mu, epsilonStdErr,
#' sigmaStdErr, muStdErr, returnPeriodsInDts)
#' head(results$returnLevels)
#' head(results$returnLevelsErr)
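#'
#' # Consistency sketch (assuming the evd package is installed): with constant
#' # parameters, the return levels equal the 1 - 1/T quantiles of the GEV.
#' evd::qgev(1 - 1/returnPeriodsInDts, loc = mu, scale = sigma, shape = epsilon)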
#' @export
tsEvaComputeReturnLevelsGEV <- function(epsilon, sigma, mu, epsilonStdErr, sigmaStdErr, muStdErr, returnPeriodsInDts) {
  returnPeriodsInDts <- as.vector(returnPeriodsInDts)
  returnPeriodsInDtsSize <- length(returnPeriodsInDts)
  if (returnPeriodsInDtsSize[1] > 1) {
    returnPeriodsInDts <- as.vector(t(returnPeriodsInDts))
  }
  # reference: Stuart Coles 2001, pag 49.
  yp <- -log(1 - 1 / returnPeriodsInDts)

  # uniforming dimensions of yp, sigma, mu
  npars <- length(sigma)
  nt <- length(returnPeriodsInDts)
  yp <- matrix(yp, nrow = npars, ncol = nt, byrow = TRUE)
  sigma_ <- matrix(sigma, nrow = npars, ncol = nt, byrow = TRUE)
  sigmaStdErr_ <- matrix(sigmaStdErr, nrow = npars, ncol = nt, byrow = TRUE)
  mu_ <- matrix(mu, nrow = npars, ncol = nt, byrow = TRUE)
  muStdErr_ <- matrix(muStdErr, nrow = npars, ncol = nt, byrow = TRUE)
  if (epsilon != 0) {
    # estimating the return levels
    returnLevels <- mu_ - sigma_ / epsilon * (1 - yp^(-epsilon))
    ## estimating the error
    # estimating the differential of returnLevels to the parameters
    dxm_mu <- 1
    dxm_sigma <- 1 / epsilon * (1 - yp^(-epsilon))
    dxm_epsilon <- sigma_ / (epsilon^2) * (1 - (yp)^(-epsilon)) - sigma_ / epsilon * log(yp) * yp^(-epsilon)
    returnLevelsErr <- sqrt((dxm_mu * muStdErr_)^2 + (dxm_sigma * sigmaStdErr_)^2 + (dxm_epsilon * epsilonStdErr)^2)
  } else {
    returnLevels <- mu_ - sigma_ * log(yp)
    ## estimating the error
    # estimating the differential of returnLevels to the parameter
    dxm_u <- rep(1, length(mu_))
    dxm_sigma <- log(yp)
    returnLevelsErr <- sqrt((dxm_u * muStdErr_)^2 + (dxm_sigma * sigmaStdErr_)^2)
  }
  return(list(returnLevels = returnLevels, returnLevelsErr = returnLevelsErr))
}

#' tsEvaComputeReturnLevelsGEVFromAnalysisObj
#'
#' \code{tsEvaComputeReturnLevelsGEVFromAnalysisObj} is a function that calculates the return levels for a Generalized Extreme Value (GEV) distribution using the parameters obtained from a non-stationary extreme value analysis. It supports non-stationary analysis by considering different parameters for each time index.
#'
#' @param nonStationaryEvaParams The parameters obtained from a non-stationary extreme value analysis.
#' @param returnPeriodsInYears The return periods expressed in years.
#' @param timeIndex Temporal index of the time step at which to compute the GEV return levels.
#'
#' @return A list containing the following components:
#'   \describe{
#'     \item{\code{returnLevels}}{A matrix of return levels corresponding to the specified return periods.}
#'     \item{\code{returnLevelsErr}}{A matrix of standard errors for the return levels.}
#'     \item{\code{returnLevelsErrFit}}{A matrix of standard errors for the return levels obtained from fitting the non-stationary model.}
#'     \item{\code{returnLevelsErrTransf}}{A matrix of standard errors for the return levels obtained from the transformed data.}
#'   }
#' @examples
#' # Example usage with some sample data
#'nonStationaryEvaParams <- list(list(
#' parameters = list(
#'   epsilon = 0.1,
#'   sigma = c(2.1, 2.2, 2.3),
#'   mu = c(1.1, 1.2, 1.3),
#'   timeHorizonStart=as.POSIXct("1951-01-01"),
#'   timeHorizonEnd=as.POSIXct("2020-12-31"),
#'   nPeaks=90
#'
#' ),
#' paramErr = list(
#'   epsilonErr = 0.01,
#'   sigmaErr = c(0.11, 0.12, 0.13),
#'   muErr = c(0.011, 0.012, 0.013)
#' )
#' ), NA
#')
#' returnPeriodsInYears <- c(1, 5, 10, 20, 50)
#' timeIndex=1
#' results <- tsEvaComputeReturnLevelsGEVFromAnalysisObj(nonStationaryEvaParams,
#'   returnPeriodsInYears, timeIndex)
#' head(results$returnLevels)
#'
#' @seealso \code{\link{tsEvaComputeReturnLevelsGEV}}
#' @export
tsEvaComputeReturnLevelsGEVFromAnalysisObj <- function(nonStationaryEvaParams, returnPeriodsInYears, timeIndex=-1) {
  epsilon <- nonStationaryEvaParams[[1]]$parameters$epsilon
  epsilonStdErr <- nonStationaryEvaParams[[1]]$paramErr$epsilonErr
  epsilonStdErrFit <- epsilonStdErr
  epsilonStdErrTransf <- 0
  nonStationary <- "sigmaErrTransf" %in% names(nonStationaryEvaParams[[1]]$paramErr)
  if (timeIndex > 0) {
    sigma <- nonStationaryEvaParams[[1]]$parameters$sigma[timeIndex]
    mu <- nonStationaryEvaParams[[1]]$parameters$mu[timeIndex]
    sigmaStdErr <- nonStationaryEvaParams[[1]]$paramErr$sigmaErr[timeIndex]
    sigmaStdErrFit <- nonStationaryEvaParams[[1]]$paramErr$sigmaErrFit[timeIndex]
    sigmaStdErrTransf <- nonStationaryEvaParams[[1]]$paramErr$sigmaErrTransf[timeIndex]
    muStdErr <- nonStationaryEvaParams[[1]]$paramErr$muErr[timeIndex]
    muStdErrFit <- nonStationaryEvaParams[[1]]$paramErr$muErrFit[timeIndex]
    muStdErrTransf <- nonStationaryEvaParams[[1]]$paramErr$muErrTransf[timeIndex]
  } else {
    sigma <- nonStationaryEvaParams[[1]]$parameters$sigma
    mu <- nonStationaryEvaParams[[1]]$parameters$mu
    sigmaStdErr <- nonStationaryEvaParams[[1]]$paramErr$sigmaErr
    muStdErr <- nonStationaryEvaParams[[1]]$paramErr$muErr
    if (nonStationary) {
      sigmaStdErrFit <- nonStationaryEvaParams[[1]]$paramErr$sigmaErrFit
      sigmaStdErrTransf <- nonStationaryEvaParams[[1]]$paramErr$sigmaErrTransf
      muStdErrFit <- nonStationaryEvaParams[[1]]$paramErr$muErrFit
      muStdErrTransf <- nonStationaryEvaParams[[1]]$paramErr$muErrTransf
    }
  }

  returnLevels <- tsEvaComputeReturnLevelsGEV(epsilon, sigma, mu, epsilonStdErr, sigmaStdErr, muStdErr, returnPeriodsInYears)
  returnLevelsErr <- returnLevels$returnLevelsErr
  if (nonStationary) {
    returnLevelsErrFit <- tsEvaComputeReturnLevelsGEV(epsilon, sigma, mu, epsilonStdErrFit, sigmaStdErrFit, muStdErrFit, returnPeriodsInYears)$returnLevelsErr
    returnLevelsErrTransf <- tsEvaComputeReturnLevelsGEV(epsilon, sigma, mu, epsilonStdErrTransf, sigmaStdErrTransf, muStdErrTransf, returnPeriodsInYears)$returnLevelsErr
  } else {
    returnLevelsErrFit <- returnLevelsErr
    returnLevelsErrTransf <- rep(0, length(returnLevelsErr))
  }

  return(list(returnLevels = returnLevels$returnLevels, returnLevelsErr = returnLevelsErr, returnLevelsErrFit = returnLevelsErrFit, returnLevelsErrTransf = returnLevelsErrTransf))
}

#' tsEvaComputeReturnLevelsGPD
#'
#' \code{tsEvaComputeReturnLevelsGPD} is a function that
#'  computes the return levels for a Generalized Pareto Distribution (GPD)
#'  using the parameters of the distribution and their standard errors.
#'
#' @param epsilon The shape parameter of the GPD.
#' @param sigma The scale parameter of the GPD.
#' @param threshold The threshold parameter of the GPD.
#' @param epsilonStdErr The standard error of the shape parameter.
#' @param sigmaStdErr The standard error of the scale parameter.
#' @param thresholdStdErr The standard error of the threshold parameter.
#' @param nPeaks The number of peaks used to estimate the parameters.
#' @param sampleTimeHorizon The time horizon of the sample in the same units as the return periods (e.g., years).
#' @param returnPeriods The return periods for which to compute the return levels.
#'
#' @return A list containing the following components:
#'   \describe{
#'     \item{\code{returnLevels}}{A vector of return levels corresponding to the specified return periods.}
#'     \item{\code{returnLevelsErr}}{A vector of standard errors for the return levels.}
#'   }
#'
#' @details
#' sampleTimeHorizon and returnPeriods must be in the same units, e.g. years
#' @examples
#' # Example usage with some sample data
#' epsilon <- c(0.1)
#' sigma <- c(2.3)
#' threshold <- c(1.3)
#' epsilonStdErr <- c(0.01)
#' sigmaStdErr <- c(0.11)
#' thresholdStdErr <- c(0.011)
#' returnPeriodsInDts <- c( 5, 10, 20, 50)
#' nPeaks=70
#' SampleTimeHorizon=70
#' results <- tsEvaComputeReturnLevelsGPD(epsilon, sigma, threshold, epsilonStdErr,
#' sigmaStdErr, thresholdStdErr, nPeaks, SampleTimeHorizon, returnPeriodsInDts)
#' head(results$returnLevels)
#' head(results$returnLevelsErr)
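#'
#' # Consistency sketch (assuming the evd package is installed): with an average
#' # of nPeaks/SampleTimeHorizon exceedances per year, the T-year return level
#' # equals the 1 - 1/(lambda*T) quantile of the GPD above the threshold.
#' lambda <- nPeaks / SampleTimeHorizon
#' evd::qgpd(1 - 1/(lambda * returnPeriodsInDts), loc = threshold,
#'           scale = sigma, shape = epsilon)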
#' @export
tsEvaComputeReturnLevelsGPD <- function(epsilon, sigma, threshold, epsilonStdErr, sigmaStdErr, thresholdStdErr, nPeaks, sampleTimeHorizon, returnPeriods) {

  X0 <- nPeaks / sampleTimeHorizon
  XX <- X0 * returnPeriods
  npars <- length(sigma)
  nt <- length(returnPeriods)
  XX_ <- rep(XX, each = npars)
  sigma_ <- rep(sigma, times = nt)
  sigmaStdErr_ <- rep(sigmaStdErr, times = nt)
  threshold_ <- rep(threshold, times = nt)
  thresholdStdErr_ <- rep(thresholdStdErr, times = nt)

  if (epsilon != 0) {
    # estimating the return levels
    returnLevels <- threshold_ + sigma_ / epsilon * ((XX_)^epsilon - 1)
    # estimating the error
    # estimating the differential of returnLevels to the parameters
    # !! ASSUMING NON ZERO ERROR ON THE THRESHOLD AND 0 ERROR ON THE PERCENTILE.
    # THIS IS NOT COMPLETELY CORRECT BECAUSE THE PERCENTILE DEPENDS ON THE
    # THRESHOLD AND HAS THEREFORE AN ERROR RELATED TO THAT OF THE
    # THRESHOLD
    dxm_u <- 1
    dxm_sigma <- 1 / epsilon * (XX_^epsilon - 1)
    dxm_epsilon <- -sigma_ / epsilon^2 * ((XX_)^epsilon - 1) + sigma_ / epsilon * log(XX_) * XX_^epsilon

    returnLevelsErr <- sqrt((dxm_u * thresholdStdErr_)^2 + (dxm_sigma * sigmaStdErr_)^2 + (dxm_epsilon * epsilonStdErr)^2)
    #
  } else {
    returnLevels <- threshold_ + sigma_ * log(XX_)
    dxm_u <- 1
    dxm_sigma <- log(XX_)

    returnLevelsErr <- ((dxm_u * thresholdStdErr_)^2 + (dxm_sigma * sigmaStdErr_)^2)^.5
  }
  return(list(returnLevels = returnLevels, returnLevelsErr = returnLevelsErr))
}

#' tsEvaComputeReturnLevelsGPDFromAnalysisObj
#'
#' \code{tsEvaComputeReturnLevelsGPDFromAnalysisObj} is a function that calculates the return levels for a Generalized Pareto Distribution (GPD) using the parameters obtained from an analysis object. It takes into account non-stationarity by considering time-varying parameters and their associated standard errors.
#'
#' @param nonStationaryEvaParams The non-stationary parameters obtained from the analysis object.
#' @param returnPeriodsInYears The return periods for which to compute the return levels, expressed in years.
#' @param timeIndex Temporal index of the time step at which to compute the GPD return levels.
#'
#' @return A list with the following components:
#'   \describe{
#'     \item{\code{returnLevels}}{A vector of return levels corresponding to the specified return periods.}
#'     \item{\code{returnLevelsErrFit}}{A vector of standard errors for the return levels estimated based on the fit.}
#'     \item{\code{returnLevelsErrTransf}}{A vector of standard errors for the return levels estimated based on the transformed parameters.}
#'   }
#'
#' @seealso \code{\link{tsEvaComputeReturnLevelsGPD}}
#' @examples
#' # Example usage with some sample data
#'nonStationaryEvaParams <- list(NA,list(
#'  parameters = list(
#'    epsilon = 0.1,
#'    sigma = c(2.1, 2.2, 2.3),
#'    threshold = c(1.1, 1.2, 1.3),
#'    timeHorizonStart=as.POSIXct("1951-01-01"),
#'    timeHorizonEnd=as.POSIXct("2020-12-31"),
#'    nPeaks=90
#'
#'  ),
#'  paramErr = list(
#'    epsilonErr = 0.01,
#'    sigmaErr = c(0.11, 0.12, 0.13),
#'    thresholdErr = c(0.011, 0.012, 0.013)
#'  )
#')
#')
#'returnPeriodsInYears <- c(1, 5, 10, 20, 50)
#'timeIndex=1
#'results <- tsEvaComputeReturnLevelsGPDFromAnalysisObj(nonStationaryEvaParams,
#'  returnPeriodsInYears, timeIndex)
#'head(results$returnLevels)
#' @export

tsEvaComputeReturnLevelsGPDFromAnalysisObj <- function(nonStationaryEvaParams, returnPeriodsInYears, timeIndex=-1) {
  epsilon <- nonStationaryEvaParams[[2]]$parameters$epsilon
  epsilonStdErr <- nonStationaryEvaParams[[2]]$paramErr$epsilonErr
  epsilonStdErrFit <- epsilonStdErr
  epsilonStdErrTransf <- 0
  thStart <- nonStationaryEvaParams[[2]]$parameters$timeHorizonStart
  thEnd <- nonStationaryEvaParams[[2]]$parameters$timeHorizonEnd
  timeHorizonInYears <- round(as.numeric((thEnd - thStart) / 365.2425))
  nPeaks <- nonStationaryEvaParams[[2]]$parameters$nPeaks
  nonStationary <- "sigmaErrTransf" %in% names(nonStationaryEvaParams[[2]]$paramErr)
  if (timeIndex > 0) {
    sigma <- nonStationaryEvaParams[[2]]$parameters$sigma[timeIndex]
    threshold <- nonStationaryEvaParams[[2]]$parameters$threshold[timeIndex]
    sigmaStdErr <- nonStationaryEvaParams[[2]]$paramErr$sigmaErr[timeIndex]
    sigmaStdErrFit <- nonStationaryEvaParams[[2]]$paramErr$sigmaErrFit[timeIndex]
    sigmaStdErrTransf <- nonStationaryEvaParams[[2]]$paramErr$sigmaErrTransf[timeIndex]
    thresholdStdErr <- nonStationaryEvaParams[[2]]$paramErr$thresholdErr[timeIndex]
    thresholdStdErrFit <- 0
    thresholdStdErrTransf <- nonStationaryEvaParams[[2]]$paramErr$thresholdErrTransf[timeIndex]
  } else {
    sigma <- nonStationaryEvaParams[[2]]$parameters$sigma
    threshold <- nonStationaryEvaParams[[2]]$parameters$threshold
    sigmaStdErr <- nonStationaryEvaParams[[2]]$paramErr$sigmaErr
    if (nonStationary) {
      thresholdStdErr <- nonStationaryEvaParams[[2]]$paramErr$thresholdErr
      sigmaStdErrFit <- nonStationaryEvaParams[[2]]$paramErr$sigmaErrFit
      sigmaStdErrTransf <- nonStationaryEvaParams[[2]]$paramErr$sigmaErrTransf
      thresholdStdErrFit <- nonStationaryEvaParams[[2]]$paramErr$thresholdErrFit
      thresholdStdErrTransf <- nonStationaryEvaParams[[2]]$paramErr$thresholdErrTransf
    } else {
      thresholdStdErr <- 0
    }
  }
  returnLevels <- tsEvaComputeReturnLevelsGPD(epsilon, sigma, threshold, epsilonStdErr, sigmaStdErr, thresholdStdErr, nPeaks, timeHorizonInYears, returnPeriodsInYears)
  returnLevelsErr <- returnLevels$returnLevelsErr
  if (nonStationary) {
    returnLevelsErrFit <- tsEvaComputeReturnLevelsGPD(epsilon, sigma, threshold, epsilonStdErrFit, sigmaStdErrFit, rep(0, length(thresholdStdErr)), nPeaks, timeHorizonInYears, returnPeriodsInYears)$returnLevelsErr
    returnLevelsErrTransf <- tsEvaComputeReturnLevelsGPD(epsilon, sigma, threshold, epsilonStdErrTransf, sigmaStdErrTransf, thresholdStdErrTransf, nPeaks, timeHorizonInYears, returnPeriodsInYears)$returnLevelsErr
  } else {
    returnLevelsErrFit <- returnLevelsErr
    returnLevelsErrTransf <- rep(0, length(returnLevelsErr))
  }
  return(list(returnLevels = returnLevels$returnLevels, returnLevelsErr = returnLevelsErr, returnLevelsErrFit = returnLevelsErrFit, returnLevelsErrTransf = returnLevelsErrTransf))
}

#' tsEvaComputeRLsGEVGPD
#'
#' \code{tsEvaComputeRLsGEVGPD} is a function that calculates the return levels
#'  and their associated errors for a Generalized Extreme Value (GEV)
#'  and Generalized Pareto (GPD) distribution using the parameters obtained
#'  from a non-stationary extreme value analysis.
#'  It supports non-stationary analysis by considering different parameters
#'  for each time index.
#'
#' @param nonStationaryEvaParams The parameters obtained from a non-stationary
#' extreme value analysis.
#' @param RPgoal The target return period for which the return levels are computed.
#' @param timeIndex The index at which the time-varying analysis should be estimated.
#' @param trans A character string indicating the transformation to be applied
#' to the data before fitting the EVD. default value is NA, corresponding
#' to no transformation. Currently only the "rev" for reverse transformation is
#' implemented.
#'
#' @return A list containing the following components:
#'   \describe{
#'     \item{\code{Fit}}{A character string indicating whether the EVD could be
#'      fitted to the data ("No fit") or the EVD was successfully fitted to the data ("Fitted").}
#'     \item{\code{ReturnLevels}}{A data frame containing the target
#'      return period (`ReturnPeriod`), GEV return level (`GEV`), GPD return level (`GPD`),
#'       GEV return level error (`errGEV`), and GPD return level error (`errGPD`) for
#'        the specified time index.}
#'     \item{\code{Params}}{A list containing the GEV and GPD parameters for the
#'     specified time index, including their standard errors.}
#'   }
#'
#' @seealso \code{\link{tsEvaComputeReturnLevelsGEV}}, \code{\link{tsEvaComputeReturnLevelsGPD}}
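#' @examples
#' # Minimal sketch with synthetic (hypothetical) parameter values; in practice
#' # nonStationaryEvaParams comes from the package's non-stationary fit, and the
#' # 'method' field can be any label other than "No fit".
#' nonStationaryEvaParams <- list(
#'   list(parameters = list(epsilon = 0.2, sigma = c(2.1, 2.2, 2.3),
#'                          mu = c(3.1, 3.2, 3.3), timeDeltaYears = 1),
#'        paramErr = list(epsilonErr = 0.02, sigmaErr = c(0.1, 0.1, 0.1),
#'                        muErr = c(0.1, 0.1, 0.1)),
#'        method = "GEV"),
#'   list(parameters = list(epsilon = 0.2, sigma = c(1.1, 1.2, 1.3),
#'                          threshold = c(2.1, 2.2, 2.3), nPeaks = 90,
#'                          timeHorizonStart = as.POSIXct("1951-01-01"),
#'                          timeHorizonEnd = as.POSIXct("2020-12-31")),
#'        paramErr = list(epsilonErr = 0.02, sigmaErr = c(0.1, 0.1, 0.1),
#'                        thresholdErr = c(0.05, 0.05, 0.05)))
#' )
#' tsEvaComputeRLsGEVGPD(nonStationaryEvaParams, RPgoal = 100, timeIndex = 2)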
#' @export
tsEvaComputeRLsGEVGPD<-function(nonStationaryEvaParams, RPgoal, timeIndex,trans=NA){

  #GEV
  epsilonGEV <- nonStationaryEvaParams[[1]]$parameters$epsilon
  sigmaGEV <- mean(nonStationaryEvaParams[[1]]$parameters$sigma[timeIndex])
  muGEV <- mean(nonStationaryEvaParams[[1]]$parameters$mu[timeIndex])
  dtSampleYears <- nonStationaryEvaParams[[1]]$parameters$timeDeltaYears

  #GPD
  epsilonGPD <- nonStationaryEvaParams[[2]]$parameters$epsilon
  sigmaGPD <- mean(nonStationaryEvaParams[[2]]$parameters$sigma[timeIndex])
  thresholdGPD <- mean(nonStationaryEvaParams[[2]]$parameters$threshold[timeIndex])
  nPeaks <- nonStationaryEvaParams[[2]]$parameters$nPeaks
  thStart <- nonStationaryEvaParams[[2]]$parameters$timeHorizonStart
  thEnd <- nonStationaryEvaParams[[2]]$parameters$timeHorizonEnd
  sampleTimeHorizon <- as.numeric((thEnd - thStart)/365.2425)

  if (nonStationaryEvaParams[[1]]$method=="No fit"){
    message("could not fit EVD to this pixel")
    ParamGEV=c(epsilonGEV,sigmaGEV,muGEV,NA, NA, NA)
    names(ParamGEV)=c("epsilonGEV","sigmaGEV","muGEV","epsilonStdErrGEV","sigmaStdErrGEV","muStdErrGEV")

    ParamGPD=c(epsilonGPD,sigmaGPD,thresholdGPD,NA,NA, NA,nPeaks,sampleTimeHorizon)
    names(ParamGPD)=c("epsilonGPD","sigmaGPD","thresholdGPD","epsilonStdErrGPD","sigmaStdErrGPD","thresholdStdErrGPD","nPeaks","SampleTimeHorizon")
    return(list(Fit="No fit",Params=c(ParamGEV,ParamGPD)))
  }else{
    #GEV
    epsilonStdErrGEV <- nonStationaryEvaParams[[1]]$paramErr$epsilonErr
    sigmaStdErrGEV <- mean(nonStationaryEvaParams[[1]]$paramErr$sigmaErr[timeIndex])
    muStdErrGEV <- mean(nonStationaryEvaParams[[1]]$paramErr$muErr[timeIndex])

    #GPD
    epsilonStdErrGPD <- nonStationaryEvaParams[[2]]$paramErr$epsilonErr
    sigmaStdErrGPD <- mean(nonStationaryEvaParams[[2]]$paramErr$sigmaErr[timeIndex])
    thresholdStdErrGPD <- mean(nonStationaryEvaParams[[2]]$paramErr$thresholdErr[timeIndex])

    if (!is.na(trans) && trans=="rev"){
      sigmaGEV=-sigmaGEV
      muGEV=-muGEV
      sigmaGPD=-sigmaGPD
      thresholdGPD=-thresholdGPD
    }
    returnLevelsGEV <- tsEvaComputeReturnLevelsGEV(epsilonGEV, sigmaGEV, muGEV, epsilonStdErrGEV, sigmaStdErrGEV, muStdErrGEV, RPgoal)

    returnLevelsGPD <- tsEvaComputeReturnLevelsGPD(epsilonGPD, sigmaGPD, thresholdGPD, epsilonStdErrGPD, sigmaStdErrGPD, thresholdStdErrGPD,
                                                   nPeaks, sampleTimeHorizon, RPgoal)
    rlevGEV=returnLevelsGEV$returnLevels
    rlevGPD=returnLevelsGPD$returnLevels

    errGEV=returnLevelsGEV$returnLevelsErr
    errGPD=returnLevelsGPD$returnLevelsErr

    ParamGEV=c(epsilonGEV,sigmaGEV,muGEV,epsilonStdErrGEV, sigmaStdErrGEV, muStdErrGEV)
    names(ParamGEV)=c("epsilonGEV","sigmaGEV","muGEV","epsilonStdErrGEV","sigmaStdErrGEV","muStdErrGEV")

    ParamGPD=c(epsilonGPD,sigmaGPD,thresholdGPD,epsilonStdErrGPD,sigmaStdErrGPD, thresholdStdErrGPD,nPeaks,sampleTimeHorizon)
    names(ParamGPD)=c("epsilonGPD","sigmaGPD","thresholdGPD","epsilonStdErrGPD","sigmaStdErrGPD","thresholdStdErrGPD","nPeaks","SampleTimeHorizon")
    return(list(Fit="Fitted",ReturnLevels=c(ReturnPeriod=RPgoal, GEV=as.numeric(rlevGEV),GPD=as.numeric(rlevGPD),errGEV=as.numeric(errGEV),errGPD=as.numeric(errGPD)),Params=c(ParamGEV,ParamGPD)))
  }


}
#' tsEvaSampleData Function
#'
#' \code{tsEvaSampleData} is a function that calculates various statistics
#'  and data for time series evaluation.
#'
#' @param ms A matrix containing the time series data.
#' @param meanEventsPerYear The mean number of events per year.
#' @param minEventsPerYear The minimum number of events per year.
#' @param minPeakDistanceInDays The minimum peak distance in days.
#' @param tail The tail to be studied for POT selection, either 'high' or 'low'.
#'
#' @return A list containing the following elements:
#'   \describe{
#'     \item{\code{completeSeries}}{The complete time series data.}
#'     \item{\code{POT}}{The data for Peaks Over Threshold (POT) analysis.}
#'     \item{\code{years}}{The years in the time series data.}
#'     \item{\code{Percentiles}}{The desired percentiles and their corresponding values.}
#'     \item{\code{annualMax}}{The annual maximum values.}
#'     \item{\code{annualMaxDate}}{The dates corresponding to the annual maximum values.}
#'     \item{\code{annualMaxIndx}}{The indices of the annual maximum values.}
#'     \item{\code{monthlyMax}}{The monthly maximum values.}
#'     \item{\code{monthlyMaxDate}}{The dates corresponding to the monthly maximum values.}
#'     \item{\code{monthlyMaxIndx}}{The indices of the monthly maximum values.}
#'   }
#' @seealso [tsGetPOT()]
#' @import stats
#' @examples
#' # Generate sample data
#' data <- ArdecheStMartin
#' colnames(data) <- c("Date", "Value")
#' #select only the 5 latest years
#' yrs <- as.integer(format(data$Date, "%Y"))
#' tokeep <- which(yrs>=2015)
#' data <- data[tokeep,]
#' timeWindow <- 365 # 1 year
#' # Calculate statistics and data
#' result <- tsEvaSampleData(data, meanEventsPerYear=3, minEventsPerYear=0,
#' minPeakDistanceInDays=7, "high")
#' # View the result
#' print(result)
#' @export
tsEvaSampleData <- function(ms, meanEventsPerYear,minEventsPerYear, minPeakDistanceInDays,tail=NA) {

  pctsDesired = c(90, 95, 99, 99.9)
  args <- list(meanEventsPerYear = meanEventsPerYear,
               minEventsPerYear = minEventsPerYear,
               potPercentiles = c(seq(70,90,by=1), seq(91,99.5,by=0.5)))
  meanEventsPerYear = args$meanEventsPerYear
  minEventsPerYear = args$minEventsPerYear
  potPercentiles = args$potPercentiles
  if(is.na(tail)) stop("tail for POT selection needs to be 'high' or 'low'")

  POTData <- tsGetPOT(ms, potPercentiles, meanEventsPerYear,minEventsPerYear,minPeakDistanceInDays, tail)

  vals <- quantile(ms[,2], pctsDesired/100,na.rm=T)
  percentiles <- list(precentiles = pctsDesired, values = vals)

  pointData <- list()
  pointData$completeSeries <- ms
  pointData$POT <- POTData
  pointDataA <- computeAnnualMaxima(ms)
  pointDataM <- computeMonthlyMaxima(ms)

  yrs <- unique(as.numeric(format(as.Date(ms[,1]+3600), "%Y")))
  yrs <- yrs - min(yrs)
  pointData$years <- seq(min(yrs),max(yrs),1)

  pointData$Percentiles <- percentiles
  pointData$annualMax=pointDataA$annualMax
  pointData$annualMaxDate=pointDataA$annualMaxDate
  pointData$annualMaxIndx=pointDataA$annualMaxIndx
  pointData$monthlyMax=pointDataM$monthlyMax
  pointData$monthlyMaxDate=pointDataM$monthlyMaxDate
  pointData$monthlyMaxIndx=pointDataM$monthlyMaxIndx

  return(pointData)
}



#' tsGetPOT Function
#'
#' \code{tsGetPOT} is a function that calculates the Peaks Over Threshold (POT)
#' for a given time series data.
#'
#' @param ms A matrix containing the time series data with two columns:
#' the first column represents the time and the second column represents the values.
#' @param pcts A numeric vector specifying the percentiles to be used as
#' thresholds for identifying peaks.
#' @param desiredEventsPerYear The desired number of events per year.
#' @param minEventsPerYear The minimum number of events per year.
#' @param minPeakDistanceInDays The minimum distance between two peaks in days.
#' @param tail The tail to be studied for POT selection, either 'high' or 'low'.
#'
#' @return A list containing the following fields:
#' \describe{
#'   \item{\code{threshold}}{The threshold value used for identifying peaks}
#'   \item{\code{thresholdError}}{The error associated with the threshold value}
#'   \item{\code{percentile}}{The percentile value used as the threshold.}
#'   \item{\code{peaks}}{The values of the identified peaks.}
#'   \item{\code{stpeaks}}{The start indices of the identified peaks.}
#'   \item{\code{endpeaks}}{The end indices of the identified peaks.}
#'   \item{\code{ipeaks}}{The indices of the identified peaks.}
#'   \item{\code{time}}{The time values corresponding to the identified peaks.}
#'   \item{\code{pars}}{The parameters of the Generalized Pareto Distribution (GPD)
#'   fitted to the peaks.}
#' }
#'
#' @import stats
#' @importFrom POT fitgpd
#' @seealso [tsEvaSampleData()]
#' @examples
#' # Create a sample time series data
#' ms <- ArdecheStMartin
#'
#' # Calculate the POT using the tsGetPOT function
#' pcts <- c(90, 95, 99)
#' desiredEventsPerYear <- 5
#' minEventsPerYear <- 2
#' minPeakDistanceInDays <- 10
#' tail <- "high"
#' POTdata <- tsGetPOT(ms, pcts, desiredEventsPerYear, minEventsPerYear, minPeakDistanceInDays, tail)
#' # Print the results
#' print(POTdata)
#'
#' @export
tsGetPOT <- function(ms, pcts, desiredEventsPerYear,minEventsPerYear, minPeakDistanceInDays, tail) {

  if (minPeakDistanceInDays == -1) {
    stop("label parameter 'minPeakDistanceInDays' must be set")
  }
  dt1=min(diff(ms[,1]),na.rm=T)
  dt=as.numeric(dt1)
  tdim=attributes(dt1)$units
  if (tdim=="hours") dt=dt/24
  if (tdim=="seconds") dt=dt/3600
  minPeakDistance <- minPeakDistanceInDays/dt
  minRunDistance <- minPeakDistance
  nyears <- round(as.numeric((max(ms[,1]) - min(ms[,1]))/365.25))
  if (length(pcts) == 1) {
    pcts = c(pcts - 3, pcts)
    desiredEventsPerYear = -1
  }

  numperyear <- rep(NA, length(pcts))
  minnumperyear <- rep(NA, length(pcts))
  thrsdts <- rep(NA, length(pcts))
  gpp=rep(NA, length(pcts))
  devpp=rep(NA, length(pcts))
  dej=0
  skip=0
  trip=NA
  perfpen=0
  for (ipp in 1:length(pcts)) {
    #Skip is used to prevent finding peaks for inappropriate thresholds
    if (skip>0) {
      skip=skip-1
    }else{
      if(dej==0){
        thrsdt <- quantile(ms[,2],pcts[ipp]/100,na.rm=T)
        thrsdts[ipp] <- thrsdt
        ms[,2][which(is.na(ms[,2]))]=-9999
        minEventsPerYear=1

        if(tail=="high") {
          #boundaries of shape parameter
          shape_bnd=c(-0.5,1)
          pks <- pracma::findpeaks(ms[,2],minpeakdistance = minPeakDistance, minpeakheight = thrsdt)
        }
        if(tail=="low") {
          pks <- declustpeaks(data = ms[,2] ,minpeakdistance = minPeakDistance ,minrundistance = minRunDistance, qt=thrsdt)
          shape_bnd=c(-1.5,0)
        }
        numperyear[ipp] <- length(pks[,1])/nyears
        if(numperyear[ipp]>=3*desiredEventsPerYear & ipp<(length(pcts)-5)) skip = floor(length(pcts)/8)
        if(numperyear[ipp]<minEventsPerYear) {
          perfpen=(pcts[ipp]*100)
        }
        if(numperyear[ipp]<(0.7*minEventsPerYear)) {
          perfpen=9999*(pcts[ipp]*100)
        }
        if(numperyear[ipp]<=desiredEventsPerYear+1 & dej==0){
          fgpd=suppressWarnings(POT::fitgpd(pks[,1], threshold = thrsdt, est = "mle",method="BFGS",std.err.type = "expected"))
          gpdpar=fgpd$fitted.values
          deviance=fgpd$deviance
          devpp[ipp]=AIC(fgpd)+perfpen
          gpp[ipp]=gpdpar[2]
          nperYear <- tsGetNumberPerYear(ms, pks[,2])
          minnumperyear[ipp] <- min(nperYear$Freq, na.rm = TRUE)
        }
      }
    }
  }
  devpp[1]=NA
  if(is.na(trip)){
    isok=F
    count=0
    devpx=devpp
    #discard the fits that provide negative values
    while(isok==F){
      trip=which.min(devpx)
      isok=dplyr::between(gpp[trip], shape_bnd[1], shape_bnd[2])
      count=count+1
      if(isok==F)devpx[trip]=devpx[trip]+9999
      if(count>(length(devpx)-1)){
        #safety measure for stability of parameter
        dshap=c(0,diff(gpp))
        #Penalizing fits with positive shape parameters for low tail
        if(tail=="low") devpp[which(gpp>=0)]=devpp[which(gpp>=0)]+99999
        devpp[which(abs(dshap)>0.5)]=devpp[which(abs(dshap)>0.5)]+9999
        trip=which.min(devpp)
        message(paste0("shape outside boudaries: ",round(gpp[trip],2)))
        isok=T
      }
    }
  }
  message(paste0("\nmax threshold is: ", pcts[trip],"%"))
  message(paste0("\naverage number of events per year = ",round(numperyear[trip],1) ))

  diffNPerYear <- mean(diff(na.omit(rev(numperyear)), na.rm = TRUE))
  if (diffNPerYear == 0) diffNPerYear <- 1
  diffNPerYear <- 1
  thresholdError <- -mean(diff(na.omit(thrsdts))/diffNPerYear)/2
  indexp <- trip
  if (!is.na(indexp)) {
    thrsd <- quantile(ms[,2],pcts[indexp]/100)
    pct <- pcts[indexp]
  } else {
    thrsd <- 0
    pct <- NA
  }
  # Find peaks in the second column of the matrix 'ms'
  if(tail=="high") pks_and_locs <- pracma::findpeaks(ms[,2],minpeakdistance = minPeakDistance, minpeakheight = thrsd)
  if(tail=="low") pks_and_locs <- declustpeaks(data = ms[,2] ,minpeakdistance = minPeakDistance ,minrundistance = minRunDistance, qt=thrsd)

  # Assign peaks and peak locations to separate variables
  pks <- pks_and_locs[,1]
  locs <- pks_and_locs[,2]
  st<-pks_and_locs[,3]
  end=pks_and_locs[,4]

  # Create a list to store results
  POTdata <- list()
  # Assign values to the fields of the list
  POTdata[['threshold']] <- thrsd
  POTdata[['thresholdError']] <- thresholdError
  POTdata[['percentile']] <- pct
  POTdata[['peaks']] <- pks
  POTdata[['stpeaks']] <- st
  POTdata[['endpeaks']] <- end
  POTdata[['ipeaks']] <- locs
  POTdata[['time']] <- ms[locs, 1]
  POTdata[['pars']] <- gpdpar


  return(POTdata)
}

#' tsEVstatistics
#'
#' \code{tsEVstatistics} is a function that calculates the Generalized Extreme
#' Value (GEV) and Generalized Pareto Distribution (GPD) statistics
#' and return levels for a given dataset of extreme values.
#'
#' @param pointData A list containing the dataset of extreme values. It should include the following components:
#'   \describe{
#'     \item{\code{annualMax}}{A vector of annual maximum values}
#'     \item{\code{annualMaxDate}}{A vector of dates corresponding to the annual maximum values}
#'     \item{\code{monthlyMax}}{A matrix of monthly maximum values}
#'   }
#' @param alphaCI The confidence level for the confidence intervals of the parameter estimates. Default is 0.95.
#' @param gevMaxima The type of maxima to use for GEV fitting. Can be either 'annual' or 'monthly'. Default is 'annual'.
#' @param gevType The type of GEV distribution to use. Can be either 'GEV' or 'gumbel'. Default is 'GEV'.
#' @param evdType The types of extreme value distributions to calculate. Can be a combination of 'GEV' and 'GPD'. Default is c('GEV', 'GPD').
#' @param shape_bnd The lower and upper bounds for the shape parameter of the GEV distribution. Default is c(-0.5, 1).
#'
#' @importFrom evd fgev
#' @return A list containing the following components:
#'   \describe{
#'     \item{\code{EVmeta}}{A list containing metadata about the analysis.
#'     It includes Tr, A vector of return periods for which
#'     return levels are calculated}
#'     \item{\code{EVdata}}{A list containing the calculated statistics and return levels. It includes the following components:}
#'       \describe{
#'         \item{\code{GEVstat}}{A list containing the GEV statistics and return levels:}
#'           \describe{
#'             \item{\code{method}}{The method used for fitting the GEV distribution.}
#'             \item{\code{values}}{A vector of return levels calculated using the GEV distribution.}
#'             \item{\code{parameters}}{A vector of parameter estimates for the GEV distribution.}
#'             \item{\code{paramCIs}}{A matrix of confidence intervals for the parameter estimates.}
#'           }
#'         }
#'         \item{\code{GPDstat}}{list containing the GPD statistics and return levels:}
#'           \describe{
#'             \item{\code{method}}{The method used for fitting the GPD distribution}
#'             \item{\code{values}}{A vector of return levels calculated using the GPD distribution}
#'             \item{\code{parameters}}{A vector of parameter estimates for the GPD distribution}
#'             \item{\code{paramCIs}}{A matrix of confidence intervals for the parameter estimates}
#'           }
#'         \item{\code{isValid}}{A logical value indicating whether the analysis was performed or not.}
#'     }
#'
#' @examples
#'# Create a sample dataset
#'data <- ArdecheStMartin
#'colnames(data) <- c("Date", "Value")
#'yrs <- as.integer(format(data$Date, "%Y"))
#'tokeep <- which(yrs>=2015)
#'data <- data[tokeep,]
#'# Calculate statistics and data
#'pointData <- tsEvaSampleData(data, meanEventsPerYear=3, minEventsPerYear=0,
#'minPeakDistanceInDays=7, "high")
#'# Calculate GEV and GPD statistics
#'result <- tsEVstatistics(pointData)
#'result$EVdata$GEVstat$values
#'result$EVdata$GPDstat$values
#' @export
tsEVstatistics <- function(pointData, alphaCI = 0.95, gevMaxima = 'annual', gevType = 'GEV', evdType = c('GEV', 'GPD'),shape_bnd=c(-0.5,1)) {
  # Create empty data structures
  EVmeta <- list()
  EVdata <- list()
  isValid <- TRUE

  minGEVSample <- 7
  if(is.null(alphaCI)){alphaCI <- .95}
  if(is.null(gevMaxima)){gevMaxima <- 'annual'}
  if(is.null(gevType)){gevType <- 'GEV'}
  if(is.null(evdType)){evdType <- c('GEV', 'GPD')}

  # Define Tr vector
  Tr <- c(5,10,20,50,100,200,500,1000)
  EVmeta$Tr <- Tr
  nyears <- length(pointData$annualMax)[1]

  imethod <- 1
  methodname <- 'GEVstat'
  paramEsts <- numeric(3)
  paramCIs <- matrix(NA, nrow = 2, ncol = 3)
  rlvls <- numeric(length(Tr))

  # GEV statistics
  if (('GEV' %in% evdType) && !is.null(pointData$annualMax)) {
    if (gevMaxima == 'annual') {
      tmpmat <- pointData$annualMax
    } else if (gevMaxima == 'monthly') {
      tmpmat <- pointData$monthlyMax
    } else {
      stop(paste0('Invalid gevMaxima type: ', gevMaxima))
    }
    iIN <- length(tmpmat)
    if (sum(iIN) >= minGEVSample) {
      tmp <- data.frame(yr=year(pointData$annualMaxDate),dt=tmpmat)
      # Perform GEV/Gumbel fitting and computation of return levels
      if (gevType == "GEV"){
        stdfit=TRUE
        #try to fit GEV with bounded shape parameters and stderr, reduces the constrains if no fit.
        fit <- suppressWarnings(try(evd::fgev(x=tmp$dt,method="L-BFGS-B",lower=c(-Inf,-Inf,shape_bnd[1]),upper=c(Inf,Inf,shape_bnd[2]),std.err = T),TRUE))
        if(inherits(fit, "try-error")){
          stdfit=FALSE
          message("Not able to fit GEV stderr")
          fit <- suppressWarnings(try(evd::fgev(x=tmp$dt,method="L-BFGS-B",lower=c(-Inf,-Inf,shape_bnd[1]),upper=c(Inf,Inf,shape_bnd[2]),std.err = F),TRUE))
        }
        if(inherits(fit, "try-error")){
          stdfit=FALSE
          message("Not able to fit GEV with constrained parameters")
          fit <- suppressWarnings(try(evd::fgev(x=tmp$dt,std.err = F),TRUE))
        }
        paramEsts <- c(mu=fit$par[1],sigma=fit$par[2],xi=fit$par[3])
        alphaCIx=1-alphaCI
        if (stdfit==TRUE){
          probs <- c(alphaCIx/2, 1-alphaCIx/2)
          # Compute the CI for k using a normal distribution for khat.
          kci <- try(qnorm(probs, paramEsts[3], fit$std.err[3]),TRUE)
          kci[kci < -1] <- -1
          # Compute the CI for sigma using a normal approximation for log(sigmahat)
          # and transform back to the original scale.
          lnsigci <- try(qnorm(probs, log(paramEsts[2]), fit$std.err[2]/paramEsts[2]),silent=T)
          muci <- qnorm(probs, paramEsts[1], fit$std.err[1])
          paramCIs <- cbind(muci=muci, sigci=exp(lnsigci), kci=kci)
        }else{
          #not able to generate parameters CI
          paramCIs <- cbind(muci=NA, sigci=NA, kci=NA)
        }
      } else if (gevType == "gumbel"){
        fit <- texmex::evm(y = dt, data = tmp, family = gumbel)
        paramEsts <- c(fit$par[1], exp(fit$par[2]),0)

        alphaCIx=1-alphaCI
        probs <- c(alphaCIx/2, 1-alphaCIx/2)
        # Compute the CI for k using a normal distribution for khat.
        if(is.character(fit$se[1])){
          message("Gumbel fitted")
          #methodname <- 'No fit'
          gevType = "gumbel"
        }else{
          message("Gumbel fitted")
          #gevType = "gumbel"
          kci <- c(NA,NA)
          # Compute the CI for sigma using a normal approximation for log(sigmahat)
          # and transform back to the original scale.
          lnsigci <- try(qnorm(probs, log(paramEsts[2]), fit$se[2]))

          muci <- qnorm(probs, paramEsts[1], fit$se[1])
          paramCIs <- cbind(muci=muci, sigci=exp(lnsigci), kci=kci)
        }
      }
      else {
        stop("tsEVstatistics: invalid gevType: ", gevType, ". Can be only GEV, or Gumbel")
      }
      rlvls <- evd::qgev(1-1/Tr, paramEsts[1], paramEsts[2], paramEsts[3])

    }else{
      message("Could not fit GEV")
      methodname <- 'No fit'
      rlvls=NA
      paramEsts=NA
      paramCIs=NA

    }
  }

  EVdata$GEVstat <- list(method=methodname,
                         values=rlvls,
                         parameters=paramEsts,
                         paramCIs = paramCIs)

  # Create output structures for GEV statistics
  # GPD statistics
  imethod <- 2
  methodname <- 'GPDstat'
  paramEsts <- numeric(6)
  paramCIs <- matrix(NA, nrow = 2, ncol = 3)
  rlvls <- numeric(length(Tr))

  if (('GPD' %in% evdType) && !is.null(pointData$annualMax)) {
    # Perform GPD fitting and computation of return levels
    message("Fitted GPD")
    ik <- 1
    th=pointData$POT$threshold
    d1 <- pointData$POT$peaks


    fit <- suppressWarnings(try(POT::fitgpd(d1, threshold = th, est = "mle",
                                       method="BFGS",std.err.type = "expected"),TRUE))
    if(!inherits(fit, "try-error")){
      ksi <- fit$par[2]
      sgm <- fit$par[1]
      alphaCIx=1-alphaCI
      probs <- c(alphaCIx/2, 1-alphaCIx/2)
      kci <- try(qnorm(probs, ksi, fit$std.err[2]),silent=T)
      kci[kci < -1] <- -1
      # Compute the CI for sigma using a normal approximation for log(sigmahat)
      # and transform back to the original scale.
      lnsigci <- try(qnorm(probs, log(sgm), fit$std.err[1]/sgm))
      paramCIs <- cbind(kci, sigci=exp(lnsigci))

      # Create output structures for GPD statistics

      # Assign values to paramEstsall
      paramEstsall <- c(sgm, ksi, pointData$POT$threshold, length(d1), length(pointData$POT$peaks), pointData$POT$percentile)
      # Assign values to rlvls
      rlvls <- pointData$POT$threshold + (sgm/ksi) * ((((length(d1)/length(pointData$POT$peaks))*(1/Tr))^(-ksi))-1)

      EVdata$GPDstat <- list(method=methodname,
                             values=rlvls,
                             parameters=paramEstsall,
                             paramCIs = paramCIs)
    }else {
      methodname <- 'No fit'
      ik <- 1
      th=pointData$POT$threshold
      d1 <- pointData$POT$peaks
      paramEstsall <- c(pointData$POT$pars[1], pointData$POT$pars[2],
                        pointData$POT$threshold, length(d1),
                        length(pointData$POT$peaks), pointData$POT$percentile)
      EVdata$GPDstat <-  list(method=methodname,
                              values=NA,
                              parameters=paramEstsall,
                              paramCIs = NA)
      message("could not estimate GPD: bounded distribution")
    }
  } else {
    methodname <- 'No fit'
    ik <- 1
    th=pointData$POT$threshold
    d1 <- pointData$POT$peaks
    paramEstsall <- c(pointData$POT$pars[1], pointData$POT$pars[2],
                      pointData$POT$threshold, length(d1),
                      length(pointData$POT$peaks), pointData$POT$percentile)
    EVdata$GPDstat <-  list(method=methodname,
                            values=NA,
                            parameters=paramEstsall,
                            paramCIs = NA)
    message("could not estimate GPD: bounded distribution")
  }

  # Return outputs
  return(list(EVmeta = EVmeta, EVdata = EVdata, isValid = isValid))
}



# Helpers for main functions----------------

#' tsGetNumberPerYear
#'
#' \code{tsGetNumberPerYear} is a function that calculates the number of events per year based on a given time series and a set of locations.
#'
#' @param ms A data frame representing the time series data, where the first column contains the dates of the events.
#' @param locs A vector of indices representing the locations of interest in the time series.
#'
#' @return A data frame with two columns: "year" and "Freq". The "year" column contains the years, and the "Freq" column contains the number of events per year.
#'
#' @importFrom dplyr full_join
#' @importFrom lubridate year
#' @examples
#' # Create a sample time series data frame
#'set.seed(123)
#' ms <- data.frame(date = seq(as.Date("2000-01-01"), as.Date("2022-12-31"), by = "day"),
#'                 values=rnorm(8401))
#'
#'# Generate random events
#'events <- match(sample(ms$date, 100),ms$date)
#'# Get the number of events per year
#'tsGetNumberPerYear(ms, events)
#'
#' @export
tsGetNumberPerYear <- function(ms, locs){

  # Get all years
  sdfull <- seq(min(ms[,1]), max(ms[,1]), by = "days")

  # Years covered by the full daily time vector
  sdfull2 <- year(sdfull)
  sdfull2p <- data.frame(year = unique(sdfull2))


  # Years of the series time vector
  sdser <- year(ms[,1])

  # Get years vector
  sdp <- ms[locs,1]
  sdp <- year(as.Date(sdp))


  # Get number of events per year
  nperYear <- data.frame(table(sdp))
  nperYear$year=as.numeric(as.character(nperYear$sdp))
  nperYear=dplyr::full_join(nperYear,sdfull2p,by=c("year"="year"))
  nperYear$Freq[which(is.na(nperYear$sdp))] <- 0
  nperYear$opy <- table(sdser)
  nperYear$Freq[which(nperYear$opy<184)] <- 0
  return(nperYear)
}


#' findMax
#'
#' \code{findMax} is a function that takes a subset of a vector
#' and returns the index of the maximum value in that subset.
#'
#' @param subIndxs A numeric vector representing the subset of indices to consider.
#' @param srs A vector of numerical data
#' @return The index of the maximum value in the subset.
#' @examples
#' srs <- c(10, 20, 30, 40, 50)
#' findMax(c(1, 3, 5),srs)
#' #result is 5.
#' @export
findMax <- function(subIndxs,srs) {
  subIndxMaxIndx <- which.max(srs[subIndxs])
  return(subIndxs[subIndxMaxIndx])
}

#' computeAnnualMaxima
#'
#' \code{computeAnnualMaxima} is a function that computes the annual maxima of
#' a time series.
#'
#' @param timeAndSeries A matrix or data frame containing the time stamps and
#' series values. The first column should contain the time stamps and the
#' second column should contain the series values.
#'
#' @return A list containing the annual maximum values, their corresponding dates,
#'  and their indices.
#' \describe{
#' \item{\code{annualMax}}{A numeric vector of annual maximum values.}
#' \item{\code{annualMaxDate}}{A vector of dates corresponding to the annual
#' maximum values.}
#' \item{\code{annualMaxIndx}}{A vector of indices indicating the positions
#' of the annual maximum values in the original time series.}
#' }
#'
#' @examples
#' timeAndSeries <- ArdecheStMartin
#' computeAnnualMaxima(timeAndSeries)
#' @export
computeAnnualMaxima <- function(timeAndSeries) {
  timeStamps <- timeAndSeries[,1]
  dt1=min(diff(timeStamps),na.rm=T)
  dt=as.numeric(dt1)
  tdim=attributes(dt1)$units
  tmvec <- as.Date(timeStamps)
  if (tdim!="days")   tmvec <- as.Date(timeStamps+3600)
  srs <- timeAndSeries[,2]
  years <- year(tmvec)
  annualMaxIndx <- tapply(1:length(srs), years, function(x) {
    findMax(x, srs)
  })
  annualMaxIndx <- annualMaxIndx[!is.na(annualMaxIndx)]
  annualMaxIndx=as.vector(unlist((annualMaxIndx)))
  annualMax <- srs[annualMaxIndx]
  annualMaxDate <- timeStamps[annualMaxIndx]

  return(list(annualMax = annualMax, annualMaxDate = annualMaxDate, annualMaxIndx = annualMaxIndx))
}


#' computeMonthlyMaxima
#'
#' \code{computeMonthlyMaxima} is a function that computes the monthly maxima
#' of a time series.
#'
#' @param timeAndSeries A data frame containing the time stamps and series values.
#'                      The first column should contain the time stamps,
#'                      and the second column should contain the series values.
#'
#' @return A list containing the monthly maxima, corresponding dates, and indices.
#' \describe{
#' \item{\code{monthlyMax}}{A vector of the monthly maximum values.}
#' \item{\code{monthlyMaxDate}}{A vector of the dates corresponding to the monthly maximum values.}
#' \item{\code{monthlyMaxIndx}}{A vector of the indices of the monthly maximum values in the original series.}
#'}
#' @examples
#' timeAndSeries <- ArdecheStMartin
#' computeMonthlyMaxima(timeAndSeries)
#' @export
computeMonthlyMaxima<- function(timeAndSeries) {
  timeStamps <- timeAndSeries[,1]
  srs <- timeAndSeries[,2]

  tmvec <- as.Date(timeStamps+3600)
  yrs <- as.integer(format(tmvec, "%Y"))
  mnts <- as.integer(format(tmvec, "%m"))
  mnttmvec <- data.frame(yrs, mnts)
  valsIndxs <- 1:length(srs)
  monthlyMaxIndx <- aggregate(valsIndxs ~ yrs + mnts, data = mnttmvec, function(x) {
    findMax(x,srs)
  })
  monthlyMaxIndx$valsIndxs=as.numeric(monthlyMaxIndx$valsIndxs)
  monthlyMaxIndx <- monthlyMaxIndx[order(monthlyMaxIndx$yrs, monthlyMaxIndx$mnts), "valsIndxs"]
  monthlyMax <- srs[monthlyMaxIndx]
  monthlyMaxDate <- timeStamps[monthlyMaxIndx]

  return(list(monthlyMax = monthlyMax, monthlyMaxDate = monthlyMaxDate, monthlyMaxIndx = monthlyMaxIndx))
}


#' declustpeaks
#'
#' \code{declustpeaks} is a function that takes in a data vector, minimum peak distance, minimum run distance, and a threshold value.
#' It finds peaks in the data vector based on the minimum peak distance and threshold value.
#' It then declusters the peaks based on the minimum run distance and threshold value.
#' The function returns a data frame with information about the peaks, including the peak value,
#' start and end indices, duration, and cluster information.
#'
#' @param data A numeric vector representing the data.
#' @param minpeakdistance An integer specifying the minimum distance between peaks.
#' @param minrundistance An integer specifying the minimum distance between runs.
#' @param qt A numeric value representing the threshold for peak detection.
#' @import texmex stats
#' @return A data frame with information about the peaks, including:
#' \describe{
#'   \item{\code{Q}}{the peak value}
#'   \item{\code{max}}{max time index}
#'   \item{\code{start}}{start time index}
#'   \item{\code{end}}{end time index}
#'   \item{\code{dur}}{duration}
#'   \item{\code{cluster}}{cluster information }
#'   }
#' @examples
#' data <- c(1, 2, 3, 4, 5, 4, 3, 2, 1)
#' declustpeaks(data, minpeakdistance = 2, minrundistance = 2, qt = 3)
#'
#' @export
declustpeaks<-function(data,minpeakdistance = 10 ,minrundistance = 7, qt){
  pks <- pracma::findpeaks(data,minpeakdistance = minpeakdistance, minpeakheight = qt)
  peakev=texmex::declust(data,threshold=qt, r=minrundistance)

  Qval=peakev$thExceedances
  intcl=c(TRUE,peakev$InterCluster)
  peakex=data.frame(Qval,intcl, peakev$clusters,peakev$isClusterMax,peakev$exceedanceTimes)
  names(peakex)=c("Qs","Istart","clusters","IsClustermax","exceedances")
  evmax=peakex[which(peakex$IsClustermax==T),]
  ziz=aggregate(peakex$exceedance,
                by=list(clust=peakex$clusters), FUN=function(x) c(max(x)-min(x)+1))
  st=stats::aggregate(peakex$exceedance,
               by=list(clust=peakex$clusters), FUN=function(x) c(min(x)))
  end=stats::aggregate(peakex$exceedance,
                by=list(clust=peakex$clusters), FUN=function(x) c(max(x)))
  evmax$dur=ziz$x
  evmax$durx=peakev$sizes
  evmax$stdate=peakex$date[which(evmax$Istart==T)]
  evmax$cm = peakex$exceedanceTimes[peakev$isClusterMax]
  evmax$Qv = peakex$thExceedances[peakev$isClusterMax]
  peakdt=data.frame(evmax$Qs,evmax$exceedances,st$x,end$x,ziz$x,evmax$clusters)
  names(peakdt)=c("Q","max","start","end","dur","cluster")
  peakdt=peakdt[order(peakdt$Q,decreasing = TRUE),]

  return(peakdt)
}

#' empdis: Empirical Distribution Function
#'
#' \code{empdis} is a function that calculates the empirical
#'distribution function for a given dataset.
#'
#' @param x A numeric vector representing the dataset.
#' @param nyr An integer representing the number of years in the dataset.
#'
#' @return A data frame containing:
#' \describe{
#'   \item{\code{emp.RP}}{empirical return period}
#'   \item{\code{haz.RP}}{Hazen return period}
#'   \item{\code{cun.RP}}{Cunnane return period}
#'   \item{\code{gumbel}}{Gumbel values}
#'   \item{\code{emp.f}}{empirical cumulative density}
#'   \item{\code{emp.hazen}}{Hazen cumulative density}
#'   \item{\code{emp.cunnan}}{Cunnane cumulative density}
#'   \item{\code{Q}}{original data}
#'   \item{\code{timestamp}}{time component}
#'   }
#' @examples
#' x <- c(1, 2, 3, 4, 5)
#' nyr <- 5
#' empdis(x, nyr)
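#'
#' # A typical use (sketch): plot empirical return periods on a Gumbel scale.
#' ed <- empdis(rnorm(60, mean = 100, sd = 10), nyr = 60)
#' plot(ed$gumbel, ed$Q, xlab = "Gumbel variate", ylab = "value")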
#'
#' @export
empdis <- function(x, nyr) {
  ts <- seq(1:length(x))
  ts <- as.data.frame(ts[order(x)])
  x <- as.data.frame(x[order(x)])
  epyp <- length(x[, 1]) / nyr
  rank <- apply(x, 2, rank, na.last = "keep")
  hazen <- (rank - 0.5) / length(x[, 1])
  cunnane <- (rank - 0.4) / (length(x[, 1]) + 0.2)
  gumbel <- -log(-log(hazen))
  nasmp <- apply(x, 2, function(x) sum(!is.na(x)))
  epdp <- rank / rep(nasmp + 1, each = nrow(rank))
  empip <- data.frame(1 / (epyp * (1 - epdp)), 1 / (epyp * (1 - hazen)),
                      1 / (epyp * (1 - cunnane)), gumbel, epdp, hazen, cunnane, x, ts)
  names(empip) <- c("emp.RP", "haz.RP", "cun.RP", "gumbel", "emp.f",
                    "emp.hazen", "emp.cunnane", "Q", "timestamp")
  return(empip)
}


#' Empirical Distribution Function
#'
#' This function calculates the empirical distribution function for a given dataset,
#' with a focus on low values
#'
#' @param x A numeric vector or matrix representing the discharge values.
#' @param nyr An integer representing the number of years.
#'
#' @return A data frame containing the following columns:
#'   \describe{
#'     \item{\code{emp.RP}}{Empirical return period}
#'     \item{\code{haz.RP}}{Hazen return period}
#'     \item{\code{gumbel}}{Gumbel frequency}
#'     \item{\code{emp.f}}{Empirical frequency}
#'     \item{\code{emp.hazen}}{Empirical Hazen frequency}
#'     \item{\code{Q}}{Discharge values}
#'   }
#'
#' @examples
#' x <- c(10, 20, 30, 40, 50)
#' nyr <- 5
#' empdisl(x, nyr)
#'
#' @export
empdisl <- function(x, nyr) {
  x <- as.data.frame(x[order(x, decreasing = TRUE)])
  epyp <- length(x[, 1]) / nyr
  rank <- apply(-x, 2, rank, na.last = "keep")
  hazen <- (rank - 0.5) / length(x[, 1])
  gumbel <- -log(-log(hazen))
  nasmp <- apply(x, 2, function(x) sum(!is.na(x)))
  epdp <- rank / rep(nasmp + 1, each = nrow(rank))
  empip <- data.frame(1 / (epyp * (1 - epdp)), 1 / (epyp * (1 - hazen)), gumbel, epdp, hazen, x)
  names(empip) <- c("emp.RP", "haz.RP", "gumbel", "emp.f", "emp.hazen", "Q")
  return(empip)
}
