# R/All.R

#' Kingston upon Thames daily flow and catchment precipitation 2000-10-01 to 2015-09-30
#'
#' A data.frame of three columns; Date, Precipitation (P), & daily mean flow (Q)
#'
#' @format A data frame with 5478 rows and 3 columns:
#' \describe{
#'   \item{Date}{Date}
#'   \item{P}{Precipitation, in mm}
#'   \item{Q}{Daily mean discharge, in m3/s}
#' }
#' @source \url{https://nrfa.ceh.ac.uk/data/station/meanflow/39001}
"ThamesPQ"


#' National River Flow Archive descriptors and calculated statistics for sites suitable for pooling
#'
#' A data.frame of catchment descriptors, Lmoments, Lmoment ratios, sample size and median annual maximum flow (QMED). NRFA Peak Flow Dataset - Version 10.
#' @details The functions for pooling group formation and estimation rely on this data frame. However, the data frame is open for manipulation in case the user wishes to add sites that aren't included, or to change entries where local knowledge has improved on the data (although, in the latter case, such changes are usually more appropriately applied to the formed pooling group). If changes are made, they only remain within the workspace. If a new workspace is opened and the UKFE package is loaded, the data frame returns to its original state.
#'
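#' @examples
#' #Hypothetical local adjustment (illustrative value only): update the Lcv for a site.
#' #The change only persists within the current workspace.
#' NRFAData["39001", "Lcv"] <- 0.185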
#' @format A data frame with 546 rows and 27 variables
#' @source \url{https://nrfa.ceh.ac.uk/peak-flow-dataset}
#' @export
"NRFAData"


#' National River Flow Archive (NRFA) annual maximum data for sites suitable for pooling
#'
#' A data.frame with three columns; Date, Flow, id. NRFA Peak Flow Dataset - Version 10
#'
#'
#' @format A data frame with 25020 rows and 3 columns
#' \describe{
#'   \item{Date}{Date}
#'   \item{Flow}{Annual maximum peak flow, m3/s}
#'   \item{id}{Station identification number}
#' }
#' @source \url{https://nrfa.ceh.ac.uk/peak-flow-dataset}
"AMSP"


#' National River Flow Archive descriptors and calculated statistics for sites suitable for QMED & pooling
#'
#' A data.frame of catchment & data descriptors relating to the median annual maximum flow (QMED). NRFA Peak Flow Dataset - Version 10
#'
#' @details The functions for QMED estimation and retrieval of catchment descriptors rely on this data frame. However, the data frame is open for manipulation in case the user wishes to add sites that aren't included, or to change entries where local knowledge has improved on the data. If changes are made, they only remain within the workspace. If a new workspace is opened and the UKFE package is loaded, the data frame returns to its original state.
#'
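#' @examples
#' #Hypothetical local adjustment (illustrative value only): update the QMED for a site.
#' #The change only persists within the current workspace.
#' QMEDData["39001", "QMED"] <- 280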
#' @format A data frame with 885 rows and 24 variables
#' @source \url{https://nrfa.ceh.ac.uk/peak-flow-dataset}
"QMEDData"


#' UK outline
#'
#' Easting and northing national grid reference points around the coast of the UK
#'
#' @format A data frame with 3867 rows and 2 variables
#' \describe{
#'   \item{X_BNG}{Easting, British national grid reference}
#'   \item{Y_BNG}{Northing, British national grid reference}
#' }
#' @source \url{https://environment.data.gov.uk/}
"UKOutline"


globalVariables(c("ThamesPQ", "NRFAData", "QMEDData", "UKOutline", "AMSP", "id", "URBEXT2000"))


# QuickResults ------------------------------------------------------------

#' Quick pooled results
#'
#' Provides pooled gauged, ungauged, or fake ungauged results, directly from the catchment descriptors
#'
#' The QuickResults function provides results based on a default pooling group. If gauged = FALSE, the median annual maximum flood (QMED) is estimated from catchment descriptors using the QMED equation and then adjusted with two of the closest non-urban gauged sites (this can be changed to 0 or 1 donors). If the site is urban, an urban adjustment is made to the QMED and to the pooled growth curve. If gauged = TRUE, QMED is the median of the gauged annual maxima and the growth curve is formed with the gauged weighting procedure (often known as enhanced single site). If the gauged catchment is urban, it is included in the pooling group and de-urbanised before an urban adjustment is made to the final growth curve. If FUngauged = TRUE, the top site in the pooling group is excluded and the estimate then proceeds in the manner of gauged = FALSE. If the CDs are from a gauged site that is not in the list of sites considered suitable for pooling, it won't be included in the pooling group; in that case, if gauged = TRUE, the result will be erroneous. For more details of the trend argument see the details for the PoolEst function.
#'@param CDs catchment descriptors derived from either GetCDs or ImportCDs
#'@param gauged logical argument with a default of FALSE. TRUE for gauged results and FALSE for ungauged
#'@param dons number of donors required with a choice of 0, 1, or 2
#'@param Qmed user supplied QMED which overrides the default QMED estimate
#'@param trend logical argument with a default of FALSE. TRUE adjusts the stationary QMED estimate to a non-stationary estimate
#'@param FUngauged logical argument with a default of FALSE. TRUE provides an ungauged estimate whilst excluding the gauged site (the site with the most similar CDs)
#'@param plot logical argument with a default of TRUE. TRUE provides an extreme value plot. FALSE prevents the plot
#'@param dist a choice of distribution for the estimates. The choices are "GenLog", "GEV", or "Gumbel"; the generalised logistic, generalised extreme value, and Gumbel distributions, respectively. The default is "GenLog"
#'@examples
#'#Get some catchment descriptors
#'CDs.73005 <- GetCDs(73005)
#'#Get default ungauged results
#'QuickResults(CDs.73005)
#'#Get gauged results with a GEV distribution
#'QuickResults(CDs.73005, gauged = TRUE, dist = "GEV")
#'#Get fake ungauged results with one donor
#'QuickResults(CDs.73005, FUngauged = TRUE, dons = 1)
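#'#Get ungauged results with a single donor
#'QuickResults(CDs.73005, dons = 1)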
#'
#'
#'@return A list of length two. Element one is a data frame with columns; return period (RP), peak flow estimates (Q) and growth factor estimates (GF). Two additional columns quantify the uncertainty. The second element is the estimated Lcv and Lskew (linear coefficient of variation and skewness). By default an extreme value plot is also returned
#'@author Anthony Hammond
QuickResults <- function (CDs, gauged = FALSE, dons = 2, Qmed = NULL, trend = FALSE,
                          FUngauged = FALSE, plot = TRUE, dist = "GenLog")
{
  if(is.data.frame(CDs) == FALSE) {stop("CDs doesn't appear to be a CDs object")}
  if(is.na(CDs[20,1]) == TRUE | CDs[20,1] != "Northing") {stop("CDs doesn't appear to be a CDs object")}
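  # Internal helpers for donor adjustment of the catchment-descriptor QMED: Donor1 takes the
  # single-donor adjusted QMED from DonAdj, and Donor2 applies a two-donor adjustment with
  # weights (a1, a2) derived from inter-site distances via the correlation function rij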
  Donor1 <- function(CDs, DonSite) {
    QMED.cd <- 8.3062 * CDs[1, 2]^0.851 * 0.1536^(1000/CDs[15,
                                                           2]) * CDs[8, 2]^3.4451 * 0.046^(CDs[5, 2]^2)
    Site <- DonSite
    Donors <- DonAdj(CDs = CDs, rows = 500)
    Rw <- which(rownames(Donors) == DonSite)
    Result <- Donors[Rw, 27]
    return(Result)
  }
  Donor2 <- function(CDs, Sites) {
    rij <- function(d) {
      0.4598 * exp(-0.02 * d) + (1 - 0.4598) * exp(-0.4785 *
                                                     d)
    }
    NGRDist <- function(i, j) {
      sqrt((i[1] - j[1])^2 + (i[2] - j[2])^2)/1000
    }
    Site1 <- Sites[1]
    Site2 <- Sites[2]
    CDs.Site1 <- GetCDs(Site1)
    CDs.Site2 <- GetCDs(Site2)
    Dist1 <- NGRDist(c(CDs[19, 2], CDs[20, 2]), c(CDs.Site1[19,
                                                            2], CDs.Site1[20, 2]))
    Dist2 <- NGRDist(c(CDs[19, 2], CDs[20, 2]), c(CDs.Site2[19,
                                                            2], CDs.Site2[20, 2]))
    Dist12 <- NGRDist(c(CDs.Site1[19, 2], CDs.Site1[20, 2]),
                      c(CDs.Site2[19, 2], CDs.Site2[20, 2]))
    ps1 <- rij(Dist1)
    p12 <- rij(Dist12)
    ps2 <- rij(Dist2)
    a1 <- (ps1 - p12 * ps2)/(1 - p12^2)
    a2 <- (ps2 - p12 * ps1)/(1 - p12^2)
    QMEDscd <- 8.3062 * CDs[1, 2]^0.851 * 0.1536^(1000/CDs[15,
                                                           2]) * CDs[8, 2]^3.4451 * 0.046^(CDs[5, 2]^2)
    QMED1cd <- 8.3062 * CDs.Site1[1, 2]^0.851 * 0.1536^(1000/CDs.Site1[15,
                                                                       2]) * CDs.Site1[8, 2]^3.4451 * 0.046^(CDs.Site1[5,
                                                                                                                       2]^2)
    QMED2cd <- 8.3062 * CDs.Site2[1, 2]^0.851 * 0.1536^(1000/CDs.Site2[15,
                                                                       2]) * CDs.Site2[8, 2]^3.4451 * 0.046^(CDs.Site2[5,
                                                                                                                       2]^2)
    QMED1obs <- QMEDData$QMED[which(rownames(QMEDData) ==
                                      Site1)]
    QMED2obs <- QMEDData$QMED[which(rownames(QMEDData) ==
                                      Site2)]
    QMEDs.adj <- QMEDscd * (QMED1obs/QMED1cd)^a1 * (QMED2obs/QMED2cd)^a2
    return(QMEDs.adj)
  }
  if (gauged == TRUE & FUngauged == TRUE) {
    print("Warning: Gauged & FUngauged are both TRUE. Gauged results provided")
  }
  if (gauged == FALSE) {
    PoolGroup <- Pool(CDs = CDs)
    if (CDs[18, 2] > 0.03) {
      Ptemp <- Pool(CDs = CDs, iug = TRUE)
    }
    else {
      Ptemp <- Pool(CDs = CDs)
    }
    Ex <- as.numeric(rownames(Ptemp)[1])
    PoolGroupFun <- Pool(CDs = CDs, exclude = Ex)
    if (is.null(Qmed) == TRUE) {
      qmed <- 8.3062 * CDs[1, 2]^0.851 * 0.1536^(1000/CDs[15,
                                                          2]) * CDs[8, 2]^3.4451 * 0.046^(CDs[5, 2]^2)
      DonQMED <- DonAdj(CDs = CDs, rows = 500)
      if (FUngauged == TRUE) {
        DonQMED <- DonQMED[-1, ]
      }
      UrbInd <- which(DonQMED$URBEXT2000 > 0.03)
      if (length(UrbInd) < 1) {
        DonQMED <- DonQMED
      }
      else {
        DonQMED <- DonQMED[-UrbInd, ]
      }
      D2Result <- Donor2(CDs = CDs, Sites = rownames(DonQMED)[1:2])
      D1Result <- Donor1(CDs = CDs, DonSite = rownames(DonQMED)[1])
      if (CDs[18, 2] <= 0.03) {
        if (dons == 0) {
          qmed <- qmed
        }
        if (dons == 1) {
          qmed <- D1Result
        }
        if (dons == 2) {
          qmed <- D2Result
        }
      }
      else {
        if (dons == 0) {
          qmed <- QMED(CDs = CDs, UrbAdj = TRUE)[[1]]
        }
        if (dons == 1) {
          qmed <- QMED(CDs = CDs, Don1 = rownames(DonQMED)[1],
                       UrbAdj = TRUE)[[1]]
        }
        if (dons == 2) {
          qmed <- QMED(CDs = CDs, Don2 = rownames(DonQMED)[1:2],
                       UrbAdj = TRUE)[[1]]
        }
      }
    }
    else {
      qmed = Qmed
    }
    if (FUngauged == FALSE) {
      if (CDs[18, 2] <= 0.03) {
        Est <- PoolEst(PoolGroup, QMED = qmed, trend = trend,
                       dist = dist)
      }
      else {
        Est <- PoolEst(PoolGroup, QMED = qmed, UrbAdj = TRUE,
                       CDs = CDs, trend = trend, dist = dist)
      }
    }
    else {
      if (CDs[18, 2] <= 0.03) {
        Est <- PoolEst(PoolGroupFun, QMED = qmed, trend = trend,
                       dist = dist)
      }
      else {
        Est <- PoolEst(PoolGroupFun, QMED = qmed, UrbAdj = TRUE,
                       CDs = CDs, trend = trend, dist = dist)
      }
    }
  }
  if (gauged == TRUE) {
    if (CDs[18, 2] > 0.03) {
      PoolGroup <- Pool(CDs = CDs, iug = TRUE, DeUrb = TRUE)
    }
    else {
      PoolGroup <- Pool(CDs = CDs)
    }
    Site <- rownames(PoolGroup)[1]
    AMAX <- GetAM(Site)
    if (is.null(Qmed) == TRUE) {
      qmed <- median(AMAX$Flow)
    }
    else {
      qmed <- Qmed
    }
    if (CDs[18, 2] <= 0.03) {
      Est <- PoolEst(PoolGroup, gauged = TRUE, QMED = qmed,
                     trend = trend, dist = dist)
    }
    else {
      Est <- PoolEst(PoolGroup, gauged = TRUE, QMED = qmed,
                     UrbAdj = TRUE, CDs = CDs, trend = trend, dist = dist)
    }
  }
  if (plot == TRUE) {
    if (CDs[18, 2] <= 0.03) {
      if (gauged == FALSE) {
        EVPool(PoolGroup, dist = dist)
      }
      if (gauged == TRUE) {
        EVPool(PoolGroup, gauged = TRUE, dist = dist)
      }
    }
    else {
      if (gauged == FALSE) {
        EVPool(PoolGroup, UrbAdj = TRUE, CDs = CDs, dist = dist)
      }
      if (gauged == TRUE) {
        EVPool(PoolGroup, gauged = TRUE, UrbAdj = TRUE,
               CDs = CDs, dist = dist)
      }
    }
  }
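  # Helpers for the uncertainty intervals below: SEess approximates the standard error of the
  # gauged (enhanced single site) estimate as a function of RP, and fseUGAH gives factorial
  # standard errors for the ungauged estimates according to the number of donors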
  SEess <- function(x, RP) {
    VAR <-  (((median(x) * Lcv(x))^2)/length(x))* exp(1.3125 + 0.599*(log(RP-1)) + 0.00399*(log(RP-1)^2))
    SE <- sqrt(VAR)
    return(SE)
  }
  fseUGAH <- function(RP, Dons) {
    y <- -log(-log(1-1/RP))
    if(Dons == 2) {Result <- 1.4149 - 0.0163*y + 0.0102*y^2}
    if(Dons == 1) {Result <- 1.427 - 0.0134*y + 0.0098*y^2}
    if(Dons == 0) {Result <- 1.4665 - 0.0135*y + 0.0096*y^2}
    return(Result)
  }
  if(gauged == FALSE) {
    fses <- fseUGAH(RP = Est[[1]][,1], Dons = dons)
    lower68 <- round(Est[[1]][,2]/(fses), 3)
    upper68 <- round(Est[[1]][,2]*(fses), 3)
    Est[[1]] <- data.frame(Est[[1]][,-c(4,5)], lower68, upper68)
  }
  if(gauged == TRUE) {
    SEs <- SEess(AMAX[,2], RP = Est[[1]][,1])
    lower95 <- round(Est[[1]][,2]-SEs*1.96, 3)
    upper95 <- round(Est[[1]][,2]+SEs*1.96, 3)
    Est[[1]] <- data.frame(Est[[1]][,-c(4,5)], lower95, upper95)
  }
  return(Est)
}




# Pool --------------------------------------------------------------------

#' Create pooling group
#'
#' Function to develop a pooling group based on catchment descriptors
#'
#'A pooling group is created from a CDs object, derived from GetCDs or ImportCDs, or specifically with the catchment descriptors AREA, SAAR, FARL, & FPEXT (see arguments). To change the default pooling group one or more sites can be excluded using the 'exclude' option, which requires either a site reference or multiple site references in a vector. If this is done, the site with the next lowest similarity distance measure is added to the group (until the total number of years is at least N). Sites with URBEXT2000 (urban extent) > 0.03 are excluded by default. If a gauged assessment is required and the site of interest is urban, it can be included by setting iug = TRUE, in which case the user may wish to de-urbanise the subject site's Lcv and LSkew (L-moment ratios) by setting DeUrb = TRUE. If the user has more data available for a particular site within the pooling group, the Lcv and LSkew for the site can be updated after the group has been finalised; an example of doing so is provided below. The pooling method is outlined in Science Report: SC050050 - Improving the FEH statistical procedures for flood frequency estimation.
#'@param CDs catchment descriptors derived from either GetCDs or ImportCDs
#'@param AREA catchment area in km2
#'@param SAAR catchment standard average annual rainfall (1961-1990) in mm
#'@param FARL catchment flood attenuation from reservoirs & lakes
#'@param FPEXT catchment floodplain extent. The proportion of the catchment that is estimated to be inundated by a 100-year flood
#'@param N minimum Number of total gauged record years for the pooling group
#'@param exclude sites to exclude from the pooling group. Either a single site reference or a vector of site references (numeric)
#'@param iug iug stands for 'include urban gauge'. It is a logical argument with a default of FALSE. TRUE will override the default and add the closest site in catchment descriptor space to the CDs provided if it has URBEXT2000 >= 0.03
#'@param DeUrb logical argument with a default of FALSE. If TRUE, the Lcv and LSkew of the top site in the pooling group will be de-urbanised
#'@examples
#'#Get some catchment descriptors
#'CDs.73005 <- GetCDs(73005)
#'#Set up a pooling group object called Pool.73005 excluding sites 79005 & 71011.
#'#Then print the group to the console
#'Pool.73005 <- Pool(CDs.73005, exclude = c(79005, 71011))
#'Pool.73005
#'#Form a pooling group, called PoolGroup, with the catchment descriptors specifically
#'PoolGroup <- Pool(AREA = 1000, SAAR = 800, FARL = 1, FPEXT = 0.01)
#'#Form a pooling group using an urban catchment which is intended for enhanced
#'#single site estimation - by including it in the group.
#'CDs.39001 <- GetCDs(39001)
#'Pool.39001 <- Pool(CDs.39001, iug = TRUE, DeUrb = TRUE)
#'#Change the Lcv and LSkew of the top site in the pooling group to 0.19 & 0.18,
#'#respectively. Lcv and Lskew are in columns 16 & 17.
#'Pool.39001[1,c(16, 17)] <- c(0.19, 0.18)
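#'#Form the group again with a larger minimum total record length (N = 800 is illustrative)
#'PoolN800.73005 <- Pool(CDs.73005, N = 800)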
#'@return A data.frame of the pooling group with site reference row names and 24 columns, each providing catchment & gauge details for the sites in the pooling group.
#'@author Anthony Hammond
Pool <- function(CDs = NULL, AREA, SAAR, FARL, FPEXT, N = 500, exclude = NULL, iug = FALSE, DeUrb = FALSE){
  if(is.null(exclude) == FALSE) {
    Site.id <- match(exclude, row.names(NRFAData))
    if(any(is.na(Site.id)) == TRUE) stop ("Site ID/s not within the set of sites considered suitable for pooling, therefore it is/they are already excluded")}
  suppressWarnings(if(is.null(CDs) == TRUE){
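    # Similarity distance measure (SDM): weighted distance from the subject catchment in
    # log(AREA), log(SAAR), FARL and FPEXT space (weights 3.2, 0.5, 0.1 & 0.2; see SC050050)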

    SDMs <- function(x, AREA, SAAR, FARL, FPEXT)
    {
      sqrt(
        (3.2*((log(AREA) - log(x[,1]))/1.28)^2)
        + (0.5*((log(SAAR) - log(x[,15]))/0.37)^2)
        + (0.1*((FARL - x[,8])/0.05)^2)
        + (0.2*((FPEXT - x[,9])/0.04)^2))
    }
    suppressWarnings(if (is.null(exclude) == TRUE) {NRFAData <- NRFAData} else
    {
      index <- NULL
      for (i in 1:length(exclude)) {index[i] <- which(row.names(NRFAData) == exclude[i])}
      NRFAData <- NRFAData[-index,]})
    Site <- SDMs(NRFAData, AREA, SAAR, FARL, FPEXT)
    Refs <- data.frame(row.names(NRFAData), Site)
    colnames(Refs) <- c("Site", "SDM")
    Refs.Order <- Refs[order(Refs$SDM),]
    Char.Sites <- Refs.Order$Site
    Char.Sites <- as.character(Char.Sites)
    Site.NRFA <- NRFAData[Char.Sites, ]
    UrbInd <- Char.Sites[1]
    ug.index <- which(row.names(NRFAData) == UrbInd)
    UrbUrbInd <- NRFAData[ug.index,22]
    Site.NRFA <- subset(Site.NRFA, URBEXT2000 <= 0.03)
    if(iug == FALSE) {Site.NRFA <- Site.NRFA}
    if(iug == TRUE & UrbUrbInd > 0.03) {Site.NRFA <- rbind(NRFAData[ug.index,], Site.NRFA)} else {Site.NRFA <- Site.NRFA}
    if(iug == TRUE & UrbUrbInd <= 0.03) {print("Warning: iug = TRUE and the closest site in catchment descriptor space has URBEXT2000 <= 0.03. Group formed as if iug = FALSE")}
    SDM <- SDMs(Site.NRFA,AREA, SAAR, FARL, FPEXT)
    Site.NRFA <- cbind(Site.NRFA, SDM)
    Cum.N <- NULL
    for (i in 1:length(Site.NRFA$N)) {Cum.N[i] <- sum(Site.NRFA$N[1:i])}
    Site.NRFA <- Site.NRFA[1:min(which(Cum.N >= N)),]
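    # Discordancy measure (Hosking & Wallis, 1997): D[i] = (n/3) * (u[i] - u.bar)' S^-1 (u[i] - u.bar),
    # where u[i] holds the site's Lcv, LSkew & LKurt and S is the sum of cross-product matrices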
    Ds <-  function(x)
    {
      u.hat <- apply(tf, 2, mean)
      Res <- numeric(1)
      for (i in 1:length(Site.NRFA$N)) {Res <- Res+as.numeric(tf[i,]-u.hat)%*%t(as.numeric((tf[i,]-u.hat)))}
      D <- NULL
      for (i in 1:length(Site.NRFA$N)) {D[i] <- ((1/3)*length(Site.NRFA$N))*as.numeric(tf[i,]-u.hat)%*%solve(Res)%*%(as.numeric((tf[i,]-u.hat)))}
      return(D)
    }
    tf <- data.frame(Site.NRFA$Lcv, Site.NRFA$LSkew, Site.NRFA$LKurt)
    Discordancy <- Ds(tf)
    crit.vs <- c(1.333, 1.648, 1.917, 2.140, 2.329, 2.491, 2.632, 2.757, 2.869, 2.971, 3)
    xd <- seq(5,15)
    Crit.frame <- data.frame(xd, crit.vs)
    C.V <- Crit.frame[min(which(Crit.frame$xd >= length(Site.NRFA$N))),2]
    Discordant <- NULL
    for (i in 1:length(Discordancy)) {Discordant[i] <- isTRUE(Discordancy[i] > C.V)}
    Site.NRFA <- cbind(Site.NRFA, Discordancy, Discordant)
    Site.NRFA <- Site.NRFA[,-c(12,13,14,16,19,20)]
    colnames(Site.NRFA)[24] <- "Discordant?"
    if(DeUrb == FALSE) {Site.NRFA <- Site.NRFA} else
    { LcvCol <- which(colnames(Site.NRFA) == "Lcv")
    LskewCol <- which(colnames(Site.NRFA) == "LSkew")
    UrbCol <- which(colnames(Site.NRFA) == "URBEXT2000")
    DeUrbesLCV <- LcvUrb(Site.NRFA[1, LcvCol], Site.NRFA[1, UrbCol], DeUrb = TRUE)
    DeUrbesLSKEW <- LSkewUrb(Site.NRFA[1, LskewCol], Site.NRFA[1, UrbCol], DeUrb = TRUE)
    Site.NRFA[1,LcvCol] <- DeUrbesLCV
    Site.NRFA[1,LskewCol] <- DeUrbesLSKEW
    if(Site.NRFA[1,14] < 0.03) {print("Warning: DeUrb = TRUE and the top site in the group has URBEXT2000 < 0.03. The site's Lcv & LSkew have been adjusted as if it were urban")}
    }
    return(Site.NRFA)
  } else {
    SDMs <- function(x, AREA, SAAR, FARL, FPEXT)
    {
      sqrt(
        (3.2*((log(AREA) - log(x[,1]))/1.28)^2)
        + (0.5*((log(SAAR) - log(x[,15]))/0.37)^2)
        + (0.1*((FARL - x[,8])/0.05)^2)
        + (0.2*((FPEXT - x[,9])/0.04)^2))
    }
    suppressWarnings(if (is.null(exclude) == TRUE) {NRFAData <- NRFAData} else
    {
      index <- NULL
      for (i in 1:length(exclude)) {index[i] <- which(row.names(NRFAData) == exclude[i])}
      NRFAData <- NRFAData[-index,]})
    Site <- SDMs(NRFAData, CDs[1,2], CDs[15,2], CDs[8,2], CDs[9,2])
    Refs <- data.frame(row.names(NRFAData), Site)
    colnames(Refs) <- c("Site", "SDM")
    Refs.Order <- Refs[order(Refs$SDM),]
    Char.Sites <- Refs.Order$Site
    Char.Sites <- as.character(Char.Sites)
    Site.NRFA <- NRFAData[Char.Sites, ]
    UrbInd <- Char.Sites[1]
    ug.index <- which(row.names(NRFAData) == UrbInd)
    UrbUrbInd <- NRFAData[ug.index, which(colnames(NRFAData) == "URBEXT2000")]
    Site.NRFA <- subset(Site.NRFA, URBEXT2000 <= 0.03)
    if(iug == FALSE) {Site.NRFA <- Site.NRFA}
    if(iug == TRUE & UrbUrbInd > 0.03) {Site.NRFA <- rbind(NRFAData[ug.index,], Site.NRFA)} else {Site.NRFA <- Site.NRFA}
    if(iug == TRUE & UrbUrbInd <= 0.03) {print("Warning: iug = TRUE and the closest site in catchment descriptor space has URBEXT2000 <= 0.03. Group formed as if iug = FALSE")}
    SDM <- SDMs(Site.NRFA,CDs[1,2], CDs[15,2], CDs[8,2], CDs[9,2])
    Site.NRFA <- cbind(Site.NRFA, SDM)
    Cum.N <- NULL
    for (i in 1:length(Site.NRFA$N)) {Cum.N[i] <- sum(Site.NRFA$N[1:i])}
    Site.NRFA <- Site.NRFA[1:min(which(Cum.N >= N)),]
    Ds <-  function(x)
    {
      u.hat <- apply(tf, 2, mean)
      Res <- numeric(1)
      for (i in 1:length(Site.NRFA$N)) {Res <- Res+as.numeric(tf[i,]-u.hat)%*%t(as.numeric((tf[i,]-u.hat)))}
      D <- NULL
      for (i in 1:length(Site.NRFA$N)) {D[i] <- ((1/3)*length(Site.NRFA$N))*as.numeric(tf[i,]-u.hat)%*%solve(Res)%*%(as.numeric((tf[i,]-u.hat)))}
      return(D)
    }
    tf <- data.frame(Site.NRFA$Lcv, Site.NRFA$LSkew, Site.NRFA$LKurt)
    Discordancy <- Ds(tf)
    crit.vs <- c(1.333, 1.648, 1.917, 2.140, 2.329, 2.491, 2.632, 2.757, 2.869, 2.971, 3)
    xd <- seq(5,15)
    Crit.frame <- data.frame(xd, crit.vs)
    C.V <- Crit.frame[min(which(Crit.frame$xd >= length(Site.NRFA$N))),2]
    Discordant <- NULL
    for (i in 1:length(Discordancy)) {Discordant[i] <- isTRUE(Discordancy[i] > C.V)}
    Site.NRFA <- cbind(Site.NRFA, Discordancy, Discordant)
    Site.NRFA <- Site.NRFA[,-c(12,13,14,16,19,20)]
    colnames(Site.NRFA)[24] <- "Discordant?"
    if(DeUrb == FALSE) {Site.NRFA <- Site.NRFA} else
    { LcvCol <- which(colnames(Site.NRFA) == "Lcv")
    LskewCol <- which(colnames(Site.NRFA) == "LSkew")
    UrbCol <- which(colnames(Site.NRFA) == "URBEXT2000")
    DeUrbesLCV <- LcvUrb(Site.NRFA[1, LcvCol], Site.NRFA[1, UrbCol], DeUrb = TRUE)
    DeUrbesLSKEW <- LSkewUrb(Site.NRFA[1, LskewCol], Site.NRFA[1, UrbCol], DeUrb = TRUE)
    Site.NRFA[1,LcvCol] <- DeUrbesLCV
    Site.NRFA[1,LskewCol] <- DeUrbesLSKEW
    if(Site.NRFA[1,14] < 0.03) {print("Warning: DeUrb = TRUE and the top site in the group has URBEXT2000 < 0.03")}
    }
    return(Site.NRFA)
  } )
}


# PoolEst -----------------------------------------------------------------

#' Pooled flood estimates
#'
#' Provides pooled results from a pooling group - gauged, ungauged and with urban adjustment if necessary.
#'
#'PoolEst is a function to provide results from a pooling group derived using the Pool function. QMED (median annual maximum flow) needs to be supplied and can be derived from the QMED function for ungauged estimates or from the annual maximum sample for gauged estimates. If the catchment of interest is urban, the UrbAdj argument can be set to TRUE. If this is done, either URBEXT (urban extent) or the catchment descriptors, derived from ImportCDs or GetCDs, need to be provided. The methods for estimating pooled growth curves are according to Science Report: SC050050 - Improving the FEH statistical procedures for flood frequency estimation. The methods for estimating the L-moments and growth factors are outlined in the Flood Estimation Handbook (1999), volume 3. The methods for quantifying uncertainty are detailed in Hammond, A. (2021). Sampling uncertainty of UK design flood estimation. Hydrology Research, 52 (6): 1357-1371. When UrbAdj = TRUE, urban adjustment is applied to the QMED estimate according to the method outlined in the guidance by Wallingford HydroSolutions: 'WINFAP 4 Urban Adjustment Procedures'. If trend = TRUE & gauged = FALSE, the QMED is multiplied by a trend coefficient. The coefficient was derived by calculating a weighted (by sample size) mean proportional change in the 2-year flow, from the UK National River Flow Archive benchmark sites considered suitable for pooling. The weighted per-year change was first calculated and then multiplied by half the mean sample size of sites suitable for QMED. If trend = TRUE and gauged = TRUE, the weighted per-year change is multiplied by half the sample size of the first site in the pooling group. This approach attempts to include a generic non-stationarity in the pooled estimates by adjusting the location parameter. Among other assumptions (such as the change being linear and applying only to the location of the distribution), it assumes that apparent trends at individual sites are unreliable due to short record lengths, or are site specific due to human influence, but that across many sites the mean increase in median peak discharge is representative of a non-stationary process across the UK (this approach will be regionalised in a later version of the tool). The trend is applied to whatever is in the QMED argument; therefore, trend should equal FALSE if the QMED estimate has already been user-adjusted for trend. If the Lcv & LSkew have also been user-adjusted for a gauged site, these can be changed in the pooling group (see the Pool function details). The weighted mean per-year QMED trend was estimated to be 0.12 percent +/- 0.05 (95 percent uncertainty, calculated by weighted resampling).
#'
#'@param x pooling group derived from the Pool function
#'@param gauged logical argument with a default of FALSE. TRUE for gauged results and FALSE for ungauged
#'@param QMED estimate of the median annual maximum flow
#'@param dist a choice of distribution for the estimates. The choices are "GenLog", "GEV", or "Gumbel"; the generalised logistic, generalised extreme value, and Gumbel distribution, respectively. The default is "GenLog"
#'@param RP return period of interest. By default the following RPs are provided: 2, 5, 10, 20, 50, 75, 100, 200, 500, 1000
#'@param UrbAdj logical argument with a default of FALSE. When TRUE, an urban adjustment is applied to the pooled Lcv and LSkew
#'@param CDs catchment descriptors derived from either GetCDs or ImportCDs
#'@param URBEXT the catchment URBEXT2000, to be supplied if UrbAdj is TRUE and if CDs have not been
#'@param trend logical argument with a default of FALSE. TRUE adjusts the stationary QMED estimate to a non-stationary estimate
#'@param fseQMED factorial standard error of the median annual maximum (QMED) estimate, used for quantifying ungauged uncertainty. Default is 1.41
#'@examples
#'#Get some catchment descriptors and form a pooling group. It's urban and
#'#therefore the site of interest is not included.
#'CDs.27083 <- GetCDs(27083)
#'Pool.27083 <- Pool(CDs.27083)
#'#Get results for the ungauged case, with urban adjustment
#'PoolEst(Pool.27083, QMED = 11.941, UrbAdj = TRUE, CDs = CDs.27083)
#'#Form the group again with the urban gauge included & undertake a gauged estimate
#'#with urban adjustment. QMED in this example is estimated as the median of the annual
#'#maximum series for site 27083.
#'PoolG.27083 <- Pool(CDs.27083, iug = TRUE, DeUrb = TRUE)
#'PoolEst(PoolG.27083, QMED = 12.5, UrbAdj = TRUE, CDs = CDs.27083)
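#'#Estimate a chosen set of return periods (illustrative) using the Gumbel distribution
#'PoolEst(PoolG.27083, QMED = 12.5, UrbAdj = TRUE, CDs = CDs.27083, RP = c(10, 50, 100), dist = "Gumbel")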
#'
#'@return A list of length two. Element one is a data frame with columns; return period (RP), peak flow estimates (Q), growth factor estimates (GF), lower and upper intervals of uncertainty (68 percent intervals for ungauged and 95 percent for gauged). The second element is the estimated Lcv and Lskew.
#'@author Anthony Hammond
PoolEst <- function(x, gauged = FALSE, QMED, dist = "GenLog", RP = c(2,5,10,20,50,75,100,200,500,1000), UrbAdj = FALSE, CDs = NULL, URBEXT = NULL, trend = FALSE, fseQMED = 1.41) {
  if(is.data.frame(x) == FALSE) {stop("x must be a pooled group. Pooled groups can be created with the Pool() function")}
  if(ncol(x) != 24) stop ("x must be a pooled group. Pooled groups can be created with the Pool() function")
  if(UrbAdj == TRUE) {
    if(is.null(URBEXT) == TRUE & is.null(CDs) == TRUE) stop("if UrbAdj = TRUE, URBEXT or CDs must be provided")
    if(is.null(URBEXT) == TRUE) {URBEXT2000 <- CDs[18,2]} else {URBEXT2000 <- URBEXT}}
  if(dist == "GenLog") {func <- GenLogGF}
  if(dist == "GEV") {func <- GEVGF}
  if(dist == "Gumbel") {func <- GumbelGF}
  if(gauged == FALSE) {lcv <- WungLcv(x)} else {lcv <- WGaugLcv(x)}
  if(gauged == FALSE) {lskew <- WungLSkew(x)} else {lskew <- WGaugLSkew(x)}
  if(UrbAdj == TRUE) {lcv <- lcv*0.68654^(1.567*URBEXT2000)} else {lcv <- lcv}
  if(UrbAdj == TRUE) {lskew <- ((lskew+1)*1.096017^(1.567*URBEXT2000))-1} else {lskew <- lskew}
  if(dist == "Gumbel") {Zt <- func(lcv, RP = RP)} else {Zt <- func(lcv, lskew, RP = RP)}
  GF <- as.numeric(format(round(Zt, 3), nsmall = 3))
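  # Trend adjustment (see details): for ungauged estimates QMED is multiplied by a fixed trend
  # coefficient; for gauged estimates QMED is increased by the per-year change applied over half
  # the record length of the top site in the group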
  if(trend == TRUE & gauged == FALSE) {QMED <- QMED*1.023}
  if(trend == TRUE & gauged == TRUE) {QMED <- QMED+(QMED*0.001048557*(x$N[1]/2))}
  Qt <- Zt*QMED
  Q <- as.numeric(format(round(Qt, 3), nsmall = 3))
  PooledLcv <- lcv
  PooledLSkew <- lskew
  if(gauged == TRUE) {
    VAR <-  (((median(x[1,15]) * x[1,16])^2)/x[1,21])* exp(1.3125 + 0.599*(log(RP-1)) + 0.00399*(log(RP-1)^2))
    SE <-sqrt(VAR)
    lower95 <- round(Q-SE*1.96, 3)
    upper95 <- round(Q+SE*1.96, 3)
    res <- data.frame(RP, Q, GF, lower95, upper95)
  }
  if(gauged == FALSE){
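    # Factorial standard error of the pooled growth factor as a function of RP; this is combined
    # multiplicatively with fseQMED, and the 68 percent interval is formed as Q/fse and Q*fse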
    fseGFfunc <- function(RP) {
      if(RP == 2) {fse <- 1} else {
        Y <- -log(-log(1-1/RP))
        fse <- round(0.0069*Y^2 - 0.0099*Y + 1.0039, 3)
      }
      return(fse)
    }
    fseGF <- NULL
    for(i in 1:length(RP)) {fseGF[i] <- fseGFfunc(RP[i])}
    fse <- fseGF*fseQMED
    lower68 <- round(Q/fse, 3)
    upper68 <- round(Q*fse, 3)
    res <- data.frame(RP, Q, GF, lower68, upper68)
  }
  #res <- data.frame(RP, Q, GF)
  Pars <- cbind(PooledLcv, PooledLSkew)
  return(list(res, Pars))
}


# OptimPars ---------------------------------------------------------------

#' Optimise distribution parameters
#'
#' Estimates the parameters of the generalised extreme value, generalised logistic, or Gumbel distribution from known return period estimates
#'
#' Given a dataframe with return periods (RPs) in the first column and associated estimates in the second column, this function provides an estimate of the distribution parameters. Ideally the first RP should be 2. Extrapolation outside the RPs used for calibration comes with greater uncertainty.
#'@param x a data.frame with RPs in the first column and associated estimates in the second column
#'@param dist a choice of distribution for the estimates. The choices are "GenLog", "GEV", or "Gumbel" - the generalised logistic, generalised extreme value and Gumbel distribution, respectively. The default is "GenLog"
#'@examples
#'#Get some catchment descriptors and some quick results. Then estimate the GenLog parameters
#'Results <- QuickResults(GetCDs(96001), plot = FALSE)[[1]]
#'OptimPars(Results[,1:2])
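#'#Fit the GEV instead (assuming a GEV is an appropriate choice for these results)
#'OptimPars(Results[,1:2], dist = "GEV")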
#'
#'@return The location, scale and shape parameters for the generalised logistic or generalised extreme value distribution, or the location and scale parameters for the Gumbel.
#' @author Anthony Hammond
OptimPars <-  function(x, dist = "GenLog") {
  if(is.data.frame(x) == FALSE) {stop("x must be a data.frame with RPs in the first column and associated variable in the second")}
  res <- x
  RP <- x[,1]
  Q <- x[,2]
  if(dist == "GenLog") {
    min.GL <- function(data, par) {
      with(data, sum(((par[1]+par[2]/par[3]*(1-(RP-1)^-par[3])-Q)^2)))}
    result <- optim(par = c(x[1,2], mean(c(x[1,2]/4, x[1,2]/3)), 0.01), fn = min.GL, data = res, lower = c(NA, NA, -1), upper = c(NA, NA, 1), method = "L-BFGS-B")
    Pars <- result$par
    Pars <- data.frame(Pars[1], Pars[2], Pars[3])
    colnames(Pars) <- c("loc", "scale", "shape")
  }
  if(dist == "GEV"){
    min.GEV <- function(data, par) {
      with(data, sum(((par[1]+par[2]/par[3]*(1-(-log(1-1/RP))^par[3])-Q)^2)))}
    result <- optim(par = c(x[1,2], mean(c(x[1,2]/4, x[1,2]/3)), 0.01), fn = min.GEV, data = res, lower = c(NA, NA, -1), upper = c(NA, NA, 1), method = "L-BFGS-B")
    Pars <- result$par
    Pars <- data.frame(Pars[1], Pars[2], Pars[3])
    colnames(Pars) <- c("loc", "scale", "shape")
  }
  if(dist == "Gumbel"){
    min.Gum <- function(data, par) {
      with(data, sum((((par[1]+par[2]*(-log(-log(1-(1/RP)))))-Q)^2)))}
    result <- optim(par = c(x[1,2], (x[1,2]/3)), fn = min.Gum, data = res, method = "L-BFGS-B")
    Pars <- result$par
    Pars <- data.frame(Pars[1], Pars[2])
    colnames(Pars) <- c("loc", "scale")
  }
  return(Pars)
}




# DistFuncs ---------------------------------------------------------------

#'Generalised logistic distribution growth factors
#'
#'Estimated growth factors as a function of return period, with inputs of Lcv & LSkew (linear coefficient of variation & linear skewness)
#'
#'@details Growth factors are calculated by the method outlined in the Flood Estimation Handbook, volume 3, 1999.
#'
#' @param lcv linear coefficient of variation
#' @param lskew linear skewness
#' @param RP return period
#' @examples
#' #Estimate the 50-year growth factor from an Lcv and Lskew of 0.17 and 0.04, respectively.
#' GenLogGF(0.17, 0.04, RP = 50)
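#' #Multiply by an estimate of QMED (a hypothetical 45 m3/s here) for the 50-year flow
#' GenLogGF(0.17, 0.04, RP = 50)*45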
#' @return Generalised logistic estimated growth factor
#' @author Anthony Hammond

GenLogGF <- function(lcv, lskew, RP) {
  k <- -lskew
  B <- lcv*k*sin((pi)*k)/(k*pi*(k+lcv)-lcv*sin((pi)*k))
  zt <- 1+(B/k)*(1-(RP-1)^lskew)
  return(zt)
}

#'Generalised extreme value distribution growth factors
#'
#'Estimated growth factors as a function of return period, with inputs of Lcv & LSkew (linear coefficient of variation & linear skewness)
#'
#'@details Growth factors are calculated by the method outlined in the Flood Estimation Handbook, volume 3, 1999.
#' @param lcv linear coefficient of variation
#' @param lskew linear skewness
#' @param RP return period
#' @examples
#' #Estimate the 50-year growth factor from Lcv = 0.17 and Lskew = 0.04
#' GEVGF(0.17, 0.04, RP = 50)
#' @return Generalised extreme value estimated growth factor
#' @author Anthony Hammond
GEVGF <- function(lcv,lskew, RP) {
  C <- (2/(3+lskew)) - (log(2)/log(3))
  kgev <- 7.859*C+2.95548*C^2
  Bgev <- (kgev*lcv)/(lcv*(gamma(1+kgev)-(log(2))^kgev)+gamma(1+kgev)*(1-2^-kgev))
  zt <- 1+(Bgev/kgev)*(log(2)^kgev - (log(RP/(RP-1)))^kgev)
  return(zt)
}

#'Generalised Pareto distribution growth factors
#'
#'Estimated growth factors as a function of return period, with inputs of Lcv & LSkew (linear coefficient of variation & linear skewness)
#'
#'@details Growth factors (GF) are calculated by the method outlined in the Flood Estimation Handbook, volume 3, 1999. The average number of peaks per year argument (ppy) is for the function to convert from the peaks over threshold (POT) scale to the annual scale. For example, if there are 3 peaks per year, the probability associated with the 100-yr return period estimate would be 0.01/3 (i.e. an RP of 300 on the POT scale) rather than 0.01.
#' @param lcv linear coefficient of variation
#' @param lskew linear skewness
#' @param RP return period
#' @param ppy peaks per year
#' @examples
#' #Get POT flow data from the Thames at Kingston (noting the no. peaks per year).
#' #Then estimate the 100-year growth factor with lcv and lskew estimates
#' TPOT <- POTextract(ThamesPQ[,c(1,3)], thresh = 0.90)
#' GenParetoGF(Lcv(TPOT$peak), LSkew(TPOT$peak), RP = 100, ppy = 1.867)
#' #multiply by the median of the POT data for an estimate of the 100-yr flood
#' GenParetoGF(Lcv(TPOT$peak), LSkew(TPOT$peak), RP = 100, ppy = 1.867)*median(TPOT$peak)
#' @return Generalised Pareto estimated growth factor
#' @author Anthony Hammond
GenParetoGF <- function(lcv, lskew, RP, ppy = 1) {
  k <- (1-3*lskew)/(1+lskew)
  Bgp <- (lcv*k*(1+k)*(2+k))/(k-lcv*(2+k)*(2^-k*(1+k)-1))
  RPppy <- 1/((1/RP)/ppy)
  Zt <- 1 + (Bgp/k) *((2^-k)-(1-(1-(1/RPppy)))^k)
  return(Zt)
}


#'Gumbel distribution growth factors
#'
#'Estimated growth factors as a function of return period, with input of Lcv (linear coefficient of variation)
#'
#'@details Growth factors are calculated by the method outlined in the Flood Estimation Handbook, volume 3, 1999.
#'
#' @param lcv linear coefficient of variation
#' @param RP return period
#' @examples
#' #Estimate the 50-year growth factor from an Lcv of 0.17.
#' GumbelGF(0.17, RP = 50)
#' @return Gumbel estimated growth factor
#' @author Anthony Hammond

GumbelGF <- function(lcv, RP){
  B <- lcv/(log(2)-lcv*(0.5772+log(log(2))))
  gf <- 1+B*(log(log(2))-log(-log(1-(1/RP))))
  return(gf)
}


#'Generalised logistic distribution - estimates directly from sample
#'
#'Estimated quantiles as a function of return period (RP) and vice versa, directly from the data
#'
#'If the argument q is used, it overrides RP and provides RP as a function of q (magnitude of variable) as opposed to q as a function of RP. The parameters are estimated by the method of L-moments, as detailed in 'Hosking J. Wallis J. 1997 Regional Frequency Analysis: An Approach Based on L-moments. Cambridge University Press, New York'. The trend argument allows the location parameter to move in line with the observed linear trend of the sample. Another option is to detrend the sample first with the DeTrend function. On average this makes little difference to the two-year flow, but it tends to give lower results for longer return periods (though not always) when compared with the trend option in this function.
#' @param x numeric vector (block maxima sample)
#' @param RP return period (default = 100)
#' @param q quantile (magnitude of variable)
#' @param trend logical argument with default of FALSE. If TRUE, a linear adjustment to the location parameter is made to account for non-stationarity
#' @examples
#' #Get an annual maximum sample and estimate the 50-year RP
#' AM.27090 <- GetAM(27090)
#' GenLogAM(AM.27090$Flow, RP = 50)
#' #Estimate the RP for a 600m3/s discharge
#' GenLogAM(AM.27090$Flow, q = 600)
#' #Estimate the 50-year RP allowing for non-stationarity in the location parameter
#' GenLogAM(AM.27090$Flow, RP = 50, trend = TRUE)
#' @return quantile as a function of RP or vice versa, with the option of accounting for the linear trend in the sample
#' @author Anthony Hammond
GenLogAM <- function(x, RP = 100, q = NULL, trend = FALSE)
{
  if(is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  Sort.x <- sort(x)
  Rank <- seq(1, length(x))
  b0 <- mean(x, na.rm = TRUE)
  b1 <- mean((Rank-1)/(length(x)-1)*Sort.x, na.rm = TRUE)
  b2 <- mean(((Rank-1)*(Rank-2))/((length(x)-1)*(length(x)-2))*Sort.x, na.rm = TRUE)
  b3 <- mean(((Rank-1)*(Rank-2)*(Rank-3))/((length(x)-1)*(length(x)-2)*(length(x)-3))*Sort.x, na.rm = TRUE)
  L1 <- b0
  L2 <- 2*b1-b0
  L3 <- 6*b2-6*b1+b0
  L4 <- 20*b3-30*b2+12*b1-b0
  Lcv <- L2/L1
  LSkew <- L3/L2
  k <- -LSkew
  a <- (L2*sin(k*pi))/(k*pi)
  loc <- b0-a*((1/k)-(pi/sin(k*pi)))
  if(is.null(q) == TRUE) {res <- loc+ a/k * (1-(RP-1) ^-k)}
  else {
    y <- -k^(-1) * log(1 - k * (q - loc)/a)
    P <- 1-(1/(1+exp(-y)))
    res <- 1/P
  }
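  # Linear trend adjustment: estimate the least squares slope of the series against time index,
  # then shift the location parameter by the fitted change between the middle and end of the record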
  m <- function(i, j) {sum((i-mean(i))*(j-mean(j)))/sum((i-mean(i))^2)}
  M <- m(i = seq(1, length(x)), j = x)
  b <- mean(x)-M*mean(seq(1,length(x)))
  LM <- (M*(length(x))+b)-(M*median(seq(1,length(x)))+b)
  LocTrend <- loc+LM
  if (is.null(q) == TRUE) {resTrend <- LocTrend+ a/k * (1-(RP-1) ^-k)}
  else {
    yTrend <- -k^(-1) * log(1 - k * (q - LocTrend)/a)
    Ptrend <- 1-(1/(1+exp(-yTrend)))
    resTrend <- 1/Ptrend}
  if(trend == FALSE) {return(res)} else {return(resTrend)}
}



#'Generalised extreme value distribution - estimates directly from sample
#'
#'Estimated quantiles as function of return period (RP) and vice versa, directly from the data
#'
#'If the argument q is used, it overrides RP and provides RP as a function of q (magnitude of variable) as opposed to q as a function of RP. The parameters are estimated by the method of L-moments, as detailed in 'Hosking J. Wallis J. 1997 Regional Frequency Analysis: An Approach Based on L-moments. Cambridge University Press, New York'. The trend argument allows the location parameter to move in line with the observed linear trend of the sample. Another option is to detrend the sample first with the DeTrend function. On average this makes little difference to the two-year flow, but it tends to give lower results for longer return periods (though not always) when compared with the trend option in this function.
#' @param x numeric vector (block maxima sample)
#' @param RP return period (default = 100)
#' @param q quantile (magnitude of variable)
#' @param trend logical argument with default of FALSE. If TRUE, a linear adjustment to the location parameter is made to account for non-stationarity
#' @examples
#' #Get an annual maximum sample and estimate the 50-year RP
#' AM.27090 <- GetAM(27090)
#' GEVAM(AM.27090$Flow, RP = 50)
#' #Estimate the RP for a 600m3/s discharge
#' GEVAM(AM.27090$Flow, q = 600)
#' #Estimate the 50-year RP allowing for non-stationarity in the location parameter
#' GEVAM(AM.27090$Flow, RP = 50, trend = TRUE)
#' @return quantile as a function of RP or vice versa, with the option of accounting for the linear trend in the sample
#' @author Anthony Hammond
GEVAM <- function(x, RP = 100, q = NULL, trend = FALSE)
{
  if(is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  Sort.x <- sort(x)
  Rank <- seq(1, length(x))
  b0 <- mean(x, na.rm = TRUE)
  b1 <- mean((Rank-1)/(length(x)-1)*Sort.x, na.rm = TRUE)
  b2 <- mean(((Rank-1)*(Rank-2))/((length(x)-1)*(length(x)-2))*Sort.x, na.rm = TRUE)
  b3 <- mean(((Rank-1)*(Rank-2)*(Rank-3))/((length(x)-1)*(length(x)-2)*(length(x)-3))*Sort.x, na.rm = TRUE)
  L1 <- b0
  L2 <- 2*b1-b0
  L3 <- 6*b2-6*b1+b0
  L4 <- 20*b3-30*b2+12*b1-b0
  Lcv <- L2/L1
  LSkew <- L3/L2
  C <- (2/(3+LSkew)) - (log(2)/log(3))
  k <- 7.859*C+2.95548*C^2
  a <- (L2*k)/((1-2^-k)*gamma(1+k))
  loc <- b0-a*((1-gamma(1+k))/k)
  if(is.null(q) == TRUE) {res <- loc+a/k*(1-(-log(1-1/RP))^k)} else {
    y <- -k^(-1) * log(1 - k * (q - loc)/a)
    P <- 1-(exp(-exp(-y)))
    res <- 1/P}
  m <- function(i, j) {sum((i-mean(i))*(j-mean(j)))/sum((i-mean(i))^2)}
  M <- m(i = seq(1, length(x)), j = x)
  b <- mean(x)-M*mean(seq(1,length(x)))
  LM <- (M*(length(x))+b)-(M*median(seq(1,length(x)))+b)
  LocTrend <- loc+LM
  if (is.null(q) == TRUE) {resTrend <- LocTrend+a/k*(1-(-log(1-1/RP))^k)}
  else {
    yTrend <- -k^(-1) * log(1 - k * (q - LocTrend)/a)
    Ptrend <- 1-(exp(-exp(-yTrend)))
    resTrend <- 1/Ptrend}
  if(trend == FALSE) {return(res)} else {return(resTrend)}
}



#'Generalised Pareto distribution - estimates directly from sample
#'
#'Estimated quantiles as function of return period (RP) and vice versa, directly from the data
#'
#'If the argument q is used, it overrides RP and provides RP as a function of q (magnitude of variable) as opposed to q as a function of RP. The average number of peaks per year argument (ppy) is for the function to convert from the peaks over threshold (POT) scale to the annual scale. For example, if there are 3 peaks per year, the probability associated with the 100-yr return period estimate would be 0.01/3 (i.e. an RP of 300 on the POT scale) rather than 0.01. The parameters are estimated by the method of L-moments, as detailed in 'Hosking J. Wallis J. 1997 Regional Frequency Analysis: An Approach Based on L-moments. Cambridge University Press, New York'.
#' @param x numeric vector (peaks over threshold sample)
#' @param ppy peaks per year
#' @param RP return period (default = 100)
#' @param q quantile (magnitude of variable)
#' @examples
#' #Get a POT series and estimate the 50-year RP
#' ThamesPOT <- POTextract(ThamesPQ[,c(1,3)], thresh = 0.90)
#' GenParetoPOT(ThamesPOT$peak, ppy = 1.867, RP = 50)
#' #Estimate the RP for a 600m3/s discharge
#' GenParetoPOT(ThamesPOT$peak, ppy = 1.867, q = 600)
#' @return quantile as a function of RP or vice versa
#' @author Anthony Hammond
GenParetoPOT <- function(x, ppy = 1, RP = 100, q = NULL)
{
  if(is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  Sort.x <- sort(x)
  Rank <- seq(1, length(x))
  b0 <- mean(x, na.rm = TRUE)
  b1 <- mean((Rank-1)/(length(x)-1)*Sort.x, na.rm = TRUE)
  b2 <- mean(((Rank-1)*(Rank-2))/((length(x)-1)*(length(x)-2))*Sort.x, na.rm = TRUE)
  b3 <- mean(((Rank-1)*(Rank-2)*(Rank-3))/((length(x)-1)*(length(x)-2)*(length(x)-3))*Sort.x, na.rm = TRUE)
  L1 <- b0
  L2 <- 2*b1-b0
  L3 <- 6*b2-6*b1+b0
  L4 <- 20*b3-30*b2+12*b1-b0
  Lcv <- L2/L1
  LSkew <- L3/L2
  k <- (1-3*LSkew)/(1+LSkew)
  a <- (1+k)*(2+k)*L2
  loc <- L1-(2+k)*L2
  if(is.null(q) == TRUE) {res <- loc+a*(1-(1-(1-(1/RP)/ppy))^k)/k} else {
    y <- -k^-1*log(1-k*(q-loc)/a)
    P <- 1-(1-exp(-y))
    RPPOT <- 1/P
    res <- RPPOT/ppy
  }
  return(res)
}

#'Gumbel distribution - estimates directly from sample
#'
#' @description Estimated quantiles as a function of return period (RP) and vice versa, directly from the data
#' @details If the argument q is used, it overrides RP and provides RP as a function of q (magnitude of variable) as opposed to q as a function of RP. The parameters are estimated by the method of L-moments, as detailed in 'Hosking J. Wallis J. 1997 Regional Frequency Analysis: An Approach Based on L-moments. Cambridge University Press, New York'. The trend argument allows the location parameter to move in line with the observed linear trend of the sample. Another option is to detrend the sample first with the DeTrend function. On average this makes little difference to the two-year flow, but it tends to give lower results for longer return periods (though not always) when compared with the trend option in this function.
#' @param x numeric vector (block maxima sample)
#' @param RP return period (default = 100)
#' @param q quantile (magnitude of variable)
#' @param trend logical argument with default of FALSE. If TRUE, a linear adjustment to the location parameter is made to account for non-stationarity
#' @examples
#' #Get an annual maximum sample and estimate the 50-year RP
#' AM.27090 <- GetAM(27090)
#' GumbelAM(AM.27090$Flow, RP = 50)
#' #Estimate the RP for a 600m3/s discharge
#' GumbelAM(AM.27090$Flow, q = 600)
#' #Estimate the 50-year RP allowing for non-stationarity in the location parameter
#' GumbelAM(AM.27090$Flow, RP = 50, trend = TRUE)
#' @return quantile as a function of RP or vice versa, with the option of accounting for the linear trend in the sample
#' @author Anthony Hammond
GumbelAM <- function(x, RP = 100, q = NULL, trend = FALSE)
{
  if(is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  Sort.x <- sort(x)
  Rank <- seq(1, length(x))
  b0 <- mean(x, na.rm = TRUE)
  b1 <- mean((Rank-1)/(length(x)-1)*Sort.x, na.rm = TRUE)
  L1 <- b0
  L2 <- 2*b1-b0
  a <- L2/log(2)
  loc <- L1 - 0.5772*a
  if(is.null(q) == TRUE) {res <- loc - a * log(-log(1-(1/RP)))}
  else {
    Prob <- 1- exp(-exp(-(q - loc)/a))
    res <- 1/Prob
  }
  m <- function(i, j) {sum((i-mean(i))*(j-mean(j)))/sum((i-mean(i))^2)}
  M <- m(i = seq(1, length(x)), j = x)
  b <- mean(x)-M*mean(seq(1,length(x)))
  LM <- (M*(length(x))+b)-(M*median(seq(1,length(x)))+b)
  LocTrend <- loc+LM
  if (is.null(q) == TRUE) {resTrend <- LocTrend - a * log(-log(1-(1/RP)))}
  else {
    ProbTrend <- 1- exp(-exp(-(q - LocTrend)/a))
    resTrend <- 1/ProbTrend}
  if(trend == FALSE) {return(res)} else {return(resTrend)}
}



#'Generalised logistic distribution estimates from parameters
#'
#'Estimated quantiles as function of return period (RP) and vice versa, from user input parameters
#'
#'If the argument q is used, it overrides RP and provides RP as a function of q (magnitude of variable) as opposed to q as a function of RP.
#' @param loc location parameter
#' @param scale scale parameter
#' @param shape shape parameter
#' @param q quantile. magnitude of the variable under consideration
#' @param RP return period
#' @examples
#' #Get an annual maximum sample, estimate the parameters and estimate 50-year RP
#' AM.27090 <- GetAM(27090)
#' GenLogPars(AM.27090$Flow)
#' #Store parameters in an object
#' Pars <- as.numeric(GenLogPars(AM.27090$Flow))
#' #get estimate of 50-yr flow
#' GenLogEst(Pars[1], Pars[2], Pars[3], RP = 50)
#' #Estimate the RP for a 600m3/s discharge
#' GenLogEst(Pars[1], Pars[2], Pars[3], q = 600)
#' @return quantile as a function of RP or vice versa
#' @author Anthony Hammond
GenLogEst <- function(loc, scale, shape, q = NULL, RP = 100) {
  if(is.null(q) == TRUE) {res <- loc+ scale/shape * (1-(RP-1) ^-shape)}
  else {
    y <- -shape^(-1) * log(1 - shape * (q - loc)/scale)
    P <- 1-(1/(1+exp(-y)))
    res <- 1/P
  }
  return(res)
}


#'Generalised Pareto distribution estimates from parameters
#'
#'Estimated quantiles as function of return period (RP) and vice versa, from user input parameters
#'
#'If the argument q is used, it overrides RP and provides RP as a function of q (magnitude of variable) as opposed to q as a function of RP. The average number of peaks per year argument (ppy) is for the function to convert from the peaks over threshold (POT) scale to the annual scale. For example, if there are 3 peaks per year, the probability associated with the 100-yr return period estimate would be 0.01/3 (i.e. an RP of 300 on the POT scale) rather than 0.01.
#' @param loc location parameter
#' @param scale scale parameter
#' @param shape shape parameter
#' @param q quantile. magnitude of the variable under consideration
#' @param RP return period
#' @param ppy peaks per year. Default is one
#' @examples
#' #Get a POT sample, estimate the parameters, and estimate 50-year RP
#' ThamesPOT <- POTextract(ThamesPQ[,c(1,3)], thresh = 0.90)
#' GenParetoPars(ThamesPOT$peak)
#' #Store parameters in an object
#' Pars <- as.numeric(GenParetoPars(ThamesPOT$peak))
#' #get estimate of 50-yr flow
#' GenParetoEst(Pars[1], Pars[2], Pars[3], ppy = 1.867, RP = 50)
#' #Estimate the RP for a 600m3/s discharge
#' GenParetoEst(Pars[1], Pars[2], Pars[3], ppy = 1.867, q = 600)
#' @return quantile as a function of RP or vice versa
#' @author Anthony Hammond
GenParetoEst <- function(loc, scale, shape, q = NULL, RP = 100, ppy = 1) {
  if(is.null(q) == TRUE) {res <- loc+scale*(1-(1-(1-(1/RP)/ppy))^shape)/shape} else {
    y <- -shape^-1*log(1-shape*(q-loc)/scale)
    P <- 1-(1-exp(-y))
    RPPOT <- 1/P
    res <- RPPOT/ppy
  }
  return(res)
}


#'Generalised extreme value distribution estimates from parameters
#'
#'Estimated quantiles as function of return period (RP) and vice versa, from user input parameters
#'
#'If the argument q is used, it overrides RP and provides RP as a function of q (magnitude of variable) as opposed to q as a function of RP.
#' @param loc location parameter
#' @param scale scale parameter
#' @param shape shape parameter
#' @param q quantile. magnitude of the variable under consideration
#' @param RP return period
#' @examples
#' #Get an annual maximum sample, estimate the parameters and estimate 50-year RP
#' AM.27090 <- GetAM(27090)
#' GEVPars(AM.27090$Flow)
#' #Store parameters in an object
#' Pars <- as.numeric(GEVPars(AM.27090$Flow))
#' #get estimate of 50-yr flow
#' GEVEst(Pars[1], Pars[2], Pars[3], RP = 50)
#' #Estimate the RP for a 600m3/s discharge
#' GEVEst(Pars[1], Pars[2], Pars[3], q = 600)
#' @return quantile as a function of RP or vice versa
#' @author Anthony Hammond
GEVEst <- function(loc, scale, shape, q = NULL, RP = 100) {
  if(is.null(q) == TRUE) {res <- loc+scale/shape*(1-(-log(1-1/RP))^shape)} else {
    y <- -shape^(-1) * log(1 - shape * (q - loc)/scale)
    P <- 1-(exp(-exp(-y)))
    res <- 1/P}
  return(res)
}

#'Gumbel distribution estimates from parameters
#'
#'Estimated quantiles as function of return period (RP) and vice versa, from user input parameters
#'
#'If the argument q is used, it overrides RP and provides RP as a function of q (magnitude of variable) as opposed to q as a function of RP.
#' @param loc location parameter
#' @param scale scale parameter
#' @param q quantile. magnitude of the variable under consideration
#' @param RP return period
#' @examples
#' #Get an annual maximum sample, estimate the parameters and estimate 50-year RP
#' AM.27090 <- GetAM(27090)
#' Pars <- as.numeric(GumbelPars(AM.27090$Flow))
#' GumbelEst(Pars[1], Pars[2], RP = 50)
#' #Estimate the RP for a 600m3/s discharge
#' GumbelEst(Pars[1], Pars[2], q = 600)
#' @return quantile as a function of RP or vice versa
#' @author Anthony Hammond
GumbelEst <- function(loc, scale, q = NULL,RP = 100){
  if(is.null(q) == TRUE) {res <- loc+scale*(-log(-log(1-(1/RP))))}
  else {
    Prob <- 1- exp(-exp(-(q - loc)/scale))
    res <- 1/Prob}
  return(res)
}


#'Generalised extreme value distribution parameter estimates
#'
#'Estimated parameters from a sample (with Lmoments or maximum likelihood estimation) or from L1 (first L-moment), Lcv (linear coefficient of variation), and LSkew (linear skewness)
#'
#'@details The L-moment estimated parameters are by the method detailed in 'Hosking J. Wallis J. 1997 Regional Frequency Analysis: An Approach Based on L-moments. Cambridge University Press, New York'
#'
#' @param x numeric vector. The sample
#' @param mle logical argument with a default of FALSE. If FALSE, the parameters are estimated with Lmoments; if TRUE, the parameters are estimated by maximum likelihood estimation
#' @param L1 first Lmoment
#' @param LCV linear coefficient of variation
#' @param LSKEW linear skewness
#' @examples
#' #Get an annual maximum sample and estimate the parameters using Lmoments
#' AM.27090 <- GetAM(27090)
#' GEVPars(AM.27090$Flow)
#' #Estimate parameters using MLE
#' GEVPars(AM.27090$Flow, mle = TRUE)
#' #calculate Lmoments and estimate the parameters with L1, Lcv and Lskew
#' Lmoms(AM.27090$Flow)
#' #store linear moments in an object
#' LPars <- as.numeric(Lmoms(AM.27090$Flow))[c(1,5,6)]
#' GEVPars(L1 = LPars[1], LCV = LPars[2], LSKEW = LPars[3])
#' @return Parameter estimates (location, scale, shape)
#' @author Anthony Hammond
GEVPars <- function(x = NULL, mle = FALSE, L1, LCV, LSKEW) {
  if(is.null(x) == FALSE & is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  if(mle == FALSE){
    if(is.null(x)) {C <- (2/(3+LSKEW)) - (log(2)/log(3))
    Shape <- 7.859*C+2.95548*C^2
    L2 <- L1*LCV
    Scale <- (L2*Shape)/((1-2^-Shape)*gamma(1+Shape))
    Loc <- L1-Scale*((1-gamma(1+Shape))/Shape)
    return(data.frame(Loc, Scale, Shape))}
    else {
      L1 <- mean(x, na.rm = TRUE)
      LCV <- Lcv(x)
      LSKEW <- LSkew(x)
      C <- (2/(3+LSKEW)) - (log(2)/log(3))
      Shape <- 7.859*C+2.95548*C^2
      L2 <- L1*LCV
      Scale <- (L2*Shape)/((1-2^-Shape)*gamma(1+Shape))
      Loc <- L1-Scale*((1-gamma(1+Shape))/Shape)
      return(data.frame(Loc, Scale, Shape))}}
  else {
    pars <- c(mean(x), sd(x)/1.5, 0.01)
    max.lhd <- function(q, par) {
      abs(sum(log(gev.pdf(q, loc = par[1], scale = par[2], shape = par[3]))))
    }
    gev.pdf <- function(q, loc, scale, shape) {
      y <- -shape^-1*log(1-shape*(q-loc)/scale)
      p <- scale^-1*exp(1)^(-(1-shape)*y-exp(1)^(-y))
      return(p)
    }
    result <- suppressWarnings(optim(par = pars, fn = max.lhd, q = x))
    loc <- result$par[1]
    scale <- result$par[2]
    shape <- result$par[3]
    log.likelihood <- -result$value[1]
    message <- result$message
    Res <- data.frame(loc, scale, shape, log.likelihood)
    return(Res)
  }
}
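
# Illustration only: a hypothetical helper (not exported) confirming that the
# sample route and the L-moment-input route through GEVPars apply the same
# Hosking & Wallis (1997) equations, so both return the same parameters.
GEVParsCheckSketch <- function(x) {
  BySample <- GEVPars(x)
  ByLmoms <- GEVPars(L1 = mean(x, na.rm = TRUE), LCV = Lcv(x), LSKEW = LSkew(x))
  rbind(BySample, ByLmoms)
}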

#'Generalised logistic distribution parameter estimates
#'
#'Estimated parameters from a sample (with Lmoments or maximum likelihood estimation) or from L1 (first L-moment), Lcv (linear coefficient of variation), and LSkew (linear skewness)
#'
#'@details The L-moment estimated parameters are by the method detailed in 'Hosking J. Wallis J. 1997 Regional Frequency Analysis: An Approach Based on L-moments. Cambridge University Press, New York'
#' @param x numeric vector. The sample
#' @param mle logical argument with a default of FALSE. If FALSE the parameters are estimated with Lmoments, if TRUE the parameters are estimated by maximum likelihood estimation.
#' @param L1 first Lmoment
#' @param LCV linear coefficient of variation
#' @param LSKEW linear skewness
#' @examples
#' #Get an annual maximum sample and estimate the parameters using Lmoments
#' AM.27090 <- GetAM(27090)
#' GenLogPars(AM.27090$Flow)
#' #Estimate parameters using MLE
#' GenLogPars(AM.27090$Flow, mle = TRUE)
#' #calculate Lmoments and estimate the parameters with L1, Lcv and LSkew
#' Lmoms(AM.27090$Flow)
#' #store linear moments in an object
#' LPars <- as.numeric(Lmoms(AM.27090$Flow))[c(1,5,6)]
#' GenLogPars(L1 = LPars[1], LCV = LPars[2], LSKEW = LPars[3])
#' @return Parameter estimates (location, scale, shape)
#' @author Anthony Hammond
GenLogPars <- function(x = NULL, mle = FALSE, L1, LCV, LSKEW) {
  if(is.null(x) == FALSE & is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  if(mle == FALSE){
    if(is.null(x)) {
      Shape <- -LSKEW
      L2 <- L1*LCV
      Scale <- (L2*sin(Shape*pi))/(Shape*pi)
      Loc <- L1-Scale*((1/Shape)-(pi/sin(Shape*pi)))
      return(data.frame(Loc, Scale, Shape))}
    else {
      L1 <- mean(x, na.rm = TRUE)
      LCV <- Lcv(x)
      LSKEW <- LSkew(x)
      Shape <- -LSKEW
      L2 <- L1*LCV
      Scale <- (L2*sin(Shape*pi))/(Shape*pi)
      Loc <- L1-Scale*((1/Shape)-(pi/sin(Shape*pi)))
      return(data.frame(Loc, Scale, Shape))}}
  else {
    pars <- c(mean(x), sd(x)/1.5, 0.01)
    min.SLS <- function(q, par) {
      abs(sum(log(gl.pdf(q, loc = par[1], scale = par[2], shape = par[3]))))
    }
    gl.pdf <- function(q, loc, scale, shape) {
      y <- -shape^-1*log(1-shape*(q-loc)/scale)
      f <- (scale^-1*exp(1)^(-(1-shape)*y))/(1+exp(1)^-y)^2
      return(f)
    }
    result <- suppressWarnings(optim(par = pars, fn = min.SLS, q = x))
    loc <- result$par[1]
    scale <- result$par[2]
    shape <- result$par[3]
    log.likelihood <- -result$value[1]
    message <- result$message
    Res <- data.frame(loc, scale, shape, log.likelihood)
    return(Res)
  }

}

#'Generalised Pareto distribution parameter estimates
#'
#'Estimated parameters from a sample (with Lmoments or maximum likelihood estimation) or from L1 (first L-moment), Lcv (linear coefficient of variation), and LSkew (linear skewness)
#'
#'@details The L-moment estimated parameters are by the method detailed in 'Hosking J. Wallis J. 1997 Regional Frequency Analysis: An Approach Based on L-moments. Cambridge University Press, New York'
#'
#' @param x numeric vector. The sample
#' @param mle logical argument with a default of FALSE. If FALSE the parameters are estimated with Lmoments, if TRUE the parameters are estimated by maximum likelihood estimation
#' @param L1 first Lmoment
#' @param LCV linear coefficient of variation
#' @param LSKEW linear skewness
#' @examples
#' #Get a peaks over threshold sample and estimate the parameters using Lmoments
#' ThamesPOT <- POTextract(ThamesPQ[,c(1,3)], thresh = 0.90)
#' GenParetoPars(ThamesPOT$peak)
#' #Estimate parameters using MLE
#' GenParetoPars(ThamesPOT$peak, mle = TRUE)
#' #calculate Lmoments and estimate the parameters with L1, Lcv and LSkew
#' Lmoms(ThamesPOT$peak)
#' #store linear moments in an object
#' LPars <- as.numeric(Lmoms(ThamesPOT$peak))[c(1,5,6)]
#' GenParetoPars(L1 = LPars[1], LCV = LPars[2], LSKEW = LPars[3])
#' @return Parameter estimates (location, scale, shape)
#' @author Anthony Hammond
GenParetoPars <- function(x = NULL, mle = FALSE, L1, LCV, LSKEW) {
  if(is.null(x) == FALSE & is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  if(mle == FALSE){
    if(is.null(x)) {
      Shape <- (1-3*LSKEW)/(1+LSKEW)
      L2 <- L1*LCV
      Scale <- (1+Shape)*(2+Shape)*L2
      Loc <- L1-(2+Shape)*L2
      return(data.frame(Loc, Scale, Shape))}
    else {
      L1 <- mean(x, na.rm = TRUE)
      LCV <- Lcv(x)
      LSKEW <- LSkew(x)
      Shape <- (1-3*LSKEW)/(1+LSKEW)
      L2 <- L1*LCV
      Scale <- (1+Shape)*(2+Shape)*L2
      Loc <- L1-(2+Shape)*L2
      return(data.frame(Loc, Scale, Shape))}
  } else {
    pars <- c(sd(x)/1.5, 0.01)
    max.lhd <- function(q, par) {
      abs(sum(log(gp.pdf(q, loc = min(x), scale = par[1], shape = par[2]))))}
    gp.pdf <- function(q, loc, scale, shape) {
      y <- -shape^-1*log(1-shape*(q-loc)/scale)
      p <- scale^-1*exp(1)^(-(1-shape)*y)
      return(p)
    }
    result <- suppressWarnings(optim(par = pars, fn = max.lhd, q = x))
    loc <- min(x)
    scale <- result$par[1]
    shape <- result$par[2]
    log.likelihood <- -result$value[1]
    message <- result$message
    Res <- data.frame(loc, scale, shape, log.likelihood)
    return(Res)
  }
}

#'Gumbel distribution parameter estimates
#'
#'Estimated parameters from a sample (with Lmoments or maximum likelihood estimation) or from L1 (first L-moment), Lcv (linear coefficient of variation)
#'
#'@details The L-moment estimated parameters are by the method detailed in 'Hosking J. Wallis J. 1997 Regional Frequency Analysis: An Approach Based on L-moments. Cambridge University Press, New York'
#'
#' @param x numeric vector. The sample
#' @param mle logical argument with a default of FALSE. If FALSE the parameters are estimated with Lmoments, if TRUE the parameters are estimated by maximum likelihood estimation
#' @param L1 first Lmoment
#' @param LCV linear coefficient of variation
#' @examples
#' #Get an annual maximum sample and estimate the parameters using Lmoments
#' AM.27090 <- GetAM(27090)
#' GumbelPars(AM.27090$Flow)
#' #Estimate parameters using MLE
#' GumbelPars(AM.27090$Flow, mle = TRUE)
#' #calculate Lmoments and estimate the parameters with L1 and Lcv
#' Pars <- as.numeric(Lmoms(AM.27090$Flow)[c(1,5)])
#' GumbelPars(L1 = Pars[1], LCV = Pars[2])
#' @return Parameter estimates (location, scale)
#' @author Anthony Hammond
GumbelPars <- function(x = NULL, mle = FALSE, L1, LCV){
  if(is.null(x) == FALSE & is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  if(mle == FALSE) {
    if(is.null(x)) {Scale <- (L1*LCV)/log(2)
    Loc <- L1 - 0.5772*Scale
    return(data.frame(Loc, Scale))
    } else {
      L1 <- mean(x, na.rm = TRUE)
      LCV <- Lcv(x)
      Scale <- (L1*LCV)/log(2)
      Loc <- L1 - 0.5772*Scale
      return(data.frame(Loc, Scale))
    }
  } else {
    pars <- c(mean(x), sd(x)/1.5)
    max.lhd <- function(q, par) {
      abs(sum(log(gum.pdf(q, loc = par[1], scale = par[2]))))
    }
    gum.pdf <- function(q, loc, scale) {
      p <- scale^-1 * exp(-(q-loc)/scale)*exp(-exp(-(q-loc)/scale))
      return(p)
    }
    result <- suppressWarnings(optim(par = pars, fn = max.lhd, q = x))
    loc <- result$par[1]
    scale <- result$par[2]
    log.likelihood <- -result$value[1]
    message <- result$message
    Res <- data.frame(loc, scale, log.likelihood)
    return(Res)
  }
}


#'Data simulator
#'
#'Simulation of a random sample from the generalised extreme value, generalised logistic, Gumbel, or generalised Pareto distributions
#'
#'The simulated sample can be generated using distribution parameters, or the growth factor (GF) inputs: the linear coefficient of variation (Lcv), linear skewness (LSkew), and the median annual maximum (QMED).
#' @param n sample size to be simulated
#' @param pars vector of parameters in the order of location, scale, shape (only location and scale for Gumbel)
#' @param dist choice of distribution. Either "GEV", "GenLog", "Gumbel" or "GenPareto"
#' @param GF vector of GF inputs in the order of Lcv, LSkew, QMED (only Lcv and QMED if dist = "Gumbel")
#' @examples
#' #Simulate a sample of size 30 using the GenLog distribution with parameters 299, 51, -0.042
#' SimData(30, pars = c(299, 51, -0.042), dist = "GenLog")
#' #Now simulate using the Lcv, Lskew, and median (0.17, 0.04, 310)
#' SimData(30, GF = c(0.17, 0.04, 310), dist = "GenLog")
#' @return A random sample of size n for the chosen distribution.
#' @author Anthony Hammond
SimData <- function(n, pars = NULL, dist = "GenLog", GF = NULL) {
  if(is.null(GF) == TRUE){
    if(dist == "GenPareto") {res <- GenParetoEst(loc = pars[1], scale = pars[2], shape = pars[3], RP = 1/runif(n), ppy = 1)}
    if(dist == "GEV") {res <- GEVEst(loc = pars[1], scale = pars[2], shape = pars[3], RP = 1/runif(n))}
    if(dist == "GenLog") {res <- GenLogEst(loc = pars[1], scale = pars[2], shape = pars[3], RP = 1/runif(n))}
    if(dist == "Gumbel") {res <- GumbelEst(loc = pars[1], scale = pars[2], RP = 1/runif(n))}
    return(res)} else
    {if(dist == "GenPareto") {res <- GenParetoGF(lcv = GF[1], lskew = GF[2], RP = 1/runif(n))}
      if(dist == "GEV") {res <- GEVGF(lcv = GF[1], lskew = GF[2], RP = 1/runif(n))}
      if(dist == "GenLog") {res <- GenLogGF(lcv = GF[1], lskew = GF[2], RP = 1/runif(n))}
      if(dist == "Gumbel") {res <- GumbelGF(lcv = GF[1], RP = 1/runif(n))}
      if(dist == "Gumbel"){
        return(res*GF[2])
      } else
      return(res*GF[3])}
}
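
# Illustration only: a hypothetical helper (not exported) showing that the GF
# route in SimData multiplies a growth factor by QMED, so the median of a
# large simulated sample should sit close to the supplied QMED.
SimDataMedianSketch <- function(n = 50000, lcv = 0.17, lskew = 0.04, QMED = 310) {
  Sim <- SimData(n, GF = c(lcv, lskew, QMED), dist = "GenLog")
  c(SimulatedMedian = median(Sim), QMED = QMED)
}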


# QMED --------------------------------------------------------------------

#'QMED (median annual maximum flow) estimate from catchment descriptors
#'
#'Estimated median annual maximum flow from catchment descriptors and donor sites
#'
#'QMED is estimated from catchment descriptors: QMED = 8.3062*AREA^0.8510 * 0.1536^(1000/SAAR) * FARL^3.4451 * 0.0460^(BFIHOST^2) as derived in Science Report: SC050050 - Improving the FEH statistical procedures for flood frequency estimation. The single donor method is from the same paper. The method for two donors is outlined in 'Kjeldsen, T. (2019). Adjustment of QMED in ungauged catchments using two donor sites. Circulation - The Newsletter of the British Hydrological Society, 4'. When UrbAdj = TRUE, urban adjustment is applied to the QMED estimate according to the method outlined in the guidance by Wallingford HydroSolutions: 'WINFAP 4 Urban Adjustment Procedures'. For flexibility there is the option to input the relevant catchment descriptors directly rather than a CDs object.
#' @param CDs catchment descriptors derived from either GetCDs or ImportCDs
#' @param Don1 numeric site reference for a single donor (for donor candidates see the DonAdj function)
#' @param Don2 vector of two site references for two donors (for donor candidates see DonAdj function)
#' @param UrbAdj logical argument with a default of FALSE. TRUE applies an urban adjustment
#' @param AREA catchment area in km2
#' @param SAAR standard average annual rainfall (mm)
#' @param FARL flood attenuation from reservoirs and lakes
#' @param BFIHOST baseflow index estimated from the hydrology of soil types (HOST) classification
#' @param URBEXT2000 measure of catchment urbanisation
#' @examples
#' #Get some catchment descriptors and calculate QMED as if it was ungauged, with
#' #no donors, one donor, and two donors
#' CDs.55004 <- GetCDs(55004)
#' QMED(CDs.55004)
#' QMED(CDs.55004, Don1 = 55012)
#' QMED(CDs.55004, Don2 = c(55012, 60007))
#' #Get CDs for urban gauge and calculate QMED with urban adjustment
#' CDs.27083 <- GetCDs(27083)
#' QMED(CDs.27083, UrbAdj = TRUE)
#' @return An estimate of QMED from catchment descriptors
#' @author Anthony Hammond
QMED <- function(CDs = NULL, Don1 = NULL, Don2 = NULL, UrbAdj = FALSE, AREA, SAAR, FARL, BFIHOST, URBEXT2000 = NULL){
  Donor1 <- function(CDs, DonSite){
    QMED.cd <- 8.3062*CDs[1,2]^0.8510*0.1536^(1000/CDs[15,2])*CDs[8,2]^3.4451*0.0460^(CDs[5,2]^2)
    Site <- DonSite
    Donors <- DonAdj(CDs = CDs, rows = 500)
    Rw <- which(rownames(Donors) == DonSite)
    Result <- Donors[Rw, 27]
    return(Result)
  }
  Donor2 <- function(CDs, Sites) {
    rij <- function(d) {0.4598*exp(-0.0200*d)+(1-0.4598)*exp(-0.4785*d)}
    NGRDist <- function(i, j) {sqrt((i[1]-j[1])^2+(i[2]-j[2])^2)/1000}
    Site1 <- Sites[1]
    Site2 <- Sites[2]
    CDs.Site1 <- GetCDs(Site1)
    CDs.Site2 <- GetCDs(Site2)
    Dist1 <- NGRDist(c(CDs[19,2], CDs[20,2]), c(CDs.Site1[19,2], CDs.Site1[20,2]))
    Dist2 <- NGRDist(c(CDs[19,2], CDs[20,2]), c(CDs.Site2[19,2], CDs.Site2[20,2]))
    Dist12 <- NGRDist(c(CDs.Site1[19,2], CDs.Site1[20,2]), c(CDs.Site2[19,2], CDs.Site2[20,2]))
    ps1 <- rij(Dist1)
    p12 <- rij(Dist12)
    ps2 <- rij(Dist2)
    a1 <- (ps1-p12*ps2)/(1-p12^2)
    a2 <- (ps2-p12*ps1)/(1-p12^2)
    QMEDscd <- 8.3062*CDs[1,2]^0.8510*0.1536^(1000/CDs[15,2])*CDs[8,2]^3.4451*0.0460^(CDs[5,2]^2)
    QMED1cd <- 8.3062*CDs.Site1[1,2]^0.8510*0.1536^(1000/CDs.Site1[15,2])*CDs.Site1[8,2]^3.4451*0.0460^(CDs.Site1[5,2]^2)
    QMED2cd <- 8.3062*CDs.Site2[1,2]^0.8510*0.1536^(1000/CDs.Site2[15,2])*CDs.Site2[8,2]^3.4451*0.0460^(CDs.Site2[5,2]^2)
    QMED1obs <- QMEDData$QMED[which(rownames(QMEDData) == Site1)]
    QMED2obs <- QMEDData$QMED[which(rownames(QMEDData) == Site2)]
    QMEDs.adj <- QMEDscd*(QMED1obs/QMED1cd)^a1 * (QMED2obs/QMED2cd)^a2
    return(QMEDs.adj)
  }
  if(is.null(CDs) == TRUE) {
    QMED.cd <- 8.3062*AREA^0.8510*0.1536^(1000/SAAR)*FARL^3.4451*0.0460^(BFIHOST^2)
    if(is.null(URBEXT2000) == TRUE & UrbAdj == TRUE) stop ("If UrbAdj is TRUE, URBEXT2000 is required")
    if(UrbAdj == TRUE) {
      Q.ua <- as.numeric(UAF(URBEXT2000 = URBEXT2000, BFIHOST = BFIHOST)[2])*QMED.cd}
    if (UrbAdj == FALSE) {QMED <- QMED.cd} else {QMED <- Q.ua}
    if (is.null(URBEXT2000) == TRUE & UrbAdj == FALSE) {print("No input for URBEXT2000. If it is above 0.03, urban adjustment is recommended")}
    if(is.null(URBEXT2000) == FALSE){
      if(UrbAdj == FALSE & URBEXT2000 > 0.03){print("URBEXT > 0.03, urban adjustment is recommended")}}
    return(QMED)
  } else {
    QMED.cd <- 8.3062*CDs[1,2]^0.8510*0.1536^(1000/CDs[15,2])*CDs[8,2]^3.4451*0.0460^(CDs[5,2]^2)
    if(is.null(Don1) == TRUE) {QMED.cd <- QMED.cd} else {QMED.cd <- Donor1(CDs = CDs, Don1)}
    if(is.null(Don2) == TRUE) {QMED.cd <- QMED.cd} else {QMED.cd <- Donor2(CDs = CDs, Don2)}
    Q.ua <- as.numeric(UAF(CDs = CDs)[2])*QMED.cd
    if (UrbAdj == FALSE) {QMED <- QMED.cd} else {QMED <- Q.ua}
    if (CDs[18,2] > 0.03 & UrbAdj == FALSE) {print("URBEXT > 0.03, urban adjustment is recommended")}
    return(QMED)
  }
}
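
# Illustration only: the ungauged QMED catchment descriptor equation from
# Science Report SC050050, written out as a standalone hypothetical helper
# (not exported) so the equation used inside QMED() can be read in isolation.
# e.g. QMEDcdSketch(194, 1096, 0.955, 0.297), using the descriptor values from
# the QMEDDonEq example below, should match QMED() with the same direct inputs,
# no donors, and no urban adjustment.
QMEDcdSketch <- function(AREA, SAAR, FARL, BFIHOST) {
  8.3062 * AREA^0.8510 * 0.1536^(1000/SAAR) * FARL^3.4451 * 0.0460^(BFIHOST^2)
}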

#' Empirical estimate of QMED from peaks over threshold (POT) data
#'
#' Estimates the median annual maximum flow (QMED) from peaks over threshold data
#'
#'@details If there are multiple peaks per year, the peaks per year (ppy) argument is used to convert to the annual scale to derive QMED. If ppy is one, then the median of the POT sample is returned (the median of x).
#' @param x numerical vector. POT data
#' @param ppy number of peaks per year in the POT data
#' @examples
#' #Extract some POT data and estimate QMED
#' ThamesPOT <- POTextract(ThamesPQ[,c(1,3)], thresh = 0.90)
#' QMEDPOT(ThamesPOT$peak, ppy = 1.867263)
#' @author Anthony Hammond
QMEDPOT <- function(x, ppy){
  if(is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  qu <- 1-(0.5/ppy)
  qmed <- quantile(x, qu, na.rm = TRUE)
  return(as.numeric(qmed))
}
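
# Illustration only: a hypothetical helper (not exported) showing the annual
# scale conversion in QMEDPOT; with one peak per year the 1 - 0.5/ppy quantile
# is just the sample median, and it moves up the sample as ppy increases.
QMEDPOTSketch <- function(x, ppy = 2) {
  c(OnePeakPerYear = QMEDPOT(x, ppy = 1), SampleMedian = median(x),
    MultiplePeaks = QMEDPOT(x, ppy = ppy))
}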


#' QMED Linking equation
#'
#' Estimates the median annual maximum flow (QMED) from non-flood flows
#'
#'The QMED Linking equation estimates QMED as a function of the flow that is exceeded 5 percent of the time, the flow that is exceeded 10 percent of the time, the baseflow index, and the catchment descriptor DPSBAR (drainage path slope). All of these can be found for sites on the National River Flow Archive (NRFA) website. The method is provided in the guidance note 'WINFAP 4 QMED Linking equation' (2016) by Wallingford HydroSolutions.
#' @param Q5dmf numeric. The daily mean flow that is exceeded 5 percent of the time
#' @param Q10dmf numeric. The daily mean flow that is exceeded 10 percent of the time
#' @param DPSBAR a catchment descriptor. The average drainage path slope of the catchment
#' @param BFI the baseflow index of the gauged flow
#' @examples
#' #Calculate the QMED for site 1001 (Wick at Tarroul)
#' QMEDLink(10.14, 7.352, 29.90, 0.39)
#' @author Anthony Hammond
QMEDLink <- function(Q5dmf, Q10dmf, DPSBAR, BFI) {
  GRAD <- (log10(Q5dmf)-log10(Q10dmf))/(qnorm(0.05)-qnorm(0.1))
  1.762*Q5dmf^0.866*(1+GRAD)^-0.775*DPSBAR^0.265*0.2388^(BFI^2)
}


#' QMED donor adjustment
#'
#' Applies a donor adjustment to the median annual maximum flow (QMED) estimate
#'
#'Although a single donor adjustment can be applied with the DonAdj() function and within QMED(), this function is provided for flexibility. The method is that of Science Report: SC050050 - Improving the FEH statistical procedures for flood frequency estimation (2008).
#' @param AREA catchment area in km2
#' @param SAAR standardised average annual rainfall in mm
#' @param FARL flood attenuation from reservoirs and lakes
#' @param BFIHOST the baseflow index as a function of soil type
#' @param QMEDgObs the observed QMED at the donor site
#' @param QMEDgCds the QMED equation derived QMED at the donor site
#' @param xSI the catchment centroid easting for the site of interest
#' @param ySI the catchment centroid northing for the site of interest
#' @param xDon the catchment centroid easting for the donor site
#' @param yDon the catchment centroid northing for the donor site
#' @param alpha a logical argument with a default of TRUE. When FALSE the exponent in the donor equation is set to one. Otherwise it is determined by the distance between the donor and the subject site
#'
#' @examples
#' #Get observed QMED for site 96003
#' Qob <- median(GetAM(96003)[,2])
#' #Get QMED equation estimated QMED for the donor site
#' QCD <- QMED(CDs = GetCDs(96003))
#' #display CDs for site 96001 & note the easting and northing
#' GetCDs(96001)
#' #display CDs for site 96003 & note the easting and northing
#' GetCDs(96003)
#' #Apply the QMEDDonEq function with the information gained
#' QMEDDonEq(194, 1096, 0.955, 0.297, Qob, QCD, xSI = 289289,ySI = 947523,xDon = 280908,yDon = 953653)
#' @author Anthony Hammond
QMEDDonEq <- function(AREA, SAAR, FARL, BFIHOST, QMEDgObs, QMEDgCds, xSI, ySI, xDon, yDon, alpha = TRUE) {
  QMED.scd <- 8.3062*AREA^0.8510*0.1536^(1000/SAAR)*FARL^3.4451*0.0460^(BFIHOST^2)
  d <- NGRDist(i = c(xSI,ySI), j = c(xDon,yDon))
  rij <- function(d) {0.4598*exp(-0.0200*d)+(1-0.4598)*exp(-0.4785*d)}
  if(alpha == TRUE) {a <- rij(d)} else{a <- 1}
  QMED.adj <- QMED.scd*(QMEDgObs/QMEDgCds)^a
  return(QMED.adj)
}
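
# Illustration only: a hypothetical helper (not exported) showing the donor
# exponent used in QMEDDonEq and DonAdj; the distance-decay function rij gives
# a weight near 1 for donors close to the subject catchment centroid and a
# smaller weight as the distance (in km) between centroids grows.
DonorAlphaSketch <- function(xSI, ySI, xDon, yDon) {
  d <- NGRDist(i = c(xSI, ySI), j = c(xDon, yDon))
  0.4598 * exp(-0.0200 * d) + (1 - 0.4598) * exp(-0.4785 * d)
}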



#'Donor adjustment candidates & results
#'
#'Provides donor adjustment candidates, descriptors, and results in order of proximity to the centroid of the subject catchment.
#'
#'When d2 is not used, the results for single donor adjustment are in the final column headed 'QMED.adj' for each site. If alpha is set to FALSE, the results in this column are from the same donor equation but with an exponent of 1. The donor adjustment method is as outlined in Science Report: SC050050 - Improving the FEH statistical procedures for flood frequency estimation. The method for two donors is outlined in 'Kjeldsen, T. (2019). Adjustment of QMED in ungauged catchments using two donor sites. Circulation - The Newsletter of the British Hydrological Society, 4'. When two donors are used, only the result is returned, rather than donor candidates. The QMEDfse column provides the gauged factorial standard error for the median of the annual maximum sample. It is worth considering this when choosing a donor site (a high value indicates a poor donor). When choosing between two donors, the site with a lower QMEDfse would be an appropriate choice (all else being equal). The QMEDfse is calculated with the QMEDfseSS() function.
#' @param CDs catchment descriptors derived from either GetCDs or ImportCDs
#' @param x catchment centroid easting (for when CDs isn't used)
#' @param y catchment centroid northing (for when CDs isn't used)
#' @param QMEDscd QMED estimate for the catchment of interest (for when CDs isn't used)
#' @param alpha logical argument with a default of TRUE. If FALSE the exponent of the donor adjustment equation is set to one
#' @param rows number of sites provided; default is 10
#' @param d2 a numeric vector of length two; the two site references for the donor catchments chosen for the two donor case
#' @examples
#' #Get some CDs and output candidate donor sites
#' CDs.54022 <- GetCDs(54022)
#' DonAdj(CDs.54022)
#' #Get results with inputs of x,y, and QMEDscd
#' DonAdj(x = 283261, y = 288067, QMEDscd = 17.931)
#' #Get a result with two donors
#' DonAdj(CDs.54022, d2 = c(54092, 54091))
#' @return A data.frame with rownames of site references and columns of catchment descriptors, distance from the subject site, and associated results. When two donors are used, only the resulting adjusted QMED is returned
#' @author Anthony Hammond

DonAdj <- function(CDs = NULL, x,y, QMEDscd = NULL, alpha = TRUE, rows = 10, d2 = NULL){
  Donor2 <- function(CDs, Sites) {
    rij <- function(d) {0.4598*exp(-0.0200*d)+(1-0.4598)*exp(-0.4785*d)}
    NGRDist <- function(i, j) {sqrt((i[1]-j[1])^2+(i[2]-j[2])^2)/1000}
    Site1 <- Sites[1]
    Site2 <- Sites[2]
    CDs.Site1 <- GetCDs(Site1)
    CDs.Site2 <- GetCDs(Site2)
    Dist1 <- NGRDist(c(CDs[19,2], CDs[20,2]), c(CDs.Site1[19,2], CDs.Site1[20,2]))
    Dist2 <- NGRDist(c(CDs[19,2], CDs[20,2]), c(CDs.Site2[19,2], CDs.Site2[20,2]))
    Dist12 <- NGRDist(c(CDs.Site1[19,2], CDs.Site1[20,2]), c(CDs.Site2[19,2], CDs.Site2[20,2]))
    ps1 <- rij(Dist1)
    p12 <- rij(Dist12)
    ps2 <- rij(Dist2)
    a1 <- (ps1-p12*ps2)/(1-p12^2)
    a2 <- (ps2-p12*ps1)/(1-p12^2)
    QMEDscd <- 8.3062*CDs[1,2]^0.8510*0.1536^(1000/CDs[15,2])*CDs[8,2]^3.4451*0.0460^(CDs[5,2]^2)
    QMED1cd <- 8.3062*CDs.Site1[1,2]^0.8510*0.1536^(1000/CDs.Site1[15,2])*CDs.Site1[8,2]^3.4451*0.0460^(CDs.Site1[5,2]^2)
    QMED2cd <- 8.3062*CDs.Site2[1,2]^0.8510*0.1536^(1000/CDs.Site2[15,2])*CDs.Site2[8,2]^3.4451*0.0460^(CDs.Site2[5,2]^2)
    QMED1obs <- QMEDData$QMED[which(rownames(QMEDData) == Site1)]
    QMED2obs <- QMEDData$QMED[which(rownames(QMEDData) == Site2)]
    QMEDs.adj <- QMEDscd*(QMED1obs/QMED1cd)^a1 * (QMED2obs/QMED2cd)^a2
    return(QMEDs.adj)
  }
  if(is.null(QMEDscd) == TRUE) {QMEDscd <- 8.3062*CDs[1,2]^0.8510*0.1536^(1000/CDs[15,2])*CDs[8,2]^3.4451*0.0460^(CDs[5,2]^2)} else {QMEDscd <- QMEDscd}
  suppressWarnings(if(is.null(CDs) == TRUE) {
    NGRDist <- function(i, j) {sqrt((i[1]-j[1])^2+(i[2]-j[2])^2)/1000}
    Dists <- NULL
    for(i in 1:length(QMEDData$QMED)) {Dists[i] <- NGRDist(i = c(x,y), j = c(QMEDData$X[i],QMEDData$Y[i]))}
    Dists.Table <- data.frame(QMEDData, Dists)
    Dists.Order <- Dists.Table[order(Dists.Table$Dists),]
    rij <- function(d) {0.4598*exp(-0.0200*d)+(1-0.4598)*exp(-0.4785*d)}
    if(alpha == TRUE) {a <- rij(Dists.Order$Dists)} else{a <- 1}
    Dists.Order <- cbind(Dists.Order, a)
    QMED.adj <- QMEDscd*(Dists.Order$QMED/Dists.Order$QMEDcd)^a
    Dists.Order <- cbind(Dists.Order, QMED.adj)
    if(is.null(d2) == TRUE){
      return(Dists.Order[1:rows,])
    } else {
      Qd2 <- Donor2(CDs, Sites = d2)
      return(Qd2)
    }} else {
      NGRDist <- function(i, j) {sqrt((i[1]-j[1])^2+(i[2]-j[2])^2)/1000}
      Dists <- NULL
      for(i in 1:length(QMEDData$QMED)) {Dists[i] <- NGRDist(i = c(CDs$Value[19],CDs$Value[20]), j = c(QMEDData$X[i],QMEDData$Y[i]))}
      Dists.Table <- data.frame(QMEDData, Dists)
      Dists.Order <- Dists.Table[order(Dists.Table$Dists),]
      rij <- function(d) {0.4598*exp(-0.0200*d)+(1-0.4598)*exp(-0.4785*d)}
      if(alpha == TRUE) {a <- rij(Dists.Order$Dists)} else{a <- 1}
      Dists.Order <- cbind(Dists.Order, a)
      QMED.adj <- QMEDscd*(Dists.Order$QMED/Dists.Order$QMEDcd)^a
      Dists.Order <- cbind(Dists.Order, QMED.adj)
      if(is.null(d2) == TRUE){
        return(Dists.Order[1:rows,])
      } else {
        Qd2 <- Donor2(CDs, Sites = d2)
        return(Qd2)
      }

    })
}


#' QMED factorial standard error for gauged sites
#'
#' Estimates the median annual maximum flow (QMED) factorial standard error (FSE) by bootstrapping the sample
#'
#'The bootstrapping procedure resamples from the sample N*500 times with replacement. After splitting into 500 samples of size N, the median is calculated for each. Then the exponent of the standard deviation of the log transformed residuals is taken as the FSE. i.e. exp(sd(log(x)-mean(log(x)))), where x is the bootstrapped medians.
#' @param x a numeric vector. The sample of interest
#' @examples
#' #Extract an AMAX sample and estimate the QMED factorial standard error
#' AM.203018 <- GetAM(203018)
#' QMEDfseSS(AM.203018$Flow)
#' @return The factorial standard error for the median of a sample.
#' @author Anthony Hammond
QMEDfseSS <- function(x) {
  if(is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  resample <- sample(x, size = length(x)*500, replace = TRUE)
  mat <- matrix(resample, nrow = length(x), ncol = 500)
  res <- apply(mat, 2, median)
  FSE <- function(x) {exp(sd(log(x) - mean(log(x))))}
  fse <- FSE(res)
  return(fse)
}


#' QMED from a gauged site suitable for QMED
#'
#' Provides QMED (median annual maximum flow) from a site suitable for QMED, using the site reference.
#'
#' @param x the gauged reference
#' @examples
#' #Get the observed QMED from site 55002
#' GetQMED(55002)
#' @return the median annual maximum
#' @author Anthony Hammond
GetQMED <- function(x) {
  MedianAM <- QMEDData[which(rownames(QMEDData) == x),19]
  if(length(MedianAM) < 1) stop("Site reference not recognised. Site not suitable for QMED or pooling.")
  return(MedianAM)
}


# UrbFuncs ----------------------------------------------------------------

#' Urban adjustment for the linear coefficient of variation (Lcv)
#'
#' Urbanises or de-urbanises the Lcv using the methods outlined in the guidance by Wallingford HydroSolutions: 'WINFAP 4 Urban Adjustment Procedures'
#'
#'The method for de-urbanisation isn't explicitly provided in 'WINFAP 4 Urban Adjustment Procedures', but the procedure is a rearrangement of the urbanisation equation, solving for Lcv rather than Lcv-urban.
#' @param lcv the Lcv (numeric)
#' @param URBEXT2000 quantification of urbanisation and suburbanisation for the subject catchment
#' @param DeUrb logical argument with a default of FALSE. If set to TRUE, de-urbanisation adjustment is performed, if FALSE, urbanisation adjustment is performed
#' @examples
#' #Choose an urban site (site 53006) from the NRFA data then apply a de-urban
#' #adjustment using the Lcv and URBEXT2000 displayed
#' NRFAData[which(rownames(NRFAData) == 53006),]
#' LcvUrb(0.21, 0.1138, DeUrb = TRUE)
#' #Get the pooled Lmoment ratios results for catchment 53006 and apply the
#' #urban adjustment using the pooled Lcv, and the URBEXT2000 for site 53006.
#' CDs.53006 <- GetCDs(53006)
#' QuickResults(CDs.53006)[[2]]
#' LcvUrb(0.196, 0.1138)
#' @return The urban adjusted Lcv or the de-urbanised Lcv
#' @author Anthony Hammond

LcvUrb <- function(lcv, URBEXT2000, DeUrb = FALSE) {if (DeUrb == FALSE) {lcv*0.68654^(1.567*URBEXT2000)} else {lcv/(0.68654^(1.567*URBEXT2000))}}


#' Urban adjustment for the linear skewness (LSkew)
#'
#' Urbanises or de-urbanises the LSkew using the methods outlined in the guidance by Wallingford HydroSolutions: 'WINFAP 4 Urban Adjustment Procedures'
#'
#'The method for de-urbanisation isn't explicitly provided in 'WINFAP 4 Urban Adjustment Procedures', but the procedure is a rearrangement of the urbanisation equation, solving for LSkew rather than LSkew-urban.
#' @param lskew the LSkew (numeric)
#' @param URBEXT2000 quantification of urbanisation and suburbanisation for the subject site
#' @param DeUrb logical argument with a default of FALSE. If set to TRUE, de-urbanisation adjustment is performed, if FALSE, urbanisation adjustment is performed
#' @examples
#' #Choose an urban site (site 53006) from the NRFA data then apply a de-urban
#' #adjustment using the Lcv and URBEXT2000 displayed
#' NRFAData[which(rownames(NRFAData) == 53006),]
#' LSkewUrb(0.124, 0.1138, DeUrb = TRUE)
#' #Get the pooled Lmoment ratios results for catchment 53006 and apply the urban
#' #adjustment using the pooled LSkew, and the URBEXT2000 for site 53006.
#' CDs.53006 <- GetCDs(53006)
#' QuickResults(CDs.53006)[[2]]
#' LSkewUrb(0.194, 0.1138)
#' @return The urban adjusted LSkew or the de-urbanised LSkew
#' @author Anthony Hammond

LSkewUrb <- function(lskew, URBEXT2000, DeUrb = FALSE) {if(DeUrb == FALSE) {((lskew+1)*1.096017^(1.567*URBEXT2000))-1} else {((lskew+1)/1.096017^(1.567*URBEXT2000))-1}}
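
# Illustration only: a hypothetical helper (not exported) checking that the
# de-urbanisation options in LcvUrb and LSkewUrb are exact algebraic inverses
# of the urbanisation adjustments, so a round trip returns the input ratios.
UrbRoundTripSketch <- function(lcv = 0.21, lskew = 0.124, URBEXT2000 = 0.1138) {
  LcvBack <- LcvUrb(LcvUrb(lcv, URBEXT2000, DeUrb = TRUE), URBEXT2000)
  LSkewBack <- LSkewUrb(LSkewUrb(lskew, URBEXT2000, DeUrb = TRUE), URBEXT2000)
  data.frame(lcv, LcvBack, lskew, LSkewBack)
}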


#' Urban adjustment factor (UAF) and percentage runoff urban adjustment factor (PRUAF)
#'
#' UAF and PRUAF from catchment descriptors for QMED estimation in ungauged urban catchments
#'
#' @param CDs catchment descriptors derived from either GetCDs or ImportCDs
#' @param URBEXT2000 quantification of catchment urbanisation (used when CDs is not)
#' @param BFIHOST baseflow index as a function of hydrological soil type of the catchment (used when CDs is not)
#' @examples
#' #Get some catchment descriptors for an urban catchment and calculate the UAF & PRUAF
#' CDs.53006 <- GetCDs(53006)
#' UAF(CDs.53006)
#' #Calculate UAF and PRUAF using a user input URBEXT2000 and BFIHOST
#' UAF(URBEXT2000 = 0.1138, BFIHOST = 0.3620)
#' @return a data.frame with columns UAF and PRUAF
#' @author Anthony Hammond

UAF <- function(CDs = NULL, URBEXT2000, BFIHOST) {
  if(is.null(CDs) == TRUE) {PRUAF <- 1+0.3*1.567*URBEXT2000*(70/(69.366-65.686*BFIHOST)-1)} else {PRUAF <-   1+0.3*1.567*CDs[18,2]*(70/(69.366-65.686*CDs[5,2])-1)}
  if(is.null(CDs) == TRUE) {UAF <- (1+0.3*(1.567*URBEXT2000))^1.25*PRUAF^1.33} else {UAF <- (1+0.3*(1.567*CDs[18,2]))^1.25*PRUAF^1.33}
  return(data.frame(PRUAF, UAF))
}




# DataFuncs ---------------------------------------------------------------
#' Get an annual maximum sample from the National River Flow Archive sites suitable for pooling
#'
#' Extracts the annual maximum peak flow sample and associated dates for the site of interest.
#' @param ref the site reference of interest (numeric)
#' @examples
#' #Get an AMAX sample and display it in the console
#' GetAM(203018)
#' #Save an AMAX sample as an object
#' AM.203018 <- GetAM(203018)
#' @return A data.frame with columns; Date, Flow, and id
#' @author Anthony Hammond

GetAM <- function(ref) {
  Test <- which(AMSP$id == ref)
  if(length(Test) < 1) stop("Only sites suitable for pooling are available via this function. Check the reference or use ImportAM to get sites not suitable for pooling")
  AM <- subset(AMSP, id == ref)
  rws <- seq(1, length(AM$Flow))
  Date <- as.Date(AM[,1])
  AM <- AM[,-1]
  AM <- cbind(Date, AM)
  rownames(AM) <- rws
  return(AM)
}


# Detrend -----------------------------------------------------------------

#' Linearly detrend a sample
#'
#'@description Applies a linear detrend to a sample
#'@details Adjusts all the values in the sample, of size n, by the difference between the linearly modelled ith data point and the linearly modelled nth data point.
#'@param x a numeric vector
#'@examples
#'# Get an annual maximum (AM) sample that looks to have a significant trend
#'AM.21025 <- GetAM(21025)
#'# plot the resulting AM as a bar plot. Then detrend and compare with another plot
#'plot(AM.21025$Flow, type = "h", ylab = "Discharge (m3/s)")
#'AM.Detrend <- DeTrend(AM.21025$Flow)
#'plot(AM.Detrend, type = "h", ylab = "Discharge (m3/s)")
#'@return A numeric vector which is a linearly detrended version of x.
#'@author Anthony Hammond
DeTrend <- function(x) {
  if(is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  Lmod <- lm(x ~ seq(1, length(x)))
  Lpred <- as.numeric(predict(Lmod))
  DiffsTrend <- NULL
  for(i in 1:length(x)) {DiffsTrend[i] <- Lpred[length(x)]-Lpred[i]}
  Detrend <- x+DiffsTrend
  return(Detrend)
}
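
# Illustration only: a hypothetical helper (not exported) showing the effect of
# DeTrend; because each value is shifted by the fitted line's distance from its
# end-of-record level, the refitted linear slope of the detrended series is
# numerically zero while the level at the end of the record is preserved.
DeTrendSlopeSketch <- function(x) {
  Detrended <- DeTrend(x)
  Index <- seq(1, length(x))
  c(OriginalSlope = as.numeric(coef(lm(x ~ Index))[2]),
    DetrendedSlope = as.numeric(coef(lm(Detrended ~ Index))[2]))
}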



#' Import catchment descriptors from .CD3 files
#'
#' Imports catchment descriptors from CD3 files, either from an FEH webservice download or from the peak flows dataset downloaded from the National River Flow Archive (NRFA) website
#'
#' The CD3 files downloaded from the FEH webserver are formatted differently from the CD3 files of the peak flows dataset; this function is coded to import either, given the correct file path. File paths for importing data require forward slashes. On some operating systems, such as Windows, copied and pasted file paths will have backward slashes and would need to be changed accordingly.
#' @param x the CD3 file path
#' @examples
#' #Import catchment descriptors from a NRFA peakflows CD3 file and display in console
#' \dontrun{CDs.4003 <- ImportCDs("C:/Data/NRFAPeakFlow_v9/Suitable for QMED/4003.CD3")}
#' \dontrun{CDs.4003}
#' #Import catchment descriptors from a FEH webserver CD3 file and display CDs in the console
#' \dontrun{CDs.MySite <- ImportCDs("C:/Data/FEH_Catchment_384200_458200.CD3")}
#' @return A data.frame with columns; Descriptor and Value.
#' @author Anthony Hammond
ImportCDs <- function(x) {
  WebTest <- read.table(x, stringsAsFactors = FALSE, skip = 4, nrows = 1)
  if(WebTest[1,1] == "[CDS") {web <- TRUE} else {web <- FALSE}
  if(web == TRUE) {ImportWebCDs <- function(x) {
    IniCDs <- read.table(x, skip = 20, nrows = 23, header = FALSE, sep = ",", stringsAsFactors = FALSE)
    Excl <- IniCDs[-c(9,10,19,20,21,22),]
    Area <- read.table(x, skip = 18, nrows = 1, header = FALSE, sep = ",", stringsAsFactors = FALSE)
    Centroid <- read.table(x, skip = 19, nrows = 1, header = FALSE, sep = ",", stringsAsFactors = FALSE)
    Easting <- Centroid[2:3]
    colnames(Easting) <- c("V1", "V2")
    Northing <- Centroid[3:4]
    colnames(Northing) <- c("V1", "V2")
    CDSet <- rbind(Area, Excl, Easting, Northing)
    rownames(CDSet) <- seq(1,20)
    CDSet[19,1] <- "Easting"
    CDSet[20,1] <- "Northing"
    CDSet[1,1] <- "AREA"
    colnames(CDSet) <- c("Descriptor", "Value")
    return(CDSet)
  }
  CDs <- ImportWebCDs(x)}
  else {
    ImportNRFACDs <- function(x) {
      IniFile <- suppressWarnings(read.table(x, sep = ",", skip = 3, stringsAsFactors = FALSE, header = FALSE, quote = "\\"))
      CDNames <- c("DTM AREA", "ALTBAR", "ASPBAR", "ASPVAR", "BFIHOST", "DPLBAR", "DPSBAR", "FARL", "FPEXT", "LDP", "PROPWET", "RMED-1H", "RMED-1D", "RMED-2D", "SAAR", "SAAR4170", "SPRHOST", "URBEXT2000")
      MatchDescriptor <- match(CDNames, IniFile$V1)
      MD1 <- MatchDescriptor+1
      Descriptor <- as.character(IniFile$V1[MatchDescriptor])
      Value <- as.numeric(IniFile$V1[MD1])
      CDsDF <- data.frame(Descriptor, Value, stringsAsFactors = FALSE)
      NGRInd <- which(IniFile$V1 == "CENTROID NGR")
      Easting <- as.integer(IniFile$V1[(NGRInd+2)])
      Northing <- as.integer(IniFile$V1[(NGRInd+3)])
      NGRNames <- c("Easting", "Northing")
      NGRDF <- data.frame(NGRNames, c(as.integer(Easting), as.integer(Northing)), stringsAsFactors = FALSE)
      colnames(NGRDF) <- c("Descriptor", "Value")
      CDsDF <- rbind(CDsDF, NGRDF)
      CDsDF[1,1] <- "AREA"
      rws <- seq(1,nrow(CDsDF))
      row.names(CDsDF) <- rws
      return(CDsDF)
    }
    CDs <- ImportNRFACDs(x)
  }
  return(CDs)
}


#' Get catchment descriptors from the National River Flow Archive sites considered suitable for median annual maximum flow estimation (QMED).
#'
#' Extracts the catchment descriptors for a site of interest from those suitable for QMED.
#' @param x the site reference of interest (numeric)
#' @examples
#' #Get CDs and display in the console
#' CDs.203018 <- GetCDs(203018)
#' CDs.203018
#' @return A data.frame with columns; Descriptor and Value.
#' @author Anthony Hammond
GetCDs <- function(x) {
  Site.id <- which(row.names(QMEDData) == x)
  if(length(Site.id) == 0) stop ("Site ID not within the set of sites considered suitable for QMED or pooling analysis. For further sites, the ImportCDs function can be used")
  Site <- QMEDData[Site.id,]
  Site <- Site[,-c(19,20)]
  colnames(Site)[colnames(Site) == "X"] <-  "Easting"
  colnames(Site)[colnames(Site) == "Y"] <-  "Northing"
  Site <- t(Site)
  rws <- as.vector(row.names(Site))
  values <- NULL
  for(i in 1:22) {values[i] <- Site[i,1]}
  dframe <- data.frame(rws, values)
  colnames(dframe) <- c("Descriptor", "Value")
  dframe[1:14,2] <- round(dframe[1:14,2], 4)
  dframe[c(15,16,19,20,22),2] <- round(dframe[c(15,16,19,20,22),2])
  dframe[21,2] <- round(dframe[21,2], 3)
  return(dframe)
}

#' Import an annual maximum (AMAX) sample from NRFA peak flow .AM files
#'
#' Imports the peak flows and dates from NRFA peak flow .AM files, excluding the rejected years
#'
#'  File paths for importing data require forward slashes. On some operating systems, such as Windows, copied and pasted file paths will have backward slashes and would need to be changed accordingly.
#' @param x the file path for the .AM file
#' @examples
#' #Import an AMAX sample and display the first six rows in the console
#' \dontrun{AM.4003 <- ImportAM("C:/Data/NRFAPeakFlow_v10/Suitable for QMED/4003.AM")}
#' \dontrun{head(AM.4003)}
#' @return A data.frame with columns; Date and Flow
#' @author Anthony Hammond
ImportAM <- function(x)
{
  AMAX <- read.table(x, sep = ",", col.names = c("Date", "Flow", "Stage"), colClasses = c("character", "numeric", "NULL"), fill = T, skip = 6)
  Row.Strt <- 1+which(AMAX[,1] == "[AM Values]")
  AM <- AMAX[Row.Strt:length(AMAX[,1])-1,]
  AM <- AM[-1,]
  Dates <- data.frame(as.Date(AM[,1], format = "%d %b %Y"), AM[,2])
  colnames(Dates) <- c("Date", "Flow")
  AM.c1 <- AMAX[,1]
  if(AMAX[1,1] == "[AM Rejected]"){
    Rej <- AM.c1[2:which(AM.c1 == "[AM Values]")-2][-1]
    RejEnd <- as.character(as.numeric(Rej)+1)
    WYDate <- as.Date(paste(Rej, "- 10 - 01"), format = "%Y - %m - %d")
    WYEnd <- as.Date(paste(RejEnd, "- 09 - 30"), format = "%Y - %m - %d")
    Date.Func <- function(x, y, z){
      isTRUE(x >= y & x <= z)
    }
    RejFunc <- function(ind){
      Rej <- NULL
      for(i in 1:nrow(Dates)) {Rej[i] <- Date.Func(Dates$Date[i], WYDate[ind], WYEnd[ind])}
      WhichTRUE <- which(Rej == TRUE)
      if(length(WhichTRUE) == 0) {return(NA)} else {return(WhichTRUE)}
    }
    if(length(WYDate) > 1) {
      RejInd <- list()
      for(i in 1:length(WYDate)) {RejInd[[i]] <- RejFunc(i)}
      RejInd <- unlist(RejInd)
      if(any(is.na(RejInd)) == TRUE) {
        NAInd <- which(is.na(RejInd == TRUE))
        if(length(NAInd) == length(WYDate)) {RejInd <- NULL} else {
          RejInd <- RejInd[-which(is.na(RejInd) == TRUE)]}}
    } else {
      RejInd <- NULL
      for(i in 1:nrow(Dates)) {RejInd[i] <- Date.Func(Dates$Date[i], WYDate, WYEnd)}
      RejInd <- which(RejInd == TRUE)
      if(length(RejInd > 0)) {RejInd <- RejInd} else {RejInd <- NULL}
    }
    if(is.null(RejInd) == TRUE) {Result <- Dates} else {Result <- Dates[-RejInd,]}}
  else {Result <- Dates}
  rownames(Result) <- seq(1, nrow(Result))
  return(Result)
}



#' Peaks over threshold (POT) data extraction
#'
#' Extracts independent peaks over a threshold from a sample
#'
#'  If the x argument is a numeric vector, the peaks will be extracted with no time information. x can instead be a data.frame with dates in the first column and the numeric vector in the second. In this latter case, the peaks will be timestamped and a hydrograph, including POT, will be plotted by default. The method of extracting independent peaks assumes that there is a value either side of which events can be considered independent. For example, if two peaks above the chosen threshold are separated by the daily mean flow, they could be considered independent, but not if the flow hasn't returned to the daily mean at any time between the peaks. Daily mean flow may not always be appropriate, in which case the 'div' argument can be adjusted. The function was coded primarily for river flow, but for extracting daily duration POT rainfall a div of zero could be used (making the assumption that rainfall events separated by a period of 24 hours with no rain are independent). For sub-daily rainfall, further work would be necessary after use of the function. For example, a div of zero could be used, and if two peaks are extracted but not separated by more than 24 hours, the lower of the two could be discarded. For this approach a data.frame with dates would be required. When plotted, the blue line is the threshold and the green line is the independence line (div).
#' @param x either a numeric vector or dataframe with date (or POSIXct) in the first column and hydrological variable in the second
#' @param div user chosen value, either side of which two peaks over the threshold are considered independent. Default is the mean of the sample
#' @param thresh user chosen threshold. Default is 0.975
#' @param Plot logical argument with a default of TRUE. When TRUE, the full hydrograph with the peaks over the threshold highlighted is plotted
#' @examples
#' #Extract POT data from the Thames daily mean flow 2000-10-01 to 2015-09-30 with
#' #div = mean and threshold = 0.90. Then display the first six rows
#' ThamesQPOT <- POTextract(ThamesPQ[, c(1,3)], thresh = 0.90)
#' head(ThamesQPOT)
#' #Extract Thames POT from only the numeric vector of flows and display the
#' #first six rows
#' ThamesQPOT <- POTextract(ThamesPQ[, 3], thresh = 0.90)
#' ThamesQPOT
#' #Extract the Thames POT precipitation with a div of 0 and the default
#' #threshold. Then display the first six rows
#' ThamesPPOT <- POTextract(ThamesPQ[, c(1,2)], div = 0)
#' head(ThamesPPOT)
#' @return Prints the number of peaks per year and returns a data.frame with columns; Date and peak, with the option of a plot. Or a numeric vector of peaks is returned if only a numeric vector of the hydrological variable is input.
#' @author Anthony Hammond

POTextract <- function(x, div = NULL, thresh = 0.975, Plot = TRUE)
{
  Low.Func <- function(TS)
  {
    L <- length(TS)-2
    L1 <- length(TS)-1
    L2 <- length(TS)
    Vec1 <- TS[1:L]
    Vec2 <- TS[2:L1]
    Vec3 <- TS[3:L2]
    P1 <- ifelse(Vec2 <= Vec1 & Vec2 <= Vec3 & Vec1!= Vec2, Vec2, NA)
    return(P1)
  }

  P.Func <- function(TS)
  {
    L <- length(TS)-2
    L1 <- length(TS)-1
    L2 <- length(TS)
    Vec1 <- TS[1:L]
    Vec2 <- TS[2:L1]
    Vec3 <- TS[3:L2]
    P1 <- ifelse(Vec2 >= Vec1 & Vec2 >= Vec3 & Vec1!= Vec2, Vec2, NA)
    return(P1)
  }

  VP <- function(j, mu)
  {
    maxll <-  suppressWarnings(max(which(lows[1:j] <= mu), na.rm = T))
    if(maxll == -Inf) {maxll <- j} else {maxll <- maxll}
    minlr <-   suppressWarnings(min(which(lows[j:length(lows)] <= mu), na.rm = T))
    if(minlr == Inf) {minlr <- j} else {minlr <- j+(minlr-1)}
    if(peaks[j] == max(peaks[maxll:minlr], na.rm = T)) {vp <- peaks[j]} else {vp <- NA}
    return(vp)
  }
  NAs <- FALSE
  if(inherits(x, "data.frame") == FALSE) {
    if(is.null(div)) {mu <- mean(x,na.rm = TRUE)} else {mu <- div}
    if(mu >= quantile(x,thresh, na.rm = TRUE)) stop("The event division must be significantly lower than the event threshold")
    QThresh <- as.numeric(quantile(x, thresh, na.rm = TRUE))
    MinMuP <- min(which(x <= mu))
    MaxMuP <- max(which(x <= mu))
    PkBegin <- max(x[1:MinMuP])
    PkEnd <- max(x[MaxMuP:length(x)])
    x <- x[MinMuP:MaxMuP]
    lows <- Low.Func(x)
    peaks <- P.Func(x)
    MinMuL <- min(which(lows <= mu))
    MaxMuL <- max(which(lows <= mu))
    pt.ind <- which(peaks > QThresh)
    pt <- peaks[pt.ind]
    l <- length(pt.ind)
    POT <- NULL
    {for (i in 1:l) {POT[i] <- VP(pt.ind[i], mu)}}
    if(PkBegin > QThresh) {POT <- append(PkBegin, POT)}
    if(PkEnd > QThresh) {POT <- append(POT, PkEnd)}
    if(NAs == TRUE) {POT <- POT} else {POT <- POT[which(is.na(POT) == FALSE)]}
    return(POT)}
  else {
    if(is.null(div)) {mu <- mean(x[,2],na.rm = TRUE)} else {mu <- div}
    if(mu >= quantile(x[,2],thresh, na.rm = TRUE)) stop("The event division must be significantly lower than the event threshold")
    QThresh <- as.numeric(quantile(x[,2], thresh, na.rm = TRUE))
    MinMuP <- min(which(x[,2] <= mu), na.rm = TRUE)
    MaxMuP <- max(which(x[,2] <= mu), na.rm = TRUE)
    PkBegin <- which(x[1:MinMuP,2] == max(x[1:MinMuP,2], na.rm = TRUE))
    PkEnd <- which(x[MaxMuP:length(x[,2]),2] == max(x[MaxMuP:length(x[,2]),2], na.rm = TRUE))
    DBegin <- x[PkBegin,]
    DEnd <- x[((MaxMuP-1)+PkEnd),]
    colnames(DBegin) <- c("Date", "peak")
    colnames(DEnd) <- c("Date", "peak")
    xUse <- x[MinMuP:MaxMuP,]
    lows <- Low.Func(xUse[,2])
    peaks <- P.Func(xUse[,2])
    pt.ind <- which(peaks > QThresh)
    pt <- peaks[pt.ind]
    L <- length(pt.ind)
    POT <- NULL
    {for (i in 1:L) {POT[i] <- VP(pt.ind[i], mu)}}
    POT.Dates <- (xUse[,1][pt.ind])+1
    res <- data.frame(POT.Dates, POT)
    colnames(res) <- c("Date", "peak")
    if(DBegin$peak > QThresh) {res <- rbind(DBegin, res)}
    if(DEnd$peak > QThresh) {res <- rbind(res, DEnd)}
    rownames(res) <- seq(1, length(res[,2]))
    if(NAs == TRUE) {res <- res} else {res <- res[which(is.na(res$peak) == FALSE), ]}
    if(Plot == TRUE) {plot(x, type = "l", main = "Peaks over threshold", ylab = "Quantile", xlab= "Date")
      abline(h = quantile(x[,2], thresh, na.rm = TRUE), col = "blue")
      points(res, col = "red")
      abline(h = mu, col = rgb(0, 0.7, 0.3))}
    Years <- as.numeric((x[length(x[,1]),1]-x[1,1])/365.25)
    PPY <- length(res[,1])/Years
    print(paste("Peaks per year:", format(PPY, trim = TRUE), sep = " "))
    return(res)
  }
}
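
# Illustration only: a hypothetical helper (not exported) showing how the
# independence division affects extraction. Raising div towards the threshold
# relaxes the independence criterion (the flow need only dip below a higher
# value between peaks), so more peaks per year tend to be extracted; lowering
# div is stricter. Plots are suppressed for a quiet comparison.
POTdivSketch <- function(div1 = 20, div2 = 60) {
  Strict <- POTextract(ThamesPQ[, c(1, 3)], div = div1, thresh = 0.90, Plot = FALSE)
  Relaxed <- POTextract(ThamesPQ[, c(1, 3)], div = div2, thresh = 0.90, Plot = FALSE)
  c(StrictCount = nrow(Strict), RelaxedCount = nrow(Relaxed))
}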

#' Annual maximum extraction
#'
#' Extracts the annual maximum peaks from a data.frame which has dates in the first column and variable in the second.
#'
#'  The peaks are extracted based on the UK hydrological year, which starts October 1st and ends September 30th. If there are partial years (years with missing data) the maximum value may not be the true annual maximum of the year. If a full year has only missing data, the resulting -Inf for that year is removed and a warning is printed.
#' @param x a data.frame with dates (or POSIXct) in the first column and variable in the second
#' @param Plot a logical argument with a default of TRUE. If TRUE the extracted annual maximum is plotted
#' @examples
#' #Extract the Thames AMAX daily mean flow and display the first six rows
#' ThamesAM <- AMextract(ThamesPQ[,c(1,3)])
#' head(ThamesAM)
#' @return a data.frame with columns; WaterYear and AM
#' @author Anthony Hammond
AMextract <- function(x, Plot = TRUE){
  Dates <- as.Date(x[,1])
  x <- data.frame(Dates, x[,2])
  Date1 <- x[1,1]
  DateLst <- x[length(x[,1]),1]
  DateExtract <- function(d){
    yr <- as.POSIXlt(d)$year+1900
    mnth <- as.POSIXlt(d)$mon+1
    return(c(yr, mnth))
  }
  Date1.ext <- DateExtract(Date1)
  DateLst.ext <- DateExtract(DateLst)
  if(Date1.ext[2] < 10) {WY <- Date1.ext[1]-1} else {WY <- Date1.ext[1]}
  if(DateLst.ext[2] < 10) {WYend <- DateLst.ext[1]-1} else {WYend <- DateLst.ext[1]}
  WYrSt <- as.Date(paste(WY, "10", "01", sep = "-"))
  WYrSt.to <- as.Date(paste(WYend, "10", "01", sep = "-"))
  YrStarts <- seq(WYrSt, WYrSt.to, by = "year")
  WYendst <- as.Date(paste(WY+1, "09", "30", sep = "-"))
  YrEnds <- seq(WYendst, length.out = length(YrStarts), by = "year")
  AM <- NULL
  for (i in 1:length(YrStarts)) {AM[i] <- suppressWarnings(max(x[,2][x[,1] >= YrStarts[i] & x[,1] <= YrEnds[i]], na.rm = TRUE))}
  WaterYear <- seq(WY, WYend)
  AMDF <- data.frame(WaterYear, AM)
  InfInd <- which(AMDF$AM == -Inf)
  if(length(InfInd) > 0) {AMDF <- AMDF[-InfInd,]} else {AMDF <- AMDF}
  if(length(InfInd) > 0) {print("Warning: at least one year had no data and returned -inf. The year/s in question have been removed")}
  if(Plot == TRUE) {plot(WaterYear, AM, type = "h", col = rgb(0,0.3,0.7), main = "Hydrological annual maximum sequence", ylab = "Annual maximum quantiles")}
  return(AMDF)
}


# Uncertainty -------------------------------------------------------------

#' Uncertainty quantification for gauged and ungauged pooled estimates
#'
#' Quantification of uncertainty for pooling results for the gauged and ungauged case
#'
#'  Uncertainty in the ungauged case is calculated as equations 8, 9, and 10 in Hammond, A. (2021). Sampling uncertainty of UK design flood estimation. Hydrology Research, 52 (6), 1357–1371. The 68 percent and 95 percent intervals are returned. For the gauged case the pooled group is bootstrapped 500 times and the enhanced single site weighted linear skewness (LSkew) and linear coefficient of variation (Lcv) are calculated 500 times accordingly and 500 associated growth factors are calculated. Each growth factor (GF) is multiplied by a randomly selected median annual maximum flow (QMED) from the uncertainty distribution of median estimates for the gauged subject site. The distribution of medians is derived from bootstrapping the gauged site 500 times. The intervals are then the upper and lower quantiles (depending on the conf input) of the distribution of median * GFs. For the gauged case the user can choose the level for the intervals. The default is 0.95. Occasionally the single site central estimate will be outside the uncertainty intervals. In these cases the intervals are widened to incorporate it. i.e. if above the intervals, the upper interval is increased to the single site estimate and vice versa if below. This occurs regardless of the confidence setting. For details about the calculations of weighted growth curves & urban adjustment see the PoolEst() function details. A trend option is not included within the Uncertainty function and would need to be considered separately if used in PoolEst. An indication of the uncertainty for trend applied in PoolEst is provided in the PoolEst function details. The gauged method is also detailed in Hammond (2021).
#' @param x the pooled group derived from the Pool() function
#' @param gauged a logical argument with a default of FALSE. If FALSE the uncertainty intervals are calculated for the ungauged case. If TRUE they are calculated for the gauged case
#' @param RP the return period of interest. Default is 100
#' @param dist a choice of distribution to use for the estimates. Choices are "GEV", "GenLog" or "Gumbel". The default is "GenLog"
#' @param qmed the QMED estimate for the ungauged case. Or for the gauged if the user wishes to override the median from the NRFA data
#' @param no.Donors number of donors used for estimation of QMED in the ungauged case
#' @param UrbAdj logical argument with a default of FALSE. If TRUE, an urban adjustment is applied to the pooled growth curve
#' @param CDs catchment descriptors derived from either GetCDs or ImportCDs. Necessary if a UrbAdj is TRUE
#' @param conf the confidence level of the intervals for the gauged case. Default is 0.95. Must be between 0 and 1
#' @examples
#' #Get CDs, form an ungauged pooling group and quantify the uncertainty of the
#' #50-year pooled estimate when using a CDs estimate of QMED with no donors
#' CDs.203018 <- GetCDs(203018)
#' Pool.203018 <- Pool(CDs.203018, exclude = 203018)
#' Uncertainty(Pool.203018, qmed  = QMED(CDs.203018), no.Donors = 0, RP = 50)
#' #Form pooling group with subject site included. Quantify the uncertainty of the
#' #50-year pooled estimate at the 99% level.
#'  Pool.203018 <- Pool(CDs.203018)
#'  Uncertainty(Pool.203018, gauged = TRUE, RP = 50, conf = 0.99)
#' @return For the ungauged case a data.frame of four values relating to the lower 68 and upper 68 percent interval and the lower 95 and upper 95 percent intervals. These are headed by the associated percentiles. For the gauged case a numeric vector of two values is provided with the lower and upper intervals of the chosen conf level. The uncertainty function doesn't have a trend option; if trend is used in the pooled estimate this would need to be considered and the intervals adjusted accordingly. However, a greater uncertainty should be considered.
#' @author Anthony Hammond
Uncertainty <- function(x, gauged = FALSE, RP = 100, dist = "GenLog", qmed = NULL, no.Donors = 2, UrbAdj = FALSE, CDs = NULL, conf = 0.95){
  if(is.data.frame(x) == FALSE) {stop("x must be a pooled group. Pooled groups can be created with the Pool() function")}
  if(ncol(x) != 24) stop ("x must be a pooled group. Pooled groups can be created with the Pool() function")
  if(UrbAdj == TRUE & is.null(CDs) == TRUE) stop("if UrbAdj = TRUE, CD object is necessary")
  if(is.null(CDs) == FALSE) {URBEXT2000 <- CDs[18,2]}
  if(gauged == FALSE) {
    if(is.null(qmed) == TRUE) stop("Need to input qmed & no.of donors")
    y <- -log(-log(1-1/RP))
    if(no.Donors == 0) {FSE <- 1.4665 - 0.0135*y + 0.0096*y^2}
    if(no.Donors == 1) {FSE <- 1.427 - 0.0134*y + 0.0098*y^2}
    if(no.Donors == 2) {FSE <- 1.4149 - 0.0163*y + 0.0102*y^2}
    Central <- as.numeric(PoolEst(x = x, gauged = gauged, RP = RP, dist = dist, QMED = qmed, trend = FALSE, UrbAdj = UrbAdj, CDs = CDs)[[1]][2])
    L68 <- Central/FSE
    U68 <- Central*FSE
    L95 <- Central/FSE^2
    U95 <- Central*FSE^2
    df <- data.frame(L68, U68, L95, U95)
    colnames(df) <- c("16%", "84%", "2.5%", "97.5%")
    return(df)
  } else {
    Unc.gauged <- function(x, RP, dist = "GenLog", UrbAdj = FALSE, trend = FALSE, qmed = NULL, fse = FALSE, conf = 0.68) {
      if(dist == "GenLog") {func <- GenLogGF}
      if(dist == "GEV") {func <- GEVGF}
      if(dist == "Gumbel") {func <- GumbelGF}
      Boot <- function(AM.ref) {
        x <- GetAM(AM.ref)
        x <- x[,2]
        resample <- sample(x, size = length(x)*500, replace = TRUE)
        mat <- matrix(resample, nrow = length(x), ncol = 500)
        lcvs <- apply(mat, 2, Lcv)
        lskews <- apply(mat, 2, LSkew)
        dfRatios <- data.frame(lcvs, lskews)
        return(dfRatios)
      }

      SiteRatios <- NULL
      for(i in 1:length(x$N)) {SiteRatios[[i]] <- Boot(rownames(x)[i])}

      LratTemp <- function(x, j){
        lcvs1 <- NULL
        for (i in 1:length(x$N)) {lcvs1[i] <- SiteRatios[[i]][j,1]}
        lskews1 <- NULL
        for (i in 1:length(x$N)) {lskews1[i] <- SiteRatios[[i]][j,2]}
        PoolTemp <- x
        PoolTemp[,16] <- lcvs1
        PoolTemp[,17] <- lskews1
        lcvGTemp <- WGaugLcv(PoolTemp)
        lskewGTemp <- WGaugLSkew(PoolTemp)
        df <- data.frame(lcvGTemp, lskewGTemp)
        return(df)
      }

      Lratios <- LratTemp(x, 1)
      for(j in 2:500) {Lratios <- rbind(Lratios, LratTemp(x, j))}
      if(UrbAdj == TRUE) {LCVs <- LcvUrb(Lratios[,1], URBEXT2000)} else {LCVs <- Lratios[,1]}
      if(UrbAdj == TRUE) {LSKEWs <- LSkewUrb(Lratios[,2], URBEXT2000)} else {LSKEWs <- Lratios[,2]}
      if(dist == "Gumbel") {Zts <- func(LCVs, RP = RP)} else {Zts <- func(LCVs, LSKEWs, RP = RP)}
      AM <- GetAM(rownames(x)[1])
      AM <- AM[,2]
      resample <- sample(AM, size = length(AM)*500, replace = TRUE)
      mat <- matrix(resample, nrow = length(AM), ncol = 500)
      Meds <- apply(mat, 2, median)
      if(is.null(qmed) == TRUE) {QMEDCentral <- median(AM)} else {QMEDCentral <- qmed}
      lcvCentral <- WGaugLcv(x)
      lskewCentral <- WGaugLSkew(x)
      if(UrbAdj == TRUE) {lcvCentral <- LcvUrb(lcvCentral, URBEXT2000 = URBEXT2000)}
      if(UrbAdj == TRUE) {lskewCentral <- LSkewUrb(lskewCentral, URBEXT2000 = URBEXT2000)}
      if(dist == "Gumbel") {ZtCentral <- func(lcv = lcvCentral, RP = RP)} else {ZtCentral <- func(lcv = lcvCentral, lskew = lskewCentral, RP = RP)}
      FSEest <- function(x) {exp(sd(log(x) - mean(log(x))))}
      #if(trend == TRUE){Meds <- Meds*1.082} else {Meds <- Meds}
      Res500 <- Meds*Zts
      FSE <- FSEest(Res500)
      Intervals <- quantile(Res500, c((1-conf)/2, 1-(1-conf)/2))
      Res <- list(FSE, Intervals)
      names(Res) <- c("Factorial standard error", "Intervals")
      if(fse == TRUE) {return(Res)} else {return(Intervals)}
    }
    Res <- Unc.gauged(x = x, RP = RP, dist = dist, trend = FALSE, UrbAdj = UrbAdj, qmed = qmed, fse = FALSE, conf = conf)
    if(dist == "GenLog") {func <- GenLogGF}
    if(dist == "GEV") {func <- GEVGF}
    if(dist == "Gumbel") {func <- GumbelGF}
    MedianAM <- GetQMED(rownames(x)[1])
    if(is.null(qmed) == TRUE) {QMEDCentral <- MedianAM} else {QMEDCentral <- qmed}
    if(dist == "Gumbel") {SSEst <- QMEDCentral*func(x[1,16], RP = RP)} else {SSEst <- QMEDCentral*func(x[1,16], x[1,17], RP = RP)}
    if(SSEst < Res[1]) {Res[1] <- SSEst}
    if(SSEst > Res[2]) {Res[2] <- SSEst}
  }
  return(Res)
}
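
# Illustration only: a hypothetical helper (not exported) laying out the
# ungauged factorial standard error equations used in Uncertainty() (equations
# 8 to 10 of Hammond, 2021) and the 68 and 95 percent intervals they imply
# around a central ungauged pooled estimate.
UngaugedIntervalsSketch <- function(Central, RP = 100, no.Donors = 2) {
  y <- -log(-log(1 - 1/RP))
  if(no.Donors == 0) {FSE <- 1.4665 - 0.0135*y + 0.0096*y^2}
  if(no.Donors == 1) {FSE <- 1.427 - 0.0134*y + 0.0098*y^2}
  if(no.Donors == 2) {FSE <- 1.4149 - 0.0163*y + 0.0102*y^2}
  data.frame("16%" = Central/FSE, "84%" = Central*FSE,
             "2.5%" = Central/FSE^2, "97.5%" = Central*FSE^2, check.names = FALSE)
}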


#' Uncertainty for the single site
#'
#' Quantifies the aleatoric uncertainty for a single site estimate, by bootstrapping the sample
#'
#' The bootstrapping procedure resamples from the sample N*500 times with replacement. After splitting the resamples into 500 samples of size N, the statistic of interest is calculated on each. The upper and lower quantiles of the resulting distribution are used as the quantification of uncertainty. Any function that provides an estimate based on a sample of data can be used, including any function that provides estimates as a function of return period.
#' @param x a numeric vector. The sample of interest
#' @param func the function to be applied
#' @param conf the confidence level of the intervals
#' @param RP return period. Necessary if func requires RP
#' @examples
#' #Extract an AMAX sample and quantify uncertainty for the GEV estimated 50-year flow
#' AM.203018 <- GetAM(203018)
#' UncSS(AM.203018$Flow, func = GEVAM, RP = 50)
#' #Quantify uncertainty for the sample standard deviation at the 90 percent confidence level
#' UncSS(AM.203018$Flow, func = sd, conf = 0.90)
#' @return A data.frame of three values; central, lower, and upper bootstrapped estimates.
#' @author Anthony Hammond
UncSS <- function(x, func, conf = 0.95, RP = FALSE) {
  if(is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  resample <- sample(x, size = length(x)*500, replace = TRUE)
  mat <- matrix(resample, nrow = length(x), ncol = 500)
  if(RP == FALSE) {res <- apply(mat, 2, func)} else  {res <- apply(mat, 2, func, RP)}
  lint <- (1-conf)/2
  uint <- ((1-conf)/2)+conf
  lower <-  quantile(res, lint, na.rm = TRUE)
  upper <- quantile(res, uint, na.rm = TRUE)
  centre <- quantile(res, 0.5,na.rm = TRUE)
  frame <- data.frame(centre, lower, upper)
  rownames(frame) <- ""
  return(frame)
}
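
# A minimal sketch of the resampling pattern used by UncSS, assuming a hypothetical
# numeric sample 'am' (commented out so it is not evaluated when the package is loaded):
# resample N*500 values with replacement, reshape into 500 columns of length N, apply
# the statistic of interest to each column, and take quantiles of the result.
# am <- rnorm(40, mean = 100, sd = 30)
# boots <- matrix(sample(am, size = length(am)*500, replace = TRUE), nrow = length(am))
# stats <- apply(boots, 2, median)
# quantile(stats, c(0.025, 0.5, 0.975))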



# Diagnostics -------------------------------------------------------------

#' Goodness of tail fit (GoTF).
#'
#' Provides two GoTF scores for the generalised extreme value (GEV), Gumbel, generalised Pareto (GenPareto), or generalised logistic (GenLog) distribution, or for any simulated numeric distribution.
#'
#' The closer the results are to one, the better the fit. The GoTF is calculated by simulating the sample 5000 times with the desired distribution and calculating a statistic (in this case the coefficient of variation (CV) and the mean) for the upper 25 percent of each sample. The same statistics are calculated for the subject sample and compared to the simulated distribution. The number of statistics from the simulated samples that are greater than the sample statistic is divided by 5000. The GoTF is this latter number where it is <0.5, and 1 minus this latter number where it is >0.5; the result is then rescaled (divided by 0.5) so that a perfect score is one. If any further distributions are of interest, the representative distribution (RepDist) argument can be used. In this case a simulation of 5000*length(x) values from that distribution can be supplied as RepDist, in place of using the dist input. If a sample that is not equal to 5000 times length(x) is supplied as RepDist, it will be resampled with replacement. An alternative is to use the pars or GF arguments, which simulate from the distribution choice (dist) based on the parameters (location, scale, shape) or on the growth factor (GF) inputs: the linear coefficient of variation (Lcv), linear skewness (LSkew), and the median annual maximum flow (QMED). The resulting probabilities for each statistic (the GoTF score) represent the probability of observing that statistic if the sample has the same underlying distribution as the one under scrutiny.
#' @param x a numeric vector. The sample of interest
#' @param dist a choice of distribution to be assessed. The choices are "GenLog", "GEV", "Gumbel", or "GenPareto". The default is "GenLog"
#' @param pars numeric vector of parameters for the GEV, GenLog, Gumbel, or GenPareto distributions. In the order of location, scale, shape (excluding shape for Gumbel)
#' @param GF a numeric vector of length three; the growth factor statistics and QMED, in the order Lcv, LSkew, QMED
#' @param RepDist a simulated sample (ideally of size = 5000*n) of a representative distribution to compare to the sample of interest
#' @examples
#' #Get an AMAX sample and derive GoTF score against the GenLog and the GEV distributions
#' \donttest{AM <- GetAM(203018)}
#' \donttest{GoTF(AM$Flow, dist = "GenLog")}
#' \donttest{GoTF(AM$Flow, dist = "GEV")}
#' #Derive the GF parameters for the ungauged pooled estimate for the AM and
#' #calculate a GoTF for GenLog (assuming the gauged QMED)
#' #For this assume 0.16 and 0.2 as the ungauged Lcv & LSkew pooled estimates
#'  \donttest{GoTF(AM$Flow, GF = c(0.16, 0.2, median(AM$Flow)))}
#' #Calculate the GoTF based on inadequately estimated parameters of the GenLog.
#' \donttest{Loc <- mean(AM$Flow)}
#' \donttest{Scale <- sd(AM$Flow)}
#' \donttest{Skew <- 1-(median(AM$Flow)/mean(AM$Flow))}
#' \donttest{GoTF(AM$Flow, pars = c(Loc, Scale, Skew))}
#' @return A data.frame with one row of probabilities representing the GoTF. The first column is the Tail cv and the second is the tail mean.
#' @author Anthony Hammond

GoTF <- function(x, dist = "GenLog", pars = NULL, GF = NULL, RepDist = NULL){
  if(is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  if(is.null(RepDist) == FALSE) {
    if(length(RepDist) != (5000*length(x))) {
      warning("RepDist not equal to 5000 * length(x); resampling has been used")
      RepDist <- sample(RepDist, 5000*length(x), replace = TRUE)
    }
    MMR <- function(x) {sd(x[x > quantile(x, 0.75, na.rm = TRUE)])/mean(x[x > quantile(x, 0.75, na.rm = TRUE)])}
    Mat.1 <- matrix(RepDist, nrow = length(x), ncol = 5000)
    MMRs <- apply(Mat.1, 2, MMR)
    MMRo <- MMR(x)
    SortMMRs <- sort(MMRs)
    Ind <- suppressWarnings(min(which(SortMMRs >  MMRo)))
    if(Ind == Inf | Ind == 0) {Prop <- 0.0002} else {Prop <- Ind/5000}
    if(Prop > 0.5) {res <- 1-Prop} else {res <- Prop}
    if(Ind == Inf | res == 0) {res <- "< 0.0002"} else {res <- res/0.5}

    TailMean <- function(x) {mean(x[x > quantile(x, 0.75, na.rm = TRUE)])}
    TMs <- apply(Mat.1, 2, TailMean)
    TMo <- TailMean(x)
    SortTMs <- sort(TMs)
    Ind2 <- suppressWarnings(min(which(SortTMs >  TMo)))
    if(Ind2 == Inf | Ind2 == 0) {Prop2 <- 0.0002} else {Prop2 <- Ind2/5000}
    if(Prop2 > 0.5) {res2 <- 1-Prop2} else {res2 <- Prop2}
    if(Ind2 == Inf | res2 == 0) {res2 <- "< 0.0002"} else {res2 <- res2/0.5}

    ResDF <- data.frame(res, res2)
    colnames(ResDF) <- c("p(Tail cv)", "p(Tail mean)")
    return(ResDF)

  } else {

    if(dist == "GenLog") {funcX <- GenLogAM
    funcPars <- GenLogEst
    funcGF <- GenLogGF}
    if(dist == "GEV")
    {funcX <- GEVAM
    funcPars <- GEVEst
    funcGF <- GEVGF}
    if(dist == "Gumbel")
    {funcX <- GumbelAM
    funcPars <- GumbelEst
    funcGF <- GumbelGF}
    if(dist == "GenPareto")
    {funcX <- GenParetoPOT
    funcPars <- GenParetoEst
    funcGF <- GenParetoGF}
    MMR <- function(x) {sd(x[x > quantile(x, 0.75, na.rm = TRUE)])/mean(x[x > quantile(x, 0.75, na.rm = TRUE)])}
    Rands <- 1/runif(length(x)*5000)
    if(is.null(pars) == TRUE & is.null(GF) == TRUE) {Sims <- funcX(x, RP = Rands)}
    if(is.null(pars) == FALSE) {
      if(dist == "Gumbel") {Sims <- funcPars(pars[1], pars[2], RP = Rands)} else
      {Sims <- funcPars(pars[1], pars[2], pars[3], RP = Rands)}}
    if(is.null(GF) == FALSE)  {
      if(dist == "Gumbel") {Sims <- funcGF(GF[1], RP = Rands)*GF[3]} else
      {Sims <- funcGF(GF[1], GF[2], RP = Rands)*GF[3]}}

    Mat.1 <- matrix(Sims, nrow = length(x), ncol = 5000)
    MMRs <- apply(Mat.1, 2, MMR)
    MMRo <- MMR(x)
    SortMMRs <- sort(MMRs)
    Ind <- suppressWarnings(min(which(SortMMRs >  MMRo)))
    if(Ind == Inf | Ind == 0) {Prop <- 0.0002} else {Prop <- Ind/5000}
    if(Prop > 0.5) {res <- 1-Prop} else {res <- Prop}
    if(Ind == Inf | res == 0) {res <- "< 0.0002"} else {res <- res/0.5}

    TailMean <- function(x) {mean(x[x > quantile(x, 0.75, na.rm = TRUE)])}
    TMs <- apply(Mat.1, 2, TailMean)
    TMo <- TailMean(x)
    SortTMs <- sort(TMs)
    Ind2 <- suppressWarnings(min(which(SortTMs >  TMo)))
    if(Ind2 == Inf | Ind2 == 0) {Prop2 <- 0.0002} else {Prop2 <- Ind2/5000}
    if(Prop2 > 0.5) {res2 <- 1-Prop2} else {res2 <- Prop2}
    if(Ind2 == Inf | res2 == 0) {res2 <- "< 0.0002"} else {res2 <- res2/0.5}
    if(is.character(res) == FALSE) {res <- round(res, 4)}
    if(is.character(res2) == FALSE) {res2 <- round(res2, 4)}
    ResDF <- data.frame(res, res2)
    colnames(ResDF) <- c("p(Tail cv)", "p(Tail mean)")
    return(ResDF)

  }
}
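
# A minimal sketch of the tail comparison underlying GoTF, assuming an AMAX sample
# obtained with GetAM (the station choice is illustrative; commented out so it is not
# evaluated when the package is loaded). Simulate 5000 samples of size n from the GenLog
# fitted to the sample, compute the CV of the upper 25 percent of each, and see where the
# observed tail CV sits within the simulated distribution (scaled so that one is central).
# am <- GetAM(203018)$Flow
# TailCV <- function(x) {sd(x[x > quantile(x, 0.75)])/mean(x[x > quantile(x, 0.75)])}
# Sims <- matrix(GenLogAM(am, RP = 1/runif(length(am)*5000)), nrow = length(am))
# Prop <- mean(apply(Sims, 2, TailCV) > TailCV(am))
# ifelse(Prop > 0.5, 1-Prop, Prop)/0.5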

#' Goodness of tail fit (GoTF) for pooling groups
#'
#' Calculates GoTF scores for pooling groups for the generalised extreme value (GEV), generalised logistic (GenLog), and Gumbel distributions
#'
#' The GoTF for pooling groups is calculated by standardising all the sites in the group (dividing each annual maximum series by its median) and calculating the linear coefficient of variation (Lcv) and linear skewness (LSkew) of the pooled data as if it were one sample. The GoTF() function is then applied to the pooled data with the GF argument, using the aforementioned Lcv and LSkew and a QMED of one. The GoTF scores are calculated for the GEV, GenLog, and Gumbel distributions and can be used to assist the choice of distribution for the final estimates. See the details of the GoTF function for information about the resulting values. The closer the scores are to one, the better the tail fit.
#' @param x pooling group derived from the Pool function
#' @examples
#' #Get CDs, create pooled group and calculate GoTFs.
#' \donttest{CDs.203018 <- GetCDs(203018)}
#' \donttest{Pool.203018 <- Pool(CDs.203018)}
#' \donttest{GoTFPool(Pool.203018)}
#' @return A list of three data.frames, each with one row of the two GoTF values related to the columns p(Tail cv) & p(Tail mean). See GoTF details. The first data.frame is for the GEV distribution, the second is for the GenLog distribution, and the third is for the Gumbel distribution.
#' @author Anthony Hammond
GoTFPool <- function(x) {
  if(is.data.frame(x) == FALSE) {stop("x must be a pooled group. Pooled groups can be created with the Pool() function")}
  if(ncol(x) != 24) stop ("x must be a pooled group. Pooled groups can be created with the Pool() function")
  Sites <- rownames(x)
  SiteScale <- GetAM(Sites[1])[,2]/median(GetAM(Sites[1])[,2])
  for(i in 2:length(Sites)) {SiteScale <- append(SiteScale,GetAM(Sites[i])[,2]/median(GetAM(Sites[i])[,2]))}
  lcv <- Lcv(SiteScale)
  lskew <- LSkew(SiteScale)
  pGEV <- GoTF(SiteScale, dist = "GEV", GF = c(lcv, lskew, 1))
  pGenLog <- GoTF(SiteScale, dist = "GenLog", GF = c(lcv, lskew, 1))
  pGumbel <- GoTF(SiteScale, dist = "Gumbel", GF = c(lcv, lskew, 1))
  ResList <- list(pGEV, pGenLog, pGumbel)
  names(ResList) <- c("GEV", "GenLog", "Gumbel")
  return(ResList)
}
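
# An equivalent sketch of the standardisation used by GoTFPool, assuming a pooling
# group created with Pool() (commented out so it is not evaluated when the package is
# loaded): divide each site's AMAX series by its median, concatenate the results, and
# score the pooled growth data (QMED = 1) with GoTF.
# PoolGrp <- Pool(GetCDs(203018))
# Scaled <- unlist(lapply(rownames(PoolGrp), function(id) {Q <- GetAM(id)[,2]; Q/median(Q)}))
# GoTF(Scaled, dist = "GenLog", GF = c(Lcv(Scaled), LSkew(Scaled), 1))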

#' Zdist Goodness of fit measure for pooling groups
#'
#' Calculates the goodness of fit scores for pooling groups, using the method outlined in the Flood Estimation Handbook (1999), volume 3.
#'
#' The goodness of fit measure was developed by Hosking & Wallis and can be found in their book 'Regional Frequency Analysis: an approach based on L-moments' (1997), as well as in the Flood Estimation Handbook, volume 3.
#' @param x pooling group derived from the Pool() function
#' @examples
#' #Get CDs, form a pooling group and calculate the Zdist
#' CDs.203018 <- GetCDs(203018)
#' Pool.203018 <- Pool(CDs.203018)
#' Zdists(Pool.203018)
#' @return A list with the first element a data.frame of two goodness of fit scores, in columns "GEV" and "GenLog". The second element is a character string stating which has the best fit.
#' @author Anthony Hammond

Zdists <- function(x){
  if(is.data.frame(x) == FALSE) {stop("x must be a pooled group. Pooled groups can be created with the Pool() function")}
  if(ncol(x) != 24) stop ("x must be a pooled group. Pooled groups can be created with the Pool() function")
  tR4 <- mean(x$LKurt)
  tR3 <- mean(x$LSkew)
  Pool.Kap.pars <- function(x)
  {
    l1 <- 1
    l2 <- mean(x$Lcv)
    lskew <- mean(x$LSkew)
    lkurt <- mean(x$LKurt)
    pars <- c(l1,l2,lskew, lkurt)
    return(pars)
  }
  Lcv <- function(x)
  {
    Sort.x <- sort(x)
    Rank <- seq(1, length(x))
    b0 <- mean(x)
    b1 <- mean((Rank-1)/(length(x)-1)*Sort.x)
    b2 <- mean(((Rank-1)*(Rank-2))/((length(x)-1)*(length(x)-2))*Sort.x)
    b3 <- mean(((Rank-1)*(Rank-2)*(Rank-3))/((length(x)-1)*(length(x)-2)*(length(x)-3))*Sort.x)
    L1 <- b0
    L2 <- 2*b1-b0
    Lcv <- L2/L1
    return(Lcv)
  }
  LSkew <- function(x)
  {
    Sort.x <- sort(x)
    Rank <- seq(1, length(x))
    b0 <- mean(x)
    b1 <- mean((Rank-1)/(length(x)-1)*Sort.x)
    b2 <- mean(((Rank-1)*(Rank-2))/((length(x)-1)*(length(x)-2))*Sort.x)
    b3 <- mean(((Rank-1)*(Rank-2)*(Rank-3))/((length(x)-1)*(length(x)-2)*(length(x)-3))*Sort.x)
    L1 <- b0
    L2 <- 2*b1-b0
    L3 <- 6*b2-6*b1+b0
    LSkew <- L3/L2
    return(LSkew)
  }

  LKurt <- function(x)
  {
    Sort.x <- sort(x)
    Rank <- seq(1, length(x))
    b0 <- mean(x)
    b1 <- mean((Rank-1)/(length(x)-1)*Sort.x)
    b2 <- mean(((Rank-1)*(Rank-2))/((length(x)-1)*(length(x)-2))*Sort.x)
    b3 <- mean(((Rank-1)*(Rank-2)*(Rank-3))/((length(x)-1)*(length(x)-2)*(length(x)-3))*Sort.x)
    L2 <- 2*b1-b0
    L4 <- 20*b3-30*b2+12*b1-b0
    LKurt <- L4/L2
    return(LKurt)
  }

  Kap.pars <- function(L1, L2, LSkew, LKurt)
  {
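    # Fit the four-parameter kappa distribution (location, scale, k, h) to the pooled
    # L-moments: k and h are found by numerically matching the theoretical L-skewness
    # and L-kurtosis, after which the scale and location follow analytically.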

    Kap.opt <- function(LSkew,LKurt)
    {
      min.SSR <- function(par)
      {

        if (par[2]>0)
        {
          g1 <- (1*gamma(1+par[1])*gamma(1/par[2]))/(par[2]^(1+par[1])*gamma(1+par[1]+1/par[2]))
          g2 <- (2*gamma(1+par[1])*gamma(2/par[2]))/(par[2]^(1+par[1])*gamma(1+par[1]+2/par[2]))
          g3 <- (3*gamma(1+par[1])*gamma(3/par[2]))/(par[2]^(1+par[1])*gamma(1+par[1]+3/par[2]))
          g4 <- (4*gamma(1+par[1])*gamma(4/par[2]))/(par[2]^(1+par[1])*gamma(1+par[1]+4/par[2]))
        }
        else
        {
          g1 <- (1*gamma(1+par[1])*gamma(-par[1]-1/par[2]))/((-par[2])^(1+par[1])*gamma(1-1/par[2]))
          g2 <- (2*gamma(1+par[1])*gamma(-par[1]-2/par[2]))/((-par[2])^(1+par[1])*gamma(1-2/par[2]))
          g3 <- (3*gamma(1+par[1])*gamma(-par[1]-3/par[2]))/((-par[2])^(1+par[1])*gamma(1-3/par[2]))
          g4 <- (4*gamma(1+par[1])*gamma(-par[1]-4/par[2]))/((-par[2])^(1+par[1])*gamma(1-4/par[2]))
        }
        t3.kap <- (-g1+3*g2-2*g3)/(g1-g2)
        t4.kap <- (g1 - 6*g2 + 10*g3 -5*g4)/(g1-g2)
        ss <- sum((t3.kap - LSkew)^2)+((t4.kap-LKurt)^2)
      }
      Op <- suppressWarnings(optim(par = c(0.01, -0.4), fn = min.SSR))
      return(Op)

    }

    Kap.kh <- Kap.opt(LSkew, LKurt)$par
    K <- Kap.kh[1]
    H <- Kap.kh[2]

    gr <- function(k, h)
    {
      if (h>0)
      {
        g1 <- (1*gamma(1+k)*gamma(1/h))/(h^(1+k)*gamma(1+k+1/h))
        g2 <- (2*gamma(1+k)*gamma(2/h))/(h^(1+k)*gamma(1+k+2/h))
        g3 <- (3*gamma(1+k)*gamma(3/h))/(h^(1+k)*gamma(1+k+3/h))

      }
      else
      {
        g1 <- (1*gamma(1+k)*gamma(-k-1/h))/((-h)^(1+k)*gamma(1-1/h))
        g2 <- (2*gamma(1+k)*gamma(-k-2/h))/((-h)^(1+k)*gamma(1-2/h))
        g3 <- (3*gamma(1+k)*gamma(-k-3/h))/((-h)^(1+k)*gamma(1-3/h))
      }
      vec <- c(g1,g2)
      return(vec)
    }
    g12 <- gr(k = K,h = H)
    G1 <- g12[1]
    G2 <- g12[2]

    a <- L2/((G1-G2)/K)
    loc <- L1 - a*(1-G1)/K
    pars <- c(loc, a, K, H)
    return(pars)
  }

  Qt.kap <- function(loc, scale, k, h, T = 100) {loc + (scale/k)*((1-((1-(1-(1/T))^h)/h)^k))}
  Ls <- Pool.Kap.pars(x)
  Pars <- Kap.pars(Ls[1],Ls[2], Ls[3], Ls[4])
  LK.Sim <- function(x)
  {
    LKurt.sim <- NULL
    for (i in 1:nrow(x)) {LKurt.sim[i] <- LKurt(Qt.kap(Pars[1],Pars[2],Pars[3],Pars[4], T = 1/runif(x$N[i])))}
    t4m <- mean(LKurt.sim)
    return(t4m)
  }
  LK.vec <- NULL
  for (i in 1:500) {LK.vec[i] <- LK.Sim(x)}
  B4 <- mean(LK.vec - tR4)
  # Standard deviation of the simulated L-kurtosis (Hosking & Wallis): the sum of
  # squared deviations, rather than the square of the summed deviations.
  sig4 <- ((500-1)^-1 * (sum((LK.vec - tR4)^2) - 500*B4^2))^0.5
  # Theoretical L-kurtosis of the GEV as a function of L-skewness: estimate the GEV
  # shape from the L-skewness first (Hosking's approximation), then evaluate tau4.
  t4.GEV <- function(t3) {z <- 2/(3+t3) - log(2)/log(3); k <- 7.8590*z + 2.9554*z^2; (5*(1-4^-k)-10*(1-3^-k)+6*(1-2^-k))/(1-2^-k)}
  t4.GLO <- function(k) {(1+5*k^2)/6}
  t4.LN3 <- function(k) {1.2260172*10^-1+k^2*((1.8756590*10^-1+(-2.5353147*10^-3)*k^2+2.6995102*10^-4*k^4+(-1.8446680*10^-6)*k^6)/(1+8.2325617*10^-2*k^2+4.26814448*10^-3*k^4+1.1653690*10^-4*k^6))}
  t4.gum <- 0.150375
  Z.GEV <- (t4.GEV(tR3)-tR4 + B4)/sig4
  Z.GLO <- (t4.GLO(tR3)-tR4 + B4)/sig4
  #Z.LN3 <- (t4.LN3(tR3)-tR4 + B4)/sig4
  #Z.gum <- (t4.gum-tR4 + B4)/sig4
  #Z.frame <- data.frame(Z.GEV, Z.GLO, Z.LN3, Z.gum)
  Z.frame <- data.frame(Z.GEV, Z.GLO)
  bestInd <- which(abs(Z.frame[1,]) == min(abs(Z.frame)))
  #colnames(Z.frame) <- c("GEV", "GL", "LN3", "Gumbel")
  colnames(Z.frame) <- c("GEV", "GenLog")
  if (bestInd == 1) {Result <- "GEV has the best fit"}
  if (bestInd == 2) {Result <- "GenLog has the best fit"}
  #if (bestInd == 3) {Result <- "Gumbel has the best fit"}
  #if (bestInd == 4) {Result <- "LN3 has the best fit"}
  return(list(Z.frame, Result))
}
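
# For each candidate distribution the Z score computed above is
#   Z = (t4_dist(tR3) - tR4 + B4) / sig4
# where tR4 is the pooled L-kurtosis, t4_dist(tR3) is the distribution's theoretical
# L-kurtosis at the pooled L-skewness, and B4 and sig4 are the bias and standard
# deviation of the L-kurtosis across the 500 kappa-distribution simulations. Following
# Hosking & Wallis, |Z| <= 1.64 is conventionally taken as an acceptable fit, and the
# distribution with the smallest |Z| is reported as fitting best.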

#' Heterogeneity measure (H2) for pooling groups.
#'
#' Quantifies the heterogeneity of a pooled group
#'
#' The H2 measure was developed by Hosking & Wallis and can be found in their book 'Regional Frequency Analysis: an approach based on L-moments' (1997). It was also adopted for use by the Flood Estimation Handbook (1999) and is described in volume 3.
#' @param x pooling group derived from the Pool() function
#' @examples
#' #Get CDs, form a pooling group and calculate H2
#' CDs.203018 <- GetCDs(203018)
#' Pool.203018 <- Pool(CDs.203018)
#' H2(Pool.203018)
#' @return A vector of two characters; the first representing the H2 score and the second stating a qualitative measure of heterogeneity.
#' @author Anthony Hammond

H2 <- function(x){
  if(is.data.frame(x) == FALSE) {stop("x must be a pooled group. Pooled groups can be created with the Pool() function")}
  if(ncol(x) != 24) stop ("x must be a pooled group. Pooled groups can be created with the Pool() function")
  Pool.Kap.pars <- function(x)
  {
    l1 <- 1
    l2 <- mean(x$Lcv)
    lskew <- mean(x$LSkew)
    lkurt <- mean(x$LKurt)
    pars <- c(l1,l2,lskew, lkurt)
    return(pars)
  }

  v2 <- function(x)
  {
    t2r <- mean(x$Lcv)
    t3r <- mean(x$LSkew)
    ni <- ((x$Lcv-t2r)^2 + (x$LSkew-t3r)^2)
    nni <- sum(x$N*ni)
    mn <- sum(x$N)
    v2 <- (nni/mn)^0.5
    return(v2)
  }
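  # Broadly, H2 compares the observed dispersion V2 (the record-length weighted
  # distance of each site's Lcv and LSkew from the pooled means, computed by v2 above)
  # with the dispersion expected from repeated homogeneous samples simulated from the
  # fitted kappa distribution: H2 = (observed V2 - mean simulated V2) / sd(simulated V2).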

  Lcv <- function(x)
  {
    Sort.x <- sort(x)
    Rank <- seq(1, length(x))
    b0 <- mean(x)
    b1 <- mean((Rank-1)/(length(x)-1)*Sort.x)
    b2 <- mean(((Rank-1)*(Rank-2))/((length(x)-1)*(length(x)-2))*Sort.x)
    b3 <- mean(((Rank-1)*(Rank-2)*(Rank-3))/((length(x)-1)*(length(x)-2)*(length(x)-3))*Sort.x)
    L1 <- b0
    L2 <- 2*b1-b0
    Lcv <- L2/L1
    return(Lcv)
  }
  LSkew <- function(x)
  {
    Sort.x <- sort(x)
    Rank <- seq(1, length(x))
    b0 <- mean(x)
    b1 <- mean((Rank-1)/(length(x)-1)*Sort.x)
    b2 <- mean(((Rank-1)*(Rank-2))/((length(x)-1)*(length(x)-2))*Sort.x)
    b3 <- mean(((Rank-1)*(Rank-2)*(Rank-3))/((length(x)-1)*(length(x)-2)*(length(x)-3))*Sort.x)
    L1 <- b0
    L2 <- 2*b1-b0
    L3 <- 6*b2-6*b1+b0
    LSkew <- L3/L2
    return(LSkew)
  }

  Kap.pars <- function(L1, L2, LSkew, LKurt)
  {

    Kap.opt <- function(LSkew,LKurt)
    {
      min.SSR <- function(par)
      {

        if (par[2]>0)
        {
          g1 <- (1*gamma(1+par[1])*gamma(1/par[2]))/(par[2]^(1+par[1])*gamma(1+par[1]+1/par[2]))
          g2 <- (2*gamma(1+par[1])*gamma(2/par[2]))/(par[2]^(1+par[1])*gamma(1+par[1]+2/par[2]))
          g3 <- (3*gamma(1+par[1])*gamma(3/par[2]))/(par[2]^(1+par[1])*gamma(1+par[1]+3/par[2]))
          g4 <- (4*gamma(1+par[1])*gamma(4/par[2]))/(par[2]^(1+par[1])*gamma(1+par[1]+4/par[2]))
        }
        else
        {
          g1 <- (1*gamma(1+par[1])*gamma(-par[1]-1/par[2]))/((-par[2])^(1+par[1])*gamma(1-1/par[2]))
          g2 <- (2*gamma(1+par[1])*gamma(-par[1]-2/par[2]))/((-par[2])^(1+par[1])*gamma(1-2/par[2]))
          g3 <- (3*gamma(1+par[1])*gamma(-par[1]-3/par[2]))/((-par[2])^(1+par[1])*gamma(1-3/par[2]))
          g4 <- (4*gamma(1+par[1])*gamma(-par[1]-4/par[2]))/((-par[2])^(1+par[1])*gamma(1-4/par[2]))
        }
        t3.kap <- (-g1+3*g2-2*g3)/(g1-g2)
        t4.kap <- (g1 - 6*g2 + 10*g3 -5*g4)/(g1-g2)
        ss <- sum((t3.kap - LSkew)^2)+((t4.kap-LKurt)^2)
      }
      Op <- suppressWarnings(optim(par = c(0.01, -0.4), fn = min.SSR))
      return(Op)

    }

    Kap.kh <- Kap.opt(LSkew, LKurt)$par
    K <- Kap.kh[1]
    H <- Kap.kh[2]

    gr <- function(k, h)
    {
      if (h>0)
      {
        g1 <- (1*gamma(1+k)*gamma(1/h))/(h^(1+k)*gamma(1+k+1/h))
        g2 <- (2*gamma(1+k)*gamma(2/h))/(h^(1+k)*gamma(1+k+2/h))
        g3 <- (3*gamma(1+k)*gamma(3/h))/(h^(1+k)*gamma(1+k+3/h))

      }
      else
      {
        g1 <- (1*gamma(1+k)*gamma(-k-1/h))/((-h)^(1+k)*gamma(1-1/h))
        g2 <- (2*gamma(1+k)*gamma(-k-2/h))/((-h)^(1+k)*gamma(1-2/h))
        g3 <- (3*gamma(1+k)*gamma(-k-3/h))/((-h)^(1+k)*gamma(1-3/h))
      }
      vec <- c(g1,g2)
      return(vec)
    }
    g12 <- gr(k = K,h = H)
    G1 <- g12[1]
    G2 <- g12[2]

    a <- L2/((G1-G2)/K)
    loc <- L1 - a*(1-G1)/K
    pars <- c(loc, a, K, H)
    return(pars)
  }


  Qt.kap <- function(loc, scale, k, h, T = 100) {loc + (scale/k)*((1-((1-(1-(1/T))^h)/h)^k))}
  V.2 <- v2(x)
  Ls <- Pool.Kap.pars(x)
  Pars <- Kap.pars(Ls[1],Ls[2], Ls[3], Ls[4])

  V2.Sim <- function(x)
  {
    Pars <- Pool.Kap.pars(x)
    Ns <- x$N
    LCV.sim <- NULL
    for (i in 1:nrow(x)) {LCV.sim[i] <- Lcv(Qt.kap(Pars[1],Pars[2],Pars[3],Pars[4],