# R/All.R


globalVariables(c("ThamesPQ", "NRFAData", "QMEDData", "UKOutline", "AMSP", "id", "URBEXT2000"))


# QuickResults ------------------------------------------------------------

#' Quick pooled results
#'
#' Provides pooled gauged, ungauged, or fake ungauged results, directly from the catchment descriptors
#'
#'The quick results function provides results with a default pooling group. If gauged = FALSE, the median annual maximum flood (QMED) is estimated from catchment descriptors using the QMED equation and then adjusted with two of the closest un-urban gauged sites (this can be changed to 0 or 1 donors). If the site is urban, an urban adjustment is made to the QMED and to the pooled growth curve. If gauged = TRUE, QMED is the median of the gauged annual maxima and the growth curve is formed with the gauged weighting procedure (often known as enhanced single site). If the gauged catchment is urban, it is included in the pooling group and de-urbanised before an urban adjustment is made to the final growth curve. If FUngauged = TRUE, the top site in the pooling group is excluded and the estimate then proceeds as if gauged = FALSE. If the CDs are from a gauged site that is not in the list of sites considered suitable for pooling, it will not be included in the pooling group; in that case, if gauged = TRUE, the result will be erroneous.
#'@param CDs catchment descriptors derived from either GetCDs or CDsXML
#'@param gauged logical argument with a default of FALSE. TRUE for gauged results and FALSE for ungauged
#'@param dons number of donors required with a choice of 0, 1, or 2
#'@param Qmed user supplied QMED which overrides the default QMED estimate
#'@param FUngauged logical argument with a default of FALSE. TRUE provides an ungauged estimate whilst excluding the gauged site (the site with the most similar CDs)
#'@param plot logical argument with a default of TRUE. TRUE provides an extreme value plot. FALSE prevents the plot
#'@param dist a choice of distribution for the estimates. The choices are "GenLog", "GEV", or "Gumbel"; the generalised logistic, generalised extreme value, and Gumbel distributions, respectively. The default is "GenLog"
#'@examples
#'#Get some catchment descriptors
#'CDs.73005 <- GetCDs(73005)
#'#Get default ungauged results
#'QuickResults(CDs.73005)
#'#Get gauged results with a GEV distribution
#'QuickResults(CDs.73005, gauged = TRUE, dist = "GEV")
#'#Get fake ungauged results with one donor
#'QuickResults(CDs.73005, FUngauged = TRUE, dons = 1)
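#'#Apply a user-supplied QMED instead of the default estimate (the value of 50 is purely illustrative)
#'QuickResults(CDs.73005, Qmed = 50)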
#'
#'
#'@return A list of length two. Element one is a data frame with columns: return period (RP), peak flow estimates (Q), and growth factor estimates (GF). Two additional columns quantify the uncertainty. The second element is the estimated Lcv and Lskew (linear coefficient of variation and skewness). By default an extreme value plot is also returned
#'@author Anthony Hammond
QuickResults <- function (CDs, gauged = FALSE, dons = 2, Qmed = NULL,
                          FUngauged = FALSE, plot = TRUE, dist = "GenLog")
{
  if(is.data.frame(CDs) == FALSE) {stop("CDs doesn't appear to be a CDs object")}
  if(is.na(CDs[20,1]) == TRUE | CDs[20,1] != "Northing") {stop("CDs doesn't appear to be a CDs object")}
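  #Internal helpers: Donor1 applies a single-donor adjustment to the catchment
  #descriptor QMED estimate (via DonAdj), and Donor2 applies a two-donor adjustment
  #using distance-based inter-site correlation weights.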
  Donor1 <- function(CDs, DonSite) {
    QMED.cd <- 8.3062 * CDs[1, 2]^0.851 * 0.1536^(1000/CDs[15,
                                                           2]) * CDs[8, 2]^3.4451 * 0.046^(CDs[5, 2]^2)
    Site <- DonSite
    Donors <- DonAdj(CDs = CDs, rows = 500)
    Rw <- which(rownames(Donors) == DonSite)
    Result <- Donors[Rw, 27]
    return(Result)
  }
  Donor2 <- function(CDs, Sites) {
    rij <- function(d) {
      0.4598 * exp(-0.02 * d) + (1 - 0.4598) * exp(-0.4785 *
                                                     d)
    }
    NGRDist <- function(i, j) {
      sqrt((i[1] - j[1])^2 + (i[2] - j[2])^2)/1000
    }
    Site1 <- Sites[1]
    Site2 <- Sites[2]
    CDs.Site1 <- GetCDs(Site1)
    CDs.Site2 <- GetCDs(Site2)
    Dist1 <- NGRDist(c(CDs[19, 2], CDs[20, 2]), c(CDs.Site1[19,
                                                            2], CDs.Site1[20, 2]))
    Dist2 <- NGRDist(c(CDs[19, 2], CDs[20, 2]), c(CDs.Site2[19,
                                                            2], CDs.Site2[20, 2]))
    Dist12 <- NGRDist(c(CDs.Site1[19, 2], CDs.Site1[20, 2]),
                      c(CDs.Site2[19, 2], CDs.Site2[20, 2]))
    ps1 <- rij(Dist1)
    p12 <- rij(Dist12)
    ps2 <- rij(Dist2)
    a1 <- (ps1 - p12 * ps2)/(1 - p12^2)
    a2 <- (ps2 - p12 * ps1)/(1 - p12^2)
    QMEDscd <- 8.3062 * CDs[1, 2]^0.851 * 0.1536^(1000/CDs[15,
                                                           2]) * CDs[8, 2]^3.4451 * 0.046^(CDs[5, 2]^2)
    QMED1cd <- 8.3062 * CDs.Site1[1, 2]^0.851 * 0.1536^(1000/CDs.Site1[15,
                                                                       2]) * CDs.Site1[8, 2]^3.4451 * 0.046^(CDs.Site1[5,
                                                                                                                       2]^2)
    QMED2cd <- 8.3062 * CDs.Site2[1, 2]^0.851 * 0.1536^(1000/CDs.Site2[15,
                                                                       2]) * CDs.Site2[8, 2]^3.4451 * 0.046^(CDs.Site2[5,
                                                                                                                       2]^2)
    QMED1obs <- QMEDData$QMED[which(rownames(QMEDData) ==
                                      Site1)]
    QMED2obs <- QMEDData$QMED[which(rownames(QMEDData) ==
                                      Site2)]
    QMEDs.adj <- QMEDscd * (QMED1obs/QMED1cd)^a1 * (QMED2obs/QMED2cd)^a2
    return(QMEDs.adj)
  }
  if (gauged == TRUE & FUngauged == TRUE) {
    print("Warning: Gauged & FUngauged are both TRUE. Gauged results provided")
  }
  if (gauged == FALSE) {
    PoolGroup <- Pool(CDs = CDs)
    if (CDs[18, 2] > 0.03) {
      Ptemp <- Pool(CDs = CDs, iug = TRUE)
    }
    else {
      Ptemp <- Pool(CDs = CDs)
    }
    Ex <- as.numeric(rownames(Ptemp)[1])
    PoolGroupFun <- Pool(CDs = CDs, exclude = Ex)
    if (is.null(Qmed) == TRUE) {
      qmed <- 8.3062 * CDs[1, 2]^0.851 * 0.1536^(1000/CDs[15,
                                                          2]) * CDs[8, 2]^3.4451 * 0.046^(CDs[5, 2]^2)
      DonQMED <- DonAdj(CDs = CDs, rows = 500)
      if (FUngauged == TRUE) {
        DonQMED <- DonQMED[-1, ]
      }
      UrbInd <- which(DonQMED$URBEXT2000 > 0.03)
      if (length(UrbInd) < 1) {
        DonQMED <- DonQMED
      }
      else {
        DonQMED <- DonQMED[-UrbInd, ]
      }
      D2Result <- Donor2(CDs = CDs, Sites = rownames(DonQMED)[1:2])
      D1Result <- Donor1(CDs = CDs, DonSite = rownames(DonQMED)[1])
      if (CDs[18, 2] <= 0.03) {
        if (dons == 0) {
          qmed <- qmed
        }
        if (dons == 1) {
          qmed <- D1Result
        }
        if (dons == 2) {
          qmed <- D2Result
        }
      }
      else {
        if (dons == 0) {
          qmed <- QMED(CDs = CDs, UrbAdj = TRUE)[[1]]
        }
        if (dons == 1) {
          qmed <- QMED(CDs = CDs, Don1 = rownames(DonQMED)[1],
                       UrbAdj = TRUE)[[1]]
        }
        if (dons == 2) {
          qmed <- QMED(CDs = CDs, Don2 = rownames(DonQMED)[1:2],
                       UrbAdj = TRUE)[[1]]
        }
      }
    }
    else {
      qmed = Qmed
    }
    if (FUngauged == FALSE) {
      if (CDs[18, 2] <= 0.03) {
        Est <- PoolEst(PoolGroup, QMED = qmed,
                       dist = dist)
      }
      else {
        Est <- PoolEst(PoolGroup, QMED = qmed, UrbAdj = TRUE,
                       CDs = CDs, dist = dist)
      }
    }
    else {
      if (CDs[18, 2] <= 0.03) {
        Est <- PoolEst(PoolGroupFun, QMED = qmed,
                       dist = dist)
      }
      else {
        Est <- PoolEst(PoolGroupFun, QMED = qmed, UrbAdj = TRUE,
                       CDs = CDs, dist = dist)
      }
    }
  }
  if (gauged == TRUE) {
    if (CDs[18, 2] > 0.03) {
      PoolGroup <- Pool(CDs = CDs, iug = TRUE, DeUrb = TRUE)
    }
    else {
      PoolGroup <- Pool(CDs = CDs)
    }
    Site <- rownames(PoolGroup)[1]
    AMAX <- GetAM(Site)
    if (is.null(Qmed) == TRUE) {
      qmed <- median(AMAX$Flow)
    }
    else {
      qmed <- Qmed
    }
    if (CDs[18, 2] <= 0.03) {
      Est <- PoolEst(PoolGroup, gauged = TRUE, QMED = qmed,
                     dist = dist)
    }
    else {
      Est <- PoolEst(PoolGroup, gauged = TRUE, QMED = qmed,
                     UrbAdj = TRUE, CDs = CDs, dist = dist)
    }
  }
  if (plot == TRUE) {
    if (CDs[18, 2] <= 0.03) {
      if (gauged == FALSE) {
        EVPool(PoolGroup, dist = dist)
      }
      if (gauged == TRUE) {
        EVPool(PoolGroup, gauged = TRUE, dist = dist)
      }
    }
    else {
      if (gauged == FALSE) {
        EVPool(PoolGroup, UrbAdj = TRUE, CDs = CDs, dist = dist)
      }
      if (gauged == TRUE) {
        EVPool(PoolGroup, gauged = TRUE, UrbAdj = TRUE,
               CDs = CDs, dist = dist)
      }
    }
  }
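  #Uncertainty helpers: SEess approximates the standard error of the gauged
  #(enhanced single site) estimates; fseUGAH gives the factorial standard error
  #of the ungauged estimates as a function of return period and number of donors.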
  SEess <- function(x, RP) {
    VAR <-  (((median(x) * Lcv(x))^2)/length(x))* exp(1.3125 + 0.599*(log(RP-1)) + 0.00399*(log(RP-1)^2))
    SE <- sqrt(VAR)
    return(SE)
  }
  fseUGAH <- function(RP, Dons) {
    y <- -log(-log(1-1/RP))
    if(Dons == 2) {Result <- 1.4149 - 0.0163*y + 0.0102*y^2}
    if(Dons == 1) {Result <- 1.427 - 0.0134*y + 0.0098*y^2}
    if(Dons == 0) {Result <- 1.4665 - 0.0135*y + 0.0096*y^2}
    return(Result)
  }
  if(gauged == FALSE) {
    fses <- fseUGAH(RP = Est[[1]][,1], Dons = dons)
    lower68 <- round(Est[[1]][,2]/(fses), 3)
    upper68 <- round(Est[[1]][,2]*(fses), 3)
    Est[[1]] <- data.frame(Est[[1]][,-c(4,5)], lower68, upper68)
  }
  if(gauged == TRUE) {
    SEs <- SEess(AMAX[,2], RP = Est[[1]][,1])
    lower95 <- round(Est[[1]][,2]-SEs*1.96, 3)
    upper95 <- round(Est[[1]][,2]+SEs*1.96, 3)
    Est[[1]] <- data.frame(Est[[1]][,-c(4,5)], lower95, upper95)
  }
  return(Est)
}




# Pool --------------------------------------------------------------------

#' Create pooling group
#'
#' Function to develop a pooling group based on catchment descriptors
#'
#' A pooling group is created from a CDs object, derived from GetCDs or CDsXML, or specifically with the catchment descriptors (see arguments). To change the default pooling group, one or more sites can be excluded using the 'exclude' option, which requires either a site reference or multiple site references in a vector. If this is done, the site with the next lowest similarity distance measure is added to the group (until the total number of years is at least N). Sites with URBEXT2000 (urban extent) > 0.03 are excluded by default and this can be adjusted with UrbMax. If a gauged assessment is required and the site of interest is > UrbMax it can be included by setting iug = TRUE. De-urbanise the Lcv and Lskew (L-moment ratios) for sites with URBEXT2000 > UrbMax by setting DeUrb = TRUE. If the user has more data available for a particular site within the pooling group, the Lcv and Lskew for the site can be updated after the group has been finalised. An example of doing so is provided below. The pooling method is outlined in Science Report: SC050050 - Improving the FEH statistical procedures for flood frequency estimation.
#'@param CDs catchment descriptors derived from either GetCDs or CDsXML
#'@param AREA catchment area in km2
#'@param SAAR catchment standard average annual rainfall (1961-1990) in mm
#'@param FARL catchment flood attenuation from reservoirs & lakes
#'@param FPEXT catchment floodplain extent. The proportion of the catchment that is estimated to be inundated by a 100-year flood
#'@param N minimum Number of total gauged record years for the pooling group
#'@param exclude sites to exclude from the pooling group. Either a single site reference or a vector of site references (numeric)
#'@param iug iug stands for 'include urban gauge' - which refers to a gauged subject site if it's > UrbMax. It's a logical argument with default of FALSE. TRUE will override the default and add the closest site in catchment descriptor space (should be the gauge of interest) to the pooling group if it has URBEXT2000 > UrbMax
#'@param UrbMax Maximum URBEXT2000 level with a default of 0.03. Any catchment with URBEXT2000 above this level will be excluded from the pooling group
#'@param DeUrb logical argument with a default of FALSE. If true, the Lcv and LSkew of any site in the pooling group with URBEXT2000 > 0.03 will be de-urbanised
#'@examples
#'#Get some catchment descriptors
#'CDs.73005 <- GetCDs(73005)
#'#Set up a pooling group object called Pool.73005 excluding sites 79005 & 71011.
#'#Then print the group to the console
#'Pool.73005 <- Pool(CDs.73005, exclude = c(79005, 71011))
#'Pool.73005
#'#Form a pooling group, called PoolGroup, with the catchment descriptors specifically
#'PoolGroup <- Pool(AREA = 1000, SAAR = 800, FARL = 1, FPEXT = 0.01)
#'#Form a pooling group using an urban catchment which is intended for enhanced
#'#single site estimation - by including it in the group.
#'CDs.39001 <- GetCDs(39001)
#'Pool.39001 <- Pool(CDs.39001, iug = TRUE, DeUrb = TRUE)
#'#Change the Lcv and LSkew of the top site in the pooling group to 0.19 & 0.18,
#'#respectively.
#'PoolUpdate <- LRatioChange(Pool.39001, SiteID = 39001, 0.19, 0.18)
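#'#Relax the default urban exclusion threshold (the 0.05 value is purely illustrative)
#'PoolUrb.73005 <- Pool(CDs.73005, UrbMax = 0.05)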
#'@return A data.frame of the pooling group with site reference row names and 24 columns, each providing catchment & gauge details for the sites in the pooling group.
#'@author Anthony Hammond
Pool <- function(CDs = NULL, AREA, SAAR, FARL, FPEXT, N = 500, exclude = NULL, iug = FALSE, UrbMax = 0.03, DeUrb = FALSE){
  if(is.null(exclude) == FALSE) {
    Site.id <- match(exclude, row.names(NRFAData))
    if(any(is.na(Site.id)) == TRUE) stop ("Site ID/s not within the set of sites considered suitable for pooling, therefore it is/they are already excluded")}
  suppressWarnings(if(is.null(CDs) == TRUE){

    SDMs <- function(x, AREA, SAAR, FARL, FPEXT)
    {
      sqrt(
        (3.2*((log(AREA) - log(x[,1]))/1.28)^2)
        + (0.5*((log(SAAR) - log(x[,15]))/0.37)^2)
        + (0.1*((FARL - x[,8])/0.05)^2)
        + (0.2*((FPEXT - x[,9])/0.04)^2))
    }
    suppressWarnings(if (is.null(exclude) == TRUE) {NRFAData <- NRFAData} else
    {
      index <- NULL
      for (i in 1:length(exclude)) {index[i] <- which(row.names(NRFAData) == exclude[i])}
      NRFAData <- NRFAData[-index,]})
    Site <- SDMs(NRFAData, AREA, SAAR, FARL, FPEXT)
    Refs <- data.frame(row.names(NRFAData), Site)
    colnames(Refs) <- c("Site", "SDM")
    Refs.Order <- Refs[order(Refs$SDM),]
    Char.Sites <- Refs.Order$Site
    Char.Sites <- as.character(Char.Sites)
    Site.NRFA <- NRFAData[Char.Sites, ]
    UrbInd <- Char.Sites[1]
    ug.index <- which(row.names(NRFAData) == UrbInd)
    UrbUrbInd <- NRFAData[ug.index,22]
    Site.NRFA <- subset(Site.NRFA, URBEXT2000 <= UrbMax)
    if(iug == FALSE) {Site.NRFA <- Site.NRFA}
    if(iug == TRUE & UrbUrbInd > UrbMax) {Site.NRFA <- rbind(NRFAData[ug.index,], Site.NRFA)} else {Site.NRFA <- Site.NRFA}
    if(iug == TRUE & UrbUrbInd <= UrbMax) {print("Warning: iug = TRUE and the closest site in catchment descriptor space has URBEXT2000 < UrbMax. Group formed as if iug = FALSE")}
    SDM <- SDMs(Site.NRFA,AREA, SAAR, FARL, FPEXT)
    Site.NRFA <- cbind(Site.NRFA, SDM)
    Cum.N <- NULL
    for (i in 1:length(Site.NRFA$N)) {Cum.N[i] <- sum(Site.NRFA$N[1:i])}
    min(which(Cum.N >= 500))
    Site.NRFA <- Site.NRFA[1:min(which(Cum.N >= N)),]
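    #Discordancy measure (as in Hosking & Wallis, 1997), calculated from the
    #Lcv, LSkew and LKurt of the sites in the group.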
    Ds <-  function(x)
    {
      u.hat <- apply(tf, 2, mean)
      Res <- numeric(1)
      for (i in 1:length(Site.NRFA$N)) {Res <- Res+as.numeric(tf[i,]-u.hat)%*%t(as.numeric((tf[i,]-u.hat)))}
      D <- NULL
      for (i in 1:length(Site.NRFA$N)) {D[i] <- ((1/3)*length(Site.NRFA$N))*as.numeric(tf[i,]-u.hat)%*%solve(Res)%*%(as.numeric((tf[i,]-u.hat)))}
      return(D)
    }
    tf <- data.frame(Site.NRFA$Lcv, Site.NRFA$LSkew, Site.NRFA$LKurt)
    Discordancy <- Ds(tf)
    crit.vs <- c(1.333, 1.648, 1.917, 2.140, 2.329, 2.491, 2.632, 2.757, 2.869, 2.971, 3)
    xd <- seq(5,15)
    Crit.frame <- data.frame(xd, crit.vs)
    C.V <- Crit.frame[min(which(Crit.frame$xd >= length(Site.NRFA$N))),2]
    Discordant <- NULL
    for (i in 1:length(Discordancy)) {Discordant[i] <- isTRUE(Discordancy[i] > C.V)}
    Site.NRFA <- cbind(Site.NRFA, Discordancy, Discordant)
    Site.NRFA <- Site.NRFA[,-c(12,13,14,16,19,20)]
    colnames(Site.NRFA)[24] <- "Discordant?"
    if(DeUrb == FALSE) {Site.NRFA <- Site.NRFA} else
    { LcvCol <- which(colnames(Site.NRFA) == "Lcv")
    LskewCol <- which(colnames(Site.NRFA) == "LSkew")
    UrbCol <- which(colnames(Site.NRFA) == "URBEXT2000")
    UrbInd0.03 <- which(Site.NRFA[,UrbCol] > 0.03)
    if(length(UrbInd0.03) < 1) stop("DeUrb is not FALSE, but there are no sites with URBEXT2000 > 0.03")
    DeUrbesLCV <- NULL
    for(i in 1:length(UrbInd0.03)) {DeUrbesLCV[i] <- LcvUrb(Site.NRFA[UrbInd0.03[i], LcvCol], Site.NRFA[UrbInd0.03[i], UrbCol], DeUrb = TRUE)}
    DeUrbesLSKEW <- NULL
    for(i in 1:length(UrbInd0.03)) {DeUrbesLSKEW[i] <- LSkewUrb(Site.NRFA[UrbInd0.03[i], LskewCol], Site.NRFA[UrbInd0.03[i], UrbCol], DeUrb = TRUE)}
    Site.NRFA[UrbInd0.03,LcvCol] <- DeUrbesLCV
    Site.NRFA[UrbInd0.03,LskewCol] <- DeUrbesLSKEW
    }
    return(Site.NRFA)
  } else {
    SDMs <- function(x, AREA, SAAR, FARL, FPEXT)
    {
      sqrt(
        (3.2*((log(AREA) - log(x[,1]))/1.28)^2)
        + (0.5*((log(SAAR) - log(x[,15]))/0.37)^2)
        + (0.1*((FARL - x[,8])/0.05)^2)
        + (0.2*((FPEXT - x[,9])/0.04)^2))
    }
    suppressWarnings(if (is.null(exclude) == TRUE) {NRFAData <- NRFAData} else
    {
      index <- NULL
      for (i in 1:length(exclude)) {index[i] <- which(row.names(NRFAData) == exclude[i])}
      NRFAData <- NRFAData[-index,]})
    Site <- SDMs(NRFAData, CDs[1,2], CDs[15,2], CDs[8,2], CDs[9,2])
    Refs <- data.frame(row.names(NRFAData), Site)
    colnames(Refs) <- c("Site", "SDM")
    Refs.Order <- Refs[order(Refs$SDM),]
    Char.Sites <- Refs.Order$Site
    Char.Sites <- as.character(Char.Sites)
    Site.NRFA <- NRFAData[Char.Sites, ]
    UrbInd <- Char.Sites[1]
    ug.index <- which(row.names(NRFAData) == UrbInd)
    UrbUrbInd <- NRFAData[ug.index, which(colnames(NRFAData) == "URBEXT2000")]
    Site.NRFA <- subset(Site.NRFA, URBEXT2000 <= UrbMax)
    if(iug == FALSE) {Site.NRFA <- Site.NRFA}
    if(iug == TRUE & UrbUrbInd > UrbMax) {Site.NRFA <- rbind(NRFAData[ug.index,], Site.NRFA)} else {Site.NRFA <- Site.NRFA}
    if(iug == TRUE & UrbUrbInd <= UrbMax) {print("Warning: iug = TRUE and the closest site in catchment descriptor space has URBEXT2000 < UrbMax. Group formed as if iug = FALSE")}
    SDM <- SDMs(Site.NRFA,CDs[1,2], CDs[15,2], CDs[8,2], CDs[9,2])
    Site.NRFA <- cbind(Site.NRFA, SDM)
    Cum.N <- NULL
    for (i in 1:length(Site.NRFA$N)) {Cum.N[i] <- sum(Site.NRFA$N[1:i])}
    min(which(Cum.N >= 500))
    Site.NRFA <- Site.NRFA[1:min(which(Cum.N >= N)),]
    Ds <-  function(x)
    {
      u.hat <- apply(tf, 2, mean)
      Res <- numeric(1)
      for (i in 1:length(Site.NRFA$N)) {Res <- Res+as.numeric(tf[i,]-u.hat)%*%t(as.numeric((tf[i,]-u.hat)))}
      D <- NULL
      for (i in 1:length(Site.NRFA$N)) {D[i] <- ((1/3)*length(Site.NRFA$N))*as.numeric(tf[i,]-u.hat)%*%solve(Res)%*%(as.numeric((tf[i,]-u.hat)))}
      return(D)
    }
    tf <- data.frame(Site.NRFA$Lcv, Site.NRFA$LSkew, Site.NRFA$LKurt)
    Discordancy <- Ds(tf)
    crit.vs <- c(1.333, 1.648, 1.917, 2.140, 2.329, 2.491, 2.632, 2.757, 2.869, 2.971, 3)
    xd <- seq(5,15)
    Crit.frame <- data.frame(xd, crit.vs)
    C.V <- Crit.frame[min(which(Crit.frame$xd >= length(Site.NRFA$N))),2]
    Discordant <- NULL
    for (i in 1:length(Discordancy)) {Discordant[i] <- isTRUE(Discordancy[i] > C.V)}
    Site.NRFA <- cbind(Site.NRFA, Discordancy, Discordant)
    Site.NRFA <- Site.NRFA[,-c(12,13,14,16,19,20)]
    colnames(Site.NRFA)[24] <- "Discordant?"
    if(DeUrb == FALSE) {Site.NRFA <- Site.NRFA} else
    { LcvCol <- which(colnames(Site.NRFA) == "Lcv")
    LskewCol <- which(colnames(Site.NRFA) == "LSkew")
    UrbCol <- which(colnames(Site.NRFA) == "URBEXT2000")
    UrbInd0.03 <- which(Site.NRFA[,UrbCol] > 0.03)
    if(length(UrbInd0.03) < 1) stop("DeUrb is not FALSE, but there are no sites with URBEXT2000 > 0.03")
    DeUrbesLCV <- NULL
    for(i in 1:length(UrbInd0.03)) {DeUrbesLCV[i] <- LcvUrb(Site.NRFA[UrbInd0.03[i], LcvCol], Site.NRFA[UrbInd0.03[i], UrbCol], DeUrb = TRUE)}
    DeUrbesLSKEW <- NULL
    for(i in 1:length(UrbInd0.03)) {DeUrbesLSKEW[i] <- LSkewUrb(Site.NRFA[UrbInd0.03[i], LskewCol], Site.NRFA[UrbInd0.03[i], UrbCol], DeUrb = TRUE)}
    Site.NRFA[UrbInd0.03,LcvCol] <- DeUrbesLCV
    Site.NRFA[UrbInd0.03,LskewCol] <- DeUrbesLSKEW
    }
    return(Site.NRFA)
  } )
}



# Pool Small catchments--------------------------------------------------------------------

#' Create pooling group for small catchments
#'
#' Function to develop a small catchments pooling group based on catchment descriptors
#'
#'A pooling group is created from a CDs object, derived from GetCDs or CDsXML, or specifically with the necessary catchment descriptors (see arguments). To change the default pooling group, one or more sites can be excluded using the 'exclude' option, which requires either a site reference or multiple site references in a vector. If this is done, the site with the next lowest similarity distance measure is added to the group (until the total number of years is at least N). Sites with URBEXT2000 (urban extent) > 0.03 are excluded by default and this can be adjusted with the UrbMax argument. If a gauged assessment is required and the site of interest is > UrbMax it can be included by setting iug = TRUE. De-urbanise the Lcv and Lskew (L-moment ratios) of sites with URBEXT2000 > 0.03 by setting DeUrb = TRUE. If the user has more data available for a particular site within the pooling group, the Lcv and Lskew for the site can be updated after the group has been finalised.
#'@param CDs catchment descriptors derived from either GetCDs or CDsXML
#'@param AREA catchment area in km2
#'@param SAAR catchment standard average annual rainfall (1961-1990) in mm
#'@param N minimum Number of total gauged record years for the pooling group
#'@param exclude sites to exclude from the pooling group. Either a single site reference or a vector of site references (numeric)
#'@param iug iug stands for 'include urban gauge' - which refers to a gauged subject site if it's > UrbMax. It's a logical argument with default of FALSE. TRUE will override the default and add the closest site in catchment descriptor space (should be the gauge of interest) to the pooling group if it has URBEXT2000 > UrbMax
#'@param UrbMax Maximum URBEXT2000 level with a default of 0.03. Any catchment with URBEXT2000 above this level will be excluded from the pooling group
#'@param DeUrb logical argument with a default of FALSE. If true, the Lcv and LSkew of any site in the pooling group with URBEXT2000 > 0.03 will be de-urbanised
#'@examples
#'#Get some catchment descriptors
#'CDs.21001 <- GetCDs(21001)
#'#Set up a pooling group object called Pool.21001 excluding site 206006
#'#Then print the group to the console
#'Pool.21001 <- PoolSmall(CDs.21001, exclude = 206006)
#'Pool.21001
#'#Form a pooling group, called PoolGroup, with the catchment descriptors specifically
#'PoolGroup <- PoolSmall(AREA = 22, SAAR = 1702)
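#'#Require a larger minimum total record length for the group (the value of 600 is illustrative)
#'PoolGroupN <- PoolSmall(CDs.21001, N = 600)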
#'@return A data.frame of the pooling group with site reference row names and 24 columns, each providing catchment & gauge details for the sites in the pooling group.
#'@author Anthony Hammond
PoolSmall <- function (CDs = NULL, AREA, SAAR, N = 500, exclude = NULL,
                       iug = FALSE, UrbMax = 0.03, DeUrb = FALSE)
{
  if (is.null(exclude) == FALSE) {
    Site.id <- match(exclude, row.names(NRFAData))
    if (any(is.na(Site.id)) == TRUE)
      stop("Site ID/s not within the set of sites considered suitable for pooling, therefore it is/they are already excluded")
  }
  suppressWarnings(if (is.null(CDs) == TRUE) {
    SDMs <- function(x, AREA, SAAR) {
      sqrt((((log(AREA) - log(x[, 1]))/1.264)^2) +
             (((log(SAAR) - log(x[, 15]))/0.349)^2))
    }
    suppressWarnings(if (is.null(exclude) == TRUE) {
      NRFAData <- NRFAData
    }
    else {
      index <- NULL
      for (i in 1:length(exclude)) {
        index[i] <- which(row.names(NRFAData) == exclude[i])
      }
      NRFAData <- NRFAData[-index, ]
    })
    Site <- SDMs(NRFAData, AREA, SAAR)
    Refs <- data.frame(row.names(NRFAData), Site)
    colnames(Refs) <- c("Site", "SDM")
    Refs.Order <- Refs[order(Refs$SDM), ]
    Char.Sites <- Refs.Order$Site
    Char.Sites <- as.character(Char.Sites)
    Site.NRFA <- NRFAData[Char.Sites, ]
    UrbInd <- Char.Sites[1]
    ug.index <- which(row.names(NRFAData) == UrbInd)
    UrbUrbInd <- NRFAData[ug.index, 22]
    Site.NRFA <- subset(Site.NRFA, URBEXT2000 <= UrbMax)
    if (iug == FALSE) {
      Site.NRFA <- Site.NRFA
    }
    if (iug == TRUE & UrbUrbInd > UrbMax) {
      Site.NRFA <- rbind(NRFAData[ug.index, ], Site.NRFA)
    }
    else {
      Site.NRFA <- Site.NRFA
    }
    if (iug == TRUE & UrbUrbInd <= UrbMax) {
      print("Warning: iug = TRUE and the closest site in catchment descriptor space has URBEXT2000 < UrbMax. Group formed as if iug = FALSE")
    }
    SDM <- SDMs(Site.NRFA, AREA, SAAR)
    Site.NRFA <- cbind(Site.NRFA, SDM)
    Cum.N <- NULL
    for (i in 1:length(Site.NRFA$N)) {
      Cum.N[i] <- sum(Site.NRFA$N[1:i])
    }
    min(which(Cum.N >= 500))
    Site.NRFA <- Site.NRFA[1:min(which(Cum.N >= N)), ]
    Ds <- function(x) {
      u.hat <- apply(tf, 2, mean)
      Res <- numeric(1)
      for (i in 1:length(Site.NRFA$N)) {
        Res <- Res + as.numeric(tf[i, ] - u.hat) %*%
          t(as.numeric((tf[i, ] - u.hat)))
      }
      D <- NULL
      for (i in 1:length(Site.NRFA$N)) {
        D[i] <- ((1/3) * length(Site.NRFA$N)) * as.numeric(tf[i,
        ] - u.hat) %*% solve(Res) %*% (as.numeric((tf[i,
        ] - u.hat)))
      }
      return(D)
    }
    tf <- data.frame(Site.NRFA$Lcv, Site.NRFA$LSkew, Site.NRFA$LKurt)
    Discordancy <- Ds(tf)
    crit.vs <- c(1.333, 1.648, 1.917, 2.14, 2.329, 2.491,
                 2.632, 2.757, 2.869, 2.971, 3)
    xd <- seq(5, 15)
    Crit.frame <- data.frame(xd, crit.vs)
    C.V <- Crit.frame[min(which(Crit.frame$xd >= length(Site.NRFA$N))),
                      2]
    Discordant <- NULL
    for (i in 1:length(Discordancy)) {
      Discordant[i] <- isTRUE(Discordancy[i] > C.V)
    }
    Site.NRFA <- cbind(Site.NRFA, Discordancy, Discordant)
    Site.NRFA <- Site.NRFA[, -c(12, 13, 14, 16, 19, 20)]
    colnames(Site.NRFA)[24] <- "Discordant?"
    if (DeUrb == FALSE) {
      Site.NRFA <- Site.NRFA
    }
    else {
      LcvCol <- which(colnames(Site.NRFA) == "Lcv")
      LskewCol <- which(colnames(Site.NRFA) == "LSkew")
      UrbCol <- which(colnames(Site.NRFA) == "URBEXT2000")
      UrbInd0.03 <- which(Site.NRFA[,UrbCol] > 0.03)
      if(length(UrbInd0.03) < 1) stop("DeUrb is not FALSE, but there are no sites with URBEXT2000 > 0.03")
      DeUrbesLCV <- NULL
      for(i in 1:length(UrbInd0.03)) {DeUrbesLCV[i] <- LcvUrb(Site.NRFA[UrbInd0.03[i], LcvCol], Site.NRFA[UrbInd0.03[i], UrbCol], DeUrb = TRUE)}
      DeUrbesLSKEW <- NULL
      for(i in 1:length(UrbInd0.03)) {DeUrbesLSKEW[i] <- LSkewUrb(Site.NRFA[UrbInd0.03[i], LskewCol], Site.NRFA[UrbInd0.03[i], UrbCol], DeUrb = TRUE)}
      Site.NRFA[UrbInd0.03,LcvCol] <- DeUrbesLCV
      Site.NRFA[UrbInd0.03,LskewCol] <- DeUrbesLSKEW
    }
    return(Site.NRFA)
  }
  else {
    SDMs <- function(x, AREA, SAAR) {
      sqrt((((log(AREA) - log(x[, 1]))/1.264)^2) +
             (((log(SAAR) - log(x[, 15]))/0.349)^2))
    }
    suppressWarnings(if (is.null(exclude) == TRUE) {
      NRFAData <- NRFAData
    }
    else {
      index <- NULL
      for (i in 1:length(exclude)) {
        index[i] <- which(row.names(NRFAData) == exclude[i])
      }
      NRFAData <- NRFAData[-index, ]
    })
    Site <- SDMs(NRFAData, CDs[1, 2], CDs[15, 2])
    Refs <- data.frame(row.names(NRFAData), Site)
    colnames(Refs) <- c("Site", "SDM")
    Refs.Order <- Refs[order(Refs$SDM), ]
    Char.Sites <- Refs.Order$Site
    Char.Sites <- as.character(Char.Sites)
    Site.NRFA <- NRFAData[Char.Sites, ]
    UrbInd <- Char.Sites[1]
    ug.index <- which(row.names(NRFAData) == UrbInd)
    UrbUrbInd <- NRFAData[ug.index, which(colnames(NRFAData) ==
                                            "URBEXT2000")]
    Site.NRFA <- subset(Site.NRFA, URBEXT2000 <= UrbMax)
    if (iug == FALSE) {
      Site.NRFA <- Site.NRFA
    }
    if (iug == TRUE & UrbUrbInd > UrbMax) {
      Site.NRFA <- rbind(NRFAData[ug.index, ], Site.NRFA)
    }
    else {
      Site.NRFA <- Site.NRFA
    }
    if (iug == TRUE & UrbUrbInd <= UrbMax) {
      print("Warning: iug = TRUE and the closest site in catchment descriptor space has URBEXT2000 < UrbMax. Group formed as if iug = FALSE")
    }
    SDM <- SDMs(Site.NRFA, CDs[1, 2], CDs[15, 2])
    Site.NRFA <- cbind(Site.NRFA, SDM)
    Cum.N <- NULL
    for (i in 1:length(Site.NRFA$N)) {
      Cum.N[i] <- sum(Site.NRFA$N[1:i])
    }
    min(which(Cum.N >= 500))
    Site.NRFA <- Site.NRFA[1:min(which(Cum.N >= N)), ]
    Ds <- function(x) {
      u.hat <- apply(tf, 2, mean)
      Res <- numeric(1)
      for (i in 1:length(Site.NRFA$N)) {
        Res <- Res + as.numeric(tf[i, ] - u.hat) %*%
          t(as.numeric((tf[i, ] - u.hat)))
      }
      D <- NULL
      for (i in 1:length(Site.NRFA$N)) {
        D[i] <- ((1/3) * length(Site.NRFA$N)) * as.numeric(tf[i,
        ] - u.hat) %*% solve(Res) %*% (as.numeric((tf[i,
        ] - u.hat)))
      }
      return(D)
    }
    tf <- data.frame(Site.NRFA$Lcv, Site.NRFA$LSkew, Site.NRFA$LKurt)
    Discordancy <- Ds(tf)
    crit.vs <- c(1.333, 1.648, 1.917, 2.14, 2.329, 2.491,
                 2.632, 2.757, 2.869, 2.971, 3)
    xd <- seq(5, 15)
    Crit.frame <- data.frame(xd, crit.vs)
    C.V <- Crit.frame[min(which(Crit.frame$xd >= length(Site.NRFA$N))),
                      2]
    Discordant <- NULL
    for (i in 1:length(Discordancy)) {
      Discordant[i] <- isTRUE(Discordancy[i] > C.V)
    }
    Site.NRFA <- cbind(Site.NRFA, Discordancy, Discordant)
    Site.NRFA <- Site.NRFA[, -c(12, 13, 14, 16, 19, 20)]
    colnames(Site.NRFA)[24] <- "Discordant?"
    if (DeUrb == FALSE) {
      Site.NRFA <- Site.NRFA
    }
    else {
      LcvCol <- which(colnames(Site.NRFA) == "Lcv")
      LskewCol <- which(colnames(Site.NRFA) == "LSkew")
      UrbCol <- which(colnames(Site.NRFA) == "URBEXT2000")
      UrbInd0.03 <- which(Site.NRFA[,UrbCol] > 0.03)
      if(length(UrbInd0.03) < 1) stop("DeUrb is not FALSE, but there are no sites with URBEXT2000 > 0.03")
      DeUrbesLCV <- NULL
      for(i in 1:length(UrbInd0.03)) {DeUrbesLCV[i] <- LcvUrb(Site.NRFA[UrbInd0.03[i], LcvCol], Site.NRFA[UrbInd0.03[i], UrbCol], DeUrb = TRUE)}
      DeUrbesLSKEW <- NULL
      for(i in 1:length(UrbInd0.03)) {DeUrbesLSKEW[i] <- LSkewUrb(Site.NRFA[UrbInd0.03[i], LskewCol], Site.NRFA[UrbInd0.03[i], UrbCol], DeUrb = TRUE)}
      Site.NRFA[UrbInd0.03,LcvCol] <- DeUrbesLCV
      Site.NRFA[UrbInd0.03,LskewCol] <- DeUrbesLSKEW
    }
    return(Site.NRFA)
  })
}


# PoolEst -----------------------------------------------------------------

#' Pooled flood estimates
#'
#' Provides pooled results from a pooling group - gauged, ungauged and with urban adjustment if necessary.
#'
#' PoolEst is a function to provide results from a pooling group derived using the Pool function. QMED (median annual maximum flow) needs to be supplied and can be derived from the QMED function for ungauged estimates or the annual maximum sample for gauged estimates. If the catchment of interest is urban, the UrbAdj argument can be set to TRUE. If this is done, either URBEXT (urban extent) needs to be provided or the catchment descriptors, derived from CDsXML or GetCDs. The methods for estimating pooled growth curves are according to Science Report: SC050050 - Improving the FEH statistical procedures for flood frequency estimation. The methods for estimating the L-moments and growth factors are outlined in the Flood Estimation Handbook (1999), volume 3. The methods for quantifying uncertainty are detailed in Hammond, A. (2022). Easy methods for quantifying the uncertainty of FEH pooling analysis. Circulation - The Newsletter of the British Hydrological Society (152). When UrbAdj = TRUE, urban adjustment is applied to the QMED estimate according to the method outlined in the guidance by Wallingford HydroSolutions: 'WINFAP 4 Urban Adjustment Procedures'.
#'
#'@param x pooling group derived from the Pool function
#'@param gauged logical argument with a default of FALSE. TRUE for gauged results and FALSE for ungauged
#'@param QMED estimate of the median annual maximum flow
#'@param dist a choice of distribution for the estimates. The choices are "GenLog", "GEV", or "Gumbel"; the generalised logistic, generalised extreme value, and Gumbel distributions, respectively. The default is "GenLog"
#'@param RP return period of interest. By default the following RPs are provided: 2, 5, 10, 20, 50, 75, 100, 200, 500, 1000
#'@param UrbAdj logical argument with a default of FALSE. When TRUE, an urban adjustment is applied to the pooled Lcv and LSkew
#'@param CDs catchment descriptors derived from either GetCDs or CDsXML
#'@param URBEXT the catchment URBEXT2000, to be supplied if UrbAdj is TRUE and CDs are not provided
#'@param fseQMED factorial standard error of the median annual maximum (QMED) estimate, used for quantifying ungauged uncertainty. Default is 1.46
#'@examples
#'#Get some catchment descriptors and form a pooling group. It's urban and
#'#therefore the site of interest is not included.
#'CDs.27083 <- GetCDs(27083)
#'Pool.27083 <- Pool(CDs.27083)
#'#Get results for the ungauged case, with urban adjustment
#'PoolEst(Pool.27083, QMED = 11.941, UrbAdj = TRUE, CDs = CDs.27083)
#'#Form the group again with the urban gauge included & undertake a gauged estimate
#'#with urban adjustment. QMED in this example is estimated as the median of the annual
#'#maximum series for site 27083.
#'PoolG.27083 <- Pool(CDs.27083, iug = TRUE, DeUrb = TRUE)
#'PoolEst(PoolG.27083, QMED = 12.5, UrbAdj = TRUE, CDs = CDs.27083)
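#'#Ungauged estimate again, with the Gumbel distribution and a reduced set of return periods
#'PoolEst(Pool.27083, QMED = 11.941, UrbAdj = TRUE, CDs = CDs.27083, dist = "Gumbel", RP = c(10, 50, 100))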
#'
#'@return A list of length two. Element one is a data frame with columns: return period (RP), peak flow estimates (Q), growth factor estimates (GF), and lower and upper intervals of uncertainty (68 percent intervals for ungauged and 95 percent for gauged). The second element is the estimated Lcv and Lskew.
#'@author Anthony Hammond
PoolEst <- function(x, gauged = FALSE, QMED, dist = "GenLog", RP = c(2,5,10,20,50,75,100,200,500,1000), UrbAdj = FALSE, CDs = NULL, URBEXT = NULL, fseQMED = 1.46) {
  if(is.data.frame(x) == FALSE) {stop("x must be a pooled group. Pooled groups can be created with the Pool() function")}
  if(ncol(x) != 24) stop ("x must be a pooled group. Pooled groups can be created with the Pool() function")
  if(UrbAdj == TRUE) {
    if(is.null(URBEXT) == TRUE & is.null(CDs) == TRUE) stop("if Urbadj = TRUE, URBEXT or CDs must be provided")
    if(is.null(URBEXT) == TRUE) {URBEXT2000 <- CDs[18,2]} else {URBEXT2000 <- URBEXT}}
  if(dist == "GenLog") {func <- GenLogGF}
  if(dist == "GEV") {func <- GEVGF}
  if(dist == "Gumbel") {func <- GumbelGF}
  if(gauged == FALSE) {lcv <- WungLcv(x)} else {lcv <- WGaugLcv(x)}
  if(gauged == FALSE) {lskew <- WungLSkew(x)} else {lskew <- WGaugLSkew(x)}
  if(UrbAdj == TRUE) {lcv <- lcv*0.68654^(1.567*URBEXT2000)} else {lcv <- lcv}
  if(UrbAdj == TRUE) {lskew <- ((lskew+1)*1.096017^(1.567*URBEXT2000))-1} else {lskew <- lskew}
  if(dist == "Gumbel") {Zt <- func(lcv, RP = RP)} else {Zt <- func(lcv, lskew, RP = RP)}
  GF <- as.numeric(format(round(Zt, 3), nsmall = 3))
  Qt <- Zt*QMED
  Q <- as.numeric(format(round(Qt, 3), nsmall = 3))
  PooledLcv <- lcv
  PooledLSkew <- lskew
  if(gauged == TRUE) {
    VAR <-  (((median(x[1,15]) * x[1,16])^2)/x[1,21])* exp(1.3125 + 0.599*(log(RP-1)) + 0.00399*(log(RP-1)^2))
    SE <-sqrt(VAR)
    lower95 <- round(Q-SE*1.96, 3)
    upper95 <- round(Q+SE*1.96, 3)
    res <- data.frame(RP, Q, GF, lower95, upper95)
  }
  if(gauged == FALSE){
    fseGFfunc <- function(RP) {
      if(RP == 2) {fse <- 1} else {
        Y <- -log(-log(1-1/RP))
        fse <- round(0.0069*Y^2 - 0.0099*Y + 1.0039, 3)
      }
      return(fse)
    }
    fseGF <- NULL
    for(i in 1:length(RP)) {fseGF[i] <- fseGFfunc(RP[i])}
    fse <- fseGF*fseQMED
    lower68 <- round(Q/fse, 3)
    upper68 <- round(Q*fse, 3)
    res <- data.frame(RP, Q, GF, lower68, upper68)
  }
  #res <- data.frame(RP, Q, GF)
  Pars <- cbind(PooledLcv, PooledLSkew)
  return(list(res, Pars))
}


# OptimPars ---------------------------------------------------------------

#' Optimise distribution parameters
#'
#' Estimates the parameters of the generalised extreme value, generalised logistic, or Gumbel distribution from known return period estimates
#'
#' Given a dataframe with return periods (RPs) in the first column and associated estimates in the second column, this function provides an estimate of the distribution parameters. Ideally the first RP should be 2. Extrapolation outside the RPs used for calibration comes with greater uncertainty.
#'@param x a data.frame with RPs in the first column and associated estimates in the second column
#'@param dist a choice of distribution for the estimates. The choices are "GenLog", "GEV", or "Gumbel" - the generalised logistic, generalised extreme value and Gumbel distribution, respectively. The default is "GenLog"
#'@examples
#'#Get some catchment descriptors and some quick results. Then estimate the GenLog parameters
#'Results <- QuickResults(GetCDs(96001), plot = FALSE)[[1]]
#'OptimPars(Results[,1:2])
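#'#For comparison, estimate the GEV parameters from the same results
#'OptimPars(Results[,1:2], dist = "GEV")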
#'
#'@return The location, scale, and shape parameters for the generalised logistic or generalised extreme value distribution, or the location and scale parameters for the Gumbel.
#' @author Anthony Hammond
OptimPars <-  function(x, dist = "GenLog") {
  if(is.data.frame(x) == FALSE) {stop("x must be a data.frame with RPs in the first column and associated variable in the second")}
  res <- x
  RP <- x[,1]
  Q <- x[,2]
  if(dist == "GenLog") {
    min.GL <- function(data, par) {
      with(data, sum(((par[1]+par[2]/par[3]*(1-(RP-1)^-par[3])-Q)^2)))}
    result <- optim(par = c(x[1,2], mean(c(x[1,2]/4, x[1,2]/3)), 0.01), fn = min.GL, data = res, lower = c(NA, NA, -1), upper = c(NA, NA, 1), method = "L-BFGS-B")
    Pars <- result$par
    Pars <- data.frame(Pars[1], Pars[2], Pars[3])
    colnames(Pars) <- c("loc", "scale", "shape")
  }
  if(dist == "GEV"){
    min.GEV <- function(data, par) {
      with(data, sum(((par[1]+par[2]/par[3]*(1-(-log(1-1/RP))^par[3])-Q)^2)))}
    result <- optim(par = c(x[1,2], mean(c(x[1,2]/4, x[1,2]/3)), 0.01), fn = min.GEV, data = res, lower = c(NA, NA, -1), upper = c(NA, NA, 1), method = "L-BFGS-B")
    Pars <- result$par
    Pars <- data.frame(Pars[1], Pars[2], Pars[3])
    colnames(Pars) <- c("loc", "scale", "shape")
  }
  if(dist == "Gumbel"){
    min.Gum <- function(data, par) {
      with(data, sum((((par[1]+par[2]*(-log(-log(1-(1/RP)))))-Q)^2)))}
    result <- optim(par = c(x[1,2], (x[1,2]/3)), fn = min.Gum, data = res, method = "L-BFGS-B")
    Pars <- result$par
    Pars <- data.frame(Pars[1], Pars[2])
    colnames(Pars) <- c("loc", "scale")
  }
  return(Pars)
}




# DistFuncs ---------------------------------------------------------------

#'Generalised logistic distribution growth factors
#'
#'Estimated growth factors as a function of return period, with inputs of Lcv & LSkew (linear coefficient of variation & linear skewness)
#'
#'@details Growth factors are calculated by the method outlined in the Flood Estimation Handbook, volume 3, 1999.
#'
#' @param lcv linear coefficient of variation
#' @param lskew linear skewness
#' @param RP return period
#' @examples
#' #Estimate the 50-year growth factors from an Lcv and Lskew of 0.17 and 0.04, respectively.
#' GenLogGF(0.17, 0.04, RP = 50)
#' @return Generalised logistic estimated growth factor
#' @author Anthony Hammond

GenLogGF <- function(lcv, lskew, RP) {
  k <- -lskew
  B <- lcv*k*sin((pi)*k)/(k*pi*(k+lcv)-lcv*sin((pi)*k))
  zt <- 1+(B/k)*(1-(RP-1)^lskew)
  return(zt)
}

#'Generalised extreme value distribution growth factors
#'
#'Estimated growth factors as a function of return period, with inputs of Lcv & LSkew (linear coefficient of variation & linear skewness)
#'
#'@details Growth factors are calculated by the method outlined in the Flood Estimation Handbook, volume 3, 1999.
#' @param lcv linear coefficient of variation
#' @param lskew linear skewness
#' @param RP return period
#' @examples
#' #Estimate the 50-year growth factors from Lcv = 0.17 and Lskew = 0.04
#' GEVGF(0.17, 0.04, RP = 50)
#' @return Generalised extreme value estimated growth factor
#' @author Anthony Hammond
GEVGF <- function(lcv,lskew, RP) {
  C <- (2/(3+lskew)) - (log(2)/log(3))
  kgev <- 7.859*C+2.95548*C^2
  Bgev <- (kgev*lcv)/(lcv*(gamma(1+kgev)-(log(2))^kgev)+gamma(1+kgev)*(1-2^-kgev))
  zt <- 1+(Bgev/kgev)*(log(2)^kgev - (log(RP/(RP-1)))^kgev)
  return(zt)
}

#'Generalised Pareto distribution growth factors
#'
#'Estimated growth factors as a function of return period, with inputs of Lcv & LSkew (linear coefficient of variation & linear skewness)
#'
#'@details Growth factors (GF) are calculated by the method outlined in the Flood Estimation Handbook, volume 3, 1999. The average number of peaks per year argument (ppy) is for the function to convert from the peaks over threshold (POT) scale to the annual scale. For example, if there are 3 peaks per year, the probability associated with the 100-yr return period estimate would be 0.01/3 (i.e. an RP of 300 on the POT scale) rather than 0.01.
#' @param lcv linear coefficient of variation
#' @param lskew linear skewness
#' @param RP return period
#' @param ppy peaks per year
#' @examples
#' #Get POT flow data from the Thames at Kingston (noting the no. peaks per year).
#' #Then estimate the 100-year growth factor with lcv and lskew estimates
#' TPOT <- POTextract(ThamesPQ[,c(1,3)], thresh = 0.90)
#' GenParetoGF(Lcv(TPOT$peak), LSkew(TPOT$peak), RP = 100, ppy = 1.867)
#' #multiply by the median of the POT data for an estimate of the 100-yr flood
#' GenParetoGF(Lcv(TPOT$peak), LSkew(TPOT$peak), RP = 100, ppy = 1.867)*median(TPOT$peak)
#' @return Generalised Pareto estimated growth factor
#' @author Anthony Hammond
GenParetoGF <- function(lcv, lskew, RP, ppy = 1) {
  k <- (1-3*lskew)/(1+lskew)
  Bgp <- (lcv*k*(1+k)*(2+k))/(k-lcv*(2+k)*(2^-k*(1+k)-1))
  RPppy <- 1/((1/RP)/ppy)
  Zt <- 1 + (Bgp/k) *((2^-k)-(1-(1-(1/RPppy)))^k)
  return(Zt)
}


#'Gumbel distribution growth factors
#'
#'Estimated growth factors as a function of return period, with inputs of Lcv & LSkew (linear coefficient of variation & linear skewness)
#'
#'@details Growth factors are calculated by the method outlined in the Flood Estimation Handbook, volume 3, 1999.
#'
#' @param lcv linear coefficient of variation
#' @param RP return period
#' @examples
#' #Estimate the 50-year growth factors from an Lcv of 0.17.
#' GumbelGF(0.17, RP = 50)
#' @return Gumbel estimated growth factor
#' @author Anthony Hammond

GumbelGF <- function(lcv, RP){
  B <- lcv/(log(2)-lcv*(0.5772+log(log(2))))
  gf <- 1+B*(log(log(2))-log(-log(1-(1/RP))))
  return(gf)
}


#'Kappa3 distribution growth factors
#'
#'Estimated growth factors as a function of return period, with inputs of Lcv & LSkew (linear coefficient of variation & linear skewness)
#'
#' @details Growth factors are calculated by the method outlined in Kjeldsen, T (2019), 'The 3-parameter Kappa distribution as an alternative for use with FEH pooling groups.'Circulation - The Newsletter of the British Hydrological Society, no. 142
#' @param lcv linear coefficient of variation
#' @param lskew linear skewness
#' @param RP return period
#' @examples
#' #Get an ungauged pooled Lcv and LSkew for catchment 15006
#' PooledRes <- as.numeric(QuickResults(GetCDs(15006), plot = FALSE)[[2]])
#' #Calculate Kappa growth factor for the 100-year flood
#' Kappa3GF(PooledRes[1], PooledRes[2], RP = 100)
#' @return Kappa3 distribution estimated growth factor
#' @author Anthony Hammond
Kappa3GF <- function(lcv, lskew, RP) {
  gr <- function(k){
    g1 <- (1*gamma(1+k)*gamma(-k-1/-0.4))/((0.4)^(1+k)*gamma(1-1/-0.4))
    g2 <- (2*gamma(1+k)*gamma(-k-2/-0.4))/((0.4)^(1+k)*gamma(1-2/-0.4))
    g3 <- (3*gamma(1+k)*gamma(-k-3/-0.4))/((0.4)^(1+k)*gamma(1-3/-0.4))
    vec <- c(g1,g2,g3)
    return(vec)
  }
  KSolve <- function(k) {
    abs(((- gr(k)[1] + 3*gr(k)[2] - 2*gr(k)[3])/ (gr(k)[1] - gr(k)[2])) - lskew)
  }
  k <- 0.01
  KRes <- optim(par = k, fn = KSolve, method = "Brent", lower = -0.99, upper = 1)$par[1]
  GrRes <- gr(KRes)
  B <- (lcv * KRes) / ((GrRes[1]-GrRes[2]) + lcv*(GrRes[1] - (log(2.22)^KRes)))
  xT <- 1 + (B/KRes) *(log(2.22)^KRes - ((1-((RP-1)/RP)^-0.4)/-0.4)^KRes)
  return(xT)
}



#'Generalised logistic distribution - estimates directly from sample
#'
#'Estimated quantiles as a function of return period (RP) and vice versa, directly from the data
#'
#'If the argument q is used, it overrides RP and provides RP as a function of q (magnitude of variable) as opposed to q as a function of RP. The parameters are estimated by the method of L-moments, as detailed in 'Hosking J. Wallis J. 1997 Regional Frequency Analysis: An Approach Based on L-moments. Cambridge University Press, New York'. The trend argument allows the location parameter to move in line with the observed linear trend of the sample. Another option is to detrend the sample first with the DeTrend function. On average this makes little difference to the two-year flow, but it tends to give lower results for longer return periods (though not always) when compared with the trend option in this function.
#' @param x numeric vector (block maxima sample)
#' @param RP return period (default = 100)
#' @param q quantile (magnitude of variable)
#' @param trend logical argument with default of FALSE. If TRUE, a linear adjustment to the location parameter is made to account for non-stationarity
#' @examples
#' #Get an annual maximum sample and estimate the 50-year RP
#' AM.27090 <- GetAM(27090)
#' GenLogAM(AM.27090$Flow, RP = 50)
#' #Estimate the RP for a 600m3/s discharge
#' GenLogAM(AM.27090$Flow, q = 600)
#' #Estimate the 50-year RP allowing for non-stationarity in the location parameter
#' GenLogAM(AM.27090$Flow, RP = 50, trend = TRUE)
#' @return quantile as a function of RP or vice versa, with the option of accounting for the linear trend in the sample
#' @author Anthony Hammond
GenLogAM <- function(x, RP = 100, q = NULL, trend = FALSE)
{
  if(is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  Sort.x <- sort(x)
  Rank <- seq(1, length(x))
  b0 <- mean(x, na.rm = TRUE)
  b1 <- mean((Rank-1)/(length(x)-1)*Sort.x, na.rm = TRUE)
  b2 <- mean(((Rank-1)*(Rank-2))/((length(x)-1)*(length(x)-2))*Sort.x, na.rm = TRUE)
  b3 <- mean(((Rank-1)*(Rank-2)*(Rank-3))/((length(x)-1)*(length(x)-2)*(length(x)-3))*Sort.x, na.rm = TRUE)
  L1 <- b0
  L2 <- 2*b1-b0
  L3 <- 6*b2-6*b1+b0
  L4 <- 20*b3-30*b2+12*b1-b0
  Lcv <- L2/L1
  LSkew <- L3/L2
  k <- -LSkew
  a <- (L2*sin(k*pi))/(k*pi)
  loc <- b0-a*((1/k)-(pi/sin(k*pi)))
  if(is.null(q) == TRUE) {res <- loc+ a/k * (1-(RP-1) ^-k)}
  else {
    y <- -k^(-1) * log(1 - k * (q - loc)/a)
    P <- 1-(1/(1+exp(-y)))
    res <- 1/P
  }
  m <- function(i, j) {sum((i-mean(i))*(j-mean(j)))/sum((i-mean(i))^2)}
  M <- m(i = seq(1, length(x)), j = x)
  b <- mean(x)-M*mean(seq(1,length(x)))
  LM <- (M*(length(x))+b)-(M*median(seq(1,length(x)))+b)
  LocTrend <- loc+LM
  if (is.null(q) == TRUE) {resTrend <- LocTrend+ a/k * (1-(RP-1) ^-k)}
  else {
    yTrend <- -k^(-1) * log(1 - k * (q - LocTrend)/a)
    Ptrend <- 1-(1/(1+exp(-yTrend)))
    resTrend <- 1/Ptrend}
  if(trend == FALSE) {return(res)} else {return(resTrend)}
}



#'Generalised extreme value distribution - estimates directly from sample
#'
#'Estimated quantiles as function of return period (RP) and vice versa, directly from the data
#'
#'If the argument q is used, it overrides RP and provides RP as a function of q (magnitude of variable) as opposed to q as a function of RP. The parameters are estimated by the method of L-moments, as detailed in 'Hosking J. Wallis J. 1997 Regional Frequency Analysis: An Approach Based on L-moments. Cambridge University Press, New York'. The trend argument allows the location parameter to move in line with the observed linear trend of the sample. Another option is to detrend the sample first with the DeTrend function. On average this makes little difference to the two-year flow, but it tends to give lower results for longer return periods (though not always) when compared with the trend option in this function.
#' @param x numeric vector (block maxima sample)
#' @param RP return period (default = 100)
#' @param q quantile (magnitude of variable)
#' @param trend logical argument with default of FALSE. If TRUE, a linear adjustment to the location parameter is made to account for non-stationarity
#' @examples
#' #Get an annual maximum sample and estimate the 50-year RP
#' AM.27090 <- GetAM(27090)
#' GEVAM(AM.27090$Flow, RP = 50)
#' #Estimate the RP for a 600m3/s discharge
#' GEVAM(AM.27090$Flow, q = 600)
#' #Estimate the 50-year RP allowing for non-stationarity in the location parameter
#' GEVAM(AM.27090$Flow, RP = 50, trend = TRUE)
#' @return quantile as a function of RP or vice versa, with the option of accounting for the linear trend in the sample
#' @author Anthony Hammond
GEVAM <- function(x, RP = 100, q = NULL, trend = FALSE)
{
  if(is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  Sort.x <- sort(x)
  Rank <- seq(1, length(x))
  b0 <- mean(x, na.rm = TRUE)
  b1 <- mean((Rank-1)/(length(x)-1)*Sort.x, na.rm = TRUE)
  b2 <- mean(((Rank-1)*(Rank-2))/((length(x)-1)*(length(x)-2))*Sort.x, na.rm = TRUE)
  b3 <- mean(((Rank-1)*(Rank-2)*(Rank-3))/((length(x)-1)*(length(x)-2)*(length(x)-3))*Sort.x, na.rm = TRUE)
  L1 <- b0
  L2 <- 2*b1-b0
  L3 <- 6*b2-6*b1+b0
  L4 <- 20*b3-30*b2+12*b1-b0
  Lcv <- L2/L1
  LSkew <- L3/L2
  C <- (2/(3+LSkew)) - (log(2)/log(3))
  k <- 7.859*C+2.95548*C^2
  a <- (L2*k)/((1-2^-k)*gamma(1+k))
  loc <- b0-a*((1-gamma(1+k))/k)
  if(is.null(q) == TRUE) {res <- loc+a/k*(1-(-log(1-1/RP))^k)} else {
    y <- -k^(-1) * log(1 - k * (q - loc)/a)
    P <- 1-(exp(-exp(-y)))
    res <- 1/P}
  m <- function(i, j) {sum((i-mean(i))*(j-mean(j)))/sum((i-mean(i))^2)}
  M <- m(i = seq(1, length(x)), j = x)
  b <- mean(x)-M*mean(seq(1,length(x)))
  LM <- (M*(length(x))+b)-(M*median(seq(1,length(x)))+b)
  LocTrend <- loc+LM
  if (is.null(q) == TRUE) {resTrend <- LocTrend+a/k*(1-(-log(1-1/RP))^k)}
  else {
    yTrend <- -k^(-1) * log(1 - k * (q - LocTrend)/a)
    Ptrend <- 1-(exp(-exp(-yTrend)))
    resTrend <- 1/Ptrend}
  if(trend == FALSE) {return(res)} else {return(resTrend)}
}



#'Generalised Pareto distribution - estimates directly from sample
#'
#'Estimated quantiles as function of return period (RP) and vice versa, directly from the data
#'
#'If the argument q is used, it overrides RP and provides RP as a function of q (magnitude of variable) as opposed to q as a function of RP. The average number of peaks per year argument (ppy) is for the function to convert from the peaks over threshold (POT) scale to the annual scale. For example, if there are 3 peaks per year, the probability associated with the 100-yr return period estimate would be 0.01/3 (i.e. an RP of 300 on the POT scale) rather than 0.01. The parameters are estimated by the method of L-moments, as detailed in 'Hosking J. Wallis J. 1997 Regional Frequency Analysis: An Approach Based on L-moments. Cambridge University Press, New York'.
#' @param x numeric vector (peaks over threshold sample)
#' @param ppy peaks per year
#' @param RP return period (default = 100)
#' @param q quantile (magnitude of variable)
#' @examples
#' #Get a POT series and estimate the 50-year RP
#' ThamesPOT <- POTextract(ThamesPQ[,c(1,3)], thresh = 0.90)
#' GenParetoPOT(ThamesPOT$peak, ppy = 1.867, RP = 50)
#' #Estimate the RP for a 600m3/s discharge
#' GenParetoPOT(ThamesPOT$peak, ppy = 1.867, q = 600)
#' @return quantile as a function of RP or vice versa
#' @author Anthony Hammond
GenParetoPOT <- function(x, ppy = 1, RP = 100, q = NULL)
{
  if(is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  Sort.x <- sort(x)
  Rank <- seq(1, length(x))
  b0 <- mean(x, na.rm = TRUE)
  b1 <- mean((Rank-1)/(length(x)-1)*Sort.x, na.rm = TRUE)
  b2 <- mean(((Rank-1)*(Rank-2))/((length(x)-1)*(length(x)-2))*Sort.x, na.rm = TRUE)
  b3 <- mean(((Rank-1)*(Rank-2)*(Rank-3))/((length(x)-1)*(length(x)-2)*(length(x)-3))*Sort.x, na.rm = TRUE)
  L1 <- b0
  L2 <- 2*b1-b0
  L3 <- 6*b2-6*b1+b0
  L4 <- 20*b3-30*b2+12*b1-b0
  Lcv <- L2/L1
  LSkew <- L3/L2
  k <- (1-3*LSkew)/(1+LSkew)
  a <- (1+k)*(2+k)*L2
  loc <- L1-(2+k)*L2
  if(is.null(q) == TRUE) {res <- loc+a*(1-(1-(1-(1/RP)/ppy))^k)/k} else {
    y <- -k^-1*log(1-k*(q-loc)/a)
    P <- 1-(1-exp(-y))
    RPPOT <- 1/P
    res <- RPPOT/ppy
  }
  return(res)
}

#'Gumbel distribution - estimates directly from sample
#'
#' @description Estimated quantiles as a function of return period (RP) and vice versa, directly from the data
#' @details If the argument q is used, it overrides RP and provides RP as a function of q (magnitude of variable) as opposed to q as a function of RP. The parameters are estimated by the method of L-moments, as detailed in 'Hosking J. Wallis J. 1997 Regional Frequency Analysis: An Approach Based on L-moments. Cambridge University Press, New York'. The trend argument allows the location parameter to move in line with the observed linear trend of the sample. Another option is to detrend the sample first with the DeTrend function. On average this makes little difference to the two-year flow, but it tends to give lower results for longer return periods (though not always) when compared with the trend option in this function.
#' @param x numeric vector (block maxima sample)
#' @param RP return period (default = 100)
#' @param q quantile (magnitude of variable)
#' @param trend logical argument with default of FALSE. If TRUE, a linear adjustment to the location parameter is made to account for non-stationarity
#' @examples
#' #Get an annual maximum sample and estimate the 50-year RP
#' AM.27090 <- GetAM(27090)
#' GumbelAM(AM.27090$Flow, RP = 50)
#' #Estimate the RP for a 600m3/s discharge
#' GumbelAM(AM.27090$Flow, q = 600)
#' #Estimate the 50-year RP allowing for non-stationarity in the location parameter
#' GumbelAM(AM.27090$Flow, RP = 50, trend = TRUE)
#' @return quantile as a function of RP or vice versa, with the option of accounting for the linear trend in the sample
#' @author Anthony Hammond
GumbelAM <- function(x, RP = 100, q = NULL, trend = FALSE)
{
  if(is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  Sort.x <- sort(x)
  Rank <- seq(1, length(x))
  b0 <- mean(x, na.rm = TRUE)
  b1 <- mean((Rank-1)/(length(x)-1)*Sort.x, na.rm = TRUE)
  L1 <- b0
  L2 <- 2*b1-b0
  a <- L2/log(2)
  loc <- L1 - 0.5772*a
  if(is.null(q) == TRUE) {res <- loc - a * log(-log(1-(1/RP)))}
  else {
    Prob <- 1- exp(-exp(-(q - loc)/a))
    res <- 1/Prob
  }
  m <- function(i, j) {sum((i-mean(i))*(j-mean(j)))/sum((i-mean(i))^2)}
  M <- m(i = seq(1, length(x)), j = x)
  b <- mean(x)-M*mean(seq(1,length(x)))
  LM <- (M*(length(x))+b)-(M*median(seq(1,length(x)))+b)
  LocTrend <- loc+LM
  if (is.null(q) == TRUE) {resTrend <- LocTrend - a * log(-log(1-(1/RP)))}
  else {
    ProbTrend <- 1- exp(-exp(-(q - LocTrend)/a))
    resTrend <- 1/ProbTrend}
  if(trend == FALSE) {return(res)} else {return(resTrend)}
}
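# Illustrative sketch (not run; kept as comments): two hedged ways of handling a
# trending sample with GumbelAM, assuming site 27090 as in the examples above.
# They are not expected to match exactly.
# AM.27090 <- GetAM(27090)
# GumbelAM(AM.27090$Flow, RP = 50, trend = TRUE)   # location parameter shifted in line with the fitted linear trend
# GumbelAM(DeTrend(AM.27090$Flow), RP = 50)        # alternatively, detrend first and fit as stationary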



#'Generalised logistic distribution estimates from parameters
#'
#'Estimated quantiles as a function of return period (RP) and vice versa, from user input parameters
#'
#'If the argument q is used, it overrides RP and provides RP as a function of q (magnitude of variable) as opposed to q as a function of RP.
#' @param loc location parameter
#' @param scale scale parameter
#' @param shape shape parameter
#' @param q quantile. magnitude of the variable under consideration
#' @param RP return period
#' @examples
#' #Get an annual maximum sample, estimate the parameters and estimate 50-year RP
#' AM.27090 <- GetAM(27090)
#' GenLogPars(AM.27090$Flow)
#' #Store parameters in an object
#' Pars <- as.numeric(GenLogPars(AM.27090$Flow))
#' #get estimate of 50-yr flow
#' GenLogEst(Pars[1], Pars[2], Pars[3], RP = 50)
#' #Estimate the RP for a 600m3/s discharge
#' GenLogEst(Pars[1], Pars[2], Pars[3], q = 600)
#' @return quantile as a function of RP or vice versa
#' @author Anthony Hammond
GenLogEst <- function(loc, scale, shape, q = NULL, RP = 100) {
  if(is.null(q) == TRUE) {res <- loc+ scale/shape * (1-(RP-1) ^-shape)}
  else {
    y <- -shape^(-1) * log(1 - shape * (q - loc)/scale)
    P <- 1-(1/(1+exp(-y)))
    res <- 1/P
  }
  return(res)
}


#'Generalised Pareto distribution estimates from parameters
#'
#'Estimated quantiles as a function of return period (RP) and vice versa, from user input parameters
#'
#'If the argument q is used, it overrides RP and provides RP as a function of q (magnitude of variable) as opposed to q as a function of RP. The average number of peaks per year argument (ppy) is for the function to convert from the peaks over threshold (POT) scale to the annual scale. For example, if there are 3 peaks per year, the probability associated with the 100-yr return period estimate would be 0.01/3 (i.e. an RP of 300 on the POT scale) rather than 0.01.
#' @param loc location parameter
#' @param scale scale parameter
#' @param shape shape parameter
#' @param q quantile. magnitude of the variable under consideration
#' @param RP return period
#' @param ppy peaks per year. Default is one
#' @examples
#' #Get a POT sample, estimate the parameters, and estimate 50-year RP
#' ThamesPOT <- POTextract(ThamesPQ[,c(1,3)], thresh = 0.90)
#' GenParetoPars(ThamesPOT$peak)
#' #Store parameters in an object
#' Pars <- as.numeric(GenParetoPars(ThamesPOT$peak))
#' #get estimate of 50-yr flow
#' GenParetoEst(Pars[1], Pars[2], Pars[3], ppy = 1.867, RP = 50)
#' #Estimate the RP for a 600m3/s discharge
#' GenParetoEst(Pars[1], Pars[2], Pars[3], ppy = 1.867, q = 600)
#' @return quantile as a function of RP or vice versa
#' @author Anthony Hammond
GenParetoEst <- function(loc, scale, shape, q = NULL, RP = 100, ppy = 1) {
  if(is.null(q) == TRUE) {res <- loc+scale*(1-(1-(1-(1/RP)/ppy))^shape)/shape} else {
    y <- -shape^-1*log(1-shape*(q-loc)/scale)
    P <- 1-(1-exp(-y))
    RPPOT <- 1/P
    res <- RPPOT/ppy
  }
  return(res)
}
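# Illustrative sketch (not run; kept as comments): the annual to POT-scale
# return period conversion described in the details above, assuming 3 peaks per year.
# ppy <- 3
# AnnualRP <- 100
# ExceedProb <- (1 / AnnualRP) / ppy   # 0.01 / 3, the probability used within GenParetoEst
# 1 / ExceedProb                       # 300; the equivalent return period on the POT scale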


#'Generalised extreme value distribution estimates from parameters
#'
#'Estimated quantiles as a function of return period (RP) and vice versa, from user input parameters
#'
#'If the argument q is used, it overrides RP and provides RP as a function of q (magnitude of variable) as opposed to q as a function of RP.
#' @param loc location parameter
#' @param scale scale parameter
#' @param shape shape parameter
#' @param q quantile. magnitude of the variable under consideration
#' @param RP return period
#' @examples
#' #Get an annual maximum sample, estimate the parameters and estimate 50-year RP
#' AM.27090 <- GetAM(27090)
#' GEVPars(AM.27090$Flow)
#' #Store parameters in an object
#' Pars <- as.numeric(GEVPars(AM.27090$Flow))
#' #get estimate of 50-yr flow
#' GEVEst(Pars[1], Pars[2], Pars[3], RP = 50)
#' #Estimate the RP for a 600m3/s discharge
#' GEVEst(Pars[1], Pars[2], Pars[3], q = 600)
#' @return quantile as a function of RP or vice versa
#' @author Anthony Hammond
GEVEst <- function(loc, scale, shape, q = NULL, RP = 100) {
  if(is.null(q) == TRUE) {res <- loc+scale/shape*(1-(-log(1-1/RP))^shape)} else {
    y <- -shape^(-1) * log(1 - shape * (q - loc)/scale)
    P <- 1-(exp(-exp(-y)))
    res <- 1/P}
  return(res)
}

#'Gumbel distribution estimates from parameters
#'
#'Estimated quantiles as a function of return period (RP) and vice versa, from user input parameters
#'
#'If the argument q is used, it overrides RP and provides RP as a function of q (magnitude of variable) as opposed to q as a function of RP.
#' @param loc location parameter
#' @param scale scale parameter
#' @param q quantile. magnitude of the variable under consideration
#' @param RP return period
#' @examples
#' #Get an annual maximum sample, estimate the parameters and estimate 50-year RP
#' AM.27090 <- GetAM(27090)
#' Pars <- as.numeric(GumbelPars(AM.27090$Flow))
#' GumbelEst(Pars[1], Pars[2], RP = 50)
#' #Estimate the RP for a 600m3/s discharge
#' GumbelEst(Pars[1], Pars[2], q = 600)
#' @return quantile as a function of RP or vice versa
#' @author Anthony Hammond
GumbelEst <- function(loc, scale, q = NULL,RP = 100){
  if(is.null(q) == TRUE) {res <- loc+scale*(-log(-log(1-(1/RP))))}
  else {
    Prob <- 1- exp(-exp(-(q - loc)/scale))
    res <- 1/Prob}
  return(res)
}


#'Generalised extreme value distribution parameter estimates
#'
#'Estimated parameters from a sample (with Lmoments or maximum likelihood estimation) or from L1 (first L-moment), Lcv (linear coefficient of variation), and LSkew (linear skewness)
#'
#'@details The L-moment estimated parameters are by the method detailed in 'Hosking J. Wallis J. 1997 Regional Frequency Analysis: An Approach Based on L-moments. Cambridge University Press, New York'
#'
#' @param x numeric vector. The sample
#' @param mle logical argument with a default of FALSE. If FALSE the parameters are estimated with Lmoments, if TRUE the parameters are estimated by maximum likelihood estimation
#' @param L1 first Lmoment
#' @param LCV linear coefficient of variation
#' @param LSKEW linear skewness
#' @examples
#' #Get an annual maximum sample and estimate the parameters using Lmoments
#' AM.27090 <- GetAM(27090)
#' GEVPars(AM.27090$Flow)
#' #Estimate parameters using MLE
#' GEVPars(AM.27090$Flow, mle = TRUE)
#' #calculate Lmoments and estimate the parameters with L1, Lcv and Lskew
#' Lmoms(AM.27090$Flow)
#' #store linear moments in an object
#' LPars <- as.numeric(Lmoms(AM.27090$Flow))[c(1,5,6)]
#' GEVPars(L1 = LPars[1], LCV = LPars[2], LSKEW = LPars[3])
#' @return Parameter estimates (location, scale, shape)
#' @author Anthony Hammond
GEVPars <- function(x = NULL, mle = FALSE, L1, LCV, LSKEW) {
  if(is.null(x) == FALSE & is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  if(mle == FALSE){
    if(is.null(x)) {C <- (2/(3+LSKEW)) - (log(2)/log(3))
    Shape <- 7.859*C+2.95548*C^2
    L2 <- L1*LCV
    Scale <- (L2*Shape)/((1-2^-Shape)*gamma(1+Shape))
    Loc <- L1-Scale*((1-gamma(1+Shape))/Shape)
    return(data.frame(Loc, Scale, Shape))}
    else {
      L1 <- mean(x, na.rm = TRUE)
      LCV <- Lcv(x)
      LSKEW <- LSkew(x)
      C <- (2/(3+LSKEW)) - (log(2)/log(3))
      Shape <- 7.859*C+2.95548*C^2
      L2 <- L1*LCV
      Scale <- (L2*Shape)/((1-2^-Shape)*gamma(1+Shape))
      Loc <- L1-Scale*((1-gamma(1+Shape))/Shape)
      return(data.frame(Loc, Scale, Shape))}}
  else {
    pars <- c(mean(x), sd(x)/1.5, 0.01)
    max.lhd <- function(q, par) {
      abs(sum(log(gev.pdf(q, loc = par[1], scale = par[2], shape = par[3]))))
    }
    gev.pdf <- function(q, loc, scale, shape) {
      y <- -shape^-1*log(1-shape*(q-loc)/scale)
      p <- scale^-1*exp(1)^(-(1-shape)*y-exp(1)^(-y))
      return(p)
    }
    result <- suppressWarnings(optim(par = pars, fn = max.lhd, q = x))
    loc <- result$par[1]
    scale <- result$par[2]
    shape <- result$par[3]
    log.likelihood <- -result$value[1]
    message <- result$message
    Res <- data.frame(loc, scale, shape, log.likelihood)
    return(Res)
  }
}

#'Generalised logistic distribution parameter estimates
#'
#'Estimated parameters from a sample (with Lmoments or maximum likelihood estimation) or from L1 (first L-moment), Lcv (linear coefficient of variation), and LSkew (linear skewness)
#'
#'@details The L-moment estimated parameters are by the method detailed in 'Hosking J. Wallis J. 1997 Regional Frequency Analysis: An Approach Based on L-moments. Cambridge University Press, New York'
#' @param x numeric vector. The sample
#' @param mle logical argument with a default of FALSE. If FALSE the parameters are estimated with Lmoments, if TRUE the parameters are estimated by maximum likelihood estimation.
#' @param L1 first Lmoment
#' @param LCV linear coefficient of variation
#' @param LSKEW linear skewness
#' @examples
#' #Get an annual maximum sample and estimate the parameters using Lmoments
#' AM.27090 <- GetAM(27090)
#' GenLogPars(AM.27090$Flow)
#' #Estimate parameters using MLE
#' GenLogPars(AM.27090$Flow, mle = TRUE)
#' #calculate Lmoments and estimate the parameters with L1, Lcv and Lskew
#' Lmoms(AM.27090$Flow)
#' #store linear moments in an object
#' LPars <- as.numeric(Lmoms(AM.27090$Flow))[c(1,5,6)]
#' GenLogPars(L1 = LPars[1], LCV = LPars[2], LSKEW = LPars[3])
#' @return Parameter estimates (location, scale, shape)
#' @author Anthony Hammond
GenLogPars <- function(x = NULL, mle = FALSE, L1, LCV, LSKEW) {
  if(is.null(x) == FALSE & is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  if(mle == FALSE){
    if(is.null(x)) {
      Shape <- -LSKEW
      L2 <- L1*LCV
      Scale <- (L2*sin(Shape*pi))/(Shape*pi)
      Loc <- L1-Scale*((1/Shape)-(pi/sin(Shape*pi)))
      return(data.frame(Loc, Scale, Shape))}
    else {
      L1 <- mean(x, na.rm = TRUE)
      LCV <- Lcv(x)
      LSKEW <- LSkew(x)
      Shape <- -LSKEW
      L2 <- L1*LCV
      Scale <- (L2*sin(Shape*pi))/(Shape*pi)
      Loc <- L1-Scale*((1/Shape)-(pi/sin(Shape*pi)))
      return(data.frame(Loc, Scale, Shape))}}
  else {
    pars <- c(mean(x), sd(x)/1.5, 0.01)
    min.SLS <- function(q, par) {
      abs(sum(log(gl.pdf(q, loc = par[1], scale = par[2], shape = par[3]))))
    }
    gl.pdf <- function(q, loc, scale, shape) {
      y <- -shape^-1*log(1-shape*(q-loc)/scale)
      f <- (scale^-1*exp(1)^(-(1-shape)*y))/(1+exp(1)^-y)^2
      return(f)
    }
    result <- suppressWarnings(optim(par = pars, fn = min.SLS, q = x))
    loc <- result$par[1]
    scale <- result$par[2]
    shape <- result$par[3]
    log.likelihood <- -result$value[1]
    message <- result$message
    Res <- data.frame(loc, scale, shape, log.likelihood)
    return(Res)
  }

}

#'Generalised Pareto distribution parameter estimates
#'
#'Estimated parameters from a sample (with Lmoments or maximum likelihood estimation) or from L1 (first L-moment), Lcv (linear coefficient of variation), and LSkew (linear skewness)
#'
#'@details The L-moment estimated parameters are by the method detailed in 'Hosking J. Wallis J. 1997 Regional Frequency Analysis: An Approach Based on L-moments. Cambridge University Press, New York'
#'
#' @param x numeric vector. The sample
#' @param mle logical argument with a default of FALSE. If FALSE the parameters are estimated with Lmoments, if TRUE the parameters are estimated by maximum likelihood estimation
#' @param L1 first Lmoment
#' @param LCV linear coefficient of variation
#' @param LSKEW linear skewness
#' @examples
#' #Get a peaks over threshold sample and estimate the parameters using Lmoments
#' ThamesPOT <- POTextract(ThamesPQ[,c(1,3)], thresh = 0.90)
#' GenParetoPars(ThamesPOT$peak)
#' #Estimate parameters using MLE
#' GenParetoPars(ThamesPOT$peak, mle = TRUE)
#' #calculate Lmoments and estimate the parameters with L1, Lcv and Lskew
#' Lmoms(ThamesPOT$peak)
#' #store linear moments in an object
#' LPars <- as.numeric(Lmoms(ThamesPOT$peak))[c(1,5,6)]
#' GenParetoPars(L1 = LPars[1], LCV = LPars[2], LSKEW = LPars[3])
#' @return Parameter estimates (location, scale, shape)
#' @author Anthony Hammond
GenParetoPars <- function(x = NULL, mle = FALSE, L1, LCV, LSKEW) {
  if(is.null(x) == FALSE & is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  if(mle == FALSE){
    if(is.null(x)) {
      Shape <- (1-3*LSKEW)/(1+LSKEW)
      L2 <- L1*LCV
      Scale <- (1+Shape)*(2+Shape)*L2
      Loc <- L1-(2+Shape)*L2
      return(data.frame(Loc, Scale, Shape))}
    else {
      L1 <- mean(x, na.rm = TRUE)
      LCV <- Lcv(x)
      LSKEW <- LSkew(x)
      Shape <- (1-3*LSKEW)/(1+LSKEW)
      L2 <- L1*LCV
      Scale <- (1+Shape)*(2+Shape)*L2
      Loc <- L1-(2+Shape)*L2
      return(data.frame(Loc, Scale, Shape))}
  } else {
    pars <- c(sd(x)/1.5, 0.01)
    max.lhd <- function(q, par) {
      abs(sum(log(gp.pdf(q, loc = min(x), scale = par[1], shape = par[2]))))}
    gp.pdf <- function(q, loc, scale, shape) {
      y <- -shape^-1*log(1-shape*(q-loc)/scale)
      p <- scale^-1*exp(1)^(-(1-shape)*y)
      return(p)
    }
    result <- suppressWarnings(optim(par = pars, fn = max.lhd, q = x))
    loc <- min(x)
    scale <- result$par[1]
    shape <- result$par[2]
    log.likelihood <- -result$value[1]
    message <- result$message
    Res <- data.frame(loc, scale, shape, log.likelihood)
    return(Res)
  }
}

#'Gumbel distribution parameter estimates
#'
#'Estimated parameters from a sample (with Lmoments or maximum likelihood estimation) or from L1 (first L-moment), Lcv (linear coefficient of variation)
#'
#'@details The L-moment estimated parameters are by the method detailed in 'Hosking J. Wallis J. 1997 Regional Frequency Analysis: An Approach Based on L-moments. Cambridge University Press, New York'
#'
#' @param x numeric vector. The sample
#' @param mle logical argument with a default of FALSE. If FALSE the parameters are estimated with Lmoments, if TRUE the parameters are estimated by maximum likelihood estimation
#' @param L1 first Lmoment
#' @param LCV linear coefficient of variation
#' @examples
#' #Get an annual maximum sample and estimate the parameters using Lmoments
#' AM.27090 <- GetAM(27090)
#' GumbelPars(AM.27090$Flow)
#' #Estimate parameters using MLE
#' GumbelPars(AM.27090$Flow, mle = TRUE)
#' #calculate Lmoments and estimate the parameters with L1 and Lcv
#' Pars <- as.numeric(Lmoms(AM.27090$Flow)[c(1,5)])
#' GumbelPars(L1 = Pars[1], LCV = Pars[2])
#' @return Parameter estimates (location, scale)
#' @author Anthony Hammond
GumbelPars <- function(x = NULL, mle = FALSE, L1, LCV){
  if(is.null(x) == FALSE & is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  if(mle == FALSE) {
    if(is.null(x)) {Scale <- (L1*LCV)/log(2)
    Loc <- L1 - 0.5772*Scale
    return(data.frame(Loc, Scale))
    } else {
      L1 <- mean(x, na.rm = TRUE)
      LCV <- Lcv(x)
      Scale <- (L1*LCV)/log(2)
      Loc <- L1 - 0.5772*Scale
      return(data.frame(Loc, Scale))
    }
  } else {
    pars <- c(mean(x), sd(x)/1.5)
    max.lhd <- function(q, par) {
      abs(sum(log(gum.pdf(q, loc = par[1], scale = par[2]))))
    }
    gum.pdf <- function(q, loc, scale) {
      p <- scale^-1 * exp(-(q-loc)/scale)*exp(-exp(-(q-loc)/scale))
      return(p)
    }
    result <- suppressWarnings(optim(par = pars, fn = max.lhd, q = x))
    loc <- result$par[1]
    scale <- result$par[2]
    log.likelihood <- -result$value[1]
    message <- result$message
    Res <- data.frame(loc, scale, log.likelihood)
    return(Res)
  }
}


#'Data simulator
#'
#'Simulation of a random sample from the generalised extreme value, generalised logistic, Gumbel, or generalised Pareto distributions
#'
#'The simulated sample can be generated using distribution parameters, or the growth factor (GF) inputs: linear coefficient of variation (Lcv), linear skewness (LSkew) & the median annual maximum (QMED).
#' @param n sample size to be simulated
#' @param pars vector of parameters in the order of location, scale, shape (only location and shape for Gumbel)
#' @param dist choice of distribution. Either "GEV", "GenLog", "Gumbel" or "GenPareto"
#' @param GF vector of GF inputs in the order of Lcv, LSkew, QMED (only Lcv and QMED if dist = "Gumbel")
#' @examples
#' #Simulate a sample of size 30 from the GenLog distribution with parameters 299, 51, -0.042
#' SimData(30, pars = c(299, 51, -0.042), dist = "GenLog")
#' #Now simulate using the Lcv, Lskew, and median (0.17, 0.04, 310)
#' SimData(30, GF = c(0.17, 0.04, 310), dist = "GenLog")
#' @return A random sample of size n for the chosen distribution.
#' @author Anthony Hammond
SimData <- function(n, pars = NULL, dist = "GenLog", GF = NULL) {
  if(is.null(GF) == TRUE){
    if(dist == "GenPareto") {res <- GenParetoEst(loc = pars[1], scale = pars[2], shape = pars[3], RP = 1/runif(n), ppy = 1)}
    if(dist == "GEV") {res <- GEVEst(loc = pars[1], scale = pars[2], shape = pars[3], RP = 1/runif(n))}
    if(dist == "GenLog") {res <- GenLogEst(loc = pars[1], scale = pars[2], shape = pars[3], RP = 1/runif(n))}
    if(dist == "Gumbel") {res <- GumbelEst(loc = pars[1], scale = pars[2], RP = 1/runif(n))}
    return(res)} else
    {if(dist == "GenPareto") {res <- GenParetoGF(lcv = GF[1], lskew = GF[2], RP = 1/runif(n))}
      if(dist == "GEV") {res <- GEVGF(lcv = GF[1], lskew = GF[2], RP = 1/runif(n))}
      if(dist == "GenLog") {res <- GenLogGF(lcv = GF[1], lskew = GF[2], RP = 1/runif(n))}
      if(dist == "Gumbel") {res <- GumbelGF(lcv = GF[1], RP = 1/runif(n))}
      if(dist == "Gumbel"){
        return(res*GF[2])
      } else
        return(res*GF[3])}
}
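# Illustrative sketch (not run; kept as comments): a rough sanity check that,
# for a large simulated sample, the fitted parameters are close to those used
# for the simulation (subject to sampling variation).
# SimBig <- SimData(10000, pars = c(299, 51, -0.042), dist = "GenLog")
# GenLogPars(SimBig)   # should be in the vicinity of 299, 51, -0.042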


# QMED --------------------------------------------------------------------

#'QMED (median annual maximum flow) estimate from catchment descriptors
#'
#'Estimated median annual maximum flow from catchment descriptors and donor sites
#'
#'QMED is estimated from catchment descriptors: QMED = 8.3062*AREA^0.8510 * 0.1536^(1000/SAAR) * FARL^3.4451 * 0.0460^(BFIHOST^2) as derived in Science Report: SC050050 - Improving the FEH statistical procedures for flood frequency estimation. The single donor method is from the same paper. The method for two donors is outlined in 'Kjeldsen, T. (2019). Adjustment of QMED in ungauged catchments using two donor sites. Circulation - The Newsletter of the British Hydrological Society, 4'. When UrbAdj = TRUE, urban adjustment is applied to the QMED estimate according to the method outlined in the guidance by Wallingford HydroSolutions: 'WINFAP 4 Urban Adjustment Procedures'. Urban donors should be avoided, but in the case that the subject catchment is rural, and the donor is urban, the QMEDcd estimate of the donor (or donors) can be urban adjusted by setting the DonUrbAdj argument to TRUE. For flexibility there is the option to input the relevant catchment descriptors directly rather than a CDs object.
#' @param CDs catchment descriptors derived from either GetCDs or CDsXML
#' @param Don1 numeric site reference for a single donor (for donor candidates see DonAdj function)
#' @param Don2 vector of two site references for two donors (for donor candidates see DonAdj function)
#' @param UrbAdj logical argument with a default of FALSE. True applies an urban adjustment
#' @param DonUrbAdj logical argument with a default of FALSE. If TRUE, an urban adjustment is applied to the QMEDcd estimate of the donor (or donors).
#' @param AREA catchment area in km2
#' @param SAAR standard average annual rainfall (mm)
#' @param FARL flood attenuation from reservoirs and lakes
#' @param BFIHOST baseflow index calculated from the catchment hydrology of soil type classification
#' @param URBEXT2000 measure of catchment urbanisation
#' @examples
#' #Get some catchment descriptors and calculate QMED as if it was ungauged, with
#' #no donors, one donor, and two donors
#' CDs.55004 <- GetCDs(55004)
#' QMED(CDs.55004)
#' QMED(CDs.55004, Don1 = 55012)
#' QMED(CDs.55004, Don2 = c(55012, 60007))
#' #Get CDs for urban gauge and calculate QMED with urban adjustment
#' CDs.27083 <- GetCDs(27083)
#' QMED(CDs.27083, UrbAdj = TRUE)
#' @return An estimate of QMED from catchment descriptors
#' @author Anthony Hammond
QMED <- function(CDs = NULL, Don1 = NULL, Don2 = NULL, UrbAdj = FALSE, DonUrbAdj = FALSE, AREA, SAAR, FARL, BFIHOST, URBEXT2000 = NULL){
  if(is.null(Don1) == FALSE) {
    if(length(Don1) != 1) stop("The Don1 argument has length that is not 1")
  }
  if(is.null(Don2) == FALSE) {
    if(length(Don2) != 2) stop("The Don2 argument has length that is not 2")
  }

  Donor1 <- function(CDs, DonSite){
    QMED.cd <- 8.3062*CDs[1,2]^0.8510*0.1536^(1000/CDs[15,2])*CDs[8,2]^3.4451*0.0460^(CDs[5,2]^2)
    Site <- DonSite
    Donors <- DonAdj(CDs = CDs, rows = 500)
    Rw <- which(rownames(Donors) == DonSite)
    DonCDs <- GetCDs(rownames(Donors)[Rw])
    if(DonUrbAdj == TRUE) {
      DonQMEDcdUrb <- as.numeric(UAF(CDs = DonCDs)[2]) * Donors$QMEDcd[Rw]
      Result <- QMEDDonEq(CDs[1,2], CDs[15,2], CDs[8,2], CDs[5,2], Donors$QMED[Rw], DonQMEDcdUrb,
                          xSI = CDs[19,2], ySI = CDs[20,2], xDon = DonCDs[19,2],
                          yDon = DonCDs[20,2], alpha = TRUE)
    }
    if(DonUrbAdj == FALSE) {
      Result <- Donors$QMED.adj[Rw]}
    return(Result)
  }
  Donor2 <- function(CDs, Sites) {
    rij <- function(d) {0.4598*exp(-0.0200*d)+(1-0.4598)*exp(-0.4785*d)}
    NGRDist <- function(i, j) {sqrt((i[1]-j[1])^2+(i[2]-j[2])^2)/1000}
    Site1 <- Sites[1]
    Site2 <- Sites[2]
    CDs.Site1 <- GetCDs(Site1)
    CDs.Site2 <- GetCDs(Site2)
    Dist1 <- NGRDist(c(CDs[19,2], CDs[20,2]), c(CDs.Site1[19,2], CDs.Site1[20,2]))
    Dist2 <- NGRDist(c(CDs[19,2], CDs[20,2]), c(CDs.Site2[19,2], CDs.Site2[20,2]))
    Dist12 <- NGRDist(c(CDs.Site1[19,2], CDs.Site1[20,2]), c(CDs.Site2[19,2], CDs.Site2[20,2]))
    ps1 <- rij(Dist1)
    p12 <- rij(Dist12)
    ps2 <- rij(Dist2)
    a1 <- (ps1-p12*ps2)/(1-p12^2)
    a2 <- (ps2-p12*ps1)/(1-p12^2)
    QMEDscd <- 8.3062*CDs[1,2]^0.8510*0.1536^(1000/CDs[15,2])*CDs[8,2]^3.4451*0.0460^(CDs[5,2]^2)
    QMED1cd <- 8.3062*CDs.Site1[1,2]^0.8510*0.1536^(1000/CDs.Site1[15,2])*CDs.Site1[8,2]^3.4451*0.0460^(CDs.Site1[5,2]^2)
    QMED2cd <- 8.3062*CDs.Site2[1,2]^0.8510*0.1536^(1000/CDs.Site2[15,2])*CDs.Site2[8,2]^3.4451*0.0460^(CDs.Site2[5,2]^2)
    if(DonUrbAdj == TRUE) {
      QMED1cd <- as.numeric(UAF(CDs = CDs.Site1)[2]) * QMED1cd
      QMED2cd <- as.numeric(UAF(CDs = CDs.Site2)[2]) * QMED2cd
    }
    QMED1obs <- QMEDData$QMED[which(rownames(QMEDData) == Site1)]
    QMED2obs <- QMEDData$QMED[which(rownames(QMEDData) == Site2)]
    QMEDs.adj <- QMEDscd*(QMED1obs/QMED1cd)^a1 * (QMED2obs/QMED2cd)^a2
    return(QMEDs.adj)
  }
  if(is.null(CDs) == TRUE) {
    QMED.cd <- 8.3062*AREA^0.8510*0.1536^(1000/SAAR)*FARL^3.4451*0.0460^(BFIHOST^2)
    if(is.null(URBEXT2000) == TRUE & UrbAdj == TRUE) stop ("If UrbAdj is TRUE, URBEXT2000 is required")
    if(UrbAdj == TRUE) {
      Q.ua <- as.numeric(UAF(URBEXT2000 = URBEXT2000, BFIHOST = BFIHOST)[2])*QMED.cd}
    if (UrbAdj == FALSE) {QMED <- QMED.cd} else {QMED <- Q.ua}
    if (is.null(URBEXT2000) == TRUE & UrbAdj == FALSE) {print("No input for URBEXT2000. If it is greater than 0.03, urban adjustment is recommended")}
    if(is.null(URBEXT2000) == FALSE){
      if(UrbAdj == FALSE & URBEXT2000 > 0.03){print("URBEXT > 0.03, urban adjustment is recommended")}}
    return(QMED)
  } else {
    QMED.cd <- 8.3062*CDs[1,2]^0.8510*0.1536^(1000/CDs[15,2])*CDs[8,2]^3.4451*0.0460^(CDs[5,2]^2)
    if(is.null(Don1) == TRUE) {QMED.cd <- QMED.cd} else {QMED.cd <- Donor1(CDs = CDs, Don1)}
    if(is.null(Don2) == TRUE) {QMED.cd <- QMED.cd} else {QMED.cd <- Donor2(CDs = CDs, Don2)}
    Q.ua <- as.numeric(UAF(CDs = CDs)[2])*QMED.cd
    if (UrbAdj == FALSE) {QMED <- QMED.cd} else {QMED <- Q.ua}
    if (CDs[18,2] > 0.03 & UrbAdj == FALSE) {print("URBEXT > 0.03, urban adjustment is recommended")}
    return(QMED)
  }
}
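# Illustrative sketch (not run; kept as comments): the catchment descriptor QMED
# equation written out by hand, assuming the row ordering returned by GetCDs
# (AREA row 1, BFIHOST19 row 5, FARL row 8, SAAR row 15).
# CDs.55004 <- GetCDs(55004)
# AREA <- CDs.55004[1, 2]; BFIHOST <- CDs.55004[5, 2]
# FARL <- CDs.55004[8, 2]; SAAR <- CDs.55004[15, 2]
# 8.3062 * AREA^0.8510 * 0.1536^(1000/SAAR) * FARL^3.4451 * 0.0460^(BFIHOST^2)
# QMED(CDs.55004)   # should match the line above (no donors, no urban adjustment)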


#' Empirical estimate of QMED from peaks over threshold (POT) data
#'
#' Estimates the median annual maximum flow (QMED) from peaks over threshold data
#'
#'@details If there are multiple peaks per year, the peaks per year (ppy) argument is used to convert to the annual scale to derive QMED. If ppy is one, then the median of the POT sample is returned (the median of x).
#' @param x numerical vector. POT data
#' @param ppy number of peaks per year in the POT data
#' @examples
#' #Extract some POT data and estimate QMED
#' ThamesPOT <- POTextract(ThamesPQ[,c(1,3)], thresh = 0.90)
#' QMEDPOT(ThamesPOT$peak, ppy = 1.867263)
#' @author Anthony Hammond
QMEDPOT <- function(x, ppy){
  if(is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  qu <- 1-(0.5/ppy)
  qmed <- quantile(x, qu, na.rm = TRUE)
  return(as.numeric(qmed))
}
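# Illustrative sketch (not run; kept as comments): with ppy peaks per year,
# QMEDPOT is the (1 - 0.5/ppy) quantile of the POT sample; with ppy = 1 it is
# simply the median.
# ThamesPOT <- POTextract(ThamesPQ[, c(1, 3)], thresh = 0.90)
# quantile(ThamesPOT$peak, 1 - 0.5/1.867263)   # same value as QMEDPOT(ThamesPOT$peak, ppy = 1.867263)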


#' QMED Linking equation
#'
#' Estimates the median annual maximum flow (QMED) from non-flood flows
#'
#'The QMED Linking equation estimates QMED as a function of the flow that is exceeded 5 percent of the time, the flow that is exceeded 10 percent of the time, the baseflow index, and the catchment descriptor: drainage path slope (DPSBAR). All of these can be found for sites on the National River Flow Archive (NRFA) website. The method is provided in the guidance note 'WINFAP 4 QMED Linking equation' (2016) by Wallingford HydroSolutions.
#' @param Q5dmf numeric. The daily mean flow that is exceeded 5 percent of the time
#' @param Q10dmf numeric. The daily mean flow that is exceeded 10 percent of the time
#' @param DPSBAR a catchment descriptor. The average drainage path slope of the catchment
#' @param BFI the baseflow index of the gauged flow
#' @examples
#' #Calculate the QMED for site 1001 (Wick at Tarroul)
#' QMEDLink(10.14, 7.352, 29.90, 0.39)
#' @author Anthony Hammond
QMEDLink <- function(Q5dmf, Q10dmf, DPSBAR, BFI) {
  GRAD <- (log10(Q5dmf)-log10(Q10dmf))/(qnorm(0.05)-qnorm(0.1))
  1.762*Q5dmf^0.866*(1+GRAD)^-0.775*DPSBAR^0.265*0.2388^(BFI^2)
}
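# Illustrative sketch (not run; kept as comments): the GRAD term in QMEDLink is
# the slope of the log10 flow duration curve between the 5 and 10 percent
# exceedance points on a normal probability scale, shown here with the site
# 1001 values from the example above.
# Q5 <- 10.14; Q10 <- 7.352
# GRAD <- (log10(Q5) - log10(Q10)) / (qnorm(0.05) - qnorm(0.1))
# 1.762 * Q5^0.866 * (1 + GRAD)^-0.775 * 29.90^0.265 * 0.2388^(0.39^2)   # as per QMEDLink(10.14, 7.352, 29.90, 0.39)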


#' QMED donor adjustment
#'
#' Applies a donor adjustment to the median annual maximum flow (QMED) estimate
#'
#'Although a single donor adjustment can be applied with the DonAdj() function and with QMED(), this function is provided for flexibility. The method is that of Science Report: SC050050 - Improving the FEH statistical procedures for flood frequency estimation (2008).
#' @param AREA catchment area in km2
#' @param SAAR standardised average annual rainfall in mm
#' @param FARL flood attenuation from reservoirs and lakes
#' @param BFIHOST the baseflow index as a function of soil type
#' @param QMEDgObs the observed QMED at the donor site
#' @param QMEDgCds the QMED equation derived QMED at the donor site
#' @param xSI the catchment centroid easting for the site of interest
#' @param ySI the catchment centroid northing for the site of interest
#' @param xDon the catchment centroid easting for the donor site
#' @param yDon the catchment centroid northing for the donor site
#' @param alpha a logical argument with a default of TRUE. When FALSE the exponent in the donor equation is set to one. Otherwise it is determined by the distance between the donor and the subject site
#'
#' @examples
#' #Get observed QMED for site 96003
#' Qob <- median(GetAM(96003)[,2])
#' #Get QMED equation estimated QMED for the donor site
#' QCD <- QMED(CDs = GetCDs(96003))
#' #display CDs for site 96001 & note the easting and northing
#' GetCDs(96001)
#' #display CDs for site 96003 & note the easting and northing
#' GetCDs(96003)
#' #Apply the QMEDDonEq function with the information gained
#' QMEDDonEq(194, 1096, 0.955, 0.297, Qob, QCD, xSI = 289289,ySI = 947523,xDon = 280908,yDon = 953653)
#' @author Anthony Hammond
QMEDDonEq <- function(AREA, SAAR, FARL, BFIHOST, QMEDgObs, QMEDgCds, xSI, ySI, xDon, yDon, alpha = TRUE) {
  QMED.scd <- 8.3062*AREA^0.8510*0.1536^(1000/SAAR)*FARL^3.4451*0.0460^(BFIHOST^2)
  d <- NGRDist(i = c(xSI,ySI), j = c(xDon,yDon))
  rij <- function(d) {0.4598*exp(-0.0200*d)+(1-0.4598)*exp(-0.4785*d)}
  if(alpha == TRUE) {a <- rij(d)} else{a <- 1}
  QMED.adj <- QMED.scd*(QMEDgObs/QMEDgCds)^a
  return(QMED.adj)
}
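# Illustrative sketch (not run; kept as comments): the single donor adjustment
# is QMED.adj = QMED.scd * (QMEDgObs/QMEDgCds)^a, where a = rij(d) and d is the
# centroid distance in km. The weight decays with distance, so a distant donor
# has little effect.
# rij <- function(d) {0.4598*exp(-0.0200*d) + (1 - 0.4598)*exp(-0.4785*d)}
# rij(5)     # a nearby donor carries substantial weight
# rij(200)   # a distant donor carries very little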



#'Donor adjustment candidates & results
#'
#'Provides donor adjustment candidates, descriptors, and results in order of the proximity to the centroid of the subject catchment.
#'
#'When d2 is FALSE the results for single donor adjustment are in the final column headed 'QMED.adj' for each site. If alpha is set to FALSE, the results in this column are from the same donor equation but with an exponent of 1. The donor adjustment method is as outlined in Science Report: SC050050 - Improving the FEH statistical procedures for flood frequency estimation. The method for two donors is outlined in 'Kjeldsen, T. (2019). Adjustment of QMED in ungauged catchments using two donor sites. Circulation - The Newsletter of the British Hydrological Society, 4'. When two donors are used, only the result is returned, rather than donor candidates. The QMEDfse column provides the gauged factorial standard error for the median of the annual maximum sample. It is worth considering this when choosing a donor site (a high value indicates a poor donor). When choosing between two donors, the site with a lower QMEDfse would be an appropriate choice (all else being equal). The QMEDfse is calculated with the QMEDfseSS() function.
#' @param CDs catchment descriptors derived from either GetCDs or CDsXML
#' @param x catchment centroid easting (for when CDs isn't used)
#' @param y catchment centroid northing (for when CDs isn't used)
#' @param QMEDscd QMED estimate for the catchment of interest (for when CDs isn't used)
#' @param alpha logical argument with a default of TRUE. If FALSE the exponent of the donor adjustment equation is set to one
#' @param rows number of sites provided; default is 10
#' @param d2 a numeric vector of length two; the two site references for the donor catchments chosen for the two donor case
#' @examples
#' #Get some CDs and output candidate donor sites
#' CDs.54022 <- GetCDs(54022)
#' DonAdj(CDs.54022)
#' #Get results with inputs of x,y, and QMEDscd
#' DonAdj(x = 283261, y = 288067, QMEDscd = 17.931)
#' #Get a result with two donors
#' DonAdj(CDs.54022, d2 = c(54092, 54091))
#' @return A data.frame with rownames of site references and columns of catchment descriptors, distance from the subject site, and associated results. When two donors are used, only the resulting adjusted QMED is returned
#' @author Anthony Hammond

DonAdj <- function(CDs = NULL, x,y, QMEDscd = NULL, alpha = TRUE, rows = 10, d2 = NULL){
  Donor2 <- function(CDs, Sites) {
    rij <- function(d) {0.4598*exp(-0.0200*d)+(1-0.4598)*exp(-0.4785*d)}
    NGRDist <- function(i, j) {sqrt((i[1]-j[1])^2+(i[2]-j[2])^2)/1000}
    Site1 <- Sites[1]
    Site2 <- Sites[2]
    CDs.Site1 <- GetCDs(Site1)
    CDs.Site2 <- GetCDs(Site2)
    Dist1 <- NGRDist(c(CDs[19,2], CDs[20,2]), c(CDs.Site1[19,2], CDs.Site1[20,2]))
    Dist2 <- NGRDist(c(CDs[19,2], CDs[20,2]), c(CDs.Site2[19,2], CDs.Site2[20,2]))
    Dist12 <- NGRDist(c(CDs.Site1[19,2], CDs.Site1[20,2]), c(CDs.Site2[19,2], CDs.Site2[20,2]))
    ps1 <- rij(Dist1)
    p12 <- rij(Dist12)
    ps2 <- rij(Dist2)
    a1 <- (ps1-p12*ps2)/(1-p12^2)
    a2 <- (ps2-p12*ps1)/(1-p12^2)
    QMEDscd <- 8.3062*CDs[1,2]^0.8510*0.1536^(1000/CDs[15,2])*CDs[8,2]^3.4451*0.0460^(CDs[5,2]^2)
    QMED1cd <- 8.3062*CDs.Site1[1,2]^0.8510*0.1536^(1000/CDs.Site1[15,2])*CDs.Site1[8,2]^3.4451*0.0460^(CDs.Site1[5,2]^2)
    QMED2cd <- 8.3062*CDs.Site2[1,2]^0.8510*0.1536^(1000/CDs.Site2[15,2])*CDs.Site2[8,2]^3.4451*0.0460^(CDs.Site2[5,2]^2)
    QMED1obs <- QMEDData$QMED[which(rownames(QMEDData) == Site1)]
    QMED2obs <- QMEDData$QMED[which(rownames(QMEDData) == Site2)]
    QMEDs.adj <- QMEDscd*(QMED1obs/QMED1cd)^a1 * (QMED2obs/QMED2cd)^a2
    return(QMEDs.adj)
  }
  if(is.null(d2) == FALSE) {
    if(length(d2) < 2) stop("d2 must be NULL or a vector of length 2")
  }
  if(is.null(QMEDscd) == TRUE & is.null(CDs) == TRUE) stop("The QMED estimate must be an input, either automatically using CDs, or the QMEDscd argument")
  if(is.null(QMEDscd) == TRUE) {QMEDscd <- 8.3062*CDs[1,2]^0.8510*0.1536^(1000/CDs[15,2])*CDs[8,2]^3.4451*0.0460^(CDs[5,2]^2)} else {QMEDscd <- QMEDscd}
  suppressWarnings(if(is.null(CDs) == TRUE) {
    NGRDist <- function(i, j) {sqrt((i[1]-j[1])^2+(i[2]-j[2])^2)/1000}
    Dists <- NULL
    for(i in 1:length(QMEDData$QMED)) {Dists[i] <- NGRDist(i = c(x,y), j = c(QMEDData$X[i],QMEDData$Y[i]))}
    Dists.Table <- data.frame(QMEDData, Dists)
    Dists.Order <- Dists.Table[order(Dists.Table$Dists),]
    rij <- function(d) {0.4598*exp(-0.0200*d)+(1-0.4598)*exp(-0.4785*d)}
    if(alpha == TRUE) {a <- rij(Dists.Order$Dists)} else{a <- 1}
    Dists.Order <- cbind(Dists.Order, a)
    QMED.adj <- QMEDscd*(Dists.Order$QMED/Dists.Order$QMEDcd)^a
    Dists.Order <- cbind(Dists.Order, QMED.adj)
    if(is.null(d2) == TRUE){
      return(Dists.Order[1:rows,])
    } else {
      Qd2 <- Donor2(CDs, Sites = d2)
      return(Qd2)
    }} else {
      NGRDist <- function(i, j) {sqrt((i[1]-j[1])^2+(i[2]-j[2])^2)/1000}
      Dists <- NULL
      for(i in 1:length(QMEDData$QMED)) {Dists[i] <- NGRDist(i = c(CDs$Value[19],CDs$Value[20]), j = c(QMEDData$X[i],QMEDData$Y[i]))}
      Dists.Table <- data.frame(QMEDData, Dists)
      Dists.Order <- Dists.Table[order(Dists.Table$Dists),]
      rij <- function(d) {0.4598*exp(-0.0200*d)+(1-0.4598)*exp(-0.4785*d)}
      if(alpha == TRUE) {a <- rij(Dists.Order$Dists)} else{a <- 1}
      Dists.Order <- cbind(Dists.Order, a)
      QMED.adj <- QMEDscd*(Dists.Order$QMED/Dists.Order$QMEDcd)^a
      Dists.Order <- cbind(Dists.Order, QMED.adj)
      if(is.null(d2) == TRUE){
        return(Dists.Order[1:rows,])
      } else {
        Qd2 <- Donor2(CDs, Sites = d2)
        return(Qd2)
      }

    })
}


#' QMED factorial standard error for gauged sites
#'
#' Estimates the median annual maximum flow (QMED) factorial standard error (FSE) by bootstrapping the sample
#'
#'The bootstrapping procedure resamples from the sample N*500 times with replacement. After splitting into 500 samples of size N, the median is calculated for each. Then the exponent of the standard deviation of the log transformed residuals is taken as the FSE. i.e. exp(sd(log(x)-mean(log(x)))), where x is the bootstrapped medians.
#' @param x a numeric vector. The sample of interest
#' @examples
#' #Extract an AMAX sample and estimate the QMED factorial standard error
#' AM.203018 <- GetAM(203018)
#' QMEDfseSS(AM.203018$Flow)
#' @return The factorial standard error for the median of a sample.
#' @author Anthony Hammond
QMEDfseSS <- function(x) {
  if(is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  resample <- sample(x, size = length(x)*500, replace = TRUE)
  mat <- matrix(resample, nrow = length(x), ncol = 500)
  res <- apply(mat, 2, median)
  FSE <- function(x) {exp(sd(log(x) - mean(log(x))))}
  fse <- FSE(res)
  return(fse)
}
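# Illustrative sketch (not run; kept as comments): the bootstrap described above
# written out step by step; results will differ slightly from QMEDfseSS because
# of resampling variation.
# AM.203018 <- GetAM(203018)
# x <- AM.203018$Flow
# meds <- replicate(500, median(sample(x, size = length(x), replace = TRUE)))
# exp(sd(log(meds) - mean(log(meds))))   # compare with QMEDfseSS(x)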


#' QMED from a gauged site suitable for QMED
#'
#' Provides QMED (median annual maximum flow) from a site suitable for QMED, using the site reference.
#'
#' @param x the gauged site reference
#' @examples
#' #Get the observed QMED from site 55002
#' GetQMED(55002)
#' @return the median annual maximum
#' @author Anthony Hammond
GetQMED <- function(x) {
  MedianAM <- QMEDData[which(rownames(QMEDData) == x),19]
  if(length(MedianAM) < 1) stop("Site reference not recognised. Site not suitable for QMED or pooling.")
  return(MedianAM)
}


# UrbFuncs ----------------------------------------------------------------

#' Urban adjustment for the linear coefficient of variation (Lcv)
#'
#' Urbanises or de-urbanises the Lcv using the methods outlined in the guidance by Wallingford HydroSolutions: 'WINFAP 4 Urban Adjustment Procedures'
#'
#'The method for de-urbanisation isn't explicitly provided in 'WINFAP 4 Urban Adjustment Procedures', but the procedure is a rearrangement of the urbanisation equation, solving for Lcv rather than Lcv-urban.
#' @param lcv the Lcv (numeric)
#' @param URBEXT2000 quantification of urbanisation and suburbanisation for the subject catchment
#' @param DeUrb logical argument with a default of FALSE. If set to TRUE, de-urbanisation adjustment is performed, if FALSE, urbanisation adjustment is performed
#' @examples
#' #Choose an urban site (site 53006) from the NRFA data then apply a de-urban
#' #adjustment using the Lcv and URBEXT2000 displayed
#' NRFAData[which(rownames(NRFAData) == 53006),]
#' LcvUrb(0.21, 0.1138, DeUrb = TRUE)
#' #Get the pooled Lmoment ratios results for catchment 53006 and apply the
#' #urban adjustment using the pooled Lcv, and the URBEXT2000 for site 53006.
#' CDs.53006 <- GetCDs(53006)
#' QuickResults(CDs.53006)[[2]]
#' LcvUrb(0.196, 0.1138)
#' @return The urban-adjusted Lcv or the de-urbanised Lcv
#' @author Anthony Hammond

LcvUrb <- function(lcv, URBEXT2000, DeUrb = FALSE) {if (DeUrb == FALSE) {lcv*0.68654^(1.567*URBEXT2000)} else {lcv/(0.68654^(1.567*URBEXT2000))}}
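# Illustrative sketch (not run; kept as comments): de-urbanisation is the
# algebraic inverse of the urbanisation equation, so applying one after the
# other returns the original Lcv.
# LcvUrb(LcvUrb(0.21, 0.1138, DeUrb = TRUE), 0.1138)   # returns 0.21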


#' Urban adjustment for the linear skewness (LSkew)
#'
#' Urbanises or de-urbanises the LSkew using the methods outlined in the guidance by Wallingford HydroSolutions: 'WINFAP 4 Urban Adjustment Procedures'
#'
#'The method for de-urbanisation isn't explicitly provided in 'WINFAP 4 Urban Adjustment Procedures', but the procedure is a rearrangement of the urbanisation equation, solving for LSkew rather than LSkew-urban.
#' @param lskew the LSkew (numeric)
#' @param URBEXT2000 quantification of urbanisation and suburbanisation for the subject site
#' @param DeUrb logical argument with a default of FALSE. If set to TRUE, de-urbanisation adjustment is performed, if FALSE, urbanisation adjustment is performed
#' @examples
#' #Choose an urban site (site 53006) from the NRFA data then apply a de-urban
#' #adjustment using the Lcv and URBEXT2000 displayed
#' NRFAData[which(rownames(NRFAData) == 53006),]
#' LSkewUrb(0.124, 0.1138, DeUrb = TRUE)
#' #Get the pooled Lmoment ratios results for catchment 53006 and apply the urban
#' #adjustment using the pooled LSkew, and the URBEXT2000 for site 53006.
#' CDs.53006 <- GetCDs(53006)
#' QuickResults(CDs.53006)[[2]]
#' LSkewUrb(0.194, 0.1138)
#' @return The urban-adjusted LSkew or the de-urbanised LSkew
#' @author Anthony Hammond

LSkewUrb <- function(lskew, URBEXT2000, DeUrb = FALSE) {if(DeUrb == FALSE) {((lskew+1)*1.096017^(1.567*URBEXT2000))-1} else {((lskew+1)/1.096017^(1.567*URBEXT2000))-1}}


#' Urban adjustment factor (UAF) and percentage runoff urban adjustment factor (PRUAF)
#'
#' UAF and PRUAF from catchment descriptors for QMED estimation in ungauged urban catchments
#'
#' @param CDs catchment descriptors derived from either GetCDs or CDsXML
#' @param URBEXT2000 quantification of catchment urbanisation (used when CDs is not)
#' @param BFIHOST baseflow index as a function of hydrological soil type of the catchment (used when CDs is not)
#' @examples
#' #Get some catchment descriptors for an urban catchment and calculate the UAF & PRUAF
#' CDs.53006 <- GetCDs(53006)
#' UAF(CDs.53006)
#' #Calculate UAF and PRUAF using a user input URBEXT2000 and BFIHOST
#' UAF(URBEXT2000 = 0.1138, BFIHOST = 0.3620)
#' @return a data.frame with columns PRUAF and UAF
#' @author Anthony Hammond

UAF <- function(CDs = NULL, URBEXT2000, BFIHOST) {
  if(is.null(CDs) == TRUE) {PRUAF <- 1+0.3*1.567*URBEXT2000*(70/(69.366-65.686*BFIHOST)-1)} else {PRUAF <-   1+0.3*1.567*CDs[18,2]*(70/(69.366-65.686*CDs[5,2])-1)}
  if(is.null(CDs) == TRUE) {UAF <- (1+0.3*(1.567*URBEXT2000))^1.25*PRUAF^1.33} else {UAF <- (1+0.3*(1.567*CDs[18,2]))^1.25*PRUAF^1.33}
  return(data.frame(PRUAF, UAF))
}
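# Illustrative sketch (not run; kept as comments): the PRUAF and UAF equations
# written out with the URBEXT2000 and BFIHOST values from the example above.
# URBEXT2000 <- 0.1138; BFIHOST <- 0.3620
# PRUAF <- 1 + 0.3*1.567*URBEXT2000*(70/(69.366 - 65.686*BFIHOST) - 1)
# (1 + 0.3*(1.567*URBEXT2000))^1.25 * PRUAF^1.33   # UAF, as per UAF(URBEXT2000 = 0.1138, BFIHOST = 0.3620)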




# DataFuncs ---------------------------------------------------------------
#' Get an annual maximum sample from the National River Flow Archive sites suitable for pooling
#'
#' Extracts the annual maximum peak flow sample and associated dates for the site of interest.
#' @param ref the site reference of interest (numeric)
#' @examples
#' #Get an AMAX sample and display it in the console
#' GetAM(203018)
#' #Save an AMAX sample as an object
#' AM.203018 <- GetAM(203018)
#' @return A data.frame with columns; Date, Flow, and id
#' @author Anthony Hammond

GetAM <- function(ref) {
  Test <- which(AMSP$id == ref)
  if(length(Test) < 1) stop("Only sites suitable for pooling are available via this function. Check the reference or use AMImport to get sites not suitable for pooling")
  AM <- subset(AMSP, id == ref)
  rws <- seq(1, length(AM$Flow))
  Date <- as.Date(AM[,1])
  AM <- AM[,-1]
  AM <- cbind(Date, AM)
  rownames(AM) <- rws
  return(AM)
}


# Detrend -----------------------------------------------------------------

#' Linearly detrend a sample
#'
#'@description Applies a linear detrend to a sample
#'@details Adjusts all the values in the sample, of size n, by the difference between the linearly modelled ith data point and the linearly modelled nth data point.
#'@param x a numeric vector
#'@examples
#'# Get an annual maximum (AM) sample that looks to have a significant trend
#'AM.21025 <- GetAM(21025)
#'# plot the resulting AM as a bar plot. Then detrend and compare with another plot
#'plot(AM.21025$Flow, type = "h", ylab = "Discharge (m3/s)")
#'AM.Detrend <- DeTrend(AM.21025$Flow)
#'plot(AM.Detrend, type = "h", ylab = "Discharge (m3/s)")
#'@return A numeric vector which is a linearly detrended version of x.
#'@author Anthony Hammond
DeTrend <- function(x) {
  if(is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  Lmod <- lm(x ~ seq(1, length(x)))
  Lpred <- as.numeric(predict(Lmod))
  DiffsTrend <- NULL
  for(i in 1:length(x)) {DiffsTrend[i] <- Lpred[length(x)]-Lpred[i]}
  Detrend <- x+DiffsTrend
  return(Detrend)
}
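# Illustrative sketch (not run; kept as comments): the adjustment applied by
# DeTrend is the gap between the fitted line at the last point and at each
# point, so the series is pivoted to the level of the end of record.
# AM.21025 <- GetAM(21025)
# fit <- lm(AM.21025$Flow ~ seq_along(AM.21025$Flow))
# adj <- predict(fit)[length(AM.21025$Flow)] - predict(fit)
# all.equal(as.numeric(AM.21025$Flow + adj), DeTrend(AM.21025$Flow))   # TRUE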


#' Import catchment descriptors from .xml files
#'
#' Imports catchment descriptors from xml files either from an FEH webservice download or from the Peakflows dataset downloaded from the national river flow archive (NRFA) website
#'
#' File paths for importing data require forward slashes. On some operating systems, such as Windows, copied and pasted file paths will have backward slashes and would need to be changed accordingly.
#' @param x the xml file path
#' @examples
#' #Import catchment descriptors from a NRFA peakflows xml file and display in console
#' \dontrun{CDs.4003 <- CDsXML("C:/Data/NRFAPeakFlow_v11/Suitable for QMED/4003.xml")}
#' \dontrun{CDs.4003}
#' #Import catchment descriptors from a FEH webserver xml file and display xml in the console
#' \dontrun{CDs.MySite <- CDsXML("C:/Data/FEH_Catchment_384200_458200.xml")}
#' @return A data.frame with columns; Descriptor and Value.
#' @author Anthony Hammond
CDsXML <- function(x) {
  xmlx <- xml2::read_xml(x)
  ListXML <- xml2::as_list(xmlx)
  if(attributes(ListXML)$names == "FEHCDROMExportedDescriptors") {
    CDS <- ListXML$FEHCDROMExportedDescriptors$CatchmentDescriptors
  }
  if(attributes(ListXML)$names == "FEHDescriptors") {
    CDS <- ListXML$FEHDescriptors$CatchmentDescriptors
  }

  Descriptor <- c("AREA", "ALTBAR", "ASPBAR", "ASPVAR", "BFIHOST19", "DPLBAR",
                  "DPSBAR", "FARL", "FPEXT", "LDP", "PROPWET", "RMED-1H",
                  "RMED-1D", "RMED-2D", "SAAR", "SAAR4170", "SPRHOST",
                  "URBEXT2000", "Easting", "Northing", "URBEXT1990", "BFIHOST")
  if(length(as.numeric(CDS$area)) < 1) {AREA <- NA} else {AREA <- as.numeric(CDS$area)}
  if(length(as.numeric(CDS$altbar)) < 1) {ALTBAR <- NA} else {ALTBAR <- as.numeric(CDS$altbar)}
  if(length(as.numeric(CDS$aspbar)) < 1) {ASPBAR <- NA} else {ASPBAR <- as.numeric(CDS$aspbar)}
  if(length(as.numeric(CDS$aspvar)) < 1) {ASPVAR <- NA} else {ASPVAR <- as.numeric(CDS$aspvar)}
  if(length(as.numeric(CDS$bfihost19)) < 1) {BFIHOST19 <- NA} else {BFIHOST19 <- round(as.numeric(CDS$bfihost19), 3)}
  if(length(as.numeric(CDS$dplbar)) < 1) {DPLBAR <- NA} else {DPLBAR <- as.numeric(CDS$dplbar)}
  if(length(as.numeric(CDS$dpsbar)) < 1) {DPSBAR <- NA} else {DPSBAR <- as.numeric(CDS$dpsbar)}
  if(length(as.numeric(CDS$farl)) < 1) {FARL <- NA} else {FARL <- as.numeric(CDS$farl)}
  if(length(as.numeric(CDS$fpext)) < 1) {FPEXT <- NA} else {FPEXT <- as.numeric(CDS$fpext)}
  if(length(as.numeric(CDS$ldp)) < 1) {LDP <- NA} else {LDP <- as.numeric(CDS$ldp)}
  if(length(as.numeric(CDS$propwet)) < 1) {PROPWET <- NA} else {PROPWET <- as.numeric(CDS$propwet)}
  if(length(as.numeric(CDS$rmed_1h)) < 1) {RMED1H <- NA} else {RMED1H <- as.numeric(CDS$rmed_1h)}
  if(length(as.numeric(CDS$rmed_1d)) < 1) {RMED1D <- NA} else {RMED1D <- as.numeric(CDS$rmed_1d)}
  if(length(as.numeric(CDS$rmed_2d)) < 1) {RMED2D <- NA} else {RMED2D <- as.numeric(CDS$rmed_2d)}
  if(length(as.numeric(CDS$saar)) < 1) {SAAR <- NA} else {SAAR <- as.numeric(CDS$saar)}
  if(length(as.numeric(CDS$saar4170)) < 1) {SAAR4170 <- NA} else {SAAR4170 <- as.numeric(CDS$saar4170)}
  if(length(as.numeric(CDS$sprhost)) < 1) {SPRHOST <- NA} else {SPRHOST <- as.numeric(CDS$sprhost)}
  if(length(as.numeric(CDS$urbext2000)) < 1) {URBEXT2000 <- NA} else {URBEXT2000 <- as.numeric(CDS$urbext2000)}
  Easting <- attributes(CDS$CatchmentCentroid)$x
  Northing <- attributes(CDS$CatchmentCentroid)$y
  if(length(as.numeric(CDS$urbext1990)) < 1) {URBEXT1990 <- NA} else {URBEXT1990 <- as.numeric(CDS$urbext1990)}
  if(length(as.numeric(CDS$bfihost)) < 1) {BFIHOST <- NA} else {BFIHOST <- as.numeric(CDS$bfihost)}
  Value <- c(AREA, ALTBAR, ASPBAR, ASPVAR, BFIHOST19, DPLBAR,
             DPSBAR, FARL, FPEXT, LDP, PROPWET, RMED1H,
             RMED1D, RMED2D, SAAR, SAAR4170, SPRHOST,
             URBEXT2000, Easting, Northing, URBEXT1990, BFIHOST)
  DF <- data.frame(Descriptor, Value)
  NALogic <- is.na(DF$Value)
  if(NALogic[5] == TRUE & NALogic[22] ==FALSE){DF$Value[5] <- DF$Value[22]}
  if(NALogic[5] == TRUE & NALogic[22] ==FALSE) print("BFIHOST19 is not in the xml file and has been replaced by BFIHOST")
  TrueInd <- which(NALogic == TRUE)
  if(length(TrueInd) > 0 & NALogic[5] == FALSE) print("One or more catchment descriptors are missing from the xml file")
  if(length(TrueInd) > 1 & NALogic[5] == TRUE) print("One or more catchment descriptors are missing from the xml file. This may impact use of this CDs object with other functions")
  DF[,2] <- as.numeric(DF[,2])
  return(DF)
}




#' Get catchment descriptors from the National River Flow Archive sites considered suitable for median annual maximum flow estimation (QMED) and pooling.
#'
#' Extracts the catchment descriptors for a site of interest from those suitable for QMED and pooling.
#' @param x the site reference of interest (numeric)
#' @examples
#' #Get CDs and display in the console
#' CDs.203018 <- GetCDs(203018)
#' CDs.203018
#' @return A data.frame with columns; Descriptor and Value.
#' @author Anthony Hammond
GetCDs <- function(x) {
  Site.id <- which(row.names(QMEDData) == x)
  if(length(Site.id) == 0) stop ("Site ID not within the set of sites considered suitable for QMED or pooling analysis. For further sites CDsXML can be used")
  Site <- QMEDData[Site.id,]
  Site <- Site[,-c(19,20)]
  colnames(Site)[colnames(Site) == "X"] <-  "Easting"
  colnames(Site)[colnames(Site) == "Y"] <-  "Northing"
  Site <- t(Site)
  rws <- as.vector(row.names(Site))
  values <- NULL
  for(i in 1:24) {values[i] <- Site[i,1]}
  dframe <- data.frame(rws, values)
  colnames(dframe) <- c("Descriptor", "Value")
  dframe[1:14,2] <- round(dframe[1:14,2], 4)
  dframe[c(15,16,19,20,22),2] <- round(dframe[c(15,16,19,20,22),2])
  dframe[21,2] <- round(dframe[21,2], 3)
  return(dframe)
}

#' Import an annual maximum (AMAX) sample from NRFA peak flow .AM files
#'
#' Imports the peak flows and dates from NRFA peak flow .AM files, excluding the rejected years
#'
#'  File paths for importing data require forward slashes. On some operating systems, such as Windows, copied and pasted file paths will have backward slashes and would need to be changed accordingly.
#' @param x the file path for the .AM file
#' @examples
#' #Import an AMAX sample and display the first six rows in the console
#' \dontrun{AM.4003 <- AMImport("C:/Data/NRFAPeakFlow_v11/Suitable for QMED/4003.AM")}
#' \dontrun{head(AM.4003)}
#' @return A data.frame with columns; Date and Flow
#' @author Anthony Hammond
AMImport <- function(x)
{
  AMAX <- read.table(x, sep = ",", col.names = c("Date", "Flow", "Stage"), colClasses = c("character", "numeric", "NULL"), fill = T, skip = 6)
  Row.Strt <- 1+which(AMAX[,1] == "[AM Values]")
  AM <- AMAX[Row.Strt:length(AMAX[,1])-1,]
  AM <- AM[-1,]
  Dates <- data.frame(as.Date(AM[,1], format = "%d %b %Y"), AM[,2])
  colnames(Dates) <- c("Date", "Flow")
  AM.c1 <- AMAX[,1]
  if(AMAX[1,1] == "[AM Rejected]"){
    Rej <- AM.c1[2:which(AM.c1 == "[AM Values]")-2][-1]
    RejEnd <- as.character(as.numeric(Rej)+1)
    WYDate <- as.Date(paste(Rej, "- 10 - 01"), format = "%Y - %m - %d")
    WYEnd <- as.Date(paste(RejEnd, "- 09 - 30"), format = "%Y - %m - %d")
    Date.Func <- function(x, y, z){
      isTRUE(x >= y & x <= z)
    }
    RejFunc <- function(ind){
      Rej <- NULL
      for(i in 1:nrow(Dates)) {Rej[i] <- Date.Func(Dates$Date[i], WYDate[ind], WYEnd[ind])}
      WhichTRUE <- which(Rej == TRUE)
      if(length(WhichTRUE) == 0) {return(NA)} else {return(WhichTRUE)}
    }
    if(length(WYDate) > 1) {
      RejInd <- list()
      for(i in 1:length(WYDate)) {RejInd[[i]] <- RejFunc(i)}
      RejInd <- unlist(RejInd)
      if(any(is.na(RejInd)) == TRUE) {
        NAInd <- which(is.na(RejInd == TRUE))
        if(length(NAInd) == length(WYDate)) {RejInd <- NULL} else {
          RejInd <- RejInd[-which(is.na(RejInd) == TRUE)]}}
    } else {
      RejInd <- NULL
      for(i in 1:nrow(Dates)) {RejInd[i] <- Date.Func(Dates$Date[i], WYDate, WYEnd)}
      RejInd <- which(RejInd == TRUE)
      if(length(RejInd > 0)) {RejInd <- RejInd} else {RejInd <- NULL}
    }
    if(is.null(RejInd) == TRUE) {Result <- Dates} else {Result <- Dates[-RejInd,]}}
  else {Result <- Dates}
  rownames(Result) <- seq(1, nrow(Result))
  return(Result)
}



#' Peaks over threshold (POT) data extraction
#'
#' Extracts independent peaks over a threshold from a sample
#'
#'  If the x argument is a numeric vector, the peaks will be extracted with no time information.
#'  x can instead be a data.frame with dates in the first column and the numeric vector in the second.
#'  In this latter case, the peaks will be time-stamped and a hydrograph, including POT, will be plotted by default.
#'  The method of extracting independent peaks assumes that there is a value either side of which events can be considered independent.
#'  For example, if two peaks above the chosen threshold are separated by the mean flow, they could be considered independent,
#'  but not if flow hasn't returned to the mean at any time between the peaks. Mean flow may not always be appropriate, in which case the 'div' argument (a percentile) can be applied.
#'  The TimeDiv argument can also be applied to ensure the peaks are separated by a number of timesteps either side of the peaks.
#'  For extracting POT rainfall, a div of zero could be used, with TimeDiv providing further separation - which would be necessary for sub-daily time series.
#'  In that case, with hourly data for example, TimeDiv could be set to 120 to ensure each peak is separated by five days either side as well as by at least one hour with zero rainfall.
#'  When plotted, the blue line is the threshold, and the green line is the independence line (div).
#' @param x either a numeric vector or dataframe with date (or POSIXct) in the first column and hydrological variable in the second
#' @param div numeric percentile (between 0 and thresh), either side of which two peaks over the threshold are considered independent. The default is the mean of the sample.
#' @param TimeDiv Number of timesteps to define independence (supplements the div argument). As a default this is NULL and only 'div' defines independence. Currently this is only applicable for data.frames.
#' @param thresh user chosen threshold. Default is 0.975
#' @param Plot logical argument with a default of TRUE. When TRUE, the full hydrograph with the peaks over the threshold highlighted is plotted
#' @param ylab Label for the plot y axis. Default is "Magnitude"
#' @param main Title for the plot. Default is "Peaks over threshold"
#' @examples
#' #Extract POT data from Thames mean daily flow 1970-10-01 to 2015-09-25 with
#' #div = mean and threshold = 0.9. Then display the first six rows
#' ThamesQPOT <- POTextract(ThamesPQ[, c(1,3)], thresh = 0.9)
#' head(ThamesQPOT)
#' #Extract Thames POT from only the numeric vector of flows and display the
#' #first six rows
#' ThamesQPOT <- POTextract(ThamesPQ[, 3], thresh = 0.9)
#' head(ThamesQPOT)
#' #Extract the Thames POT precipitation with a div of 0, the default
#' #threshold, and 5 timesteps (days) either side of the peak. Then display the first six rows
#' ThamesPPOT <- POTextract(ThamesPQ[, c(1,2)], div = 0, TimeDiv = 5)
#' head(ThamesPPOT)
#' @return Prints the number of peaks per year and returns a data.frame with columns; Date and peak, with the option of a plot. Or a numeric vector of peaks is returned if only a numeric vector of the hydrological variable is input.
#' @author Anthony Hammond

POTextract <- function(x, div = NULL, TimeDiv = NULL, thresh = 0.975, Plot = TRUE, ylab = "Magnitude", main = "Peaks over threshold")
{
  Low.Func <- function(TS)
  {
    L <- length(TS)-2
    L1 <- length(TS)-1
    L2 <- length(TS)
    Vec1 <- TS[1:L]
    Vec2 <- TS[2:L1]
    Vec3 <- TS[3:L2]
    P1 <- ifelse(Vec2 <= Vec1 & Vec2 <= Vec3 & Vec1!= Vec2, Vec2, NA)
    return(P1)
  }

  P.Func <- function(TS)
  {
    L <- length(TS)-2
    L1 <- length(TS)-1
    L2 <- length(TS)
    Vec1 <- TS[1:L]
    Vec2 <- TS[2:L1]
    Vec3 <- TS[3:L2]
    P1 <- ifelse(Vec2 >= Vec1 & Vec2 >= Vec3 & Vec1!= Vec2, Vec2, NA)
    return(P1)
  }

  VP <- function(j, mu)
  {
    maxll <-  suppressWarnings(max(which(lows[1:j] <= mu), na.rm = T))
    if(maxll == -Inf) {maxll <- j} else {maxll <- maxll}
    minlr <-   suppressWarnings(min(which(lows[j:length(lows)] <= mu), na.rm = T))
    if(minlr == Inf) {minlr <- j} else {minlr <- j+(minlr-1)}
    MaxMin <- peaks[maxll:minlr]
    MaxInds <- which(MaxMin == max(MaxMin, na.rm = TRUE))
    MaxInds <- (maxll-1) + MaxInds
    if(peaks[j] == max(MaxMin, na.rm = T) & j == MaxInds[1]) {vp <- peaks[j]} else {vp <- NA}
    #IndPeakInEvent <- j - maxll
    #Max1 <- which.max(peaks[maxll:minlr])
    #if(IndPeakInEvent > Max1[1]) {vp <- NA} else {vp = peaks[j]}
    return(vp)
  }

  NAs <- FALSE
  if(is(x, "data.frame") == FALSE) {
    if(is.null(div) == FALSE) {
      if(div < 0 | div > thresh) stop("div must be between 0 and the thresh value")
      div <- quantile(x[x >0], div, na.rm = TRUE)
      if(div < min(x, na.rm = TRUE)) stop("Peaks division (div) is less than the minimum of x")}
    if(is.null(div)) {mu <- mean(x[x >0],na.rm = TRUE)} else {mu <- div}
    if(mu > quantile(x[x >0],thresh, na.rm = TRUE)) stop("The event division (div) must be equal to or lower than the event threshold")
    if(is.null(TimeDiv) == FALSE) print("Warning: TimeDiv isn't currently set up for vectors. The TimeDiv element entered has no impact on the result")
    QThresh <- as.numeric(quantile(x[x > 0], thresh, na.rm = TRUE))
    MinMuP <- min(which(x <= mu))
    MaxMuP <- max(which(x <= mu))
    PkBegin <- max(x[1:MinMuP])[1]
    PkEnd <- max(x[MaxMuP:length(x)])[1]
    x <- x[MinMuP:MaxMuP]
    lows <- Low.Func(x)
    peaks <- P.Func(x)
    MinMuL <- min(which(lows <= mu))
    MaxMuL <- max(which(lows <= mu))
    pt.ind <- which(peaks > QThresh)
    pt <- peaks[pt.ind]
    l <- length(pt.ind)
    POT <- NULL
    {for (i in 1:l) {POT[i] <- VP(pt.ind[i], mu)}}
    if(PkBegin > QThresh) {POT <- append(PkBegin, POT)}
    if(PkEnd > QThresh) {POT <- append(POT, PkEnd)}
    if(NAs == TRUE) {POT <- POT} else {POT <- POT[which(is.na(POT) == FALSE)]}
    return(POT)}
  if(is(x, "data.frame") == TRUE) {
    if(ncol(x) >2) stop("x must be a data.frame with two columns.")
    if(is.null(div) == FALSE) {
      if(div < 0 | div > thresh) stop("div must be between 0 and the thresh value")
      div <- as.numeric(quantile(x[,2][x[,2] >0], div, na.rm = TRUE))
      if(div < min(x[,2], na.rm = TRUE)) stop("Peaks division (div) is less than the minimum of x")}
    if(class(x[,1])[1] != "Date" & class(x[,1])[1] != "POSIXct") stop("First column must be Date or POSIXct class")
    if(is.null(div)) {mu <- mean(x[,2][x[,2] >0],na.rm = TRUE)} else {mu <- div}
    if(mu > quantile(x[,2][x[,2] >0],thresh, na.rm = TRUE)) stop("The event division (div) must be equal to or lower than the event threshold")
    QThresh <- as.numeric(quantile(x[,2][x[,2] >0], thresh, na.rm = TRUE))
    MinMuP <- min(which(x[,2] <= mu), na.rm = TRUE)
    MaxMuP <- max(which(x[,2] <= mu), na.rm = TRUE)
    PkBegin <- which(x[1:MinMuP,2] == max(x[1:MinMuP,2], na.rm = TRUE))[1]
    PkEnd <- which(x[MaxMuP:length(x[,2]),2] == max(x[MaxMuP:length(x[,2]),2], na.rm = TRUE))[1]
    if(is.null(TimeDiv) == FALSE) {
      DBegin <- data.frame(x[PkBegin,], PkBegin)
      DEnd <-   data.frame(x[((MaxMuP-1)+PkEnd),], PkEnd)
      colnames(DBegin) <- c("Date", "peak", "Index")
      colnames(DEnd) <- c("Date", "peak", "Index")
    }
    if(is.null(TimeDiv) == TRUE){
      DBegin <- x[PkBegin,]
      DEnd <- x[((MaxMuP-1)+PkEnd),]
      colnames(DBegin) <- c("Date", "peak")
      colnames(DEnd) <- c("Date", "peak")}
    xUse <- x[MinMuP:MaxMuP,]
    lows <- Low.Func(xUse[,2])
    peaks <- P.Func(xUse[,2])
    pt.ind <- which(peaks > QThresh)
    pt <- peaks[pt.ind]
    L <- length(pt.ind)
    POT <- NULL
    {for (i in 1:L) {POT[i] <- VP(pt.ind[i], mu)}}
    POT.Dates <- (xUse[,1][pt.ind])+1
    if(is.null(TimeDiv) == FALSE){
      res <- data.frame(POT.Dates, POT, pt.ind)
      colnames(res) <- c("Date", "peak", "Index")}
    if(is.null(TimeDiv) == TRUE){
      res <- data.frame(POT.Dates, POT)
      colnames(res) <- c("Date", "peak")
    }
    if(DBegin$peak > QThresh) {res <- rbind(DBegin, res)}
    if(DEnd$peak > QThresh) {res <- rbind(res, DEnd)}
    rownames(res) <- seq(1, length(res[,2]))
    if(NAs == TRUE) {res <- res} else {res <- res[which(is.na(res$peak) == FALSE), ]}
    if(is.null(TimeDiv) == FALSE) {
      ev.start <- res$Index - TimeDiv
      ev.end <- res$Index + TimeDiv
      res <- data.frame(res, ev.start, ev.end)
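      # RMInd identifies which of two overlapping events (within TimeDiv of each other)
      # has the smaller peak, so it can be removed; it returns NA where there is no overlap.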
      RMInd <- function(ind) {
        if(res$Index[ind] <= res$ev.end[ind-1]) {
          MaxInd <- which.max(res$peak[(ind-1):ind])
          if(MaxInd == 1) {Ind <- ind}
          if(MaxInd == 2) {Ind <- ind-1}
        }
        if(res$Index[ind] > res$ev.end[ind-1]){
          Ind <- NA
        }
        return(Ind)
      }
      while(any(res$Index[2:nrow(res)] - res$ev.end[1:(nrow(res)-1)] < 0)) {
        RMInds <- NULL
        for(i in 2:nrow(res)){RMInds[i] <- RMInd(i)}
        RMInds <- RMInds[is.na(RMInds) == FALSE]
        res <- res[-RMInds, ]
      }
      res <- res[,-c(3,4,5)]}
    if(Plot == TRUE) {
      if(mu == 0){plot(x, main = main, ylab = ylab, xlab= "Date", type = "h")}
      if(mu > 0){plot(x, main = main, ylab = ylab, xlab= "Date", type = "l")}
      abline(h = QThresh, col = "blue")
      points(res[,1:2], col = "red")
      abline(h = mu, col = rgb(0, 0.7, 0.3))}
    Years <- as.numeric((x[length(x[,1]),1]-x[1,1])/365.25)
    PPY <- length(res[,1])/Years
    print(paste("Peaks per year:", format(PPY, trim = TRUE), sep = " "))
    return(res)
  }
}

#' Annual maximum extraction
#'
#' Extracts the annual maximum peaks (with other options) from a data.frame which has dates (or POSIXct) in the first column and variable in the second.
#'
#'  The peaks are extracted based on the UK hydrological year (unless Calendar = TRUE), which starts on October 1st and ends on September 30th. If Trunc = TRUE, partial years (incomplete years at the beginning and end of the record) are removed; otherwise the extracted value may not be the true annual maximum for those years. If a whole year of data is NA, an -Inf or NA will be returned for that year. The default is to extract maximums, but the func argument can be used to choose other statistics (mean or sum for example). Note that if the data has a sub-daily resolution, it is first aggregated to a daily resolution (with a 09:00 start) before the extraction; for example, the maximum for each day is extracted first and the annual maximums are then taken from those daily values.
#' @param x a data.frame with dates (or POSIXct) in the first column and variable in the second
#' @param func A user chosen function to extract statistics other than maximums.
#' @param Calendar logical. If FALSE, the hydrological year maximums are returned. If TRUE, the calendar year maximums are returned.
#' @param Trunc logical with a default of TRUE. When true the beginning and end of the data.frame are first truncated so that it starts and ends at the start and end of the hydrological year (or Calendar year if Calendar = TRUE).
#' @param Plot a logical argument with a default of TRUE. If TRUE the extracted annual maximum is plotted
#' @param Title Title of the plot. The default is "Hydrological annual maximum sequence"
#' @param Ylabel Label for the y axis. The default is "Magnitude"
#' @examples
#' #Extract the Thames AMAX daily mean flow and display the first six rows
#' ThamesAM <- AMextract(ThamesPQ[,c(1,3)])
#' head(ThamesAM)
#' #Extract the annual rainfall totals and change the plot labels accordingly
#' ThamesAnnualP <- AMextract(ThamesPQ[,1:2], func = sum, Title = "", Ylabel = "Rainfall (mm)")
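#' #As a further illustration, extract calendar year (rather than hydrological year) maximums
#' ThamesCalAM <- AMextract(ThamesPQ[,c(1,3)], Calendar = TRUE)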
#' @return a data.frame with columns WaterYear (or Year if Calendar = TRUE) and AM. By default AM is the annual maximum sample, but it will be whichever statistic is used as the func argument.
#' @author Anthony Hammond
AMextract <- function(x, func = NULL, Calendar =FALSE, Trunc = TRUE, Plot = TRUE, Title = "Hydrological annual maximum sequence", Ylabel = "Magnitude")
{
  if(is(x, "data.frame") == FALSE) stop("x must be a data.frame")
  if(class(x[,1])[1] != "Date" & class(x[,1])[1] != "POSIXct") stop("First column must be Date or POSIXct class")
  if(is.null(func) == TRUE) {func <- max} else {func <- func}
  if(class(x[,1])[1] == "POSIXct") {x <- AggDayHour(x, func = func)}
  DayDiffs <- as.numeric(diff(x$Date))
  IndDiff <- which(DayDiffs > 1)
  LengthDiff <- length(IndDiff)
  if(LengthDiff >= 1) {MaxDiff <- max(DayDiffs) - 1}
  if(LengthDiff >= 1) {print(paste("Warning:", as.character(LengthDiff), "periods of data are missing.", "The maximum consecutive period of missing data is", as.character(MaxDiff), "days", sep = " "))}

  if(Trunc == TRUE) {
    if(Calendar == FALSE) {
      POSlt <- as.POSIXlt(x[,1])
      Mons <- (POSlt$mon)+1
      Oct1Ind <- min(which(Mons == 10))
      Sep30Ind <- max(which(Mons == 9))
      if(Oct1Ind == Inf | Oct1Ind == -Inf) stop("Truncation is looking for October 1st which isn't in the vector of Dates. Check dates or set Trunc to FALSE")
      if(Sep30Ind == Inf | Sep30Ind == -Inf) stop("Truncation is looking for September 30th which isn't in the vector of Dates. Check dates or set Trunc to FALSE")
      xTrunc <- x[Oct1Ind:Sep30Ind,]
    }
    if(Calendar == TRUE) {
      POSlt <- as.POSIXlt(x[,1])
      Mons <- (POSlt$mon)+1
      Jan1Ind <- min(which(Mons == 1))
      Dec31Ind <- max(which(Mons == 12))
      if(Jan1Ind == Inf | Jan1Ind == -Inf) stop("Truncation is looking for January 1st which isn't in the vector of Dates. Check dates or set Trunc to FALSE")
      if(Dec31Ind == Inf | Dec31Ind == -Inf) stop("Truncation is looking for December 31st which isn't in the vector of Dates. Check dates or set Trunc to FALSE")
      xTrunc <- x[Jan1Ind:Dec31Ind,]
    }}
  if(Trunc == FALSE) {xTrunc <- x}
  if(anyNA(xTrunc[,2]) == TRUE) {print("Warning: There is at least one missing value in the time series, this may compromise the calculated statistics")}
  Dates <- as.Date(xTrunc[, 1], tz = "Europe/London")
  xTrunc <- data.frame(Dates, xTrunc[, 2])
  Date1 <- xTrunc[1, 1]
  DateLst <- xTrunc[length(xTrunc[, 1]), 1]
  DateExtract <- function(d) {
    yr <- as.POSIXlt(d)$year + 1900
    mnth <- as.POSIXlt(d)$mon + 1
    return(c(yr, mnth))
  }
  if(Calendar == FALSE){
    Date1.ext <- DateExtract(Date1)
    DateLst.ext <- DateExtract(DateLst)
    if (Date1.ext[2] < 10) {
      WY <- Date1.ext[1] - 1
    }
    else {
      WY <- Date1.ext[1]
    }
    if (DateLst.ext[2] < 10) {
      WYend <- DateLst.ext[1] - 1
    }
    else {
      WYend <- DateLst.ext[1]
    }

    WYrSt <- as.Date(paste(WY, "10", "01", sep = "-"))
    WYrSt.to <- as.Date(paste(WYend, "10", "01",
                              sep = "-"))
    YrStarts <- seq(WYrSt, WYrSt.to, by = "year")
    WYendst <- as.Date(paste(WY + 1, "09", "30",
                             sep = "-"))
    YrEnds <- seq(WYendst, length.out = length(YrStarts), by = "year")
    AM <- NULL
    for (i in 1:length(YrStarts)) {
      AM[i] <- suppressWarnings(func(xTrunc[, 2][xTrunc[, 1] >= YrStarts[i] &
                                                   xTrunc[, 1] <= YrEnds[i]], na.rm = TRUE))
    }
    WaterYear <- seq(WY, WYend)
    AMDF <- data.frame(WaterYear, AM)
  }
  if(Calendar == TRUE) {
    Years <- as.POSIXlt(xTrunc[,1])$year + 1900
    xTrunc <- data.frame(xTrunc, Years)
    Year <- unique(Years)
    AM <- NULL
    for(i in 1:length(Year)) {AM[i] <- func(xTrunc[which(xTrunc$Years == Year[i]),2], na.rm = TRUE)}
    AMDF <- data.frame(Year, AM)
  }

  InfInd <- which(AMDF$AM == -Inf)
  if (length(InfInd) > 0) {
    AMDF[InfInd,2] <- NA
  }
  else {
    AMDF <- AMDF
  }
  if (length(InfInd) > 0) {
    print("Warning: at least one year had no data and returned -inf. The year/s in question is/are returned as NA")
  }
  if (Plot == TRUE) {
    if(Calendar == FALSE){
      plot(WaterYear, AM, type = "h", col = rgb(0,0.3,0.7), main = Title, ylab = Ylabel)
    }
    if(Calendar == TRUE){
      plot(Year, AM, type = "h", col = rgb(0,0.3,0.7), main = Title, ylab = Ylabel)}
  }
  return(AMDF)
}


# Uncertainty -------------------------------------------------------------

#' Uncertainty quantification for gauged and ungauged pooled estimates
#'
#' Quantification of uncertainty for pooling results for the gauged and ungauged case
#'
#'  Uncertainty in the ungauged case is calculated as detailed in Hammond, A. (2022). Easy methods for quantifying the uncertainty of FEH pooling analysis. Circulation - The Newsletter of the British Hydrological Society (152). The 68 percent and 95 percent intervals are returned. For the gauged case the pooled group is bootstrapped 500 times, the enhanced single site weighted linear skewness (LSkew) and linear coefficient of variation (Lcv) are calculated for each bootstrap sample, and 500 associated growth factors are derived. Each growth factor (GF) is multiplied by a randomly selected median annual maximum flow (QMED) from the uncertainty distribution of median estimates for the gauged subject site. The distribution of medians is derived by bootstrapping the gauged site 500 times. The intervals are then the upper and lower quantiles (depending on the conf input) of the resulting distribution of median * GF values. For the gauged case the user can choose the level for the intervals; the default is 0.95. Occasionally the single site central estimate will be outside the uncertainty intervals. In these cases the intervals are widened to incorporate it, i.e. if above the intervals, the upper interval is increased to the single site estimate, and vice versa if below. This occurs regardless of the confidence setting. For details about the calculation of weighted growth curves & urban adjustment see the PoolEst() function details. The gauged method is detailed in Hammond, A. (2021). Sampling uncertainty of UK design flood estimation. Hydrology Research, 52 (6), 1357–1371.
#' @param x the pooled group derived from the Pool() function
#' @param gauged a logical argument with a default of FALSE. If FALSE the uncertainty intervals are calculated for the ungauged case. If TRUE they are calculated for the gauged case
#' @param RP the return period of interest. Default is 100
#' @param dist a choice of distribution to use for the estimates. Choices are "GEV", "GenLog" or "Gumbel". The default is "GenLog"
#' @param qmed the QMED estimate for the ungauged case, or for the gauged case if the user wishes to override the median derived from the NRFA data
#' @param QMEDfse The factorial standard error of the QMED estimate for when an ungauged assessment has been undertaken. The default is 1.46
#' @param UrbAdj a logical argument with a default of FALSE. If TRUE, an urban adjustment is applied to the pooled growth curve
#' @param CDs catchment descriptors derived from either GetCDs or CDsXML. Necessary if UrbAdj is TRUE
#' @param conf the confidence level of the intervals for the gauged case. Default is 0.95. Must be between 0 and 1
#' @examples
#' #Get CDs, form an ungauged pooling group and quantify the uncertainty of the
#' #50-year pooled estimate when using a CDs estimate of QMED with no donors
#' CDs.203018 <- GetCDs(203018)
#' Pool.203018 <- Pool(CDs.203018, exclude = 203018)
#' Uncertainty(Pool.203018, qmed  = QMED(CDs.203018), RP = 50)
#' #Form pooling group with subject site included. Quantify the uncertainty of the
#' #50-year pooled estimate at the 99% level.
#'  Pool.203018 <- Pool(CDs.203018)
#'  Uncertainty(Pool.203018, gauged = TRUE, RP = 50, conf = 0.99)
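#' #As a further illustration, quantify the gauged uncertainty for the default
#' #100-year return period using the GEV distribution
#' Uncertainty(Pool.203018, gauged = TRUE, dist = "GEV")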
#' @return For the ungauged case, a data.frame of four values: the lower and upper bounds of the 68 percent interval and the lower and upper bounds of the 95 percent interval, headed by the associated percentiles. For the gauged case, a numeric vector of two values: the lower and upper bounds of the interval at the chosen conf level.
#' @author Anthony Hammond
Uncertainty <- function (x, gauged = FALSE, RP = 100, dist = "GenLog",
                         qmed = NULL, QMEDfse = 1.46, UrbAdj = FALSE, CDs = NULL, conf = 0.95)
{
  if (is.data.frame(x) == FALSE) {
    stop("x must be a pooled group. Pooled groups can be created with the Pool() function")
  }
  if (ncol(x) != 24)
    stop("x must be a pooled group. Pooled groups can be created with the Pool() function")
  if (UrbAdj == TRUE & is.null(CDs) == TRUE)
    stop("if UrbAdj = TRUE, CD object is necessary")
  if (is.null(CDs) == FALSE) {
    URBEXT2000 <- CDs[18, 2]
  }
  if (gauged == FALSE) {
    if (is.null(qmed) == TRUE)
      stop("Need to input qmed")
    y <- -log(-log(1 - 1/RP))
    FSE <- QMEDfse*(0.0069*y^2 - 0.0099*y + 1.0039)
    Central <- as.numeric(PoolEst(x = x, gauged = gauged,
                                  RP = RP, dist = dist, QMED = qmed,
                                  UrbAdj = UrbAdj, CDs = CDs)[[1]][2])
    L68 <- Central/FSE
    U68 <- Central * FSE
    L95 <- Central/FSE^2
    U95 <- Central * FSE^2
    df <- data.frame(L68, U68, L95, U95)
    colnames(df) <- c("16%", "84%", "2.5%",
                      "97.5%")
    return(df)
  }
  else {
    Unc.gauged <- function(x, RP, dist = "GenLog",
                           UrbAdj = FALSE, qmed = NULL, fse = FALSE,
                           conf = 0.68) {
      if (dist == "GenLog") {
        func <- GenLogGF
      }
      if (dist == "GEV") {
        func <- GEVGF
      }
      if (dist == "Gumbel") {
        func <- GumbelGF
      }
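      # Boot resamples the AMAX series of a given site 500 times (with replacement)
      # and returns the Lcv and LSkew of each resample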
      Boot <- function(AM.ref) {
        x <- GetAM(AM.ref)
        x <- x[, 2]
        resample <- sample(x, size = length(x) * 500,
                           replace = TRUE)
        mat <- matrix(resample, nrow = length(x), ncol = 500)
        lcvs <- apply(mat, 2, Lcv)
        lskews <- apply(mat, 2, LSkew)
        dfRatios <- data.frame(lcvs, lskews)
        return(dfRatios)
      }
      SiteRatios <- NULL
      for (i in 1:length(x$N)) {
        SiteRatios[[i]] <- Boot(rownames(x)[i])
      }
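      # LratTemp recalculates the gauged (enhanced single site) weighted Lcv and LSkew
      # for the pooling group using the jth bootstrapped L-moment ratios of each site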
      LratTemp <- function(x, j) {
        lcvs1 <- NULL
        for (i in 1:length(x$N)) {
          lcvs1[i] <- SiteRatios[[i]][j, 1]
        }
        lskews1 <- NULL
        for (i in 1:length(x$N)) {
          lskews1[i] <- SiteRatios[[i]][j, 2]
        }
        PoolTemp <- x
        PoolTemp[, 16] <- lcvs1
        PoolTemp[, 17] <- lskews1
        lcvGTemp <- WGaugLcv(PoolTemp)
        lskewGTemp <- WGaugLSkew(PoolTemp)
        df <- data.frame(lcvGTemp, lskewGTemp)
        return(df)
      }
      Lratios <- LratTemp(x, 1)
      for (j in 2:500) {
        Lratios <- rbind(Lratios, LratTemp(x, j))
      }
      if (UrbAdj == TRUE) {
        LCVs <- LcvUrb(Lratios[, 1], URBEXT2000)
      }
      else {
        LCVs <- Lratios[, 1]
      }
      if (UrbAdj == TRUE) {
        LSKEWs <- LSkewUrb(Lratios[, 2], URBEXT2000)
      }
      else {
        LSKEWs <- Lratios[, 2]
      }
      if (dist == "Gumbel") {
        Zts <- func(LCVs, RP = RP)
      }
      else {
        Zts <- func(LCVs, LSKEWs, RP = RP)
      }
      AM <- GetAM(rownames(x)[1])
      AM <- AM[, 2]
      resample <- sample(AM, size = length(AM) * 500, replace = TRUE)
      mat <- matrix(resample, nrow = length(AM), ncol = 500)
      Meds <- apply(mat, 2, median)
      if (is.null(qmed) == TRUE) {
        QMEDCentral <- median(AM)
      }
      else {
        QMEDCentral <- qmed
      }
      lcvCentral <- WGaugLcv(x)
      lskewCentral <- WGaugLSkew(x)
      if (UrbAdj == TRUE) {
        lcvCentral <- LcvUrb(lcvCentral, URBEXT2000 = URBEXT2000)
      }
      if (UrbAdj == TRUE) {
        lskewCentral <- LSkewUrb(lskewCentral, URBEXT2000 = URBEXT2000)
      }
      if (dist == "Gumbel") {
        ZtCentral <- func(lcv = lcvCentral, RP = RP)
      }
      else {
        ZtCentral <- func(lcv = lcvCentral, lskew = lskewCentral,
                          RP = RP)
      }
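      # Factorial standard error of the bootstrapped estimates:
      # the exponential of the standard deviation of the natural logs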
      FSEest <- function(x) {
        exp(sd(log(x) - mean(log(x))))
      }
      Res500 <- Meds * Zts
      FSE <- FSEest(Res500)
      Intervals <- quantile(Res500, c((1 - conf)/2, 1 -
                                        (1 - conf)/2))
      Res <- list(FSE, Intervals)
      names(Res) <- c("Factorial standard error",
                      "Intervals")
      if (fse == TRUE) {
        return(Res)
      }
      else {
        return(Intervals)
      }
    }
    Res <- Unc.gauged(x = x, RP = RP, dist = dist,
                      UrbAdj = UrbAdj, qmed = qmed, fse = FALSE, conf = conf)
    if (dist == "GenLog") {
      func <- GenLogGF
    }
    if (dist == "GEV") {
      func <- GEVGF
    }
    if (dist == "Gumbel") {
      func <- GumbelGF
    }
    MedianAM <- GetQMED(rownames(x)[1])
    if (is.null(qmed) == TRUE) {
      QMEDCentral <- MedianAM
    }
    else {
      QMEDCentral <- qmed
    }
    if (dist == "Gumbel") {
      SSEst <- QMEDCentral * func(x[1, 16], RP = RP)
    }
    else {
      SSEst <- QMEDCentral * func(x[1, 16], x[1, 17], RP = RP)
    }
    if (SSEst < Res[1]) {
      Res[1] <- SSEst
    }
    if (SSEst > Res[2]) {
      Res[2] <- SSEst
    }
  }
  return(Res)
}


#' Uncertainty for the single site
#'
#' Quantifies the aleatoric uncertainty for a single site estimate, by bootstrapping the sample
#'
#' The bootstrapping procedure resamples from the sample N*500 times with replacement. After splitting into 500 samples of size N, the statistic of interest is calculated on each. The upper and lower quantiles of the resulting distribution are used as the quantification of uncertainty. Any function that provides an estimate based on a sample of data can be used, including any function that provides estimates as a function of return period.
#' @param x a numeric vector. The sample of interest
#' @param func the function to be applied
#' @param conf the confidence level of the intervals
#' @param RP return period. Necessary if func requires RP
#' @examples
#' #Extract an AMAX sample and quantify uncertainty for the GEV estimated 50-year flow
#' AM.203018 <- GetAM(203018)
#' UncSS(AM.203018$Flow, func = GEVAM, RP = 50)
#' #Quantify uncertainty for the sample standard deviation at the 90 percent confidence level
#' UncSS(AM.203018$Flow, func = sd, conf = 0.90)
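#' #As a further illustration, quantify uncertainty for the sample Lcv
#' UncSS(AM.203018$Flow, func = Lcv)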
#' @return A data.frame of three values; central, lower, and upper bootstrapped estimates.
#' @author Anthony Hammond
UncSS <- function (x, func, conf = 0.95, RP = NULL)
{
  if (is.numeric(x) == FALSE) {
    stop("x must be a numeric vector")
  }
  resample <- sample(x, size = length(x) * 500, replace = TRUE)
  mat <- matrix(resample, nrow = length(x), ncol = 500)
  if (is.null(RP) == TRUE) {
    res <- apply(mat, 2, func)
  }
  else {
    res <- apply(mat, 2, func, RP = RP)
  }
  lint <- (1 - conf)/2
  uint <- ((1 - conf)/2) + conf
  lower <- quantile(res, lint, na.rm = TRUE)
  upper <- quantile(res, uint, na.rm = TRUE)
  centre <- quantile(res, 0.5, na.rm = TRUE)
  frame <- data.frame(centre, lower, upper)
  rownames(frame) <- ""
  return(frame)
}



# Diagnostics -------------------------------------------------------------

#' Goodness of tail fit (GoTF).
#'
#' Provides two GoTF scores for the generalised extreme value (GEV), Gumbel, generalised Pareto (GenPareto), or generalised logistic (GenLog) distribution, or for any simulated numeric distribution supplied via the RepDist argument
#'
#' The closer the results are to one, the better the fit. The GoTF is calculated by simulating the sample 5000 times with the desired distribution and calculating a statistic (in this case the coefficient of variation (CV) & mean) for the upper 25 percent of each sample. The same is calculated for the subject sample and compared to the distribution. The number of statistics from the simulated samples that are greater than the sample statistics is divided by 5000. The GoTF is this latter number where it is <0.5 and 1 minus this latter number where it is >0.5, doubled so that the best possible score is one. If any further distributions are of interest, the representative distribution (RepDist) argument can be used. In this case a simulation of size 5000*length(x) from that distribution can be used as RepDist, in place of using the dist input. If a sample that is not equal to 5000 times length(x) is supplied as RepDist, it will be resampled with replacement. An alternative is to use the pars or GF arguments which simulate from the distribution choice (dist) based on the parameters (location, scale, shape) or the growth factor (GF) inputs; the median annual maximum flow (QMED), linear coefficient of variation (Lcv), and linear skewness (LSkew). The resulting probabilities for each statistic (the GoTF score) represent the probability of observing that statistic if the sample distribution has the same underlying distribution as the one under scrutiny.
#' @param x a numeric vector. The sample of interest
#' @param dist a choice of distribution to be assessed. The choices are "GenLog", "GEV", "Gumbel", or "GenPareto". The default is "GenLog"
#' @param pars numeric vector of parameters for the GEV, GenLog, Gumbel, or GenPareto distributions. In the order of location, scale, shape (excluding shape for Gumbel)
#' @param GF numeric vector of length three comprising the growth factor statistics & QMED, in the order of Lcv, LSkew, & QMED
#' @param RepDist a simulated sample (ideally of size = 5000*n) of a representative distribution to compare to the sample of interest
#' @examples
#' #Get an AMAX sample and derive GoTF score against the GenLog and the GEV distributions
#' \donttest{AM <- GetAM(203018)}
#' \donttest{GoTF(AM$Flow, dist = "GenLog")}
#' \donttest{GoTF(AM$Flow, dist = "GEV")}
#' #Derive the GF parameters for the ungauged pooled estimate for the AM and
#' #calculate a GoTF for GenLog (assuming the gauged QMED)
#' #For this assume 0.16 and 0.2 as the ungauged Lcv & LSkew pooled estimates
#'  \donttest{GoTF(AM$Flow, GF = c(0.16, 0.2, median(AM$Flow)))}
#' #Calculate the GoTF based on crudely estimated parameters of the GenLog distribution.
#' \donttest{Loc <- mean(AM$Flow)}
#' \donttest{Scale <- sd(AM$Flow)}
#' \donttest{Skew <- 1-(median(AM$Flow)/mean(AM$Flow))}
#' \donttest{GoTF(AM$Flow, pars = c(Loc, Scale, Skew))}
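#' #As a further illustration, supply a representative distribution directly via RepDist;
#' #here a GenLog sample simulated from the gauged AMAX, which should give a
#' #similar result to using dist = "GenLog"
#' \donttest{RepSample <- GenLogAM(AM$Flow, RP = 1/runif(5000*length(AM$Flow)))}
#' \donttest{GoTF(AM$Flow, RepDist = RepSample)}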
#' @return A data.frame with one row of probabilities representing the GoTF. The first column is for the tail coefficient of variation (cv) and the second is for the tail mean.
#' @author Anthony Hammond

GoTF <- function(x, dist = "GenLog", pars = NULL, GF = NULL, RepDist = NULL){
  if(is.numeric(x) == FALSE) {stop("x must be a numeric vector")}
  if(is.null(RepDist) == FALSE) {
    if(length(RepDist) != (5000*length(x))) {print("Warning: RepDist not equal to 5000 * length(x), resampling has been used")}
    if(length(RepDist) != (5000*length(x))) {RepDist <- sample(RepDist, 5000*length(x), replace = TRUE)}
    MMR <- function(x) {sd(x[x > quantile(x, 0.75, na.rm = TRUE)])/mean(x[x > quantile(x, 0.75, na.rm = TRUE)])}
    Mat.1 <- matrix(RepDist, nrow = length(x), ncol = 5000)
    MMRs <- apply(Mat.1, 2, MMR)
    MMRo <- MMR(x)
    SortMMRs <- sort(MMRs)
    Ind <- suppressWarnings(min(which(SortMMRs >  MMRo)))
    if(Ind == Inf | Ind == 0) {Prop <- 0.0002} else {Prop <- Ind/5000}
    if(Prop > 0.5) {res <- 1-Prop} else {res <- Prop}
    if(Ind == Inf | res == 0) {res <- "< 0.0002"} else {res <- res/0.5}

    TailMean <- function(x) {mean(x[x > quantile(x, 0.75, na.rm = TRUE)])}
    Mat.2 <- matrix(RepDist, nrow = length(x), ncol = 5000)
    TMs <- apply(Mat.2, 2, TailMean)
    TMo <- TailMean(x)
    SortTMs <- sort(TMs)
    Ind2 <- suppressWarnings(min(which(SortTMs >  TMo)))
    if(Ind2 == Inf | Ind2 == 0) {Prop2 <- 0.0002} else {Prop2 <- Ind2/5000}
    if(Prop2 > 0.5) {res2 <- 1-Prop2} else {res2 <- Prop2}
    if(Ind2 == Inf | res2 == 0) {res2 <- "< 0.0002"} else {res2 <- res2/0.5}

    ResDF <- data.frame(res, res2)
    colnames(ResDF) <- c("p(Tail cv)", "p(Tail mean)")
    return(ResDF)

  } else {

    if(dist == "GenLog") {funcX <- GenLogAM
    funcPars <- GenLogEst
    funcGF <- GenLogGF}
    if(dist == "GEV")
    {funcX <- GEVAM
    funcPars <- GEVEst
    funcGF <- GEVGF}
    if(dist == "Gumbel")
    {funcX <- GumbelAM
    funcPars <- GumbelEst
    funcGF <- GumbelGF}
    if(dist == "GenPareto")
    {funcX <- GenParetoPOT
    funcPars <- GenParetoEst
    funcGF <- GenParetoGF}
    MMR <- function(x) {sd(x[x > quantile(x, 0.75, na.rm = TRUE)])/mean(x[x > quantile(x, 0.75, na.rm = TRUE)])}
    Rands <- 1/runif(length(x)*5000)
    if(is.null(pars) == TRUE & is.null(GF) == TRUE) {Sims <- funcX(x, RP = Rands)}
    if(is.null(pars) == FALSE) {
      if(dist == "Gumbel") {Sims <- funcPars(pars[1], pars[2], RP = Rands)} else
      {Sims <- funcPars(pars[1], pars[2], pars[3], RP = Rands)}}
    if(is.null(GF) == FALSE)  {
      if(dist == "Gumbel") {Sims <- funcGF(GF[1], RP = Rands)*GF[3]} else
      {Sims <- funcGF(GF[1], GF[2], RP = Rands)*GF[3]}}

    Mat.1 <- matrix(Sims, nrow = length(x), ncol = 5000)
    MMRs <- apply(Mat.1, 2, MMR)
    MMRo <- MMR(x)
    SortMMRs <- sort(MMRs)
    Ind <- suppressWarnings(min(which(SortMMRs >  MMRo)))
    if