#' The bootstrap uncertainty analysis for runPairs results
#'
#' The function that does the uncertainty analysis for determining the change between any
#' pair of years. It is very similar to the \code{\link{wBT}} function that runs the WRTDS
#' bootstrap test. It differs from \code{\link{wBT}} in that it runs a specific number of
#' bootstrap replicates, unlike the \code{\link{wBT}} approach that will stop running replicates
#' based on the status of the test statistics along the way. Also, this code can be used with
#' generalized flow normalization, which handles non-stationary discharge,
#' whereas \code{\link{wBT}} does not.
#'
#' @param eList named list with at least the Daily, Sample, and INFO dataframes
#' @param pairResults data frame returned from \code{\link[EGRET]{runPairs}}
#' @param nBoot the maximum number of bootstrap replicates to be used, typically 100
#' @param blockLength integer size of subset, expressed in days. 200 days has been found to be a good choice.
#' @param startSeed setSeed value. Defaults to 494817. This is used to make repeatable output.
#' @param jitterOn logical, if TRUE, adds "jitter" to the data in an attempt to avoid some numerical problems. Default = FALSE. See Details below.
#' @param V numeric a multiplier for addition of jitter to the data, default = 0.2.
#' @export
#' @details
#' In some situations numerical problems are encountered in the bootstrap process, resulting in highly unreasonable spikes in the confidence intervals.
#' The use of "jitter" can often prevent these problems, but should only be used when it is clearly needed.
#' It adds a small amount of random "jitter" to the explanatory variables of the WRTDS model. The V parameter sets the scale of variation in the log discharge values.
#' The standard deviation of the added jitter is V * standard deviation of Log Q.
#' The default for V is 0.2. Larger values should generally be avoided, and smaller values may be sufficient.
#' @return eBoot, a named list with bootOut, wordsOut, xConc, xFlux, pConc, pFlux values.
#' \itemize{
#' \item{bootOut is a data frame with the results of the bootstrap test. }
#' \item{wordsOut is a character vector describing the results.}
#' \item{xConc and xFlux are vectors of length iBoot, of the change in flow normalized concentration
#' and flow normalized flux computed from each of the bootstrap replicates. }
#' \item{pConc and pFlux are vectors of length iBoot, of the change in flow normalized concentration
#' or flow normalized flux computed from each of the bootstrap replicates expressed as \% change.}
#' }
#' @seealso \code{\link{runGroupsBoot}}, \code{\link[EGRET]{runPairs}}
#' @importFrom EGRET jitterSam
#' @examples
#' library(EGRET)
#' eList <- Choptank_eList
#' year1 <- 1985
#' year2 <- 2009
#'
#' \dontrun{
#' pairOut_2 <- runPairs(eList, year1, year2, windowSide = 7)
#'
#' boot_pair_out <- runPairsBoot(eList, pairOut_2)
#'
#' plotHistogramTrend(eList, boot_pair_out, caseSetUp = NA)
#' }
runPairsBoot <- function(eList, pairResults,
nBoot = 100, startSeed = 494817,
blockLength = 200, jitterOn = FALSE, V = 0.2) {
interactive <- FALSE
localINFO <- eList$INFO
localDaily <- eList$Daily
localSample <- eList$Sample
firstDayDaily <- min(localDaily$Date, na.rm = TRUE)
lastDayDaily <- max(localDaily$Date, na.rm = TRUE)
firstDaySample <- min(localSample$Date, na.rm = TRUE)
lastDaySample <- max(localSample$Date, na.rm = TRUE)
prob = c(0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.975)
words <- function(z) {
out <- if (z)
"Reject Ho"
else "Do Not Reject Ho"
return(out)
}
bootOut <- as.data.frame(matrix(ncol = 27, nrow = 1))
colnames(bootOut) <- c("rejectC", "pValC", "estC", "lowC90",
"upC90", "lowC50", "upC50", "lowC95", "upC95", "likeCUp",
"likeCDown", "rejectF", "pValF", "estF", "lowF90", "upF90",
"lowF50", "upF50", "lowF95", "upF95", "likeFUp", "likeFDown",
"baseConc", "baseFlux", "nBoot","startSeed","blockLength")
# get the parameters out of the attributes of pairResults
paStart <- attr(pairResults, "yearPair")[["paStart"]]
paLong <- attr(pairResults, "yearPair")[["paLong"]]
year1 <- attr(pairResults, "yearPair")[["year1"]]
year2 <- attr(pairResults, "yearPair")[["year2"]]
startEnd1 <- EGRET::startEnd(paStart, paLong, year1)
startEnd2 <- EGRET::startEnd(paStart, paLong, year2)
if(startEnd2$startDate > range(eList$Sample$Date)[2]){
stop("year2 is outside the Sample range")
}
if(startEnd1$endDate < range(eList$Sample$Date)[1]){
stop("year1 is outside the Sample range")
}
start1 <- as.Date(startEnd1[["startDate"]])
end1 <- as.Date(startEnd1[["endDate"]])
start2 <- as.Date(startEnd2[["startDate"]])
end2 <- as.Date(startEnd2[["endDate"]])
dateInfo <- attr(pairResults, "dateInfo")
sample1StartDate <- attr(pairResults, "SampleBlocks")[["sample1StartDate"]]
sample1EndDate <- attr(pairResults, "SampleBlocks")[["sample1EndDate"]]
sample2StartDate <- attr(pairResults, "SampleBlocks")[["sample2StartDate"]]
sample2EndDate <- attr(pairResults, "SampleBlocks")[["sample2EndDate"]]
minNumObs <- attr(pairResults, "Other")[["minNumObs"]]
minNumUncen <- attr(pairResults, "Other")[["minNumUncen"]]
windowY <- attr(pairResults, "Other")[["windowY"]]
windowQ <- attr(pairResults, "Other")[["windowQ"]]
windowS <- attr(pairResults, "Other")[["windowS"]]
wall <- attr(pairResults, "Other")[["wall"]]
edgeAdjust <- attr(pairResults, "Other")[["edgeAdjust"]]
xConc <- rep(NA, nBoot)
xFlux <- rep(NA, nBoot)
pConc <- rep(NA, nBoot)
pFlux <- rep(NA, nBoot)
#
regDeltaConc <- pairResults$x22[1] - pairResults$x11[1]
estC <- regDeltaConc
baseConc <- pairResults$x11[1]
regDeltaConcPct <- (regDeltaConc/baseConc) * 100
LConcDiff <- log(pairResults$x22[1]) - log(pairResults$x11[1])
# pairResults are in 10^6 kg/year, when we get to the bootstrap results
# we will need to convert them all to 10^6 kg/year units
regDeltaFlux <- (pairResults$x22[2] - pairResults$x11[2])
estF <- regDeltaFlux
baseFlux <- pairResults$x11[2]
regDeltaFluxPct <- (regDeltaFlux/baseFlux) * 100
LFluxDiff <- log(pairResults$x22[2]) - log(pairResults$x11[2])
fcc <- format(regDeltaConc, digits = 3, width = 7)
ffc <- format(regDeltaFlux, digits = 3, width = 8)
Daily1 <- localDaily[localDaily$Date >= as.Date(dateInfo$flowNormStart[1]) & localDaily$Date <=
as.Date(dateInfo$flowNormEnd[1]), ]
Daily2 <- localDaily[localDaily$Date >= as.Date(dateInfo$flowNormStart[2]) & localDaily$Date <=
as.Date(dateInfo$flowNormEnd[2]), ]
# bootstrap loop starts here
nBootGood <- 0
for (iBoot in 1:(2*nBoot)){
bootSample <- blockSample(localSample = localSample, blockLength = blockLength, startSeed = startSeed + iBoot)
if(jitterOn) bootSample <- jitterSam(bootSample, V = V)
eListBoot <- suppressMessages(EGRET::as.egret(localINFO, localDaily, bootSample, NA))
Sample1 <- bootSample[bootSample$Date >= sample1StartDate &
bootSample$Date <= sample1EndDate,]
possibleError3 <- tryCatch( surfaces1 <- suppressMessages(EGRET::estSurfaces(eListBoot, surfaceStart = start1, surfaceEnd = end1,edgeAdjust = edgeAdjust,
localSample = Sample1, minNumObs = minNumObs, minNumUncen = minNumUncen,
verbose = FALSE)), error = function(e) e)
Sample2 <- bootSample[bootSample$Date >= sample2StartDate &
bootSample$Date <= sample2EndDate,]
possibleError4 <- tryCatch( surfaces2 <- suppressMessages(EGRET::estSurfaces(eListBoot, surfaceStart = start2, surfaceEnd = end2,edgeAdjust = edgeAdjust,
localSample = Sample2, minNumObs = minNumObs, minNumUncen = minNumUncen,
verbose = FALSE)), error = function(e) e)
if (!inherits(possibleError3, "error") &
!inherits(possibleError4, "error")) {
# note that all the flux calculations inside the bootstrap loop are in kg/day units
DailyRS1FD1 <- EGRET::estDailyFromSurfaces(eListBoot, localsurfaces = surfaces1, localDaily = Daily1)
annualFlex <- EGRET::setupYears(DailyRS1FD1, paLong = paLong, paStart = paStart)
# runPairs are in 10^6 kg/year, when we get to the bootstrap results
# Converting them all to 10^6 kg/year units
c11 <- mean(annualFlex$FNConc, na.rm = TRUE)
f11 <- mean(annualFlex$FNFlux, na.rm = TRUE) * 0.00036525
DailyRS2FD2 <- EGRET::estDailyFromSurfaces(eListBoot, localsurfaces = surfaces2, localDaily = Daily2)
annualFlex <- EGRET::setupYears(DailyRS2FD2, paLong = paLong, paStart = paStart)
c22 <- mean(annualFlex$FNConc, na.rm = TRUE)
f22 <- mean(annualFlex$FNFlux, na.rm = TRUE) * 0.00036525
xConc_here <- (2 * regDeltaConc) - (c22 - c11)
xFlux_here <- (2 * regDeltaFlux) - (f22 - f11)
if(!is.na(xConc_here) & !is.na(xFlux_here)){
nBootGood <- nBootGood + 1
xConc[nBootGood] <- xConc_here
xFlux[nBootGood] <- xFlux_here
LConc <- (2 * LConcDiff) - (log(c22) - log(c11))
pConc[nBootGood] <- (100 * exp(LConc)) - 100
LFlux <- (2 * LFluxDiff) - (log(f22) - log(f11))
pFlux[nBootGood] <- (100 * exp(LFlux)) - 100
cat("\n iBoot, xConc and xFlux",nBootGood, xConc[nBootGood], xFlux[nBootGood])
# end of bootstrap replicates loop
if(nBootGood >= nBoot) {
break()
}
}
}
}
if(iBoot == 2*nBoot){
message(iBoot, " iterations were run. They only achieved ", nBootGood, " sucessful runs.")
} else if (iBoot > nBoot){
message("It took ", iBoot, " iterations to achieve ", nBoot, " sucessful runs.")
}
# now summarize the bootstrap outputs
quantConc <- quantile(xConc, prob, type = 6, na.rm = TRUE)
lowConc <- quantConc[["5%"]]
highConc <- quantConc[["95%"]]
quantFlux <- quantile(xFlux, prob, type = 6, na.rm = TRUE)
lowFlux <- quantFlux[["5%"]]
highFlux <- quantFlux[["95%"]]
rejectC <- lowConc * highConc > 0
rejectF <- lowFlux * highFlux > 0
cat("\n ", eList$INFO$shortName, "\n ", eList$INFO$paramShortName)
periodName <- EGRET::setSeasonLabelByUser(paStart, paLong)
cat("\n ", periodName, "\n")
cat("\n\n Change estimates are for ", year2," minus ", year1)
if(wall) cat("\n Sample data set was partitioned with a wall at ", as.character(sample1EndDate), "\n\n")
cat("\n\nShould we reject Ho that Flow Normalized Concentration Trend = 0 ?",
words(rejectC))
fquantConc <- format(quantConc, digits = 3, width = 8)
cat("\n best estimate of change in concentration is", fcc, "mg/L\n Lower and Upper 90% CIs",
fquantConc[["5%"]], fquantConc[["95%"]])
lowC <- quantConc[["5%"]]
upC <- quantConc[["95%"]]
cat("\n also 95% CIs", fquantConc[["2.5%"]], fquantConc[["97.5%"]],
"\n and 50% CIs", fquantConc[["25%"]], fquantConc[["75%"]])
lowC50 <- quantConc[["25%"]]
upC50 <- quantConc[["75%"]]
lowC95 <- quantConc[["2.5%"]]
upC95 <- quantConc[["97.5%"]]
pValC <- pVal(xConc)
cat("\n approximate two-sided p-value for Conc", format(pValC,
digits = 2, width = 9))
xConc <- as.numeric(na.omit(xConc))
nBootGood <- length(xConc)
posX <- ifelse(xConc > 0, 1, 0)
posXConc <- sum(posX)
if (posXConc == 0 | posXConc == nBootGood)
cat("\n* Note p-value should be considered to be < stated value")
likeCUp <- (posXConc + 0.5)/(nBootGood + 1)
likeCDown <- 1 - likeCUp
cat("\n Likelihood that Flow Normalized Concentration is trending up =",
format(likeCUp, digits = 3), " is trending down =",
format(likeCDown, digits = 3))
if(nBootGood < nBoot) cat("\n The number of good replicates in the bootstrap was ", nBootGood,
" out of the ", nBoot, "total")
# end of Concentration summary
cat("\n\nShould we reject Ho that Flow Normalized Flux Trend = 0 ?",
words(rejectF))
fquantFlux <- format(quantFlux, digits = 3, width = 8)
cat("\n best estimate of change in flux is", ffc, "10^6 kg/year\n Lower and Upper 90% CIs",
fquantFlux[["5%"]], fquantFlux[["95%"]])
lowF <- quantFlux[["5%"]]
upF <- quantFlux[["95%"]]
cat("\n also 95% CIs", fquantFlux[["2.5%"]], fquantFlux[["97.5%"]],
"\n and 50% CIs", fquantFlux[["25%"]], fquantFlux[["75%"]])
lowF50 <- quantFlux[["25%"]]
upF50 <- quantFlux[["75%"]]
lowF95 <- quantFlux[["2.5%"]]
upF95 <- quantFlux[["97.5%"]]
pValF <- pVal(xFlux)
cat("\n approximate two-sided p-value for Flux", format(pValF,
digits = 2, width = 9))
xFlux <- as.numeric(na.omit(xFlux))
nBootGood <- length(xFlux)
posX <- ifelse(xFlux > 0, 1, 0)
posXFlux <- sum(posX)
if (posXFlux == 0 | posXFlux == nBootGood)
cat("\n* Note p-value should be considered to be < stated value")
likeFUp <- (posXFlux + 0.5)/(nBootGood + 1)
likeFDown <- 1 - likeFUp
cat("\n Likelihood that Flow Normalized Flux is trending up =",
format(likeFUp, digits = 3), " is trending down =",
format(likeFDown, digits = 3))
if(nBootGood < nBoot) cat("\n The number of good replicates in the bootstrap was ", nBootGood,
" out of the ", nBoot, "total")
bootOut <- data.frame(rejectC, pValC, estC, lowC, upC,
lowC50, upC50, lowC95, upC95, likeCUp, likeCDown,
rejectF, pValF, estF, lowF, upF, lowF50, upF50, lowF95,
upF95, likeFUp, likeFDown, baseConc, baseFlux, nBoot,
startSeed, blockLength, nBootGood)
likeList <- c(likeCUp, likeCDown, likeFUp, likeFDown)
wordsOut <- wordLike(likeList)
cat("\n\n", format(wordsOut[1], width = 30), "\n", format(wordsOut[3],
width = 30))
cat("\n", format(wordsOut[2], width = 30), "\n", format(wordsOut[4],
width = 30))
pConc <- as.numeric(na.omit(pConc))
pFlux <- as.numeric(na.omit(pFlux))
pairsBootOut <- list(bootOut = bootOut, wordsOut = wordsOut,
xConc = xConc, xFlux = xFlux, pConc = pConc, pFlux = pFlux,
startSeed = startSeed)
attr(pairsBootOut, "year1") <- year1
attr(pairsBootOut, "year2") <- year2
attr(pairsBootOut, "paStart") <- paStart
attr(pairsBootOut, "paLong") <- paLong
return(pairsBootOut)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.