Chapter06.R
In EnvStats: Package for Environmental Statistics, Including US EPA Guidance

#####
# File:     Chapter06.R
#
# Purpose:  Reproduce Examples in Chapter 6 of the book:
#
#           Millard, SP. (2013).  EnvStats: An R Package for 
#             Environmental Statistics.  Springer-Verlag.
#
# Author:   Steven P. Millard
#
# Last
# Updated:  2013/02/17
#####


#######################################################################################

library(EnvStats)

############################################################

# 6.2 Prediction Intervals 
#-------------------------

# Figure 6.1
#-----------
n <- 10
k <- 1
N <- 100
PI.mat <- matrix(nrow = N, ncol = 3)
dimnames(PI.mat) <- list(NULL, c("LPL", "UPL", "Future.Obs"))
set.seed(429)
for(i in 1:N) {
	x <- rnorm(n = 10, mean = 5, sd = 1)
	PI.mat[i, c("LPL", "UPL")] <- predIntNorm(x, conf.level = 0.8)$interval$limits
	PI.mat[i, "Future.Obs"] <- rnorm(1, mean = 5, sd = 1)
}

windows()
plot(1:N, PI.mat[, "LPL"], ylim = range(pretty(range(PI.mat[, c("LPL", "UPL")]))), 
	type = "n", xlab = "Experiment Number", 
	ylab = "Prediction Interval and Future Observation",
	main = paste("Coverage of 80% Prediction Intervals for k=1 Future Observation", 
	"for N(5,1) Distribuiton with n=10", sep = "\n"))
errorBar(x = 1:N,  y = rowMeans(PI.mat[, c("LPL", "UPL")]), 
	lower = PI.mat[, "LPL"], upper = PI.mat[, "UPL"], incr = FALSE, 
	bar.ends = TRUE, gap = FALSE, add = TRUE, 
	bar.ends.size = 0.2, col = "dark grey")
points(1:N, PI.mat[, "Future.Obs"], pch = 18, cex = 0.8)

sum(PI.mat[, "LPL"] <= PI.mat[, "Future.Obs"] &  PI.mat[, "Future.Obs"] <= PI.mat[, "UPL"])

rm(n, k, N, PI.mat, x, i)


###################################################################

# 6.2.1 Prediction Intervals for a Normal Distribution
#-----------------------------------------------------

#
# Prediction Intervals for Future Observations
#
EPA.09.Ex.18.1.arsenic.df

attach(EPA.09.Ex.18.1.arsenic.df)

predIntNorm(Arsenic.ppb[Sampling.Period == "Background"], k = 4, 
		pi.type = "upper", conf.level = 0.95, method = "exact"))

predIntNorm(Arsenic.ppb[Sampling.Period == "Background"], k = 4, pi.type = "upper",
	conf.level = 0.95)$interval$limits["UPL"]

detach("EPA.09.Ex.18.1.arsenic.df")


#
# Prediction Intervals for Future Means
#
EPA.09.Ex.18.2.chrysene.df

attach(EPA.09.Ex.18.2.chrysene.df)

predIntNorm(log(Chrysene.ppb)[Well.type == "Background"], n.mean = 4, k = 1, 
	pi.type = "upper", conf.level = 0.99)

mean(log(Chrysene.ppb)[Well.type == "Compliance"])

detach("EPA.09.Ex.18.2.chrysene.df")

###################################################################

# 6.2.2 Prediction Intervals for a Lognormal Distribution
#--------------------------------------------------------

#
# Prediction Intervals for Future Observations
#
attach(EPA.94b.tccb.df)

predIntLnorm(TcCB[Area=="Reference"], k=77, method="exact", pi.type="upper", conf.level=0.95)

sum(TcCB[Area=="Cleanup"] > 2.68)

detach("EPA.94b.tccb.df")


#
# Prediction Intervals for Future Means
#
attach(EPA.09.Ex.18.2.chrysene.df)

predIntLnorm(Chrysene.ppb[Well.type == "Background"], n.geomean = 4, k = 1, 
	pi.type = "upper", conf.level = 0.99)

geomean(Chrysene.ppb[Well.type == "Compliance"])

detach("EPA.09.Ex.18.2.chrysene.df")


############################################################################

# 6.2.3 Prediction Intervals for a Gamma Distribution
#----------------------------------------------------

#
# Prediction Intervals for Future Observations
#
attach(EPA.94b.tccb.df)

predIntGamma(TcCB[Area=="Reference"], k=77, method="exact", 
	pi.type="upper", conf.level=0.95)

detach("EPA.94b.tccb.df")


#
# Prediction Intervals for Future Means
#
attach(EPA.09.Ex.18.2.chrysene.df)

predInt.list <- predIntGamma(Chrysene.ppb[Well.type == "Background"], 
	n.transmean = 4, k = 1, pi.type = "upper", conf.level = 0.99)

predInt.list

trans.power <- predInt.list$interval$normal.transform.power

trans.power

mean.of.trans <- mean(Chrysene.ppb[Well.type == "Compliance"] ^ trans.power)

mean.of.trans ^ (1 / trans.power)


detach("EPA.09.Ex.18.2.chrysene.df")
rm(predInt.list, trans.power, mean.of.trans)

############################################################################

# 6.2.4 Nonparametric Prediction Intervals
#-----------------------------------------

#
# Prediction Intervals for Future Observations
#

# Table 6.2
n <- c(seq(5, 20, by = 5), seq(25, 100, by = 25))
round(100 * predIntNparConfLevel(n = n, m = 3, pi.type = "upper"))

rm(n)

# Prediction limit for the next m=4 trichloroethylene concentrations
# at the compliance well
#-------------------------------------------------------------------
EPA.09.Ex.18.3.TCE.df

with(EPA.09.Ex.18.3.TCE.df, 
	predIntNpar(TCE.ppb[Well.type=="Background"], m = 4, lb = 0, pi.type="upper"))


# Prediction Intervals for Future Medians
#----------------------------------------
EPA.09.Ex.18.4.xylene.df

with(EPA.09.Ex.18.4.xylene.df, 
	predIntNpar(Xylene.ppb[Well.type=="Background"], k = 2, m = 3, 
		lb = 0, pi.type="upper")

####################################################################################

# 6.3.1 Simultaneous Prediction Intervals for a Normal Distribution
#---------------------------------------------------------------------

nw <- 20
nc <- 10

# Set r = Number of Evaluations per year = 2
r <- 2


# Set Individual Test Type I Error to 
# 1 - (1 - SWFPR)^(1 / (Number of Constituents * Number of Wells))
#
# which translates to setting the confidence limit to 
# (1 - SWFPR)^(1 / (Number of Constituents * Number of Wells))

conf.level <- (1 - 0.1)^(1 / (nc * nw))
conf.level


#
# Prediction intervals based on each of the rules
#

attach(EPA.09.Ex.18.1.arsenic.df)
As.Bkgrd <- Arsenic.ppb[Sampling.Period == "Background"]


# 1-of-2 Rule
predIntNormSimultaneous(As.Bkgrd, k = 1, m = 2, r = r, 
	rule = "k.of.m", pi.type = "upper", 
	conf.level = conf.level)

# 1-of-3 Rule
predIntNormSimultaneous(As.Bkgrd, k = 1, m = 3, r = r, 
	rule = "k.of.m", pi.type = "upper", 
	conf.level = conf.level)$interval$limits["UPL"]

# Modified California Rule
predIntNormSimultaneous(As.Bkgrd, r = r, rule = "Modified.CA", 
	pi.type = "upper", conf.level = conf.level)$interval$limits["UPL"]

# 1-of-2 Rule based on Means of order 2
predIntNormSimultaneous(As.Bkgrd, n.mean = 2, k = 1, m = 2, r = r, 
	rule = "k.of.m", pi.type = "upper", 
	conf.level = conf.level)$interval$limits["UPL"]


#
# Create data frame showing the upper prediction limits for each of the rules, 
# along with the power of detecting a change in concentration of 
# three standard deviations at any of the 20 compliance wells 
# during the course of a year
#

n <- sum(!is.na(As.Bkgrd))
rule.vec <- c("k.of.m", "k.of.m", "Modified.CA", "k.of.m")
n.mean.vec <- c(1, 1, 1, 2)
m.vec <- c(2, 3, 4, 2)
n.rules <- length(rule.vec)
UPL.vec <- rep(as.numeric(NA), n.rules)

for(i in 1:n.rules) {
	UPL.vec[i] <- predIntNormSimultaneous(As.Bkgrd, n.mean = n.mean.vec[i], 
		k = 1, m = m.vec[i], r = r, rule = rule.vec[i], 
		pi.type = "upper", conf.level = conf.level)$interval$limits["UPL"]
}

Power.vec <- predIntNormSimultaneousTestPower(n = n, k = 1, 
		m = m.vec, n.mean = n.mean.vec, r = 2, rule = rule.vec, 
		delta.over.sigma = 3, pi.type = "upper", conf.level = conf.level)
 
data.frame(Rule = rule.vec, k = rep(1, n.rules), 
	m = m.vec, N.Mean = n.mean.vec, UPL = round(UPL.vec, 1), 
	Power = round(Power.vec, 2), 
	Total.Samples = n.mean.vec * m.vec * r)

rm(nw, nc, r, conf.level, As.Bkgrd, n, rule.vec, n.mean.vec, m.vec, 
	n.rules, UPL.vec, Power.vec, i)

detach("EPA.09.Ex.18.1.arsenic.df")


####################################################################################

# 6.3.2 Simultaneous Prediction Intervals for a Lognormal Distribution
#---------------------------------------------------------------------

# Look at backround data:
EPA.09.Ex.19.1.sulfate.df

# Set confidence level:
nw <- 50
nc <- 10

conf.level <- (1 - 0.1)^(1 / (nc * nw))
conf.level


# Compare power of detecting a change in concentration of three standard deviations 
# (on the log scale) at any of the 50 compliance wells during the course of a year 
# for the 1-of-2 rule versus the 1-of-3 rule
predIntNormSimultaneousTestPower(n = 25, k = 1, m = 2:3, r = 2, 
	rule = "k.of.m", delta.over.sigma = 3, pi.type = "upper", 
	conf.level = conf.level)


# Compute upper prediction limit based on 1-of-3 rule
#----------------------------------------------------
with(EPA.09.Ex.19.1.sulfate.df, 
	predIntLnormSimultaneous(Sulfate.mg.per.l, k = 1, m = 3, r = 2, 
		rule = "k.of.m", pi.type = "upper", conf.level = conf.level))

#####################################################################################

# 6.3.3 Simultaneous Prediction Intervals for a Gamma Distribution
#-----------------------------------------------------------------
 
# Compute upper prediction limit based on 1-of-3 rule
#----------------------------------------------------
with(EPA.09.Ex.19.1.sulfate.df, 
	predIntGammaSimultaneous(x = Sulfate.mg.per.l, k = 1, m = 3, r = 2, 
		rule = "k.of.m", pi.type = "upper", 
		conf.level = conf.level)$interval$limits["UPL"]


# Clean up
rm(nw, nc, conf.level)

#####################################################################################

# 6.3.4 Simultaneous Nonparametric Prediction Intervals
#------------------------------------------------------

# Look at the backround and compliance well data:
#------------------------------------------------
EPA.09.Ex.19.5.mercury.df

# n  = 20 based on 4 Background Wells with 5 observations per well
# nw = 10 Compliance Wells
# nc =  5 Constituents
# ne =  1 Evaluation per year

n  <- 20
nw <- 10
nc <-  5
ne <-  1


# Set confidence level and alpha level:
conf.level <- (1 - 0.1)^(1 / nc)
conf.level

alpha <- 1 - conf.level
alpha


# Set the number of future sampling occasions r to the product of 
# the number of compliance wells and the number of evaluations per year
r <- nw * ne


# Create data objects that store information about the 
# six different sampling plans
rule.vec <- c(rep("k.of.m", 3), "Modified.CA", rep("k.of.m", 2))
k.vec    <- rep(1, 6)
m.vec    <- c(2:4, 4, 1, 2)
n.median.vec <- c(rep(1, 4), rep(3, 2))
n.plans <- length(rule.vec)


# Alpha-levels based on using Maximum background observation
alpha.vec.Max <- 1 - predIntNparSimultaneousConfLevel(
	n = n, n.median = n.median.vec, 
	k = k.vec, m = m.vec, r = r, 
	rule = rule.vec, pi.type="upper")


# Alpha-levels based on using 2nd largest background observation
alpha.vec.2nd <- 1 - predIntNparSimultaneousConfLevel(
	n = n, n.median = n.median.vec, 
	k = k.vec, m = m.vec, r = r, 
	rule = rule.vec, pi.type="upper",
	n.plus.one.minus.upl.rank = 2)

# Alpha-levels based on using 3rd largest background observation
alpha.vec.3rd <- 1 - predIntNparSimultaneousConfLevel(
	n = n, n.median = n.median.vec, 
	k = k.vec, m = m.vec, r = r, 
	rule = rule.vec, pi.type="upper",
	n.plus.one.minus.upl.rank = 3)


# Now create a data frame listing all of the plans, their associated 
# Type I error rate, and their associated upper prediction limit

Bkgd.Hg.Sorted <- with(EPA.09.Ex.19.5.mercury.df, 
	sort(Mercury.ppb[Well.type == "Background"], 
		decreasing = TRUE))

Candidate.Plans.df <- data.frame(Rule = rep(rule.vec, 3), 
	k = rep(k.vec, 3), m = rep(m.vec, 3),
	Median.n = rep(n.median.vec, 3), 
	Order.Statistic = rep(c("Max", "2nd", "3rd"), each = n.plans), 
	Achieved.alpha = round(
		c(alpha.vec.Max, alpha.vec.2nd, alpha.vec.3rd), 4),
	BG.Limit = rep(Bkgd.Hg.Sorted[1:3], each = n.plans))

Candidate.Plans.df


#--------------------------------------------------------------------
# Eliminate plans that do not achieve the per-constituent alpha level
#--------------------------------------------------------------------
index <- Candidate.Plans.df$Achieved.alpha <= alpha
Candidate.Plans.df <- Candidate.Plans.df[index, ]

Candidate.Plans.df


#-------------------------------------------------
# Determine whether Well CW-1 is out of compliance
#-------------------------------------------------

EPA.09.Ex.19.5.mercury.df[Well == "CW-1", c("Well", "Event", "Mercury.ppb.orig")]
#   Well Event Mercury.ppb.orig
#25 CW-1     1             0.22
#26 CW-1     2              0.2
#27 CW-1     3              <.2
#28 CW-1     4             0.25
#29 CW-1     5             0.24
#30 CW-1     6              <.2


#        Rule Median.n k m Order.Statistic Achieved.alpha BG.Limit Result
# ----------- -------- - - --------------- -------------- -------- ------
#      k.of.m        1 1 3             Max         0.0055     0.28 Pass. First value of 0.22 < BG Limit.
#      k.of.m        1 1 4             Max         0.0009     0.28 Pass. First value of 0.22 < BG Limit.
# Modified.CA        1 1 4             Max         0.0140     0.28 Pass. First value of 0.22 < BG Limit.
#      k.of.m        3 1 2             Max         0.0060     0.28 Pass. First value of 0.22 < BG Limit.
#      k.of.m        1 1 4             2nd         0.0046     0.25 Pass. First value of 0.22 < BG Limit.
#      k.of.m        1 1 4             3rd         0.0135     0.24 Pass. First value of 0.22 < BG Limit.




#-------------------------------------------------
# Determine whether Well CW-2 is out of compliance
#-------------------------------------------------

EPA.09.Ex.19.5.mercury.df[Well == "CW-2", c("Well", "Event", "Mercury.ppb.orig")]
#   Well Event Mercury.ppb.orig
#31 CW-2     1             0.36
#32 CW-2     2             0.41
#33 CW-2     3             0.28
#34 CW-2     4             0.45
#35 CW-2     5             0.43
#36 CW-2     6             0.54


# Resampling is required for all plans because initial value of 0.36
# is greater than BG Limit, and median of first 3 values (also 0.36)
# is greater than BG Limit.

#        Rule Median.n k m Order.Statistic Achieved.alpha BG.Limit Result
# ----------- -------- - - --------------- -------------- -------- ------
#      k.of.m        1 1 3             Max         0.0055     0.28 Pass. Third value of 0.28 = BG Limit.
#      k.of.m        1 1 4             Max         0.0009     0.28 Pass. Third value of 0.28 = BG Limit.
# Modified.CA        1 1 4             Max         0.0140     0.28 Fail. Only 1 of observations 2-4 <= 0.28.
#      k.of.m        3 1 2             Max         0.0060     0.28 Fail. Both medians (0.36 and 0.45) > BG Limit.
#      k.of.m        1 1 4             2nd         0.0046     0.25 Fail. Observations 1-4 all > 0.25.
#      k.of.m        1 1 4             3rd         0.0135     0.24 Fail. Observations 1-4 all > 0.25.



#------------------------------------------------------------------------------
# Assume a normal distribution and compute the power of detecting a change in 
# concentration of three standard deviations at any of the 10 compliance wells 
# during the course of a year, as well as the total number of potential samples 
# that may have to be taken.
#------------------------------------------------------------------------------

Power.vec <- predIntNparSimultaneousTestPower(n = n,
	n.median = Candidate.Plans.df[, "Median.n"],
	k = Candidate.Plans.df[, "k"], m = Candidate.Plans.df[, "m"],
	r = r, rule = as.character(Candidate.Plans.df[, "Rule"]), 
	n.plus.one.minus.upl.rank = 
		match(Candidate.Plans.df[, "Order.Statistic"] , 
			c("Max", "2nd", "3rd")), 
	delta.over.sigma = 3, pi.type = "upper", r.shifted = 1, 
	distribution = "norm", method = "approx")


data.frame(Candidate.Plans.df[, 
	c("Rule", "k", "m", "Median.n", "Order.Statistic")], 
	Power = round(Power.vec, 2), 
	Total.Samples = Candidate.Plans.df$Median.n * Candidate.Plans.df$m * ne)


rm(n, nw, nc, ne, conf.level, alpha, r, rule.vec, k.vec, m.vec, 
	n.median.vec, n.plans, alpha.vec.Max, alpha.vec.2nd, alpha.vec.3rd,
	Bkgd.Hg.Sorted, Candidate.Plans.df, index, Power.vec)


############################################################

# 6.4 Tolerance Intervals 
#------------------------


# 6.4.1 Tolerance Intervals for a Normal Distribution
#----------------------------------------------------

# First assume facility is in compliance/assessment monitoring
# so compute a lower 99% confidence limit for the 95th percentile 
# for the distribution at each of the three compliance wells.
# This is equivalent to computing a lower beta-content tolerance 
# limit with coverage 5% and associated confidence level of 99%.  
#----------------------------------------------------------------

Aldicarb <- EPA.09.Ex.21.1.aldicarb.df$Aldicarb.ppb
Well <- EPA.09.Ex.21.1.aldicarb.df$Well

tolIntNorm(Aldicarb[Well == "Well.1"], coverage = 0.05, 
	ti.type = "lower", conf.level = 0.99)

tolIntNorm(Aldicarb[Well == "Well.2"], coverage = 0.05, 
	ti.type = "lower", 
	conf.level = 0.99)$interval$limits["LTL"]

tolIntNorm(Aldicarb[Well == "Well.3"], coverage = 0.05, 
	ti.type = "lower", 
	conf.level = 0.99)$interval$limits["LTL"]

# One command to compute the LTL for all three wells at once:
sapply(split(Aldicarb, Well), function(x) {
	tolIntNorm(x, coverage = 0.05, ti.type = "lower", 
	conf.level = 0.99)$interval$limits["LTL"]})

# Now assume assume the site is in corrective action monitoring, so 
# compute the one-sided 99% upper confidence limit for the 95th 
# percentile and compare that to the MCL.  This is equivalent to 
# computing an upper beta-content tolerance limit with coverage 
# 95% and associated confidence level of 99%.
#-------------------------------------------------------------------

sapply(split(Aldicarb, Well), function(x) {
	tolIntNorm(x, coverage = 0.95, ti.type = "upper", 
	conf.level = 0.99)$interval$limits["UTL"]})

rm(Aldicarb, Well)

########################################################################

# 6.4.2 Tolerance Intervals for a Lognormal Distribution
#-------------------------------------------------------

# "Hot-Measurment Comparison" for TcCB data
#------------------------------------------
with(EPA.94b.tccb.df, 
	tolIntLnorm(TcCB[Area == "Reference"], coverage = 0.95, ti.type = "upper", 
		conf.level = 0.95))



# Compare chrysene data from three compliance wells
# to upper tolerance limit based on data from two 
# background wells
#--------------------------------------------------

EPA.09.Ex.17.3.chrysene.df

with(EPA.09.Ex.17.3.chrysene.df, 
	tolIntLnorm(Chrysene.ppb[Well.type == "Background"], 
		coverage = 0.95, ti.type = "upper", 
		conf.level = 0.95))$interval$limits["UTL"]


########################################################################

# 6.4.3 Tolerance Intervals for a Gamma Distribution
#---------------------------------------------------

# "Hot-Measurment Comparison" for TcCB data
#------------------------------------------
with(EPA.94b.tccb.df, 
	tolIntGamma(TcCB[Area == "Reference"], coverage = 0.95, ti.type = "upper", 
		conf.level = 0.95))$interval$limits["UTL"]

########################################################################

# 6.4.4 Nonparametric Tolerance Intervals
#----------------------------------------

EPA.09.Ex.17.4.copper.df

with(EPA.09.Ex.17.4.copper.df, 
	tolIntNpar(Copper.ppb[Well.type == "Background"], 
		conf.level = 0.95, ti.type = "upper", lb = 0))
Any scripts or data that you put into this service are public.
EnvStats documentation built on Sept. 11, 2024, 6:03 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
EnvStats
Package for Environmental Statistics, Including US EPA Guidance

inst/scripts/Manual/Chapter06.R
In EnvStats: Package for Environmental Statistics, Including US EPA Guidance

Try the EnvStats package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

EnvStats Package for Environmental Statistics, Including US EPA Guidance

inst/scripts/Manual/Chapter06.R In EnvStats: Package for Environmental Statistics, Including US EPA Guidance

Try the EnvStats package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

EnvStats
Package for Environmental Statistics, Including US EPA Guidance

inst/scripts/Manual/Chapter06.R
In EnvStats: Package for Environmental Statistics, Including US EPA Guidance