Chapter02.R
In EnvStats: Package for Environmental Statistics, Including US EPA Guidance

#####
# File:     Chapter02.R
#
# Purpose:  Reproduce Examples in Chapter 2 of the book:
#
#           Millard, SP. (2013).  EnvStats: An R Package for 
#             Environmental Statistics.  Springer-Verlag.
#
# Author:   Steven P. Millard
#
# Last
# Updated:  2013/02/03
#####


#######################################################################################

library(EnvStats)

############################################################

# 2.8.1	Confidence Interval for the Mean of a Normal Distribution
#----------------------------------------------------------------
EPA.09.Ex.16.1.sulfate.df

Sulfate.back <- with(EPA.09.Ex.16.1.sulfate.df,
	Sulfate.ppm[Well.type == "Background"])
 
enorm(Sulfate.back, ci = TRUE)

enorm(Sulfate.back, ci = TRUE, ci.param = "variance")$interval

ciNormHalfWidth(n.or.n1 = 8, sigma.hat = c(15, 30, 60))

ciNormHalfWidth(n.or.n1 = 4, sigma.hat = 30)

ciNormN(half.width = 10, sigma.hat = 30)

# Figure 2.1
#-----------
windows()
plotCiNormDesign(sigma.hat=30, range.x.var=c(4, 80), 
  conf=0.99, xlim=c(0, 80), ylim=c(0, 90), 
  main = paste("Half-Width vs. Sample Size for CI for Mean",
    "with Estimated SD=30 and Various Confidence Levels", sep = "\n"))
plotCiNormDesign(sigma.hat=30, range.x.var=c(4, 80), conf=0.95, plot.lty=2, 
	plot.col = 2, add= TRUE)
plotCiNormDesign(sigma.hat=30, range.x.var=c(4, 80), conf=0.90, plot.lty=3, 
	plot.col = 4, add= TRUE)
legend("topright", paste(c("99%", "95%", "90%"), "Confidence"), col= c(1, 2, 4), 
	lty=1:3, lwd=3, bty = "n")

#--------------------------------------------------------------------------------------

Sulfate.down <- with(EPA.09.Ex.16.1.sulfate.df,
	Sulfate.ppm[Well.type == "Downgradient"])

enorm(Sulfate.down)

t.test(Sulfate.down, Sulfate.back, var.equal = TRUE)$conf.int

summary(lm(Sulfate.ppm ~ Well.type, data = EPA.09.Ex.16.1.sulfate.df))$sigma

ciTableMean(n1 = 8, n2 = 8, diff = c(100, 50, 0), 
  SD = c(15, 25, 50), digits = 0)


rm(Sulfate.back, Sulfate.down)

############################################################

# 2.8.2	Confidence Interval for a Binomal Proportion
#---------------------------------------------------

EPA.92c.benzene1.df

with(EPA.92c.benzene1.df, ebinom(Censored, ci = TRUE))

ciBinomHalfWidth(n.or.n1 = 36, p.hat = c(0.75, 0.85, 0.95))

ciBinomHalfWidth(n.or.n1 = 10, p.hat = 0.9)

ciBinomN(half.width = 0.03, p.hat = 0.9)

# Figure 2.2
#-----------
windows()
plotCiBinomDesign(p.hat=0.9, range.x.var=c(10, 200), conf=0.99, 
  xlim=c(0, 200), ylim=c(0, 0.3), 
  main=paste("Half-Width vs. Sample Size for CI for p", 
  "With Estimated p=0.9 and Various Confidence Levels", sep = "\n"))
plotCiBinomDesign(p.hat=0.9, range.x.var=c(10, 200), conf=0.95, plot.col = 2, plot.lty=2, add= TRUE)
plotCiBinomDesign(p.hat=0.9, range.x.var=c(10, 200), conf=0.90, plot.col = 4, plot.lty=3, add= TRUE)
legend("topright", paste(c("99%", "95%", "90%"), "Confidence"), col = c(1, 2, 4), lty=1:3, lwd=3, bty = "n")
mtext("CI Method = Score normal approximation, with continuity correction", line = 0)

#--------------------------------------------------------------------------------------

ciTableProp(n1 = 36, p1.hat = c(0.2, 0.4, 0.6), 
  n2 = 36, p2.hat.minus.p1.hat = c(0.3, 0.15, 0))

############################################################

# 2.8.3	Nonparametric Confidence Interval for a Percentile
#---------------------------------------------------------

EPA.92c.copper2.df

Cu.Bkgrd <- with(EPA.92c.copper2.df, Copper[Well.type == "Background"])

eqnpar(Cu.Bkgrd, p = 0.95)

ciNparConfLevel(n=24, p=0.95, ci.type="upper")

ciNparConfLevel(n=12, p=0.95, ci.type="upper")

ciNparN(p=0.95, ci.type="upper")

# Figure 2.3
#-----------
windows()
plotCiNparDesign(p=0.95, ci.type="upper", 
  range.x.var=c(2, 100), ylim=c(0, 1))


rm(Cu.Bkgrd)

############################################################

# 2.9.1	Prediction Interval for a Normal Distribution
#----------------------------------------------------

EPA.92c.arsenic3.df

As.Bkgrd <- with(EPA.92c.arsenic3.df, Arsenic[Well.type == "Background"])
predIntNorm(As.Bkgrd, k = 4, method = "exact")


# Figure 2.4
#-----------
windows()
plotPredIntNormDesign(range.x.var=c(4, 50), k=4, sigma.hat=17, 
  xlim=c(0, 50), ylim=c(30, 110), 
  main=paste("Half-Width vs. Sample Size for Prediction Interval", 
  "With Estimate SD=17 and Various Numbers of Future Values", sep = "\n"))
plotPredIntNormDesign(range.x.var=c(4, 50), k=2, sigma.hat=17, 
	plot.col = 2, plot.lty=2, add= TRUE)
plotPredIntNormDesign(range.x.var=c(4, 50), k=1, sigma.hat=17, 
	plot.col = 4, plot.lty=3, add= TRUE)
legend("topright", c("k=4", "k=2", "k=1"), col = c(1, 2, 4), lty=1:3, lwd = 3, bty = "n")

rm(As.Bkgrd)

############################################################

# 2.9.2 Nonparametric Prediction Interval
#----------------------------------------

predIntNparN(m = rep(c(1, 5, 10), 2), 
  conf.level = rep(c(0.9, 0.95), each = 3))

# Figure 2.5
#-----------
windows()
plotPredIntNparDesign(range.x.var = c(2, 100), k = 1, m = 1, 
  xlim = c(0, 100), ylim = c(0, 1),
  main=paste("Confidence Level vs. Sample Size for Nonparametric PI", 
  "With Various Values of m (Two-Sided PI)", sep = "\n"))
plotPredIntNparDesign(range.x.var=c(2, 100), k=5, m=5, plot.col = 2, plot.lty=2, add= TRUE)
plotPredIntNparDesign(range.x.var=c(2, 100), k=10, m=10, plot.col = 4, plot.lty=3, add= TRUE)
legend("bottomright", c("m=  1", "m=  5", "m=10"), col = c(1, 2, 4), lty = 1:3, lwd=3, bty = "n")


############################################################

# 2.10.1 Tolerance Interval for a Normal Distribution
#----------------------------------------------------


As.Bkgrd <- with(EPA.92c.arsenic3.df, Arsenic[Well.type == "Background"])
tolIntNorm(As.Bkgrd,  coverage = 0.95, cov.type = "content", 
    ti.type = "two-sided", conf.level = 0.99)

# Figure 2.6
#-----------
windows()
plotTolIntNormDesign(range.x.var = c(5, 50), sigma.hat = 17, 
  coverage = 0.99, conf = 0.99, 
  xlim = c(0, 50), ylim = c(0, 200), 
  main=paste("Half-Width vs. Sample Size for TI With Estimated", 
  "SD = 17 and Various Values of Coverage (99% Confidence)", 
  sep = "\n"))
plotTolIntNormDesign(range.x.var=c(5, 50), sigma.hat=17, coverage=0.95, 
  conf=0.99, plot.col = 2, plot.lty=2, add= TRUE)
plotTolIntNormDesign(range.x.var=c(5, 50), sigma.hat=17, coverage=0.90, 
  conf=0.99, plot.col = 4, plot.lty=3, add= TRUE)
legend("topright", paste(c("99%", "95%", "90%"), "Coverage"), col = c(1, 2, 4), 
	lty=1:3, lwd=3, bty = "n")

rm(As.Bkgrd)

############################################################

# 2.10.2 Nonparametric Tolerance Interval
#----------------------------------------

tolIntNparN(coverage = rep(c(0.8, 0.9, 0.95), 2), 
  conf.level = rep(c(0.9, 0.95), each = 3))


# Figure 2.7
#-----------
windows()
plotTolIntNparDesign(range.x.var = c(2, 100), coverage = 0.8, 
  xlim = c(0, 100), ylim = c(0, 1), 
  main=paste("Confidence Level vs. Sample Size for Nonparametric TI", 
  "With Various Values of Coverage (Two-Sided TI)", 
  sep = "\n"))
plotTolIntNparDesign(range.x.var = c(2, 100), coverage = 0.90, 
  plot.col = 2, plot.lty = 2, add = TRUE)
plotTolIntNparDesign(range.x.var = c(2, 100), coverage = 0.95, 
  plot.col = 4, plot.lty = 3, add = TRUE)
legend("bottomright", paste(c("80%", "90%", "95%"), "Coverage"), 
  col = c(1, 2, 4), lty = 1:3, lwd = 3, bty = "n")

#########################################################################

# 2.11.1 Testing the Mean of a Normal Distribution
#-------------------------------------------------

summaryStats(VC.ppb ~ Well, data = EPA.09.Ex.22.1.VC.df, 
  subset = Period == "Background", digits = 1)

VC.lm.fit <- lm(VC.ppb ~ Well, data = EPA.09.Ex.22.1.VC.df, 
  subset = Period == "Background")

summary(VC.lm.fit)$sigma

sqrt(enorm(VC.lm.fit$residuals, ci = TRUE, ci.param = "variance")$interval$limits)

tTestAlpha(n.or.n1 = 4, delta.over.sigma = 5 / c(3.2, 6), 
  power = 0.8, sample.type = "one.sample", 
  alternative = "greater")

tTestPower(n.or.n1 = c(4, 8, 12), delta.over.sigma = 5 / 3.2, 
  alpha = 0.01, alternative = "greater")

tTestN(delta.over.sigma = 5 / 6, alpha = 0.01, power = 0.9, 
  alternative = "greater")

rm(VC.lm.fit)


# Figure 2.8
#-----------
windows()
plotTTestDesign(alpha = 0.01, delta.over.sigma = 1, range.x.var = c(2, 35), 
  xlim = c(0, 35), ylim = c(0, 1), alternative="greater", approx = FALSE)


# Figure 2.9
#-----------
windows()
plotTTestDesign(y.var = "delta.over.sigma", alpha = 0.01, power = 0.9, 
  range.x.var = c(2, 15), xlim = c(0, 15), ylim = c(0, 40), 
  alternative = "greater", approx = FALSE)


#########################################################################

# 2.11.2 Testing a Binomial Proportion
#-------------------------------------------------

# Sample size based on normal approximation to the binomial
# without continuity correction
#----------------------------------------------------------
propTestN(p.or.p1 = 0.3, p0.or.p2 = 0.1, alpha = 0.05, 
  power = 0.9, sample.type = "one.sample", 
  alternative = "greater", approx = TRUE, round.up = TRUE)

# Simulation showing true Type I error rate using test based on
# normal approximation WITHOUT continuity correction is about 7%, not 5%
#-----------------------------------------------------------------------
set.seed(274)
N <- 10000
Reject.vec <- logical(N)
for(i in 1:N) {
  Reject.vec[i] <- prop.test(
    x = rbinom(n = 1, size = 30, prob = 0.1), 
    n = 30, p = 0.1, alternative = "greater", correct = FALSE)$p.value < 0.05
}
mean(Reject.vec)

binom.test(x = sum(Reject.vec), n = length(Reject.vec), p = 0.05)$conf.int


# Simulation showing true Type I error rate using test based on
# normal approximation WITH continuity correction is about 2.6%, not 5%
#-----------------------------------------------------------------------
set.seed(538)
N <- 10000
Reject.vec <- logical(N)
for(i in 1:N) {
  Reject.vec[i] <- prop.test(
    x = rbinom(n = 1, size = 30, prob = 0.1), 
    n = 30, p = 0.1, alternative = "greater", correct = TRUE)$p.value < 0.05
}
mean(Reject.vec)

binom.test(x = sum(Reject.vec), n = length(Reject.vec), p = 0.05)$conf.int

rm(N, Reject.vec, i)


# Sample size based on exact binomial test
#-----------------------------------------
propTestN(p.or.p1 = 0.3, p0.or.p2 = 0.1, alpha = 0.05, 
  power = 0.9, sample.type = "one.sample", 
  alternative = "greater", approx = FALSE, round.up = TRUE)

propTestN(p.or.p1 = 0.3, p0.or.p2 = 0.1, alpha = 0.05, 
  tol.alpha = 0.01, power = 0.9, sample.type = "one.sample", 
  alternative = "greater", approx = FALSE, round.up = TRUE)


#########################################################################


# 2.11.3 Testing Multiple Wells for Compliance with Simultaneous Prediction Intervals
#------------------------------------------------------------------------------------
nc <- 20
nw <- 100
conf.level <- (1 - 0.1)^(1 / (nc * nw))
conf.level

n <- 25
r <- 2

# Compute power of detecting an increase of three standard deviations 
# in concentration using the prediction interval based on the 1-of-2 
# resampling rule.
#---------------------------------------------------------------------
predIntNormSimultaneousTestPower(n = n, k = 1, m = 2, r = r, 
  rule = "k.of.m", delta.over.sigma = 3, pi.type = "upper", 
  conf.level = conf.level)


# Reproduce table shown in Step 2 on page 19-23 of the EPA (2009) 
# guidance document.
#----------------------------------------------------------------
rule.vec <- c(rep("k.of.m", 3), "Modified.CA", rep("k.of.m", 3))
m.vec <- c(2, 3, 4, 4, 1, 2, 1)
n.mean.vec <- c(rep(1, 4), 2, 2, 3)
n.scenarios <- length(rule.vec)

K.vec <- predIntNormSimultaneousK(
		n = n, k = 1, m = m.vec,  
		n.mean = n.mean.vec,
		r = r, rule = rule.vec, 
		pi.type = "upper", conf.level = conf.level)

Power.vec <- predIntNormSimultaneousTestPower(
		n = n, k = 1, m = m.vec, 
		n.mean = n.mean.vec, r = r, 
		rule = rule.vec, delta.over.sigma = 3, 
		pi.type = "upper", conf.level = conf.level)

data.frame(Rule = rule.vec, k = rep(1, n.scenarios), 
	m = m.vec, N.Mean = n.mean.vec, 
	K = round(K.vec, 2), 
	Power = round(Power.vec, 2), 
	Total.Samples = m.vec * n.mean.vec)




# Figure 2.10
#------------
# Power for designs based on SINGLE future observations
#------------------------------------------------------
windows()
par(mar = c(5,4,6,1) + 0.1)
plotPredIntNormSimultaneousTestPowerCurve(n = n, 
	k = 1, m = 4, r = r, rule="k.of.m", pi.type = "upper", 
	conf.level = conf.level,  
	xlab = "SD Units Above Background",
	main = "")
plotPredIntNormSimultaneousTestPowerCurve(n = n, 
	k = 1, m = 3, r = r, rule="k.of.m", pi.type = "upper", 
	conf.level = conf.level,  
	add = TRUE, plot.col = 2, plot.lty = 2)
plotPredIntNormSimultaneousTestPowerCurve(n = n, 
	k = 1, m = 2, r = r, rule="k.of.m", pi.type = "upper", 
	conf.level = conf.level,  
	add = TRUE, plot.col = 3, plot.lty = 3)
plotPredIntNormSimultaneousTestPowerCurve(n = n, 
	r = r, rule="Modified.CA", pi.type = "upper", 
	conf.level = conf.level,  
	add = TRUE, plot.col = 4, plot.lty = 4)
legend(0, 1, c("1-of-4", "Modified CA", "1-of-3", "1-of-2"), 
	col = c(1, 4, 2, 3), lty = c(1, 4, 2, 3), lwd = 2, bty="n")
mtext(paste('Power of 1-of-2, 1-of-3, 1-of-4, and Modified CA Designs to',
	"\nDetect An Increase At", nw, "Wells for SWFPR = 10%"), 
	line = 3, cex = 1.25)
mtext(paste("(Based on", n, "Background Observations and\n", 
	"Adjusted for", nc, "Consituents and", r, "Evaluations per year)"), 
	line = 1)


# Figure 2.11
#------------
# Power for designs based on the MEAN of future observations
#-----------------------------------------------------------
windows()
par(mar = c(5,4,6,1) + 0.1)
plotPredIntNormSimultaneousTestPowerCurve(n = n, 
	k = 1, m = 2, n.mean = 2, r = r, rule="k.of.m", 
	pi.type = "upper", conf.level = conf.level,  
	xlab = "SD Units Above Background",
	main = "")
plotPredIntNormSimultaneousTestPowerCurve(n = n, 
	k = 1, m = 1, n.mean = 2, r = r, rule="k.of.m", 
	pi.type = "upper", 
	conf.level = conf.level, 
	add = TRUE, plot.col = 2, plot.lty = 2)
plotPredIntNormSimultaneousTestPowerCurve(n = n, 
	k = 1, m = 1, n.mean = 3, r = r, rule="k.of.m", 
	pi.type = "upper", conf.level = conf.level, 
	add = TRUE, plot.col = 3, plot.lty = 3)
legend(0, 1, c("1-of-2, Order 2", "1-of-1, Order 3", "1-of-1, Order 2"), 
	col = c(1, 3, 2), lty = c(1, 3, 2), lwd = 2, 
	bty="n")
mtext(paste('Power of 1-of-1 and 1-of-2 Designs Based on Means to Detect',
	"\nAn Increase At", nw, "Wells for SWFPR = 10%"), 
	line = 3, cex = 1.25)
mtext(paste("(Based on", n, "Background Observations and\n", 
	"Adjusted for", nc, "Consituents and", r, "Evaluations per Year)"), 
	line = 1)


rm(nc, nw, conf.level, n, r, rule.vec, m.vec, n.mean.vec, 
  n.scenarios, K.vec, Power.vec)
Any scripts or data that you put into this service are public.
EnvStats documentation built on June 8, 2025, 11:37 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
EnvStats
Package for Environmental Statistics, Including US EPA Guidance

inst/scripts/Manual/Chapter02.R
In EnvStats: Package for Environmental Statistics, Including US EPA Guidance

Try the EnvStats package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

EnvStats Package for Environmental Statistics, Including US EPA Guidance

inst/scripts/Manual/Chapter02.R In EnvStats: Package for Environmental Statistics, Including US EPA Guidance

Try the EnvStats package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

EnvStats
Package for Environmental Statistics, Including US EPA Guidance

inst/scripts/Manual/Chapter02.R
In EnvStats: Package for Environmental Statistics, Including US EPA Guidance