ch05.R
In MASS: Support Functions and Datasets for Venables and Ripley's MASS

#-*- R -*-

## Script from Fourth Edition of `Modern Applied Statistics with S'

# Chapter 5   Univariate Statistics

# for later use, from section 5.6
perm.t.test <- function(d) {
# ttest is function(x) mean(x)/sqrt(var(x)/length(x))
    binary.v <- function(x, digits) {
       if(missing(digits)) {
           mx <- max(x)
           digits <- if(mx > 0) 1 + floor(log(mx, base = 2)) else 1
       }
       ans <- 0:(digits - 1)
       lx <- length(x)
       x <- rep(x, rep(digits, lx))
       x <- (x %/% 2^ans) %% 2
       dim(x) <- c(digits, lx)
       x
    }
    digits <- length(d)
    n <- 2^digits
    x <- d * 2 * (binary.v(1:n, digits) - 0.5)
    mx <- matrix(1/digits, 1, digits) %*% x
    s <- matrix(1/(digits - 1), 1, digits)
    vx <- s %*% (x - matrix(mx, digits, n, byrow=TRUE))^2
    as.vector(mx/sqrt(vx/digits))
}

library(MASS)
options(width=65, digits=5, height=9999)
library(lattice)
pdf(file="ch05.pdf", width=8, height=6, pointsize=9)

rm(A, B) # precautionary clear-out
attach(shoes)
tperm <- perm.t.test(B - A) # see section 5.6
detach()

# from ch04
if(!exists("fgl.df")) {
fgl0 <- fgl[ ,-10] # omit type.
fgl.df <- data.frame(type = rep(fgl$type, 9),
  y = as.vector(as.matrix(fgl0)),
  meas = factor(rep(1:9, each = 214), labels = names(fgl0)))
  invisible()
}


# 5.1  Probability distributions


x <- rt(250, df = 9)
par(pty = "s")
qqnorm(x)
qqline(x)
par(pty = "m")

x <- rgamma(100, shape = 5, rate = 0.1)
fitdistr(x, "gamma")
x2 <- rt(250, df = 9)
fitdistr(x2, "t", df = 9)
fitdistr(x2, "t")


# 5.2  Generating random data

contam <- rnorm( 100, 0, (1 + 2*rbinom(100, 1, 0.05)) )


# 5.3  Data summaries

par(mfrow=c(2,3))
hist(geyser$duration, "scott", xlab="duration")
hist(chem, "scott")
hist(tperm, "scott")
hist(geyser$duration, "FD", xlab="duration")
hist(chem, "FD")
hist(tperm, "FD")
par(mfrow=c(1,1))

swiss.fertility <- swiss[, 1]
stem(swiss.fertility)
stem(chem)
stem(abbey)
stem(abbey, scale = 0.4) ## use scale = 0.4 in R


par(mfrow = c(1,2))
boxplot(chem, sub = "chem", range = 0.5)
boxplot(abbey, sub = "abbey")
par(mfrow = c(1,1))

bwplot(type ~ y | meas, data = fgl.df, scales = list(x="free"),
  strip = function(...) strip.default(..., style=1), xlab = "")


# 5.4  Classical univariate statistics

attach(shoes)
t.test(A, mu = 10)
t.test(A)$conf.int

wilcox.test(A, mu = 10)

var.test(A, B)

t.test(A, B, var.equal = TRUE)

t.test(A, B, var.equal = FALSE)

wilcox.test(A, B)

t.test(A, B, paired = TRUE)

wilcox.test(A, B, paired = TRUE)
detach()

par(mfrow = c(1, 2))
truehist(tperm, xlab = "diff")
x <- seq(-4,4, 0.1)
lines(x, dt(x,9))
#cdf.compare(tperm, distribution = "t", df = 9)
sres <- c(sort(tperm), 4)
yres <- (0:1024)/1024
plot(sres, yres, type="S", xlab="diff", ylab="")
lines(x, pt(x,9), lty=3)

legend(-5, 1.05, c("Permutation dsn","t_9 cdf"), lty = c(1,3))
par(mfrow = c(1, 1))


# 5.5  Robust summaries

# Figure 5.7 was obtained by
x <- seq(-10, 10, len=500)
y <- dt(x, 25, log = TRUE)
z <- -diff(y)/diff(x)
plot(x[-1], z, type = "l", xlab = "", ylab = "psi")
y2 <-  dt(x, 5, log = TRUE)
z2 <- -diff(y2)/diff(x)
lines(x[-1], z2, lty = 2)


sort(chem)
mean(chem)
median(chem)
#location.m(chem)
#location.m(chem, psi.fun="huber")
mad(chem)
#scale.tau(chem)
#scale.tau(chem, center=3.68)
unlist(huber(chem))
unlist(hubers(chem))
fitdistr(chem, "t", list(m = 3, s = 0.5), df = 5)

sort(abbey)
mean(abbey)
median(abbey)
#location.m(abbey)
#location.m(abbey, psi.fun="huber")
unlist(hubers(abbey))
unlist(hubers(abbey, k = 2))
unlist(hubers(abbey, k = 1))
fitdistr(abbey, "t", list(m = 12, s = 5), df = 10)


# 5.6  Density estimation

# Figure 5.8
attach(geyser)
par(mfrow=c(2,3))
truehist(duration, h=0.5, x0=0.0, xlim=c(0, 6), ymax=0.7)
truehist(duration, h=0.5, x0=0.1, xlim=c(0, 6), ymax=0.7)
truehist(duration, h=0.5, x0=0.2, xlim=c(0, 6), ymax=0.7)
truehist(duration, h=0.5, x0=0.3, xlim=c(0, 6), ymax=0.7)
truehist(duration, h=0.5, x0=0.4, xlim=c(0, 6), ymax=0.7)

breaks <- seq(0, 5.9, 0.1)
counts <- numeric(length(breaks))
for(i in (0:4)) counts[i+(1:55)] <- counts[i+(1:55)] +
    rep(hist(duration, breaks=0.1*i + seq(0, 5.5, 0.5),
    prob=TRUE, plot=FALSE)$density, rep(5,11))
plot(breaks+0.05, counts/5, type="l", xlab="duration",
    ylab="averaged", bty="n", xlim=c(0, 6), ylim=c(0, 0.7))
detach()


attach(geyser)
truehist(duration, nbins = 15, xlim = c(0.5, 6), ymax = 1.2)
lines(density(duration, width = "nrd"))

truehist(duration, nbins = 15, xlim = c(0.5, 6), ymax = 1.2)
lines(density(duration, width = "SJ", n = 256), lty = 3)
lines(density(duration, n = 256, width = "SJ-dpi"), lty = 1)
detach()

gal <- galaxies/1000
plot(x = c(0, 40), y = c(0, 0.3), type = "n", bty = "l",
    xlab = "velocity of galaxy (1000km/s)", ylab = "density")
rug(gal)
lines(density(gal, width = "SJ-dpi", n = 256), lty = 1)
lines(density(gal, width = "SJ", n = 256), lty = 3)
library(polspline)
x <- seq(5, 40, length = 500)
lines(x, doldlogspline(x, oldlogspline(gal)), lty = 2)


geyser2 <- data.frame(as.data.frame(geyser)[-1, ],
                      pduration = geyser$duration[-299])
attach(geyser2)
par(mfrow = c(2, 2))
plot(pduration, waiting, xlim = c(0.5, 6), ylim = c(40, 110),
   xlab = "previous duration", ylab = "waiting")
f1 <- kde2d(pduration, waiting, n = 50, lims=c(0.5, 6, 40, 110))
image(f1, zlim = c(0, 0.075),
     xlab = "previous duration", ylab = "waiting")
f2 <- kde2d(pduration, waiting, n = 50, lims=c(0.5, 6, 40, 110),
  h = c(width.SJ(duration), width.SJ(waiting)) )
image(f2, zlim = c(0, 0.075),
      xlab = "previous duration", ylab = "waiting")
persp(f2,  phi = 30, theta = 20, d = 5,
      xlab = "previous duration", ylab = "waiting", zlab = "")
detach()

density(gal, n = 1, from = 20.833, to = 20.834, width = "SJ")$y
1/(2 * sqrt(length(gal)) * 0.13)

set.seed(101)
m <- 1000
res <- numeric(m)
for (i in 1:m) res[i] <- median(sample(gal, replace = TRUE))
mean(res - median(gal))
sqrt(var(res))

truehist(res, h = 0.1)
lines(density(res, width = "SJ-dpi", n = 256))
quantile(res, p = c(0.025, 0.975))
x <- seq(19.5, 22.5, length = 500)
lines(x, doldlogspline(x, oldlogspline(res)), lty = 3)

library(boot)
set.seed(101)
gal.boot <- boot(gal, function(x, i) median(x[i]), R = 1000)
gal.boot

boot.ci(gal.boot, conf = c(0.90, 0.95),
         type = c("norm","basic","perc","bca"))
plot(gal.boot)

if(FALSE) { # bootstrap() is an S-PLUS function
gal.bt <- bootstrap(gal, median, seed = 101, B = 1000)
summary(gal.bt)
plot(gal.bt)
qqnorm(gal.bt)

limits.emp(gal.bt)
limits.bca(gal.bt)
}

sim.gen  <- function(data, mle) {
 n <- length(data)
 data[sample(n, replace = TRUE)]  + mle*rnorm(n)
}
gal.boot2 <- boot(gal, median, R = 1000,
 sim = "parametric", ran.gen = sim.gen, mle = 0.5)
boot.ci(gal.boot2, conf = c(0.90, 0.95),
         type = c("norm","basic","perc"))

attach(shoes)
t.test(B - A)
shoes.boot <- boot(B - A, function(x,i) mean(x[i]), R = 1000)
boot.ci(shoes.boot, type = c("norm", "basic", "perc", "bca"))
mean.fun <- function(d, i) {
 n <- length(i)
 c(mean(d[i]), (n-1)*var(d[i])/n^2)
}
shoes.boot2 <- boot(B - A, mean.fun, R = 1000)
boot.ci(shoes.boot2, type = "stud")
detach()

# End of ch05

Any scripts or data that you put into this service are public.

MASS documentation built on June 22, 2024, 10:42 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

MASS
Support Functions and Datasets for Venables and Ripley's MASS

inst/scripts/ch05.R
In MASS: Support Functions and Datasets for Venables and Ripley's MASS

Try the MASS package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

MASS Support Functions and Datasets for Venables and Ripley's MASS

inst/scripts/ch05.R In MASS: Support Functions and Datasets for Venables and Ripley's MASS

Try the MASS package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

MASS
Support Functions and Datasets for Venables and Ripley's MASS

inst/scripts/ch05.R
In MASS: Support Functions and Datasets for Venables and Ripley's MASS