Karl:

#  File src/library/stats/tests/nafns.R
#  Part of the R package, https://www.R-project.org
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
#  https://www.R-project.org/Licenses/

## Tests of functions handling NAs in fits
## These functions were introduced in 1.3.0.
## They are used by lm and glm in base R, and by
## packages MASS, rpart and survival.

## Comparison strictness is set at 256eps, increased from 100eps for v.2.1.0
## May look lenient, but notice that the Ozone levels that are being modeled
## can be more than 100.

dim(airquality)
nd <- airquality[c(6,25:27), ]

sm <- function(x) cat("length", length(x), "with", sum(is.na(x)), "NAs\n")

# default is to omit some rows
fit <- lm(Ozone ~ ., data=airquality, na.action=na.omit)
summary(fit)
sm(fitted(fit))
sm(resid(fit))
sm(predict(fit))
(pp <- predict(fit, nd))

fit2 <- lm(Ozone ~ ., data=airquality, na.action=na.exclude)
summary(fit2) # same as before
sm(fitted(fit2))
sm(resid(fit2))
sm(predict(fit2))
(pp2 <- predict(fit2, nd))

## same as before: napredict is only applied to predictions on the
## original data, following Therneau's original code (and S-PLUS).
## However, as from R 1.8.0 there is a separate na.action arg to predict.lm()
stopifnot(all.equal(pp, pp2))

## should fail
try(fit3 <- lm(Ozone ~ ., data=airquality, na.action=na.fail))

## more precise tests.
f1 <- fitted(fit)
f2 <- fitted(fit2)
common <- match(names(f1), names(f2))
stopifnot(max(abs(f1 - f2[common])) <= 256*.Machine$double.eps)
stopifnot(all(is.na(f2[-common])))

r1 <- resid(fit)
r2 <- resid(fit2)
common <- match(names(r1), names(r2))
stopifnot(max(abs(r1 - r2[common])) <= 256*.Machine$double.eps)
stopifnot(all(is.na(r2[-common])))

p1 <- predict(fit)
p2 <- predict(fit2)
common <- match(names(p1), names(p2))
stopifnot(max(abs(p1 - p2[common])) <= 256*.Machine$double.eps)
stopifnot(all(is.na(p2[-common])))


### now try out glm
gfit <- glm(Ozone ~ ., data=airquality, na.action=na.omit)
summary(gfit)
sm(fitted(gfit))
sm(resid(gfit))
sm(predict(gfit))
predict(gfit, nd)
(pp <- predict(gfit, nd))

gfit2 <- glm(Ozone ~ ., data=airquality, na.action=na.exclude)
summary(gfit2) # same as before
sm(fitted(gfit2))
sm(resid(gfit2))
sm(predict(gfit2))
(pp2 <- predict(gfit2, nd))
stopifnot(all.equal(pp, pp2))

## more precise tests.
f1 <- fitted(gfit)
f2 <- fitted(gfit2)
common <- match(names(f1), names(f2))
stopifnot(max(abs(f1 - f2[common])) <= 256*.Machine$double.eps)
stopifnot(all(is.na(f2[-common])))

r1 <- resid(gfit)
r2 <- resid(gfit2)
common <- match(names(r1), names(r2))
stopifnot(max(abs(r1 - r2[common])) <= 256*.Machine$double.eps)
stopifnot(all(is.na(r2[-common])))

p1 <- predict(gfit)
p2 <- predict(gfit2)
common <- match(names(p1), names(p2))
stopifnot(max(abs(p1 - p2[common])) <= 256*.Machine$double.eps)
stopifnot(all(is.na(p2[-common])))

## tests of diagnostic measures.
set.seed(11)
x <- 1:10
y <- c(rnorm(9),NA)
fit <- lm(y ~ x, na.action=na.exclude)
fit2 <- lm(y ~ x, subset=-10)

lm.influence(fit2); lm.influence(fit)

rstandard(fit2); rstandard(fit)
rstudent(fit2); rstudent(fit)

dffits(fit2); dffits(fit)

dfbetas(fit2); dfbetas(fit)

covratio(fit2); covratio(fit)

cooks.distance(fit2); cooks.distance(fit)

(inf <- influence.measures(fit))
(inf2 <- influence.measures(fit2))

summary(inf)
summary(inf2)

plot(fit)