inst/slowtests/test.modguide.R

# test.modguide.bat: test model1 and model2 (linmod examples) in modguide.pdf

source("test.prolog.R")
almost.equal <- function(x, y, max=1e-8)
{
    stopifnot(max >= 0 && max < .01)
    length(x) == length(y) && max(abs(x - y)) < max
}
# check that fit model matches ref lm model in all essential details
check.lm <- function(fit, ref, newdata=trees[3:5,],
                     check.coef.names=TRUE,
                     check.casenames=TRUE,
                     check.newdata=TRUE)
{
    check.names <- function(fit.names, ref.names)
    {
        if(check.casenames &&
           # lm always adds rownames even if "1", "2", "3"
           # this seems wasteful of resources, so linmod doesn't do this
           !is.null(fit.names) &&
           !identical(fit.names, ref.names)) {
            print(fit.names)
            print(ref.names)
            stop(deparse(substitute(fit.names)), " != ",
                 deparse(substitute(ref.names)))
        }
    }
    cat("check ", deparse(substitute(fit)), " vs ",
        deparse(substitute(ref)), "\n", sep="")

    stopifnot(coef(fit) == coef(ref))
    if(check.coef.names)
        stopifnot(identical(names(coef(fit)), names(coef(ref))))

    stopifnot(identical(dim(fit$coefficients), dim(ref$coefficients)))
    stopifnot(length(fit$coefficients) == length(ref$coefficients))
    stopifnot(almost.equal(fit$coefficients, ref$coefficients))

    stopifnot(identical(dim(fit$residuals), dim(ref$residuals)))
    stopifnot(length(fit$residuals) == length(ref$residuals))
    stopifnot(almost.equal(fit$residuals, ref$residuals))

    stopifnot(identical(dim(fit$fitted.values), dim(ref$fitted.values)))
    stopifnot(length(fit$fitted.values) == length(ref$fitted.values))
    stopifnot(almost.equal(fit$fitted.values, ref$fitted.values))

    if(!is.null(fit$vcov) && !is.null(ref$vcov)) {
        stopifnot(identical(dim(fit$vcov), dim(ref$vcov)))
        stopifnot(length(fit$vcov) == length(ref$vcov))
        stopifnot(almost.equal(fit$vcov, ref$vcov))
    }
    ref.sigma <- ref$sigma
    if(is.null(ref.sigma)) # in lm models, sigma is only available from summary()
        ref.sigma <- summary(ref)$sigma
    stopifnot(almost.equal(fit$sigma, ref.sigma))

    stopifnot(almost.equal(fit$df, ref$df))

    stopifnot(almost.equal(fitted(fit), fitted(ref)))
    check.names(names(fitted(fit)), names(fitted(ref)))

    stopifnot(almost.equal(residuals(fit), residuals(ref)))
    check.names(names(residuals(fit)), names(residuals(ref)))

    stopifnot(almost.equal(predict(fit), predict(ref)))
    check.names(names(predict(fit)), names(predict(ref)))
    if(check.newdata) {
        stopifnot(almost.equal(predict(fit, newdata=newdata),
                               predict(ref, newdata=newdata)))
        check.names(names(predict(fit, newdata=newdata)),
                    names(predict(ref, newdata=newdata)))
    }
}
### Model 1: original code from Friedrich Leisch tutorial

source("modguide.model1.R")

cat("==example issues with predict with functions in the tutorial\n")
data(trees)
tr <- trees # trees data but with rownames
rownames(tr) <- paste("tree", 1:nrow(trees), sep="")
fit1 <- linmod(Volume~., data=tr)
expect.err(try(predict(fit1, newdata=data.frame(Girth=10, Height=80))), "object 'Volume' not found")
expect.err(try(predict(fit1, newdata=as.matrix(tr[1:3,]))), "'data' must be a data.frame, not a matrix or an array")
library(plotmo)
expect.err(try(plotmo(fit1)), "object 'Volume' not found")
fit2 <- linmod(cbind(1, tr[,1:2]), tr[,3])
stopifnot(coef(fit1) == coef(fit2))
# following fail because newdata is a data.frame not a matrix
expect.err(try(predict(fit2, newdata=tr[,1:2])), "requires numeric/complex matrix/vector arguments")
expect.err(try(predict(fit2, newdata=data.frame(Girth=10, Height=80))), "requires numeric/complex matrix/vector arguments")
expect.err(try(predict(fit2, newdata=as.matrix(data.frame(Girth=10, Height=80)))), "non-conformable arguments")
expect.err(try(plotmo(fit2)), "requires numeric/complex matrix/vector arguments")

cat("==a plotmo method function can deal with the issues\n")
plotmo.predict.linmod <- function(object, newdata, ...)
{
    if(is.null(object$formula))                                # x,y interface?
        plotmo:::plotmo.predict.defaultm(object, newdata, ...) # pass matrix not data.frame
    else {
        # add dummy response column to newdata
        newdata[[as.character(as.list(object$formula)[[2]])]] <- 1
        plotmo:::plotmo.predict.default(object, newdata, ...)
    }
}
plotmo(fit1, pt.col=2, caption="fit1 with original tutorial code and plotmo.predict.linmod")
plotmo(fit2, pt.col=2, caption="fit2 with original tutorial code and plotmo.predict.linmod")
remove(plotmo.predict.linmod)

### Model 2: minimal changes version for vignette "Guidelines for S3 Regression Models"

source("modguide.model2.R")

cat("==check that example issues with functions in the tutorial have gone\n")
fit1.form <- linmod(Volume~., data=tr)
cat("==print(summary(fit1.form))\n")
print(summary(fit1.form))
stopifnot(abs(predict(fit1.form, newdata=data.frame(Girth=10, Height=80)) - 16.234045) < 1e-5)
stopifnot(sum(abs(predict(fit1.form, newdata=as.matrix(tr[1:3,])) - c(4.8376597, 4.5538516, 4.8169813))) < 1e-5)

lm.tr <- lm(Volume~., data=tr)
check.lm(fit1.form, lm.tr)

fit1.mat <- linmod(tr[,1:2], tr[,3]) # note no need for intercept term
cat("==print(summary(fit1.mat))\n")
print(summary(fit1.mat))
stopifnot(abs(predict(fit1.mat, newdata=data.frame(Girth=10, Height=80)) - 16.234045) < 1e-5)
stopifnot(sum(abs(predict(fit1.mat, newdata=tr[1:3,1:2]) - c(4.8376597, 4.5538516, 4.8169813))) < 1e-5)
stopifnot(abs(predict(fit1.mat, newdata=as.matrix(data.frame(Girth=10, Height=80))) - 16.234045) < 1e-5)

check.lm(fit1.mat, lm.tr, newdata=trees[3:5,1:2])

cat("==example plots\n")

library(plotmo)
data(trees)

fit1.form <- linmod(Volume~., data=trees)
print(fit1.form)
print(summary(fit1.form))

fit1.mat <- linmod(trees[,1:2], trees[,3])
print(fit1.mat)
print(summary(fit1.mat))

plotmo(fit1.form)
plotmo(fit1.mat)

plotres(fit1.form)
plotres(fit1.mat)

cat("==test model building with different numeric args\n")

x <- tr[,1:2]
y <- tr[,3]
fit2.mat <- linmod(x, y)
check.lm(fit2.mat, lm.tr, newdata=trees[3:5,1:2])

# check consistency with lm
expect.err(try(linmod(y~x)), "invalid type (list) for variable 'x'")
expect.err(try(lm(y~x)), "invalid type (list) for variable 'x'")

fit3.mat <- linmod(as.matrix(x), as.matrix(y))
check.lm(fit3.mat, lm.tr, newdata=trees[3:5,1:2])

fit4.form <- linmod(y ~ as.matrix(x))
lm4 <- linmod(y ~ as.matrix(x))
check.lm(fit4.form, lm4)
stopifnot(coef(fit4.form)  == coef(lm.tr),
          gsub("as.matrix(x)", "", names(coef(fit4.form)), fixed=TRUE)  == names(coef(lm.tr)))

xm <- as.matrix(x)
fit5.form <- linmod(y ~ xm)
lm5 <- linmod(y ~ xm)
check.lm(fit5.form, lm5)
stopifnot(coef(fit5.form)  == coef(lm.tr),
          gsub("xm", "", names(coef(fit5.form)), fixed=TRUE)  == names(coef(lm.tr)))

cat("==test correct use of global x1 and y1\n")
x1 <- tr[,1]
y1 <- tr[,3]
linmod1 <- linmod(y1~x1)

fit6.mat <- linmod(x1, y1)
check.lm(fit6.mat, linmod1, newdata=x1[3:5],
         check.newdata=FALSE, # TODO needed because linmod1 ignores newdata(!)
         check.coef.names=FALSE, check.casenames=FALSE)
print(predict(fit6.mat, newdata=x1[3:5]))
stopifnot(almost.equal(predict(fit6.mat, newdata=x1[3]), 7.63607739644657))
# production version only:
# stopifnot(coef(fit6.mat) == coef(linmod1),
#           names(coef(fit6.mat)) == c("(Intercept)", "V1")) # names(coef(linmod1) are "(Intercept)" "x1"

fit6.form <- linmod(y1~x1)
check.lm(fit6.form, linmod1)

cat("==check integer input (sibsp is an integer) \n")

library(earth) # for etitanic data
data(etitanic)
tit <- etitanic[seq(1, nrow(etitanic), by=60), ] # small set of data for tests (18 cases)
tit$survived <- tit$survived != 0 # convert to logical
rownames(tit) <- paste("pas", 1:nrow(tit), sep="")
cat(paste(colnames(tit), "=", sapply(tit, class), sep="", collapse=", "), "\n")

fit7.mat <- linmod(tit$age, tit$sibsp)
lm7 <- lm.fit(cbind(1, tit$age), tit$sibsp)
stopifnot(coef(fit7.mat) == coef(lm7)) # coef names will differ

fit7.form <- linmod(sibsp~age, data=tit)
lm7.form  <- lm(sibsp~age, data=tit)
check.lm(fit7.form, lm7.form, newdata=tit[3:5,])

fit8.mat <- linmod(tit$sibsp, tit$age)
lm8 <- lm.fit(cbind(1, tit$sibsp), tit$age)
stopifnot(coef(fit8.mat) == coef(lm8)) # coef names will differ

fit8.form <- linmod(age~sibsp, data=tit)
lm8.form  <- lm(age~sibsp, data=tit)
check.lm(fit8.form, lm8.form, newdata=tit[3:5,])

# drop=FALSE so response is a data frame
fit1a.mat <- linmod(trees[,1:2], trees[, 3, drop=FALSE])
print(fit1a.mat)
print(summary(fit1.mat))
plotres(fit1a.mat) # plot caption shows response name "Volume"

cat("==test model building with different non numeric args\n")

library(earth) # for etitanic data
data(etitanic)
tit <- etitanic[seq(1, nrow(etitanic), by=60), ] # small set of data for tests (18 cases)
tit$survived <- tit$survived != 0 # convert to logical
rownames(tit) <- paste("pas", 1:nrow(tit), sep="")
cat(paste(colnames(tit), "=", sapply(tit, class), sep="", collapse=", "), "\n")

lm9 <- lm(survived~., data=tit)
fit9.form <- linmod(survived~., data=tit)
check.lm(fit9.form, lm9, newdata=tit[3:5,])

options(warn=2) # treat warnings as errors
# factors in x
expect.err(try(linmod(tit[,c(1,3,4,5,6)], tit[,"survived"])), "NAs introduced by coercion")
options(warn=1) # print warnings as they occur
expect.err(try(linmod(tit[,c(1,3,4,5,6)], tit[,"survived"])), "NA/NaN/Inf in foreign function call (arg 1)")

options(warn=2) # treat warnings as errors
expect.err(try(lm(pclass~., data=tit)), "using type = \"numeric\" with a factor response will be ignored")
# minimal version
expect.err(try(linmod(pclass~., data=tit)), "(converted from warning) NAs introduced by coercion")
expect.err(try(linmod(tit$pclass, tit$survived)), "(converted from warning) NAs introduced by coercion")
# # production version
# expect.err(try(linmod(pclass~., data=tit)), "'y' is not numeric or logical")
options(warn=1)

lm10 <- lm(pclass~., data=tit) # will give warnings
fit10.form <- linmod(as.numeric(pclass)~., data=tit)
stopifnot(coef(fit10.form) == coef(lm10))
stopifnot(names(coef(fit10.form)) == names(coef(lm10)))
# check.lm(fit10.form, lm10) # fails because lm10 fitted is all NA

# production version: (minimal version just gives warnings and builds lousy model)
# expect.err(try(linmod(pclass~., data=tit)), "'y' is not numeric or logical")
# expect.err(try(linmod(tit[,-1], tit[,1])), "'y' is not numeric or logical")
# expect.err(try(linmod(1:10, paste(1:10))), "'y' is not numeric or logical")

fit10a.form <- linmod(survived~pclass, data=tit)
lm10a <- lm(survived~pclass, data=tit)
check.lm(fit10a.form, lm10a, newdata=tit[3:5,])

expect.err(try(linmod(paste(1:10), 1:10)), "requires numeric/complex matrix/vector arguments")

lm11 <- lm(as.numeric(pclass)~., data=tit)
fit11.form <- linmod(as.numeric(pclass)~., data=tit)
check.lm(fit11.form, lm11, newdata=tit[3:5,])

cat("==data.frame with strings\n")

df.with.string <-
    data.frame(1:5,
               c(1,2,-1,4,5),
               c("a", "b", "a", "a", "b"),
               stringsAsFactors=FALSE)
colnames(df.with.string) <- c("num1", "num2", "string")

fit30.form <- linmod(num1~num2, df.with.string)
lm30       <- lm(num1~num2, df.with.string)
check.lm(fit30.form, lm30, check.newdata=FALSE)

fit31.form <- linmod(num1~., df.with.string)
lm31       <- lm(num1~., df.with.string)
check.lm(fit31.form, lm31, check.newdata=FALSE)

expect.err(try(linmod(string~., df.with.string)), "non-numeric argument to binary operator")
# production version
# expect.err(try(linmod(string~., df.with.string)), "'y' is not numeric or logical")

vec <- c(1,2,3,4,3)
options(warn=2) # treat warnings as errors
expect.err(try(linmod(df.with.string, vec)), "NAs introduced by coercion")
options(warn=1)
# minimal version
expect.err(try(linmod(df.with.string, vec)), "NA/NaN/Inf in foreign function call (arg 1)")
# production version
# expect.err(try(linmod(df.with.string, vec)), "NA in 'x'")

options(warn=2) # treat warnings as errors
expect.err(try(linmod(df.with.string, vec)), "NAs introduced by coercion")
options(warn=1)
# minimal version
expect.err(try(linmod(df.with.string, vec)), "NA/NaN/Inf in foreign function call (arg 1)")
# production version
# expect.err(try(linmod(df.with.string, vec)), "NA in 'x'")

cat("==more variables  than cases\n")

set.seed(1)
x2 <- matrix(rnorm(6), nrow=2)
y2 <- c(1,2)
# production version
# expect.err(try(linmod(y2~x2)), "more variables than cases")
# minimal version
expect.err(try(linmod(y2~x2)), "'size' cannot exceed nrow(x) = 2")

x3 <- matrix(1:10, ncol=2)
y3 <- c(1,2,9,4,5)
# production version will give a better error message
expect.err(try(linmod(y3~x3)), "singular matrix 'a' in 'solve'")

cat("==nrow(x) does not match length(y)\n")
# note that the production version gives better error messages

x4 <- matrix(1:10, ncol=2)
y4 <- c(1,2,9,4)
expect.err(try(linmod(x4, y4)), "singular matrix 'a' in 'solve'")

x5 <- matrix(1:10, ncol=2)
y5 <- c(1,2,9,4,5,9)
expect.err(try(linmod(x5, y5)), "singular matrix 'a' in 'solve'")

cat("==y has multiple columns\n")

vec <- c(1,2,3,4,3)
y2 <- cbind(c(1,2,3,4,9), vec^2)
expect.err(try(linmod(vec, y2)), "'qr' and 'y' must have the same number of rows")
# following does not issue any error message, it should
# expect.err(try(linmod(y2~vec)), "error message")

### Model 3: production version of linmod is tested in test.linmod.R

source("test.epilog.R")
andreabecsek/project documentation built on May 3, 2019, 1:28 p.m.