tests/RUnit/common/runit.mvrVal.R

### runit.mvrVal.R: test functions for R2(), MSEP() and RMSEP()
### By Bjørn-Helge Mevik
### Started 2007-06-20

###
### Check for capturing argument errors:
###
test.argErrors <- function() {
    mod <- mvr(density ~ NIR, ncomp = 6, data = yarn, validation = "CV")
    checkException(R2(mod, comps = 2:5), silent = TRUE)
    checkException(R2(mod, estimate = "adjCV"), silent = TRUE)
    checkException(MSEP(mod, comps = 1:4), silent = TRUE)
}

###
### Checking dimensions and dimnames of output
###
test.dimR2 <- function() {
    ##
    ## Uni-response models:
    ##
    n <- nrow(yarn)
    ncomp <- 7
    comps <- c(1,3,5)
    ## No CV
    mod <- mvr(density ~ NIR, ncomp = ncomp, data = yarn, method = "svdpc")
    res <- R2(mod)
    checkEquals(dim(res$val), c(1,1,ncomp + 1))
    checkIdentical(dimnames(res$val)[[1]], "train")
    checkEquals(res$comps, 0:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- R2(mod, newdata = yarn)
    checkEquals(dim(res$val), c(1,1,ncomp + 1))
    checkIdentical(dimnames(res$val)[[1]], "test")
    checkEquals(res$comps, 0:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- R2(mod, ncomp = 2:5)
    checkEquals(dim(res$val), c(1,1,4 + 1))
    checkIdentical(dimnames(res$val)[[1]], "train")
    checkEquals(res$comps, c(0,2:5))
    checkIdentical(res$cumulative, TRUE)
    res <- R2(mod, newdata = yarn, estimate = "all")
    checkEquals(dim(res$val), c(2,1,ncomp + 1))
    checkIdentical(dimnames(res$val)[[1]], c("train", "test"))
    checkEquals(res$comps, 0:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- R2(mod, newdata = yarn, intercept = FALSE, estimate = c("test", "train"))
    checkEquals(dim(res$val), c(2,1,ncomp))
    checkIdentical(dimnames(res$val)[[1]], c("test", "train"))
    checkEquals(res$comps, 1:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- R2(mod, intercept = TRUE, comps = comps)
    checkEquals(dim(res$val), c(1,1,2), "uni, intercept, comps, 1")
    checkIdentical(dimnames(res$val)[[1]], "train")
    checkEquals(res$comps, c(0, comps), "uni, intercept, comps, 3")
    checkIdentical(res$cumulative, FALSE)
    res <- R2(mod, intercept = FALSE, comps = comps)
    checkEquals(dim(res$val), c(1,1,1), "uni, no intercept, comps, 1")
    checkIdentical(dimnames(res$val)[[1]], "train")
    checkEquals(res$comps, comps)
    checkIdentical(res$cumulative, FALSE)
    ## With CV
    mod <- mvr(density ~ NIR, ncomp = ncomp, data = yarn, validation = "CV")
    res <- R2(mod)
    checkEquals(dim(res$val), c(1,1,ncomp + 1), "uni, CV, 1")
    checkIdentical(dimnames(res$val)[[1]], "CV")
    checkEquals(res$comps, 0:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- R2(mod, newdata = yarn)
    checkEquals(dim(res$val), c(1,1,ncomp + 1))
    checkIdentical(dimnames(res$val)[[1]], "test")
    checkEquals(res$comps, 0:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- R2(mod, ncomp = 2:5)
    checkEquals(dim(res$val), c(1,1,4 + 1))
    checkIdentical(dimnames(res$val)[[1]], "CV")
    checkEquals(res$comps, c(0,2:5))
    checkIdentical(res$cumulative, TRUE)
    res <- R2(mod, intercept = FALSE)
    checkEquals(dim(res$val), c(1,1,ncomp))
    checkIdentical(dimnames(res$val)[[1]], "CV")
    checkEquals(res$comps, 1:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- R2(mod, newdata = yarn, estimate = "all")
    checkEquals(dim(res$val), c(3,1,ncomp + 1))
    checkIdentical(dimnames(res$val)[[1]], c("train", "CV", "test"), "uni, CV, newdata, all, 2")
    checkEquals(res$comps, 0:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- R2(mod, newdata = yarn, intercept = FALSE, estimate = "CV")
    checkEquals(dim(res$val), c(1,1,ncomp))
    checkIdentical(dimnames(res$val)[[1]], c("CV"))
    checkEquals(res$comps, 1:ncomp)
    checkIdentical(res$cumulative, TRUE)
    ##
    ## Multi-response models:
    ##
    n <- nrow(oliveoil)
    nresp <- ncol(oliveoil$chemical)
    ncomp <- 4
    comps <- c(2,4)
    ## No CV
    mod <- mvr(chemical ~ sensory, ncomp = ncomp, data = oliveoil)
    res <- R2(mod)
    checkEquals(dim(res$val), c(1,nresp,ncomp + 1))
    checkIdentical(dimnames(res$val)[[1]], "train")
    checkEquals(res$comps, 0:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- R2(mod, newdata = oliveoil)
    checkEquals(dim(res$val), c(1,nresp,ncomp + 1))
    checkIdentical(dimnames(res$val)[[1]], "test")
    checkEquals(res$comps, 0:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- R2(mod, ncomp = 2:4)
    checkEquals(dim(res$val), c(1,nresp,3 + 1), "multi, ncomp, 1")
    checkIdentical(dimnames(res$val)[[1]], "train", "multi, ncomp,2")
    checkEquals(res$comps, c(0,2:4), "multi, ncomp,3")
    checkIdentical(res$cumulative, TRUE, "multi, ncomp,4")
    res <- R2(mod, newdata = oliveoil, estimate = "all")
    checkEquals(dim(res$val), c(2,nresp,ncomp + 1))
    checkIdentical(dimnames(res$val)[[1]], c("train", "test"))
    checkEquals(res$comps, 0:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- R2(mod, newdata = oliveoil, intercept = FALSE, estimate = c("test", "train"))
    checkEquals(dim(res$val), c(2,nresp,ncomp))
    checkIdentical(dimnames(res$val)[[1]], c("test", "train"))
    checkEquals(res$comps, 1:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- R2(mod, intercept = TRUE, comps = comps)
    checkEquals(dim(res$val), c(1,nresp,2), "multi, intercept, comps, 1")
    checkIdentical(dimnames(res$val)[[1]], "train")
    checkEquals(res$comps, c(0, comps), "multi, intercept, comps, 3")
    checkIdentical(res$cumulative, FALSE)
    res <- R2(mod, intercept = FALSE, comps = comps)
    checkEquals(dim(res$val), c(1,nresp,1), "multi, no intercept, comps, 1")
    checkIdentical(dimnames(res$val)[[1]], "train")
    checkEquals(res$comps, comps)
    checkIdentical(res$cumulative, FALSE)
    ## With CV
    mod <- mvr(chemical ~ sensory, ncomp = ncomp, data = oliveoil, validation = "CV", method = "svdpc")
    res <- R2(mod)
    checkEquals(dim(res$val), c(1,nresp,ncomp + 1), "multi, CV, 1")
    checkIdentical(dimnames(res$val)[[1]], "CV")
    checkEquals(res$comps, 0:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- R2(mod, newdata = oliveoil)
    checkEquals(dim(res$val), c(1,nresp,ncomp + 1))
    checkIdentical(dimnames(res$val)[[1]], "test")
    checkEquals(res$comps, 0:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- R2(mod, ncomp = 2:4)
    checkEquals(dim(res$val), c(1,nresp,3 + 1), "multi, CV, ncomp, 1")
    checkIdentical(dimnames(res$val)[[1]], "CV", "multi, CV, ncomp, 2")
    checkEquals(res$comps, c(0,2:4), "multi, CV, ncomp, 3")
    checkIdentical(res$cumulative, TRUE, "multi, CV, ncomp, 4")
    res <- R2(mod, intercept = FALSE)
    checkEquals(dim(res$val), c(1,nresp,ncomp))
    checkIdentical(dimnames(res$val)[[1]], "CV")
    checkEquals(res$comps, 1:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- R2(mod, newdata = oliveoil, estimate = "all")
    checkEquals(dim(res$val), c(3,nresp,ncomp + 1))
    checkIdentical(dimnames(res$val)[[1]], c("train", "CV", "test"), "multi, CV, newdata, all, 2")
    checkEquals(res$comps, 0:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- R2(mod, newdata = oliveoil, intercept = FALSE, estimate = "CV")
    checkEquals(dim(res$val), c(1,nresp,ncomp))
    checkIdentical(dimnames(res$val)[[1]], c("CV"))
    checkEquals(res$comps, 1:ncomp)
    checkIdentical(res$cumulative, TRUE)
}

test.dimMSEP <- function() {
    ##
    ## Uni-response models:
    ##
    n <- nrow(yarn)
    ncomp <- 7
    comps <- c(1,3,5)
    ## No CV
    mod <- mvr(density ~ NIR, ncomp = ncomp, data = yarn, method = "svdpc")
    res <- MSEP(mod)
    checkEquals(dim(res$val), c(1,1,ncomp + 1), "uni, 1")
    checkIdentical(dimnames(res$val)[[1]], "train", "uni, 2")
    checkEquals(res$comps, 0:ncomp, "uni, 3")
    checkIdentical(res$cumulative, TRUE, "uni, 4")
    res <- MSEP(mod, newdata = yarn)
    checkEquals(dim(res$val), c(1,1,ncomp + 1))
    checkIdentical(dimnames(res$val)[[1]], "test")
    checkEquals(res$comps, 0:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- MSEP(mod, ncomp = 2:5)
    checkEquals(dim(res$val), c(1,1,4 + 1))
    checkIdentical(dimnames(res$val)[[1]], "train")
    checkEquals(res$comps, c(0,2:5))
    checkIdentical(res$cumulative, TRUE)
    res <- MSEP(mod, newdata = yarn, estimate = "all")
    checkEquals(dim(res$val), c(2,1,ncomp + 1))
    checkIdentical(dimnames(res$val)[[1]], c("train", "test"))
    checkEquals(res$comps, 0:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- MSEP(mod, newdata = yarn, intercept = FALSE, estimate = c("test", "train"))
    checkEquals(dim(res$val), c(2,1,ncomp))
    checkIdentical(dimnames(res$val)[[1]], c("test", "train"))
    checkEquals(res$comps, 1:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- MSEP(mod, intercept = TRUE, comps = comps)
    checkEquals(dim(res$val), c(1,1,2), "uni, intercept, comps, 1")
    checkIdentical(dimnames(res$val)[[1]], "train")
    checkEquals(res$comps, c(0, comps), "uni, intercept, comps, 3")
    checkIdentical(res$cumulative, FALSE)
    res <- MSEP(mod, intercept = FALSE, comps = comps)
    checkEquals(dim(res$val), c(1,1,1), "uni, no intercept, comps, 1")
    checkIdentical(dimnames(res$val)[[1]], "train")
    checkEquals(res$comps, comps)
    checkIdentical(res$cumulative, FALSE)
    ## With CV
    mod <- mvr(density ~ NIR, ncomp = ncomp, data = yarn, validation = "CV")
    res <- MSEP(mod)
    checkEquals(dim(res$val), c(2,1,ncomp + 1), "uni, CV, 1")
    checkIdentical(dimnames(res$val)[[1]], c("CV", "adjCV"))
    checkEquals(res$comps, 0:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- MSEP(mod, newdata = yarn)
    checkEquals(dim(res$val), c(1,1,ncomp + 1))
    checkIdentical(dimnames(res$val)[[1]], "test")
    checkEquals(res$comps, 0:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- MSEP(mod, ncomp = 2:5)
    checkEquals(dim(res$val), c(2,1,4 + 1))
    checkIdentical(dimnames(res$val)[[1]], c("CV", "adjCV"))
    checkEquals(res$comps, c(0,2:5))
    checkIdentical(res$cumulative, TRUE)
    res <- MSEP(mod, intercept = FALSE)
    checkEquals(dim(res$val), c(2,1,ncomp))
    checkIdentical(dimnames(res$val)[[1]], c("CV", "adjCV"))
    checkEquals(res$comps, 1:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- MSEP(mod, newdata = yarn, estimate = "all")
    checkEquals(dim(res$val), c(4,1,ncomp + 1))
    checkIdentical(dimnames(res$val)[[1]], c("train", "CV", "adjCV", "test"), "uni, CV, newdata, all, 2")
    checkEquals(res$comps, 0:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- MSEP(mod, newdata = yarn, intercept = FALSE, estimate = "CV")
    checkEquals(dim(res$val), c(1,1,ncomp))
    checkIdentical(dimnames(res$val)[[1]], c("CV"))
    checkEquals(res$comps, 1:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- MSEP(mod, newdata = yarn, intercept = FALSE, estimate = "adjCV")
    checkEquals(dim(res$val), c(1,1,ncomp))
    checkIdentical(dimnames(res$val)[[1]], c("adjCV"))
    checkEquals(res$comps, 1:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- MSEP(mod, newdata = yarn, intercept = FALSE, estimate = c("adjCV", "CV"))
    checkEquals(dim(res$val), c(2,1,ncomp))
    checkIdentical(dimnames(res$val)[[1]], c("adjCV", "CV"), "uni, newdata, no intercept, adjCV, CV")
    checkEquals(res$comps, 1:ncomp)
    checkIdentical(res$cumulative, TRUE)
    ##
    ## Multi-response models:
    ##
    n <- nrow(oliveoil)
    nresp <- ncol(oliveoil$chemical)
    ncomp <- 5
    comps <- c(1,3,4)
    ## No CV
    mod <- mvr(chemical ~ sensory, ncomp = ncomp, data = oliveoil, method = "svdpc")
    res <- MSEP(mod)
    checkEquals(dim(res$val), c(1,nresp,ncomp + 1))
    checkIdentical(dimnames(res$val)[[1]], "train")
    checkEquals(res$comps, 0:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- MSEP(mod, newdata = oliveoil)
    checkEquals(dim(res$val), c(1,nresp,ncomp + 1))
    checkIdentical(dimnames(res$val)[[1]], "test")
    checkEquals(res$comps, 0:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- MSEP(mod, ncomp = 2:5)
    checkEquals(dim(res$val), c(1,nresp,4 + 1))
    checkIdentical(dimnames(res$val)[[1]], "train")
    checkEquals(res$comps, c(0,2:5))
    checkIdentical(res$cumulative, TRUE)
    res <- MSEP(mod, newdata = oliveoil, estimate = "all")
    checkEquals(dim(res$val), c(2,nresp,ncomp + 1))
    checkIdentical(dimnames(res$val)[[1]], c("train", "test"))
    checkEquals(res$comps, 0:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- MSEP(mod, newdata = oliveoil, intercept = FALSE, estimate = c("test", "train"))
    checkEquals(dim(res$val), c(2,nresp,ncomp))
    checkIdentical(dimnames(res$val)[[1]], c("test", "train"))
    checkEquals(res$comps, 1:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- MSEP(mod, intercept = TRUE, comps = comps)
    checkEquals(dim(res$val), c(1,nresp,2), "uni, intercept, comps, 1")
    checkIdentical(dimnames(res$val)[[1]], "train")
    checkEquals(res$comps, c(0, comps), "uni, intercept, comps, 3")
    checkIdentical(res$cumulative, FALSE)
    res <- MSEP(mod, intercept = FALSE, comps = comps)
    checkEquals(dim(res$val), c(1,nresp,1), "uni, no intercept, comps, 1")
    checkIdentical(dimnames(res$val)[[1]], "train")
    checkEquals(res$comps, comps)
    checkIdentical(res$cumulative, FALSE)
    ## With CV
    mod <- mvr(chemical ~ sensory, ncomp = ncomp, data = oliveoil, validation = "CV")
    res <- MSEP(mod)
    checkEquals(dim(res$val), c(2,nresp,ncomp + 1), "uni, CV, 1")
    checkIdentical(dimnames(res$val)[[1]], c("CV", "adjCV"))
    checkEquals(res$comps, 0:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- MSEP(mod, newdata = oliveoil)
    checkEquals(dim(res$val), c(1,nresp,ncomp + 1))
    checkIdentical(dimnames(res$val)[[1]], "test")
    checkEquals(res$comps, 0:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- MSEP(mod, ncomp = 2:5)
    checkEquals(dim(res$val), c(2,nresp,4 + 1))
    checkIdentical(dimnames(res$val)[[1]], c("CV", "adjCV"))
    checkEquals(res$comps, c(0,2:5))
    checkIdentical(res$cumulative, TRUE)
    res <- MSEP(mod, intercept = FALSE)
    checkEquals(dim(res$val), c(2,nresp,ncomp))
    checkIdentical(dimnames(res$val)[[1]], c("CV", "adjCV"))
    checkEquals(res$comps, 1:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- MSEP(mod, newdata = oliveoil, estimate = "all")
    checkEquals(dim(res$val), c(4,nresp,ncomp + 1))
    checkIdentical(dimnames(res$val)[[1]], c("train", "CV", "adjCV", "test"), "uni, CV, newdata, all, 2")
    checkEquals(res$comps, 0:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- MSEP(mod, newdata = oliveoil, intercept = FALSE, estimate = "CV")
    checkEquals(dim(res$val), c(1,nresp,ncomp))
    checkIdentical(dimnames(res$val)[[1]], c("CV"))
    checkEquals(res$comps, 1:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- MSEP(mod, newdata = oliveoil, intercept = FALSE, estimate = "adjCV")
    checkEquals(dim(res$val), c(1,nresp,ncomp))
    checkIdentical(dimnames(res$val)[[1]], c("adjCV"))
    checkEquals(res$comps, 1:ncomp)
    checkIdentical(res$cumulative, TRUE)
    res <- MSEP(mod, newdata = oliveoil, intercept = FALSE, estimate = c("adjCV", "CV"))
    checkEquals(dim(res$val), c(2,nresp,ncomp))
    checkIdentical(dimnames(res$val)[[1]], c("adjCV", "CV"), "uni, newdata, no intercept, adjCV, CV")
    checkEquals(res$comps, 1:ncomp)
    checkIdentical(res$cumulative, TRUE)
}


###
### Check some numerical values
###
test.estimates <- function() {
    mod <- mvr(chemical ~ sensory, data = oliveoil)
    res <- R2(mod, newdata = oliveoil, estimate = "all")
    checkEquals(res$val["train",,], res$val["test",,], "R2, train = test")
    checkEquals(res$val["train","Acidity","2 comps"], cor(fitted(mod)[,"Acidity","2 comps"], oliveoil$chemical[,"Acidity"])^2, "R2, train R^2 = cor^2")
    res <- MSEP(mod, newdata = oliveoil, estimate = "all")
    checkEquals(res$val["train",,], res$val["test",,], "MSEP, train = test")
    checkEqualsNumeric(res$val["train",,-1], colMeans((fitted(mod)- c(oliveoil$chemical))^2), "MSEP, train directly")
    mod <- mvr(chemical ~ sensory, data = oliveoil, validation = "CV")
    res <- MSEP(mod, estimate = "CV")
    checkEqualsNumeric(res$val["CV",,-1],
                colMeans((mod$validation$pred - c(oliveoil$chemical))^2))
    ## FIXME: Temporary check:
    checkEquals(res$val["CV",,1],
                apply(oliveoil$chemical, 2, var) * nrow(oliveoil) / (nrow(oliveoil)-1))
}


###
### Test handling of missing values
###

## Handling of missing data in train and/or test data
test.MSEPmissing <- function() {
    ## Two missing in X, one in Y
    olmiss <- oliveoil
    olmiss$chemical[2,3] <- NA
    olmiss$chemical[3,4] <- NA
    olmiss$sensory[4,2] <- NA
    ## Common segments for comparison:
    ## Parametres:
    ## - mvr: na.action: omit (default) / exclude
    ## - MSEP: ncomp (default) / comps

    ## na.action = na.omit (default)
    mod <- mvr(sensory ~ chemical, data = olmiss, validation = "LOO")
    res1 <- MSEP(mod, estimate = "all", newdata = olmiss)
    checkTrue(all(is.finite(res1$val)), "na.omit, all is finite 1")
    checkEquals(res1$val["train",,], res1$val["test",,],
                "na.omit, train == test")
    checkEqualsNumeric(res1$val["train",,-1],
                       colMeans((fitted(mod) - c(olmiss$sensory[-c(2:4),]))^2),
                       "na.omit, train == fitted")
    checkEqualsNumeric(res1$val["CV",,-1],
                       mod$validation$PRESS / (nrow(olmiss) - 3),
                       "na.omit, CV == PRESS/n")
    ## FIXME: Check adjCV

    res2 <- MSEP(mod, estimate = c("train", "test"), newdata = olmiss,
                 comps = 1:5)
    checkTrue(all(is.finite(res2$val)), "na.omit, all is finite 2")
    checkEquals(drop(res2$val), res1$val[c("train", "test"),,6],
                "na.omit, comps == ncomp")

    ## na.action = na.exclude
    mod <- mvr(sensory ~ chemical, data = olmiss, validation = "LOO",
                na.action = na.exclude)
    res3 <- MSEP(mod, estimate = "all", newdata = olmiss)
## Superfluous:
##     checkTrue(all(is.finite(res3$val)), "na.exclude, all is finite 1")
##     checkEquals(res3$val["train",,], res3$val["test",,],
##                 "na.exclude, train == test")
##     checkEqualsNumeric(res3$val["train",,-1],
##                        colMeans((fitted(mod)[-c(2:4),,] -
##                                  c(olmiss$sensory[-c(2:4),]))^2),
##                        "na.exclude, train == fitted")
##     checkEqualsNumeric(res3$val["CV",,-1],
##                        mod$validation$PRESS / (nrow(olmiss) - 3),
##                        "na.exclude, CV == PRESS/n")
##     ## FIXME: Check adjCV

    res4 <- MSEP(mod, estimate = c("train", "test"), newdata = olmiss,
                 comps = 1:5)
## Superfluous:
##     checkTrue(all(is.finite(res4$val)), "na.exclude, all is finite 2")
##     checkEquals(drop(res4$val), res3$val[c("train", "test"),,6],
##                 "na.exclude, comps == ncomp")

    ## na.omit vs. na.exclude
    checkIdentical(res1, res3, "res1 == res3")
    checkIdentical(res2, res4, "res2 == res4")
}
bhmevik/pls documentation built on Nov. 23, 2023, 5:56 a.m.