# test-nipals.R
# see setup-expectations.R for expect_aligned
# Note, I looked at the python repository:
# https://github.com/fredrikw/python-nipals/blob/master/tests/test_nipals.py
# I did not understand the tests and so did not incorporate the yarn
# data or oliveoil data.
## ---------------------------------------------------------------------------
# Create data for tests
B_full <- matrix(c(50, 67, 90, 98, 120,
55, 71, 93, 102, 129,
65, 76, 95, 105, 134,
50, 80, 102, 130, 138,
60, 82, 97, 135, 151,
65, 89, 106, 137, 153,
75, 95, 117, 133, 155), ncol=5, byrow=TRUE)
rownames(B_full) <- c("G1","G2","G3","G4","G5","G6","G7")
colnames(B_full) <- c("E1","E2","E3","E4","E5")
# Same data with 2 NAs
B_miss <- matrix(c(NA, 67, 90, 98, 120,
NA, 71, 93, 102, 129,
65, 76, 95, 105, 134,
50, 80, 102, 130, 138,
60, 82, 97, 135, 151,
65, 89, 106, 137, 153,
75, 95, 117, 133, 155), ncol=5, byrow=TRUE)
rownames(B_miss) <- c("G1","G2","G3","G4","G5","G6","G7")
colnames(B_miss) <- c("E1","E2","E3","E4","E5")
## ---------------------------------------------------------------------------
test_that("For complete-data matrix, fitted values match original data", {
m1 <- nipals(B_full, center=TRUE, scale=TRUE, fitted=TRUE, tol=1e-8)
expect_equal( round( m1$fitted, 3), B_full )
m2 <- nipals(B_full, center=TRUE, scale=FALSE, tol=1e-8, fitted=TRUE)
expect_equal( round( m2$fitted, 3), B_full )
m3 <- nipals(B_full, center=FALSE, scale=FALSE, tol=1e-8, fitted=TRUE)
expect_equal( round( m3$fitted, 3), B_full )
m4 <- nipals(B_full, center=FALSE, scale=TRUE, tol=1e-8, fitted=TRUE)
expect_equal( round( m4$fitted, 3), B_full )
})
test_that("Estimate fitted values for NAs", {
# Published examples of NIPALS with missing values are almost nonexistant.
# Here is an example with XLSTAT (details on methodology are limited)
#
# Circa 2017 XLSTAT estimated the missing values below as
# https://help.xlstat.com/customer/en/portal/articles/2062415-missing-data-imputation-using-nipals-in-excel?b_id=9283
# 1365.236, 88.600, 175.798, 1051.698, 432.470, 168.554
#
# In 2019, XLSTAT estimates
# https://help.xlstat.com/6497-missing-data-imputation-using-nipals-excel
# 1398.829, 88.151, 178.313, 1060.534, 444.254, 169.686
#
auto <- data.frame(capacity = c(1396, 1721, 1580, 1769, 2068, 1769L),
power = c(90, 92, 83, 90, 88, 90L),
speed = c(174, 180, 170, 180, 180, 182L),
weight = c(850, 965, 970, 1080, 1135, 1060L),
length = c(369, 415, 395, 440, 446, 424L),
width = c(166, 169, 170, 169, 170, 168L))
rownames(auto) <- c("Honda civic", "Renault 19", "Fiat Tipo",
"Peugeot 405", "Renault 21", "Citroen BX")
# create missing values along the diagonal
auto[1,1] <- auto[2,2] <- auto[3,3] <- auto[4,4] <- auto[5,5] <- auto[6,6] <- NA
library(nipals)
# These settings give results similar to XLstat
m1 <- nipals(auto, fitted=TRUE, gramschmidt=FALSE)
expect_equal(diag(m1$fitted),
c(1365.236, 88.600, 175.798, 1051.698, 432.470, 168.554),
tol=1e-1)
# Test Github issue #2
expect_silent(nipals(auto[,-1], ncomp=1, fitted=TRUE))
})
test_that("Code coverage of nipals function arguments", {
library(nipals)
Bnarow = B_full
Bnarow[1,] = NA
expect_error(nipals(Bnarow))
Bnacol = B_full
Bnacol[,1] = NA
expect_error(nipals(Bnacol))
# ncomp
m1 = nipals(B_miss, ncomp=1)
# center/scale
m1 = nipals(B_miss, center=FALSE, scale=FALSE)
m1 = nipals(B_miss, center=TRUE, scale=FALSE)
m1 = nipals(B_miss, center=TRUE, scale=TRUE)
# maxiter
expect_warning(nipals(B_full, maxiter=2))
# tol
m1 = nipals(B_miss, tol=1e-1, verbose=TRUE)
m1 = nipals(B_miss, tol=1e-10, verbose=TRUE)
# startcol
m1 = nipals(B_full, startcol=0, verbose=TRUE)
m1 = nipals(B_full, startcol=5, verbose=TRUE)
# fitted
expect_null(nipals(B_full)$fitted)
expect_null(nipals(B_full, fitted=FALSE)$fitted)
expect_false(is.null(nipals(B_full, fitted=TRUE)$fitted))
# force.na
m1 = nipals(B_full, force.na=FALSE)
m1 = nipals(B_full, force.na=TRUE)
# gramschmidt
m1 = nipals(B_miss, gramschmidt=FALSE) # default
round(crossprod(m1$loadings), 3) # 1 on diagonal, but not identity
m2 = nipals(B_miss, gramschmidt=TRUE)
round(crossprod(m2$loadings), 3) # should be identity 5x5
# verbose
m1 = nipals(B_full, verbose=TRUE)
})
test_that("Start column function", {
# corn data from C.Majer
corn <- structure(c(20.73, 23.58, 22.41, 19.97, 21.42, 24.48, 23.19, 25.73,
22.66, 23.93, 18.79, 18.56, 18.47, 18.33, 20.01, 19.03,
19.36, 21.2, 19.17, 18.17, 20.41, 17.67, 17.89, 21.41,
18.81, 22.77, NA, 19.86, 19.43, NA, 17.28, 14.9, 16.52,
14.77, 19.35, 22.46, 24.61, NA, NA, 26.16, NA, 19.48,
NA, 20.72, 17.8, 18.55, 19.56, 20.01, 20.05, 17.18,
16.29, 17.41, 15.86, 15.7, 17.84, 24.1, 27.02, 26.76,
26, 26.02, 20.63, 20.37, 21.17, 21.55, 19.12),
.Dim = c(5L, 13L),
.Dimnames = list(c("G1", "G2", "G3", "G4", "G5"),
c("E01", "E02", "E03", "E04", "E05",
"E06", "E07", "E08", "E09", "E10",
"E11", "E12", "E13")))
# specify the startcol as a function
expect_silent( nipals(corn, startcol=function(x) sum(abs(x), na.rm=TRUE)) )
# using column with maximum variance fails
# do NOT use this line...has problems when CRAN checks with --noLD
# expect_warning( nipals(corn, startcol=function(x) var(x, na.rm=TRUE)) )
})
test_that("Calculate predictions from model", {
# choose 75% of rows for training sample
set.seed(42)
ix <- sample(nrow(iris), nrow(iris)*0.75)
iris.train <- iris[ix,1:4]
iris.test <- iris[-ix,1:4]
# Pre-computed predictions for reference
p1ref <- structure(
c(-2.1634, -2.45126, -2.53552, -2.32751, -2.41099,
-0.8063, -0.52704, -0.18264, 0.02092, -1.26167, 0.26757, -0.01858,
-0.31982, 0.10316, -0.10626, -0.09459, -0.02626, 0.03138, 0.02455,
0.0337), .Dim = 5:4, .Dimnames = list(c("2", "3", "7", "8", "9"),
c("PC1", "PC2", "PC3", "PC4")))
# Method 1: Assign a class to the nipals model
m1 <- nipals(iris.train, startcol=2)
class(m1) <- "princomp"
p1 <- predict(m1, newdata=iris.test)
# Method 2: Call stats:::predict.princomp
# m2 <- nipals(iris.train[,1:4])
# stats:::predict.princomp(m2, newdata=iris.test[,1:4])
# # prcomp uses x$rotation, princomp uses x$loadings
# m3 <- m1
# m3$rotation <- m3$loadings
# stats:::predict.prcomp(m3, newdata=iris.valid[,1:4])
#
# # princomp
# m4 <- princomp(iris.train[,1:4])
# predict(m4, newdata=iris.valid[,1:4])
})
test_that("avg_angular_distance", {
# https://math.stackexchange.com/questions/2113634/
rot1 <- matrix(c(-0.956395958, -0.292073218, 0.000084963,
0.292073230, -0.956395931, 0.000227268,
0.000014880, 0.000242173, 0.999999971),
ncol=3, byrow=TRUE)
rot2 <- matrix(c(-0.956227882, -0.292623029, -0.000021887,
0.292623030, -0.956227882, -0.000024473,
-0.000013768, -0.000029806, 0.999999999),
ncol=3, byrow=TRUE)
expect_equal(avg_angular_distance(rot1, rot2), .0004950387)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.