tests/testthat/test-nipals.R

# test-nipals.R

# see setup-expectations.R for expect_aligned

# Note, I looked at the python repository:
# https://github.com/fredrikw/python-nipals/blob/master/tests/test_nipals.py
# I did not understand the tests and so did not incorporate the yarn
# data or oliveoil data.

## ---------------------------------------------------------------------------

# Create data for tests

B_full <- matrix(c(50, 67, 90, 98, 120,
                   55, 71, 93, 102, 129,
                   65, 76, 95, 105, 134,
                   50, 80, 102, 130, 138,
                   60, 82, 97, 135, 151,
                   65, 89, 106, 137, 153,
                   75, 95, 117, 133, 155), ncol=5, byrow=TRUE)
rownames(B_full) <- c("G1","G2","G3","G4","G5","G6","G7")
colnames(B_full) <- c("E1","E2","E3","E4","E5")

# Same data with 2 NAs
B_miss <- matrix(c(NA, 67, 90, 98, 120,
                   NA, 71, 93, 102, 129,
                   65, 76, 95, 105, 134,
                   50, 80, 102, 130, 138,
                   60, 82, 97, 135, 151,
                   65, 89, 106, 137, 153,
                   75, 95, 117, 133, 155), ncol=5, byrow=TRUE)
rownames(B_miss) <- c("G1","G2","G3","G4","G5","G6","G7")
colnames(B_miss) <- c("E1","E2","E3","E4","E5")

## ---------------------------------------------------------------------------

test_that("For complete-data matrix, fitted values match original data", {

  m1 <- nipals(B_full, center=TRUE, scale=TRUE, fitted=TRUE, tol=1e-8)
  expect_equal( round( m1$fitted, 3), B_full )

  m2 <- nipals(B_full, center=TRUE, scale=FALSE, tol=1e-8, fitted=TRUE)
  expect_equal( round( m2$fitted, 3), B_full )
  
  m3 <- nipals(B_full, center=FALSE, scale=FALSE, tol=1e-8, fitted=TRUE)
  expect_equal( round( m3$fitted, 3), B_full )
  
  m4 <- nipals(B_full, center=FALSE, scale=TRUE, tol=1e-8, fitted=TRUE)
  expect_equal( round( m4$fitted, 3), B_full )
    
})

test_that("Estimate fitted values for NAs", {
  # Published examples of NIPALS with missing values are almost nonexistant.
  # Here is an example with XLSTAT (details on methodology are limited)
  # 
  # Circa 2017 XLSTAT estimated the missing values below as
  # https://help.xlstat.com/customer/en/portal/articles/2062415-missing-data-imputation-using-nipals-in-excel?b_id=9283
  #   1365.236, 88.600, 175.798, 1051.698, 432.470, 168.554
  #
  # In 2019, XLSTAT estimates
  # https://help.xlstat.com/6497-missing-data-imputation-using-nipals-excel
  #   1398.829, 88.151, 178.313, 1060.534, 444.254, 169.686
  #
  auto <- data.frame(capacity = c(1396, 1721, 1580, 1769, 2068, 1769L),
                     power = c(90, 92, 83, 90, 88, 90L),
                     speed = c(174, 180, 170, 180, 180, 182L),
                     weight = c(850, 965, 970, 1080, 1135, 1060L),
                     length = c(369, 415, 395, 440, 446, 424L),
                     width = c(166, 169, 170, 169, 170, 168L))
  rownames(auto) <- c("Honda civic", "Renault 19", "Fiat Tipo",
                      "Peugeot 405", "Renault 21", "Citroen BX")
  # create missing values along the diagonal
  auto[1,1] <- auto[2,2] <- auto[3,3] <- auto[4,4] <- auto[5,5] <- auto[6,6] <- NA

  library(nipals)
  # These settings give results similar to XLstat 
  m1 <- nipals(auto, fitted=TRUE, gramschmidt=FALSE)
  expect_equal(diag(m1$fitted),
               c(1365.236, 88.600, 175.798, 1051.698, 432.470, 168.554),
               tol=1e-1)

  # Test Github issue #2
  expect_silent(nipals(auto[,-1], ncomp=1, fitted=TRUE))
})

test_that("Code coverage of nipals function arguments", {
  
  library(nipals)
  Bnarow = B_full
  Bnarow[1,] = NA
  expect_error(nipals(Bnarow))
  Bnacol = B_full
  Bnacol[,1] = NA
  expect_error(nipals(Bnacol))
  
  # ncomp
  m1 = nipals(B_miss, ncomp=1)
  
  # center/scale
  m1 = nipals(B_miss, center=FALSE, scale=FALSE)
  m1 = nipals(B_miss, center=TRUE, scale=FALSE)
  m1 = nipals(B_miss, center=TRUE, scale=TRUE)
  
  # maxiter
  expect_warning(nipals(B_full, maxiter=2))

  # tol
  m1 = nipals(B_miss, tol=1e-1, verbose=TRUE)
  m1 = nipals(B_miss, tol=1e-10, verbose=TRUE)

  # startcol
  m1 = nipals(B_full, startcol=0, verbose=TRUE)
  m1 = nipals(B_full, startcol=5, verbose=TRUE)

  # fitted
  expect_null(nipals(B_full)$fitted)
  expect_null(nipals(B_full, fitted=FALSE)$fitted)
  expect_false(is.null(nipals(B_full, fitted=TRUE)$fitted))

  # force.na
  m1 = nipals(B_full, force.na=FALSE)
  m1 = nipals(B_full, force.na=TRUE)
  
  # gramschmidt
  m1 = nipals(B_miss, gramschmidt=FALSE) # default
  round(crossprod(m1$loadings), 3)  # 1 on diagonal, but not identity
  m2 = nipals(B_miss, gramschmidt=TRUE)
  round(crossprod(m2$loadings), 3)  # should be identity 5x5
  
  # verbose
  m1 = nipals(B_full, verbose=TRUE)
  
})

test_that("Start column function", {
  # corn data from C.Majer
  corn <- structure(c(20.73, 23.58, 22.41, 19.97, 21.42, 24.48, 23.19, 25.73,
                      22.66, 23.93, 18.79, 18.56, 18.47, 18.33, 20.01, 19.03, 
                      19.36, 21.2, 19.17, 18.17, 20.41, 17.67, 17.89, 21.41,
                      18.81, 22.77, NA, 19.86, 19.43, NA, 17.28, 14.9, 16.52,
                      14.77, 19.35, 22.46, 24.61, NA, NA, 26.16, NA, 19.48,
                      NA, 20.72, 17.8, 18.55, 19.56, 20.01, 20.05, 17.18,
                      16.29, 17.41, 15.86, 15.7, 17.84, 24.1, 27.02, 26.76,
                      26, 26.02, 20.63, 20.37, 21.17, 21.55, 19.12),
                    .Dim = c(5L, 13L),
                    .Dimnames = list(c("G1", "G2", "G3", "G4", "G5"),
                                     c("E01", "E02", "E03", "E04", "E05",
                                       "E06", "E07", "E08", "E09", "E10",
                                       "E11", "E12", "E13")))
  # specify the startcol as a function
  expect_silent( nipals(corn, startcol=function(x) sum(abs(x), na.rm=TRUE)) )
  # using column with maximum variance fails
  # do NOT use this line...has problems when CRAN checks with --noLD
  # expect_warning( nipals(corn, startcol=function(x) var(x, na.rm=TRUE)) )
  
})

test_that("Calculate predictions from model", {
  
  # choose 75% of rows for training sample
  set.seed(42)
  ix <- sample(nrow(iris), nrow(iris)*0.75)
  iris.train <- iris[ix,1:4]
  iris.test <- iris[-ix,1:4]

  # Pre-computed predictions for reference
  p1ref <- structure(
    c(-2.1634, -2.45126, -2.53552, -2.32751, -2.41099, 
      -0.8063, -0.52704, -0.18264, 0.02092, -1.26167, 0.26757, -0.01858, 
      -0.31982, 0.10316, -0.10626, -0.09459, -0.02626, 0.03138, 0.02455, 
      0.0337), .Dim = 5:4, .Dimnames = list(c("2", "3", "7", "8", "9"),
                                            c("PC1", "PC2", "PC3", "PC4")))

  # Method 1: Assign a class to the nipals model
  m1 <- nipals(iris.train, startcol=2)
  class(m1) <- "princomp"
  p1 <- predict(m1, newdata=iris.test)

  # Method 2: Call stats:::predict.princomp
  # m2 <- nipals(iris.train[,1:4])
  # stats:::predict.princomp(m2, newdata=iris.test[,1:4])
    
  # # prcomp uses x$rotation, princomp uses x$loadings
  # m3 <- m1
  # m3$rotation <- m3$loadings
  # stats:::predict.prcomp(m3, newdata=iris.valid[,1:4])
  # 
  # # princomp 
  # m4 <- princomp(iris.train[,1:4])
  # predict(m4, newdata=iris.valid[,1:4])
  
})

test_that("avg_angular_distance", {
  #  https://math.stackexchange.com/questions/2113634/
  rot1 <- matrix(c(-0.956395958, -0.292073218, 0.000084963,
                   0.292073230, -0.956395931, 0.000227268,
                   0.000014880, 0.000242173, 0.999999971),
                 ncol=3, byrow=TRUE)
  rot2 <- matrix(c(-0.956227882, -0.292623029, -0.000021887,
                   0.292623030, -0.956227882, -0.000024473,
                   -0.000013768, -0.000029806, 0.999999999),
                 ncol=3, byrow=TRUE)
  expect_equal(avg_angular_distance(rot1, rot2), .0004950387)
})
kwstat/nipals documentation built on Feb. 6, 2024, 7:20 a.m.