# tests/testthat/test-EMLeastSquaresClassifier.R

context("Expectation Maximization Like Least Squares Classifier")

# Shared fixtures: model formula, labeled (X, y) and unlabeled (X_u)
# training data, and a held-out test set
data(testdata)
modelform <- testdata$modelform
classname <- all.vars(modelform)[1]
D <- testdata$D
D_test <- testdata$D_test
X <- testdata$X
X_u <- testdata$X_u
y <- testdata$y
X_test <- testdata$X_test
y_test <- testdata$y_test

set.seed(1)

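# The formula interface and the matrix interface should yield the same
# classifier: identical predictions, losses and class labels.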
test_that("Formula and matrix formulation give same results",{
  g_matrix <- EMLeastSquaresClassifier(X,factor(y),X_u)
  g_model <- EMLeastSquaresClassifier(modelform, D)
  
  expect_equal(predict(g_matrix,X_test),predict(g_model,D_test)) 
  expect_equal(loss(g_matrix, X_test, y_test),loss(g_model, D_test)) 
  expect_equal(g_matrix@classnames,g_model@classnames)
})

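# Centering the inputs (x_center) should not change the loss when an
# intercept and scaling are used.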
test_that("Different settings return the same loss",{
  g_1 <- EMLeastSquaresClassifier(X,y,X_u,intercept=TRUE,scale=TRUE)
  g_2 <- EMLeastSquaresClassifier(X,y,X_u,intercept=TRUE,x_center=TRUE,scale=TRUE)
  
  expect_equal(loss(g_1,X_test,y_test), loss(g_2,X_test,y_test), tolerance=1e-5)
  
  # Scaling the output (y_scale) gives a different loss than g_1 and g_2,
  # but the result should not depend on the order of the arguments
  g_3 <- EMLeastSquaresClassifier(X,y,X_u,intercept=TRUE,x_center=TRUE,scale=TRUE,y_scale=TRUE)
  g_4 <- EMLeastSquaresClassifier(X,y,X_u,intercept=TRUE,scale=TRUE,x_center=TRUE,y_scale=TRUE)
  
  expect_equal(loss(g_3,X_test,y_test), loss(g_4,X_test,y_test), tolerance=1e-5)
})

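# With hard responsibilities ("responsibility" objective), supervised
# initialization and blockwise updates, EM should retrace self-learning
# around the least squares classifier and reach the same solution.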
test_that("Hard label EM and self-learning give the same result", {
  
  data <- generate2ClassGaussian(200,d=2, expected=FALSE)
  data <- add_missinglabels_mar(data, Class~., prob=0.9)
  data_test <- generate2ClassGaussian(200,d=2, expected=FALSE)
  
  g_block <- EMLeastSquaresClassifier(
    Class~., data,
    method="block", objective="responsibility",
    init="supervised", save_all=TRUE
  )
  
  g_self <- SelfLearning(Class~.,data,method=LeastSquaresClassifier)
  
  expect_equal(loss(g_block,data_test), loss(g_self,data_test))
  expect_equal(g_block@theta, g_self@model@theta)
})

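# Check the analytic gradient of the minmin_lsy objective against a
# numerical gradient. The parameter vector concatenates the classifier
# weights (one per column of the intercept-augmented design matrix) with
# one responsibility per unlabeled object.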
test_that("Gradient superficially correct",{
  library("numDeriv")
  
  data(testdata)
  
  X <- cbind(1,testdata$X)
  X_u <- cbind(1,testdata$X_u)
  Xe <- rbind(X,X_u)
  Y <- model.matrix(~y-1,data.frame(y=testdata$y))[,1,drop=FALSE]
  
  for (i in 1:100) {
    w <- c(rnorm(ncol(X)),runif(nrow(X_u)))
    
    grad_num <- as.numeric(
      numDeriv::grad(
        loss_minmin_lsy,
        w, Xe=Xe, Y=Y, X_u=X_u,
        method="simple")
    )
    
    grad_exact <- as.numeric(
      gradient_minmin_lsy(
        w, Xe=Xe, Y=Y, X_u=X_u)
    )
    
    expect_equal(grad_num,grad_exact,
                 tolerance=1e-3)
  }
})

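# The same numerical check for the minmin_lsq variant of the objective.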
test_that("Gradient superficially correct",{
  library("numDeriv")
  
  data(testdata)
  
  X <- cbind(1,testdata$X)
  X_u <- cbind(1,testdata$X_u)
  Xe <- rbind(X,X_u)
  Y <- model.matrix(~y-1,data.frame(y=testdata$y))[,1,drop=FALSE]
  
  for (i in 1:100) {
    w <- c(rnorm(ncol(X)),runif(nrow(X_u)))
    
    grad_num <- as.numeric(
      numDeriv::grad(
        loss_minmin_lsq,
        w, Xe=Xe, Y=Y, X_u=X_u,X=X,
        method="simple")
    )
    
    grad_exact <- as.numeric(
      gradient_minmin_lsq(
        w, Xe=Xe, Y=Y, X_u=X_u,X=X)
    )
    
    expect_equal(grad_num,grad_exact,
                 tolerance=1e-3)
  }
})
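
# A possible shared helper for the two gradient checks above. This is a
# sketch, not part of the original suite, and the name check_gradient is
# ours: it compares a "simple" numerical gradient against the analytic
# gradient for any matching loss/gradient pair.
check_gradient <- function(loss_fun, grad_fun, w, ..., tolerance=1e-3) {
  # Arguments in ... (e.g. Xe, Y, X_u) are forwarded to both functions
  grad_num <- as.numeric(numDeriv::grad(loss_fun, w, method="simple", ...))
  grad_exact <- as.numeric(grad_fun(w, ...))
  expect_equal(grad_num, grad_exact, tolerance=tolerance)
}
#
# Usage, equivalent to one iteration of the loops above:
# check_gradient(loss_minmin_lsy, gradient_minmin_lsy,
#                w, Xe=Xe, Y=Y, X_u=X_u)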
