tests/testthat/test-predictions.R

library(testthat)
library(GeDS) 

test_that("IRIS - NGeDSgam predictions consistency", {
  # Prepare the iris dataset
  iris_subset <- subset(iris, Species %in% c("setosa", "versicolor"))
  iris_subset$Species <- factor(iris_subset$Species)
  
  # Compute ranges from iris_subset
  ranges <- lapply(iris_subset[, 1:4], range)
  
  # Filter iris for observations within these ranges
  within_ranges <- with(iris, 
                        Sepal.Length >= ranges$Sepal.Length[1] & Sepal.Length <= ranges$Sepal.Length[2] &
                          Sepal.Width  >= ranges$Sepal.Width[1]  & Sepal.Width  <= ranges$Sepal.Width[2] &
                          Petal.Length >= ranges$Petal.Length[1] & Petal.Length <= ranges$Petal.Length[2] &
                          Petal.Width  >= ranges$Petal.Width[1]  & Petal.Width  <= ranges$Petal.Width[2]
  )
  iris_in_range <- iris[within_ranges, ]
  combined <- rbind(iris_subset, iris_in_range)
  new_rows <- !duplicated(combined)[(nrow(iris_subset) + 1):nrow(combined)]
  iris_in_range_new <- iris_in_range[new_rows, ]
  
  for (normalize in c(TRUE, FALSE)) {
    # Run NGeDSgam silently
    invisible(capture.output({
      Gmodgam <- suppressWarnings(
        NGeDSgam(Species ~ f(Sepal.Length, Sepal.Width) + f(Petal.Length) + Petal.Width,
                 data = iris_subset, family = binomial(link = "cauchit"), normalize_data = normalize,
                 phi_gam_exit = 0.8)
        )
      }))
    
    # Check that prediction differences are essentially zero
    expect_equal(
      predict(Gmodgam, newdata = iris_subset, type = "response", n = 2),
      Gmodgam$predictions$pred_linear,
      tolerance = 1e-8
    )
    
    expect_equal(
      predict(Gmodgam, newdata = iris_subset, type = "response", n = 3),
      Gmodgam$predictions$pred_quadratic,
      tolerance = 1e-8
    )
    
    expect_equal(
      predict(Gmodgam, newdata = iris_subset, type = "response", n = 4),
      Gmodgam$predictions$pred_cubic,
      tolerance = 1e-8
    )
    
    for (ord in 2:4) {
      expect_equal(
        predict(Gmodgam, newdata = iris_subset, type = "response", n = ord),
        predict(Gmodgam, newdata = rbind(iris_subset, iris_in_range_new), type = "response", n = ord)[1:nrow(iris_subset)],
        tolerance = 1e-8
      )
    }
  }
})

test_that("IRIS - NGeDSboost predictions consistency", {
  # Prepare the iris dataset
  iris_subset <- subset(iris, Species %in% c("setosa", "versicolor"))
  iris_subset$Species <- factor(iris_subset$Species)
  
  # Compute ranges from iris_subset
  ranges <- lapply(iris_subset[, 1:4], range)
  
  # Filter iris for observations within these ranges
  within_ranges <- with(iris, 
                        Sepal.Length >= ranges$Sepal.Length[1] & Sepal.Length <= ranges$Sepal.Length[2] &
                          Sepal.Width  >= ranges$Sepal.Width[1]  & Sepal.Width  <= ranges$Sepal.Width[2] &
                          Petal.Length >= ranges$Petal.Length[1] & Petal.Length <= ranges$Petal.Length[2] &
                          Petal.Width  >= ranges$Petal.Width[1]  & Petal.Width  <= ranges$Petal.Width[2]
  )
  iris_in_range <- iris[within_ranges, ]
  combined <- rbind(iris_subset, iris_in_range)
  new_rows <- !duplicated(combined)[(nrow(iris_subset) + 1):nrow(combined)]
  iris_in_range_new <- iris_in_range[new_rows, ]
  
  for (normalize in c(TRUE, FALSE)) {
    for (init_learner in c(TRUE, FALSE)) {
      
      # Run NGeDSboost silently
      invisible(capture.output({
        Gmodboost <- suppressWarnings(
          NGeDSboost(Species ~ f(Sepal.Length, Sepal.Width) + f(Petal.Length) + f(Petal.Width),
                     data = iris_subset, family = mboost::Binomial(link = "probit"),
                     initial_learner = init_learner, normalize_data = normalize,
                     phi_boost_exit = 0.8)
          )
        }))
      
      # Check that prediction differences are essentially zero
      expect_equal(
        predict(Gmodboost, newdata = iris_subset, type = "response", n = 2),
        Gmodboost$predictions$pred_linear,
        tolerance = 1e-8
      )
      
      expect_equal(
        predict(Gmodboost, newdata = iris_subset, type = "response", n = 3),
        Gmodboost$predictions$pred_quadratic,
        tolerance = 1e-8
      )
      
      expect_equal(
        predict(Gmodboost, newdata = iris_subset, type = "response", n = 4),
        Gmodboost$predictions$pred_cubic,
        tolerance = 1e-8
      )
      
      for (ord in 2:4) {
        expect_equal(
          predict(Gmodboost, newdata = iris_subset, type = "response", n = ord),
          predict(Gmodboost, newdata = rbind(iris_subset, iris_in_range_new), type = "response", n = ord)[1:nrow(iris_subset)],
          tolerance = 1e-8
        )
      }
      
    }
  }
})

### MTCARS - NGeDSgam Checks
test_that("MTCARS - NGeDSgam predictions consistency", {
  data(mtcars)
  # Convert specified variables to factors
  categorical_vars <- c("cyl", "vs", "am", "gear", "carb")
  mtcars[categorical_vars] <- lapply(mtcars[categorical_vars], factor)
  
  for (normalize in c(TRUE, FALSE)) {
    invisible(capture.output({
      Gmodgam <- suppressWarnings(
        NGeDSgam(mpg ~ cyl + f(disp, hp) + f(drat) + f(wt) + f(qsec) + vs + am + gear + carb,
                 data = mtcars, family = gaussian, normalize_data = normalize)
        )
      }))
    
    # Check prediction differences for orders 2, 3, and 4
    expect_equal(
      predict(Gmodgam, newdata = mtcars, type = "response", n = 2),
      Gmodgam$predictions$pred_linear,
      tolerance = 1e-8
    )
    expect_equal(
      predict(Gmodgam, newdata = mtcars, type = "response", n = 3),
      Gmodgam$predictions$pred_quadratic,
      tolerance = 1e-8
    )
    expect_equal(
      predict(Gmodgam, newdata = mtcars, type = "response", n = 4),
      Gmodgam$predictions$pred_cubic,
      tolerance = 1e-8
    )
    
    # Check that the sum of base learner contributions equals the overall prediction
    for (ord in 2:4) {
      pred1 = predict(Gmodgam, n = ord, newdata = mtcars,  base_learner = "cyl")
      pred2 = predict(Gmodgam, n = ord, newdata = mtcars,  base_learner = "f(disp, hp)")
      pred3 = predict(Gmodgam, n = ord, newdata = mtcars,  base_learner = "f(drat)")
      pred4 = predict(Gmodgam, n = ord, newdata = mtcars,  base_learner = "f(wt)")
      pred5 = predict(Gmodgam, n = ord, newdata = mtcars,  base_learner = "f(qsec)")
      pred6 = predict(Gmodgam, n = ord, newdata = mtcars,  base_learner = "vs")
      pred7 = predict(Gmodgam, n = ord, newdata = mtcars,  base_learner = "am")
      pred8 = predict(Gmodgam, n = ord, newdata = mtcars,  base_learner = "gear")
      pred9 = predict(Gmodgam, n = ord, newdata = mtcars,  base_learner = "carb")
      
      if (ord == 2) {
        b0 = Gmodgam$final_model$Linear.Fit$Theta["b0"]
        alpha = if(Gmodgam$args$normalize_data) 0 else mean(mtcars$mpg)
        pred0 = alpha + b0
      } else {
        pred0 = 0
      }
      sum <-  pred0 + pred1 + pred2 + pred3 + pred4 + pred5 + pred6 + pred7 + pred8 + pred9
      
      if (Gmodgam$args$normalize_data && ord == 2) {
        sum <- sum * Gmodgam$args$Y_sd + Gmodgam$args$Y_mean
      }
      
      expect_equal(
        sum,
        predict(Gmodgam, newdata = mtcars, type = "response", n = ord),
        tolerance = 1e-8
      )
    }
  }
})

### MTCARS - NGeDSboost Checks
test_that("MTCARS - NGeDSboost predictions consistency", {
  data(mtcars)
  # Convert specified variables to factors
  categorical_vars <- c("cyl", "vs", "am", "gear", "carb")
  mtcars[categorical_vars] <- lapply(mtcars[categorical_vars], factor)
  
  for (normalize in c(TRUE, FALSE)) {
    for (init_learner in c(TRUE, FALSE)) {
      invisible(capture.output({
        Gmodboost <- suppressWarnings(
          NGeDSboost(mpg ~ cyl + f(disp, hp) + f(drat) + f(wt) + f(qsec) + vs + am + gear + carb,
                     data = mtcars, family = mboost::Gaussian(), initial_learner = init_learner,
                     normalize_data = normalize)
          )
        }))
      
      # Check prediction differences for orders 2, 3, and 4
      expect_equal(
        predict(Gmodboost, newdata = mtcars, type = "response", n = 2),
        Gmodboost$predictions$pred_linear,
        tolerance = 1e-8
      )
      expect_equal(
        predict(Gmodboost, newdata = mtcars, type = "response", n = 3),
        Gmodboost$predictions$pred_quadratic,
        tolerance = 1e-8
      )
      expect_equal(
        predict(Gmodboost, newdata = mtcars, type = "response", n = 4),
        Gmodboost$predictions$pred_cubic,
        tolerance = 1e-8
      )
      
      # Check that the sum of base learner contributions equals the overall prediction
      for (ord in 2:4) {
        pred1 = predict(Gmodboost, n = ord, newdata = mtcars,  base_learner = "cyl")
        pred2 = predict(Gmodboost, n = ord, newdata = mtcars,  base_learner = "f(disp, hp)") 
        pred3 = predict(Gmodboost, n = ord, newdata = mtcars,  base_learner = "f(drat)") 
        pred4 = predict(Gmodboost, n = ord, newdata = mtcars,  base_learner = "f(wt)")
        pred5 = predict(Gmodboost, n = ord, newdata = mtcars,  base_learner = "f(qsec)")
        pred6 = predict(Gmodboost, n = ord, newdata = mtcars,  base_learner = "vs")
        pred7 = predict(Gmodboost, n = ord, newdata = mtcars,  base_learner = "am")
        pred8 = predict(Gmodboost, n = ord, newdata = mtcars,  base_learner = "gear")
        pred9 = predict(Gmodboost, n = ord, newdata = mtcars,  base_learner = "carb")
        
        if(!Gmodboost$args$initial_learner && !Gmodboost$args$normalize_data && ord == 2) {
          pred0 <- mean(mtcars$mpg)
        } else {
          pred0 <- 0
        }
        
        sum <- pred0 + pred1+pred2+pred3+pred4+pred5+pred6+pred7+pred8+pred9
        
        if (Gmodboost$args$normalize_data && ord == 2) {
          sum <- sum * Gmodboost$args$Y_sd + Gmodboost$args$Y_mean
        }
        
        expect_equal(
          sum,
          predict(Gmodboost, newdata = mtcars, type = "response", n = ord),
          tolerance = 1e-8
        )
      }
    }
  }
})
alattuada/GeDS documentation built on April 13, 2025, 7:58 p.m.