tests/testthat/test-gkwreg-predict-tests.R

# Test Suite for predict.gkwreg Method
# Author: Lopes, J. E.
# Description: Comprehensive tests for the predict() method for gkwreg objects
#              covering different prediction types, newdata handling, and special features

library(testthat)
library(gkwreg)
library(gkwdist)

# Setup: Generate test data and fit model
setup_predict_data <- function() {
  set.seed(54321)
  n <- 200
  x1 <- rnorm(n)
  x2 <- runif(n, -1, 1)
  x3 <- factor(sample(c("A", "B"), n, replace = TRUE))

  # Generate Kumaraswamy response
  alpha <- exp(0.6 + 0.25 * x1)
  beta <- exp(1.1 - 0.18 * x2 + 0.3 * (x3 == "B"))
  y <- rkw(n, alpha = alpha, beta = beta)

  # Ensure strictly in (0, 1)
  y <- pmax(pmin(y, 1 - 1e-10), 1e-10)

  list(
    data = data.frame(y = y, x1 = x1, x2 = x2, x3 = x3),
    alpha_true = alpha,
    beta_true = beta
  )
}

# =============================================================================
# CORE PREDICT TESTS
# =============================================================================

test_that("Test 1: Basic prediction with newdata returns correct response type", {
  # Test predicting mean response on new data
  setup <- setup_predict_data()

  # Fit model on first 150 observations
  train <- setup$data[1:150, ]
  test <- setup$data[151:200, ]

  fit <- gkwreg(y ~ x1 | x2 + x3, data = train, family = "kw")

  # Predict on test data
  pred <- predict(fit, newdata = test, type = "response")

  expect_type(pred, "double")
  expect_length(pred, nrow(test))
  expect_true(all(pred > 0 & pred < 1))
  expect_true(all(is.finite(pred)))
})

test_that("Test 2: Prediction without newdata uses original data correctly", {
  # Test that omitting newdata produces fitted values
  setup <- setup_predict_data()

  fit <- gkwreg(y ~ x1 + x2, data = setup$data, family = "kw")

  # Predict without newdata
  pred_no_newdata <- predict(fit, type = "response")

  # Should match fitted values
  expect_equal(pred_no_newdata, fitted(fit), tolerance = 1e-10)
  expect_length(pred_no_newdata, nrow(setup$data))
})


test_that("Test 6: Density calculation at specified points works correctly", {
  # Test type = "density" evaluates PDF at given points
  setup <- setup_predict_data()

  fit <- gkwreg(y ~ x1, data = setup$data, family = "kw")

  # Evaluate density at specific points
  at_values <- c(0.1, 0.3, 0.5, 0.7, 0.9)
  pred_dens <- predict(fit, type = "density", at = at_values, elementwise = FALSE)

  expect_true(is.matrix(pred_dens) || is.array(pred_dens))
  expect_equal(nrow(pred_dens), nrow(setup$data))
  expect_equal(ncol(pred_dens), length(at_values))
  expect_true(all(pred_dens >= 0))
  expect_true(all(is.finite(pred_dens)))
})

test_that("Test 7: Probability/CDF calculation returns values in [0,1]", {
  # Test type = "probability" evaluates CDF at given points
  setup <- setup_predict_data()

  fit <- gkwreg(y ~ x1 + x2, data = setup$data, family = "kw")

  # Evaluate CDF at specific points
  at_values <- c(0.2, 0.4, 0.6, 0.8)
  pred_prob <- predict(fit, type = "probability", at = at_values, elementwise = FALSE)

  expect_true(is.matrix(pred_prob) || is.array(pred_prob))
  expect_equal(nrow(pred_prob), nrow(setup$data))
  expect_equal(ncol(pred_prob), length(at_values))

  # CDF values must be in [0, 1]
  expect_true(all(pred_prob >= 0 & pred_prob <= 1))

  # CDF should be monotonically increasing
  for (i in 1:nrow(pred_prob)) {
    expect_true(all(diff(pred_prob[i, ]) >= 0))
  }
})

test_that("Test 8: Quantile calculation returns values in (0,1) for valid probabilities", {
  # Test type = "quantile" computes inverse CDF
  setup <- setup_predict_data()

  fit <- gkwreg(y ~ x1, data = setup$data, family = "kw")

  # Compute quantiles for specific probabilities
  probs <- c(0.1, 0.25, 0.5, 0.75, 0.9)
  pred_quant <- predict(fit, type = "quantile", at = probs, elementwise = FALSE)

  expect_true(is.matrix(pred_quant) || is.array(pred_quant))
  expect_equal(nrow(pred_quant), nrow(setup$data))
  expect_equal(ncol(pred_quant), length(probs))

  # Quantiles must be in (0, 1) for bounded distribution
  expect_true(all(pred_quant > 0 & pred_quant < 1))

  # Quantiles should be monotonically increasing
  for (i in 1:nrow(pred_quant)) {
    expect_true(all(diff(pred_quant[i, ]) >= 0))
  }
})

test_that("Test 9: Elementwise parameter controls behavior correctly", {
  # Test elementwise = TRUE vs FALSE for density/probability/quantile
  setup <- setup_predict_data()

  fit <- gkwreg(y ~ x1, data = setup$data, family = "kw")

  # Create observation-specific values
  n_obs <- 50
  test_data <- setup$data[1:n_obs, ]
  at_values <- runif(n_obs, 0.1, 0.9)

  # Elementwise = TRUE: each observation gets its own value
  pred_elem <- predict(fit,
    newdata = test_data, type = "density",
    at = at_values, elementwise = TRUE
  )

  expect_type(pred_elem, "double")
  expect_length(pred_elem, n_obs)

  # Elementwise = FALSE: all values applied to all observations
  pred_non_elem <- predict(fit,
    newdata = test_data[1:5, ], type = "density",
    at = c(0.3, 0.5, 0.7), elementwise = FALSE
  )

  expect_true(is.matrix(pred_non_elem))
  expect_equal(nrow(pred_non_elem), 5)
  expect_equal(ncol(pred_non_elem), 3)
})

# =============================================================================
# ADDITIONAL EDGE CASE AND VALIDATION TESTS
# =============================================================================

test_that("Predict handles different families correctly", {
  # Test predictions across different distribution families
  setup <- setup_predict_data()

  fit_kw <- gkwreg(y ~ x1, data = setup$data, family = "kw")
  fit_beta <- gkwreg(y ~ x1, data = setup$data, family = "beta")
  fit_ekw <- gkwreg(y ~ x1, data = setup$data, family = "ekw")

  pred_kw <- predict(fit_kw, type = "response")
  pred_beta <- predict(fit_beta, type = "response")
  pred_ekw <- predict(fit_ekw, type = "response")

  expect_true(all(is.finite(pred_kw)))
  expect_true(all(is.finite(pred_beta)))
  expect_true(all(is.finite(pred_ekw)))

  # All should be in (0, 1)
  expect_true(all(pred_kw > 0 & pred_kw < 1))
  expect_true(all(pred_beta > 0 & pred_beta < 1))
  expect_true(all(pred_ekw > 0 & pred_ekw < 1))
})

test_that("Predict type aliases work correctly", {
  # Test that "mean" = "response", "pdf" = "density", "cdf" = "probability"
  setup <- setup_predict_data()

  fit <- gkwreg(y ~ x1, data = setup$data, family = "kw")

  at_vals <- c(0.3, 0.5, 0.7)
  pred_density <- predict(fit, type = "density", at = at_vals, elementwise = FALSE)
  pred_pdf <- predict(fit, type = "pdf", at = at_vals, elementwise = FALSE)
  expect_equal(pred_density, pred_pdf)

  pred_probability <- predict(fit, type = "probability", at = at_vals, elementwise = FALSE)
  pred_cdf <- predict(fit, type = "cdf", at = at_vals, elementwise = FALSE)
  expect_equal(pred_probability, pred_cdf)
})

test_that("Predict with factor variables in formula works", {
  # Test predictions with categorical predictors
  setup <- setup_predict_data()

  fit <- gkwreg(y ~ x3 | x3, data = setup$data, family = "kw")

  # Create newdata with both factor levels
  newdata <- data.frame(x3 = factor(c("A", "B", "A", "B")))

  pred <- predict(fit, newdata = newdata, type = "response")

  expect_length(pred, 4)
  expect_true(all(is.finite(pred)))

  # Predictions for same level should be identical
  expect_equal(pred[1], pred[3])
  expect_equal(pred[2], pred[4])
})

test_that("Predict with interaction terms works correctly", {
  # Test predictions with interaction effects
  setup <- setup_predict_data()

  fit <- gkwreg(y ~ x1 * x2, data = setup$data, family = "kw")

  newdata <- data.frame(
    x1 = c(0, 1, 0, 1),
    x2 = c(0, 0, 1, 1)
  )

  pred <- predict(fit, newdata = newdata, type = "response")

  expect_length(pred, 4)
  expect_true(all(is.finite(pred)))
  expect_true(all(pred > 0 & pred < 1))
})

test_that("Predict CDF and quantile are inverse operations", {
  # Test that quantile(CDF(y)) ≈ y
  setup <- setup_predict_data()

  fit <- gkwreg(y ~ x1, data = setup$data, family = "kw")

  # Take first 10 observations
  test_data <- setup$data[1:10, ]
  y_values <- test_data$y

  # Compute CDF at y values (elementwise)
  cdf_at_y <- predict(fit,
    newdata = test_data, type = "probability",
    at = y_values, elementwise = TRUE
  )

  # Compute quantiles at those CDF values (elementwise)
  quant_at_cdf <- predict(fit,
    newdata = test_data, type = "quantile",
    at = cdf_at_y, elementwise = TRUE
  )

  # Should approximately recover original y values
  expect_equal(quant_at_cdf, y_values, tolerance = 1e-4)
})

test_that("Predict density integrates to approximately 1", {
  # Test that PDF integrates to 1 (numerical check)
  setup <- setup_predict_data()

  fit <- gkwreg(y ~ 1, data = setup$data[1:5, ], family = "kw")

  # Create fine grid
  y_grid <- seq(0.001, 0.999, length.out = 1000)

  # Get density for first observation
  dens <- predict(fit,
    newdata = setup$data[1, , drop = FALSE],
    type = "density", at = y_grid, elementwise = FALSE
  )

  # Approximate integral using trapezoidal rule
  dy <- diff(y_grid)
  integral <- sum((dens[1, -1] + dens[1, -length(dens[1, ])]) / 2 * dy)

  # Should be close to 1
  expect_equal(integral, 1, tolerance = 0.05)
})


test_that("Predict with default at value works for density", {
  # Test default at = 0.5 for density/probability
  setup <- setup_predict_data()

  fit <- gkwreg(y ~ x1, data = setup$data, family = "kw")

  # Should use default at = 0.5
  pred_dens_default <- predict(fit, type = "density")
  pred_dens_explicit <- predict(fit, type = "density", at = 0.5, elementwise = FALSE)

  expect_equal(pred_dens_default[, 1], pred_dens_explicit[, 1])
})

test_that("Predict handles single observation correctly", {
  # Test prediction for single row of data
  setup <- setup_predict_data()

  fit <- gkwreg(y ~ x1 + x2, data = setup$data, family = "kw")

  single_obs <- setup$data[1, , drop = FALSE]
  pred_single <- predict(fit, newdata = single_obs, type = "response")

  expect_length(pred_single, 1)
  expect_true(is.finite(pred_single))
  expect_true(pred_single > 0 & pred_single < 1)
})

test_that("Predict with very large newdata works efficiently", {
  # Test scalability with large prediction dataset
  setup <- setup_predict_data()

  fit <- gkwreg(y ~ x1 + x2, data = setup$data[1:100, ], family = "kw")

  # Create large newdata
  large_newdata <- data.frame(
    x1 = rnorm(10000),
    x2 = runif(10000, -1, 1)
  )

  # Should complete without error
  expect_no_error(
    pred_large <- predict(fit, newdata = large_newdata, type = "response")
  )

  expect_length(pred_large, 10000)
  expect_true(all(is.finite(pred_large)))
})

test_that("Predict with missing required 'at' parameter errors appropriately", {
  # Test that density/probability/quantile require 'at' parameter
  setup <- setup_predict_data()

  fit <- gkwreg(y ~ x1, data = setup$data, family = "kw")

  # These should work with default at = 0.5 or error if at is required
  expect_no_error(
    predict(fit, type = "density")
  )
})

Try the gkwreg package in your browser

Any scripts or data that you put into this service are public.

gkwreg documentation built on Nov. 27, 2025, 5:06 p.m.