R/loss_fun.R
In NBKP: Beta Kernel Process Modeling with Negative Binomial Response

Documented in loss_fun

#' @title Loss Function for Kernel Process Model Tuning
#'
#' @description
#' Computes prediction loss for kernel process models (BKP binary / DKP multi-class)
#' during kernel hyperparameter tuning. Supports Brier score (MSE) and log-loss (cross-entropy)
#' with leave-one-out cross-validation (LOOCV).
#'
#' @details
#' This function is used internally to optimize kernel lengthscales.
#' It converts log10 hyperparameters \code{gamma} to lengthscales \code{theta},
#' computes the kernel matrix, applies LOOCV (diag(K)=0), and calculates loss
#' between estimated and empirical probabilities.
#'
#' @param gamma Numeric vector; log10-transformed kernel lengthscale parameters.
#' @param Xnorm Numeric matrix; normalized input matrix (values in [0,1]).
#' @param y Numeric vector; observed success counts (BKP model only).
#' @param m Numeric vector; number of trials (BKP model only).
#' @param Y Numeric matrix; observed class counts (DKP model only).
#' @param model Character string; model type: \code{"BKP"} (binary) or \code{"DKP"} (multi-class).
#' @param prior Character string; prior type: \code{"noninformative"}, \code{"fixed"}, \code{"adaptive"}.
#' @param r0 Numeric scalar; prior precision (default = 2).
#' @param p0 Numeric; global prior mean (for fixed prior).
#' @param loss Character string; loss type: \code{"brier"} (MSE) or \code{"log_loss"} (cross-entropy).
#' @param kernel Character string; kernel function: \code{"gaussian"}, \code{"matern52"}, \code{"matern32"}.
#'
#' @return Numeric scalar; loss value to minimize during optimization.
#'
#' @examples
#' \donttest{
#' # -------------------------- BKP Example --------------------------
#' set.seed(123)
#' n <- 10
#' Xnorm <- matrix(runif(2 * n), ncol = 2)
#' m <- rep(10, n)
#' y <- rbinom(n, size = m, prob = runif(n))
#' loss_fun(gamma = rep(0, 2), Xnorm = Xnorm, y = y, m = m, model = "BKP")
#'
#' # -------------------------- DKP Example --------------------------
#' set.seed(123)
#' n <- 10
#' q <- 3
#' Xnorm <- matrix(runif(2 * n), ncol = 2)
#' Y <- matrix(rmultinom(n, size = 10, prob = rep(1/q, q)), nrow = n, byrow = TRUE)
#' loss_fun(gamma = rep(0, 2), Xnorm = Xnorm, Y = Y, model = "DKP")
#' }
#'
#' @seealso
#' \code{\link{get_prior}}, \code{\link{kernel_matrix}}
#'
#' @references
#' Zhao J, Qing K, Xu J (2025).
#' \emph{BKP: An R Package for Beta Kernel Process Modeling}.
#' arXiv. https://doi.org/10.48550/arXiv.2508.10447
#'
#' @export
loss_fun <- function(
    gamma, Xnorm,
    y = NULL, m = NULL, Y = NULL,
    model = c("BKP", "DKP"),
    prior = c("noninformative", "fixed", "adaptive"), r0 = 2, p0 = NULL,
    loss = c("brier", "log_loss"),
    kernel = c("gaussian", "matern52", "matern32"))
{
  # ---- Argument checking ----
  if (!is.numeric(gamma)) stop("'gamma' must be a numeric vector.")
  if (!is.matrix(Xnorm) || anyNA(Xnorm)) stop("'Xnorm' must be a numeric matrix with no NA.")

  model <- match.arg(model)
  prior <- match.arg(prior)
  loss <- match.arg(loss)
  kernel <- match.arg(kernel)

  if (model == "BKP") {
    if (is.null(y) || is.null(m)) stop("'y' and 'm' must be provided for BKP model.")
    if (!is.numeric(y) || !is.numeric(m)) stop("'y' and 'm' must be numeric vectors.")
    if (any(y < 0) || any(m <= 0) || any(y > m)) stop("'y' must be in [0,m] and 'm' > 0.")
    if (length(y) != nrow(Xnorm) || length(m) != nrow(Xnorm)) {
      stop("'y' and 'm' must have the same length as number of rows in 'Xnorm'.")
    }
  } else {
    if (is.null(Y)) stop("'Y' must be provided for DKP model.")
    if (!is.matrix(Y) || anyNA(Y) || any(Y < 0)) stop("'Y' must be a numeric matrix with no NA and nonnegative entries.")
    if (nrow(Y) != nrow(Xnorm)) stop("Number of rows in 'Y' must match number of rows in 'Xnorm'.")
  }

  if (!is.numeric(r0) || length(r0) != 1 || r0 <= 0) stop("'r0' must be a positive scalar.")
  if (!is.null(p0) && (!is.numeric(p0) || any(p0 < 0))) stop("'p0' must be numeric and nonnegative.")

  # Convert gamma to kernel hyperparameters (theta = 10^gamma)
  theta <- 10^gamma

  # Compute kernel matrix using specified kernel and theta
  K <- kernel_matrix(Xnorm, theta = theta, kernel = kernel)
  diag(K) <- 0  # Leave-One-Out Cross-Validation (LOOCV)

  if (model == "BKP") {
    ## -------- Binary case (Beta-Binomial) --------
    # get the prior parameters: alpha0(x) and beta0(x)
    prior_par <- get_prior(prior = prior, model = model, r0 = r0, p0 = p0, y = y, m = m, K = K)
    alpha0 <- prior_par$alpha0
    beta0 <- prior_par$beta0

    # Compute posterior alpha and beta
    alpha_n <- alpha0 + as.vector(K %*% y)
    beta_n <- beta0 + as.vector(K %*% (m - y))

    # Posterior mean prediction of success probability
    pi_hat <- alpha_n / (alpha_n + beta_n)
    pi_hat <- pmin(pmax(pi_hat, 1e-10), 1 - 1e-10)   # avoid log(0)

    # Empirical success rate
    pi_tilde <- y / m

    # ---------------- Loss computation ----------------
    if (loss == "brier") {
      # Standard Brier score (mean squared error)
      brier <- mean((pi_hat - pi_tilde)^2)
      return(brier)
    } else {
      # Standard log-loss (cross-entropy)
      log_loss <- -mean(pi_tilde * log(pi_hat) + (1 - pi_tilde) * log(1 - pi_hat))
      return(log_loss)
    }
  } else {
    ## -------- Multiclass case (Dirichlet-Multinomial) --------
    # get the prior parameters: alpha0(x)
    alpha0 <- get_prior(prior = prior, model = model, r0 = r0, p0 = p0, Y = Y, K = K)

    # Compute posterior alpha
    alpha_n <- as.matrix(alpha0) + as.matrix(K %*% Y)

    # Posterior mean prediction of success probability
    pi_hat <- alpha_n / rowSums(alpha_n)
    pi_hat <- pmin(pmax(pi_hat, 1e-6), 1 - 1e-6)   # avoid log(0)

    # Empirical class probabilities
    pi_tilde <- Y / rowSums(Y)

    if (loss == "brier") {
      # Brier score (Mean Squared Error)
      brier <- mean((pi_hat - pi_tilde)^2)
      return(brier)
    } else {
      # log-loss (cross-entropy)
      log_loss <- -mean(pi_tilde * log(pi_hat))
      return(log_loss)
    }
  }
}