R/simulate_NBKP.R
In NBKP: Beta Kernel Process Modeling with Negative Binomial Response

Documented in simulate.NBKP

#' @rdname simulate.NBKP
#' @title Simulate from a Fitted NBKP Model
#'
#' @description Generates random draws from the posterior predictive distribution
#' of a fitted \code{NBKP} model at specified input locations.
#'
#' For NBKP models, posterior samples are generated from Gamma distributions
#' characterizing latent mean count values for negative‑binomial observations.
#'
#' @param object An object of class \code{"NBKP"}, typically returned by
#'   \code{\link{fit_NBKP}}.
#' @param Xnew A numeric matrix or vector of new input locations at which
#'   simulations are generated.
#' @param nsim Number of posterior samples to generate (default \code{1}).
#' @param seed Optional integer seed for reproducibility.
#' @param ... Additional arguments (currently unused).
#'
#' @return A list with the following components:
#' \describe{
#'   \item{\code{samples}}{A numeric matrix of size \code{nrow(Xnew) × nsim},
#'     where each column corresponds to one posterior draw of latent mean counts.}
#'   \item{\code{mean}}{A numeric vector of posterior mean count values
#'     at each \code{Xnew}.}
#'   \item{\code{X}}{The training input matrix used to fit the NBKP model.}
#'   \item{\code{Xnew}}{The new input locations at which simulations are generated.}
#' }
#'
#' @seealso \code{\link{fit_NBKP}} for model fitting;
#'   \code{\link{predict.NBKP}} for posterior prediction.
#'
#' @references Zhao J, Qing K, Xu J (2025). \emph{BKP: An R Package for Beta
#'   Kernel Process Modeling}. arXiv. https://doi.org/10.48550/arxiv.2508.10447.
#'
#' @keywords NBKP
#'
#' @examples
#' \donttest{
#' set.seed(123)
#'
#' # Define true mean function
#' true_mu_fun <- function(x) {
#'   exp(sin(x) + 0.5)
#' }
#'
#' n <- 30
#' Xbounds <- matrix(c(-2, 2), nrow = 1)
#' X <- tgp::lhs(n = n, rect = Xbounds)
#' true_mu <- true_mu_fun(X)
#' y <- rnbinom(n, size = 1, mu = true_mu)
#'
#' # Fit NBKP model
#' model <- fit_NBKP(X, y, Xbounds = Xbounds)
#'
#' # Simulate 5 posterior draws of latent mean counts
#' Xnew <- matrix(seq(-2, 2, length.out = 5), ncol = 1)
#' simulate(model, Xnew = Xnew, nsim = 5)
#' }
#'
#' @export
#' @method simulate NBKP
simulate.NBKP <- function(object, nsim = 1, seed = NULL, Xnew = NULL, ...)
{
  # ------ Argument Checking ------
  if (!is.numeric(nsim) || length(nsim) != 1 || nsim <= 0 || nsim != as.integer(nsim)) {
    stop("'nsim' must be a positive integer.")
  }
  nsim <- as.integer(nsim)

  if (!is.null(seed) && (!is.numeric(seed) || length(seed) != 1 || seed != as.integer(seed))) {
    stop("'seed' must be a single integer or NULL.")
  }

  d <- ncol(object$X)
  if (!is.null(Xnew)) {
    if (is.null(nrow(Xnew))) {
      Xnew <- matrix(Xnew, nrow = 1)
    }
    Xnew <- as.matrix(Xnew)
    if (!is.numeric(Xnew)) {
      stop("'Xnew' must be numeric.")
    }
    if (ncol(Xnew) != d) {
      stop("The number of columns in 'Xnew' must match the original input dimension.")
    }
  }

  # ------ Core Computation ------
  if (!is.null(seed)) set.seed(seed)

  if (!is.null(Xnew)) {
    # Compute posterior parameters at new inputs
    Xnorm <- object$Xnorm
    y <- object$y
    theta <- object$theta_opt
    kernel <- object$kernel
    prior <- object$prior
    r0 <- object$r0
    mu0 <- object$mu0
    Xbounds <- object$Xbounds
    phi <- object$phi
    n_train <- nrow(Xnorm)

    # Normalize new inputs
    Xnew_norm <- sweep(Xnew, 2, Xbounds[, 1], "-")
    Xnew_norm <- sweep(Xnew_norm, 2, Xbounds[, 2] - Xbounds[, 1], "/")

    # Compute kernel matrix
    K <- kernel_matrix(Xnew_norm, Xnorm, theta = theta, kernel = kernel)
    n_new <- nrow(K)

    # Get Gamma prior parameters
    if (prior == "noninformative") {
      alpha0 <- rep(0.01, n_new)
      beta0 <- rep(0.01, n_new)
    } else if (prior == "fixed") {
      alpha0 <- rep(r0 * mu0, n_new)
      beta0 <- rep(r0, n_new)
    } else if (prior == "adaptive") {
      w <- K / rowSums(K)
      mu_local <- as.vector(w %*% y)
      r_local <- r0 * rowSums(K)
      alpha0 <- r_local * mu_local
      beta0 <- rep(r0, n_new)
    }

    # Compute posterior Gamma parameters
    alpha_n <- pmax(alpha0 + as.vector(K %*% y), 1e-3)
    beta_n  <- pmax(beta0 + as.vector(K %*% rep(phi, n_train)), 1e-3)
  } else {
    # Use training data posterior parameters
    alpha_n <- pmax(object$alpha_n, 1e-3)
    beta_n  <- pmax(object$beta_n, 1e-3)
  }

  # Simulate from posterior Gamma distributions (latent mean count μ)
  samples <- matrix(stats::rgamma(n_new * nsim,
                                  shape = rep(alpha_n, nsim),
                                  rate  = rep(beta_n, nsim)),
                    nrow = n_new, ncol = nsim)
  colnames(samples) <- paste0("sim", 1:nsim)
  rownames(samples) <- paste0("x", 1:n_new)

  # Posterior mean count
  mu_mean <- alpha_n / beta_n

  simulation <- list(
    samples = samples,   # [n_new × nsim]: simulated latent mean counts
    mean    = mu_mean,   # [n_new]: posterior mean count
    X       = object$X,  # [n × d]: training inputs
    Xnew    = Xnew       # [n_new × d]: new inputs (if provided)
  )

  class(simulation) <- "simulate_NBKP"
  return(simulation)
}