R/simMVGPR.R

Defines functions simMVGPR

Documented in simMVGPR

#' Simulate Data for Multivariate Gaussian Process Regression
#'
#' \code{simMVGPR} generates simulated data for Multivariate Gaussian Process Regression (MVGPR) models,
#' including the true hyperparameters used for simulation.
#'
#' @param N Positive integer specifying the number of observations to simulate. Default is 200.
#' @param M positive integer specifying the number of response variables. Default is 2.
#' @param d Positive integer specifying the number of covariates for the covariance structure. Default is 3.
#' @param sigma2 Positive numeric value specifying the noise variance. Default is 0.1.
#' @param tau Positive numeric value specifying the global shrinkage parameter. Default is 2.
#' @param kernel_func Function specifying the covariance kernel. Default is \code{kernel_se}.
#' @param perc_spars Numeric value in \[0, 1\] indicating the proportion of inactive (zero) inverse length-scale parameters in \code{theta}. Default is 0.5.
#' @param rho Numeric value in \[0, 1\] indicating the correlation between the covariates. Default is 0.
#' @param theta \emph{Optional} numeric vector specifying the true inverse length-scale parameters.
#' If not provided, they are randomly generated.
#' @param Omega \emph{Optional} positive definite matrix of size \code{M x M}.
#' If not provided, it is generated as S x D x S, where D is a correlation matrix generated by
#' the LKJ distribution with shape parameter 1, and S is a diagonal matrix with entries sampled from a
#' gamma(1, 1) distribution.
#' @param device \emph{Optional} \code{torch_device} object specifying whether to run the simulation
#' on CPU or GPU. Defaults to GPU if available.
#' @return A list containing:
#' \itemize{
#'   \item \code{data}: A data frame with M + d columns \code{y.1, ..., y.M} (response variables) and
#'   \code{x.1, ..., x.d} (covariates for the covariance structure).
#'   \item \code{true_vals}: A list containing the true values used for the simulation:
#'     \itemize{
#'       \item \code{theta}: The true inverse length-scale parameters.
#'       \item \code{sigma2}: The true noise variance.
#'       \item \code{tau}: The true global shrinkage parameter.
#'     }
#' }
#' @details
#' This function simulates data from a multivariate Gaussian process regression model.
#' The response variable \code{y} is sampled from a matrix normal distribution with
#' a covariance matrix determined by the specified kernel function, \code{theta}, \code{tau},
#' the correlation matrix \code{Omega} and \code{sigma2} in the following way:
#'
#' \deqn{\bm Y \sim \mathcal{MN}_{N,M}(\bm 0, \bm K(\bm x; \bm \theta, \tau) + \bm I \sigma^2, \bm \Omega)}
#'
#' which is equivalent to
#'
#' \deqn{vec(\bm Y) \sim \mathcal{N}_{NM}(\bm 0, \bm \Omega \otimes (\bm K(\bm x; \bm \theta, \tau) + \bm I \sigma^2))}.
#'
#' @examples
#' if (torch::torch_is_installed()) {
#'   torch::torch_manual_seed(123)
#'
#'   # Simulate data with default settings
#'   sim_data <- simMVGPR()
#'
#'   # Simulate data with custom settings
#'   sim_data <- simMVGPR(N = 100, d = 5, perc_spars = 0.3, sigma2 = 0.5)
#'
#'   # Access the simulated data
#'   head(sim_data$data)
#'
#'   # Access the true values used for simulation
#'   sim_data$true_vals
#'   }
#' @export
simMVGPR <-  function(N = 200,
                      M = 2,
                      d = 3,
                      sigma2 = 0.1,
                      tau = 2,
                      kernel_func = kernel_se,
                      perc_spars = 0.5,
                      rho = 0,
                      theta,
                      Omega,
                      device){

  # Input checking for simGPR ----------------------------------------------

  # Check that N is a positive integer
  if (!is.numeric(N) || N <= 0 || N %% 1 != 0) {
    stop("The argument 'N' must be a positive integer.")
  }

  # Check that d is a positive integer
  if (!is.numeric(d) || d <= 0 || d %% 1 != 0) {
    stop("The argument 'd' must be a positive integer.")
  }

  # Check that sigma2 is a positive numeric value
  if (!is.numeric(sigma2) || sigma2 <= 0) {
    stop("The argument 'sigma2' must be a positive numeric value.")
  }

  # Check that tau is a positive numeric value
  if (!is.numeric(tau) || tau <= 0) {
    stop("The argument 'tau' must be a positive numeric value.")
  }

  # Check that kernel_func is a function
  if (!is.function(kernel_func)) {
    stop("The argument 'kernel_func' must be a valid function.")
  }

  # Check that perc_spars is a numeric value in [0, 1]
  if (!is.numeric(perc_spars) || perc_spars < 0 || perc_spars > 1) {
    stop("The argument 'perc_spars' must be a numeric value between 0 and 1.")
  }

  # Check that theta, if provided, is a numeric vector of length d
  if (!missing(theta)) {
    if (!is.numeric(theta) || !is.vector(theta) || length(theta) != d) {
      stop("The argument 'theta', if provided, must be a numeric vector of length 'd'.")
    }
  }

  # Check that Omega, if provided, is an SPD matrix of size M x M
  if (!missing(Omega)) {
    if (!is.numeric(Omega) || !is.matrix(Omega) || nrow(Omega) != M || ncol(Omega) != M) {
      stop("The argument 'Omega', if provided, must be a numeric symmetric positive definite matrix of size M x M.")
    }
    if (!isSymmetric(Omega) || any(eigen(Omega, only.values = TRUE)$values <= 0)) {
      stop("The argument 'Omega', if provided, must be a symmetric positive definite matrix.")
    }
  }

  # Check that device is a torch_device
  if (!missing(device) && !inherits(device, "torch_device")) {
    stop("The argument 'device', if provided, must be a valid 'torch_device' object.")
  }

  # Check that rho is a numeric value between 0 and 1
  if (!is.numeric(rho) || rho < 0 || rho > 1) {
    stop("The argument 'rho' must be a numeric value between 0 and 1.")
  }

  # Add device attribute, set to GPU if available
  if (missing(device)) {
    if (cuda_is_available()) {
      device <- torch_device("cuda")
    } else {
      device <- torch_device("cpu")
    }
  }

  # Take user supplied beta/theta or generate them
  if (missing(theta)){
    theta <- rgamma(d, 6, 24)

    # Sparsify
    theta[sample(1:d, round(d * perc_spars))] <- 0
  }

  if (missing(Omega)){
    # Generate Omega using LKJ prior with shape parameter 1
    D <- rlkjcorr(1, M, 1)
    S <- diag(rgamma(M, 1, 1))

    Omega <- S %*% D %*% S
  }

  # Transform into torch tensors
  theta_tens <- torch_tensor(theta, device = device)$unsqueeze(1)
  Omega_tens <- torch_tensor(Omega, device = device)
  tau_tensor <- torch_tensor(tau, device = device)$unsqueeze(2)

  # Generate x
  if (rho > 0) {
    Sigma <- matrix(rho, nrow = d, ncol = d)
    diag(Sigma) <- 1
    Sigma <- torch_tensor(Sigma, dtype = torch_float(), device = device)

    # Cholesky decomposition
    L <- linalg_cholesky(Sigma)

    # Generate standard normal noise
    Z <- torch_randn(c(N, d), device = device)

    # Multiply by Cholesky factor to introduce correlation
    x_tens <- Z$matmul(L$t())
  } else {
    x_tens <- torch_randn(N, d, device = device)
  }

  K <- kernel_func(theta_tens, tau_tensor, x_tens)$squeeze()

  S <- K + sigma2 * torch_eye(N, device=device)          # (N,N)
  L_S <- linalg_cholesky(S)                        # (N,N)
  L_Om <- linalg_cholesky(Omega_tens)              # (M,M)

  Z <- torch_randn(c(N, M), device=device)
  y <- L_S$matmul(Z)$matmul(L_Om$t())                    # (N,M)

  data_frame <- data.frame(y = as.matrix(y),
                           x = as.matrix(x_tens))


  res <- list(data = data_frame,
              true_vals = list(theta = theta,
                               Omega = Omega,
                               sigma2 = sigma2,
                               tau = tau))



  return(res)
}

Try the shrinkGPR package in your browser

Any scripts or data that you put into this service are public.

shrinkGPR documentation built on March 30, 2026, 5:06 p.m.