Nothing
#' Simulate Data for Multivariate Gaussian Process Regression
#'
#' \code{simMVGPR} generates simulated data for Multivariate Gaussian Process Regression (MVGPR) models,
#' including the true hyperparameters used for simulation.
#'
#' @param N Positive integer specifying the number of observations to simulate. Default is 200.
#' @param M positive integer specifying the number of response variables. Default is 2.
#' @param d Positive integer specifying the number of covariates for the covariance structure. Default is 3.
#' @param sigma2 Positive numeric value specifying the noise variance. Default is 0.1.
#' @param tau Positive numeric value specifying the global shrinkage parameter. Default is 2.
#' @param kernel_func Function specifying the covariance kernel. Default is \code{kernel_se}.
#' @param perc_spars Numeric value in \[0, 1\] indicating the proportion of inactive (zero) inverse length-scale parameters in \code{theta}. Default is 0.5.
#' @param rho Numeric value in \[0, 1\] indicating the correlation between the covariates. Default is 0.
#' @param theta \emph{Optional} numeric vector specifying the true inverse length-scale parameters.
#' If not provided, they are randomly generated.
#' @param Omega \emph{Optional} positive definite matrix of size \code{M x M}.
#' If not provided, it is generated as S x D x S, where D is a correlation matrix generated by
#' the LKJ distribution with shape parameter 1, and S is a diagonal matrix with entries sampled from a
#' gamma(1, 1) distribution.
#' @param device \emph{Optional} \code{torch_device} object specifying whether to run the simulation
#' on CPU or GPU. Defaults to GPU if available.
#' @return A list containing:
#' \itemize{
#' \item \code{data}: A data frame with M + d columns \code{y.1, ..., y.M} (response variables) and
#' \code{x.1, ..., x.d} (covariates for the covariance structure).
#' \item \code{true_vals}: A list containing the true values used for the simulation:
#' \itemize{
#' \item \code{theta}: The true inverse length-scale parameters.
#' \item \code{sigma2}: The true noise variance.
#' \item \code{tau}: The true global shrinkage parameter.
#' }
#' }
#' @details
#' This function simulates data from a multivariate Gaussian process regression model.
#' The response variable \code{y} is sampled from a matrix normal distribution with
#' a covariance matrix determined by the specified kernel function, \code{theta}, \code{tau},
#' the correlation matrix \code{Omega} and \code{sigma2} in the following way:
#'
#' \deqn{\bm Y \sim \mathcal{MN}_{N,M}(\bm 0, \bm K(\bm x; \bm \theta, \tau) + \bm I \sigma^2, \bm \Omega)}
#'
#' which is equivalent to
#'
#' \deqn{vec(\bm Y) \sim \mathcal{N}_{NM}(\bm 0, \bm \Omega \otimes (\bm K(\bm x; \bm \theta, \tau) + \bm I \sigma^2))}.
#'
#' @examples
#' if (torch::torch_is_installed()) {
#' torch::torch_manual_seed(123)
#'
#' # Simulate data with default settings
#' sim_data <- simMVGPR()
#'
#' # Simulate data with custom settings
#' sim_data <- simMVGPR(N = 100, d = 5, perc_spars = 0.3, sigma2 = 0.5)
#'
#' # Access the simulated data
#' head(sim_data$data)
#'
#' # Access the true values used for simulation
#' sim_data$true_vals
#' }
#' @export
simMVGPR <- function(N = 200,
M = 2,
d = 3,
sigma2 = 0.1,
tau = 2,
kernel_func = kernel_se,
perc_spars = 0.5,
rho = 0,
theta,
Omega,
device){
# Input checking for simGPR ----------------------------------------------
# Check that N is a positive integer
if (!is.numeric(N) || N <= 0 || N %% 1 != 0) {
stop("The argument 'N' must be a positive integer.")
}
# Check that d is a positive integer
if (!is.numeric(d) || d <= 0 || d %% 1 != 0) {
stop("The argument 'd' must be a positive integer.")
}
# Check that sigma2 is a positive numeric value
if (!is.numeric(sigma2) || sigma2 <= 0) {
stop("The argument 'sigma2' must be a positive numeric value.")
}
# Check that tau is a positive numeric value
if (!is.numeric(tau) || tau <= 0) {
stop("The argument 'tau' must be a positive numeric value.")
}
# Check that kernel_func is a function
if (!is.function(kernel_func)) {
stop("The argument 'kernel_func' must be a valid function.")
}
# Check that perc_spars is a numeric value in [0, 1]
if (!is.numeric(perc_spars) || perc_spars < 0 || perc_spars > 1) {
stop("The argument 'perc_spars' must be a numeric value between 0 and 1.")
}
# Check that theta, if provided, is a numeric vector of length d
if (!missing(theta)) {
if (!is.numeric(theta) || !is.vector(theta) || length(theta) != d) {
stop("The argument 'theta', if provided, must be a numeric vector of length 'd'.")
}
}
# Check that Omega, if provided, is an SPD matrix of size M x M
if (!missing(Omega)) {
if (!is.numeric(Omega) || !is.matrix(Omega) || nrow(Omega) != M || ncol(Omega) != M) {
stop("The argument 'Omega', if provided, must be a numeric symmetric positive definite matrix of size M x M.")
}
if (!isSymmetric(Omega) || any(eigen(Omega, only.values = TRUE)$values <= 0)) {
stop("The argument 'Omega', if provided, must be a symmetric positive definite matrix.")
}
}
# Check that device is a torch_device
if (!missing(device) && !inherits(device, "torch_device")) {
stop("The argument 'device', if provided, must be a valid 'torch_device' object.")
}
# Check that rho is a numeric value between 0 and 1
if (!is.numeric(rho) || rho < 0 || rho > 1) {
stop("The argument 'rho' must be a numeric value between 0 and 1.")
}
# Add device attribute, set to GPU if available
if (missing(device)) {
if (cuda_is_available()) {
device <- torch_device("cuda")
} else {
device <- torch_device("cpu")
}
}
# Take user supplied beta/theta or generate them
if (missing(theta)){
theta <- rgamma(d, 6, 24)
# Sparsify
theta[sample(1:d, round(d * perc_spars))] <- 0
}
if (missing(Omega)){
# Generate Omega using LKJ prior with shape parameter 1
D <- rlkjcorr(1, M, 1)
S <- diag(rgamma(M, 1, 1))
Omega <- S %*% D %*% S
}
# Transform into torch tensors
theta_tens <- torch_tensor(theta, device = device)$unsqueeze(1)
Omega_tens <- torch_tensor(Omega, device = device)
tau_tensor <- torch_tensor(tau, device = device)$unsqueeze(2)
# Generate x
if (rho > 0) {
Sigma <- matrix(rho, nrow = d, ncol = d)
diag(Sigma) <- 1
Sigma <- torch_tensor(Sigma, dtype = torch_float(), device = device)
# Cholesky decomposition
L <- linalg_cholesky(Sigma)
# Generate standard normal noise
Z <- torch_randn(c(N, d), device = device)
# Multiply by Cholesky factor to introduce correlation
x_tens <- Z$matmul(L$t())
} else {
x_tens <- torch_randn(N, d, device = device)
}
K <- kernel_func(theta_tens, tau_tensor, x_tens)$squeeze()
S <- K + sigma2 * torch_eye(N, device=device) # (N,N)
L_S <- linalg_cholesky(S) # (N,N)
L_Om <- linalg_cholesky(Omega_tens) # (M,M)
Z <- torch_randn(c(N, M), device=device)
y <- L_S$matmul(Z)$matmul(L_Om$t()) # (N,M)
data_frame <- data.frame(y = as.matrix(y),
x = as.matrix(x_tens))
res <- list(data = data_frame,
true_vals = list(theta = theta,
Omega = Omega,
sigma2 = sigma2,
tau = tau))
return(res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.