R/simGPR.R

Defines functions simGPR

Documented in simGPR

#' Simulate Data for Gaussian Process Regression
#'
#' \code{simGPR} generates simulated data for Gaussian Process Regression (GPR) models, including the true hyperparameters used for simulation.
#'
#' @param N Positive integer specifying the number of observations to simulate. Default is 200.
#' @param d Positive integer specifying the number of covariates for the covariance structure. Default is 3.
#' @param d_mean Positive integer specifying the number of covariates for the mean structure. Default is 0.
#' @param sigma2 Positive numeric value specifying the noise variance. Default is 0.1.
#' @param tau Positive numeric value specifying the global shrinkage parameter. Default is 2.
#' @param kernel_func Function specifying the covariance kernel. Default is \code{kernel_se}.
#' @param perc_spars Numeric value in \[0, 1\] indicating the proportion of elements in \code{theta}
#' and \code{beta} to sparsify. Default is 0.5.
#' @param theta \emph{Optional} numeric vector specifying the true inverse length-scale parameters.
#' If not provided, they are randomly generated.
#' @param beta \emph{Optional} numeric vector specifying the true regression coefficients for the mean
#' structure. If not provided, they are randomly generated.
#' @param device \emph{Optional} \code{torch_device} object specifying whether to run the simulation
#' on CPU or GPU. Defaults to GPU if available.
#' @return A list containing:
#' \itemize{
#'   \item \code{data}: A data frame with \code{y} (response variable), \code{x} (covariates for the covariance structure),
#'   and optionally \code{x_mean} (covariates for the mean structure).
#'   \item \code{true_vals}: A list containing the true values used for the simulation:
#'     \itemize{
#'       \item \code{theta}: The true inverse length-scale parameters.
#'       \item \code{sigma2}: The true noise variance.
#'       \item \code{tau}: The true global shrinkage parameter.
#'       \item \code{beta} (optional): The true regression coefficients for the mean structure.
#'     }
#' }
#' @details
#' This function simulates data from a Gaussian Process Regression model.
#' The response variable \code{y} is sampled from a multivariate normal distribution with
#' a covariance matrix determined by the specified kernel function, \code{theta}, \code{tau},
#' and \code{sigma2}. If \code{d_mean > 0}, a mean structure is included in the simulation, with
#' covariates \code{x_mean} and regression coefficients \code{beta}.
#'
#' @examples
#' if (torch::torch_is_installed()) {
#'   torch::torch_manual_seed(123)
#'
#'   # Simulate data with default settings
#'   sim_data <- simGPR()
#'
#'   # Simulate data with custom settings
#'   sim_data <- simGPR(N = 100, d = 5, d_mean = 2, perc_spars = 0.3, sigma2 = 0.5)
#'
#'   # Access the simulated data
#'   head(sim_data$data)
#'
#'   # Access the true values used for simulation
#'   sim_data$true_vals
#'   }
#' @export
simGPR <-  function(N = 200,
                    d = 3,
                    d_mean = 0,
                    sigma2 = 0.1,
                    tau = 2,
                    kernel_func = kernel_se,
                    perc_spars = 0.5,
                    theta,
                    beta,
                    device){

  # Input checking for simGPR ----------------------------------------------

  # Check that N is a positive integer
  if (!is.numeric(N) || N <= 0 || N %% 1 != 0) {
    stop("The argument 'N' must be a positive integer.")
  }

  # Check that d is a positive integer
  if (!is.numeric(d) || d <= 0 || d %% 1 != 0) {
    stop("The argument 'd' must be a positive integer.")
  }

  # Check that d_mean is a non-negative integer
  if (!is.numeric(d_mean) || d_mean < 0 || d_mean %% 1 != 0) {
    stop("The argument 'd_mean' must be a non-negative integer.")
  }

  # Check that sigma2 is a positive numeric value
  if (!is.numeric(sigma2) || sigma2 <= 0) {
    stop("The argument 'sigma2' must be a positive numeric value.")
  }

  # Check that tau is a positive numeric value
  if (!is.numeric(tau) || tau <= 0) {
    stop("The argument 'tau' must be a positive numeric value.")
  }

  # Check that kernel_func is a function
  if (!is.function(kernel_func)) {
    stop("The argument 'kernel_func' must be a valid function.")
  }

  # Check that perc_spars is a numeric value in [0, 1]
  if (!is.numeric(perc_spars) || perc_spars < 0 || perc_spars > 1) {
    stop("The argument 'perc_spars' must be a numeric value between 0 and 1.")
  }

  # Check that theta, if provided, is a numeric vector of length d
  if (!missing(theta)) {
    if (!is.numeric(theta) || !is.vector(theta) || length(theta) != d) {
      stop("The argument 'theta', if provided, must be a numeric vector of length 'd'.")
    }
  }

  # Check that beta, if provided, is a numeric vector of length d_mean
  if (!missing(beta)) {
    if (!is.numeric(beta) || !is.vector(beta) || length(beta) != d_mean) {
      stop("The argument 'beta', if provided, must be a numeric vector of length 'd_mean'.")
    }
  }

  # Check that device is a torch_device
  if (!missing(device) && !inherits(device, "torch_device")) {
    stop("The argument 'device', if provided, must be a valid 'torch_device' object.")
  }

  # Add device attribute, set to GPU if available
  if (missing(device)) {
    if (cuda_is_available()) {
      device <- torch_device("cuda")
    } else {
      device <- torch_device("cpu")
    }
  }

  # Take user supplied beta/theta or generate them
  if (missing(beta)){
    beta <- rnorm(d_mean, 0, 1)

    # Sparsify
    beta[sample(1:d_mean, round(d_mean * perc_spars))] <- 0
  } else {
    if (is.vector(beta) == FALSE){
      stop("beta has to be a vector")
    }

    if (length(beta) != d_mean){
      stop("beta has to be of length d_mean")
    }
  }

  if (missing(theta)){
    theta <- rnorm(d, mean = 1.5)^2

    # Sparsify
    theta[sample(1:d, round(d * perc_spars))] <- 0
  } else {

    if (is.vector(theta) == FALSE){
      stop("theta has to be a vector")
    }

    if (length(theta) != d){
      stop("theta has to be of length d")
    }
  }

  # Transform into torch tensors
  beta_tens <- torch_tensor(beta, device = device)$unsqueeze(2)
  theta_tens <- torch_tensor(theta, device = device)$unsqueeze(1)
  tau_tensor <- torch_tensor(tau, device = device)$unsqueeze(2)

  # Generate y
  x_tens <- torch_randn(N, d, device = device)
  K <- kernel_func(theta_tens, tau_tensor, x_tens)$squeeze() + sigma2 * torch_eye(N, device = device)

  if (d_mean > 0) {
    x_mean <- torch_randn(N, d_mean, device = device)
    mu <- torch_mm(x_mean, beta_tens)
  } else {
    mu <- torch_zeros(N, 1, device = device)
  }


  multivar_dist <- distr_multivariate_normal(mu$squeeze(), K)

  y <- multivar_dist$sample()

  if (d_mean > 0) {
    data_frame <- data.frame(y = as.numeric(y),
                             x = as.matrix(x_tens),
                             x_mean = as.matrix(x_mean))
  } else {
    data_frame <- data.frame(y = as.numeric(y),
                             x = as.matrix(x_tens))
  }

  res <- list(data = data_frame,
              true_vals = list(theta = theta,
                               sigma2 = sigma2,
                               tau = tau))

  if (d_mean > 0) {
    res$true_vals$beta <- beta
  }

  return(res)
}

Try the shrinkGPR package in your browser

Any scripts or data that you put into this service are public.

shrinkGPR documentation built on April 4, 2025, 3:07 a.m.