R/DRPT.R
In DRPT: Density Ratio Permutation Test

Documented in DRPT

#' A function implementing the Density Ratio Permutation Test based on an
#' estimate of the shifted-MMD.
#'
#' A function that implements the DRPT based on the U-statistic (12)
#' defined in \insertCite{BB2025DRPT;textual}{DRPT}. An estimator of the shifted-MMD
#' with kernel \eqn{k(\cdot, \cdot)} as defined in Section 3.2 of the paper is computed using
#' the function \code{shiftedMMD}, which is provided in the package.
#'
#' @param X A numeric vector containing the first sample.
#' @param Y A numeric vector containing the second sample.
#' @param r A function specifying the hypothesised density ratio.
#' @param kernel A function defining the kernel to be used for the U-statistic.
#' @param H An integer specifying the number of permutations to use. Defaults to 99.
#' @param S An integer specifying the number of steps for the Markov-Chain defined in Algorithm
#' 2 in \insertCite{BB2025DRPT;textual}{DRPT}. Defaults to 50.
#'
#' @return The p-value of the DRPT as defined in (2) in \insertCite{BB2025DRPT;textual}{DRPT}.
#' @export
#'
#' @references \insertRef{BB2025DRPT}{DRPT}
#'
#' @importFrom future plan multicore multisession sequential
#' @importFrom future.apply future_sapply
#'
#' @examples
#' n = 50; m = 50; d = 2
#' r = function(x,y) {
#'   return(4*x*y)
#'    }
#'
#' gaussian.kernel = function(x, y, lambda = 1){
#'      return(lambda^(-d) * exp(-sum(((x - y) ^ 2) / (lambda ^ 2))))
#'      }
#'
#' X = as.matrix(cbind(runif(n, 0, 1), runif(n, 0, 1)))
#' Y = as.matrix(cbind(rbeta(m, 0.5, 0.3), rbeta(m, 0.5, 0.4)))
#'
#' DRPT(X,Y, r, gaussian.kernel, H=19, S=10)
#' DRPT(X,Y, r, gaussian.kernel, H=9)

DRPT = function(X, Y, r, kernel, H=99, S=50) {

  X = as.matrix(X); Y = as.matrix(Y)
  n = nrow(X); m = nrow(Y)

  # Generate exchangeable copies of the original dataset
  data = star_sampler_C(X, Y, S, H, r)

  # Compute T_hat_0
  T_hat_0 = compute_mmd_C(X = as.matrix(data[[1]][1:n,]),
                          Y = as.matrix(data[[1]][(n + 1):(n + m),]),
                          r, kernel)

  # Compute T_hat
  sum_indicator = 0

  compute_shifted_MMD = function(b) {
    X_b = as.matrix(data[[1 + b]][1:n,])
    Y_b = as.matrix(data[[1 + b]][(n + 1):(n + m),])
    current_T_hat = compute_mmd_C(X = X_b, Y = Y_b, r, kernel)
    return(current_T_hat)
  }


  plan(multicore)
  T_hats = future_sapply(seq_len(H), compute_shifted_MMD, future.seed = TRUE)

  plan(sequential)
  sum_indicator = sum(T_hats >= T_hat_0)

  p_hat = (1 + sum_indicator) / (H + 1)
  return(p_hat)
}