R/discreteT.R
In DRPT: Density Ratio Permutation Test

Documented in discreteT

#' Compute test statistics for the DRPT in discrete settings.
#'
#' Computes the test statistics introduced in \insertCite{BB2025DRPT;textual}{DRPT}
#' for settings where the data support is discrete and finite.
#'
#' When \code{type = "U"}, the U-statistic (12) is calculated.
#' When \code{type = "V"}, the V-statistic (11) is computed.
#' When \code{type = "D"}, the test statistic (56) from Appendix B is returned.
#'
#' @param NX A vector of counts for the first sample.
#' This corresponds to the sequence \eqn{\mathrm{tot}_j - N_{Y,j}^p} with
#'  \eqn{p = \mathrm{id}}, i.e. the identity permutation,
#' as introduced in Section 2.1 of \insertCite{BB2025DRPT;textual}{DRPT}.
#' @param NY A vector of counts for the second sample.
#' This corresponds to the sequence \eqn{N_{Y,j}^p} with
#' \eqn{p = \mathrm{id}}, i.e. the identity permutation,
#' as introduced in Section 2.1 of \insertCite{BB2025DRPT;textual}{DRPT}.
#' @param r A numeric vector of positive values specifying the hypothesised density ratio
#' in the discrete setting.
#' @param n The size of the first sample.
#' @param m The size of the second sample.
#' @param type A character string indicating which test statistic to compute.
#' One of \code{"U"}, \code{"V"}, or \code{"D"}. See the Details section for more information.
#' Defaults to \code{"V"}.
#'
#' @return A numeric value representing the computed test statistic.
#' @export
#'
#' @references \insertRef{BB2025DRPT}{DRPT}
#'
#' @importFrom rootSolve uniroot.all
#'
#'@examples
#' n = 100; m = n
#' X = sample(0:3, n, prob = c(1/4, 1/4, 1/4, 1/4), replace = TRUE)
#' Y = sample(0:3, m, prob = c(1/17, 3/17, 3/17, 10/17), replace = TRUE)
#' r = c(1, 3, 3, 10)
#'
#' NX = table(X)
#' NY = table(Y)
#'
#' discreteT(NX, NY, r, sum(NX), sum(NY), type = "V")
#' discreteT(NX, NY, r, sum(NX), sum(NY), type = "D")

discreteT = function(NX, NY, r, n, m, type = "V") {
  K = length(NY)

  if (type == "V"){

    # compute lambda.star
    sum_lambda = function(l) {
      sum = 0
      for (k in 1:K) {
        sum = sum + (NX[k]+NY[k]) / (n + m * l * r[k])
      }
      return(sum - 1)
    }
    lambda = uniroot.all(sum_lambda, c(0, 100), tol = (.Machine$double.eps)^4)[1]

    # compute the test statistic
    denominator = (n / m) + lambda * r[1:K]
    f_j = NX[1:K] / n
    g_j = NY[1:K] / m

    term1 = (lambda * r[1:K] * f_j) / denominator
    term2 = g_j / denominator

    return(sum((term1 - term2)^2))
  }

  else if (type == "U"){
    # compute lambda.star
    sum_lambda = function(l) {
      sum = 0
      for (k in 1:K) {
        sum = sum + (NX[k]+NY[k]) / (n + m * l * r[k])
      }
      return(sum - 1)
    }
    lambda = uniroot.all(sum_lambda, c(0, 100), tol = (.Machine$double.eps)^4)[1]

    # compute test statistic
    denominator = (n / m) + lambda * r[1:K]
    f_j = NX[1:K] / n
    g_j = NY[1:K] / m

    term1 = (lambda * r[1:K] * f_j) / denominator
    term2 = g_j / denominator

    V = sum((term1 - term2)^2)

    termX = sum(((lambda * r[1:K]) / denominator)^2 * (NX[1:K] / n^2))
    termY = sum((1 / denominator)^2 * (NY[1:K] / m^2))

    return(V - termX - termY)
  }

  else if (type == "D"){
    term1 = as.numeric(NY[2:K]) * as.numeric(NX[1]) / as.numeric(sqrt(r[2:K]))
    term2 = as.numeric(sqrt(r[2:K])) * as.numeric(NX[2:K]) * as.numeric(NY[1])

    return(sum(abs(term1 - term2)) / (n * m))
  }

}