R/transformationestimator.R
In VecDep: Measuring Copula-Based Dependence Between Random Vectors

Documented in transformationestimator

#' @title transformationestimator
#'
#' @description This function computes the non-parametric Gaussian transformation kernel copula density estimator.
#'
#' @param input The copula argument at which the density estimate is to be computed.
#' @param h The bandwidth to be used in the Gaussian kernel.
#' @param  pseudos The (estimated) copula observations from a \eqn{q}-dimensional random vector \eqn{\mathbf{X}} (\eqn{n \times q} matrix with observations in rows, variables in columns).
#'
#' @details
#' Given a \eqn{q}-dimensional random vector \eqn{\mathbf{X} = (\mathbf{X}_{1}, \dots, \mathbf{X}_{k})} with \eqn{\mathbf{X}_{i} = (X_{i1}, \dots, X_{id_{i}})},
#' and samples \eqn{X_{ij}^{(1)}, \dots, X_{ij}^{(n)}} from \eqn{X_{ij}} for \eqn{i = 1, \dots, k} and \eqn{j = 1, \dots, d_{i}},
#' the Gaussian transformation kernel estimator for the copula density of \eqn{\mathbf{X}} equals, at \eqn{\mathbf{u} = (u_{11}, \dots, u_{kd_{k}}) \in \mathbb{R}^{q}},
#' \deqn{\widehat{c}_{\text{T}}(\mathbf{u}) = \frac{1}{n h_{n}^{q} \prod_{i = 1}^{k} \prod_{j = 1}^{d_{i}} \phi \left (\Phi^{-1} \left (u_{ij} \right ) \right )} \sum_{\ell = 1}^{n} \prod_{i = 1}^{k} \prod_{j = 1}^{d_{i}} \phi \left (\frac{\Phi^{-1}(u_{ij}) - \Phi^{-1} \left (\widehat{U}_{ij}^{(\ell)} \right )}{h_{n}} \right ),}
#' where \eqn{h_{n} > 0} is a bandwidth parameter, \eqn{\widehat{U}_{ij}^{(\ell)} = \widehat{F}_{ij} (X_{ij}^{(\ell)})} with \deqn{\widehat{F}_{ij}(x_{ij}) = \frac{1}{n+1} \sum_{\ell = 1}^{n} 1 \left (X_{ij}^{(\ell)} \leq x_{ij} \right )} the (rescaled) empirical cdf of \eqn{X_{ij}}, and
#' \eqn{\Phi} the standard normal distribution function with corresponding quantile function \eqn{\Phi^{-1}} and density function \eqn{\phi}.
#'
#'
#' @return The Gaussian transformation kernel copula density estimator evaluated at the input.
#'
#' @references
#' De Keyser, S. & Gijbels, I. (2024).
#' Hierarchical variable clustering via copula-based divergence measures between random vectors.
#' International Journal of Approximate Reasoning 165:109090.
#' doi: https://doi.org/10.1016/j.ijar.2023.109090.
#'
#' @seealso \code{\link{betakernelestimator}} for the computation of the beta kernel copula density estimator,
#'          \code{\link{hamse}} for local bandwidth selection for the beta kernel or Gaussian transformation kernel copula density estimator,
#'          \code{\link{phinp}} for fully non-parametric estimation of the \eqn{\Phi}-dependence between \eqn{k} random vectors.
#'
#' @examples
#' q = 3
#' n = 100
#'
#' # Sample from multivariate normal distribution with identity covariance matrix
#' sample = mvtnorm::rmvnorm(n,rep(0,q),diag(3),method = "chol")
#'
#' # Copula pseudo-observations
#' pseudos = matrix(0,n,q)
#' for(j in 1:q){pseudos[,j] = (n/(n+1)) * ecdf(sample[,j])(sample[,j])}
#'
#' # Argument at which to estimate the density
#' input = rep(0.5,q)
#'
#' # Local bandwidth selection
#' h = hamse(input,pseudos = pseudos,n = n,estimator = "trans",bw_method = 1)
#'
#' # Gaussian transformation kernel estimator
#' est_dens = transformationestimator(input,h,pseudos)
#'
#' # True density
#' true = copula::dCopula(rep(0.5,q), copula::normalCopula(0, dim = q))
#'
#' @export


transformationestimator = function(input, h, pseudos){

  n = nrow(pseudos) # Sample size
  q = length(input) # Total dimension

  sum = 0

  for(j in 1:n){

    prod = 1

    for(i in 1:q){

      prod = prod * stats::dnorm((stats::qnorm(input[i]) - stats::qnorm(pseudos[j,i]))/h)

    }

    sum = sum + prod

  }

  return(sum/(n * (h^q) * prod(stats::dnorm(stats::qnorm(input)))))

}