# R/MarshallOlkin.R
#
# Defines functions: BiCopEst.MO.MMD.MC, BiCopEst.MO.itau, BiCopEst.MO.curve,
# BiCopEst.MO, BiCopSim.MO
#
# Documented in: BiCopEst.MO, BiCopSim.MO

#' Simulation of Marshall-Olkin copula
#'
#' This function simulates independent realizations
#' from the Marshall-Olkin copula.
#'
#' @param n number of samples
#' @param alpha parameter of the Marshall-Olkin copula
#'
#' @return an \eqn{n \times 2} matrix containing the samples
#'
#' @seealso \code{\link{BiCopEst.MO}} for the estimation of
#' Marshall-Olkin copulas.
#'
#' @examples
#' # Simulation from a Marshall-Olkin copula with parameter alpha = 0.5
#' BiCopSim.MO(n = 100, alpha = 0.5)
#'
#'
#' @export
#'
BiCopSim.MO <- function(n, alpha) {

  # First coordinate ~ Beta(2 - alpha, 1); second coordinate is uniform
  # on [0, first]. The four RNG calls below are issued in the same order
  # as before (rbeta, runif, runif, runif), so seeded runs are reproducible.
  coord1 <- stats::rbeta(n = n, shape1 = 2 - alpha, shape2 = 1)
  coord2 <- stats::runif(n = n, 0, coord1)

  # With probability alpha / (2 - alpha), the point falls on the diagonal
  # {u = v} (the singular component of the Marshall-Olkin copula).
  probaEqual <- alpha / (2 - alpha)
  onDiagonal <- stats::runif(n, 0, 1) < probaEqual
  coord2[onDiagonal] <- coord1[onDiagonal]

  # Off-diagonal points are swapped with probability 1/2 so that the
  # resulting distribution is exchangeable.
  doSwap <- (stats::runif(n, 0, 1) < 0.5) & !onDiagonal
  oldCoord2 <- coord2
  coord2[doSwap] <- coord1[doSwap]
  coord1[doSwap] <- oldCoord2[doSwap]

  # Assemble without dimnames, matching the original (un-named) matrix.
  return(matrix(c(coord1, coord2), nrow = n, ncol = 2))
}


#' Estimation of Marshall-Olkin copulas
#'
#' @param u1 vector of observations of the first coordinate, in \eqn{[0,1]}.
#' @param u2 vector of observations of the second coordinate, in \eqn{[0,1]}.
#'
#' @param method a character giving the name of the estimation method, among:
#'   \itemize{
#'     \item \code{curve}: \eqn{\alpha} is estimated by inversion of
#'       the probability measure of the diagonal
#'       \eqn{\{(u,v): u = v\}}{ {(u,v): u = v} }
#'     \item \code{itau}: \eqn{\alpha} is estimated by inversion of Kendall's tau
#'     \item \code{MMD}: \eqn{\alpha} is estimated by MMD optimization
#'   }
#'
#' @param par.start starting parameter of the gradient descent.
#' (only used for \code{method = "MMD"})
#'
#' @param kernel the kernel used in the MMD distance
#' (only used for \code{method = "MMD"}) :
#'   it can be a function taking in parameter \code{(u1, u2, v1, v2, gamma, alpha)}
#'   or a name giving the kernel to use in the list:
#'   \itemize{
#'     \item \code{"gaussian"}: Gaussian kernel \eqn{k(x,y) = \exp(-\|\frac{x-y}{\gamma}\|_2^2)
#'     }{k(x,y) = exp( - || (x-y) / gamma ||_2^2)}
#'     \item \code{"exp-l2"}: \eqn{k(x,y) = \exp(-\|\frac{x-y}{\gamma}\|_2)
#'     }{k(x,y) = exp( - || (x-y) / gamma ||_2)}
#'     \item \code{"exp-l1"}: \eqn{k(x,y) = \exp(-\|\frac{x-y}{\gamma}\|_1)
#'     }{k(x,y) = exp( - || (x-y) / gamma ||_1)}
#'     \item \code{"inv-l2"}: \eqn{k(x,y) = 1/(1+\|\frac{x-y}{\gamma}\|_2)^\alpha
#'     }{k(x,y) = 1 / ( 1 + || (x-y) / gamma ||_2 )^\alpha}
#'     \item \code{"inv-l1"}: \eqn{k(x,y) = 1/(1+\|\frac{x-y}{\gamma}\|_1)^\alpha
#'     }{k(x,y) = 1 / ( 1 + || (x-y) / gamma ||_1 )^\alpha}
#'   }
#'  Each of these names can receive the suffix \code{".Phi"}, such as \code{"gaussian.Phi"}
#'  to indicates that the kernel \eqn{k(x,y)} is replaced by
#'  \eqn{k(\Phi^{-1}(x) , \Phi^{-1}(y))} where \eqn{\Phi^{-1}} denotes the quantile
#'  function of the standard Normal distribution.
#'
#' @param gamma parameter \eqn{\gamma} to be used in the kernel.
#' (only used for \code{method = "MMD"})
#'
#' @param alpha parameter \eqn{\alpha} to be used in the kernel, if any.
#' (only used for \code{method = "MMD"})
#'
#' @param ndrawings number of replicas of the stochastic estimate of the gradient
#' drawn at each step. The gradient is computed using the average of these replicas.
#' (only used for \code{method = "MMD"})
#'
#' @param niter the stochastic gradient algorithm is composed of two phases:
#' a first "burn-in" phase and a second "averaging" phase.
#' If \code{niter} is of size \code{1}, the same number of iterations is used for
#' both phases of the stochastic gradient algorithm. If \code{niter} is of size \code{2},
#' then \code{niter[1]} iterations are done for the burn-in phase and \code{niter[2]}
#' for the averaging phase.
#' (only used for \code{method = "MMD"})
#'
#' @param C_eta a multiplicative constant controlling for the size of the gradient descent step.
#' The step size is then computed as \code{C_eta / sqrt(i_iter)}
#' where \code{i_iter} is the index of the current iteration of the stochastic gradient algorithm.
#' (only used for \code{method = "MMD"})
#'
#' @param naveraging number of full run of the stochastic gradient algorithm
#' that are averaged at the end to give the final estimated parameter.
#' (only used for \code{method = "MMD"})
#'
#' @return a list containing
#'   \itemize{
#'     \item \code{par}: the estimated parameter \eqn{\alpha}
#'       of the Marshall-Olkin copula
#'     \item \code{tau}: the corresponding Kendall's tau
#'       \eqn{\alpha / (2 - \alpha)}
#'   }
#'
#' @seealso \code{\link{BiCopSim.MO}} for the estimation of
#' Marshall-Olkin copulas.
#' \code{\link{BiCopEstMMD}} for the estimation of other parametric copula families by MMD.
#'
#'
#' @references Alquier, P., Chérief-Abdellatif, B.-E., Derumigny, A., and Fermanian, J.D. (2022).
#' Estimation of copulas via Maximum Mean Discrepancy.
#' Journal of the American Statistical Association, \doi{10.1080/01621459.2021.2024836}.
#'
#' @examples
#' U <- BiCopSim.MO(n = 1000, alpha = 0.2)
#' estimatedPar <- BiCopEst.MO(u1 = U[,1], u2 = U[,2], method = "MMD", niter = 1, ndrawings = 1)
#' \donttest{
#' estimatedPar <- BiCopEst.MO(u1 = U[,1], u2 = U[,2], method = "MMD")
#' }
#'
#' @export
#'
BiCopEst.MO <- function(
  u1, u2, method,
  par.start = 0.5, kernel = "gaussian.Phi",
  gamma=0.95, alpha=1,
  niter=100, C_eta = 1, ndrawings=10, naveraging = 1)
{
  verifData(u1, u2)

  switch (
    method,

    "curve" = {
      estimator = BiCopEst.MO.curve(u1, u2)
    },

    "itau" = {
      # BiCopEst.MO.itau already returns the full list(tau = ..., par = ...)
      result = BiCopEst.MO.itau(u1, u2)
      return(result)
    },


    "MMD" = {

      # Choice of the kernel: either a named kernel from the predefined list
      # or a user-supplied function (u1, u2, v1, v2, gamma, alpha).
      if (is.character(kernel))
      {
        kernelFun <- findKernelFunction(kernel)
      } else {
        kernelFun <- kernel
      }

      # If only one number of iterations is given,
      # it is reused for the burn-in phase and the averaging phase
      niter = rep(niter, length.out = 2)

      # BUG FIX: C_eta was previously not forwarded to BiCopEst.MO.MMD.MC,
      # so the user-supplied step-size constant was silently ignored.
      estimator = BiCopEst.MO.MMD.MC(
        u1 = u1, u2 = u2, par.start = par.start,
        kernelFun = kernelFun,
        gamma = gamma, alpha = alpha,
        niter = niter, C_eta = C_eta,
        ndrawings = ndrawings, naveraging = naveraging)

      # else if (methodMC == "QMCV"){
      #
      #   if (is.character(quasiRNG)) {
      #     switch (
      #       quasiRNG,
      #
      #       "sobol" = {quasiRNGFun <- function (n) {
      #         return (randtoolbox::sobol(n = n, dim = 2))}},
      #
      #       "halton" = {quasiRNGFun <- function (n) {
      #         return (randtoolbox::halton(n = n, dim = 2))}},
      #
      #       "torus" = {quasiRNGFun <- function (n) {
      #         return (randtoolbox::torus(n = n, dim = 2))}}
      #     )
      #   } else {
      #     quasiRNGFun <- quasiRNG
      #   }
      #
      #   estimator = BiCopEst.MO.MMD.QMCV(
      #     u1 = u1, u2 = u2, par.start = par.start,
      #     kernelFun = kernelFun,
      #     gamma = gamma, alpha = alpha,
      #     niter = niter, ndrawings = ndrawings, naveraging = naveraging,
      #     quasiRNGFun = quasiRNGFun)
      # }

    },

    # Fail fast on an unknown method. Without this default, switch() would
    # return NULL and the user would get the confusing error
    # "object 'estimator' not found" below.
    stop("Unknown method: '", method,
         "'. It should be one of 'curve', 'itau' or 'MMD'.")

  )

  # Kendall's tau of the Marshall-Olkin copula with parameter alpha
  tau = estimator / (2 - estimator)
  return (list(tau = tau, par = estimator))

}

# Method-of-moments estimation of Marshall-Olkin copulas based on the
# probability mass that the copula puts on the diagonal {u = v}.
# For the MO copula this mass equals alpha / (2 - alpha); inverting that
# relation yields alpha = 2 * p / (1 + p).
BiCopEst.MO.curve = function(u1,u2)
{
  # Empirical probability that an observation lies exactly on the diagonal
  diagMass <- mean(u1 == u2)
  2 * diagMass / (1 + diagMass)
}


# Method-of-moments estimation of Marshall-Olkin copulas based on the
# inversion of Kendall's tau. For the MO copula, tau = alpha / (2 - alpha),
# hence alpha = 2 * tau / (1 + tau) where tau is the empirical Kendall's tau.
BiCopEst.MO.itau = function(u1,u2)
{
  kendallTau <- wdm::wdm(u1, u2, method = "kendall")
  list(tau = kendallTau, par = 2 * kendallTau / (1 + kendallTau))
}


# Estimation of Marshall-Olkin copulas by MMD (Monte-Carlo version).
#
# Runs `naveraging` independent runs of a two-phase stochastic gradient
# descent on the MMD criterion between the sample (u1, u2) and the
# Marshall-Olkin model, then returns the average of the run estimates.
# See Alquier et al. (2022), doi:10.1080/01621459.2021.2024836.
#
# Arguments:
#   u1, u2     observations in [0,1] (same length n)
#   par.start  starting value of the MO parameter alpha
#   kernelFun  kernel function with signature (u1, u2, v1, v2, gamma, alpha)
#   gamma      kernel bandwidth parameter
#   alpha      additional kernel parameter (for the "inv-*" kernels)
#   niter      vector of length 2: c(burn-in iterations, averaging iterations)
#   C_eta      multiplicative constant of the step size C_eta / sqrt(step index)
#   ndrawings  number of replicas averaged in each stochastic gradient estimate
#   naveraging number of independent SGD runs averaged at the end
#
# Returns: the estimated parameter alpha (a single numeric value).
BiCopEst.MO.MMD.MC = function(
  u1, u2, par.start = 0.5, kernelFun,
  gamma = 0.3, alpha = 1,
  niter = 100, C_eta = 1, ndrawings = 10, naveraging = 1)
{

  n = length(u1)
  estimatorsA = rep(NA, naveraging)

  for (i_av in 1:naveraging){
    # 1- Burn-in phase: plain SGD starting from par.start; only the final
    # iterate is kept.
    aIter = par.start
    for (i_iter in 1:niter[1]){
      Grad = 0
      for (j in 1:ndrawings){
        # Two independent samples from the current model, used for the
        # unbiased stochastic estimate of the MMD gradient.
        U = BiCopSim.MO(n, aIter)
        V = BiCopSim.MO(n, aIter)
        # Partition of the simulated points into the three regions of the
        # MO density: above, below, and on the diagonal.
        usup = (U[,1]>U[,2])
        uinf = (U[,1]<U[,2])
        ueq = (U[,1]==U[,2])
        # dlog: derivative w.r.t. alpha of the log-density of the MO copula,
        # with a different expression on each of the three regions
        # (presumably following Alquier et al. (2022) — see reference above).
        dlog = (log(U[,1]) - aIter / (1 - aIter)) * usup +
          (log(U[,2]) - aIter / (1 - aIter)) * uinf +
          (1/aIter - log(U[,1])) * ueq

        # Stochastic gradient of the MMD criterion: model-vs-model kernel
        # term minus data-vs-model kernel term, weighted by the score dlog.
        grad = mean(2 * ( kernelFun(U[,1], U[,2], V[,1], V[,2],gamma,alpha)
                          - kernelFun( u1,    u2, U[,1], U[,2],gamma,alpha) ) * dlog)
        # Running mean of the ndrawings replicas (incremental average).
        Grad = grad/j + Grad*(j-1)/j
      }
      # Decreasing step size: C_eta / sqrt(iteration index).
      aIter = aIter - C_eta * Grad / sqrt(i_iter)
    }
    # 2- Averaging phase: same updates, but all iterates are stored and
    # averaged at the end (Polyak-Ruppert-style averaging).
    aIter_vec = rep(NA, niter[2]+1)
    aIter_vec[1] = aIter
    for (i_iter in 1:niter[2]){
      Grad = 0
      for (j in 1:ndrawings){
        U = BiCopSim.MO(n, aIter_vec[i_iter])
        V = BiCopSim.MO(n, aIter_vec[i_iter])
        usup = (U[,1]>U[,2])
        uinf = (U[,1]<U[,2])
        ueq = (U[,1]==U[,2])
        dlog = (log(U[,1]) - aIter_vec[i_iter] / (1-aIter_vec[i_iter])) * usup +
          (log(U[,2]) - aIter_vec[i_iter] / (1-aIter_vec[i_iter])) * uinf +
          (1/aIter_vec[i_iter] - log(U[,1])) * ueq

        grad = mean(2 * ( kernelFun(U[,1], U[,2], V[,1], V[,2],gamma,alpha)
                          - kernelFun( u1,    u2, U[,1], U[,2],gamma,alpha) ) * dlog)
        Grad = grad/j + Grad*(j-1)/j
      }
      # The step-size index continues from the burn-in phase (niter[1] + i_iter).
      aIter_vec[i_iter+1] = aIter_vec[i_iter] - C_eta * Grad / sqrt(niter[1] + i_iter)
    }
    # Estimate of this run = average of the iterates of the averaging phase.
    estimatorsA[i_av] = mean(aIter_vec)
  }

  # Final estimate: average over the naveraging independent runs.
  estim = mean( estimatorsA )

  return(estim)
}

#
# # Estimation of Marshall-Olkin copulas by MMD
# BiCopEst.MO.MMD.QMCV = function(
#   u1, u2, par.start=0.5, kernelFun,
#   gamma=0.3, alpha=1,
#   quasiRNGFun, niter=100, ndrawings=10, naveraging = 1)
# {
#
#   n = length(u1)
#   estimatorsA = rep(NA, naveraging)
#
#   for (i in 1:naveraging){
#     aIter = par.start
#     for (i_iter in 1:niter){
#       Grad = 0
#       for (j in 1:ndrawings){
#         U = BiCopSimMO(n,aIter)
#         V = BiCopSimMO(n,aIter)
#         usup = (U[,1]>U[,2])
#         uinf = (U[,1]<U[,2])
#         ueq = (U[,1]==U[,2])
#         dlog = (log(U[,1])-aIter/(1-aIter))*usup +
#           (log(U[,2])-aIter/(1-aIter))*uinf +
#           (1/aIter-log(U[,1])) * ueq
#
#         grad = mean(2 * ( kernelFun(U[,1], U[,2], V[,1], V[,2], kernel,gamma,alpha)
#                           - kernelFun( u1,    u2, U[,1], U[,2], kernel,gamma,alpha) ) * dlog)
#         Grad = grad/j + Grad*(j-1)/j
#       }
#       aIter = aIter - Grad / sqrt(i_iter)
#     }
#     for (i_iter in (niter+1):(2*niter)){
#       Grad = 0
#       for (j in 1:ndrawings){
#         U = BiCopSimMO(n,aIter)
#         V = BiCopSimMO(n,aIter)
#         usup = (U[,1]>U[,2])
#         uinf = (U[,1]<U[,2])
#         ueq = (U[,1]==U[,2])
#         dlog = (log(U[,1])-aIter/(1-aIter))*usup +
#           (log(U[,2])-aIter/(1-aIter))*uinf +
#           (1/aIter-log(U[,1])) * ueq
#
#         grad = mean(2 * ( kernelFun(U[,1], U[,2], V[,1], V[,2], kernel,gamma,alpha)
#                           - kernelFun( u1,    u2, U[,1], U[,2], kernel,gamma,alpha) ) * dlog)
#         Grad = grad/j + Grad*(j-1)/j
#       }
#       aIter = aIter - Grad / (sqrt(i_iter) * i_iter)
#     }
#     estimatorsA[i] = aIter
#   }
#
#   estim = mean( estimatorsA )
#
#   return(estim)
# }

# Try the MMDCopula package in your browser.
#
# Any scripts or data that you put into this service are public.
#
# MMDCopula documentation built on April 25, 2022, 5:06 p.m.