Nothing
#' Choose the bandwidth for kernel estimation of
#' conditional Kendall's tau using cross-validation
#'
#'
#' @description
#' Let \eqn{X_1} and \eqn{X_2} be two random variables.
#' The goal here is to estimate the conditional Kendall's tau
#' (a dependence measure) between \eqn{X_1} and \eqn{X_2} given \eqn{Z=z}
#' for a conditioning variable \eqn{Z}.
#' Conditional Kendall's tau between \eqn{X_1} and \eqn{X_2} given \eqn{Z=z}
#' is defined as:
#' \deqn{P( (X_{1,1} - X_{2,1})(X_{1,2} - X_{2,2}) > 0 | Z_1 = Z_2 = z)}
#' \deqn{- P( (X_{1,1} - X_{2,1})(X_{1,2} - X_{2,2}) < 0 | Z_1 = Z_2 = z),}
#' where \eqn{(X_{1,1}, X_{1,2}, Z_1)} and \eqn{(X_{2,1}, X_{2,2}, Z_2)}
#' are two independent and identically distributed copies of \eqn{(X_1, X_2, Z)}.
#' For this, a kernel-based estimator is used, as described in
#' (Derumigny & Fermanian (2019)).
#' These functions aims at finding the best bandwidth \code{h} among a given
#' \code{range_h} by cross-validation. They use either:
#' \itemize{
#' \item \strong{leave-one-out} cross-validation:
#' function \code{CKT.hCV.l1out}
#'
#' \item or \strong{K-folds} cross-validation:
#' function \code{CKT.hCV.Kfolds}
#' }
#'
#'
#' @param X1 a vector of \code{n} observations of the first variable
#'
#' @param X2 a vector of \code{n} observations of the second variable
#'
#' @param Z vector of observed values of Z.
#' If Z is multivariate, then this is a matrix whose rows correspond
#' to the observations of Z
#'
#' @param range_h vector containing possible values for the bandwidth.
#'
#' @param matrixSignsPairs square matrix of signs of all pairs,
#' produced by \code{\link{computeMatrixSignPairs}(observedX1, observedX2)}.
#' Only needed if \code{typeEstCKT} is not the default 'wdm'.
#'
#' @param nPairs number of pairs used in the cross-validation criteria.
#'
#' @param typeEstCKT type of estimation of the conditional Kendall's tau.
#'
#' @param kernel.name name of the kernel used for smoothing.
#' Possible choices are \code{"Gaussian"} (Gaussian kernel)
#' and \code{"Epa"} (Epanechnikov kernel).
#'
#' @param progressBar if \code{TRUE}, a progressbar for each h is displayed
#' to show the progress of the computation.
#'
#' @param verbose if \code{TRUE}, print the score of each h during the procedure.
#'
#' @param observedX1,observedX2,observedZ old parameter names for \code{X1},
#' \code{X2}, \code{Z}. Support for this will be removed at a later version.
#'
#'
#' @return Both functions return a list with two components:
#' \itemize{
#' \item \code{hCV}: the chosen bandwidth
#' \item \code{scores}: vector of the same length as range_h giving the
#' value of the CV criteria for each of the h tested.
#' Lower score indicates a better fit.
#' }
#'
#' @seealso \code{\link{CKT.kernel}} for the corresponding
#' estimator of conditional Kendall's tau by kernel smoothing.
#'
#' @references
#' Derumigny, A., & Fermanian, J. D. (2019).
#' On kernel-based estimation of conditional Kendall’s tau:
#' finite-distance bounds and asymptotic behavior.
#' Dependence Modeling, 7(1), 292-321.
#' Page 296, Equation (4).
#' \doi{10.1515/demo-2019-0016}
#'
#' @examples
#' # We simulate from a conditional copula
#' set.seed(1)
#' N = 200
#' Z = rnorm(n = N, mean = 5, sd = 2)
#' conditionalTau = -0.9 + 1.8 * pnorm(Z, mean = 5, sd = 2)
#' simCopula = VineCopula::BiCopSim(N=N , family = 1,
#' par = VineCopula::BiCopTau2Par(1 , conditionalTau ))
#' X1 = qnorm(simCopula[,1])
#' X2 = qnorm(simCopula[,2])
#'
#' newZ = seq(2,10,by = 0.1)
#' range_h = 3:10
#'
#' resultCV <- CKT.hCV.l1out(X1 = X1, X2 = X2, Z = Z,
#' range_h = range_h, nPairs = 100)
#'
#' resultCV <- CKT.hCV.Kfolds(X1 = X1, X2 = X2, Z = Z,
#' range_h = range_h, ZToEstimate = newZ)
#'
#' plot(range_h, resultCV$scores, type = "b")
#'
#' @export
#'
CKT.hCV.l1out <- function (X1 = NULL, X2 = NULL, Z = NULL,
range_h, matrixSignsPairs = NULL,
nPairs = 10*length(X1),
typeEstCKT = "wdm", kernel.name = "Epa",
progressBar = TRUE, verbose = FALSE,
observedX1 = NULL, observedX2 = NULL, observedZ = NULL)
{
# Back-compatibility code to allow users to use the old "observedX1 = ..."
env = environment()
.observedX1X2_to_X1X2(env)
.observedZ_to_Z(env)
n_h = length(range_h)
scores = rep(NA, n_h)
.checkSame_nobs_X1X2Z(X1, X2, Z)
.checkUnivX1X2(X1, X2)
if (NCOL(Z) == 1 && is.matrix(Z)){
Z = as.numeric(Z)
}
# Construction of the matrix of selected pairs
dataMatrix = datasetPairs_hCV(
X1 = X1, X2 = X2, Z = Z,
nPairs = nPairs, typeEstCKT = typeEstCKT)
nPairsReal = nrow(dataMatrix)
if(nPairsReal != nPairs) {
warning(paste0("nPairs =", nPairs, " but real number of pairs is ", nPairsReal))
}
if (is.vector(Z)){
computeScore <- function(i, i_h, h){
toBeRemoved = - c(dataMatrix[i , 4], dataMatrix[i , 5])
matrixScore[i, i_h] <<- CKT.kernelPointwise.univariate(
X1 = X1[toBeRemoved], X2 = X2[toBeRemoved],
matrixSignsPairs = matrixSignsPairs[toBeRemoved , toBeRemoved],
vectorZ = Z[toBeRemoved],
h = h, pointZ = dataMatrix[i , 2],
kernel.name = kernel.name, typeEstCKT = typeEstCKT)
}
} else {
dimZ = ncol(Z)
computeScore <- function(i, i_h, h){
toBeRemoved = - c(dataMatrix[i , 4], dataMatrix[i , 5])
matrixScore[i, i_h] <<- CKT.kernelPointwise.multivariate(
X1 = X1[toBeRemoved], X2 = X2[toBeRemoved],
matrixSignsPairs = matrixSignsPairs[toBeRemoved , toBeRemoved],
matrixZ = Z[toBeRemoved, ],
h = h, pointZ = dataMatrix[i , 1 + 1:dimZ],
kernel.name = kernel.name, typeEstCKT = typeEstCKT)
}
}
matrixScore = matrix(nrow = nPairsReal, ncol = n_h)
for (i_h in 1:n_h)
{
h = range_h[i_h]
if (progressBar){
pbapply::pbapply(X = array(1:nPairsReal),
FUN = computeScore, MARGIN = 1, i_h = i_h, h = h)
} else {
apply(X = array(1:nPairsReal),
FUN = computeScore, MARGIN = 1, i_h = i_h, h = h)
}
scores[i_h] = mean( (dataMatrix[,1] - matrixScore[, i_h])^2, na.rm = TRUE )
if (verbose){print(paste0("h = ", h, " ; score = ", scores[i_h]))}
}
hCV = range_h[which.min(scores)]
return(list(hCV = hCV, scores = scores))
}
#' Choose the bandwidth for estimation of conditional Kendall's tau
#' using K-fold cross-validation
#'
#'
#' @param ZToEstimate vector of fixed conditioning values at which
#' the difference between the two conditional Kendall's tau should be computed.
#' Can also be a matrix whose lines are the conditioning vectors at which
#' the difference between the two conditional Kendall's tau should be computed.
#'
#' @param Kfolds number of subsamples used.
#'
#' @rdname CKT.hCV.l1out
#' @export
#'
CKT.hCV.Kfolds <- function(X1, X2, Z, ZToEstimate,
range_h, matrixSignsPairs = NULL,
typeEstCKT = "wdm", kernel.name = "Epa",
Kfolds = 5, progressBar = TRUE, verbose = FALSE,
observedX1 = NULL, observedX2 = NULL, observedZ = NULL)
{
# Back-compatibility code to allow users to use the old "observedX1 = ..."
env = environment()
.observedX1X2_to_X1X2(env)
.observedZ_to_Z(env)
n_h = length(range_h)
scores = rep(NA, n_h)
n = NROW(Z)
.checkSame_nobs_X1X2Z(X1, X2, Z)
.checkUnivX1X2(X1, X2)
if (is.vector(Z)){
estimCKTNP <- CKT.kernel.univariate
Z = matrix(Z, ncol = 1)
} else {
estimCKTNP <- CKT.kernel.multivariate
}
computeScore <- function(i_h) {
foldid = sample(rep(seq(Kfolds), length = n))
list_vectorEstimate = as.list(seq(Kfolds))
list_vectorEstimate_comp = as.list(seq(Kfolds))
list_resultEstimation = as.list(seq(Kfolds))
distanceTot = 0
h = range_h[i_h]
for (i in seq(Kfolds)) {
which = foldid == i
list_vectorEstimate[[i]] = estimCKTNP(
X1 = X1[!which], X2 = X2[!which],
matrixSignsPairs = matrixSignsPairs[!which, !which],
Z = Z[!which,], ZToEstimate = ZToEstimate,
typeEstCKT = typeEstCKT, h = h, kernel.name = kernel.name, progressBar = FALSE)
list_vectorEstimate_comp[[i]] = estimCKTNP(
X1 = X1[which], X2 = X2[which],
matrixSignsPairs = matrixSignsPairs[which, which],
Z = Z[which,], ZToEstimate = ZToEstimate,
typeEstCKT = typeEstCKT, h = h, kernel.name = kernel.name, progressBar = FALSE)
if (all(!is.finite(list_vectorEstimate[[i]])))
{
distanceTot = NA ; break
}
distance = sum((list_vectorEstimate[[i]] - list_vectorEstimate_comp[[i]])^2)
if (verbose) {
print(distance)
}
distanceTot = distanceTot + distance
}
scores[i_h] <<- distanceTot
return()
}
if (progressBar){
pbapply::pbapply(X = array(1:n_h), FUN = computeScore, MARGIN = 1 )
} else {
apply(X = array(1:n_h), FUN = computeScore, MARGIN = 1 )
}
hCV = range_h[which.min(scores)]
return(list(hCV = hCV, scores = scores))
}
# Old code --------------------------------------------------------
# # This function takes in parameter
# # range_h the set of all h tested.
# # It returns a list where hCV is the best h
# # scores is the vector of the same length as range_h giving the
# # CV criteria for each of the h tested
# choose_hCV <- function (range_h, matrixSignsPairs, vectorZ, typeEstCKT, kernel.name)
# {
# n_h = length(range_h)
# scores = rep(NA, n_h)
#
# if (typeEstCKT == 1){
# matrix_g = 4 * matrixSignsPairs - 1
# } else if (typeEstCKT == 2 || typeEstCKT == 4){
# matrix_g = matrixSignsPairs
# } else if (typeEstCKT == 3){
# matrix_g = 1 - 4 * matrixSignsPairs
# } else {stop(paste0("typeEstCKT: ", typeEstCKT, " is not in {1,2,3}" ) ) }
#
# for (i_h in 1:n_h)
# {
# h = range_h[i_h]
#
# matrix_weights = outer(
# X = vectorZ, Y = vectorZ, FUN = function (u1, u2) {
# delta_u = (u1 - u2)/h
# return(0.75 * (1 - delta_u^2) * as.numeric(abs(delta_u) <= 1) / h ) } )
#
# matrix_CKT_esti = matrix(nrow = n, ncol = n)
#
# pbapply(X = array(n:1), FUN = function(i)
# {
# if (i == 1) {
# matrix_CKT_esti[i,i] <<-
# computeEstimateNP_tau_pointwise(
# matrixSignsPairs = matrixSignsPairs[-c(i), -c(i)], vectorZ = vectorZ[-c(i)],
# h = h, pointZ = vectorZ[i], kernel.name = kernel.name,
# typeEstCKT = typeEstCKT)
# } else {
# for (j in 1:(i-1))
# {
# matrix_CKT_esti[i,j] <<-
# computeEstimateNP_tau_pointwise(
# matrixSignsPairs = matrixSignsPairs[-c(i,j), -c(i,j)], vectorZ = vectorZ[-c(i,j)],
# h = h, pointZ = (vectorZ[i] + vectorZ[j])/2, kernel.name = kernel.name,
# typeEstCKT = typeEstCKT)
# matrix_CKT_esti[j,i] <<- matrix_CKT_esti[i,j]
# }
# matrix_CKT_esti[i,i] <<-
# computeEstimateNP_tau_pointwise(
# matrixSignsPairs = matrixSignsPairs[-c(i), -c(i)], vectorZ = vectorZ[-c(i)],
# h = h, pointZ = vectorZ[i], kernel.name = kernel.name,
# typeEstCKT = typeEstCKT)
# }
# return() }, MARGIN = 1 )
#
# scores[i_h] = sum( (matrix_g - matrix_CKT_esti)^2 * matrix_weights ) / length(matrix_g)
# }
# hCV = range_h[which.min(scores)]
# return(list(hCV = hCV, scores = scores))
# }
# # This function takes in parameter
# # range_h the set of all h tested.
# # It returns a list where hCV is the best h
# # scores is the vector of the same length as range_h giving the
# # CV criteria for each of the h tested
# choose_hCV_indK <- function (liste_h, matrixSignsPairs, vectorZ, typeEstCKT, kernel.name, nPairs)
# {
# n_h = length(liste_h)
# scores = rep(NA, n_h)
#
# if (typeEstCKT == 1){
# matrix_g = 4 * matrixSignsPairs - 1
# } else if (typeEstCKT == 2 || typeEstCKT == 4){
# matrix_g = matrixSignsPairs
# } else if (typeEstCKT == 3){
# matrix_g = 1 - 4 * matrixSignsPairs
# } else {stop(paste0("typeEstCKT: ", typeEstCKT, " is not in {1,2,3}" ) ) }
#
# matrix_diff = outer(
# X = vectorZ, Y = vectorZ, FUN = function (u1, u2) {
# return(abs(u1 - u2) ) } )
#
# seuil_a = quantile(x = as.numeric(matrix_diff), probs = (2*nPairs / length(matrix_diff)) )
# matrix_weights = as.numeric(matrix_diff <= seuil_a)
#
# # matrix_weights = outer(
# # X = vectorZ, Y = vectorZ, FUN = function (u1, u2) {
# # delta_u = (u1 - u2)/h
# # return(0.75 * (1 - delta_u^2) * as.numeric(abs(delta_u) <= 1) / h ) } )
#
# for (i_h in 1:n_h)
# {
# h = liste_h[i_h]
#
# matrix_CKT_esti = matrix(nrow = n, ncol = n)
#
# pbapply(X = array(n:1), FUN = function(i)
# {
# if (i == 1) {
# matrix_CKT_esti[i,i] <<-
# computeEstimateNP_tau_pointwise(
# matrixSignsPairs = matrixSignsPairs[-c(i), -c(i)], vectorZ = vectorZ[-c(i)],
# h = h, pointZ = vectorZ[i], kernel.name = kernel.name,
# typeEstCKT = typeEstCKT)
# } else {
# for (j in 1:(i-1))
# {
# matrix_CKT_esti[i,j] <<-
# computeEstimateNP_tau_pointwise(
# matrixSignsPairs = matrixSignsPairs[-c(i,j), -c(i,j)], vectorZ = vectorZ[-c(i,j)],
# h = h, pointZ = (vectorZ[i] + vectorZ[j])/2, kernel.name = kernel.name,
# typeEstCKT = typeEstCKT)
# matrix_CKT_esti[j,i] <<- matrix_CKT_esti[i,j]
# }
# matrix_CKT_esti[i,i] <<-
# computeEstimateNP_tau_pointwise(
# matrixSignsPairs = matrixSignsPairs[-c(i), -c(i)], vectorZ = vectorZ[-c(i)],
# h = h, pointZ = vectorZ[i], kernel.name = kernel.name,
# typeEstCKT = typeEstCKT)
# }
# return() }, MARGIN = 1 )
#
# scores[i_h] = mean( (matrix_g - matrix_CKT_esti)^2 * matrix_weights, na.rm = TRUE )
# }
# hCV = liste_h[which.min(scores)]
# return(list(hCV = hCV, scores = scores))
# }
# # This function takes in parameter
# # range_h the set of all h tested.
# # It returns a list where hCV is the best h
# # scores is the vector of the same length as range_h giving the
# # CV criteria for each of the h tested
# choose_hCV_indK_fast <- function (
# observedX1, observedX2, observedZ,
# liste_h, matrixSignsPairs, vectorZ, typeEstCKT,
# kernel.name, nPairs, verbose = TRUE)
# {
# n_h = length(liste_h)
# scores = rep(NA, n_h)
#
# # if (typeEstCKT == 1){
# # matrix_g = 4 * matrixSignsPairs - 1
# # } else if (typeEstCKT == 2 || typeEstCKT == 4){
# # matrix_g = matrixSignsPairs
# # } else if (typeEstCKT == 3){
# # matrix_g = 1 - 4 * matrixSignsPairs
# # } else {stop(paste0("typeEstCKT: ", typeEstCKT, " is not in {1,2,3}" ) ) }
#
# # Construction of the matrix of selected pairs
# switch(
# typeEstCKT,
#
# 1 = {dataMatrix <- datasetPairs_hCV1(
# X1 = observedX1, X2 = observedX2, Z = observedZ,
# h, cut = 0.5, onlyConsecutivePairs = FALSE, nPairs = NULL)},
# datasetPairs_hCV1(X1, X2, Z, h, cut = 0.5, onlyConsecutivePairs = FALSE, nPairs = NULL)
# )
# dataMatrix = computeDataMatrix_hCV(X1, X2, Z = vectorZ, cut = cut_a, typeEstCKT = typeEstCKT)
# nPairsReal = nrow(dataMatrix)
# if(nPairsReal != nPairs) {
# warning(paste0("nPairs =", nPairs, " but real number of pairs is ", nPairsReal))
# }
#
# matrixScore = matrix(nrow = nPairsReal, ncol = n_h)
# for (i_h in 1:n_h)
# {
# h = liste_h[i_h]
# pbapply::pbapply(X = array(1:nPairsReal), FUN = function(i)
# {
# matrixScore[i, i_h] <<- computeEstimateNP_tau_pointwise(
# matrixSignsPairs = matrixSignsPairs[-c(dataMatrix[i,4], dataMatrix[i,5]) ,
# -c(dataMatrix[i,4], dataMatrix[i,5])],
# vectorZ = vectorZ[-c(dataMatrix[i,4], dataMatrix[i,5])],
# h = h, pointZ = dataMatrix[i,2], kernel.name = kernel.name,
# typeEstCKT = typeEstCKT)
# }, MARGIN = 1 )
# scores[i_h] = mean( (dataMatrix[,1] - matrixScore[, i_h])^2, na.rm = TRUE )
# if (verbose){print(paste0("h = ", h, " ; score = ", scores[i_h]))}
# }
# hCV = liste_h[which.min(scores)]
# return(list(hCV = hCV, scores = scores))
# }
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.