Nothing
#' @title Draw Kendall plot and compute AUK.
#' @description This function draws Kendall plot of 2 variables. Also provides an index AUK (area under Kendall plot).
#'
#' @param x a numeric vector stores first variable.
#' @param y a numeric vector stores second variable.
#' @param plot a TRUE/ FALSE flag for generating Kendall plot or not.
#' @param main a character indicating the title of the plot.
#' @param Auxiliary.line a TRUE/ FALSE flag for drawing auxiliary lines or not.
#' @param BS.CI a numeric specifying alpha for Bootstrap confidence interval. When euqal 0, confidence interval won't be computed.
#' @param set.seed a TRUE/ FALSE flag specifying setting seed or not.
#' @author Jeffrey C. Miecznikowski, En-shuo Hsu, Yanhua Chen, Albert Vexler
#' @details
#' AUK is bounded between 0 and 0.75. For positively correlated x and y's, say x = y, AUK = 0.75. And the plot follows the concave auxiliary line. While negatively correlated x and y's, AUK = 0. The plot is horizontal on y = 0. For independent x and y, AUK = 0.5. Kendall plot is on the diagonal.
#' Due to possible variable overflow, this function is only suitable for input size less than 1000. Input size greater than 1000 causes error.
#'
#' @return
#' a list containing a numeric AUK, a numeric vector W.in (x axis of plot), a numeric vector Hi.sort (y axis of plot), and three confidence intervals: normal CI, pivotal CI and percentage CI.
#'
#' @references
#' Vexler, Albert, Xiwei Chen, and Alan D. Hutson. "Dependence and independence: Structure and inference." Statistical methods in medical research (2015): 0962280215594198.
#'
#' R package "VineCopula":
#' Schepsmeier, Ulf, et al. "Package 'VineCopula'." (2015).
#'
#' @export
#' @examples
#' set.seed(123)
#' x = runif(100)
#' y = runif(100)
#'
#' result = AUK(x, y, plot = TRUE)
#' result$AUK
#'
#' #[1] 0.4987523
AUK = function(x, y, plot = F, main = "Kendall plot", Auxiliary.line = T, BS.CI = 0, set.seed = FALSE){
#check input
if(!is.numeric(x) || !is.numeric(y) || length(x)<2 || length(y)<2)
stop("x and y must be numeric vectors.")
if(length(x) > 1000)
stop("Input size must be less than 1000.")
if(length(x) != length(y))
stop("x and y must be paired (with equal length).")
if(!is.logical(plot))
stop("plot must be of type logical")
if(!is.character(main))
stop("main must be of type character.")
if(BS.CI<0 || BS.CI >1)
stop("BS.CI must be between 0 and 1.")
#compute x, y's copula
n = length(x)
cx = rank(x, ties.method = "min")/n
cy = rank(y, ties.method = "min")/n
#CI
if(BS.CI != 0){
times = 1000
BS = function(i){
if(set.seed){set.seed(i)}
id = sample(1:n, n, replace = T)
sx = x[id]
sy = y[id]
return (kPlot(sx, sy))
}
result = lapply(1:times, BS)
Ts = c()
for(i in 1:times){
Ts[i] = result[[i]]$AUK
}
out = kPlot(cx, cy, PLOT = plot, main = main, Auxiliary.line = Auxiliary.line)
result = c(out, getCI(out$AUK, Ts, BS.CI))
return (result)
}
else{
return(kPlot(cx, cy, PLOT = plot, main = main, Auxiliary.line = Auxiliary.line))
}
}
my_rank = function(x){
uni = unique(sort(x))
out = c()
for(i in 1:length(x)){
out[i] = which(uni == x[i])
}
return (out)
}
Hn = function(x, y){
n = length(x)
table = emcdf_output(x, y, TRUE)
rankX = my_rank(x)
rankY = my_rank(y)
out = c()
for(i in 1:n){
out[i] = table[rankY[i],rankX[i]]-1
}
return(out/(n-1))
}
kPlot = function(x, y, PLOT = F, main, Auxiliary.line){
Hi <- Hn(x, y)
Hi.sort <- sort(Hi)
n <- length(x)
W.in <- rep(NA, n)
for (i in 1:n) {
# function to be integrated
f <- function(w) {
w * (-log(w)) * (w - w * log(w))^(i - 1) * (1 - w + w * log(w))^(n - i)
}
W.in[i] <- n * choose(n - 1, i - 1) * (integrate(f, lower = 0, upper = 1)$value)
}
g <- function(w) {
w - w * log(w)
} # K_{0}(w)=P(UV<=w)
if (PLOT) {
# should the results be plotted?
if(Auxiliary.line){
plot(g, main = main, xlim = c(0, 1), ylim = c(0, 1), pch = "x", xlab = expression(W[1:n]),
ylab = "H") #Kurve K_{0}(w)
points(W.in, Hi.sort, pch = "x", cex = 0.4)
abline(a = 0, b = 1) # angle bisector
}
else{
plot(W.in, Hi.sort, cex = 0.4, main = main, xlim = c(0, 1), ylim = c(0, 1), pch = "x", xlab = expression(W[1:n]),
ylab = "H") #Kurve K_{0}(w)
}
}
#compute AUK
W <- c(0,W.in,1)
H <- c(0,Hi.sort,1)
idx <- 2:length(W)
area <- ((W[idx]-W[idx-1])%*%(H[idx]+H[idx-1]))/2
AUK = min(area,0.75)
# create output data
kendall.plot.output <- list(AUK, W.in, Hi.sort)
names(kendall.plot.output) <- c("AUK","W.in", "Hi.sort")
return(kendall.plot.output)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.