R/HellCor.R In HellCor: The Hellinger Correlation

Documented in HellCor

```HellCor <- function(x, y = NULL, Kmax = 20L, Lmax = 20L, K = 0L, L = 0L, alpha = 6.0,
pval.comp = FALSE, conf.level = NULL, B1 = 200L, B2 = 200L,
C.version = TRUE) {

if (!is.null(y)) {
if (length(x) != length(y)) stop("'x' and 'y' should have the same length.")
} else {
if (!is.matrix(x)) stop("'x' should be a matrix when 'y' is not supplied.")
if (ncol(x) != 2) stop("'x' should be a matrix with two columns.")
y <- x[, 2]
x <- x[, 1]
}
Kmax <- as.integer(Kmax)
Lmax <- as.integer(Lmax)
K <- as.integer(K)
L <- as.integer(L)
B1 <- as.integer(B1)
B2 <- as.integer(B2)
if (K < 0) stop("'K' should be positive.")
if (L < 0) stop("'L' should be positive.")
if (!is.null(conf.level)) {
if (length(conf.level) > 1) stop("'conf.level' should be of length 1.")
if ((B1 <= 0) | (B2 <= 0)) stop("'B1' and 'B2' should be positive if 'conf.level' is used.")
eps <- 100 * .Machine\$double.eps
if (any(conf.level < -eps | conf.level > 1 + eps)) stop("'conf.level' outside [0,1]")
}
CIeta <- NA
pval <- NA

if (C.version) {
res <- .C("hellcorC", as.double(cbind(x, y)), as.integer(2 * length(x)), statistic = as.double(0.0), as.integer(pval.comp),
pvalue = as.double(0.0), Khat = as.integer(Kmax), Lhat = as.integer(Lmax), as.integer(K), as.integer(L), as.double(alpha),
conf.level = as.double(0.0), as.integer(B1), as.integer(B2), CIleft = as.double(0.0), CIright = as.double(0.0), PACKAGE = "HellCor")
if (!is.null(conf.level)) {
res <- .C("hellcorC", as.double(cbind(x, y)), as.integer(2 * length(x)), statistic = as.double(0.0), as.integer(pval.comp),
pvalue = as.double(0.0), Khat = as.integer(Kmax), Lhat = as.integer(Lmax), as.integer(K), as.integer(L), as.double(alpha),
conf.level = as.double(conf.level), as.integer(B1), as.integer(B2), CIleft = as.double(0.0), CIright = as.double(0.0), PACKAGE = "HellCor")
CIeta <- c(res\$CIleft, res\$CIright)
}
} else {
n <- length(x)
XX <- cbind(x, y)
UU <- apply(XX, MARGIN = 2, FUN = rank) / (nrow(XX) + 1)
TT <- apply(UU, MARGIN = 2, FUN = function(x) qbeta(x, alpha, alpha))
weight  <- sqrt(dbeta(TT[, 1], alpha, alpha) * dbeta(TT[, 2], alpha, alpha))
Dist <- dist(TT, diag = TRUE, upper = TRUE)
#    library("FNN")
neighbors <- FNN::get.knn(TT, k = 2)
R1 <- neighbors\$nn.dist[, 1] # in row i: distance from point i to its closest neighbor
R2 <- neighbors\$nn.dist[, 2] # in row i: distance from point i to its second closest neighbor
II <- neighbors\$nn.index # in row i: index of closest and second closest neighbor to point i
R1 <- R1 * weight
R2 <- R2 * weight
#    library("orthopolynom")
tmp <- orthopolynom::legendre.polynomials(max(Kmax, Lmax), normalized = TRUE) # Normalized on [0, 1]
bfuncs <- lapply(tmp, function(y) {eval(parse(text = paste("function(x)", y)))}) # bfuncs[[k]] is b_{k - 1}
bfuncs[[1]] <- function(x) rep(sqrt(2) / 2, length(x))
cte1 <- 2 * sqrt(n - 1) / n
betahat <- matrix(NA, nrow = Kmax + 1, ncol = Lmax + 1)
for (k in 0:Kmax) { # Equ. (5.2)
for (l in 0:Lmax) {
betahat[k + 1, l + 1] <- cte1 * sum(R1 * sqrt(2) * bfuncs[[k + 1]](2 * UU[, 1] - 1) * sqrt(2) * bfuncs[[l + 1]](2 * UU[, 2] - 1))
}
}
Aterm <- matrix(NA, nrow = Kmax + 1, ncol = Lmax + 1) # Denominator (5.3) without sqrt
for (K in 0:Kmax) {
for (L in 0:Lmax) {
Aterm[K + 1 , L + 1] <- sum(betahat[1:(K + 1), 1:(L + 1)] ^ 2)
}
}
cte2 <- 2 * sqrt(n - 2) / (n - 1)
betahatminusi <- as.list(1:n) # Second equation after (C.1)
for (i in 1:n) {
rminusivec <- R1
rminusivec[II[, 1] == i] <- R2[II[, 1] == i]
rminusivec[i] <- 0
betahatminusi[[i]] <- matrix(NA, nrow = Kmax + 1, ncol = Lmax + 1)
for (k in 0:Kmax) {
for (l in 0:Lmax) {
betahatminusi[[i]][k + 1, l + 1]  <- cte2 * sum(rminusivec * sqrt(2) * bfuncs[[k + 1]](2 * UU[, 1] - 1) * sqrt(2) * bfuncs[[l + 1]](2 * UU[, 2] - 1))
}
}
}

Khat <- Lhat <- 0
max <- -Inf
for (K in 0:Kmax) {
for (L in 0:Lmax) {
term <- 0
for (i in 1:n) {
num  <- denom <- 0
for (k in 0:K) {
for (l in 0:L) {
num <- num + betahatminusi[[i]][k + 1, l + 1] * sqrt(2) * bfuncs[[k + 1]](2 * UU[i, 1] - 1) * sqrt(2) * bfuncs[[l + 1]](2 * UU[i, 2] - 1)
denom <- denom + (betahatminusi[[i]][k + 1, l + 1]) ^ 2
}
}
term <- term + R1[i] * num / sqrt(denom)
}
term <- cte1 * term
if (max < term) {
max <- term
Khat <- K
Lhat <- L
}
}
}
if (Khat < 1) Khat  <-  1
if (Lhat < 1) Lhat  <-  1

etafunc <- function(H2) {
B  <- 1 - H2
return(2.0 * sqrt(-2.0 + B ^ 4 + sqrt(4.0 - 3.0 * B ^ 4)) / B ^ 2)
}

etahat  <-  etafunc(1.0 - betahat[1][1] / sqrt(Aterm[Khat + 1, Lhat + 1]))

res <- vector("list")
res\$statistic <- etahat
res\$Khat <- Khat
res\$Lhat <- Lhat

if (!is.null(conf.level)) {
XX <- cbind(x, y)
n <- nrow(XX)
RR <- apply(XX, 2, rank)
etahatstar <- Zstar <- rep(NA, B1)
etahatstarstar <- rep(NA, B2)
for (b1 in 1:B1) {
II <- sample(1:n, n, replace = TRUE)
UUstar <- cbind(rbeta(n, RR[, 1][II], n + 1 - RR[, 1][II]), rbeta(n, RR[, 2][II], n + 1 - RR[, 2][II]))
etacur <- HellCor(UUstar[, 1], UUstar[, 2], Kmax = Kmax, Lmax = Lmax, K = 0L, L = 0L, alpha = alpha, pval.comp = FALSE, conf.level = NULL)
etahatstar[b1] <- etacur\$Hcor
RRstar <- apply(UUstar, 2, rank)
for (b2 in 1:B2) {
II <- sample(1:n, n, replace = TRUE)
UUstarstar <- cbind(rbeta(n, RRstar[, 1][II], n + 1 - RRstar[, 1][II]), rbeta(n, RRstar[, 2][II], n + 1 - RRstar[, 2][II]))
etahatstarstar[b2] <- HellCor(UUstarstar[, 1], UUstarstar[, 2], Kmax = Kmax, Lmax = Lmax,
K = 0L, L = 0L, alpha = alpha, pval.comp = FALSE, conf.level = NULL)\$Hcor
}
Vstarstar <- sd(etahatstarstar)
Zstar[b1] <- (etacur\$Hcor - res\$statistic) / Vstarstar
}
Vstar <- sd(etahatstar)
CIeta <- pmin(pmax(0, res\$statistic - Vstar * quantile(Zstar, c(1 - (1 - conf.level) / 2, (1 - conf.level) / 2))), 1)
}
}

if (pval.comp) {
n <- length(x)
twon <- 2 * n
y <- runif(n)
pval <- mean(replicate(10000, .C("hellcorC", as.double(cbind(runif(n), y)), as.integer(twon), statistic = as.double(0.0),
as.integer(0), as.double(0.0), as.integer(4), as.integer(4), as.integer(0), as.integer(L),
as.double(alpha), as.double(0.0), as.integer(B1), as.integer(B2),
CIleft = as.double(0.0), CIright = as.double(0.0),
PACKAGE = "HellCor")\$statistic) > res\$statistic)
}

if ((Kmax == 0L) & (Lmax == 0L)) {res\$Khat <- NA; res\$Lhat <- NA}
return(list(Hcor = res\$statistic, p.value = pval, conf.int = CIeta, Khat = res\$Khat, Lhat = res\$Lhat))

}

.datagenW.corrected <- function(n) {
x <- runif(n, -1, 1)
u <- x + runif(n) / 3
v <- 4 * ((x ^ 2 - 0.5) ^ 2 + runif(n) / 500)
return (cbind(u, v))
}
.datagenDiamond <- function(n) {
x <- runif(n, min = -1, max = 1)
y <- runif(n, min = -1, max = 1)
return (sqrt(2) * (cbind(x - y, x + y)) / 2)
}
.datagenParabola.corrected <- function(n) {
x <- runif(n, -1, 1)
y <- (x ^ 2 + runif(n)) / 2
return (cbind(x, y))
}
.datagen2Parabolas.corrected <- function(n) {
x <- runif(n, -1, 1)
y <- (x ^ 2 + runif(n) / 2) * (sample(c(-1, 1), size = n, replace = TRUE))
return (cbind(x, y))
}
.datagenCircle.corrected <- function(n) {
x <- runif(n, -pi, pi)
u <- sin(x) + rnorm(n) / 8
v <- cos(x) + rnorm(n) / 8
return (cbind(u, v))
}
.datagen4indclouds <- function(n) {
dx <- rnorm(n) / 3
dy <- rnorm(n) / 3
cx <- sample(c(-1, 1), size = n, replace = TRUE)
cy <- sample(c(-1, 1), size = n, replace = TRUE)
u <- cx + dx
v <- cy + dy
return (cbind(u, v))
}
.datagenCubic <- function(n) {
x <- runif(n, -1.3, 1.1)
x2 <- x + runif(n, -1.3, 1.1) / 2
y <- 4 * x2 ^ 3 + x2 ^ 2 - 4 * x2
return (cbind(x, y))
}
.datagenSine <- function(n) {
x <- runif(n, 0, 1)
y <- sin(8 * pi * x) + runif(n, -1, 1) / 3
return (cbind(x, y))
}
.datagenWedge <- function(n) {
x <- runif(n, 0, 1)
y <- x * (sample(c(-1, 1), size = n, replace = TRUE)) + runif(n, -1, 1) / 2
return (cbind(x, y))
}
.datagenCross <- function(n) {
x <- runif(n, 0, 1)
s <- sample(c(-1, 1), size = n, replace = TRUE)
y <- 2 * s * x - (s - 1) + runif(n, -1, 1) / 2
return (cbind(x, y))
}
.datagenSpiral <- function(n,sigma=1/8) {
theta <- runif(n, 0, 6 * pi)
r <- (1/3) * theta
x <- r * cos(theta) + rnorm(n)*sigma
y <- r * sin(theta) + rnorm(n)*sigma
return (cbind(x, y))
}
.datagen4Circles <- function(n) {
theta <- runif(n, -pi, pi)
s1 <- sample(c(-1, 1), size = n, replace = TRUE)
s2 <- sample(c(-1, 1), size = n, replace = TRUE)
y.center <- s1 * 1
x.center <- s2 * 1
y <- radius * sin(theta) + y.center
x <- radius * cos(theta) + x.center
return (cbind(x, y))
}
.datagenHeavisine <- function(n) {
x <- runif(n, 0, 1)
y <- (4 * sin(4 * pi * x) - sign(x - 0.3) - sign (0.72 - x))/3 + runif(n, -1, 1) / 2
return (cbind(x, y))
}
.datagenDoppler <- function(n) {
x <- runif(n, 0, 1)
y <- sqrt(x * (1 - x)) * sin(2.1 * pi / (x + 0.05)) + runif(n, -1, 1) / 2
return (cbind(x, y))
}
.datagen5Clouds <- function(n) {
dx <- rnorm(n) / 5
dy <- rnorm(n) / 5
cx <- sample(c(-1, 1), size = n, replace = TRUE)
cy <- sample(c(-1, 1), size = n, replace = TRUE)
s <- sample(c(0, 1, 1, 1, 1), size = n, replace = TRUE)
u <- s * cx + dx
v <- s * cy + dy
return (cbind(u, v))
}

.datagenBex <- function(n, depth = 2) { # depth >=1
width <- 1 / 2 ^ depth
which.be <- sample(1:2 ^ (2 * (depth - 1)), n, replace = TRUE)
bex.center <- cbind(rep((1:2 ^ (depth - 1)) / 2 ^ (depth - 1),
2 ^ (depth - 1)),
rep((1:2 ^ (depth - 1)) / 2 ^ (depth - 1),
rep(2 ^ (depth - 1), 2 ^ (depth - 1)))) - 1 / 2 ^ depth
x_inc <- runif(n) * 2 * width - width
y_inc <- x_inc * sample(c(-1, 1), n, replace = TRUE)
xy <- bex.center[which.be, ] + cbind(x_inc, y_inc)
colnames(xy) <- c("x", "y")
return(xy)
}

.datagenHilbertPeano <- function(n, depth = 1) { # depth >=1
res <- .C("hilbertpeano", x = as.double(rep(0.0, 2 * n)), xlen = as.integer(2 * n), depth = as.integer(depth), setseed = 1L, PACKAGE = "HellCor")
res <- matrix(res\$x, nrow = n, ncol = 2)
return(res)
}

.drawHilbertPeano <- function(depth = 1, drawlines = FALSE) { # depth >=1

Double <- function(A) {
#The matrix for H(n) is equal to Double(H(n-1)).
m <- dim(A)[1]
depth <- dim(A)[2]
N <- m * depth
B <- A + N
C <- B + N
D <- C + N
E <- cbind(rbind(B, skew.transpose(A)), rbind(C, t(D)))
return(E)
}

Rotate <- function(A) {
#Rotates the matrix A clockwise.
m <- dim(A)[1]
depth <- dim(A)[2]
N <- m * depth
B <- matrix(0, m, depth)
for (i in 1:m) for (j in 1:depth) B[j, depth + 1 - i] <- A[i, j]
return(B)
}

skew.transpose <- function(A) {
return(Rotate(Rotate(t(A))))
}

rowofx <- function(A, x) {
#Returns the row index of the matrix A for entry equal to x.
m <- dim(A)[1]
depth <- dim(A)[2]
for (i in 1:m) for (j in 1:depth) if (A[i, j] == x) return(i)
}

colofx <- function(A, x) {
#Returns the column index of the matrix A for entry equal to x.
m <- dim(A)[1]
depth <- dim(A)[2]
for (i in 1:m) for (j in 1:depth) if (A[i, j] == x) return(j)
}

Draw <- function(A, n) {
#Draws a graphical representation of the matrix A.
A <- Rotate(A)
m <- dim(A)[1]
depth <- dim(A)[2] - 1
N <- m * (depth + 1)
plot((colofx(A, 1) - 1) / depth, (rowofx(A, 1) - 1) / depth, pch = 19, cex = 0.5, ylim = c(0,1), xlim =c(0,1), ylab = character(1), xlab = character(1), axes = FALSE)
for (i in 1:(N - 1)) lines(c((colofx(A, i) - 1) / depth, ((colofx(A, i + 1) - 1) / depth)), c((rowofx(A, i) - 1) / depth, ((rowofx(A, i + 1) - 1) / depth)), lwd = 1)
points((colofx(A, N) - 1) / depth, (rowofx(A, N) - 1) / depth, pch = 19, cex = 0.5)
}

H <- function(depth) {
if (depth == 0) return(matrix(c(2, 1, 3, 4), 2, 2))
return(Double(H(depth - 1)))
}

H <- H(depth)
A <- Rotate(H)
m <- dim(A)[1]
n <- dim(A)[2] - 1

N <- m * (depth+1)
pts <- runif(n, min = 1, max = N)
left <- floor(pts)
right <- ceiling(pts)
alpha <- pts - left

mat <- matrix(NA, ncol = 2, nrow = n)
for (ind in 1:n) {
i <- left[ind]
x1 <- (rowofx(A, i) - 1) / depth
x2 <- (rowofx(A, i + 1) - 1) / depth
y1 <- (colofx(A, i) - 1) / depth
y2 <- (colofx(A, i + 1) - 1) / depth
mat[ind, ] <- c(x1 + alpha[ind] * (x2 - x1), y1 + alpha[ind] * (y2 - y1))
}

if (drawlines) Draw(H, n)

return(mat)
}

# Generate samples from the 15 scenarios
.gendep <- function(n, alpha, beta, psi1, psi2, dimU2) {
u1 <- runif(n)
uu2 <- matrix(runif(n * dimU2), ncol = dimU2)
w1 <- runif(n)
ww2 <- matrix(runif(n * dimU2), ncol = dimU2)
v1 <- runif(n)
v2 <- runif(n)
X1X2 <- cbind(psi1(u1, uu2), alpha * psi2(u1, uu2) + (1 - alpha) * psi2(w1, ww2)) + beta * cbind(qnorm(v1), qnorm(v2))
return(X1X2)
}

.datagen15 <- function(n, alpha, beta, type) {
switch(type,
"W" = { # corrected
dimU2 <- 0
psi1 <- function(u1, uu2) (2 * u1 - 1)
psi2 <- function(u1, uu2) 4 * ((2 * u1 - 1) ^ 2 - 0.5) ^ 2
},
"Diamond" = {
dimU2 <- 1
psi1 <- function(u1, uu2) 2 * u1
psi2 <- function(u1, uu2) -1 + (2 * (uu2 > 0.5) - 1) * (1 - abs(2 * u1 - 1))
},
"Parabola" = { # corrected
dimU2 <- 0
psi1 <- function(u1, uu2) (2 * u1 - 1)
psi2 <- function(u1, uu2) (2 * u1 - 1) ^ 2 / 2
},
"2Parabolas" = { # corrected
dimU2 <- 1
psi1 <- function(u1, uu2) (2 * u1 - 1)
psi2 <- function(u1, uu2) (2 * u1 - 1) ^ 2 * (2 * (uu2 > 0.5) - 1)
},
"Circle" = { # corrected
dimU2 <- 0
psi1 <- function(u1, uu2) sin(2 * pi * u1 - pi)
psi2 <- function(u1, uu2) cos(2 * pi * u1 - pi)
},
"4Clouds" = {
dimU2 <- 0
psi1 <- function(u1, uu2) (2 * (u1 > 0.5) - 1) + u1 / 100
psi2 <- function(u1, uu2) (2 * (u1 > 0.5) - 1) + u1 / 100
},
"Cubic" = {
dimU2 <- 0
psi1 <- function(u1, uu2) (2 * u1 - 1) ^ 3
psi2 <- function(u1, uu2) 4 * (2 * u1 - 1) ^ 3 + (2 * u1 - 1) ^ 2 - 4 * (2 * u1 - 1)
},
"Sine" = {
dimU2 <- 0
psi1 <- function(u1, uu2) 4 * u1
psi2 <- function(u1, uu2) 4 * sin(4 * pi * u1)
},
"Wedge" = {
dimU2 <- 1
psi1 <- function(u1, uu2) u1
psi2 <- function(u1, uu2) u1 * (2 * (uu2 > 0.5) - 1)
},
"Cross" = {
dimU2 <- 1
psi1 <- function(u1, uu2) u1
psi2 <- function(u1, uu2) (2 * u1 - 1) * (2 * (uu2 > 0.5) - 1)
},
"Spiral" = {
dimU2 <- 0
psi1 <- function(u1, uu2) u1 *  6 * pi * cos(u1 * 6 * pi) / 3
psi2 <- function(u1, uu2) u1 *  6 * pi * sin(u1 * 6 * pi) / 3
},
"4Circles" = {
dimU2 <- 2
psi1 <- function(u1, uu2) sqrt(2) * cos(u1 * 2 * pi) + (2 * (uu2[, 1] > 0.5) - 1)
psi2 <- function(u1, uu2) sqrt(2) * sin(u1 * 2 * pi) + (2 * (uu2[, 2] > 0.5) - 1)
},
"Heavisine" = {
dimU2 <- 0
psi1 <- function(u1, uu2) 4 * u1
psi2 <- function(u1, uu2) {y  <- (4 * sin(4 * pi * u1) - sign(u1 - 0.3) - sign(0.72 - u1)); return(y / sqrt(var(y)))}
},
"Doppler" = {
dimU2 <- 0
psi1 <- function(u1, uu2) 10 * u1
psi2 <- function(u1, uu2) {eps <- 0.05; y <- sqrt(u1 * (1 - u1)) * sin((2 * pi * (1 + eps))/ (u1 + eps)); return(y / sqrt(var(y)))}
},
"5to15Clouds" = {
dimU2 <- 0
psi1 <- function(u1, uu2) {mu <- 1; ind1 <- (u1 < (1 / 5)); ind2  <- (u1 >= (1 / 5)) & (u1 < (2 / 5)); ind3  <- (u1 >= (2 / 5)) & (u1 < (3 / 5)); ind4  <- (u1 >= (3 / 5)) & (u1 < (4 / 5)); ind5 <- (u1 >= (4 / 5)); u1[ind1] <- u1[ind1] / 100 - mu; u1[ind2] <- u1[ind2] / 100 - mu; ; u1[ind3] <- u1[ind3] / 100; u1[ind4] <- u1[ind4] / 100 + mu; u1[ind5] <- u1[ind5] / 100 + mu; return(u1)}
psi2 <- function(u1, uu2) {mu <- 1; ind1 <- (u1 < (1 / 5)); ind2  <- (u1 >= (1 / 5)) & (u1 < (2 / 5)); ind3  <- (u1 >= (2 / 5)) & (u1 < (3 / 5)); ind4  <- (u1 >= (3 / 5)) & (u1 < (4 / 5)); ind5 <- (u1 >= (4 / 5)); u1[ind1] <- u1[ind1] / 100 - mu; u1[ind2] <- u1[ind2] / 100 + mu; ; u1[ind3] <- u1[ind3] / 100; u1[ind4] <- u1[ind4] / 100 + mu; u1[ind5] <- u1[ind5] / 100 - mu; return(u1)}
}
)
res <- .gendep(n, alpha, beta, psi1, psi2, dimU2)
return(res)
}
```

Try the HellCor package in your browser

Any scripts or data that you put into this service are public.

HellCor documentation built on July 2, 2020, 3:57 a.m.