#' Estimate local correlation \eqn{$\rho$} between two variables.
#'
#' @description This function uses the local likelihood function
#' proposed by Hjort & Jones (1996) to find an estimate of the
#' local correlation \eqn{$\rho$} given \eqn{$n$} bivariate
#' observations and a vector \eqn{$b$} with the specified
#' bandwidths. Note that the code use the simplifying assumption
#' that the the marginals are standard normal, which reduces the
#' number of parameters to estimate from five to one. This
#' function is based on code from H{\\aa}kon Otneim.
#'
#' @details In the computation, a product kernel that either is based
#' on the standard normal distribution or the uniform distribution
#' is used. The default kernel is the normal one, since it seems
#' to behave better than the uniform kernel with regard to being
#' able to get a proper result (i.e. not \code{NA}) and since it
#' also seems to compute a bit faster when a computation is
#' possible. Note that the code computes the required densities
#' (related to the normal distribution) directly instead of using
#' \code{dnorm} and \code{dmvnorm}, since that gives a speed gain.
#'
#' @references Hjort, N. L., and Jones, M. C.:
#' "Locally parametric nonparametric density estimation.", The
#' Annals of Statistics (1996): 1619-1647.
#'
#' @param data A matrix containing the observations, one bivariate
#' observation in each row.
#'
#' @param grid A matrix containing the points where we want to find
#' the local correlations, one bivariate point in each row.
#'
#' @param b A vector containing the two bandwidths to use.
#'
#' @param .kernel Specification of the kernel to use, either
#' \code{normal} or \code{uniform}, with default being the
#' \code{normal} one if none is selected. Note: The
#' \code{uniform}-kernel takes much longer to compute (a factor of
#' ten when used on a small sample of size \eqn{$n=200$}).
#'
#' @param return_arguments Logical argument, default \code{FALSE},
#' that can be used to include the three arguments \code{data},
#' \code{grid} and \code{b} in the result from this function.
#'
#' @return The result of this function is a list. For each grid-point
#' there will be an estimate of the local correlation and the
#' log-density of the corresponding local Gaussian approximation
#' at that point. When \code{return_arguments} is \code{TRUE},
#' the three arguments \code{data}, \code{grid} and \code{b} will
#' also be included.
#'
#' @keywords internal
find_rho <- function(data,
grid,
b,
.kernel = c("normal", "uniform"),
return_arguments = FALSE) {
## Restrict to the first alternative of '.kernel'.
.kernel <- .kernel[1]
## Extract the data and the bandwidths from 'data' and 'b'.
x <- data[,1]
y <- data[,2]
##---
b1 <- b[1]
b2 <- b[2]
## If 'x' and 'y' are identical, then no computation should be
## performed (all the points lies on the diagonal). In this case
## return the value '1' for 'par.est' and a 'NA' for 'log_f.est'
if (identical(x, y))
return(list(
par.est = 1,
log_f.est = NA_real_))
## Compute some values that we don't need to compute every time
## we want to consider a new grid-point.
xx_yy <- x^2 + y^2
xy <- x * y
## Create the key-function that maximises the desired local
## likelihood function for a given grid-point.
maximise.likelihood <- function(grid.point) {
## Extract the coordinates.
x0 <- grid.point[1]
y0 <- grid.point[2]
## Compute a bunch of constants (not depending on rho) that
## will be used repeatedly in the optimisation. Reminder:
## There's a cost of looking up values too, so the gain from
## this step might be questionable since these are "trivial"
## operations...
x0x0 <- x0^2
y0y0 <- y0^2
x0y0.2 <- 2*x0*y0
.log2pi <- log(2*pi)
## Stuff to use when '.kernel' is given as "normal".
if (identical(.kernel, "normal")) {
.b1_2 <- 1 + b1^2
.b2_2 <- 1 + b2^2
pi.2 <- 2 * pi
## Some weights.
W <- exp(-(((x-x0)/b1)^2 + ((y-y0)/b2)^2)/2) / (2 * pi * b1 * b2)
m1 <- mean(W)
m23 <- mean(W * xx_yy)
m4.2 <- 2 * mean(W * xy)
} else {
## The case when '.kernel' is given as "uniform".
inside_bandwidth_rectangle <- all(
abs(x - x0) <= b1,
abs(y - y0) <= b2)
## Some weights.
m1 <- 1/ dim(data)[1]
m23 <- m1 * sum(xx_yy[inside_bandwidth_rectangle])
m4.2 <- 2 * m1 * sum(xy[inside_bandwidth_rectangle])
}
## The likelihood function, depending on '.kernel'.
lik <-
if (identical(.kernel, "normal")) {
function(rho) {
.rho2 <- 1-rho^2
.tmp <- .b1_2 * .b2_2 - rho^2
##---
(- .log2pi - 0.5*log(.rho2)) * m1 - (m23 - rho*m4.2)/(2*.rho2) -
exp(-(.b2_2*x0x0 + .b1_2*y0y0 - x0y0.2*rho)/(2*.tmp))/
(pi.2 * sqrt(.tmp))
}
} else
## The case "uniform", ignore common scaling factor
## of '1/(4*b1*b2)' from the computation.
function(rho) {
.rho2 <- 1-rho^2
##---
(- .log2pi - 0.5*log(.rho2)) * m1 - (m23 - rho*m4.2)/(2*.rho2) -
pmvnorm(lower = c(x0 - b1, y0 - b2),
upper = c(x0 + b1, y0 + b2),
mean = c(0,0),
corr = matrix(
data = c(1, rho, rho, 1),
nrow = 2))
}
## Optimise the likelihood.
opt <- try(optimise(lik,
lower = -1,
upper= 1,
maximum = TRUE,
tol = .Machine$double.eps^0.25/1e4),
silent=TRUE)
## Return the maximum of the likelihood and the log-density
## estimate when successfully computed, otherwise return
## 'NA'-values to the work-flow.
if(class(opt) != "try-error") {
c(opt$maximum,
dmvnorm(x = c(x0, y0),
mean = c(0,0),
sigma = matrix(
data = c(1, opt$maximum, opt$maximum, 1),
nrow = 2),
log = TRUE))
} else {
c(NA_real_, NA_real_)
}
}
## Use 'maximise.likelihood' on all the grid-points.
est <- apply(
X = grid,
MARGIN = 1,
FUN = maximise.likelihood)
## Return the result to the work-flow.
c(list(par.est = c(rho = est[1, ]),
log_f.est = est[2, ]),
if (return_arguments)
list(kernel = kernel,
b = b,
grid = grid,
data = data))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.