#' Borrajo et al. 2017 bandwidth selection
#' @param Y A numeric vector of observations.
#' @param w A function that gives the probability of observation
#' # at any single value in the range of \code{Y}.
#' @param method One of \code{c('rt', 'brt')}.
#' \code{rt} is the rule-of-thumb estimate; \code{brt} is a bootstrap estimate
#' with the rule-of-thumb as the pilot.
#' @keywords internal
selectbw <- function(Y, w, method='brt'){
if (! method %in% c('brt','rt')){
stop('method must be brt or rt')
}
# estimate of mu sub omega
n <- length(Y)
mu <- (sum(1 / w(Y)) / n) ^ -1
# estimate of sigma-sq sub omega
term1 <- sum(w(Y)) / n
sgma <- sqrt(mu * term1 - mu^2)
# estimate of c sub omega
cHat <- mu * 1 / n * sum(1 / w(Y)^2)
# estimate rule-of-thumb bandwidth
K2u <- function(u){
stats::dnorm(u)^2
}
Rk <- pracma::quadinf(K2u, -Inf, Inf)$Q
u2k <- function(u){
u^2 * stats::dnorm(u)
}
mu2K <- pracma::quadinf(u2k, -Inf, Inf)$Q
# for a Gaussian kernel, mu2k = 1
rtNum <- Rk * mu * cHat * 8 * sqrt(pi)
rtDenom <- n * mu2K * 3
rt <- (rtNum / rtDenom)^(1 / 5) * sgma
if (method=='rt'){
return(rt)
}
if (method=='brt'){
# bootstrap estimate of bandwidth,
# with rule-of-thumb for pilot
wtsUnscld <- 1 / w(Y)
wts <- wtsUnscld / sum(wtsUnscld)
fgDens <- stats::density(Y, bw = rt, kernel = 'gaussian', weights = wts)
fg <- stats::splinefun(fgDens$x, fgDens$y)
# take 2nd deriv of smooth f estimate, square it, and integrate
f2g_y <- fg(fgDens$x, deriv=2)
f2g <- stats::approxfun(fgDens$x, f2g_y)
f2g_sq <- function(u){
f2g(u)^2
}
a <- min(fgDens$x)
b <- max(fgDens$x)
Rfg <- pracma::integral(f2g_sq, a, b)
num <- Rk * mu * cHat
denom <- n * mu2K * Rfg
brt <- (num / denom)^(1/5)
return(brt)
}
}
#' Weight a biased sample to estimate probability density
#'
#' Calculate a kernel density estimate while correcting for selection bias
#' by weighting the kernels.
#'
#' The kernel on each datum is weighted by the inverse of the observation
#' probability at that point, \code{1/w(X)}. Weighted kernel estimation should
#' not be confused with adaptive kenel estimation. Both approaches
#' modify the individual kernels that contribute to an estimate.
#' However, in adaptive KDE the badwidth of each kernel is adjusted,
#' whereas in weighted KDE the bandwidth is constant while the height
#' (total probability density) of each kernel is adjusted.
#'
#' Weighted KDE is the method data that has received the most attention
#' in the statistics literature for selection-biased data, beginning with
#' the foundational paper of Jones (1991). The method is a slight modification
#' of classical KDE and does not add onerous calculation, making it attractive.
#' However, the choice of bandwidth is complicated. \code{wdens} calls the
#' internal \code{selectbw} function to select a bandwidth following the
#' user-specified bootstrap methods of Borrajo and others (2017).
#' The bandwidth will be calculated for a Gaussian kernel, so \code{wdens}
#' will not accept the additional arguments \code{kernel}, \code{window}, or
#' \code{width}. Other arguments will be passed to \code{\link[stats]{density}}.
#'
#' @seealso \code{\link{transdens}}
#'
#' @inheritParams transdens
#' @param bw A method to estimate the kernel bandwidth from Borrajo et al. 2017.
#' \code{rt} is the rule-of-thumb estimate; \code{brt} is a bootstrap estimate
#' with the rule-of-thumb as the pilot. Alternatively, a numeric value to pass
#' to \code{\link[stats]{density}}.
#' @return An S3 density.
#' @export
#' @references
#' \insertRef{Borrajo17}{kerneval}
#'
#' \insertRef{Jones91}{kerneval}
wdens <- function(x, w, bw='brt', reflect=FALSE, a=NULL, b=NULL, ...){
wts <- 1/w(x)
wts <- wts/sum(wts)
if (is.null(a)){
a <- min(x)
}
if (is.null(b)){
b <- max(x)
}
# select bandwidth
if (is.numeric(bw)){
h <- bw
} else {
if (! bw %in% c('brt','rt')){
stop('bw method must be brt or rt')
}
h <- selectbw(x, w, method = bw)
}
# check that user didn't specify kernel/window or width arguments
forbid <- c('kernel','window','width')
args <- list(...)
if (length(args) > 0){
checkForbid <- match(forbid, names(args))
if ( any( !is.na(checkForbid)) ){
stop('argument not allowed; see function details')
}
}
if (reflect){
kde <- density.reflected(x, bw = h, lower = a, upper = b, weights = wts, ...)
} else {
kde <- stats::density(x, bw = h, from = a, to = b, weights = wts, ...)
}
return(kde)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.