#' Bayesian kernel density estimation
#'
#' @param x numeric vector.
#' @param bw the smoothing bandwidth to be used.
#' @param m size of the grid used for calculating density.
#' @param nsim number of simulated draws.
#' @param n the number of equally spaced points at which the density is to be estimated.
#' @param alpha confidence level of the interval.
#' @param na.rm logical; if \code{TRUE}, missing values are removed from \code{x}.
#' If \code{FALSE} any missing values cause an error.
#'
#' @examples
#'
#' x <- c(rnorm(150, 20), rnorm(200, 15))
#'
#' (fit <- bayesDensity(x))
#' plot(fit)
#' lines(density(x), col = "red", lty = 2)
#' rug(x, lwd = 2)
#'
#' x <- mtcars$mpg
#'
#' plot(bayesDensity(x))
#' lines(density(x), col = "red", lty = 2)
#' rug(x, lwd = 2)
#'
#' @references
#' Sibisi, S., & Skilling, J. (1996).
#' Bayesian Density Estimation.
#' In Maximum Entropy and Bayesian Methods (pp. 189-198). Springer.
#'
#' @importFrom extraDistr rdirichlet
#' @importFrom stats dnorm
#'
#' @export
bayesDensity <- function(x, bw = bw.nrd(x), m = 512, nsim = 5000,
n = 512, alpha = 0.95, na.rm = TRUE) {
call <- match.call()
xnam <- as.character(deparse(substitute(x)))
N <- length(x)
x.na <- is.na(x)
if (any(x.na)) {
if (na.rm)
x <- x[!x.na]
else stop("'x' contains missing values")
}
x.finite <- is.finite(x)
if (any(!x.finite))
x <- x[x.finite]
bs <- bins(x, m)
grid <- seq(min(x)-4*bw, max(x)+4*bw, length.out = n)
K <- outer(bs$mids, grid, function(mu, y) dnorm(y, mu, bw))
phi <- rdirichlet(nsim, as.numeric(1/m + bs$counts))
sim <- phi %*% K
quant <- apply(sim, 2, quantile, c((1-alpha)/2, 0.5, 1-(1-alpha)/2))
mean <- colMeans(sim)
sd <- apply(sim, 2, sd)
structure(
list(
x = grid,
y = mean,
low = quant[1, ],
high = quant[3, ],
median = quant[2, ],
sd = sd,
m = m,
n = N,
bw = bw,
nsim = nsim,
alpha = alpha,
call = call,
data.name = xnam,
bins = bs,
has.na = FALSE
), class = c("bayesDensity", "density")
)
}
#' @export
plot.bayesDensity <- function(x, main = NULL, xlab = NULL,
ylab = "Density", type = "l",
zero.line = TRUE, ...) {
if (is.null(xlab))
xlab <- paste("N =", x$n, " Bandwidth =", formatC(x$bw))
if (is.null(main))
main <- deparse(x$call)
plot.default(x, main = main, xlab = xlab, ylab = ylab, type = type,
ylim = c(0, max(x$high)), ...)
if (zero.line)
abline(h = 0, lwd = 0.1, col = "gray")
lines(x$x, x$low, col = "gray", lty = 2)
lines(x$x, x$high, col = "gray", lty = 2)
invisible(NULL)
}
# library(extraDistr)
#
#
# bins <- function(x, n = 512L) {
# x <- x[!is.na(x)]
# h <- diff(range(x))/(n-4)
# mids <- seq(min(x)-2*h, max(x)+2*h, length.out = n)
# breaks <- c(mids[1L] - h, mids + h)
# counts <- unname(tapply(x, cut(x, breaks), length,
# default = 0L))
# list(
# mids = mids,
# breaks = breaks,
# counts = counts
# )
# }
#
# rudir <- function(n, d) {
# y <- matrix(rexp(d * n, 1), nrow = n, ncol = d)
# y / rowSums(y)
# }
#
# library(gss)
#
# data(buffalo)
# x <- buffalo
#
# (fit <- bayesDensity(x))
# plot(fit)
# lines(density(x), col = "red")
# plot(fit$x, fit$mean + fit$sd/sqrt(fit$n), lty = 2, col = "blue", type = "l")
# lines(fit$x, fit$mean, lty = 1, col = "blue")
# lines(fit$x, fit$mean - fit$sd/sqrt(fit$n), lty = 2, col = "blue")
# #lines(density(x), col = "red", lty = 2)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.