twextras: twextras

Documented in bayesDensity

#' Bayesian kernel density estimation
#'
#' @param x      numeric vector.
#' @param bw     the smoothing bandwidth to be used.
#' @param m      size of the grid used for calculating density.
#' @param nsim   number of simulated draws.
#' @param n      the number of equally spaced points at which the density is to be estimated.
#' @param alpha  confidence level of the interval.
#' @param na.rm  logical; if \code{TRUE}, missing values are removed from \code{x}.
#'               If \code{FALSE} any missing values cause an error.
#' 
#' @examples
#' 
#' x <- c(rnorm(150, 20), rnorm(200, 15))
#' 
#' (fit <- bayesDensity(x))
#' plot(fit)
#' lines(density(x), col = "red", lty = 2)
#' rug(x, lwd = 2)
#' 
#' x <- mtcars$mpg
#' 
#' plot(bayesDensity(x))
#' lines(density(x), col = "red", lty = 2)
#' rug(x, lwd = 2)
#' 
#' @references
#' Sibisi, S., & Skilling, J. (1996).
#' Bayesian Density Estimation.
#' In Maximum Entropy and Bayesian Methods (pp. 189-198). Springer.
#' 
#' @importFrom extraDistr rdirichlet
#' @importFrom stats dnorm
#' 
#' @export

bayesDensity <- function(x, bw = bw.nrd(x), m = 512, nsim = 5000,
                         n = 512, alpha = 0.95, na.rm = TRUE) {
  
  call <- match.call()
  xnam <- as.character(deparse(substitute(x)))
  N <- length(x)
  
  x.na <- is.na(x)
  if (any(x.na)) {
    if (na.rm) 
      x <- x[!x.na]
    else stop("'x' contains missing values")
  }
  
  x.finite <- is.finite(x)
  if (any(!x.finite))
    x <- x[x.finite]
  
  bs <- bins(x, m)
  grid <- seq(min(x)-4*bw, max(x)+4*bw, length.out = n)
  
  K <- outer(bs$mids, grid, function(mu, y) dnorm(y, mu, bw))
  phi <- rdirichlet(nsim, as.numeric(1/m + bs$counts))
  
  sim <- phi %*% K
  quant <- apply(sim, 2, quantile, c((1-alpha)/2, 0.5, 1-(1-alpha)/2))
  mean <- colMeans(sim)
  sd <- apply(sim, 2, sd)
  
  structure(
    list(
      x = grid,
      y = mean,
      low = quant[1, ],
      high = quant[3, ],
      median = quant[2, ],
      sd = sd,
      m = m,
      n = N,
      bw = bw,
      nsim = nsim,
      alpha = alpha,
      call = call,
      data.name = xnam,
      bins = bs,
      has.na = FALSE
    ), class = c("bayesDensity", "density")
  )
  
}

#' @export

plot.bayesDensity <- function(x, main = NULL, xlab = NULL,
                              ylab = "Density", type = "l",
                              zero.line = TRUE, ...) {
  if (is.null(xlab))
    xlab <- paste("N =", x$n, "  Bandwidth =", formatC(x$bw))
  if (is.null(main))
    main <- deparse(x$call)
  plot.default(x, main = main, xlab = xlab, ylab = ylab, type = type,
               ylim = c(0, max(x$high)), ...)
  if (zero.line)
    abline(h = 0, lwd = 0.1, col = "gray")

  lines(x$x, x$low,  col = "gray", lty = 2)
  lines(x$x, x$high, col = "gray", lty = 2)

  invisible(NULL)
}
  





# library(extraDistr)
# 
# 
# bins <- function(x, n = 512L) {
#   x <- x[!is.na(x)]
#   h <- diff(range(x))/(n-4)
#   mids <- seq(min(x)-2*h, max(x)+2*h, length.out = n)
#   breaks <- c(mids[1L] - h, mids + h)
#   counts <- unname(tapply(x, cut(x, breaks), length,
#                           default = 0L))
#   list(
#     mids = mids,
#     breaks = breaks,
#     counts = counts
#   )
# }
# 
# rudir <- function(n, d) {
#   y <- matrix(rexp(d * n, 1), nrow = n, ncol = d)
#   y / rowSums(y)
# }
# 
# library(gss)
# 
# data(buffalo)
# x <- buffalo
# 
# (fit <- bayesDensity(x))
# plot(fit)
# lines(density(x), col = "red")

# plot(fit$x, fit$mean + fit$sd/sqrt(fit$n), lty = 2, col = "blue", type = "l")
# lines(fit$x, fit$mean, lty = 1, col = "blue")
# lines(fit$x, fit$mean - fit$sd/sqrt(fit$n), lty = 2, col = "blue")
# #lines(density(x), col = "red", lty = 2)