#' Kullback-Leibler divergence for the latent variables of the estimation
#' process.
#'
#' Returns the Kullback-Leibler divergence for the latent variables, which are
#' used in estimation process, and it could indicate possible outlying
#' observations.
#'
#' @param object This is an object of the class "bqr", produced by a call
#' to the bayesQR function.
#' @param burnin Initial part of the chain, which is to be discarded. Default
#' value is 50.
#' @param plotKL If TRUE, the function prints the plot with all probabilities.
#' Default is set to TRUE.
#' @param scales.free If FALSE, the default, then all plots will use the same
#' y scale. If TRUE, for each tau the plot will use the best possible scale
#' in order to visualize the probability information for all observations.
#' @param all.obs if TRUE, calculates KL divergence for all observations
#' @param obs if \code{all.obs} is FALSE, specifies the observation to
#' calculate the KL divergence.
#' @return Prints a plot of the posterior probability of being an outlier for
#' all observations and returns a data.frame with all values of the
#' probabilities.
#' @export
#' @useDynLib baquantreg
#' @import ggplot2
latentKL <- function(object, burnin = 50, plotKL = TRUE,
scales.free = FALSE, all.obs = TRUE, obs = 1) {
if (class(object) != "bqr")
stop("This function is not suited for your model.")
taus <- object$tau
nobs <- dim(object$chains[[1]]$vSample)[2]
if (all.obs) seqObs <- 1:nobs
else seqObs <- obs
klValues <- sapply(object$chains, function(a){
sapply(seqObs, function(b){
otherV <- a$vSample[-c(1:burnin),-b]
vSample <- a$vSample[-c(1:burnin), b]
mean(sapply(1:(nobs-1), function(ccc){
minV <- min(vSample, otherV[,ccc])
maxV <- max(vSample, otherV[,ccc])
g1 <- stats::density(otherV[,ccc], from = minV, to = maxV)
g2 <- stats::density(vSample, from = minV, to = maxV)
if (!all.equal(g1$x, g2$x))
warning("Values considered for interpolation are not the same.")
g1$y[g1$y == 0] <- .Machine$double.eps
g2$y[g2$y == 0] <- .Machine$double.eps
f_y <- g2$y * (log(g2$y) - log(g1$y))
sum((f_y[-1] + f_y[-length(f_y)]) * diff(g1$x) * 0.5)
# f <- stats::approxfun(x = g1$x, y = f_y)
#
# stats::integrate(f, lower = minV, upper = Inf,
# rel.tol = .Machine$double.eps^0.1,
# subdivisions = 50)$value
}))
})
})
plotData <- data.frame(nobs = rep(seqObs, times=length(taus)),
values = as.numeric(klValues),
taus = rep(taus, each=length(seqObs)))
if (all.obs){
maxKL <- which.max(stats::aggregate(values ~ nobs, data=plotData, mean)$values)
print(paste("The observation with greatest Kullback-Leibler divergence from the others is:",
maxKL))
g <- ggplot(subset(plotData), aes(y=values, x=nobs)) + theme_bw()
if (length(taus) > 1){
if (scales.free) g <- g + facet_wrap(~taus, scales='free')
else g <- g + facet_wrap(~taus)
}
g <- g + geom_point() +
ylab("Mean Kullback-Leibler divergence for each point") +
xlab("# Observation")
if (plotKL) print(g)
}
return(invisible(plotData))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.