#' MCMC diagnostic plots
#'
#' @param mcmc.list. an MCMC list
#' @param family. group of parameters
#' with the same name but different numerical value
#' between square brackets (as beta[1], beta[2])
#' @param description. Character vector giving a short descriptive text that identifies the model.
#' @param par_labels. data frame with two colums. One named "Parameter"
#' with the same names of the parameters of the model.
#' @param sort. Logical. When TRUE (the default), parameters are sorted first by
#' family name and then by numerical value.
#' @param inc_warmup. Logical. When dealing with stanfit objects from rstan,
#' logical value whether the warmup samples are included. Defaults to FALSE.
#' @param stan_include_auxiliar. Logical value to include "lp__" parameter in rstan, and "lp__", "treedepth__" and
#' "stepsize__" in stan running without rstan. Defaults to FALSE.
#' @param droppars. variables to exclude. Defaults to droppars = "lp__".
#' @export
#' @examples
#' diagnostic.plots()
#'
#'
diagnostic.plots =
function (model.fit, family. = NA, description. = NA, burnin. = FALSE,
par_labels. = NA, sort. = TRUE, inc_warmup. = FALSE, stan_include_auxiliar. = FALSE)
{
MCMC_Chain = function(s) {
n.samples <- dim(s)[1]
iter <- 1:n.samples
d <- data.frame(Iteration = iter, as.matrix(unclass(s)),
check.names = FALSE)
D <- d %>% tidyr::gather(Parameter, value, -Iteration)
D <- dplyr::tbl_df(D)
return(D)
}
MCMC.Sort = function(x) {
x <- sort(as.character(unique(x)))
family <- gsub("\\[.+\\]", "", x)
Families <- sort(unique(family))
X <- NULL
for (f in Families) {
x.family <- x[family == f]
if (length(grep("\\[", x.family)) > 0) {
index <- gsub("]", "", gsub("(.+)\\[", "", x.family))
if (length(grep(",", index) > 0)) {
idl <- data.frame(x.family = x.family, index = index,
matrix(unlist(strsplit(index, ",")), nrow = length(index),
byrow = TRUE))
for (c in 3:(dim(idl)[2])) {
idl[, c] <- as.numeric(as.character(idl[,
c]))
}
command <- paste("dplyr::arrange(idl,", paste(names(idl)[-(c(1,
2))], collapse = ","), ")", sep = "")
idl <- eval(parse(text = command))
x.family <- as.character(idl$x.family)
}
else {
x.family <- x.family[order(as.numeric((gsub("]",
"", gsub("(.+)\\[", "", x.family)))))]
}
X <- c(X, x.family)
}
else {
X <- c(X, x.family)
}
}
return(X)
}
MCMCtoggplot = function(S, family = family., description = description.,
burnin = burnin., par_labels = par_labels., sort = sort.,
inc_warmup = inc_warmup., stan_include_auxiliar = stan_include_auxiliar.) {
processed <- FALSE
original.object.class <- class(S)[1]
if (length(class(S)) > 1 & class(S)[1] == "stanreg") {
S <- S$stanfit
}
if (class(S) == "brmsfit") {
S <- S$fit
}
if (class(S) == "stanfit") {
nChains <- S@sim$chains
nThin <- S@sim$thin
mDescription <- S@model_name
D <- NULL
for (l in 1:nChains) {
sdf <- as.data.frame(S@sim$samples[[l]])
names(sdf) <- names(S@sim$samples[[l]])
sdf$Iteration <- 1:dim(sdf)[1]
s <- tidyr::gather(sdf, Parameter, value, -Iteration) %>%
dplyr::mutate(Chain = l) %>% dplyr::select(Iteration,
Chain, Parameter, value)
D <- dplyr::bind_rows(D, s)
}
if (!inc_warmup) {
if (original.object.class == "stanfit") {
D <- dplyr::filter(D, Iteration > (S@sim$warmup/nThin))
D$Iteration <- D$Iteration - (S@sim$warmup/nThin)
}
nBurnin <- S@sim$warmup
}
else {
nBurnin <- 0
}
if (!stan_include_auxiliar) {
D <- dplyr::filter(D, Parameter != "lp__")
}
if (sort) {
D$Parameter <- factor(D$Parameter, levels = MCMC.Sort(D$Parameter))
}
else {
D$Parameter <- factor(D$Parameter)
}
processed <- TRUE
D <- dplyr::tbl_df(D)
}
if (class(S) == "list") {
D <- NULL
for (i in 1:length(S)) {
samples.c <- dplyr::tbl_df(read.table(S[[i]],
sep = ",", header = TRUE, colClasses = "numeric",
check.names = FALSE))
D <- dplyr::bind_rows(D, tidyr::gather(samples.c,
Parameter) %>% dplyr::mutate(Iteration = rep(1:(dim(samples.c)[1]),
dim(samples.c)[2]), Chain = i) %>% dplyr::select(Iteration,
Chain, Parameter, value))
}
if (!stan_include_auxiliar) {
D <- D[grep("__$", D$Parameter, invert = TRUE),
]
if (sort) {
D$Parameter <- factor(D$Parameter, levels = MCMC.Sort(D$Parameter))
}
else {
D$Parameter <- factor(D$Parameter)
}
}
nBurnin <- as.integer(gsub("warmup=", "", scan(S[[i]],
"", skip = 12, nlines = 1, quiet = TRUE)[2]))
nThin <- as.integer(gsub("thin=", "", scan(S[[i]],
"", skip = 13, nlines = 1, quiet = TRUE)[2]))
processed <- TRUE
}
if (class(S) == "mcmc.list" | class(S) == "mcmc" | processed) {
if (!processed) {
lS <- length(S)
D <- NULL
if (lS == 1 | class(S) == "mcmc") {
if (lS == 1 & class(S) == "mcmc.list") {
s <- S[[1]]
}
else {
s <- S
}
D <- dplyr::mutate(MCMC_Chain(s), Chain = 1) %>%
dplyr::select(Iteration, Chain, Parameter,
value)
nBurnin <- (attributes(s)$mcpar[1]) - (1 *
attributes(s)$mcpar[3])
nThin <- attributes(s)$mcpar[3]
}
else {
for (l in 1:lS) {
s <- S[l][[1]]
D <- dplyr::bind_rows(D, dplyr::mutate(MCMC_Chain(s),
Chain = l))
}
D <- dplyr::select(D, Iteration, Chain, Parameter,
value)
nBurnin <- (attributes(s)$mcpar[1]) - (1 *
attributes(s)$mcpar[3])
nThin <- attributes(s)$mcpar[3]
}
if (sort) {
D$Parameter <- factor(D$Parameter, levels = MCMC.Sort(D$Parameter))
}
else {
D$Parameter <- factor(D$Parameter)
}
D <- dplyr::arrange(D, Parameter, Chain, Iteration)
}
attr(D, "nChains") <- length(unique(D$Chain))
attr(D, "nParameters") <- length(unique(D$Parameter))
attr(D, "nIterations") <- max(D$Iteration)
if (is.numeric(burnin) & length(burnin) == 1) {
attr(D, "nBurnin") <- burnin
}
else if (is.logical(burnin)) {
if (burnin) {
attr(D, "nBurnin") <- nBurnin
}
else {
attr(D, "nBurnin") <- 0
}
}
else {
stop("burnin must be either logical (TRUE/FALSE) or a numerical vector of length one.")
}
attr(D, "nThin") <- nThin
if (is.character(description)) {
attr(D, "description") <- description
}
else {
if (!is.na(description)) {
print("description is not a text string. The name of the imported object is used instead.")
}
if (exists("mDescription")) {
attr(D, "description") <- mDescription
}
else {
attr(D, "description") <- as.character(sys.call()[2])
}
}
if (!is.na(family)) {
D <- get_family(D, family = family)
}
if (class(par_labels) %in% c("data.frame", "tbl_df")) {
if (length(which(c("Parameter", "Label") %in%
names(par_labels))) == 2) {
aD <- attributes(D)
levels(D$Parameter)[which(levels(D$Parameter) %in%
par_labels$Parameter)] <- as.character(par_labels$Label[match(levels(D$Parameter)[which(levels(D$Parameter) %in%
par_labels$Parameter)], par_labels$Parameter)])
L <- dplyr::tbl_df(data.frame(Parameter = par_labels$Label,
ParameterOriginal = par_labels$Parameter)) %>%
mutate(Parameter = as.character(Parameter))
D <- suppressWarnings(dplyr::left_join(D, L,
by = "Parameter"))
D <- D %>% dplyr::select(Iteration, Chain,
Parameter, value, ParameterOriginal)
if (class(D$Parameter) == "character") {
if (sort) {
D$Parameter <- factor(D$Parameter, levels = MCMC.Sort(D$Parameter))
}
else {
D$Parameter <- factor(D$Parameter)
}
}
attr(D, "nChains") <- aD$nChains
attr(D, "nParameters") <- aD$nParameters
attr(D, "nIterations") <- aD$nIterations
attr(D, "nBurnin") <- aD$nBurnin
attr(D, "nThin") <- aD$nThin
attr(D, "description") <- aD$description
if (dim(par_labels)[2] > 2) {
aD <- attributes(D)
L.noParameter <- dplyr::tbl_df(par_labels) %>%
dplyr::select(-Parameter) %>% dplyr::mutate(Label = as.character(Label))
D <- suppressWarnings(dplyr::left_join(D,
L.noParameter, by = c(Parameter = "Label")))
if (class(D$Parameter) == "character") {
if (sort) {
D$Parameter <- factor(D$Parameter, levels = MCMC.Sort(D$Parameter))
}
else {
D$Parameter <- factor(D$Parameter)
}
}
}
attr(D, "nChains") <- aD$nChains
attr(D, "nParameters") <- aD$nParameters
attr(D, "nIterations") <- aD$nIterations
attr(D, "nBurnin") <- aD$nBurnin
attr(D, "nThin") <- aD$nThin
attr(D, "description") <- aD$description
}
else {
stop("par_labels must include at least columns called 'Parameter' and 'Label'.")
}
}
else {
if (!is.na(par_labels)) {
stop("par_labels must be a data frame or a tbl_df.")
}
}
return(D)
}
else {
stop("MCMC is not able to transform the input object into a MCMC object suitable for ggmcmc.")
}
}
MCMC_traceplot = function(D, family = NA, original_burnin = TRUE,
original_thin = TRUE, simplify = NULL, greek = FALSE,
...) {
if (!is.na(family)) {
D <- get_family(D, family = family)
}
if (!is.null(simplify)) {
if (simplify > 0 & simplify < 1) {
aD <- attributes(D)
D <- dplyr::sample_frac(D, simplify)
attr(D, "nChains") <- aD$nChains
attr(D, "nParameters") <- aD$nParameters
attr(D, "nIterations") <- aD$nIterations
attr(D, "nBurnin") <- aD$nBurnin
attr(D, "nThin") <- aD$nThin
attr(D, "description") <- aD$description
}
else {
print("It is not possible to guess the simplification percentage to apply.")
}
}
if (attributes(D)$nChains <= 1) {
f <- ggplot(D, aes(x = Iteration, y = value))
}
else {
f <- ggplot(D, aes(x = Iteration, y = value, colour = as.factor(Chain)))
}
f <- f + geom_line(alpha = 0.7) + scale_colour_discrete(name = "Chain")
if (!greek) {
f <- f + facet_wrap(~Parameter, ncol = 1, scales = "free")
}
else {
f <- f + facet_wrap(~Parameter, ncol = 1, scales = "free",
labeller = label_parsed)
}
t_format <- function(x) {
return(((x - 1) * attributes(D)$nThin) + attributes(D)$nThin)
}
b_format <- function(x) {
return(x + attributes(D)$nBurnin)
}
bt_format <- function(x) {
return(attributes(D)$nBurnin + (((x - 1) * attributes(D)$nThin) +
attributes(D)$nThin))
}
if (original_burnin & !original_thin) {
f <- f + scale_x_continuous(labels = b_format)
}
else if (!original_burnin & original_thin) {
f <- f + scale_x_continuous(labels = t_format)
}
else if (original_burnin & original_thin) {
f <- f + scale_x_continuous(labels = bt_format)
}
else {
f <- f
}
return(f)
}
MCMC_autocorrelation = function(D, family = NA, nLags = 50,
greek = FALSE, ...) {
ac = function(x, nLags) {
X <- matrix(NA, ncol = nLags, nrow = length(x))
X[, 1] <- x
for (i in 2:nLags) {
X[, i] <- c(rep(NA, i - 1), x[1:(length(x) -
i + 1)])
}
X <- data.frame(Lag = 1:nLags, Autocorrelation = cor(X,
use = "pairwise.complete.obs")[, 1])
return(X)
}
if (!is.na(family)) {
D <- get_family(D, family = family)
}
nIter <- attr(D, "nIteration")
if (nIter < nLags) {
warning(sprintf("nLags=%d is larger than number of iterations, computing until max possible lag %d",
nLags, nIter))
nLags <- nIter
}
wc.ac <- D %>% dplyr::group_by(Parameter, Chain) %>%
dplyr::do(ac(.$value, nLags))
if (attributes(D)$nChains <= 1) {
f <- ggplot(wc.ac, aes(x = Lag, y = Autocorrelation)) +
geom_bar(stat = "identity", position = "identity") +
ylim(-1, 1)
if (!greek) {
f <- f + facet_wrap(~Parameter)
}
else {
f <- f + facet_wrap(~Parameter, labeller = label_parsed)
}
}
else {
f <- ggplot(wc.ac, aes(x = Lag, y = Autocorrelation,
colour = as.factor(Chain), fill = as.factor(Chain))) +
geom_bar(stat = "identity", position = "identity") +
ylim(-1, 1) + scale_fill_discrete(name = "Chain") +
scale_colour_discrete(name = "Chain")
if (!greek) {
f <- f + facet_grid(Parameter ~ Chain)
}
else {
f <- f + facet_grid(Parameter ~ Chain, labeller = label_parsed)
}
}
return(f)
}
plot(MCMC_autocorrelation(MCMCtoggplot(model.fit)))
plot(MCMC_traceplot(MCMCtoggplot(model.fit)))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.