R/MCMC_diagnostic_plots.R

#' MCMC diagnostic plots
#'
#' @param mcmc.list. an MCMC list
#' @param family. group of parameters
#' with the same name but different numerical value
#' between square brackets (as beta[1], beta[2])
#' @param description. Character vector giving a short descriptive text that identifies the model.
#' @param par_labels. data frame with two colums. One named "Parameter"
#' with the same names of the parameters of the model.
#' @param sort. Logical. When TRUE (the default), parameters are sorted first by
#' family name and then by numerical value.
#' @param inc_warmup. Logical. When dealing with stanfit objects from rstan,
#' logical value whether the warmup samples are included. Defaults to FALSE.
#' @param stan_include_auxiliar.	Logical value to include "lp__" parameter in rstan, and "lp__", "treedepth__" and
#' "stepsize__" in stan running without rstan. Defaults to FALSE.
#' @param droppars. variables to exclude. Defaults to droppars = "lp__".
#' @export
#' @examples
#' diagnostic.plots()
#'
#'
diagnostic.plots =
  function (model.fit, family. = NA, description. = NA, burnin. = FALSE,
            par_labels. = NA, sort. = TRUE, inc_warmup. = FALSE, stan_include_auxiliar. = FALSE)
  {
    MCMC_Chain = function(s) {
      n.samples <- dim(s)[1]
      iter <- 1:n.samples
      d <- data.frame(Iteration = iter, as.matrix(unclass(s)),
                      check.names = FALSE)
      D <- d %>% tidyr::gather(Parameter, value, -Iteration)
      D <- dplyr::tbl_df(D)
      return(D)
    }
    MCMC.Sort = function(x) {
      x <- sort(as.character(unique(x)))
      family <- gsub("\\[.+\\]", "", x)
      Families <- sort(unique(family))
      X <- NULL
      for (f in Families) {
        x.family <- x[family == f]
        if (length(grep("\\[", x.family)) > 0) {
          index <- gsub("]", "", gsub("(.+)\\[", "", x.family))
          if (length(grep(",", index) > 0)) {
            idl <- data.frame(x.family = x.family, index = index,
                              matrix(unlist(strsplit(index, ",")), nrow = length(index),
                                     byrow = TRUE))
            for (c in 3:(dim(idl)[2])) {
              idl[, c] <- as.numeric(as.character(idl[,
                                                      c]))
            }
            command <- paste("dplyr::arrange(idl,", paste(names(idl)[-(c(1,
                                                                         2))], collapse = ","), ")", sep = "")
            idl <- eval(parse(text = command))
            x.family <- as.character(idl$x.family)
          }
          else {
            x.family <- x.family[order(as.numeric((gsub("]",
                                                        "", gsub("(.+)\\[", "", x.family)))))]
          }
          X <- c(X, x.family)
        }
        else {
          X <- c(X, x.family)
        }
      }
      return(X)
    }
    MCMCtoggplot = function(S, family = family., description = description.,
                            burnin = burnin., par_labels = par_labels., sort = sort.,
                            inc_warmup = inc_warmup., stan_include_auxiliar = stan_include_auxiliar.) {
      processed <- FALSE
      original.object.class <- class(S)[1]
      if (length(class(S)) > 1 & class(S)[1] == "stanreg") {
        S <- S$stanfit
      }
      if (class(S) == "brmsfit") {
        S <- S$fit
      }
      if (class(S) == "stanfit") {
        nChains <- S@sim$chains
        nThin <- S@sim$thin
        mDescription <- S@model_name
        D <- NULL
        for (l in 1:nChains) {
          sdf <- as.data.frame(S@sim$samples[[l]])
          names(sdf) <- names(S@sim$samples[[l]])
          sdf$Iteration <- 1:dim(sdf)[1]
          s <- tidyr::gather(sdf, Parameter, value, -Iteration) %>%
            dplyr::mutate(Chain = l) %>% dplyr::select(Iteration,
                                                       Chain, Parameter, value)
          D <- dplyr::bind_rows(D, s)
        }
        if (!inc_warmup) {
          if (original.object.class == "stanfit") {
            D <- dplyr::filter(D, Iteration > (S@sim$warmup/nThin))
            D$Iteration <- D$Iteration - (S@sim$warmup/nThin)
          }
          nBurnin <- S@sim$warmup
        }
        else {
          nBurnin <- 0
        }
        if (!stan_include_auxiliar) {
          D <- dplyr::filter(D, Parameter != "lp__")
        }
        if (sort) {
          D$Parameter <- factor(D$Parameter, levels = MCMC.Sort(D$Parameter))
        }
        else {
          D$Parameter <- factor(D$Parameter)
        }
        processed <- TRUE
        D <- dplyr::tbl_df(D)
      }
      if (class(S) == "list") {
        D <- NULL
        for (i in 1:length(S)) {
          samples.c <- dplyr::tbl_df(read.table(S[[i]],
                                                sep = ",", header = TRUE, colClasses = "numeric",
                                                check.names = FALSE))
          D <- dplyr::bind_rows(D, tidyr::gather(samples.c,
                                                 Parameter) %>% dplyr::mutate(Iteration = rep(1:(dim(samples.c)[1]),
                                                                                              dim(samples.c)[2]), Chain = i) %>% dplyr::select(Iteration,
                                                                                                                                               Chain, Parameter, value))
        }
        if (!stan_include_auxiliar) {
          D <- D[grep("__$", D$Parameter, invert = TRUE),
                 ]
          if (sort) {
            D$Parameter <- factor(D$Parameter, levels = MCMC.Sort(D$Parameter))
          }
          else {
            D$Parameter <- factor(D$Parameter)
          }
        }
        nBurnin <- as.integer(gsub("warmup=", "", scan(S[[i]],
                                                       "", skip = 12, nlines = 1, quiet = TRUE)[2]))
        nThin <- as.integer(gsub("thin=", "", scan(S[[i]],
                                                   "", skip = 13, nlines = 1, quiet = TRUE)[2]))
        processed <- TRUE
      }
      if (class(S) == "mcmc.list" | class(S) == "mcmc" | processed) {
        if (!processed) {
          lS <- length(S)
          D <- NULL
          if (lS == 1 | class(S) == "mcmc") {
            if (lS == 1 & class(S) == "mcmc.list") {
              s <- S[[1]]
            }
            else {
              s <- S
            }
            D <- dplyr::mutate(MCMC_Chain(s), Chain = 1) %>%
              dplyr::select(Iteration, Chain, Parameter,
                            value)
            nBurnin <- (attributes(s)$mcpar[1]) - (1 *
                                                     attributes(s)$mcpar[3])
            nThin <- attributes(s)$mcpar[3]
          }
          else {
            for (l in 1:lS) {
              s <- S[l][[1]]
              D <- dplyr::bind_rows(D, dplyr::mutate(MCMC_Chain(s),
                                                     Chain = l))
            }
            D <- dplyr::select(D, Iteration, Chain, Parameter,
                               value)
            nBurnin <- (attributes(s)$mcpar[1]) - (1 *
                                                     attributes(s)$mcpar[3])
            nThin <- attributes(s)$mcpar[3]
          }
          if (sort) {
            D$Parameter <- factor(D$Parameter, levels = MCMC.Sort(D$Parameter))
          }
          else {
            D$Parameter <- factor(D$Parameter)
          }
          D <- dplyr::arrange(D, Parameter, Chain, Iteration)
        }
        attr(D, "nChains") <- length(unique(D$Chain))
        attr(D, "nParameters") <- length(unique(D$Parameter))
        attr(D, "nIterations") <- max(D$Iteration)
        if (is.numeric(burnin) & length(burnin) == 1) {
          attr(D, "nBurnin") <- burnin
        }
        else if (is.logical(burnin)) {
          if (burnin) {
            attr(D, "nBurnin") <- nBurnin
          }
          else {
            attr(D, "nBurnin") <- 0
          }
        }
        else {
          stop("burnin must be either logical (TRUE/FALSE) or a numerical vector of length one.")
        }
        attr(D, "nThin") <- nThin
        if (is.character(description)) {
          attr(D, "description") <- description
        }
        else {
          if (!is.na(description)) {
            print("description is not a text string. The name of the imported object is used instead.")
          }
          if (exists("mDescription")) {
            attr(D, "description") <- mDescription
          }
          else {
            attr(D, "description") <- as.character(sys.call()[2])
          }
        }
        if (!is.na(family)) {
          D <- get_family(D, family = family)
        }
        if (class(par_labels) %in% c("data.frame", "tbl_df")) {
          if (length(which(c("Parameter", "Label") %in%
                           names(par_labels))) == 2) {
            aD <- attributes(D)
            levels(D$Parameter)[which(levels(D$Parameter) %in%
                                        par_labels$Parameter)] <- as.character(par_labels$Label[match(levels(D$Parameter)[which(levels(D$Parameter) %in%
                                                                                                                                  par_labels$Parameter)], par_labels$Parameter)])
            L <- dplyr::tbl_df(data.frame(Parameter = par_labels$Label,
                                          ParameterOriginal = par_labels$Parameter)) %>%
              mutate(Parameter = as.character(Parameter))
            D <- suppressWarnings(dplyr::left_join(D, L,
                                                   by = "Parameter"))
            D <- D %>% dplyr::select(Iteration, Chain,
                                     Parameter, value, ParameterOriginal)
            if (class(D$Parameter) == "character") {
              if (sort) {
                D$Parameter <- factor(D$Parameter, levels = MCMC.Sort(D$Parameter))
              }
              else {
                D$Parameter <- factor(D$Parameter)
              }
            }
            attr(D, "nChains") <- aD$nChains
            attr(D, "nParameters") <- aD$nParameters
            attr(D, "nIterations") <- aD$nIterations
            attr(D, "nBurnin") <- aD$nBurnin
            attr(D, "nThin") <- aD$nThin
            attr(D, "description") <- aD$description
            if (dim(par_labels)[2] > 2) {
              aD <- attributes(D)
              L.noParameter <- dplyr::tbl_df(par_labels) %>%
                dplyr::select(-Parameter) %>% dplyr::mutate(Label = as.character(Label))
              D <- suppressWarnings(dplyr::left_join(D,
                                                     L.noParameter, by = c(Parameter = "Label")))
              if (class(D$Parameter) == "character") {
                if (sort) {
                  D$Parameter <- factor(D$Parameter, levels = MCMC.Sort(D$Parameter))
                }
                else {
                  D$Parameter <- factor(D$Parameter)
                }
              }
            }
            attr(D, "nChains") <- aD$nChains
            attr(D, "nParameters") <- aD$nParameters
            attr(D, "nIterations") <- aD$nIterations
            attr(D, "nBurnin") <- aD$nBurnin
            attr(D, "nThin") <- aD$nThin
            attr(D, "description") <- aD$description
          }
          else {
            stop("par_labels must include at least columns called 'Parameter' and 'Label'.")
          }
        }
        else {
          if (!is.na(par_labels)) {
            stop("par_labels must be a data frame or a tbl_df.")
          }
        }
        return(D)
      }
      else {
        stop("MCMC is not able to transform the input object into a MCMC object suitable for ggmcmc.")
      }
    }
    MCMC_traceplot = function(D, family = NA, original_burnin = TRUE,
                              original_thin = TRUE, simplify = NULL, greek = FALSE,
                              ...) {
      if (!is.na(family)) {
        D <- get_family(D, family = family)
      }
      if (!is.null(simplify)) {
        if (simplify > 0 & simplify < 1) {
          aD <- attributes(D)
          D <- dplyr::sample_frac(D, simplify)
          attr(D, "nChains") <- aD$nChains
          attr(D, "nParameters") <- aD$nParameters
          attr(D, "nIterations") <- aD$nIterations
          attr(D, "nBurnin") <- aD$nBurnin
          attr(D, "nThin") <- aD$nThin
          attr(D, "description") <- aD$description
        }
        else {
          print("It is not possible to guess the simplification percentage to apply.")
        }
      }
      if (attributes(D)$nChains <= 1) {
        f <- ggplot(D, aes(x = Iteration, y = value))
      }
      else {
        f <- ggplot(D, aes(x = Iteration, y = value, colour = as.factor(Chain)))
      }
      f <- f + geom_line(alpha = 0.7) + scale_colour_discrete(name = "Chain")
      if (!greek) {
        f <- f + facet_wrap(~Parameter, ncol = 1, scales = "free")
      }
      else {
        f <- f + facet_wrap(~Parameter, ncol = 1, scales = "free",
                            labeller = label_parsed)
      }
      t_format <- function(x) {
        return(((x - 1) * attributes(D)$nThin) + attributes(D)$nThin)
      }
      b_format <- function(x) {
        return(x + attributes(D)$nBurnin)
      }
      bt_format <- function(x) {
        return(attributes(D)$nBurnin + (((x - 1) * attributes(D)$nThin) +
                                          attributes(D)$nThin))
      }
      if (original_burnin & !original_thin) {
        f <- f + scale_x_continuous(labels = b_format)
      }
      else if (!original_burnin & original_thin) {
        f <- f + scale_x_continuous(labels = t_format)
      }
      else if (original_burnin & original_thin) {
        f <- f + scale_x_continuous(labels = bt_format)
      }
      else {
        f <- f
      }
      return(f)
    }
    MCMC_autocorrelation = function(D, family = NA, nLags = 50,
                                    greek = FALSE, ...) {
      ac = function(x, nLags) {
        X <- matrix(NA, ncol = nLags, nrow = length(x))
        X[, 1] <- x
        for (i in 2:nLags) {
          X[, i] <- c(rep(NA, i - 1), x[1:(length(x) -
                                             i + 1)])
        }
        X <- data.frame(Lag = 1:nLags, Autocorrelation = cor(X,
                                                             use = "pairwise.complete.obs")[, 1])
        return(X)
      }
      if (!is.na(family)) {
        D <- get_family(D, family = family)
      }
      nIter <- attr(D, "nIteration")
      if (nIter < nLags) {
        warning(sprintf("nLags=%d is larger than number of iterations, computing until max possible lag %d",
                        nLags, nIter))
        nLags <- nIter
      }
      wc.ac <- D %>% dplyr::group_by(Parameter, Chain) %>%
        dplyr::do(ac(.$value, nLags))
      if (attributes(D)$nChains <= 1) {
        f <- ggplot(wc.ac, aes(x = Lag, y = Autocorrelation)) +
          geom_bar(stat = "identity", position = "identity") +
          ylim(-1, 1)
        if (!greek) {
          f <- f + facet_wrap(~Parameter)
        }
        else {
          f <- f + facet_wrap(~Parameter, labeller = label_parsed)
        }
      }
      else {
        f <- ggplot(wc.ac, aes(x = Lag, y = Autocorrelation,
                               colour = as.factor(Chain), fill = as.factor(Chain))) +
          geom_bar(stat = "identity", position = "identity") +
          ylim(-1, 1) + scale_fill_discrete(name = "Chain") +
          scale_colour_discrete(name = "Chain")
        if (!greek) {
          f <- f + facet_grid(Parameter ~ Chain)
        }
        else {
          f <- f + facet_grid(Parameter ~ Chain, labeller = label_parsed)
        }
      }
      return(f)
    }
    plot(MCMC_autocorrelation(MCMCtoggplot(model.fit)))
    plot(MCMC_traceplot(MCMCtoggplot(model.fit)))
  }
abnormally-distributed/abdisttools documentation built on May 5, 2019, 7:07 a.m.