R/diagnostic.bcmeta.R
In jarbes: Just a Rather Bayesian Evidence Synthesis

Documented in diagnostic.bcmeta

#' @title Diagnostic function for bcmeta object in jarbes
#'
#' @description This function performers an approximated Bayesian cross-validation for a bcmeta object and specially designed diagnostics to detect the existence of a biased component.
#'
#' @param object The object generated by the function b3lmeta.
#' @param post.p.value.cut Posterior p-value cut point to assess outliers.
#' @param study.names Character vector containing names of the studies used.
#' @param size.forest Size of the center symbol mark in the forest-plot lines
#' @param lwd.forest  Thickness of the lines in the forest-plot
#' @param shape.forest Type of symbol for the center mark in the forest-plot lines
#'
#' @param bias.plot Display the bias plot. The default is TRUE.
#' @param cross.val.plot Display the cross validation plot. The default is TRUE.
#'
#' @param level Vector with the probability levels of the contour plot. The default values are: 0.5, 0.75, and 0.95.

#' @param x.lim Numeric vector of length 2 specifying the x-axis limits.
#' @param y.lim Numeric vector of length 2 specifying the y-axis limits.
#' @param x.lab Text with the label of the x-axis.
#' @param y.lab Text with the label of the y-axis.
#' @param title.plot Text for setting a title in the bias plot.

#' @param kde2d.n The number of grid points in each direction for the non-parametric density estimation. The default is 25.
#' @param marginals If TRUE the marginal histograms of the posteriors are added to the plot.
#'
#' @param bin.hist The number of bins in for the histograms. The default value is 30.
#' @param color.line The color of the contour lines. The default is "black.
#' @param color.hist The color of the histogram bars. The default is "white".
#' @param color.data.points The color of the data points. The default is "black".
#' @param alpha.data.points Transparency of the data points.
#'
#' @param S The number of sample values from the joint posterior distribution used to approximate the contours. The default is S=5000.
#'
#' @param ... \dots
#'
#'
#' @import ggplot2
#' @import ggExtra
#' @import MASS
#'
#' @export

diagnostic.bcmeta = function(object,
                      # Parameters for the forest plot ....
                      post.p.value.cut = 0.05,
                      study.names = NULL,
                      size.forest = 0.4,
                      lwd.forest = 0.2,
                      shape.forest = 23,
                      # Parameters for the bias check plot...
                      bias.plot = TRUE,
                      cross.val.plot = TRUE,
                      level = c(0.5, 0.75, 0.95),
                      x.lim = c(0, 1),
                      y.lim = c(0, 10),
                      x.lab = "P(Bias)",
                      y.lab = "Mean Bias",
                      title.plot = paste("Bias Diagnostics Contours (50%, 75% and 95%)"),
                      kde2d.n = 25,
                      marginals = TRUE,
                      bin.hist = 30,
                      color.line = "black",
                      color.hist = "white",
                      color.data.points = "black",
                      alpha.data.points = 0.1,
                      S = 5000,
                             ...) {

  x=y=ylo=yhi=kde2d=pi.bias=bias=dens.z=NULL

  # Data preparation for the forest-plot .......................................
  y.ghost = object$BUGSoutput$sims.list$y.ghost
  g.m = apply(y.ghost, 2, median)
  g.u = apply(y.ghost, 2, quantile, prob = 0.975)
  g.l = apply(y.ghost, 2, quantile, prob = 0.025)

  n.studies = length(g.m)

  TE = object$data$TE

  if (is.null(study.names)) {
    study.names = 1:n.studies
  }

  # Posterior p-values to detect outliers.......................................
  p.vec = NULL
  for(i in 1:n.studies)
  {
    p1 = sum(y.ghost[,i]<TE[i])/length(y.ghost[,i])
    p2 = sum(y.ghost[,i]>TE[i])/length(y.ghost[,i])
    p.val = min(p1, p2)
    p.vec = c(p.vec, p.val)
  }

  p.col = ifelse(p.vec < post.p.value.cut, "red", "blue")

  data.plot = data.frame(
    x = study.names,
    TE = TE,
    g.m = g.m,
    ylo  = g.l,
    yhi  = g.u,
    p.vec = p.vec,
    p.col = p.col)

  p.forest = ggplot(data.plot, aes(x = x, y = TE,
                            ymin = ylo, ymax = yhi)) +
             geom_pointrange(colour = p.col,
                             lwd = lwd.forest,      # Thickness of the lines
                             shape = shape.forest,
                             size = size.forest)+   # Point size
             coord_flip() +
             xlab("Study") +
             ylab("Posterior predictive based on the Bias Corrected component") +
             ggtitle("Bayesian Cross-Valdiation") +
             theme_bw()

# Bias plot ....................................................................

  # Data preparation
  p.bias.1 = object$BUGSoutput$sims.list$p.bias[,2]
  #mu = object$BUGSoutput$sims.list$mu[,1:2]
  delta.1 =   object$BUGSoutput$sims.list$B #- mu[,1]

  dat.post = data.frame(x=p.bias.1, y=delta.1)
  dat.post = dat.post[sample(1:S), ]

  tau = object$BUGSoutput$sims.list$tau

  cut.point = 2*mean(tau)

  # Base plot ..................................................................
  baseplot = ggplot(dat.post, aes(x = x, y = y)) +
    geom_point(size=0.01, alpha = alpha.data.points,
               aes(color = "MCMC Samples"), color = color.data.points)+
    scale_x_continuous(name = x.lab, limits = x.lim) +
    scale_y_continuous(name = y.lab, limits = y.lim) +
    ggtitle(title.plot) +
    geom_hline(yintercept=cut.point, linetype="dashed", color = "black")+
    theme_bw()

  # Non-parametric .............................................................
  # Estimation of the nonparametric density ...
  x.nopar = dat.post[ ,1]
  y.nopar = dat.post[ ,2]

  dens = kde2d(x.nopar, y.nopar, n = 20)
  dx = diff(dens$x[1:2])
  dy = diff(dens$y[1:2])
  sz = sort(dens$z)
  c1 = cumsum(sz) * dx * dy
  Levels.nonpar = approx(c1, sz, xout = 1 - level)$y

  densdf = data.frame(expand.grid(pi.bias = dens$x,
                                  bias = dens$y),
                      dens.z = as.vector(dens$z))

  # Non-parametric .............................................................
  finalplot = baseplot + geom_contour(data = densdf,
                                      aes(pi.bias, bias, z = dens.z),
                                      colour = color.line,
                                      breaks = Levels.nonpar,
                                      lwd =1)

  if(marginals==TRUE){
            p.bias = ggMarginal(finalplot, type= "histogram",
                                fill = color.hist,
                                bins = bin.hist)}
  else{p.bias = finalplot}

#...............................................................................

  if(cross.val.plot==TRUE & bias.plot == TRUE){return(grid.arrange(p.bias, p.forest, ncol=2))}
    else if(cross.val.plot==TRUE & bias.plot == FALSE){return(p.forest)}
        else if(cross.val.plot==FALSE & bias.plot == TRUE){return(p.bias)}

}