R/converge_diag_fn.R
In BayesMoFo: Bayesian Mortality Forecasting

Documented in converge_diag_fn

#' A function to check for overall convergence of fitted stochastic mortality models for monitoring purposes
#'
#' Compute several common MCMC convergence diagnostics of parameters fitted under stochastic mortality models using posterior samples stored in "fit_result" object.
#' @param result object of type either "fit_result" or "BayesMoFo".
#' @param tol A numeric value between 0 and 1 specifying the tolerance percentage of the convergence diagnostics, i.e. if the proportion of convergence diagnostic checks/tests (either Gelman's or Heidel's) exceeds \code{tol}, warning messages will be triggered. Default is \code{tol=0.20}.
#' @param plot_gelman A logical value indicating whether to produce a plot of Gelman's R statistic, PSRF ("potential scale reduction factor") for visualisation.
#' @param plot_geweke A logical value indicating whether to produce a plot of Geweke's statistic for visualisation.
#' @return A message of recommendations and (possibly) convergence diagnostics plots. Additionally, a list with components:
#' \describe{
#'   \item{\code{gelman_diag}}{A \code{gelman.diag} object which is a list containing the estimated R statistic (PSRF) along with the upper confidence intervals, and also the multivariate PSRF (Brooks and Gelman, 1998). See Gelman and Rubin (1992) for more details.}
#'   \item{\code{geweke_diag}}{A \code{geweke.diag} object which is a list containing the estimated Geweke's Z scores and the corresponding fractions used for equality of means test. See Geweke (1992) for more details.}
#'   \item{\code{heidel_diag}}{A \code{heidel.diag} object which is a matrix containing results from both Stationarity and Half-width tests. See Heidelberger and Welch (1981), Heidelberger and Welch (1983) for more details.}
#'   \item{\code{n}}{The total number of posterior samples (if more than one chain were generated, they are merged into one long chain).}
#' }
#' @references Andrew Gelman, Donald B. Rubin. (1992). "Inference from Iterative Simulation Using Multiple Sequences," Statistical Science, Statist. Sci. 7(4), 457-472. \doi{DOI: 10.1214/ss/1177011136}
#' @references Brooks, SP. and Gelman, A. (1998). General methods for monitoring convergence of iterative simulations. Journal of Computational and Graphical Statistics, 7, 434-455. \doi{https://doi.org/10.1080/10618600.1998.10474787}
#' @references Geweke, J. (1992) Evaluating the accuracy of sampling-based approaches to calculating posterior moments. In Bayesian Statistics 4 (ed JM Bernado, JO Berger, AP Dawid and AFM Smith). Clarendon Press, Oxford, UK. \doi{https://doi.org/10.21034/sr.148}
#' @references Heidelberger P and Welch PD. (1981). A spectral method for confidence interval generation and run length control in simulations. Comm. ACM. 24, 233-245. \doi{https://doi.org/10.1145/358598.358630}
#' @references Heidelberger P and Welch PD. (1983) Simulation run length control in the presence of an initial transient. Opns Res., 31, 1109-44. \doi{https://doi.org/10.1287/opre.31.6.1109}
#' @keywords bayesian diagnostics visualization
#' @concept convergence
#' @concept PSRF
#' @importFrom stats dbinom dpois quantile sd density dnorm
#' @importFrom graphics abline
#' @export
#' @examples
#' \donttest{
#' #load and prepare data
#' data("dxt_array_product");data("Ext_array_product")
#' death<-preparedata_fn(dxt_array_product,strat_name = c("ACI","DB","SCI"),ages=35:65)
#' expo<-preparedata_fn(Ext_array_product,strat_name = c("ACI","DB","SCI"),ages=35:65)
#' 
#' #fit any mortality model
#' runBayesMoFo_result<-runBayesMoFo(death=death,expo=expo,models="M1A",n_iter=1000,
#'   family="poisson",n.chain=5,thin=1)
#' 
#' #compute convergence diagnostics (with plots)
#' converge_runBayesMoFo_result<-converge_diag_fn(runBayesMoFo_result,
#'   plot_gelman=TRUE,plot_geweke=TRUE)
#' 
#' #Gelman's R (PSRF)
#' converge_runBayesMoFo_result$gelman_diag
#' 
#' #Geweke's Z statistics
#' converge_runBayesMoFo_result$geweke_diag$z
#' 
#' #Heidel's Stationary and Halfwidth Tests
#' converge_runBayesMoFo_result$heidel_diag
#' }

converge_diag_fn<-function(result,tol=0.20,plot_gelman=FALSE,plot_geweke=FALSE){
  
  if ("BayesMoFo_obj" %in% names(result)){
    fit_result<-result$result$best
  } else {
    fit_result<-result
  }
  
  death<-fit_result$death
  
  p<-death$n_strat
  A<-death$n_ages
  T<-death$n_years
  size<-p*A*T
  
  h<-fit_result$h
  forecast<-fit_result$forecast
  if (forecast){size<-p*A*(T+h)}
  
  n_chain<-length(fit_result$post_sample)
  
  rates_mcmc_list<-coda::mcmc.list()
  for (i in 1:n_chain){
    rates_mcmc_list[[i]]<-fit_result$post_sample[[i]][,startsWith(colnames(fit_result$post_sample[[i]]),"q")]
  }
  
  gelman_diag<-coda::gelman.diag(rates_mcmc_list);gc()
  
  rates_mcmc_object<-coda::as.mcmc(as.matrix(rates_mcmc_list));gc()
  
  heidel_diag<-coda::heidel.diag(rates_mcmc_object);gc()
  geweke_diag<-coda::geweke.diag(rates_mcmc_object);gc()
  
  gelman_threshold<-sum(gelman_diag$psrf[,1]>1.2)/size
  stationarity_threshold<-sum(heidel_diag[,1][!is.na(heidel_diag[,1])]==0)/size
  halfwidth_threshold<-sum(heidel_diag[,4][!is.na(heidel_diag[,4])]==0)/size
  
  if (gelman_threshold>tol | stationarity_threshold>tol | halfwidth_threshold>tol){
    if (gelman_threshold>tol){insight::print_colour(paste0("Warning: More than ",tol*100,"% of the Gelman's R statistics are larger than 1.20. Longer posterior chains recommended!","\n"),"red")}
    if (stationarity_threshold>tol){insight::print_colour(paste0("Warning: More than ",tol*100,"% of the Stationarity tests failed. Longer posterior chains recommended!","\n"),"red")}
    if (halfwidth_threshold>tol){insight::print_colour(paste0("Warning: More than ",tol*100,"% of the Half-width tests failed. Longer posterior chains recommended!","\n"),"red")}
  } else {
    insight::print_colour(paste0("Note: no convergence issues identified.","\n"),"green")
  }
    
  n<-dim(rates_mcmc_object)[1] #number of posterior samples
  
  oldpar <- par(no.readonly = TRUE)
  on.exit(par(oldpar)) 
  
  if (plot_gelman){
    if (plot_geweke){par(mfrow=c(1,3))}else{par(mfrow=c(1,1))}
    plot(sort(gelman_diag$psrf[,1]),pch=19,ylab="Gelman's R statistic (PSRF)",xlab="q[,,]",main="Plot of sorted Gelman's R statistics")
    abline(h=1,lty=2)
    legend("topleft",c("Benchmark"),lty=2)
  }
  
  if (plot_geweke){
    plot(geweke_diag$z,ylim=range(geweke_diag$z),pch=19,xlab="q[,,]",ylab="Geweke's Z statistics")
    abline(h=c(-1.96,1.96),lty=2)
    
    plot(density(geweke_diag$z),main="Density plot of Geweke's statistics")
    lines(seq(-10,10,0.1),dnorm(seq(-10,10,0.1)),lty=2)
    legend("topright",c("Benchmark"),lty=2)
  }
  
  list(gelman_diag=gelman_diag,geweke_diag=geweke_diag,heidel_diag=heidel_diag,n=n)
}