R/utility_functions.R

run_jags <- function(model_string, data, inits, params, n.chains, n.adapt, n.update, n.iter, thin, progress.bar) {
  if(!interactive()) {
    progress.bar <- "none"
  }
  
  # Set the random number generator and seed based on R's random state (through runif)
  if(is.null(inits$.RNG.seed) & is.null(inits$.RNG.name)) {
    RNGs <- c("base::Wichmann-Hill", "base::Marsaglia-Multicarry", 
            "base::Super-Duper", "base::Mersenne-Twister")
    init_list <- inits
    inits <- function(chain) {
      chain_init_list <- init_list
      chain_init_list$.RNG.seed <- as.integer(runif(1, 0, .Machine$integer.max))
      chain_init_list$.RNG.name <- RNGs[ ((chain - 1) %% 4) +  1 ]
      chain_init_list
    }
  }

  jags_model <- jags.model(textConnection(model_string) , data=data , inits=inits , 
                          n.chains=n.chains , n.adapt=0, quiet=TRUE)
  adapt(jags_model, max(1, n.adapt),  progress.bar="none", end.adaptation=TRUE)
  if(n.update > 0) { 
    update( jags_model, n.update, progress.bar="none")
  }
  mcmc_samples <- coda.samples( jags_model , variable.names= params,
                               n.iter=n.iter, thin=thin, progress.bar=progress.bar)
  mcmc_samples <- reorder_coda(mcmc_samples, params)
  mcmc_samples
}

# The following two functions are hacks that are only used to construct
# and print the model code that is printed by the model.code functions.

# This first function takes a function and replaces the placeholder
# with model_string. Is used so that model string doesn't have to be written twice.
inject_model_string <- function(fn, model_string, placeholder = "BayesianFirstAid::replace_this_with_model_string") {
  code_string <- deparse(fn, control="useSource")
  code_string <- gsub(placeholder, 
                      paste0('model_string <- "', gsub("\n", "\n  ", model_string), '"'), code_string)
  eval(parse(text=code_string))
}

# This second function pretty prints the body of a function.
pretty_print_function_body <- function(fn) {
  fn_string <- deparse(fn, control="useSource")
  fn_string <- gsub("^  ", "", fn_string)
  cat(paste(fn_string[-c(1, length(fn_string))], collapse="\n"))
}

# mcmc_samples should be a coda mcmc object
print_mcmc_info <- function(mcmc_samples) {
  cat("\n", "Iterations = ", start(mcmc_samples), ":", end(mcmc_samples), "\n", sep = "")
  cat("Thinning interval =", thin(mcmc_samples), "\n")
  cat("Number of chains =", nchain(mcmc_samples), "\n")
  cat("Sample size per chain =", (end(mcmc_samples) - start(mcmc_samples))/thin(mcmc_samples) + 1, "\n")
}

# s is the matrix of statistics generated by the mcmc_stats function
print_diagnostics_measures <- function(s) {
  cat("  Diagnostic measures\n")
  print(s[, c("mean", "sd", "mcmc_se", "n_eff", "Rhat")])
  cat("\n")
  cat("mcmc_se: the estimated standard error of the MCMC approximation of the mean.\n")
  cat("n_eff: a crude measure of effective MCMC sample size.\n")
  cat("Rhat: the potential scale reduction factor (at convergence, Rhat=1).\n")
}

# Below shold be same as:
#rmt(100, samples_mat[i, c("mu[1]", "mu[2]")], cov_mat, samples_mat[i, "nu"])

rmt <- function(n, mu, cov_mat, nu) {
  t(t(mvrnorm(n, rep(0, length(mu)), cov_mat) / sqrt(nu / rchisq(n, nu))) + mu)
}

# A version of mad (median absolute deviation), that guards against mad returning zero by
# replacing 0 it by the SD instead. Also removes NAs by default.

mad0 <- function(..., na.rm=TRUE) {
  mad_est <- mad(..., na.rm=na.rm)
  if(mad_est != 0) {
    mad_est
  } else {
    sd(..., na.rm=na.rm)
  }
}

# Estimate the mode of a continous distribution given by x
# from and to limits the location of the mode
estimate_mode <- function(x,from=min(x), to=max(x)) {
  d <- density(x, from=from, to=to)
  d$x[which.max(d$y)]
}

# Not very general function that is made to plot a histogram given discrete (integer)
# data.
discrete_hist <- function(x, xlim, col="skyblue", lwd=3, x_marked = c(), marked_col = "red", yaxt="n",...) {
  hist_data <- hist(x, (xlim[1] - 1):(xlim[2]) + 0.5 , plot=FALSE)
  cols <- ifelse(hist_data$mids %in% x_marked, rgb(0, 0, 0, 0), col )
  plot(hist_data$mids, hist_data$density, type="h", col=col, lwd=lwd, bty = "o", lend=1,...)
  points(hist_data$mids[hist_data$mids %in% x_marked], hist_data$density[hist_data$mids %in% x_marked],
       type="h", col=marked_col, lwd=lwd, bty = "o",lend=2,...)
  invisible(hist_data)
}

# Plots a histogram of x with curves of t distributions
# specified by mu, sigma and nu
# data_mu and data_sigma should be point estimates
# Adapted from Kruschkes BEST1Gplot
hist_with_t_curves <- function(data, data_mu, data_sigma, mu, sigma, nu, data_name = "", main = "",
                               x_range = range(data), horiz=FALSE, plot_n = TRUE, x_lim=NULL, axs= "r",...) {
  n_curves <- length(mu)
  
  if(is.null(x_lim)) {
    # Calculating limits for the curves
    x_lim = c( x_range[1]-0.1*(x_range[2]-x_range[1]) , 
               x_range[2]+0.1*(x_range[2]-x_range[1]) )
  }
  x = seq( x_lim[1] , x_lim[2] , length=200 )
  
  # Limits and bins for the histogram
  bin_with = data_sigma/2
  hist_center = data_mu
  breaks = sort( c( seq( hist_center - bin_with/2 , min(x) - bin_with/2 ,
                         -bin_with ),
                    seq( hist_center + bin_with/2 , max(x) + bin_with/2 ,
                         bin_with ) , x_lim ) )
  hist_info = hist( data , plot=FALSE , breaks=breaks )
  hist_y = hist_info$density 
  hist_y[ hist_y==0.0 ] = NA
  hist_x = hist_info$mids
  hist_x[ hist_y==0.0 ] = NA
  
  # The height of the plot
  curve_maxs <- sapply(seq_along(mu), function(i) {
    max(dt( (x - mu[i]) / sigma[i] , df=nu[i] ) / sigma[i])
  })
  
  max_y = max( curve_maxs, max(hist_y, na.rm=T))
  
  # Plotting
  if(!horiz) {
    plot( x , dt( (x - mu[1]) / sigma[1] , df=nu[1] ) / sigma[1] , 
          ylim=c(0,max_y) , cex.lab=1.5, xaxs=axs, yaxs=axs,
          type="l" , col="skyblue" , lwd=1 , xlab=data_name , ylab="Probability", 
          main=main, ...)
    for ( i in 2:length(mu) ) {
      lines(x, dt( (x - mu[i]) / sigma[i] , df=nu[i] ) / sigma[i] , 
            type="l" , col="skyblue" , lwd=1 )
    }
    points( hist_x , hist_y , type="h" , lwd=3 , col="red" ,lend=1 )
    if(plot_n) {
      text( max(x) , max_y , bquote(N==.(length(data))) , adj=c(1.1,1.1) )
    }
  } else {
    plot( y=x , x=dt( (x - mu[1]) / sigma[1] , df=nu[1] ) / sigma[1] , 
          xlim=c(0,max_y) , cex.lab=1.5, xaxs=axs, yaxs=axs,
          type="l" , col="skyblue" , lwd=1 , ylab=data_name , xlab="Probability", 
          main=main, ...)
    for ( i in 2:length(mu) ) {
      lines(y = x, x=dt( (x - mu[i]) / sigma[i] , df=nu[i] ) / sigma[i] , 
            type="l" , col="skyblue" , lwd=1 )
    }
    segments(x0= rep(0, length(hist_y)), y0= hist_x, x1 = hist_y, y1=hist_x, lwd=3 , col="red" ,lend=1 )
    if(plot_n) {
      text( y=max(x) , x=max_y , bquote(N==.(length(data))) , adj=c(1.1,1.1) )
    }
    
  }
}
  
  

# Reoder the columns of a mcmc.list coda object.
reorder_coda <- function(s, param_order) {
  s <- lapply(s, function(chain) {
    chain[, order(match(gsub("\\[.+\\]$", "", colnames(chain)), param_order))]
  })
  mcmc.list(s)
}

# Formats a number and returns d significant digits, keeps all
# integer digits. This is probably possible to write in a
# much more elegant manner.
sign_digits <- function(x,d){
  s <- format(x,digits=d)
  if(grepl("\\.", s) && ! grepl("e", s)) {
    n_sign_digits <- nchar(s) - 
      max( grepl("\\.", s), attr(regexpr("(^[-0.]*)", s), "match.length") )
    n_zeros <- max(0, d - n_sign_digits)
    s <- paste(s, paste(rep("0", n_zeros), collapse=""), sep="")
  } else if(nchar(s) < d && ! grepl("e", s)) {
    s <- paste(s, ".", paste(rep("0", d - nchar(s)), collapse=""), sep="")
  }
  s
}

# rounds to d decimal places or to d significan digets depending on which leads to
# the least absolute error.
round_or_signif <- function(x, d) {
  x_round <- round(x, d)
  x_signif <- signif(x, d)
  least_error <- sapply(seq_len(length(x)), function(i) {
    least_error_i <- which.min(c(abs(x[i] - x_signif[i]), abs(x[i] - x_round[i])))
    # Guard against NAs and other strangeness in the the input.
    if(! is.numeric(least_error_i) || length(least_error_i) != 1) {
      least_error_i <- 1
    }
    least_error_i
  })
  x[least_error == 1] <- x_signif[least_error == 1]
  x[least_error == 2] <- x_round[least_error == 2]
  x  
}

# Takes coda samples generates a matrix with different statistics for the
# parameters. Samples can both be a mcmc.list and a matrix with one column
# per parameter
mcmc_stats <- function(samples, cred_mass = 0.95, comp_val = 0) {
  samples_mat <- as.matrix(samples)
  stats <- data.frame(mean = colMeans(samples_mat))
  stats$sd <- apply(samples_mat, 2, sd)
  cred_mass <- rep(cred_mass, length.out = ncol(samples_mat))
  comp_val <- rep(comp_val, length.out = ncol(samples_mat))
  stats$"HDI%" <- cred_mass * 100
  stats$comp <- comp_val
  stats$HDIlo <- NA
  stats$HDIup <- NA
  for(i in 1:ncol(samples_mat)){
    hdi_lim <- HDIofMCMC(samples_mat[,i], credMass=cred_mass[i])
    stats$HDIlo[i] <- hdi_lim[1]
    stats$HDIup[i] <- hdi_lim[2]  
    stats$"%>comp"[i] <- mean(c(samples_mat[,i] > comp_val[i], 0, 1))
    stats$"%<comp"[i] <- mean(c(samples_mat[,i] < comp_val[i], 0, 1))
  }
  stats$"q2.5%" <- apply(samples_mat, 2, quantile,  probs= 0.025)
  stats$"q25%" <- apply(samples_mat, 2, quantile,  probs= 0.25)
  stats$median <- apply(samples_mat, 2, median)
  stats$"q75%" <- apply(samples_mat, 2, quantile,  probs= 0.75)
  stats$"q97.5%" <- apply(samples_mat, 2, quantile,  probs= 0.975)
  stats$mcmc_se <- NA
  stats$Rhat <- NA
  stats$n_eff <- NA
  if(is.mcmc.list(samples)) {
    stats$mcmc_se <- summary(samples)$statistics[,"Time-series SE"]
    stats$Rhat <- gelman.diag(samples, multivariate = FALSE)$psrf[, 1]
    stats$n_eff <- as.integer(effectiveSize(samples))
  }
  as.matrix(stats) # 'cause it's easier to index
}

# converts x to a char but returns, say, "<0.001" if x would be below low = 0.001
num_to_char_with_lim <- function(x, low, high, digits) {
  ifelse(x > high, paste(">", round(high, digits) , sep=""),
  ifelse(x < low, paste("<", round(low, digits), sep=""),
  as.character(round(x, digits))))
}

# Takes a matrix like the one generated by the function mcmc_stats and generates a
# version where the number have been formated into strings for pretty printing.
format_stats <- function(s) {
    s_char <- apply(s, c(1,2), function(x) { sign_digits(x, 2) })
    s_char[, "comp"] <- round(s[, "comp"], 3)
    
    s_char[, "%>comp"] <- num_to_char_with_lim(s[, "%>comp"], 0.001, 0.999,  3)
    s_char[, "%<comp"] <- num_to_char_with_lim(s[, "%<comp"], 0.001, 0.999,  3)
    
    s_char
}

# Kruschke
HDIofICDF = function( ICDFname , credMass=0.95 , tol=1e-8 , ... ) {
  # Arguments:
  #   ICDFname is R's name for the inverse cumulative density function
  #     of the distribution.
  #   credMass is the desired mass of the HDI region.
  #   tol is passed to R's optimize function.
  # Return value:
  #   Highest density iterval (HDI) limits in a vector.
  # Example of use: For determining HDI of a beta(30,12) distribution, type
  #   HDIofICDF( qbeta , shape1 = 30 , shape2 = 12 )
  #   Notice that the parameters of the ICDFname must be explicitly named;
  #   e.g., HDIofICDF( qbeta , 30 , 12 ) does not work.
  # Adapted and corrected from Greg Snow's TeachingDemos package.
  incredMass =  1.0 - credMass
  intervalWidth = function( lowTailPr , ICDFname , credMass , ... ) {
    ICDFname( credMass + lowTailPr , ... ) - ICDFname( lowTailPr , ... )
  }
  optInfo = optimize( intervalWidth , c( 0 , incredMass ) , ICDFname=ICDFname ,
                      credMass=credMass , tol=tol , ... )
  HDIlowTailPr = optInfo$minimum
  return( c( ICDFname( HDIlowTailPr , ... ) ,
             ICDFname( credMass + HDIlowTailPr , ... ) ) )
}                  # Kruschke, J. K. (2011). Doing Bayesian data analysis: A
# Tutorial with R and BUGS. Elsevier Science/Academic Press.


# Kruschke
HDIofMCMC = function( sampleVec , credMass=0.95 ) {
  # Computes highest density interval from a sample of representative values,
  #   estimated as shortest credible interval.
  # Arguments:
  #   sampleVec
  #     is a vector of representative values from a probability distribution.
  #   credMass
  #     is a scalar between 0 and 1, indicating the mass within the credible
  #     interval that is to be estimated.
  # Value:
  #   HDIlim is a vector containing the limits of the HDI
  sortedPts = sort( sampleVec )
  ciIdxInc = floor( credMass * length( sortedPts ) )
  nCIs = length( sortedPts ) - ciIdxInc
  ciWidth = rep( 0 , nCIs )
  for ( i in 1:nCIs ) {
    ciWidth[ i ] = sortedPts[ i + ciIdxInc ] - sortedPts[ i ]
  }
  HDImin = sortedPts[ which.min( ciWidth ) ]
  HDImax = sortedPts[ which.min( ciWidth ) + ciIdxInc ]
  HDIlim = c( HDImin , HDImax )
  return( HDIlim )
}

# Author John Kruschke, slightly modified
plotPost = function( param_sample_vec , cred_mass=0.95 , comp_val=NULL ,
                     HDI_text_place=0.7 , ROPE=NULL , yaxt=NULL , ylab=NULL ,
                     xlab=NULL , cex.lab=NULL , cex=NULL , xlim=NULL , main=NULL ,
                     col=NULL , border=NULL , show_mode=FALSE , show_median = FALSE,
                     show_curve=FALSE , breaks=NULL , show_labels = TRUE, log_base = NULL,... ) {
  # Override defaults of hist function, if not specified by user:
  # (additional arguments "..." are passed to the hist function)
  if ( is.null(xlab) ) xlab="Parameter"
  if ( is.null(cex.lab) ) cex.lab=1.5
  if ( is.null(cex) ) cex=1.4
  if ( is.null(xlim) ) xlim=range( c( comp_val , param_sample_vec ) )
  if ( is.null(main) ) main=""
  if ( is.null(yaxt) ) yaxt="n"
  if ( is.null(ylab) ) ylab=""
  if ( is.null(col) ) col="skyblue"
  if ( is.null(border) ) border="#CCF0FF"
  
  postSummary = matrix( NA , nrow=1 , ncol=11 , 
                        dimnames=list( c( xlab ) , 
                                       c("mean","median","mode",
                                         "hdiMass","hdiLow","hdiHigh",
                                         "comp_val","pcGTcomp_val",
                                         "ROPElow","ROPEhigh","pcInROPE")))              
  postSummary[,"mean"] = mean(param_sample_vec)
  postSummary[,"median"] = median(param_sample_vec)
  mcmcDensity = density(param_sample_vec)
  postSummary[,"mode"] = mcmcDensity$x[which.max(mcmcDensity$y)]
  HDI = HDIofMCMC( param_sample_vec , cred_mass )
  if(! is.null(log_base)) {
    HDI = log_base^HDIofMCMC( log(param_sample_vec, log_base) , cred_mass )
  } else {
    HDI = HDIofMCMC( param_sample_vec , cred_mass )
  }
    
  postSummary[,"hdiMass"]=cred_mass
  postSummary[,"hdiLow"]=HDI[1]
  postSummary[,"hdiHigh"]=HDI[2]
  
  # Plot histogram.
  if ( is.null(breaks) ) {
    if(! is.null(log_base) ) {
      log_param_sample_vec <- log(param_sample_vec, log_base)
      HDI95 = HDIofMCMC( log_param_sample_vec , 0.95 )
      breaks = c( seq( from=min(log_param_sample_vec) , to=max(log_param_sample_vec) ,
                       by=(HDI95[2]-HDI95[1])/18 ) , max(log_param_sample_vec) )  
    } else {
      HDI95 = HDIofMCMC( param_sample_vec , 0.95 )
      breaks = c( seq( from=min(param_sample_vec) , to=max(param_sample_vec) ,
                       by=(HDI95[2]-HDI95[1])/18 ) , max(param_sample_vec) )
    }
    
  }
  if ( !show_curve ) {
    par(xpd=NA)
    if(! is.null(log_base)) {
      old_par <- par(lab=c(6, 5, 7))
      histinfo <- hist( log(param_sample_vec, log_base) , xaxt = "n",xlab=xlab , yaxt=yaxt , ylab=ylab ,
               freq=F , border=border , col=col , xlim=log(xlim, log_base) , main=main , cex=cex , cex.lab=cex.lab ,
               breaks=breaks , ... )
      log_labels = as.character(fractions(log_base^axTicks(1, ), max.denominator = 2^64))
      extreme_axis_ticks <- log_base^axTicks(1) > 9999 | log_base^axTicks(1) < 1/9999
      log_labels[extreme_axis_ticks] <- paste(log_base, "^", axTicks(1)[extreme_axis_ticks], sep="")
      axis(1, at = axTicks(1), labels = log_labels)
      par(old_par)
    } else {
      histinfo = hist( param_sample_vec , xlab=xlab , yaxt=yaxt , ylab=ylab ,
                       freq=F , border=border , col=col ,
                       xlim=xlim , main=main , cex=cex , cex.lab=cex.lab ,
                       breaks=breaks , ... )
    }
  }
  if ( show_curve ) {
    par(xpd=NA)
    histinfo = hist( param_sample_vec , plot=F )
    densCurve = density( param_sample_vec , adjust=2 )
    plot( densCurve$x , densCurve$y , type="l" , lwd=5 , col=col , bty="n" ,
          xlim=xlim , xlab=xlab , yaxt=yaxt , ylab=ylab ,
          main=main , cex=cex , cex.lab=cex.lab , ... )
  }
  cenTendHt = 0.9*max(histinfo$density)
  cvHt = 0.7*max(histinfo$density)
  ROPEtextHt = 0.55*max(histinfo$density)
  # Display mean or mode:
  if ( show_mode==T )   {
    dres = density( param_sample_vec )
    modeParam = dres$x[which.max(dres$y)]
    if(show_labels) {
      text_label <-  bquote(mode==.(sign_digits(modeParam,2)))
    } else {
      text_label <-  sign_digits(modeParam,2)
    }
    if(! is.null(log_base)) {
      text( log(modeParam, log_base) , cenTendHt , text_label, adj=c(.5,0) , cex=cex )
    } else {
      text( modeParam , cenTendHt , text_label, adj=c(.5,0) , cex=cex )
    }
  } else if(show_median) {
    medianParam = median( param_sample_vec )
    if(show_labels) {
      text_label <-  bquote(median==.(sign_digits(medianParam,2)))
    } else {
      text_label <-  sign_digits(medianParam,2)
    }
    if(! is.null(log_base)) {
      text( log(medianParam, log_base) , cenTendHt , text_label, adj=c(.5,0) , cex=cex )
    } else {
      text( medianParam , cenTendHt , text_label, adj=c(.5,0) , cex=cex )
    }
  } else { # Show the mean
    meanParam = mean( param_sample_vec )
    if(show_labels) {
      text_label <-  bquote(mean==.(sign_digits(meanParam,2)))
    } else {
      text_label <-  sign_digits(meanParam,2)
    }
    if(! is.null(log_base)) {
      text( log(meanParam, log_base) , cenTendHt ,  text_label, adj=c(.5,0) , cex=cex )
    } else {
      text( meanParam , cenTendHt ,  text_label, adj=c(.5,0) , cex=cex )
    }
  }
  
  
  # Display the comparison value.
  if ( !is.null( comp_val ) ) {
    cvCol = "darkgreen"
    pcgtcomp_val = round( 100 * sum( param_sample_vec > comp_val )
                          / length( param_sample_vec )  , 1 )
    pcltcomp_val = 100 - pcgtcomp_val
    if(! is.null(log_base)) {
      comp_val_pos <- log(comp_val, log_base)
    } else {
      comp_val_pos <- comp_val
    }
    
    lines( c(comp_val_pos,comp_val_pos) , c(0.96*cvHt,0) ,
           lty="dotted" , lwd=1 , col=cvCol )
    text( comp_val_pos , cvHt ,
          bquote( .(pcltcomp_val)*"% < " *
                   .(signif(comp_val,2)) * " < "*.(pcgtcomp_val)*"%" ) ,
          adj=c(pcltcomp_val/100,0) , cex=0.8*cex , col=cvCol )
    postSummary[,"comp_val"] = comp_val
    postSummary[,"pcGTcomp_val"] = ( sum( param_sample_vec > comp_val ) 
                                     / length( param_sample_vec ) )
  }
  # Display the ROPE.
  if ( !is.null( ROPE ) ) {
    ropeCol = "darkred"
    pcInROPE = ( sum( param_sample_vec > ROPE[1] & param_sample_vec < ROPE[2] )
                 / length( param_sample_vec ) )
    if(! is.null(log_base)) {
      ROPE_pos <- log(ROPE, log_base)
    } else {
      ROPE_pos <- ROPE
    }
  
    lines( c(ROPE_pos[1],ROPE_pos[1]) , c(0.96*ROPEtextHt,0) , lty="dotted" , lwd=2 ,
           col=ropeCol )
    lines( c(ROPE_pos[2],ROPE_pos[2]) , c(0.96*ROPEtextHt,0) , lty="dotted" , lwd=2 ,
           col=ropeCol)
    text( mean(ROPE_pos) , ROPEtextHt ,
          bquote( .(round(100*pcInROPE))*"% in ROPE" ) ,
          adj=c(.5,0) , cex=1 , col=ropeCol )
    
    postSummary[,"ROPElow"]=ROPE[1] 
    postSummary[,"ROPEhigh"]=ROPE[2] 
    postSummary[,"pcInROPE"]=pcInROPE
  }
  # Display the HDI.
  if(! is.null(log_base)) {
    HDI_pos <- log(HDI, log_base)
  } else {
    HDI_pos <- HDI
  }
  
  lines( HDI_pos , c(0,0) , lwd=4 )
  if(show_labels) {
    text( mean(HDI_pos) , 0 , bquote(.(100*cred_mass) * "% HDI" ) ,
          adj=c(.5,-1.7) , cex=cex )
  }
  text( HDI_pos[1] , 0 , bquote(.(sign_digits(HDI[1],2))) ,
        adj=c(HDI_text_place,-0.5) , cex=cex )
  text( HDI_pos[2] , 0 , bquote(.(sign_digits(HDI[2],2))) ,
        adj=c(1.0-HDI_text_place,-0.5) , cex=cex )
  par(xpd=F)
  #
  #return( postSummary )
  return(invisible())
}
rasmusab/bayesian_first_aid documentation built on May 27, 2019, 2:03 a.m.