#' @title Analyze the result of a call to \code{bd_do_inference}
#'
#' @description \code{bd_do_inference} calls Stan to do Bayesian inference by generating
#' a sample of parameters from the posterior of theta (or th).
#' This function analyzes the result of that inference.
#' In particular, it calculates the quantiles of the density function and growth rate.
#'
#' @details `soln` is the result of a call to \code{bd_do_inference}.
#' It contains both the resulting samples and the parameters used in the inference,
#' such as the hyperparameters (see \code{bd_do_inference} for further details).
#' The primary thing \code{bd_analyze_soln} does is calculate quantiles of both
#' the parameterized density and growth rate. For example,
#' for a calendar date y each sample yields a density and growth rate.
#' The quantile is the value of the density or growth rate such that
#' a given proportion of samples are smaller than that value.
#' The probabilities used to calculate these quantiles are `probs = c(lev, 0.5, 1-lev)`,
#' where `lev` is the level (0.025 by default, so that 95% of the observations
#' lie between the first and last quantile bands).
#'
#' In addition, \code{bd_analyze_soln} identifies calendar dates for which
#' the growth rate quantiles defined by `lev` and `1 - lev` do not contain zero.
#' This indicates significant positive or negative growth for the density curve.
#' The output vector `growthState` codes calendar dates by growth state as 'negative',
#' 'zero', and 'positive'. For the Gaussian mixture parameterization of the density,
#' the rate is not typically meaningful near the calendar date boundaries where it
#' increases linearly as the calendar date goes to positive or negative infinity.
#' The parameter `rateProp` provides control on how calendar dates are classified by
#' growth rate near these boundaries. In particular, the calendar dates with a cumulative
#' density (50% quantile) below `rateProp` (for the lower boundary) or above `1 - rateProp`
#' (for the upper boundary) are classified as 'missing' in `growthState`.
#' By default, `rateProp` is NA and no calendar dates are classified as missing.
#'
#' By default, a summary is done for each sample by calling
#' bd_summarize_sample. This is not done of doSummary is FALSE
#'
#' @param soln The solution, a list-like object of class bd_soln (see \code{bd_do_inference})
#' @param y (optional) The calendar dates at which to evaluate densities.
#' If y is not input, y is built from the hyperparameters.
#' @param th_sim (optional) The known parameters used to create simulation data
#' @param lev (default: 0.025) The level to use for the quantile bands
#' @param rateProp (optional) The cumulative density needed to define rate growth bands
#' @param doSummary [Default TRUE] Whether to summarize each sample by calling bd_summarize_sample
#'
#' @return A list with information on the quantiles of the density function and growth rate (and sample summaries)
#'
#' @export
bd_analyze_soln <- function(soln, y = NA, th_sim = NA, lev = 0.025, rateProp = NA, doSummary = T) {
if (all(is.na(y))) {
y <- seq(soln$prob$hp$ymin, soln$prob$hp$ymax, by = soln$prob$hp$dy)
}
probs <- c(lev, 0.5, 1 - lev) # The probabilities to use for quantiles
# Determine y spacing, dy, and ensure that y is evenly spaced
dy <- unique(diff(y))
if (length(dy) > 1) {
stop("y should by uniformily spaced")
}
# Extract the samples of theta in the variable TH. TH is matrix like object,
# possibly of a specific class (e.g., gaussmix) with dimensions
# numSamp x numParam, where numSamp is the number of samples and numParam is
# the number of parameters (length of th).
TH <- bd_extract_param(soln$fit)
numMix <- ncol(TH) / 3
numSamp <- nrow(TH)
numGrid <- length(y)
# Calculate the pdf matrix, which is the density of the parametric model for
# theta for each sample and each grid point. fMat has dimensions
# N x G, where N is the number of samples in TH and G is the length of the vector y.
# Because bd_calc_gauss_mix_pdf_mat is called with ymin and ymax, the density
# is normalized to integrate to 1 on the interval ymin to ymax.
fMat <- bd_calc_gauss_mix_pdf_mat(TH,y,ymin=soln$prob$hp$ymin,ymax=soln$prob$hp$ymax)
# Calculate the rate for each sample and grid point (f' / f, where f is density)
rateMat <- bd_calc_gauss_mix_pdf_mat(TH,y,ymin=soln$prob$hp$ymin,ymax=soln$prob$hp$ymax,type='rate')
# Calculate the quantiles of the normalized density matrix
Qdens <- bd_calc_quantiles(fMat, probs)
# Normalized 50% densities (not normalized to integrate to 1)
f50 <- Qdens[2, ] # The second row gives the 50% quantiles
#f50 <- f50 / sum(f50) / dy
# Restrict to indices with enough probability mass (if necessary)
if (!is.na(rateProp)) {
rateInd <- which(cumsum(f50 * dy) > rateProp & rev(cumsum(rev(f50) * dy)) > rateProp)
} else {
rateInd <- 1:length(f50)
}
# Identify regions with growth rates that differ from zero per the input quantile level (lev)
# growthState0 is -1 for significant negative growth, 1 for significant positive growth, and 0 otherwise
Qrate <- bd_calc_quantiles(rateMat[, rateInd], probs)
growthState0 <- rep("zero", length(rateInd)) # growthState0 indices in rateInd
growthState0[Qrate[2, ] > 0 & Qrate[1, ] > 0] <- "positive"
growthState0[Qrate[2, ] < 0 & Qrate[3, ] < 0] <- "negative"
growthState <- rep("missing", length(y))
growthState[rateInd] <- growthState0 # growthState for all indices
# Calculate the measurement matrix
M <- bd_calc_meas_matrix(y, soln$prob$phi_m, soln$prob$sig_m, soln$prob$calibDf)
# Calculate and normalize the summed probability density vector
f_spdf <- colSums(M)
f_spdf <- f_spdf / sum(f_spdf) / dy
out <- list(y = y,
f_spdf = f_spdf,
Qdens = Qdens,
Qrate = Qrate,
probs = probs,
rateProp = rateProp,
rateInd = rateInd,
growthState = growthState,
dy = dy)
class(out) <- "bd_analysis"
if(doSummary) {
summList <- list()
for(n in 1:numSamp) {
th <- TH[n,]
summList[[n]] <- bd_summarize_trunc_gauss_mix_sample(th,soln$prob$hp$ymin,soln$prob$hp$ymax)
}
out$summList <- summList
}
haveSim <- !all(is.na(th_sim))
if (haveSim) {
f_sim <- bd_calc_gauss_mix_pdf(th_sim,y,ymin=soln$prob$hp$ymin,ymax=soln$prob$hp$ymax)
rate_sim <- bd_calc_gauss_mix_pdf(th_sim,y,ymin=soln$prob$hp$ymin,ymax=soln$prob$hp$ymax,type='rate')
out$f_sim <- f_sim
out$rate_sim <- rate_sim
}
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.