#' @title blatentPPMC
#'
#' @description Simulates data using parameters from posterior distribution of blatent Markov chain.
#'
#' @param model A blatent MCMC model object.
#'
#' @param nSamples The number of PPMC samples to be simulated.
#'
#' @param seed The random number seed. Defaults to the seed set in the blatent model object.
#'
#' @param parallel If parallelization should be used in PPMC. Defaults to \code{"TRUE"}.
#'
#' @param nCores If \code{"parallel == TRUE"}, then specifies the number of cores to use. Defaults to four.
#'
#' @param type The type of statistic to generate, submitted as a character vector. Options include:
#' \itemize{
#' \item \code{"mean"} computes and tabulates the mean for the posterior simulated data for each
#' observed variable.
#' \item \code{"covariance"} computes and tabulates the covariance for the posterior simulated data
#' for all pairs of observed variables.
#' \item \code{"univariate"} computes and tabulates a Pearson Chi-Square comparing the counts for
#' an observed variable with the counts for a variable from the posterior simulated data, for each observed variable.
#' \item \code{"bivariate"}computes and tabulates a Pearson Chi-Square comparing the counts for a
#' pair of observed variables with the counts for a pair of variables from the posterior simulated
#' data, for each pair of observed variables.
#' \item \code{"tetrachoric"} computes and tabulates the tetrachoric correlation for the
#' posterior simulated data for all pairs of observed variables.
#' \item \code{"pearson"} computes and tabulates the Pearson correlation for the
#' posterior simulated data for all pairs of observed variables.
#' }
#'
#' @param lowPPMCpercentile A vector of the lower bound percentiles used for flagging statistics against PPMC
#' predictive distributions. Results are flagged if the observed statistics percentile is lower than
#' the number in the vector. Provided in order of each term in \code{"type"}. Defaults to
#' \code{"c(.025, .025, 0, 0, .025, .025)"}.
#'
#' @param highPPMCpercentile A vector of the upper bound percentiles used for flagging statistics against PPMC
#' predictive distributions. Results are flagged if the observed statistics percentile is higher than
#' the number in the vector. Provided in order of each term in \code{"type"}. Defaults to
#' \code{"c(.975, .975, 1, 1, .975, .975)"}.
#'
#' @export
blatentPPMC = function(model, nSamples, seed = model$options$seed, parallel = TRUE, nCores = 4,
type = c("mean", "covariance", "univariate", "bivariate", "tetrachoric", "pearson"),
lowPPMCpercentile = c(.025, .025, 0, 0, .025, .025),
highPPMCpercentile = c(.975, .975, 1, 1, .975, .975)){
modelParams = model$specs$parameters$paramNames[which(model$specs$parameters$paramTypes=="model")]
# put full posterior into one easily readable matrix
fullPosterior = list(variables = Reduce(f = "rbind", x = model$chain)[, modelParams])
dataFunctions = list()
if ("mean" %in% type){
propFunction = function(simData, ...){
proportion =
apply(
X = simData,
MARGIN = 2,
FUN = mean,
na.rm = TRUE
)
proportion = matrix(data = proportion, nrow = 1)
colnames(proportion) = paste0(colnames(simData),"_proportion")
cat("\r")
return(proportion)
}
dataFunctions$mean = propFunction
}
if (any(c("covariance", "bivariate", "pearson", "tetrachoric") %in% type)){
# check list of pairwise variables with data for correlation
obsCov = stats::cov(model$data[model$specs$observedVariables], use = "pairwise.complete.obs")
covEst = sapply(
X = 1:(nrow(obsCov) - 1),
FUN = function(x, obsCov)
if (any(!is.na(obsCov[x, (x + 1):ncol(obsCov)])))
return((which(!is.na(
obsCov[x, (x + 1):ncol(obsCov)]
)) + x)),
obsCov = obsCov
)
# convert list into paired numbers:
estimableCovariances = Reduce("rbind", lapply(
X = 1:(nrow(obsCov) - 1),
FUN = function(x, covEst) {
temp = cbind(rep(x, length(covEst[[x]])), covEst[[x]])
},
covEst = covEst
))
# make list of all estimable covariances
estimableCovariances = cbind(colnames(obsCov)[estimableCovariances[, 1]], colnames(obsCov)[estimableCovariances[, 2]])
}
if ("covariance" %in% type){
covFunction = function(simData, estimableCovariances, ...){
covEst = stats::cov(simData, use = "pairwise.complete.obs")
covariance = mapply(
FUN = function(x, y, covEst)
return(covEst[x, y]),
estimableCovariances[, 1],
estimableCovariances[, 2],
MoreArgs = list(covEst = covEst)
)
covariance = matrix(data = covariance, nrow = 1)
colnames(covariance) = paste0(estimableCovariances[,1], "_", estimableCovariances[,2], "_covariance")
return(covariance)
}
dataFunctions$covariance = covFunction
}
if ("pearson" %in% type){
corFunction = function(simData, estimableCovariances, ...){
corEst = stats::cor(simData, use = "pairwise.complete.obs")
correlation = mapply(
FUN = function(x, y, corEst)
return(corEst[x, y]),
estimableCovariances[, 1],
estimableCovariances[, 2],
MoreArgs = list(corEst = corEst)
)
correlation = matrix(data = correlation, nrow = 1)
colnames(correlation) = paste0(estimableCovariances[,1], "_", estimableCovariances[,2], "_pearson")
return(correlation)
}
dataFunctions$pearson = corFunction
}
if ("tetrachoric" %in% type){
tetraFunction = function(simData, realData, estimableCovariances){
result = correlationTetrachoric(data = simData)
tetrachor = mapply(
FUN = function(x, y, result)
return(result[x, y]),
estimableCovariances[, 1],
estimableCovariances[, 2],
MoreArgs = list(result = result)
)
tetrachor = matrix(data = tetrachor, nrow = 1)
colnames(tetrachor) = paste0(estimableCovariances[,1], "_", estimableCovariances[,2], "_tetrachoric")
return(tetrachor)
}
dataFunctions$tetrachoric = tetraFunction
}
if ("univariate" %in% type){
univarFunction = function(simData, realData, ...){
result = unlist(lapply(
X = names(simData),
FUN = function(x)
return(
itemChiSquare(observed = realData[,x], observedLevels = list(c(0,1)),
expected = simData[,x], expectedLevels = list(c(0,1)), minObs = 0, minExp = 0)
)
))
univariate = c(result, sum(result))
univariate = matrix(data = univariate, nrow = 1)
colnames(univariate) = c(paste0(colnames(simData),"_univariate"), "totalUnivariate")
univariate = round(univariate, digits = 4)
return(univariate)
}
dataFunctions$univariate = univarFunction
}
if ("bivariate" %in% type){
bivarFunction = function(simData, realData, estimableCovariances){
bivariate = mapply(
FUN = function(x, y, data, simData){
# return(c(x,y))
return(itemChiSquare(observed = data[c(x,y)], observedLevels = list(c(0,1), c(0,1)),
expected = simData[,c(x,y)], expectedLevels = list(c(0,1), c(0,1)), minObs = 0, minExp = 0))
}
,
estimableCovariances[, 1],
estimableCovariances[, 2],
MoreArgs = list(data = model$data, simData = simData)
)
bivariate = c(bivariate, sum(bivariate))
bivariate = matrix(data = bivariate, nrow = 1)
colnames(bivariate) = c(paste0(estimableCovariances[,1], "_", estimableCovariances[,2], "_bivariate"), "totalBivariate")
return(bivariate)
}
dataFunctions$bivariate = bivarFunction
}
PPMCmessage = quote(if (coreNum == 1) cat("\r", paste0("Progress: ", sprintf("%03.1f",round((sample/coreSamples)*100), digits = 1)), "% Complete"))
if (parallel){
#make cluster
cl = parallel::makeCluster(nCores, outfile="", setup_strategy = "sequential")
#set random seed
parallel::clusterSetRNGStream(cl = cl, iseed = seed)
#export packages
# tempResult = parallel::clusterEvalQ(cl = cl, library(blatent))
parallel::clusterExport(
cl = cl,
varlist = c("model", "fullPosterior", "estimableCovariances"),
envir = environment()
)
chain = parallel::parLapply(
cl = cl,
X = 1:nCores,
fun = runPPMC,
nCores = nCores,
totalSamples = nSamples,
PPMCmessage = PPMCmessage,
model = model,
fullPosterior = fullPosterior,
estimableCovariances = estimableCovariances,
dataFunctions = dataFunctions
)
parallel::stopCluster(cl = cl)
chain = Reduce(f = "rbind", x = chain)
} else {
set.seed(seed)
coreNum = 1
nCores = 1
totalSamples = nSamples
chain = runPPMC(coreNum = 1, nCores = 1, totalSamples = nSamples, PPMCmessage = PPMCmessage,
model = model, fullPosterior = fullPosterior, estimableCovariances = estimableCovariances, dataFunctions = dataFunctions)
}
# post-processing PPMC data:
# run dataFunctions on real data for point estimates
dataResults = lapply(
X = dataFunctions,
FUN = function(x,
simData,
realData,
estimableCovariances)
return(
x(
simData = simData,
realData = realData,
estimableCovariances = estimableCovariances
)
),
simData = model$data[model$specs$observedVariables],
realData = model$data,
estimableCovariances = estimableCovariances
)
dataResultsMatrix = do.call("cbind", dataResults)
# separate PPMC by type
PPMCresults = list()
if ("mean" %in% type){
PPMCresults$mean = list(samples = chain[,grep(pattern = "_proportion", x = colnames(chain))])
PPMCresults$mean$dataResults = dataFunctions$mean(simData = model$data[model$specs$observedVariables])
}
if ("univariate" %in% type){
PPMCresults$univariate = list(samples = chain[,c(grep(pattern = "_univariate", x = colnames(chain)),
grep(pattern = "totalUnivariate", x = colnames(chain)))])
PPMCresults$univariate$dataResults = dataFunctions$univariate(simData = model$data[model$specs$observedVariables],
realData = model$data[model$specs$observedVariables])
}
if ("covariance" %in% type){
PPMCresults$covariance = list(samples = chain[,grep(pattern = "_covariance", x = colnames(chain))])
if (!is.matrix(PPMCresults$covariance$samples)){
PPMCresults$covariance$samples = matrix(data = PPMCresults$covariance$samples, ncol = 1)
colnames(PPMCresults$covariance$samples) = paste0(estimableCovariances[,1], "_", estimableCovariances[,2], "_covariance")
}
PPMCresults$covariance$dataResults = dataFunctions$covariance(
simData = model$data[model$specs$observedVariables],
realData = model$data[model$specs$observedVariables],
estimableCovariances = estimableCovariances
)
}
if ("pearson" %in% type){
PPMCresults$pearson = list(samples = chain[,grep(pattern = "_pearson", x = colnames(chain))])
if (!is.matrix(PPMCresults$pearson$samples)){
PPMCresults$pearson$samples = matrix(data = PPMCresults$pearson$samples, ncol = 1)
colnames(PPMCresults$pearson$samples) = paste0(estimableCovariances[,1], "_", estimableCovariances[,2], "_pearson")
}
PPMCresults$pearson$dataResults = dataFunctions$pearson(
simData = model$data[model$specs$observedVariables],
realData = model$data[model$specs$observedVariables],
estimableCovariances = estimableCovariances
)
}
if ("bivariate" %in% type){
PPMCresults$bivariate = list(samples = chain[,c(grep(pattern = "_bivariate", x = colnames(chain)),
grep(pattern = "totalBivariate", x = colnames(chain)))])
PPMCresults$bivariate$dataResults = dataFunctions$bivariate(
simData = model$data[model$specs$observedVariables],
realData = model$data[model$specs$observedVariables],
estimableCovariances = estimableCovariances
)
}
if ("tetrachoric" %in% type){
PPMCresults$tetrachoric = list(samples = chain[,grep(pattern = "_tetrachoric", x = colnames(chain))])
if (!is.matrix(PPMCresults$tetrachoric$samples)){
PPMCresults$tetrachoric$samples = matrix(data = PPMCresults$tetrachoric$samples, ncol = 1)
colnames(PPMCresults$tetrachoric$samples) = paste0(estimableCovariances[,1], "_", estimableCovariances[,2], "_tetrachoric")
}
PPMCresults$tetrachoric$dataResults = dataFunctions$tetrachoric(
simData = model$data[model$specs$observedVariables],
realData = model$data[model$specs$observedVariables],
estimableCovariances = estimableCovariances
)
}
# calculate posterior predictive p-values for each
PPMCresults = lapply(X = PPMCresults, FUN = function(x){
x$quantiles = data.frame(matrix(data = NA, nrow = ncol(x$samples), ncol = 9))
names(x$quantiles)[1] = "Statistic"
names(x$quantiles)[2:7] = names(summary(stats::ecdf(x = chain[,1])))
names(x$quantiles)[8:9] = c("ObservedStatistic", "ObsStatPctile")
x$quantiles[,1] = colnames(x$dataResults)
col = 1
for (col in 1:ncol(x$samples)){
if (!all(is.na(x$samples[,col]))){
colECDF = stats::ecdf(x = x$samples[,col])
x$quantiles[col,2:9] = c(summary(colECDF), x$dataResults[1,col], colECDF(x$dataResults[1,col]))
} else {
x$quantiles[col,2:9] = NA
}
}
return(x)
})
ppmc = 1
for (ppmc in 1:length(type)){
# flag results in extremes:
PPMCresults[[ppmc]]$extremes =
PPMCresults[[ppmc]]$quantiles[which(PPMCresults[[ppmc]]$quantiles[,9] > highPPMCpercentile[ppmc] |
PPMCresults[[ppmc]]$quantiles[,9] < lowPPMCpercentile[ppmc]),]
ppmcType = names(PPMCresults)[ppmc]
# format for numeric values
numDigits = 3
num.format <-
paste("%", max(8L, numDigits + 5L), ".", numDigits, "f", sep = "")
num.format0 <-
paste("%", max(8L, 0 + 5L), ".", 0, "f", sep = "")
char.format <-
paste("%", max(8L, numDigits + 5L), "s", sep = "")
# determine results headlines for summary
if (ppmcType == "univariate"){
# here, message is median chi-square -- using output format from classBlatentModel
preamble = paste0(" ", "Median Total Univariate PPMC Chi-Square", collapse = "")
paramVals = paste0(sprintf(num.format, PPMCresults[[ppmc]]$quantiles[which(PPMCresults[[ppmc]]$quantiles[,1] == "totalUnivariate"), 4], collapse = ""))
buffer = paste0(rep(" ", 80 - nchar(preamble) - nchar(paramVals)), collapse = "")
PPMCresults[[ppmc]]$summaryMessage = paste0(preamble, buffer, paramVals, collapse = "")
} else if (ppmcType == "bivariate"){
# here, message is number of statistics out of total that are extreme
preamble = paste0(" ", "Median Total Bivariate PPMC Chi-Square", collapse = "")
paramVals = paste0(sprintf(num.format, PPMCresults[[ppmc]]$quantiles[which(PPMCresults[[ppmc]]$quantiles[,1] == "totalBivariate"), 4], collapse = ""))
buffer = paste0(rep(" ", 80 - nchar(preamble) - nchar(paramVals)), collapse = "")
PPMCresults[[ppmc]]$summaryMessage = paste0(preamble, buffer, paramVals, collapse = "")
} else {
# here, message is number of statistics out of total that are extreme
preamble = paste0(" ", "Extreme PPMC Percentiles: ", ppmcType, " (out of ", nrow(PPMCresults[[ppmc]]$quantiles), ")", collapse = "")
paramVals = paste0(sprintf(num.format0, nrow(PPMCresults[[ppmc]]$extremes), collapse = ""))
buffer = paste0(rep(" ", 80 - nchar(preamble) - nchar(paramVals)), collapse = "")
PPMCresults[[ppmc]]$summaryMessage = paste0(preamble, buffer, paramVals, collapse = "")
}
}
return(PPMCresults)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.