# (stray text from an HTML scrape; commented out so the file parses as R)
# Nothing
#' Normalize data according to a canonical order
#'
#' @template args-df
#' @template args-dots-barrier
#' @param .palist a character vector giving an order to use instead of the default
#' @param .sortRows logical. Using the same order, sort rows in addition to vertex pairs.
#'
#' @description
#'
#' Pairwise comparison data are not commutative.
#' Alice beating Bob in chess is equivalent to Bob losing to
#' Alice. \code{normalizeData} assigns an arbitrary order to all
#' vertices and reorders vertices column-wise to match,
#' flipping signs as needed.
#'
#' @examples
#' df <- data.frame(pa1=NA, pa2=NA, i1=c(1, -1))
#' df[1,paste0('pa',1:2)] <- c('a','b')
#' df[2,paste0('pa',1:2)] <- c('b','a')
#' normalizeData(df)
#' @export
normalizeData <- function(df, ..., .palist=NULL, .sortRows=TRUE) {
  # Refuse any extra arguments; '...' exists only as a named-argument barrier.
  if (length(list(...)) > 0) stop("Rejected are any values passed in the '...' argument")
  palist <- verifyIsData(df)
  if (!is.null(.palist)) {
    # A caller-supplied order must be a permutation of the detected vertices.
    if (length(.palist) != length(palist)) {
      stop(paste(".palist must be length", length(palist)))
    }
    if (any(is.na(match(palist, .palist)))) {
      stop(".palist must contain the names of all vertices")
    }
    palist <- .palist
  }
  # A row is out of canonical order when pa1 ranks after pa2 in palist.
  needFlip <- match(df$pa1, palist) > match(df$pa2, palist)
  respCols <- -match(paste0('pa', 1:2), colnames(df))
  # Reversing a comparison reverses the sign of every response.
  df[needFlip, respCols] <- -df[needFlip, respCols]
  held <- df[needFlip, 'pa1']
  df[needFlip, 'pa1'] <- df[needFlip, 'pa2']
  df[needFlip, 'pa2'] <- held
  if (.sortRows) {
    df <- df[order(df$pa1, df$pa2), ]
  }
  df
}
#' Transforms data into a form tailored for efficient evaluation by Stan
#'
#' Vertex names, if not already factors, are converted to
#' factors. The number of thresholds per item is determined by the
#' largest absolute response value. Missing responses are filtered
#' out. Responses on the same pair of vertices on the same item are
#' grouped together. Within a vertex pair and item, responses
#' are ordered from negative to positive.
#'
#' @details
#' Note: Reordering of responses is likely unless something like
#' \code{\link{normalizeData}} has been used with \code{.sortRows=TRUE}.
#'
#' @template args-df
#'
#' @template return-datalist
#' @family data preppers
#' @examples
#' df <- prepCleanData(phyActFlowPropensity)
#' str(df)
#' @importFrom reshape2 melt
#' @export
prepCleanData <- function(df) {
  palist <- verifyIsData(df)
  # Convert vertex columns to factors sharing one level set so that
  # unclass() below yields comparable integer codes for pa1 and pa2.
  if (!is.factor(df$pa1)) {
    df$pa1 <- factor(df$pa1, levels=palist)
    df$pa2 <- factor(df$pa2, levels=palist)
  }
  # Negative index vector selecting all non-vertex (item response) columns.
  dataCols <- -match(paste0('pa',1:2), colnames(df))
  # Thresholds per item = largest absolute response observed for that item.
  nthr <- apply(df[,dataCols,drop=FALSE], 2, function(x) max(abs(x), na.rm=TRUE))
  itemNames <- names(nthr)
  names(nthr) <- NULL
  # Reshape wide -> tall: one row per (pa1, pa2, item, pick).
  tall <- melt(df, id.vars = paste0('pa',1:2),
               variable.name = "item", value.name="pick")
  # Encode each (pa1, pa2, item) triple as a unique integer so responses on
  # the same vertex pair and item land in the same split() group. 'base' is
  # strictly larger than any single code, so the encoding cannot collide.
  base <- 1L+max(length(nthr), length(palist))
  likeId <- (base * base * unclass(tall$pa1) +
             base * unclass(tall$pa2) + unclass(tall$item))
  l <- split(tall, likeId)
  pa1 <- c()
  pa2 <- c()
  item <- c()
  weight <- c()
  pick <- c()
  refresh <- c()
  numOutcome <- c()
  numRefresh <- 0L
  for (lx in 1:length(l)) {
    x <- l[[lx]]
    # Tabulate distinct responses within this group; NAs are dropped here.
    pt <- table(x$pick, useNA="no")
    if (length(pt) == 0) next  # group had only missing responses
    numRefresh <- numRefresh + 1L
    # One entry per group; unclass() records the integer factor code.
    pa1 <- c(pa1, unclass(x$pa1)[1])
    pa2 <- c(pa2, unclass(x$pa2)[1])
    item <- c(item, unclass(x$item)[1])
    # One weight/pick entry per distinct response value (sorted by table()).
    weight <- c(weight, pt, use.names=FALSE)
    pick <- c(pick, as.integer(names(pt)))
    refresh <- c(refresh, length(pt))
    numOutcome <- c(numOutcome, sum(pt))
  }
  dl <- list(
    nameInfo=list(pa=palist, item=itemNames),
    # all models
    NPA=length(palist),
    NCMP=length(pick),
    N=sum(weight),
    # multivariate models
    NITEMS=length(nthr),
    NTHRESH=nthr,
    # Offset of each item's first threshold in the flattened threshold vector.
    TOFFSET=c(1L, 1L + cumsum(nthr)[-length(nthr)]),
    # data (all models)
    pa1=pa1,
    pa2=pa2,
    weight=weight,
    pick=pick,
    numRefresh=numRefresh,
    refresh=refresh,
    numOutcome=numOutcome,
    # multivariate data
    item=item,
    # priors
    alphaScalePrior = 0.2,
    # correlation model
    corLKJPrior = 2.5, # 2.0 not quite enough
    # factor model
    propShape = 3.0 # 2.0 give divegence, 4.0 okay, but try to reduce?
  )
  # Sanity check: the returned list must carry every field that
  # verifyIsPreppedData() will later look for.
  if (any(is.na(match(preppedDataFields, names(dl))))) {
    stop("Bug in prepCleanData(); contact developers") # nocov
  }
  dl
}
#' Transforms data into a form tailored for efficient evaluation by Stan
#'
#' @template args-df
#'
#' @description
#' Invokes \code{\link{filterGraph}} and \code{\link{normalizeData}}.
#' Vertex names, if not already factors, are converted to
#' factors. The number of thresholds per item is determined by the
#' largest absolute response value. Missing responses are filtered
#' out. Responses on the same pair of vertices on the same item are
#' grouped together. Within a vertex pair and item, responses
#' are ordered from negative to positive.
#'
#' @template return-datalist
#' @family data preppers
#' @examples
#' df <- prepData(phyActFlowPropensity)
#' str(df)
#'
#' @export
prepData <- function(df) {
  # Convenience pipeline: filter to a well-connected graph, canonicalize
  # vertex-pair order, then compress into a Stan data list.
  prepCleanData(normalizeData(filterGraph(df)))
}
# Fields that prepCleanData() guarantees in its returned data list; used by
# verifyIsPreppedData() and by prepCleanData()'s own sanity check.
# BUGFIX: 'pa1' was listed twice and 'pa2' was missing, so a data list
# lacking 'pa2' would previously pass validation.
preppedDataFields <- c('nameInfo','NPA','NCMP','N','pa1','pa2','weight',
                       'pick','refresh')
verifyIsPreppedData <- function(data) {
  # A raw data.frame means the caller skipped prepData().
  if (is.data.frame(data)) stop("Data must be processed by prepData. Try prepData(data)")
  # Accept any list carrying every required field.
  hasAllFields <- is.list(data) && !any(is.na(match(preppedDataFields, names(data))))
  if (hasAllFields) return()
  stop("Is data an object returned by prepData()? Seems not")
}
#' Given a model name, return stanmodel object
#'
#' @template args-locate
#' @description
#'
#' This is a convenience function to help you look up the path to an
#' appropriate model for your data.
#'
#' @details
#'
#' There are essentially three models: \sQuote{unidim}, \sQuote{covariance}, and \sQuote{factor}.
#' \sQuote{unidim} analyzes a single item. \sQuote{covariance} is suitable for two or more items.
#' Once you have vetted your items with the \sQuote{unidim} and \sQuote{covariance} models,
#' then you can try the \sQuote{factor} model.
#' For each model, there is a \sQuote{_ll} variation. This model
#' includes row-wise log likelihoods suitable for feeding to \pkg{loo}
#' for efficient approximate leave-one-out cross-validation (Vehtari, Gelman, & Gabry, 2017).
#'
#' There is also a special model \sQuote{unidim_adapt}. Except for
#' this model, the other models require a scaling constant. To find
#' an appropriate scaling constant, we recommend fitting
#' \sQuote{unidim_adapt} to each item separately and then take the
#' median of median point estimates to set the scale. \sQuote{unidim_adapt} requires a
#' varCorrection constant. In general, a varCorrection of 2.0 or 3.0
#' should provide optimal results.
#'
#' Since version 1.1.0, the factor model permits an arbitrary number
#' of factors and arbitrary factor-to-item paths. If you were using
#' the old factor model, you'll need to update your code to call
#' \link{prepSingleFactorModel}. Arbitrary factor model structure
#' should be specified using \link{prepFactorModel}. The single factor model
#' is called \sQuote{factor1} and the general factor model is called \sQuote{factor}.
#'
#' @return An instance of S4 class \code{\link[rstan:stanmodel-class]{stanmodel}} that can be passed to \code{\link{pcStan}}.
#' @seealso \code{\link{toLoo}}
#' @template ref-vehtari2017
#' @examples
#' findModel() # shows available models
#' findModel('unidim')
#' @export
findModel <- function(model=NULL) {
  # Compiled models are stored in the package-internal 'stanmodels' list.
  avail <- names(stanmodels)
  if (is.null(model)) {
    # With no argument, list what is available and return nothing.
    message(paste("Models available:", paste0(deparse(avail), collapse="")))
    return(invisible())
  }
  found <- stanmodels[[model]]
  if (is.null(found)) {
    stop(paste("Stan model not found:", model))
  }
  found
}
#' Specify a single factor model
#'
#' Specify a single latent factor with a path to each item.
#'
#' @template args-factorScalePrior
#' @template args-data
#' @template return-datalist
#' @examples
#' dl <- prepData(phyActFlowPropensity)
#' dl <- prepSingleFactorModel(dl)
#' str(dl)
#' @family factor model
#' @family data preppers
#' @export
#' @importFrom lifecycle deprecate_warn
#' @importFrom lifecycle deprecated
prepSingleFactorModel <- function(data, factorScalePrior=deprecated()) {
  if (lifecycle::is_present(factorScalePrior)) {
    deprecate_warn("1.5.0", "prepSingleFactorModel(factorScalePrior = )")
  }
  verifyIsPreppedData(data)
  numItems <- data$NITEMS
  # Path matrix: row 1 = factor index (always 1), row 2 = item index.
  data$factorItemPath <- matrix(c(rep(1, numItems), 1:numItems),
                                nrow=2, byrow=TRUE)
  data$NFACTORS <- 1L
  data$NPATHS <- numItems
  setFactorScalePrior(data)
}
setFactorScalePrior <- function(data) {
  # Attach the default prior scale (1.2) for every factor; as.array()
  # keeps a length-1 result dimensioned the way Stan expects.
  data[['factorScalePrior']] <- as.array(rep(1.2, data$NFACTORS))
  data
}
#' Specify a factor model
#'
#' Specify a factor model with an arbitrary number of factors and
#' arbitrary factor-to-item structure.
#'
#' @template detail-factorspec
#' @template args-path
#' @template args-factorScalePrior
#' @template args-data
#' @param psiScalePrior matrix of priors for factor correlations (deprecated)
#' @template return-datalist
#' @examples
#' pa <- phyActFlowPropensity[,setdiff(colnames(phyActFlowPropensity),
#' c('goal1','feedback1'))]
#' dl <- prepData(pa)
#' dl <- prepFactorModel(dl,
#' list(flow=c('complex','skill','predict',
#' 'creative', 'novelty', 'stakes',
#' 'present', 'reward', 'chatter',
#' 'body'),
#' f2=c('waiting','control','evaluated','spont'),
#' rc=c('novelty', 'waiting')))
#' str(dl)
#' @family factor model
#' @family data preppers
#' @seealso To simulate data from a factor model: \link{generateFactorItems}
#' @export
prepFactorModel <- function(data, path, factorScalePrior=deprecated(),
                            psiScalePrior=deprecated()) {
  if (lifecycle::is_present(factorScalePrior)) {
    deprecate_warn("1.5.0", "prepFactorModel(factorScalePrior = )")
  }
  if (lifecycle::is_present(psiScalePrior)) {
    deprecate_warn("1.5.0", "prepFactorModel(psiScalePrior = )")
  }
  verifyIsPreppedData(data)
  itemNames <- data$nameInfo$item
  validateFactorModel(itemNames, path)
  # Path matrix: row 1 = factor index, row 2 = item index, one column per
  # factor-to-item path.
  perFactor <- sapply(path, length)
  factorIdx <- rep(1:length(perFactor), perFactor)
  itemIdx <- unlist(lapply(path, function(p) match(p, itemNames)))
  data$factorItemPath <- matrix(c(factorIdx, itemIdx), nrow=2, byrow=TRUE)
  data$NFACTORS <- length(names(path))
  data$NPATHS <- sum(perFactor)
  data$nameInfo[['factor']] <- names(path)
  setFactorScalePrior(data)
}
#' Fit a paired comparison Stan model
#' @template args-locate
#' @template args-data
#' @template args-stan
#' @description Uses \code{\link{findModel}} to find the appropriate
#' model and then invokes \link[rstan:sampling]{sampling}.
#' @return A \code{\link[rstan:stanfit-class]{stanfit}} object.
#' @seealso See \code{\link[rstan:sampling]{sampling}}, for which this function is
#' a wrapper, for additional options. See \code{\link{prepData}} to
#' create a suitable data list. See
#' \code{\link[rstan:print.stanfit]{print.stanfit}} for ways of getting tables
#' summarizing parameter posteriors.
#'
#' @return An object of S4 class \code{\link[rstan:stanfit-class]{stanfit}}.
#' @seealso \code{\link{calibrateItems}}, \code{\link{outlierTable}}
#' @examples
#' dl <- prepData(phyActFlowPropensity[,c(1,2,3)])
#' dl$varCorrection <- 5.0
#' \donttest{pcStan('unidim_adapt', data=dl)} # takes more than 5 seconds
#' @importFrom rstan sampling
#' @export
pcStan <- function(model, data, ...) {
  # Validate the data list before handing everything to rstan.
  verifyIsPreppedData(data)
  rstan::sampling(findModel(model), data = data, ...)
}
#' Determine the optimal scale constant for a set of items
#' @template args-df
#' @template args-stan
#' @param iter A positive integer specifying the number of iterations for each chain (including warmup).
#' @param chains A positive integer specifying the number of Markov chains.
#' @param varCorrection A correction factor greater than or equal to 1.0
#' @param maxAttempts How many times to try re-running a model with more iterations.
#'
#' @description
#'
#' Data are passed through \code{\link{filterGraph}} and \code{\link{normalizeData}}.
#' Then the \sQuote{unidim_adapt} model is fit to each item individually.
#' A larger \code{varCorrection} will obtain a more accurate
#' \code{scale}, but is also more likely to produce an intractable
#' model. A good compromise is between 5.0 and 9.0.
#'
#' @return
#' A data.frame (one row per item) with the following columns:
#' \describe{
#' \item{item}{Name of the item}
#' \item{iter}{Number of iterations per chain}
#' \item{divergent}{Number of divergent transitions observed after warmup}
#' \item{treedepth}{Number of times the treedepth was exceeded}
#' \item{low_bfmi}{Number of chains with low E-BFMI}
#' \item{n_eff}{Minimum effective number of samples across all parameters}
#' \item{Rhat}{Maximum Rhat across all parameters}
#' \item{scale}{Median marginal posterior of \code{scale}}
#' \item{thetaVar}{Median variance of theta (latent scores)}
#' }
#' @seealso \code{\link[rstan:check_hmc_diagnostics]{check_hmc_diagnostics}}
#' @template ref-vehtari2019
#' @examples
#' \donttest{
#' result <- calibrateItems(phyActFlowPropensity) # takes more than 5 seconds
#' print(result)
#' }
#' @importMethodsFrom rstan summary
#' @importFrom rstan get_num_divergent get_max_treedepth_iterations get_low_bfmi_chains
#' @export
calibrateItems <- function(df, iter=2000L, chains=4L, varCorrection=5.0, maxAttempts=5L, ...) {
  # Fit the 'unidim_adapt' model to each item separately, re-running with
  # 50% more iterations until convergence criteria are met or maxAttempts
  # passes are exhausted.
  df <- filterGraph(df)
  df <- normalizeData(df)
  vCol <- match(paste0('pa',1:2), colnames(df))
  # One row per item; NA entries mark items not yet fit.
  result <- expand.grid(item=colnames(df[,-vCol]),
                        iter=NA,
                        divergent=NA, treedepth=NA, low_bfmi=NA, n_eff=NA, Rhat=NA,
                        scale=NA, thetaVar=NA)
  for (attempt in 1:maxAttempts) {
    for (rx in 1:nrow(result)) {
      # Stop retrying items whose last fit showed sampler pathologies
      # (divergences, max treedepth, low E-BFMI).
      if (!is.na(result[rx,'divergent']) &&
          (result[rx,'divergent'] || result[rx,'treedepth'] || result[rx,'low_bfmi'])) next
      # Done: converged (Rhat) with an adequate effective sample size.
      if (!is.na(result[rx, 'Rhat']) &&
          result[rx, 'Rhat'] < 1.015 && result[rx, 'n_eff'] > 100 * chains) next
      # Fit using only the vertex columns plus this one item.
      itemCol <- match(result[rx,'item'], colnames(df))
      dl <- prepCleanData(df[,c(vCol, itemCol)])
      dl$varCorrection <- varCorrection
      # First attempt uses 'iter'; each retry multiplies iterations by 1.5.
      result[rx,'iter'] <- ifelse(is.na(result[rx,'iter']), iter, result[rx,'iter'] * 1.5)
      fit1 <- suppressWarnings(pcStan("unidim_adapt", data=dl, chains=chains, iter=result[rx,'iter'], ...))
      # Record HMC diagnostics and summary statistics for this item.
      result[rx,'divergent'] <- get_num_divergent(fit1)
      result[rx,'treedepth'] <- sum(get_max_treedepth_iterations(fit1))
      result[rx,'low_bfmi'] <- length(get_low_bfmi_chains(fit1))
      allPars <- summary(fit1, probs=0.5)$summary
      result[rx,'n_eff'] <- min(allPars[,'n_eff'])
      result[rx,'Rhat'] <- max(allPars[,'Rhat'])
      result[rx,'scale'] <- allPars['scale','50%']
      result[rx,'thetaVar'] <- allPars['thetaVar','50%']
    }
  }
  result
}
#' Compute approximate leave-one-out (LOO) cross-validation for Bayesian
#' models using Pareto smoothed importance sampling (PSIS)
#'
#' @template args-fit
#' @param ... Additional options passed to \code{\link[loo:loo]{loo}}.
#'
#' @description
#'
#' You must use an \sQuote{_ll} model variation (see \code{\link{findModel}}).
#'
#' @return a loo object
#' @seealso \code{\link{outlierTable}}, \code{\link[loo:loo]{loo}}
#' @importFrom loo loo extract_log_lik relative_eff
#' @export
#' @examples
#' palist <- letters[1:10]
#' df <- twoLevelGraph(palist, 300)
#' theta <- rnorm(length(palist))
#' names(theta) <- palist
#' df <- generateItem(df, theta, th=rep(0.5, 4))
#'
#' df <- filterGraph(df)
#' df <- normalizeData(df)
#' dl <- prepCleanData(df)
#' dl$scale <- 1.5
#'
#' \donttest{
#' m1 <- pcStan("unidim_ll", dl)
#'
#' loo1 <- toLoo(m1, cores=1)
#' print(loo1)
#' }
toLoo <- function(fit, ...) {
  # Keep chains separate so relative_eff() can account for between-chain
  # autocorrelation when computing relative efficiencies.
  logLik <- extract_log_lik(fit, merge_chains = FALSE)
  relEff <- relative_eff(exp(logLik))
  loo(logLik, r_eff=relEff, ...)
}
#' List observations with Pareto values larger than a given threshold
#'
#' @template args-data
#' @param x An object created by \code{\link[loo:loo]{loo}}
#' @param threshold threshold is the minimum k value to include
#'
#' @description
#'
#' The function \code{\link{prepCleanData}} compresses observations
#' into the most efficient format for evaluation by Stan. This function
#' maps indices of observations back to the actual observations,
#' filtering by the largest Pareto k values. It is assumed that
#' \code{data} was processed by \code{\link{normalizeData}} or is in
#' the same order as seen by \code{\link{prepCleanData}}.
#'
#' @return
#' A data.frame (one row per observation) with the following columns:
#' \describe{
#' \item{pa1}{Name of object 1}
#' \item{pa2}{Name of object 2}
#' \item{item}{Name of item}
#' \item{pick}{Observed response}
#' \item{k}{Associated Pareto k value}
#' }
#' @seealso \code{\link{toLoo}}, \code{\link[loo:pareto-k-diagnostic]{pareto_k_ids}}
#' @importFrom loo pareto_k_ids pareto_k_values
#' @export
#' @examples
#' palist <- letters[1:10]
#' df <- twoLevelGraph(palist, 300)
#' theta <- rnorm(length(palist))
#' names(theta) <- palist
#' df <- generateItem(df, theta, th=rep(0.5, 4))
#'
#' df <- filterGraph(df)
#' df <- normalizeData(df)
#' dl <- prepCleanData(df)
#' dl$scale <- 1.5
#'
#' \donttest{
#' m1 <- pcStan("unidim_ll", dl)
#'
#' loo1 <- toLoo(m1, cores=1)
#' ot <- outlierTable(dl, loo1, threshold=.2)
#' df[df$pa1==ot[1,'pa1'] & df$pa2==ot[1,'pa2'], 'i1']
#' }
outlierTable <- function(data, x, threshold=0.5) {
  verifyIsPreppedData(data)
  ids <- pareto_k_ids(x, threshold)
  # Expand the compressed (refresh, weight) representation back to one row
  # per original observation: rx = refresh-group index, px = compressed
  # row index into pa1/pa2/item/pick.
  xx <- data.frame(rx=rep(NA, data$N),
                   px=rep(NA, data$N))
  cmpStart <- 1L
  cur <- 1L
  for (rx in 1:data$numRefresh) {
    len <- data$refresh[rx]
    for (ox in cmpStart:(cmpStart + len - 1)) {
      # Each compressed row stands for 'weight' identical observations.
      for (wx in 1:data$weight[ox]) {
        xx[cur,'rx']=rx
        xx[cur,'px']=ox
        cur <- cur + 1L
      }
    }
    cmpStart <- cmpStart + data$refresh[rx]
  }
  # Map flagged observation ids back to their group and compressed row.
  RX <- xx[ids,'rx']
  PX <- xx[ids,'px']
  palist <- data$nameInfo$pa
  itemName <- data$nameInfo$item
  df <- data.frame(pa1=data$pa1[RX],
                   pa2=data$pa2[RX],
                   item=data$item[RX],
                   pick=data$pick[PX],
                   k=pareto_k_values(x)[ids])
  # Identical observations share a compressed row; report each only once.
  df <- df[!duplicated(PX),]
  # pa1/pa2/item were stored as integer codes; restore them to factors.
  for (k in paste0('pa',1:2)) {
    levels(df[[k]]) <- palist
    class(df[[k]]) <- 'factor'
  }
  levels(df[['item']]) <- itemName
  class(df[['item']]) <- 'factor'
  # Worst (largest k) observations first.
  df <- df[order(-df$k),]
  rownames(df) <- c() # original order is meaningless
  df
}
# Verify that a Stan data list and a fitted model describe the same problem
# (same number of objects, items, and thresholds). Stops with a descriptive
# error on any mismatch; returns nothing on success.
assertDataFitCompat <- function(dl, fit) {
  if (!is(dl, 'list')) stop("dl must be a list of data")
  if (!is(fit, 'stanfit')) stop("fit must be a stanfit object")
  pd <- fit@par_dims
  if (pd$theta[1] != dl$NPA) {
    stop(paste0("dl has ",dl$NPA," objects but fit has ",pd$theta[1]," objects"))
  }
  # A unidimensional fit stores theta as a vector (one dim); multivariate
  # fits store it as a matrix with items in the second dimension.
  fitItems <- if (length(pd$theta) == 1) 1 else pd$theta[2]
  if (fitItems != dl$NITEMS) {
    stop(paste0("dl has ",dl$NITEMS," items but fit has ",fitItems," items"))
  }
  totalThr <- sum(dl$NTHRESH)
  if (pd$threshold != totalThr) {
    # BUGFIX: the message previously pasted the NTHRESH vector itself,
    # which vectorized paste0() into one garbled string per item; report
    # the intended total instead.
    stop(paste0("dl has a total of ",totalThr," thresholds across all items but fit has ",
                pd$threshold," thresholds"))
  }
}
#' Produce data suitable for plotting item response curves
#'
#' @template args-dl
#' @template args-fit
#' @param item a vector of item names
#' @param responseNames a vector of labels for the possible responses
#' @template args-samples
#' @param from the starting latent difference value
#' @param to the ending latent difference value
#' @param by the grid increment
#'
#' @description
#' Selects \code{samples} random draws from the posterior and evaluates the item
#' response curve on the grid given by \code{seq(from,to,by)}.
#' All items use the same \code{responseNames}. If you have some items
#' with a different number of thresholds or different response names
#' then you can call \code{responseCurve} for each item separately
#' and \code{rbind} the results together.
#'
#' @template detail-response
#' @return
#' A data.frame with the following columns:
#' \describe{
#' \item{response}{Which response}
#' \item{worthDiff}{Difference in worth}
#' \item{item}{Which item}
#' \item{sample}{Which sample}
#' \item{prob}{Associated probability}
#' \item{responseSample}{A grouping index for independent item response samples}
#' }
#' @export
#' @importFrom rstan extract
#' @importFrom stats qnorm
#' @family data extractor
#' @examples
#' \donttest{ vignette('manual', 'pcFactorStan') }
responseCurve <- function(dl, fit, responseNames, item=dl$nameInfo$item,
                          samples=100, from=qnorm(.1), to=-from, by=.02) {
  assertDataFitCompat(dl, fit)
  pd <- fit@par_dims
  itemIndex <- match(item, dl$nameInfo$item)
  if (any(is.na(itemIndex))) {
    stop(paste0("Item not found: ",
                paste(item[is.na(itemIndex)], collapse=', ')))
  }
  # Each item offers 1 + 2*NTHRESH distinct responses; every requested item
  # must match the supplied responseNames.
  mismatch <- (1 + 2 * dl$NTHRESH[itemIndex]) != length(responseNames)
  if (any(mismatch)) {
    stop(paste0("Item with different number of responseNames: ",
                paste(item[mismatch], collapse=', ')))
  }
  grid <- seq(from,to,by)
  df <- expand.grid(response=responseNames, worthDiff=1:length(grid),
                    item=item, sample=1:samples, prob=NA)
  pick <- c()
  for (i1 in item) {
    ii <- match(i1, dl$nameInfo$item)
    # Rescale the grid into this item's latent metric.
    grid <- seq(from,to,by) / dl$scale[ii]
    # Indices of this item's thresholds in the flattened threshold vector.
    thrInd <- dl$TOFFSET[ii] + 1:dl$NTHRESH[ii] - 1L
    thrData <- extract(fit, pars=paste0("threshold[",thrInd,"]"))
    if ('alpha' %in% names(pd)) {
      # alpha was estimated: a scalar (length-0 dims) or one per item.
      if (length(pd$alpha) == 0) {
        alphaData <- extract(fit, pars="alpha")[[1]]
      } else {
        alphaData <- extract(fit, pars=paste0("alpha[",ii,"]"))[[1]]
      }
    } else {
      # alpha was supplied as data rather than estimated.
      alphaData <- dl$alpha[ii]
    }
    # Choose the random subset of posterior draws once, on the first item,
    # so every item uses the same draws.
    if (length(pick)==0) {
      samples <- min(length(thrData[[1]]), samples)
      pick <- sample.int(length(thrData[[1]]), samples)
    }
    for (sx in 1:samples) {
      mask <- df$item == i1 & df$sample == sx
      # Response probabilities at every grid point for this draw.
      p1 <- sapply(grid, function(gx) {
        if (length(alphaData) > 1) {
          alpha <- alphaData[pick[sx]]
        } else {
          alpha <- alphaData
        }
        cmp_probs(dl$scale[ii], alpha, 0, gx,
                  sapply(thrData, function(x) x[pick[sx]]))
      })
      df[mask, 'prob'] <- c(p1)
      # Repeat each grid value once per response category.
      df[mask, 'worthDiff'] <- kronecker(grid, rep(1, nrow(p1)))
    }
  }
  # Unique grouping index per (item, sample, response) combination.
  df$responseSample <-
    (match(df$item, dl$nameInfo$item) * length(responseNames) * samples +
     df$sample * length(responseNames) +
     unfactor(df$response)-1L)
  df
}
#' Remove the array indexing from a parameter name
#' @param name a parameter name
#' @return the name without the square bracket parameter indexing
#' @examples
#' withoutIndex("foo[1,2]")
#' @export
withoutIndex <- function(name) {
  # Drop a trailing "[i]" or "[i,j,...]" index from a Stan parameter name;
  # names without an index pass through unchanged.
  sub("\\[[0-9,]+\\]$", "", name)
}
#' Produce data suitable for plotting parameter estimates
#'
#' @template args-fit
#' @param pars a vector of parameter names
#' @param label column name for \code{nameVec}
#' @param nameVec a vector of explanatory parameters names
#' @param width a width in probability units for the uncertainty interval
#'
#' @return
#' A data.frame with the following columns:
#' \describe{
#' \item{L}{Lower quantile}
#' \item{M}{Median}
#' \item{U}{Upper quantile}
#' \item{\emph{label}}{\emph{nameVec}}
#' }
#' @export
#' @importFrom rstan summary
#' @family data extractor
#' @examples
#' \donttest{ vignette('manual', 'pcFactorStan') }
parInterval <- function(fit, pars, nameVec,
                        label=withoutIndex(pars[1]), width=0.8) {
  if (!is(fit, "stanfit")) stop("fit must be a stanfit object")
  if (width <= 0 || width >= 1) stop("width must be in the interval (0,1)")
  # Symmetric quantiles around the median covering 'width' probability.
  qtiles <- 0.5 + c(-width/2, 0, width/2)
  # Columns 4:6 of the summary hold the three requested quantiles.
  est <- summary(fit, pars=pars, probs=qtiles)$summary[,4:6,drop=FALSE]
  colnames(est) <- c("L","M","U")
  est <- as.data.frame(est)
  if (nrow(est) != length(nameVec)) {
    stop(paste("pars and nameVec must be the same length. Currently",
               "pars is length", nrow(est),
               "but nameVec is length", length(nameVec)))
  }
  # Preserve the caller's ordering in the factor levels.
  est[[label]] <- factor(nameVec, levels=nameVec)
  est
}
#' Produce data suitable for plotting parameter distributions
#'
#' @template args-fit
#' @param pars a vector of parameter names
#' @param label column name for \code{nameVec}
#' @param nameVec a vector of explanatory parameters names
#' @template args-samples
#' @return
#' A data.frame with the following columns:
#' \describe{
#' \item{sample}{Sample index}
#' \item{\emph{label}}{A name from \emph{nameVec}}
#' \item{value}{A single sample of the associated parameter}
#' }
#' @export
#' @importFrom rstan extract
#' @importFrom reshape2 melt
#' @family data extractor
#' @examples
#' \donttest{ vignette('manual', 'pcFactorStan') }
parDistributionCustom <- function(fit, pars, nameVec,
                                  label=withoutIndex(pars[1]), samples=500) {
  if (!is(fit, "stanfit")) stop("fit must be a stanfit object")
  draws <- extract(fit, pars)
  if (length(draws) != length(nameVec)) {
    stop(paste("pars and nameVec must be the same length. Currently",
               "pars is length", length(draws),
               "but nameVec is length", length(nameVec)))
  }
  nameCursor <- 1L
  keep <- c()
  result <- NULL
  for (mat in draws) {
    # Promote vector parameters to one-column matrices for uniform handling.
    if (length(dim(mat)) == 1) mat <- as.matrix(mat)
    span <- ncol(mat)
    colnames(mat) <- nameVec[nameCursor:(nameCursor + span - 1L)]
    nameCursor <- nameCursor + span
    # Draw the random sample of posterior rows once so every parameter
    # shares the same draws.
    if (length(keep) == 0) {
      samples <- min(nrow(mat), samples)
      keep <- sample.int(nrow(mat), samples)
    }
    mat <- mat[keep,,drop=FALSE]
    piece <- melt(mat)
    colnames(piece)[1:2] <- c('sample',label)
    result <- rbind(result, piece)
  }
  result
}
#' @param pi a data.frame returned by \code{\link{parInterval}}
#' @rdname parDistributionCustom
#' @export
parDistributionFor <- function(fit, pi, samples=500) {
  # Column 4 of a parInterval() result holds the labelled names; its
  # rownames are the parameter names.
  parNames <- rownames(pi)
  parDistributionCustom(fit, parNames, nameVec=pi[[4]],
                        label=colnames(pi)[4], samples=samples)
}
# (website boilerplate from an HTML scrape; commented out so the file
# parses as R)
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.