#' Text description of priors
#'
#' Generate data.frames giving, for each main effect or
#' interaction in a model, a short description of the
#' associated prior. The data.frames are useful for
#' understanding a model, or for generating tables in
#' documents describing the analyses.
#'
#' \code{describePriors} is applied to the databases
#' created by functions \code{\link{estimateModel}},
#' \code{\link{estimateCounts}} or \code{\link{estimateAccount}}.
#'
#' @param filename The filename used by the estimate function.
#'
#' @return If \code{filename} refers to the results from a
#' call to \code{\link{estimateModel}}, then \code{describePriors}
#' returns a single data.frame. Otherwise, it returns a named
#' list containing one or more data.frames.
#'
#' @seealso \code{\link{showModel}} gives a semi-algebraic
#' description of an entire model, including the priors.
#'
#' @examples
#' deaths <- demdata::VADeaths2
#' popn <- demdata::VAPopn
#' deaths <- round(deaths)
#' deaths <- Counts(deaths)
#' popn <- Counts(popn)
#' filename <- tempfile()
#' estimateModel(Model(y ~ Poisson(mean ~ age * sex)),
#' y = deaths,
#' exposure = popn,
#' filename = filename,
#' nBurnin = 0,
#' nSim = 5,
#' nChain = 2)
#' describePriors(filename)
#' @export
describePriors <- function(filename) {
    ## Load the results object stored under 'filename' and delegate to
    ## the method appropriate to its class (model, counts, or account).
    describePriorsResults(fetchResultsObject(filename))
}
## HAS_TESTS
#' Extract estimates from model output.
#'
#' Functions \code{\link{estimateModel}}, \code{\link{estimateCounts}},
#' and \code{\link{estimateAccount}}, and their \code{predict} counterparts,
#' send their output to a simple database specified by \code{filename}.
#' Function \code{fetch} extracts estimates from this database.
#'
#' Estimates are stored in a hierarchical structure. The structure can be viewed
#' with function \code{\link{listContents}}. Function \code{fetch} uses
#' a description of the path through the hierarchical structure to locate
#' estimates. The \code{where} argument to \code{fetch} is a character vector,
#' giving names of nodes in the hierarchy. Partial matching is used with
#' the names of the nodes, so names can be shortened, provided they are still
#' long enough to uniquely identify a node. (To make code self-documenting,
#' it is still best to use the full names in production code.)
#'
#' Using the \code{iterations} argument to select a subset of iterations can be useful
#' if the object being extracted is large relative to memory or if calculations
#' are running slowly.
#'
#' Where possible, \code{\link{estimateModel}}, \code{\link{estimateCounts}},
#' and \code{\link{estimateAccount}} avoid imputing missing values during
#' model fitting, since the imputed values are typically highly correlated
#' with other unknown quantities, which can slow convergence. If a batch
#' of estimates has missing values and if \code{impute} is \code{TRUE} the
#' missing values will be imputed at the time they are fetched.
#'
#' @param filename The filename used by the estimate function.
#' @param where A character vector describing the path to the item to be
#' extracted.
#' @param iterations A vector of positive integers giving the iterations to be
#' extracted if an item has multiple iterations.
#' @param impute Logical. Whether to impute missing values.
#' Defaults to \code{TRUE}.
#'
#' @return Parameters that were estimated from the data typically have class
#' \code{DemographicArray} and have a dimension with
#' \code{\link[dembase]{dimtype}} \code{"iteration"}. Other elements
#' stored in \code{object} have a variety of classes.
#'
#' @seealso \code{fetch} is used to extract output from functions
#' \code{\link{estimateModel}}, \code{\link{estimateCounts}},
#' and \code{\link{estimateAccount}}. Function \code{\link{listContents}}
#' shows the internal structure of the output, which is useful for
#' constructing the \code{where} argument to \code{fetch}.
#'
#' @references
#' The weak identification of main effects and interactions is discussed in
#'
#' Gelman, A. (2005). Analysis of variance: Why it is more important than ever.
#' \emph{Annals of Statistics}, 33(1):1-53.
#'
#' and
#'
#' Nelder, J. A. (1994). The statistics of linear models: Back to basics.
#' \emph{Statistics and Computing}. 4: 221-234.
#'
#' @examples
#' deaths <- demdata::VADeaths2
#' popn <- demdata::VAPopn
#' deaths <- round(deaths)
#' deaths <- Counts(deaths)
#' popn <- Counts(popn)
#' filename <- tempfile()
#' estimateModel(Model(y ~ Poisson(mean ~ age * sex)),
#' y = deaths,
#' exposure = popn,
#' filename = filename,
#' nBurnin = 20,
#' nSim = 20,
#' nChain = 2,
#' parallel = FALSE)
#' ## the model hasn't been run for long enough,
#' ## but we'll extract some results anyway
#'
#' ## examine structure
#' listContents(filename)
#'
#' ## extract means from likelihood model
#' rate <- fetch(filename,
#' where = c("model", "likelihood", "rate"))
#' plot(rate)
#'
#' ## only supply enough of component of 'where' to identify
#' rate <- fetch(filename,
#' where = c("mo", "l", "r"))
#'
#' ## extract all iterations of intercept
#' fetch(filename,
#' where = c("model", "prior", "(Intercept)"))
#'
#' ## extract every fifth iteration
#' fetch(filename,
#' where = c("model", "prior", "(Intercept)"),
#' iterations = seq(from = 5, by = 5, to = 40))
#' @export
fetch <- function(filename, where = character(), iterations = NULL,
                  impute = TRUE) {
    ## Load the results object and the bookkeeping values needed by
    ## 'fetchInner' to locate items within the output file.
    object <- fetchResultsObject(filename)
    nIteration <- object@mcmc["nIteration"]
    lengthIter <- object@control$lengthIter
    listsAsSingleItems <- listsAsSingleItems()
    ## 'where' must be a non-empty vector with no missing values
    if (identical(length(where), 0L))
        stop(gettextf("'%s' has length %d",
                      "where", 0L))
    if (any(is.na(where)))
        stop(gettextf("'%s' has missing values",
                      "where"))
    where <- as.character(where)
    ## 'iterations', when supplied, must be distinct positive integers
    ## no larger than the number of iterations actually recorded
    if (!is.null(iterations)) {
        if (identical(length(iterations), 0L))
            stop(gettextf("'%s' has length %d",
                          "iterations", 0L))
        if (!is.numeric(iterations))
            stop(gettextf("'%s' does not have type \"%s\"",
                          "iterations", "numeric"))
        if (any(is.na(iterations)))
            stop(gettextf("'%s' has missing values",
                          "iterations"))
        if (!all(round(iterations) == iterations))
            stop(gettextf("'%s' has non-integer values",
                          "iterations"))
        if (min(iterations) < 1L)
            stop(gettextf("'%s' has values less than %d",
                          "iterations", 1L))
        if (max(iterations) > nIteration)
            stop(gettextf("maximum value for '%s' argument [%s] exceeds number of iterations [%d]",
                          "iterations", max(iterations), nIteration))
        if (any(duplicated(iterations)))
            stop(gettextf("'%s' has duplicates",
                          "iterations"))
        iterations <- sort(as.integer(iterations))
    }
    ## nothing to fetch if no iterations were recorded
    if (identical(dembase::nIteration(object), 0L))
        return(NULL)
    ## resolve the first element of 'where' against the slot names,
    ## allowing unambiguous partial matches
    choices <- methods::slotNames(object)
    target <- where[1L]
    i.match <- charmatch(target, choices, nomatch = -1L)
    if (i.match == -1L)
        raiseNotFoundError(target = target, choices = choices)
    if (i.match == 0L)
        raiseMultipleMatchesError(target = target, choices = choices)
    target <- choices[i.match] ## expand partial match to the full name
    ## descend into the chosen slot with the remainder of the path
    fetchInner(object = methods::slot(object, target),
               nameObject = target,
               where = where[-1L],
               iterations = iterations,
               filename = filename,
               lengthIter = lengthIter,
               nIteration = nIteration,
               listsAsSingleItems = listsAsSingleItems,
               impute = impute)
}
## HAS_TESTS
#' Extract combined results from estimation and prediction.
#'
#' Combined results from calling \code{\link{estimateModel}} followed
#' by \code{\link{predictModel}}, \code{\link{estimateCounts}} followed
#' by \code{\link{predictCounts}}, or \code{\link{estimateAccount}}
#' followed by \code{\link{predictAccount}}. (Note - functions
#' predictCounts, estimateAccount, and predictAccount have not been
#' written yet.)
#'
#' Identical results can be obtained by calling \code{\link{fetch}}
#' separately on the files created by the estimation and prediction functions,
#' and then combining. However, \code{fetchBoth} is more convenient.
#'
#' \code{fetchBoth} is particularly convenient in the case of main effects,
#' since it combines the estimates and predictions before
#' normalising, rather than normalising each separately. (See the
#' documentation for \code{\link{fetch}} for a discussion of normalisation.)
#'
#' @inheritParams fetch
#' @param filenameEst The filename used for estimation.
#' @param filenamePred The filename used for prediction.
#'
#' @seealso Function \code{\link{fetch}} extracts results from a single file.
#'
#' @examples
#' \dontrun{
#' deaths <- demdata::us.deaths
#' exposure <- demdata::us.exposure
#' deaths <- Counts(deaths,
#' dimscales = c(year = "Intervals"))
#' exposure <- Counts(exposure,
#' dimscales = c(year = "Intervals"))
#' filename.est <- tempfile()
#' filename.pred <- tempfile()
#' model <- Model(y ~ Poisson(mean ~ age + sex + year))
#' estimateModel(model,
#' filename = filename.est,
#' y = deaths,
#' exposure = exposure,
#' nBurnin = 5,
#' nSim = 5,
#' nChain = 2,
#' parallel = FALSE)
#' predictModel(filenameEst = filename.est,
#' filenamePred = filename.pred,
#' n = 5)
#' ## the model is too simple, and hasn't been run for long
#' ## enough, but we'll extract some results anyway...
#' rate <- fetchBoth(filenameEst = filename.est,
#' filenamePred = filename.pred,
#' where = c("model", "likelihood", "rate"))
#' year <- fetchBoth(filenameEst = filename.est,
#' filenamePred = filename.pred,
#' where = c("model", "prior", "year"))
#' }
#' @export
fetchBoth <- function(filenameEst, filenamePred, where, iterations = NULL,
                      impute = TRUE) {
    ## preparation and checking
    results.est <- fetchResultsObject(filenameEst)
    nIteration <- results.est@mcmc["nIteration"]
    lengthIterEst <- results.est@control$lengthIter
    listsAsSingleItems <- listsAsSingleItems()
    ## 'where' must be a non-empty vector with no missing values
    if (identical(length(where), 0L))
        stop(gettextf("'%s' has length %d",
                      "where", 0L))
    if (any(is.na(where)))
        stop(gettextf("'%s' has missing values",
                      "where"))
    where <- as.character(where)
    ## 'iterations', when supplied, must be distinct positive integers
    ## no larger than the number of iterations in the estimation results
    if (!is.null(iterations)) {
        if (identical(length(iterations), 0L))
            stop(gettextf("'%s' has length %d",
                          "iterations", 0L))
        if (!is.numeric(iterations))
            stop(gettextf("'%s' does not have type \"%s\"",
                          "iterations", "numeric"))
        if (any(is.na(iterations)))
            stop(gettextf("'%s' has missing values",
                          "iterations"))
        if (!all(round(iterations) == iterations))
            stop(gettextf("'%s' has non-integer values",
                          "iterations"))
        if (min(iterations) < 1L)
            stop(gettextf("'%s' has values less than %d",
                          "iterations", 1L))
        if (max(iterations) > nIteration)
            stop(gettextf("maximum value for '%s' argument [%s] exceeds number of iterations [%d]",
                          "iterations", max(iterations), nIteration))
        if (any(duplicated(iterations)))
            stop(gettextf("'%s' has duplicates",
                          "iterations"))
        iterations <- as.integer(iterations)
        iterations <- sort(iterations)
    }
    ## nothing to fetch if no iterations were recorded
    if (identical(dembase::nIteration(results.est), 0L))
        return(NULL)
    ## resolve the first element of 'where' against the slot names,
    ## allowing unambiguous partial matches
    choices <- methods::slotNames(results.est)
    name <- where[1L]
    i <- charmatch(name, choices, nomatch = -1L)
    if (i == -1L)
        raiseNotFoundError(target = name, choices = choices)
    if (i == 0L)
        raiseMultipleMatchesError(target = name, choices = choices)
    name <- choices[i] ## in case of partial match
    ## extract results
    is.time.varying <- isTimeVarying(filenameEst = filenameEst,
                                     filenamePred = filenamePred,
                                     where = where)
    if (is.time.varying) {
        ## time-varying item: fetch from the estimation and prediction
        ## files separately, then combine the two
        results.pred <- fetchResultsObject(filenamePred)
        lengthIterPred <- results.pred@control$lengthIter
        est <- fetchInner(object = methods::slot(results.est, name),
                          nameObject = name,
                          where = where[-1L],
                          iterations = iterations,
                          filename = filenameEst,
                          lengthIter = lengthIterEst,
                          nIteration = nIteration,
                          listsAsSingleItems = listsAsSingleItems,
                          impute = impute)
        pred <- fetchInner(object = methods::slot(results.pred, name),
                          nameObject = name,
                          where = where[-1L],
                          iterations = iterations,
                          filename = filenamePred,
                          lengthIter = lengthIterPred,
                          nIteration = nIteration,
                          listsAsSingleItems = listsAsSingleItems,
                          impute = impute)
        ans <- combineEstPred(est = est, pred = pred)
    }
    else {
        ## not time-varying: the estimation file alone has the item
        ans <- fetchInner(object = methods::slot(results.est, name),
                          nameObject = name,
                          where = where[-1L],
                          iterations = iterations,
                          filename = filenameEst,
                          lengthIter = lengthIterEst,
                          nIteration = nIteration,
                          listsAsSingleItems = listsAsSingleItems,
                          impute = impute)
    }
    ans
}
#' Obtain the coverage ratio for a dataset.
#'
#' After running \code{\link{estimateCounts}} or \code{\link{estimateAccount}},
#' calculate the coverage ratio for a particular dataset. The coverage
#' ratio is the ratio between counts in the dataset and counts in the
#' series being modelled. For instance, if the dataset is the census
#' and the series being modelled is population, then the coverage ratio
#' is the number of people (of a given age, sex, geography, etc) measured
#' in the census, divided by the number of people in the true population.
#'
#' Only the name of the dataset is needed. With \code{estimateCounts},
#' the denominator is always \code{y}. With \code{estimateAccount},
#' the denominator depends on the data model used for the particular dataset.
#'
#' Note that a coverage ratio is a finite-population estimate, in that it is
#' defined in terms of actual (potentially observable) numbers of people or
#' events, rather than underlying (unobservable) parameters. Some data models,
#' such as Poisson models, yield rates or probabilities that can be
#' interpreted as super-population counterparts of coverage ratios.
#' These can be extracted using function \code{\link{fetch}}.
#'
#' @inheritParams fetch
#' @param dataset The name of the dataset to be used as the numerator
#' of the ratios.
#'
#' @return An object of class \code{\link[dembase:DemographicArray-class]{Values}}.
#'
#' @seealso Coverage ratios can also be calculated by a couple of calls
#' to \code{\link{fetch}}.
#'
#' @examples
#' nat <- demdata::sim.admin.nat
#' health <- demdata::sim.admin.health
#' survey <- demdata::sim.admin.survey
#' nat <- Counts(nat, dimscales = c(year = "Points"))
#' health <- Counts(health, dimscales = c(year = "Points"))
#' survey <- Counts(survey)
#' y <- health + 10
#' model <- Model(y ~ Poisson(mean ~ age + sex + region,
#' useExpose = FALSE))
#' dataModels <- list(Model(nat ~ PoissonBinomial(prob = 0.98)),
#' Model(health ~ Poisson(mean ~ age)),
#' Model(survey ~ Binomial(mean ~ 1)))
#' datasets <- list(nat = nat, health = health, survey = survey)
#' filename <- tempfile()
#' ## in a real example, nBurnin and nSim would be much larger
#' \dontrun{
#' estimateCounts(model = model,
#' y = y,
#' dataModels = dataModels,
#' datasets = datasets,
#' filename = filename,
#' nBurnin = 5,
#' nSim = 5,
#' nThin = 2,
#' nChain = 2,
#' parallel = FALSE)
#' cover.nat <- fetchCoverage(filename, "nat")
#' summary(cover.nat)
#' }
#' @export
fetchCoverage <- function(filename, dataset) {
    ## 'dataset' must be a single, non-missing, non-blank name
    if (!identical(length(dataset), 1L))
        stop(gettextf("'%s' does not have length %d",
                      "dataset", 1L))
    if (is.na(dataset))
        stop(gettextf("'%s' is missing",
                      "dataset"))
    if (!nzchar(dataset))
        stop(gettextf("'%s' is blank",
                      "dataset"))
    ## fetch the combined object holding the datasets and series
    combined <- fetch(filename = filename,
                      where = "final")[[1L]]
    names.datasets <- combined@namesDatasets
    ## partial matching allowed; no match and ambiguous match both give 0
    i.dataset <- pmatch(dataset, names.datasets, nomatch = 0L)
    if (i.dataset == 0L)
        stop(gettextf("could not find dataset called \"%s\" : choices are %s",
                      dataset, paste(dQuote(names.datasets), collapse = ", ")))
    ## denominator: the series that the dataset measures
    series <- getSeriesForDataset(combined = combined,
                                  dataset = dataset,
                                  filename = filename)
    ## numerator: the counts recorded in the dataset itself
    counts.dataset <- fetch(filename,
                            where = c("datasets", dataset))
    suppressMessages(counts.dataset / series)
}
## HAS_TESTS
#' Create a list of objects for analysis with package "coda".
#'
#' Create an object of class \code{"\link{mcmc.list}"}, or a list of objects of
#' class \code{"mcmc.list"}, which can be analysed using the diagnostic
#' functions in package \pkg{coda}.
#'
#' If no \code{where} argument is supplied, \code{\link[coda]{mcmc.list}}
#' objects are returned for every parameter or batch of parameters that were
#' estimated. If a \code{where} argument is supplied, it must describe the
#' path to parameter or parameters. See \code{\link{fetch}} for more on
#' specifying paths.
#'
#' If a batch of parameters has many elements, then calculating MCMC
#' diagnostics for all those elements can be very slow.
#' To speed things up, when applied to a batch of parameters,
#' \code{fetchMCMC} randomly selects only \code{nSample} of these
#' parameters. \code{nSample} defaults to 25.
#'
#' Alternatively, the indices of the parameters to be selected
#' can be specified using the \code{sample} argument.
#' See below for an example.
#'
#' If \code{thinned} is \code{TRUE}, then the \code{thin} argument in
#' \pkg{coda} function \code{\link[coda]{mcmc}} is set to 1; otherwise
#' \code{thin} is set to \code{nThin}, extracted from \code{object}.
#'
#' If the model in question contains structural zeros (see \code{\link{Poisson}}),
#' some parameters of the model may be undefined. These parameters are omitted
#' from the results, unless they are specifically requested via the
#' \code{sample} argument.
#'
#' @param filename The name of the file where the output from the
#' \code{estimate} function is kept.
#' @param where A character vector used to select a single parameter or batch
#' of parameters. See below for details.
#' @param nSample Number of parameters to be sampled from a batch
#' of parameters. Defaults to 25. Ignored when batch has fewer than
#' 25 parameters (including having only one parameter.)
#' @param sample Indices of parameters to be sampled. An alternative
#' to \code{nSample} when more control over sampling is required.
#' @param thinned Logical. If \code{TRUE}, the default, the
#' \code{"\link{mcmc.list}"} object or objects describe the thinned process.
#'
#' @return A single object of class \code{"\link{mcmc.list}"}, or a named list
#' of such objects.
#'
#' @seealso Other functions for examining output from
#' calls to \code{\link{estimateModel}}, \code{\link{estimateCounts}}, and
#' \code{\link{estimateAccount}} include \code{\link{fetch}},
#' \code{\link{fetchFiniteSD}}, and \code{\link{listContents}}.
#'
#' @examples
#' library(demdata)
#' ## fit model
#' deaths <- Counts(round(VADeaths2))
#' popn <- Counts(VAPopn)
#' filename <- tempfile()
#' estimateModel(Model(y ~ Poisson(mean ~ age)),
#' y = deaths,
#' exposure = popn,
#' filename = filename,
#' nBurnin = 20,
#' nSim = 20,
#' nChain = 2,
#' parallel = FALSE)
#'
#' ## create a list containing an "mcmc.list" object for
#' ## every element of the results that was estimated
#' l <- fetchMCMC(filename)
#' names(l)
#' sapply(l, class)
#'
#' ## analyse using functions from 'coda'
#' \dontrun{
#' library(coda)
#' plot(l$model.likelihood.rate)
#' plot(l$model.likelihood.rate, ask = FALSE)
#' plot(l$model.likelihood.rate, ask = FALSE, smooth = FALSE)
#' gelman.diag(l$model.likelihood.rate)
#' }
#'
#' ## create a single "mcmc.list" object
#' mean.mcmc <- fetchMCMC(filename,
#' where = c("model", "likelihood", "rate"))
#'
#' ## only write part of each name
#' mean.mcmc <- fetchMCMC(filename, where = c("mod", "like", "r"))
#'
#' ## sample 6 randomly-chosen values
#' mean.mcmc.5 <- fetchMCMC(filename,
#' where = c("model", "likelihood", "rate"),
#' nSample = 6)
#'
#' ## sample the first 5 values
#' mean.mcmc.5 <- fetchMCMC(filename,
#' where = c("model", "likelihood", "rate"),
#' sample = 1:5)
#'
#' @export
fetchMCMC <- function(filename, where = NULL, nSample = 25,
                      sample = NULL, thinned = TRUE) {
    object <- fetchResultsObject(filename)
    if (!methods::is(object, "Results"))
        stop(gettextf("results object has class \"%s\"",
                      class(object)))
    ## 'thinned' must be a single non-missing logical
    if (!identical(length(thinned), 1L))
        stop(gettextf("'%s' does not have length %d",
                      "thinned", 1L))
    if (!is.logical(thinned))
        stop(gettextf("'%s' does not have type \"%s\"",
                      "thinned", "logical"))
    if (is.na(thinned))
        stop(gettextf("'%s' is missing",
                      "thinned"))
    mcmc.args <- fetch(filename, where = "mcmc")
    n.chain <- mcmc.args[["nChain"]]
    ## when describing the thinned process, successive stored iterations
    ## are treated as adjacent, so 'thin' passed to coda is 1
    if (thinned)
        n.thin <- 1L
    else
        n.thin <- mcmc.args[["nThin"]]
    if (is.null(where)) {
        ## no 'where' supplied: build an "mcmc.list" for every
        ## parameter or batch of parameters that was estimated
        where.mcmc <- whereMetropStat(object, whereEstimated)
        n.where <- length(where.mcmc)
        if (n.where == 0L)
            return(NULL)
        ans <- vector(mode = "list", length = n.where)
        for (i in seq_len(n.where)) {
            where <- where.mcmc[[i]]
            obj <- fetch(filename, where = where)
            skeleton <- fetchSkeleton(object, where = where)
            ans[[i]] <- MCMCDemographic(object = obj,
                                        sample = sample,
                                        nSample = nSample,
                                        nChain = n.chain,
                                        nThin = n.thin,
                                        skeleton = skeleton)
        }
        ## name each element after its path, eg "model.likelihood.rate";
        ## vapply (rather than sapply) guarantees a character result
        names(ans) <- vapply(where.mcmc, paste, character(1L),
                             collapse = ".")
        ans
    }
    else {
        ## 'where' supplied: build a single "mcmc.list" for that item,
        ## which must be a demographic array with an iteration dimension
        obj <- fetch(filename, where = where)
        if (!methods::is(obj, "DemographicArray"))
            stop(gettextf("'%s' has class \"%s\"",
                          where[length(where)], class(obj)))
        if (!("iteration" %in% dembase::dimtypes(obj)))
            stop(gettextf("'%s' does not have dimension with dimtype \"%s\"",
                          where[length(where)], "iteration"))
        skeleton <- fetchSkeleton(object, where = where)
        MCMCDemographic(object = obj,
                        sample = sample,
                        nSample = nSample,
                        nChain = n.chain,
                        nThin = n.thin,
                        skeleton = skeleton)
    }
}
## NO_TESTS
#' Summarise estimation output.
#'
#' Summarise the results from \code{\link{estimateModel}},
#' \code{\link{estimateCounts}} or \code{\link{estimateAccount}}.
#' \code{fetchSummary} can also be called on results from prediction
#' functions, though doing so is generally less enlightening.
#'
#' @param filename The filename used by the estimation or prediction
#' function.
#' @param nSample The number of elements to sample from a batch of
#' parameters. Defaults to 25.
#'
#' @section Metropolis-Hastings updates:
#'
#' Depending on the model, one or more batches of parameters is updated
#' using Metropolis-Hasting updates. \code{jump} is the standard
#' deviation of the proposal density. \code{acceptance} is the
#' proportion of proposals that are accepted. \code{autocorrelation}
#' is the correlation between successive iterations, after thinning.
#' Autocorrelations are calculated separately, parameter by parameter,
#' and then averaged. If a batch contains more than \code{nSample} parameters,
#' then \code{nSample} parameters are chosen at random, and autocorrelations
#' calculated only for that sample.
#'
#' The Metropolis-Hastings statistics can be extracted from the summary
#' object using function \code{\link{metropolis}}.
#'
#' @section Parameters:
#'
#' 'Rhat's, or 'potential scale reduction factors' are calculated
#' using function \code{\link[coda]{gelman.diag}}
#' from package \pkg{coda}. However, like Gelman et al (2014: 284-286),
#' but unlike the original \code{gelman.diag} function,
#' each chain is split into two,
#' and the first and second halves compared,
#' to capture variation within each chain,
#' in addition to variation across chains.
#'
#' To save time, \code{fetchSummary} calculates Rhats for a sample of
#' at most \code{nSample} elements from each batch of parameters.
#' The number of elements sampled is shown in the column labeled \code{n}.
#'
#' Rhats typically vary across different parameters within a batch.
#' To give an idea of the distribution, \code{fetchSummary} shows
#' the median and maximum Rhats. The best way to get detailed information
#' about convergence is to use \code{\link{fetchMCMC}} followed by
#' \code{plot}.
#'
#' The sampling of the Rhats introduces an element
#' of randomness: if \code{fetchSummary} is called twice on the same
#' \code{filename}, the results for \code{Rhat} will differ.
#'
#' When the number of chains (\code{nChain}), the number of iterations
#' (\code{nSim}), or the proportion of proposals accepted is small,
#' estimated Rhats can be unstable, and must be interpreted with
#' caution.
#'
#' \code{Est.} in the output from \code{fetchSummary} is short for
#' "Point estimates". The \code{Est.} columns provide summaries
#' of the distribution of the estimates. \code{fetchSummary}
#' calculates point estimates (posterior medians) for every element
#' in a batch of parameters, and then shows the distribution of
#' these point estimates. If a batch has only one parameter, then
#' only the point estimate for that parameter is shown.
#' If a batch has more than one parameter, then the minimum
#' point estimate, median point estimate, and maximum point estimate
#' for that batch are shown.
#'
#' To save time, \code{fetchSummary} calculates the point estimates
#' from a maximum of 100 draws from the posterior sample. The values
#' may therefore differ slightly from the more accurate values
#' obtained by calling \code{fetch} and then \code{collapseIterations}
#' (assuming the full sample has more than 100 draws.)
#'
#'
#' If greater control over the calculation of Rhat is desired, parameter
#' estimates can be extracted using \code{\link{fetchMCMC}}, and
#' results calculated using \code{\link[coda]{gelman.diag}}.
#'
#' @return An object of class \code{\linkS4class{SummaryResults}}.
#'
#' @seealso Individual components of the summary can be extracted
#' using functions \code{\link{metropolis}}, \code{\link{parameters}},
#' and \code{\link{gelmanDiag}}. Parameter estimates from estimation
#' or prediction are obtained using function \code{fetch}.
#'
#' @references
#' Geyer, C. J. (2011) Introduction to Markov Chain Monte Carlo.
#' Brooks, S., Gelman, A., Jones, G., Meng, X-L (eds.) \emph{Handbook of Markov
#' Chain Monte Carlo}. CRC Press.
#'
#' Gelman, A., Shirley, K. (2011) Inference from simulations and monitoring
#' convergence. Brooks, S., Gelman, A., Jones, G., Meng, X-L (eds.)
#' \emph{Handbook of Markov Chain Monte Carlo}. CRC Press.
#'
#' Gelman, A., Carlin, J.B., Stern, H.S. and Rubin, D.B., 2014.
#' \emph{Bayesian Data Analysis. Third Edition.} Boca Raton, FL, USA:
#' Chapman & Hall/CRC.
#'
#' @examples
#' deaths <- demdata::VADeaths2
#' popn <- demdata::VAPopn
#' deaths <- round(deaths)
#' deaths <- Counts(deaths)
#' popn <- Counts(popn)
#' filename <- tempfile()
#' model <- Model(y ~ Poisson(mean ~ age + sex),
#' jump = 0.5)
#' estimateModel(model = model,
#' y = deaths,
#' exposure = popn,
#' filename = filename,
#' nBurnin = 50,
#' nSim = 50,
#' nChain = 2,
#' parallel = FALSE)
#' fetchSummary(filename)
#'
#' ## keep summary object and extract parts from it
#' summary.est <- fetchSummary(filename)
#' gelmanDiag(summary.est)
#' metropolis(summary.est)
#' parameters(summary.est)
#' @export
fetchSummary <- function(filename, nSample = 25) {
    ## validate 'nSample' before touching the results file
    checkPositiveInteger(x = nSample,
                         name = "nSample")
    results <- fetchResultsObject(filename)
    ## dispatch on the class of the results object
    summary(object = results,
            filename = filename,
            nSample = as.integer(nSample))
}
## HAS_TESTS
#' Finite-population standard deviations.
#'
#' Extract the finite-population standard deviation of main effects
#' and interactions in hierarchical models.
#'
#' In the hierarchical models discussed in \code{\link{Model}}, cell means
#' are modelled using terms formed from cross-classifying dimensions such as
#' age or sex, or from interactions between these dimensions. Adapting ideas
#' from ANOVA, Gelman (2004: 408-409) proposes that the finite-population
#' standard deviations of these terms be used as a measure of the terms'
#' relative importance. For instance, if an 'age' term has a greater
#' finite-population standard deviation than a 'region' term in a model, this
#' suggests that age is a more important predictor of outcomes than region.
#'
#' @inheritParams fetchMCMC
#' @param probs A vector of quantiles, for summarizing the standard deviations.
#' @param iterations A vector of positive integers giving the iterations to be
#' extracted.
#'
#' @return A list of objects of class \code{"FiniteSD"} if
#' \code{object} contains several hierarchical models; a single
#' \code{"FiniteSD"} object if \code{object} contains
#' a single hierarchical model; and \code{NULL} if \code{object}
#' contains no hierarchical models.
#'
#' @seealso \code{\link{estimateModel}}, \code{\link{estimateCounts}},
#' \code{\link{estimateAccount}}
#'
#' @references Gelman, A. (2004) Analysis of variance:
#' why it is more important than ever
#' (with discussion). \emph{Annals of Statistics}. 33(1): 1-53.
#' @export
fetchFiniteSD <- function(filename, probs = c(0.025, 0.5, 0.975), iterations = NULL) {
    ## load the results object and dispatch on its class
    results <- fetchResultsObject(filename)
    finiteSDObject(object = results,
                   filename = filename,
                   probs = probs,
                   iterations = iterations)
}
## HAS_TESTS
#' Estimate or predict finite-population quantity 'y'.
#'
#' Estimate or predict finite-population quantities for
#' the total population, from estimated rates, probabilities,
#' or means for the sampled population, plus information
#' on the sizes of the sampled and total population.
#'
#' Consider a model in which
#'
#' \deqn{y_i \sim G(\gamma_i, n_i)}
#'
#' for some distribution \eqn{G}, such as a Poisson, binomial, or
#' normal distribution. \eqn{y_i} is a count or other value
#' for cell \eqn{i} within a classification defined by
#' dimensions such as age and sex. \eqn{\gamma_i} is an
#' unobserved cell-level parameter such as a rate, probability,
#' or mean. \eqn{n_i} is a cell-level exposure or sample size, which
#' is included in some models, such as Poisson or binomial models,
#' but not all.
#'
#' We assume that \eqn{y_i} is observed for only part of
#' the population (the sampled population), and would like to
#' know \eqn{Y_i}, the corresponding value for the total population.
#' This requires estimating values for the unobserved part
#' of the population (the nonsampled population). We assume that
#' the unobserved part of the population is subject to the same
#' \eqn{\gamma_i} as the observed part.
#'
#' Quantities \eqn{y_i} and \eqn{Y_i} are finite-population
#' quantities, that is, they are values that are, or
#' could theoretically be, observed in real populations.
#' Value \eqn{\gamma_i}, in contrast, is a super-population
#' quantity. It describes the hypothetical super-population
#' from which values such as \eqn{y_i} and \eqn{Y_i} are
#' drawn.
#'
#' @inheritParams fetch
#'
#' @param total An object of class \code{\linkS4class{Counts}}
#' giving the size of the population for which the finite-population
#' estimates are to be generated.
#' @param sampled An object of class \code{\linkS4class{Counts}}
#' giving the size of the sample from which the rates, means, or
#' probabilities were estimated. If no \code{sampled} argument
#' is supplied, and if the model includes an \code{exposure}
#' term, then \code{sampled} is assumed to equal \code{exposure}.
#'
#' @seealso \code{finiteY} can be used with the results from
#' a call to \code{estimate} or \code{predict} functions such
#' as \code{\link{estimateModel}} and \code{\link{predictModel}}.
#'
#' @references
#' Gelman, A., Carlin, J.B., Stern, H.S. and Rubin, D.B., 2014.
#' \emph{Bayesian Data Analysis. Third Edition.} Boca Raton, FL, USA:
#' Chapman & Hall/CRC. Section 8.3.
#'
#' @examples
#' ## generate sampled, non-sampled, and total population
#' popn.sampled <- Counts(array(rpois(n = 20, lambda = 100),
#' dim = c(10, 2),
#' dimnames = list(region = LETTERS[1:10],
#' sex = c("Female", "Male"))))
#' popn.nonsampled <- Counts(array(rpois(n = 20, lambda = 900),
#' dim = c(10, 2),
#' dimnames = list(region = LETTERS[1:10],
#' sex = c("Female", "Male"))))
#' popn.total <- popn.sampled + popn.nonsampled
#'
#' ## binomial model
#' y.sampled.binom <- Counts(array(rbinom(n = 20, size = popn.sampled, prob = 0.5),
#' dim = c(10, 2),
#' dimnames = list(region = LETTERS[1:10],
#' sex = c("Female", "Male"))))
#' filename.binom <- tempfile()
#' estimateModel(Model(y ~ Binomial(mean ~ region + sex)),
#' y = y.sampled.binom,
#' exposure = popn.sampled,
#' filename = filename.binom,
#' nBurnin = 10,
#' nSim = 10,
#' nChain = 2)
#' fy.binom <- finiteY(filename = filename.binom, # sample population assumed
#' total = popn.total) # to equal exposure
#'
#' ## normal model
#' y.sampled.norm <- Counts(array(rnorm(n = 20),
#' dim = c(10, 2),
#' dimnames = list(region = LETTERS[1:10],
#' sex = c("Female", "Male"))))
#' filename.norm <- tempfile()
#' estimateModel(Model(y ~ Normal(mean ~ region + sex)),
#' y = y.sampled.norm,
#' filename = filename.norm,
#' nBurnin = 10,
#' nSim = 10,
#' nChain = 2)
#' fy.norm <- finiteY(filename = filename.norm,
#' sampled = popn.sampled, # specify sample population
#' total = popn.total)
#'@export
finiteY <- function(filename, total, sampled = NULL, iterations = NULL) {
object <- fetchResultsObject(filename)
if (!methods::is(object, "ResultsModel"))
stop(gettextf("results object has class \"%\"", class(object)))
if (identical(dembase::nIteration(object), 0L))
return(NULL)
model <- object@final[[1L]]@model
has.exposure <- methods::is(model, "UseExposure")
y.sampled <- fetch(filename, where = "y", iterations = iterations, impute = TRUE)
total <- checkAndTidyTotalOrSampled(x = total,
model = model,
ySampled = y.sampled,
name = "total")
if (is.null(sampled)) {
if (has.exposure)
sampled <- fetch(filename, where = "exposure")
else
stop(gettextf("argument '%s' is missing with no default",
"sampled"))
}
else
sampled <- checkAndTidyTotalOrSampled(x = sampled,
model = model,
ySampled = y.sampled,
name = "sampled")
nonsampled <- total - sampled
if (any(nonsampled < 0L))
stop(gettextf("'%s' has cells with fewer units than corresponding cells in '%s'",
"total", "sampled"))
y.nonsampled <- drawYNonSampled(filename,
model = model,
nonsampled = nonsampled,
iterations = iterations)
y.sampled + y.nonsampled
}
## NO_TESTS
#' List of output from estimate function.
#'
#' Calling \code{\link{listContents}} on the filename used by
#' \code{\link{estimateModel}}, \code{\link{estimateCounts}},
#' or \code{\link{estimateAccount}} shows the items available to be
#' fetched, in a hierarchical structure.
#' @inheritParams fetchMCMC
#' @param max Maximum depth of hierarchical structure to show.
#' @export
listContents <- function(filename, where = character(), max = NULL) {
    ## validate 'max' before opening the results file
    checkMax(max)
    results <- fetchResultsObject(filename)
    contents <- makeContentsList(object = results,
                                 where = where,
                                 max = max)
    ## print the hierarchy, but hand back the list itself invisibly
    ## so it can be captured programmatically
    showContentsList(contents)
    invisible(contents)
}
#' Show final model specification.
#'
#' Show the complete specification of the model used by functions
#' \code{\link{estimateModel}}, \code{\link{estimateCounts}},
#' and \code{\link{estimateAccount}}, or by their prediction
#' counterparts such as \code{\link{predictModel}}.
#'
#' When setting up models to be fitted by the estimation functions,
#' users typically specify only some of the details of the priors for
#' the main effects or interactions. The remaining details are filled in
#' by the estimation functions, based on the input data. Function
#' \code{showModel} can be used to find out what choices the estimation
#' functions made.
#'
#' To see the partial specification constructed by function
#' \code{\link{Model}}, print the model object (typically by typing
#' the name of the object at the console). See below for an example.
#'
#' The \code{trunc-half-t(df, s^2, max)} in the printed results refers to a
#' truncated \code{\link[=halft-distn]{half-t}} distribution with \code{df}
#' degrees of freedom, scale \code{s^2}, and maximum value \code{max}.
#'
#' @inheritParams fetchMCMC
#'
#' @return \code{showModel} is called for its side effect, which is to print a
#' description of the model.
#'
#' @seealso Parameter estimates can be extracted using function
#' \code{\link{fetch}}.
#'
#' @examples
#' births <- demdata::nz.births.reg
#' popn <- demdata::nz.popn.reg
#' births <- Counts(births, dimscales = c(year = "Intervals"))
#' popn <- Counts(popn, dimscales = c(year = "Intervals"))
#' births <- subarray(births, year > 2005 & year < 2014)
#'
#' filename.est <- tempfile()
#' filename.pred <- tempfile()
#'
#' model <- Model(y ~ Poisson(mean ~ age * region + year),
#'                year ~ DLM(trend = NULL))
#'
#' ## model specification before estimateModel called
#' model
#'
#' estimateModel(model = model,
#'               y = births,
#'               exposure = popn,
#'               filename = filename.est,
#'               nBurnin = 5,
#'               nSim = 5,
#'               nChain = 2,
#'               parallel = FALSE)
#'
#' ## model specification after estimateModel called
#' showModel(filename.est)
#'
#' predictModel(filenameEst = filename.est,
#'              filenamePred = filename.pred,
#'              n = 5)
#'
#' ## specification used by predictModel
#' showModel(filename.pred)
#' @export
showModel <- function(filename) {
    ## load the stored results and delegate the actual printing
    results <- fetchResultsObject(filename)
    showModelHelper(object = results)
}
## (removed: website-embed boilerplate accidentally pasted from a code-hosting page;
## it was not valid R and would have broken parsing of this file)