R/query-functions.R

Defines functions showModel listContents finiteY fetchFiniteSD fetchSummary fetchMCMC fetchCoverage fetchBoth fetch describePriors

Documented in describePriors fetch fetchBoth fetchCoverage fetchFiniteSD fetchMCMC fetchSummary finiteY listContents showModel

#' Text description of priors
#'
#' Generate data.frames giving, for each main effect or
#' interaction in a model, a short description of the
#' associated prior. The data.frames are useful for
#' understanding a model, or for generating tables in
#' documents describing the analyses.
#'
#' \code{describePriors} is applied to the databases
#' created by functions \code{\link{estimateModel}},
#' \code{\link{estimateCounts}} or \code{\link{estimateAccount}}.
#'
#' @param filename The filename used by the estimate function.
#'
#' @return If \code{filename} refers to the results from a
#' call to \code{\link{estimateModel}}, then \code{describePriors}
#' returns a single data.frame. Otherwise, it returns a named
#' list containing one or more data.frames.
#'
#' @seealso \code{\link{showModel}} gives a semi-algebraic
#' description of an entire model, including the priors.
#'
#' @examples
#' deaths <- demdata::VADeaths2
#' popn <- demdata::VAPopn
#' deaths <- round(deaths)
#' deaths <- Counts(deaths)
#' popn <- Counts(popn)
#' filename <- tempfile()
#' estimateModel(Model(y ~ Poisson(mean ~ age * sex)),
#'               y = deaths,
#'               exposure = popn,
#'               filename = filename,
#'               nBurnin = 0,
#'               nSim = 5,
#'               nChain = 2)
#' describePriors(filename)
#' @export
describePriors <- function(filename) {
    ## Retrieve the results object stored in the database created by
    ## the estimate function, then delegate to the method that builds
    ## the data.frame(s) describing the priors.
    results <- fetchResultsObject(filename)
    describePriorsResults(results)
}


## HAS_TESTS
#' Extract estimates from model output.
#'
#' Functions \code{\link{estimateModel}}, \code{\link{estimateCounts}},
#' and \code{\link{estimateAccount}}, and their \code{predict} counterparts,
#' send their output to a simple database specified by \code{filename}.
#' Function \code{fetch} extracts estimates from this database.
#'
#' Estimates are stored in a hierarchical structure.  The structure can be viewed
#' with function \code{\link{listContents}}.  Function \code{fetch} uses
#' a description of the path through the hierarchical structure to locate
#' estimates.  The \code{where} argument to \code{fetch} is a character vector,
#' giving names of nodes in the hierarchy.  Partial matching is used with
#' the names of the nodes, so names can be shortened, provided they are still
#' long enough to uniquely identify a node.  (To make code self-documenting,
#' it is still best to use the full names in production code.)
#' 
#' Using the \code{iterations} argument to select a subset of iterations can be useful
#' if the object being extracted is large relative to memory or if calculations
#' are running slowly.
#'
#' Where possible, \code{\link{estimateModel}}, \code{\link{estimateCounts}},
#' and \code{\link{estimateAccount}} avoid imputing missing values during
#' model fitting, since the imputed values are typically highly correlated
#' with other unknown quantities, which can slow convergence.  If a batch
#' of estimates has missing values and if \code{impute} is \code{TRUE} the
#' missing values will be imputed at the time they are fetched.
#' 
#' @param filename The filename used by the estimate function.
#' @param where A character vector describing the path to the item to be
#' extracted.
#' @param iterations A vector of positive integers giving the iterations to be
#' extracted if an item has multiple iterations.
#' @param impute Logical. Whether to impute missing values.
#' Defaults to \code{TRUE}.
#'
#' @return Parameters that were estimated from the data typically have class
#' \code{DemographicArray} and have a dimension with
#' \code{\link[dembase]{dimtype}} \code{"iteration"}.  Other elements
#' stored in \code{object} have a variety of classes.
#'
#' @seealso \code{fetch} is used to extract output from functions 
#' \code{\link{estimateModel}}, \code{\link{estimateCounts}},
#' and \code{\link{estimateAccount}}.  Function \code{\link{listContents}}
#' shows the internal structure of the output, which is useful for
#' constructing the \code{where} argument to \code{fetch}.
#'
#' @references
#' The weak identification of main effects and interactions is discussed in
#'
#' Gelman, A. (2005). Analysis of variance: Why it is more important than ever.
#' \emph{Annals of Statistics}, 33(1):1-53.
#'
#' and
#'
#' Nelder, J. A. (1994). The statistics of linear models: Back to basics.
#' \emph{Statistics and Computing}. 4: 221-234.
#' 
#' @examples
#' deaths <- demdata::VADeaths2
#' popn <- demdata::VAPopn
#' deaths <- round(deaths)
#' deaths <- Counts(deaths)
#' popn <- Counts(popn)
#' filename <- tempfile()
#' estimateModel(Model(y ~ Poisson(mean ~ age * sex)),
#'               y = deaths,
#'               exposure = popn,
#'               filename = filename,
#'               nBurnin = 20,
#'               nSim = 20,
#'               nChain = 2,
#'               parallel = FALSE)
#' ## the model hasn't been run for long enough,
#' ## but we'll extract some results anyway
#' 
#' ## examine structure
#' listContents(filename)
#' 
#' ## extract means from likelihood model
#' rate <- fetch(filename,
#'               where = c("model", "likelihood", "rate"))
#' plot(rate)
#' 
#' ## only supply enough of component of 'where' to identify
#' rate <- fetch(filename,
#'               where = c("mo", "l", "r"))
#' 
#' ## extract all iterations of intercept
#' fetch(filename,
#'       where = c("model", "prior", "(Intercept)"))
#' 
#' ## extract every fifth iteration
#' fetch(filename,
#'       where = c("model", "prior", "(Intercept)"),
#'       iterations = seq(from = 5, by = 5, to = 40))
#' @export
fetch <- function(filename, where = character(), iterations = NULL,
                  impute = TRUE) {
    ## Pull the results object, plus the constants needed to locate
    ## estimates within the file.
    results <- fetchResultsObject(filename)
    n.iter <- results@mcmc["nIteration"]
    length.iter <- results@control$lengthIter
    lists.as.single.items <- listsAsSingleItems()
    ## validate 'where'
    if (identical(length(where), 0L))
        stop(gettextf("'%s' has length %d",
                      "where", 0L))
    if (any(is.na(where)))
        stop(gettextf("'%s' has missing values",
                      "where"))
    where <- as.character(where)
    ## validate 'iterations' (NULL means "use all iterations")
    if (!is.null(iterations)) {
        if (identical(length(iterations), 0L))
            stop(gettextf("'%s' has length %d",
                          "iterations", 0L))
        if (!is.numeric(iterations))
            stop(gettextf("'%s' does not have type \"%s\"",
                          "iterations", "numeric"))
        if (any(is.na(iterations)))
            stop(gettextf("'%s' has missing values",
                          "iterations"))
        if (!all(round(iterations) == iterations))
            stop(gettextf("'%s' has non-integer values",
                          "iterations"))
        if (min(iterations) < 1L)
            stop(gettextf("'%s' has values less than %d",
                          "iterations", 1L))
        if (max(iterations) > n.iter)
            stop(gettextf("maximum value for '%s' argument [%s] exceeds number of iterations [%d]",
                          "iterations", max(iterations), n.iter))
        if (any(duplicated(iterations)))
            stop(gettextf("'%s' has duplicates",
                          "iterations"))
        iterations <- sort(as.integer(iterations))
    }
    ## nothing to extract if no iterations were recorded
    if (identical(dembase::nIteration(results), 0L))
        return(NULL)
    ## match the first element of 'where' against the slot names,
    ## allowing unambiguous abbreviations
    choices <- methods::slotNames(results)
    target <- where[1L]
    i <- charmatch(target, choices, nomatch = -1L)
    if (i == -1L)
        raiseNotFoundError(target = target, choices = choices)
    if (i == 0L)
        raiseMultipleMatchesError(target = target, choices = choices)
    target <- choices[i] ## expand partial match
    ## descend through the remainder of 'where' to locate the item
    fetchInner(object = methods::slot(results, target),
               nameObject = target,
               where = where[-1L],
               iterations = iterations,
               filename = filename,
               lengthIter = length.iter,
               nIteration = n.iter,
               listsAsSingleItems = lists.as.single.items,
               impute = impute)
}


## HAS_TESTS
#' Extract combined results from estimation and prediction.
#'
#' Combined results from calling \code{\link{estimateModel}} followed
#' by \code{\link{predictModel}}, \code{\link{estimateCounts}} followed
#' by \code{\link{predictCounts}}, or \code{\link{estimateAccount}}
#' followed by \code{\link{predictAccount}}.  (Note - functions
#' predictCounts, estimateAccount, and predictAccount have not been
#' written yet.)
#'
#' Identical results can be obtained by calling \code{\link{fetch}}
#' separately on the files created by the estimation and prediction functions,
#' and then combining.  However, \code{fetchBoth} is more convenient.
#'
#' \code{fetchBoth} is particularly convenient in the case of main effects,
#' since it combines the estimates and predictions before
#' normalising, rather than normalising each separately.  (See the
#' documentation for \code{\link{fetch}} for a discussion of normalisation.)
#'
#' @inheritParams fetch
#' @param filenameEst The filename used for estimation.
#' @param filenamePred The filename used for prediction.
#'
#' @seealso Function \code{\link{fetch}} extracts results from a single file.
#'
#' @examples
#' \dontrun{
#' deaths <- demdata::us.deaths
#' exposure <- demdata::us.exposure
#' deaths <- Counts(deaths,
#'                  dimscales = c(year = "Intervals"))
#' exposure <- Counts(exposure,
#'                    dimscales = c(year = "Intervals"))
#' filename.est <- tempfile()
#' filename.pred <- tempfile()
#' model <- Model(y ~ Poisson(mean ~ age + sex + year))
#' estimateModel(model,
#'               filename = filename.est,
#'               y = deaths,
#'               exposure = exposure,
#'               nBurnin = 5,
#'               nSim = 5,
#'               nChain = 2,
#'               parallel = FALSE)
#' predictModel(filenameEst = filename.est,
#'              filenamePred = filename.pred,
#'              n = 5)
#' ## the model is too simple, and hasn't been run for long
#' ## enough, but we'll extract some results anyway...
#' rate <- fetchBoth(filenameEst = filename.est,
#'                   filenamePred = filename.pred,
#'                   where = c("model", "likelihood", "rate"))
#' year <- fetchBoth(filenameEst = filename.est,
#'                   filenamePred = filename.pred,
#'                   where = c("model", "prior", "year"))
#' }
#' @export
fetchBoth <- function(filenameEst, filenamePred, where, iterations = NULL,
                      impute = TRUE) {
    ## preparation and checking
    ## Validation is carried out against the estimation results; the
    ## prediction file is only opened later, and only if needed.
    results.est <- fetchResultsObject(filenameEst)
    nIteration <- results.est@mcmc["nIteration"]
    lengthIterEst <- results.est@control$lengthIter
    listsAsSingleItems <- listsAsSingleItems()
    ## 'where' must be a non-empty character vector with no NAs
    if (identical(length(where), 0L))
        stop(gettextf("'%s' has length %d",
                      "where", 0L))
    if (any(is.na(where)))
        stop(gettextf("'%s' has missing values",
                      "where"))
    where <- as.character(where)
    ## 'iterations', if supplied, must be unique positive integers
    ## no greater than the number of recorded iterations
    if (!is.null(iterations)) {
        if (identical(length(iterations), 0L))
            stop(gettextf("'%s' has length %d",
                          "iterations", 0L))
        if (!is.numeric(iterations))
            stop(gettextf("'%s' does not have type \"%s\"",
                          "iterations", "numeric"))
        if (any(is.na(iterations)))
            stop(gettextf("'%s' has missing values",
                          "iterations"))
        if (!all(round(iterations) == iterations))
            stop(gettextf("'%s' has non-integer values",
                          "iterations"))
        if (min(iterations) < 1L)
            stop(gettextf("'%s' has values less than %d",
                          "iterations", 1L))
        if (max(iterations) > nIteration)
            stop(gettextf("maximum value for '%s' argument [%s] exceeds number of iterations [%d]",
                          "iterations", max(iterations), nIteration))
        if (any(duplicated(iterations)))
            stop(gettextf("'%s' has duplicates",
                          "iterations"))
        iterations <- as.integer(iterations)
        iterations <- sort(iterations)
    }
    ## nothing to extract if no iterations were recorded
    if (identical(dembase::nIteration(results.est), 0L))
        return(NULL)
    ## match the first element of 'where' against the slot names,
    ## allowing unambiguous abbreviations
    choices <- methods::slotNames(results.est)
    name <- where[1L]
    i <- charmatch(name, choices, nomatch = -1L)
    if (i == -1L)
        raiseNotFoundError(target = name, choices = choices)
    if (i == 0L)
        raiseMultipleMatchesError(target = name, choices = choices)
    name <- choices[i] ## in case of partial match
    ## extract results
    ## Time-varying items exist in both files and must be combined;
    ## time-invariant items are taken from the estimation file alone.
    is.time.varying <- isTimeVarying(filenameEst = filenameEst,
                                     filenamePred = filenamePred,
                                     where = where)
    if (is.time.varying) {
        results.pred <- fetchResultsObject(filenamePred)
        lengthIterPred <- results.pred@control$lengthIter
        est <- fetchInner(object = methods::slot(results.est, name),
                          nameObject = name,
                          where = where[-1L],
                          iterations = iterations,
                          filename = filenameEst,
                          lengthIter = lengthIterEst,
                          nIteration = nIteration,
                          listsAsSingleItems = listsAsSingleItems,
                          impute = impute)
        pred <- fetchInner(object = methods::slot(results.pred, name),
                           nameObject = name,
                           where = where[-1L],
                           iterations = iterations,
                           filename = filenamePred,
                           lengthIter = lengthIterPred,
                           nIteration = nIteration,
                           listsAsSingleItems = listsAsSingleItems,
                           impute = impute)
        ans <- combineEstPred(est = est, pred = pred)
    }
    else {
        ans <- fetchInner(object = methods::slot(results.est, name),
                          nameObject = name,
                          where = where[-1L],
                          iterations = iterations,
                          filename = filenameEst,
                          lengthIter = lengthIterEst,
                          nIteration = nIteration,
                          listsAsSingleItems = listsAsSingleItems,
                          impute = impute)
    }
    ans
}

#' Obtain the coverage ratio for a dataset.
#'
#' After running \code{\link{estimateCounts}} or \code{\link{estimateAccount}},
#' calculate the coverage ratio for a particular dataset.  The coverage
#' ratio is the ratio between counts in the dataset and counts in the
#' series being modelled.  For instance, if the dataset is the census
#' and the series being modelled is population, then the coverage ratio
#' is the number of people (of a given age, sex, geography, etc) measured
#' in the census, divided by the number of people in the true population.
#'
#' Only the name of the dataset is needed. With \code{estimateCounts},
#' the denominator is always \code{y}.  With \code{estimateAccount},
#' the denominator depends on the data model used for the particular dataset.
#'
#' Note that a coverage ratio is a finite-population estimate, in that it is
#' defined in terms of actual (potentially observable) numbers of people or
#' events, rather than underlying (unobservable) parameters.  Some data models,
#' such as Poisson models, yield rates or probabilities that can be
#' interpreted as super-population counterparts of coverage ratios.
#' These can be extracted using function \code{\link{fetch}}.
#'
#' @inheritParams fetch
#' @param dataset The name of the dataset to be used as the numerator
#' of the ratios.
#'
#' @return An object of class \code{\link[dembase:DemographicArray-class]{Values}}.
#'
#' @seealso Coverage ratios can also be calculated by a couple of calls
#' to \code{\link{fetch}}.
#'
#' @examples
#' nat <- demdata::sim.admin.nat
#' health <- demdata::sim.admin.health
#' survey <- demdata::sim.admin.survey
#' nat <- Counts(nat, dimscales = c(year = "Points"))
#' health <- Counts(health, dimscales = c(year = "Points"))
#' survey <- Counts(survey)
#' y <- health + 10
#' model <- Model(y ~ Poisson(mean ~ age + sex + region,
#'                            useExpose = FALSE))
#' dataModels <- list(Model(nat ~ PoissonBinomial(prob = 0.98)),
#'                    Model(health ~ Poisson(mean ~ age)),
#'                    Model(survey ~ Binomial(mean ~ 1)))
#' datasets <- list(nat = nat, health = health, survey = survey)
#' filename <- tempfile()
#' ## in a real example, nBurnin and nSim would be much larger
#' \dontrun{
#' estimateCounts(model = model,
#'                y = y,
#'                dataModels = dataModels,
#'                datasets = datasets,
#'                filename = filename,
#'                nBurnin = 5,
#'                nSim = 5,
#'                nThin = 2,
#'                nChain = 2,
#'                parallel = FALSE)
#' cover.nat <- fetchCoverage(filename, "nat")
#' summary(cover.nat)
#' }
#' @export
fetchCoverage <- function(filename, dataset) {
    ## validate 'dataset': a single, non-missing, non-blank name
    if (!identical(length(dataset), 1L))
        stop(gettextf("'%s' does not have length %d",
                      "dataset", 1L))
    if (is.na(dataset))
        stop(gettextf("'%s' is missing",
                      "dataset"))
    if (!nzchar(dataset))
        stop(gettextf("'%s' is blank",
                      "dataset"))
    ## the combined object from the final state holds the dataset names
    combined <- fetch(filename = filename,
                      where = "final")[[1L]]
    choices <- combined@namesDatasets
    found <- pmatch(dataset, choices, nomatch = 0L) > 0L
    if (!found)
        stop(gettextf("could not find dataset called \"%s\" : choices are %s",
                      dataset, paste(dQuote(choices), collapse = ", ")))
    ## denominator: the series that the data model for 'dataset' measures
    series <- getSeriesForDataset(combined = combined,
                                  dataset = dataset,
                                  filename = filename)
    ## numerator: the counts recorded in the dataset itself
    counts <- fetch(filename,
                    where = c("datasets", dataset))
    ## coverage ratio = dataset counts / series counts
    suppressMessages(counts / series)
}

## HAS_TESTS
#' Create a list of objects for analysis with package "coda".
#' 
#' Create an object of class \code{"\link{mcmc.list}"}, or a list of objects of
#' class \code{"mcmc.list"}, which can be analysed using the diagnostic
#' functions in package \pkg{coda}.
#' 
#' If no \code{where} argument is supplied, \code{\link[coda]{mcmc.list}}
#' objects are returned for every parameter or batch of parameters that were
#' estimated. If a \code{where} argument is supplied, it must describe the
#' path to parameter or parameters.  See \code{\link{fetch}} for more on
#' specifying paths.
#' 
#' If a batch of parameters has many elements, then calculating MCMC
#' diagnostics for all those elements can be very slow.
#' To speed things up, when applied to a batch of parameters,
#' \code{fetchMCMC} randomly selects only \code{nSample} of these
#' parameters.  \code{nSample} defaults to 25.
#'
#' Alternatively, the indices of the parameters to be selected
#' can be specified using the \code{sample} argument.
#' See below for an example.
#'
#' If \code{thinned} is \code{TRUE}, then the \code{thin} argument in
#' \pkg{coda} function \code{\link[coda]{mcmc}} is set to 1; otherwise
#' \code{thin} is set to \code{nThin}, extracted from \code{object}.
#'
#' If the model in question contains structural zeros (see \code{\link{Poisson}}),
#' some parameters of the model may be undefined. These parameters are omitted
#' from the results, unless they are specifically requested via the
#' \code{sample} argument.
#' 
#' @param filename The name of the file where the output from the
#' \code{estimate} function is kept.
#' @param where A character vector used to select a single parameter or batch
#' of parameters.  See below for details.
#' @param nSample Number of parameters to be sampled from a batch
#' of parameters. Defaults to 25. Ignored when batch has fewer than
#' 25 parameters (including having only one parameter.)
#' @param sample Indices of parameters to be sampled.  An alternative
#' to \code{nSample} when more control over sampling is required.
#' @param thinned Logical.  If \code{TRUE}, the default, the
#' \code{"\link{mcmc.list}"} object or objects describe the thinned process.
#'
#' @return A single object of class \code{"\link{mcmc.list}"}, or a named list
#' of such objects.
#' 
#' @seealso Other functions for examining output from
#' calls to \code{\link{estimateModel}}, \code{\link{estimateCounts}}, and
#' \code{\link{estimateAccount}} include \code{\link{fetch}},
#' \code{\link{fetchFiniteSD}}, and \code{\link{listContents}}.
#'
#' @examples
#' library(demdata)
#' ## fit model
#' deaths <- Counts(round(VADeaths2))
#' popn <- Counts(VAPopn)
#' filename <- tempfile()
#' estimateModel(Model(y ~ Poisson(mean ~ age)),
#'               y = deaths,
#'               exposure = popn,
#'               filename = filename,
#'               nBurnin = 20,
#'               nSim = 20,
#'               nChain = 2,
#'               parallel = FALSE)
#' 
#' ## create a list containing an "mcmc.list" object for
#' ## every element of the results that was estimated
#' l <- fetchMCMC(filename)
#' names(l)
#' sapply(l, class)
#' 
#' ## analyse using functions from 'coda'
#' \dontrun{
#' library(coda)
#' plot(l$model.likelihood.rate)
#' plot(l$model.likelihood.rate, ask = FALSE)
#' plot(l$model.likelihood.rate, ask = FALSE, smooth = FALSE)
#' gelman.diag(l$model.likelihood.rate)
#' }
#' 
#' ## create a single "mcmc.list" object
#' mean.mcmc <- fetchMCMC(filename,
#'                   where = c("model", "likelihood", "rate"))
#' 
#' ## only write part of each name
#' mean.mcmc <- fetchMCMC(filename, where = c("mod", "like", "r"))
#' 
#' ## sample 6 randomly-chosen values
#' mean.mcmc.5 <- fetchMCMC(filename,
#'                     where = c("model", "likelihood", "rate"),
#'                     nSample = 6)
#'
#' ## sample the first 5 values
#' mean.mcmc.5 <- fetchMCMC(filename,
#'                     where = c("model", "likelihood", "rate"),
#'                     sample = 1:5)
#'
#' @export
fetchMCMC <- function(filename, where = NULL, nSample = 25,
                      sample = NULL, thinned = TRUE) {
    ## Retrieve the results object and confirm it came from an
    ## estimate function (rather than, e.g., a corrupted file).
    object <- fetchResultsObject(filename)
    if (!methods::is(object, "Results"))
        stop(gettextf("results object has class \"%s\"",
                      class(object)))
    ## 'thinned' must be a single non-missing logical
    if (!identical(length(thinned), 1L))
        stop(gettextf("'%s' does not have length %d",
                      "thinned", 1L))
    if (!is.logical(thinned))
        stop(gettextf("'%s' does not have type \"%s\"",
                      "thinned", "logical"))
    if (is.na(thinned))
        stop(gettextf("'%s' is missing",
                      "thinned"))
    mcmc.args <- fetch(filename, where = "mcmc")
    n.chain <- mcmc.args[["nChain"]]
    ## when describing the thinned process, consecutive stored draws
    ## are treated as adjacent (thin = 1); otherwise pass the model's
    ## thinning interval through to coda
    if (thinned)
        n.thin <- 1L
    else
        n.thin <- mcmc.args[["nThin"]]
    if (is.null(where)) {
        ## no 'where' supplied: build an mcmc.list for every batch of
        ## parameters that was estimated
        where.mcmc <- whereMetropStat(object, whereEstimated)
        n.where <- length(where.mcmc)
        if (n.where == 0L)
            NULL  # nothing was estimated, so return NULL (invisibly via if)
        else {
            ans <- vector(mode = "list", length = n.where)
            for (i in seq_len(n.where)) {
                where <- where.mcmc[[i]]
                obj <- fetch(filename, where = where)
                skeleton <- fetchSkeleton(object, where = where)
                ans[[i]] <- MCMCDemographic(object = obj,
                                            sample = sample,
                                            nSample = nSample,
                                            nChain = n.chain,
                                            nThin = n.thin,
                                            skeleton = skeleton)
            }
            ## name each element after its dot-separated path
            names(ans) <- sapply(where.mcmc, paste, collapse = ".")
            ans
        }
    }
    else {
        ## 'where' supplied: build a single mcmc.list for the selected
        ## batch, which must be a DemographicArray with an iteration
        ## dimension (i.e. something that was actually estimated)
        obj <- fetch(filename, where = where)
        if (!methods::is(obj, "DemographicArray"))
            stop(gettextf("'%s' has class \"%s\"",
                          where[length(where)], class(obj)))
        if (!("iteration" %in% dembase::dimtypes(obj)))
            stop(gettextf("'%s' does not have dimension with dimtype \"%s\"",
                          where[length(where)], "iteration"))
        skeleton <- fetchSkeleton(object, where = where)
        MCMCDemographic(object = obj,
                        sample = sample,
                        nSample = nSample,
                        nChain = n.chain,
                        nThin = n.thin,
                        skeleton = skeleton)
    }
}



## NO_TESTS
#' Summarise estimation output.
#'
#' Summarise the results from \code{\link{estimateModel}},
#' \code{\link{estimateCounts}} or \code{\link{estimateAccount}}.
#' \code{fetchSummary} can also be called on results from prediction
#' functions, though doing so is generally less enlightening.
#'
#' @param filename The filename used by the estimation or prediction
#' function.
#' @param nSample The number of elements to sample from a batch of
#' parameters. Defaults to 25.
#'
#' @section Metropolis-Hastings updates:
#'
#' Depending on the model, one or more batches of parameters is updated
#' using Metropolis-Hasting updates.  \code{jump} is the standard
#' deviation of the propoposal density.  \code{acceptance} is the
#' proportion of proposals that are accepted.  \code{autocorrelation}
#' is the correlation between successive iterations, after thinning.
#' Autocorrelations are calculated separately, parameter by parameter,
#' and then averaged.  If a batch contains more than \code{nSample} parameters,
#' then \code{nSample} parameters are chosen at random, and autocorrelations
#' calculated only for that sample.
#'
#' The Metropolis-Hastings statistics can be extracted from the summary
#' object using function \code{\link{metropolis}}.
#'
#' @section Parameters:
#' 
#' 'Rhat's, or 'potential scale reduction factors' are calculated
#' using function \code{\link[coda]{gelman.diag}}
#' from package \pkg{coda}.  However, like Gelman et al (2014: 284-286),
#' but unlike the original \code{gelman.diag} function,
#' each chain is split into two,
#' and the first and second halves compared,
#' to capture variation within each chain,
#' in addition to variation across chains.
#'
#' To save time, \code{fetchSummary} calculates Rhats for a sample of
#' at most \code{nSample} elements from each batch of parameters.
#' The number of elements sampled is shown in the column labeled \code{n}.
#'
#' Rhats typically vary across different parameters within a batch.
#' To give an idea of the distribution, \code{fetchSummary} shows
#' the median and maximum Rhats. The best way to get detailed information
#' about convergence is to use \code{\link{fetchMCMC}} followed by
#' \code{plot}.
#'
#' The sampling of the Rhats introduces an element
#' of randomness: if \code{fetchSummary} is called twice on the same
#' \code{filename}, the results for \code{Rhat} will differ.
#'
#' When the number of chains (\code{nChain}), the number of iterations
#' (\code{nSim}), or the proportion of proposals accepted is small,
#' estimated Rhats can be unstable, and must be interpreted with
#' caution.
#'
#' \code{Est.} in the output from \code{fetchSummary} is short for
#' "Point estimates". The \code{Est.} columns provide summaries
#' of the distribution of the estimates. \code{fetchSummary}
#' calculates point estimates (posterior medians) for every element
#' in a batch of parameters, and then shows the distribution of
#' these point estimates.  If a batch has only one parameter, then
#' only the point estimate for that parameter is shown.
#' If a batch has more than one parameter, then the minimum
#' point estimate, median point estimate, and maximum point estimate
#' for that batch are shown.
#'
#' To save time, \code{fetchSummary} calculates the point estimates
#' from a maximum of 100 draws from the posterior sample. The values
#' may therefore differ slightly from the more accurate values
#' obtained by calling \code{fetch} and then \code{collapseIterations}
#' (assuming the full sample has more than 100 draws.)
#'
#'
#' If greater control over the calculation of Rhat is desired, parameter
#' estimates can be extracted using \code{\link{fetchMCMC}}, and
#' results calculated using \code{\link[coda]{gelman.diag}}.  
#'
#' @return An object of class \code{\linkS4class{SummaryResults}}.
#'
#' @seealso Individual components of the summary can be extracted
#' using functions \code{\link{metropolis}}, \code{\link{parameters}},
#' and \code{\link{gelmanDiag}}.  Parameter estimates from estimation
#' or prediction are obtained using function \code{fetch}.
#'
#' @references
#' Geyer, C. J. (2011) Introduction to Markov Chain Monte Carlo.
#' Brooks, S., Gelman, A., Jones, G., Meng, X-L (eds.) \emph{Handbook of Markov
#' Chain Monte Carlo}. CRC Press.
#'
#' Gelman, A., Shirley, K. (2011) Inference from simulations and monitoring
#' convergence.  Brooks, S., Gelman, A., Jones, G., Meng, X-L (eds.)
#' \emph{Handbook of Markov Chain Monte Carlo}. CRC Press.
#'
#' Gelman, A., Carlin, J.B., Stern, H.S. and Rubin, D.B., 2014.
#' \emph{Bayesian Data Analysis. Third Edition.} Boca Raton, FL, USA:
#' Chapman & Hall/CRC.
#'
#' @examples
#' deaths <- demdata::VADeaths2
#' popn <- demdata::VAPopn
#' deaths <- round(deaths)
#' deaths <- Counts(deaths)
#' popn <- Counts(popn)
#' filename <- tempfile()
#' model <- Model(y ~ Poisson(mean ~ age + sex),
#'                jump = 0.5)
#' estimateModel(model = model,
#'               y = deaths,
#'               exposure = popn,
#'               filename = filename,
#'               nBurnin = 50,
#'               nSim = 50,
#'               nChain = 2,
#'               parallel = FALSE)
#' fetchSummary(filename)
#' 
#' ## keep summary object and extract parts from it
#' summary.est <- fetchSummary(filename)
#' gelmanDiag(summary.est)
#' metropolis(summary.est)
#' parameters(summary.est)
#' @export
fetchSummary <- function(filename, nSample = 25) {
    ## 'nSample' must be a positive whole number
    checkPositiveInteger(x = nSample,
                         name = "nSample")
    ## Retrieve the results object and dispatch to its summary method,
    ## which samples at most 'nSample' elements per batch of parameters.
    results <- fetchResultsObject(filename)
    summary(object = results,
            filename = filename,
            nSample = as.integer(nSample))
}

## HAS_TESTS
#' Finite-population standard deviations.
#' 
#' Extract the finite-population standard deviation of main effects
#' and interactions in hierarchical models.
#' 
#' In the hierarchical models discussed in \code{\link{Model}}, cell means
#' are moderned using terms formed from cross-classifying dimensions such as
#' age or sex, or from interactions between these dimensions.  Adapting ideas
#' from ANOVA, Gelman (2004: 408-409) propose that the finite-population
#' standard deviations of these terms be used as a measure of the terms'
#' relative importance.  For instance, is an 'age' term has a greater
#' finite-population standard deviation than a 'region' term in a model, this
#' suggests that age is a more important predictor of outcomes than region.
#' 
#' @inheritParams fetchMCMC
#' @param probs A vector of quantiles, for summarizing the standard deviations.
#' @param iterations A vector of positive integers giving the iterations to be
#'     extracted.
#' 
#' @return A list of objects of class \code{"FiniteSD"} if
#' \code{object} contains several hierarchical models; a single
#' \code{"FiniteSD"} object if \code{object} contains
#' a single hierarchical model; and \code{NULL} if \code{object}
#' contains no hierarchical models.
#'
#' @seealso \code{\link{estimateModel}}, \code{\link{estimateCounts}},
#' \code{\link{estimateAccount}}
#'
#' @references Gelman, A. (2004) Analysis of variance:
#' why it is more important than ever
#' (with discussion). \emph{Annals of Statistics}. 33(1): 1-53.
#' @export
fetchFiniteSD <- function(filename, probs = c(0.025, 0.5, 0.975), iterations = NULL) {
    ## Load the results object from the database, then delegate to the
    ## method that extracts finite-population standard deviations.
    results <- fetchResultsObject(filename)
    finiteSDObject(object = results,
                   filename = filename,
                   iterations = iterations,
                   probs = probs)
}


## HAS_TESTS
#' Estimate or predict finite-population quantity 'y'.
#'
#' Estimate or predict finite-population quantities for
#' the total population, from estimated rates, probabilities,
#' or means for the sampled population, plus information
#' on the sizes of the sampled and total population.
#'
#' Consider a model in which
#'
#' \deqn{y_i \sim G(\gamma_i, n_i)}
#'
#' for some distribution \eqn{G}, such as a Poisson, binomial, or
#' normal distribution.  \eqn{y_i} is a count or other value
#' for cell \eqn{i} within a classification defined by
#' dimensions such as age and sex.  \eqn{\gamma_i} is an
#' unobserved cell-level parameter such as a rate, probability,
#' or mean.  \eqn{n_i} is a cell-level exposure or sample size, which
#' is included in some models, such as Poisson or binomial models,
#' but not all.
#'
#' We assume that \eqn{y_i} is observed for only part of
#' the population (the sampled population), and would like to
#' know \eqn{Y_i}, the corresponding value for the total population.
#' This requires estimating values for the unobserved part
#' of the population (the nonsampled population).  We assume that
#' the unobserved part of the population is subject to the same
#' \eqn{\gamma_i} as the observed part.
#'
#' Quantities \eqn{y_i} and \eqn{Y_i} are finite-population
#' quantities, that is, they are values that are, or
#' could theoretically be, observed in real populations.
#' Value \eqn{\gamma_i}, in contrast, is a super-population
#' quantity. It describes the hypothetical super-population
#' from which values such as \eqn{y_i} and \eqn{Y_i} are
#' drawn.
#'
#' @inheritParams fetch
#' 
#' @param total An object of class \code{\linkS4class{Counts}}
#' giving the size of the population for which the finite-population
#' estimates are to be generated.
#' @param sampled An object of class \code{\linkS4class{Counts}}
#' giving the size of the sample from which the rates, means, or
#' probabilities were estimated.  If no \code{sampled} argument
#' is supplied, and if the model includes an \code{exposure}
#' term, then \code{sampled} is assumed to equal \code{exposure}.
#'
#' @seealso \code{finiteY} can be used with the results from
#' a call to \code{estimate} or \code{predict} functions such
#' as \code{\link{estimateModel}} and \code{\link{predictModel}}.
#' 
#' @references
#' Gelman, A., Carlin, J.B., Stern, H.S. and Rubin, D.B., 2014.
#' \emph{Bayesian Data Analysis. Third Edition.} Boca Raton, FL, USA:
#' Chapman & Hall/CRC. Section 8.3.
#' 
#' @examples 
#' ## generate sampled, non-sampled, and total population
#' popn.sampled <- Counts(array(rpois(n = 20, lambda = 100),
#'                              dim = c(10, 2),
#'                              dimnames = list(region = LETTERS[1:10],
#'                                  sex = c("Female", "Male"))))
#' popn.nonsampled <- Counts(array(rpois(n = 20, lambda = 900),
#'                              dim = c(10, 2),
#'                              dimnames = list(region = LETTERS[1:10],
#'                                  sex = c("Female", "Male"))))
#' popn.total <- popn.sampled + popn.nonsampled
#'
#' ## binomial model
#' y.sampled.binom <- Counts(array(rbinom(n = 20, size = popn.sampled, prob = 0.5),
#'                           dim = c(10, 2),
#'                           dimnames = list(region = LETTERS[1:10],
#'                               sex = c("Female", "Male"))))
#' filename.binom <- tempfile()
#' estimateModel(Model(y ~ Binomial(mean ~ region + sex)),
#'               y = y.sampled.binom,
#'               exposure = popn.sampled,
#'               filename = filename.binom,
#'               nBurnin = 10,
#'               nSim = 10,
#'               nChain = 2)
#' fy.binom <- finiteY(filename = filename.binom, # sample population assumed
#'                     total = popn.total)        # to equal exposure
#'
#' ## normal model
#' y.sampled.norm <- Counts(array(rnorm(n = 20),
#'                                dim = c(10, 2),
#'                                dimnames = list(region = LETTERS[1:10],
#'                                    sex = c("Female", "Male"))))
#' filename.norm <- tempfile()
#' estimateModel(Model(y ~ Normal(mean ~ region + sex)),
#'               y = y.sampled.norm,
#'               filename = filename.norm,
#'               nBurnin = 10,
#'               nSim = 10,
#'               nChain = 2)
#' fy.norm <- finiteY(filename = filename.norm,
#'                    sampled = popn.sampled, # specify sample population
#'                    total = popn.total)
#'@export
finiteY <- function(filename, total, sampled = NULL, iterations = NULL) {
    object <- fetchResultsObject(filename)
    ## finite-population estimates are only defined for single-model results
    if (!methods::is(object, "ResultsModel"))
        stop(gettextf("results object has class \"%s\"", class(object)))
    ## nothing to extract if no iterations were kept
    if (identical(dembase::nIteration(object), 0L))
        return(NULL)
    model <- object@final[[1L]]@model
    has.exposure <- methods::is(model, "UseExposure")
    ## 'impute = TRUE' fills in any missing cells of 'y' from the posterior
    y.sampled <- fetch(filename, where = "y", iterations = iterations, impute = TRUE)
    total <- checkAndTidyTotalOrSampled(x = total,
                                        model = model,
                                        ySampled = y.sampled,
                                        name = "total")
    if (is.null(sampled)) {
        ## when the model has an exposure term, 'sampled' defaults to it
        if (has.exposure)
            sampled <- fetch(filename, where = "exposure")
        else
            stop(gettextf("argument '%s' is missing with no default",
                          "sampled"))
    } else {
        sampled <- checkAndTidyTotalOrSampled(x = sampled,
                                              model = model,
                                              ySampled = y.sampled,
                                              name = "sampled")
    }
    nonsampled <- total - sampled
    ## every cell of 'total' must be at least as large as 'sampled'
    if (any(nonsampled < 0L))
        stop(gettextf("'%s' has cells with fewer units than corresponding cells in '%s'",
                      "total", "sampled"))
    ## simulate outcomes for the nonsampled population, using the same
    ## cell-level parameters as the sampled population
    y.nonsampled <- drawYNonSampled(filename,
                                    model = model,
                                    nonsampled = nonsampled,
                                    iterations = iterations)
    y.sampled + y.nonsampled
}

## NO_TESTS
#' List of output from estimate function.
#' 
#' Calling \code{\link{listContents}} on the filename used by
#' \code{\link{estimateModel}}, \code{\link{estimateCounts}},
#' or \code{\link{estimateAccount}} shows the items available to be
#' fetched, in a hierarchical structure.
#' @inheritParams fetchMCMC
#' @param max Maximum depth of hierarchical structure to show.
#' @export
listContents <- function(filename, where = character(), max = NULL) {
    ## Validate 'max' before touching the results database.
    checkMax(max)
    results <- fetchResultsObject(filename)
    ## Build the hierarchical contents listing, print it, and return it
    ## invisibly so the function pipes and assigns cleanly.
    contents <- makeContentsList(object = results,
                                 where = where,
                                 max = max)
    showContentsList(contents)
    invisible(contents)
}


#' Show final model specification.
#'
#' Show the complete specification model used by functions
#' \code{\link{estimateModel}}, \code{\link{estimateCounts}},
#' and \code{\link{estimateAccount}}, or by their prediction
#' counterparts such as \code{\link{predictModel}}.
#'
#' When setting up models to be fitted by the estimation functions,
#' users typically only specify some of the details of the priors for
#' the main effects or interactions.  The remaining details are filled in
#' by the estimation functions, based on the input data.  Function
#' \code{showModel} can be used to find out what choices the estimation
#' functions made.
#'
#' To see the partial specification constructed by function
#' \code{\link{Model}}, print the model object (typically by typing
#' the name of the object at the console.)  See below for an example.
#'
#' The \code{trunc-half-t(df, s^2, max)} in the printed results refers to a
#' truncated \code{\link[=halft-distn]{half-t}} distribution with \code{df}
#' degrees of freedom, scale \code{s^2}, and maximum value \code{max}.
#'
#' @inheritParams fetchMCMC
#'
#' @return \code{showModel} is called for its side effect, which is to print a
#' description of the model.
#'
#' @seealso Parameter estimates can be extracted using function
#' \code{\link{fetch}}.
#' 
#' @examples
#' births <- demdata::nz.births.reg
#' popn <- demdata::nz.popn.reg
#' births <- Counts(births, dimscales = c(year = "Intervals"))
#' popn <- Counts(popn, dimscales = c(year = "Intervals"))
#' births <- subarray(births, year > 2005 & year < 2014)
#'
#' filename.est <- tempfile()
#' filename.pred <- tempfile()
#' 
#' model <- Model(y ~ Poisson(mean ~ age * region + year),
#'                year ~ DLM(trend = NULL))
#'
#' ## model specification before estimateModel called
#' model
#'
#' estimateModel(model = model,
#'               y = births,
#'               exposure = popn,
#'               filename = filename.est,
#'               nBurnin = 5,
#'               nSim = 5,
#'               nChain = 2,
#'               parallel = FALSE)
#'
#' ## model specification after estimateModel called
#' showModel(filename.est)
#'
#' predictModel(filenameEst = filename.est,
#'              filenamePred = filename.pred,
#'              n = 5)
#'
#' ## specification used by predictModel
#' showModel(filename.pred)
#' @export
showModel <- function(filename) {
    ## Fetch the completed results object and print its model specification.
    results <- fetchResultsObject(filename)
    showModelHelper(object = results)
}
## StatisticsNZ/demest documentation built on Nov. 2, 2023, 7:56 p.m.