#' MCMC sampler for day-specific probabilities of conception methodology
#'
#' \code{dsp} is an MCMC sampler for the methodology proposed by Dunson and
#' Stanford in \emph{Bayesian Inferences on Predictors of Conception
#' Probabilities} (2005).
#'
#' @param dspDat An object of \code{class} \code{\link{dspDat}}.
#'
#' @param nSamp The number of post-burn-in scans for which to perform the
#' sampler.
#'
#' @param nBurn Number of sampler scans included in the burn-in phase.
#'
#' @param nThin Value such that during the post-burn-in phase, only every
#' \code{nThin}-th scan is recorded for use in posterior inference. Default
#' of \code{1} corresponds to every scan being retained.
#'
#' @param hypGam Either \code{NULL} or a \code{list} containing hyperparameters
#' to be specified for the exponentiated model regression coefficients. None,
#' some, or all of the hyperparameters can be/need be specified.
#'
#' Each exponentiated regression coefficient has a prior defined in terms of 5
#' hyperparameters. These hyperparameters are the (i) prior probability of the
#' point mass state, the (ii) shape and (iii) rate of the gamma distribution
#' state, and the (iv) lower (v) and upper bounds of the gamma distribution
#' state.
#'
#' If not specified by the function input, then a default value is provided
#' for each of the hyperparameters. These default parameters, correponding to
#' their description in the preceeding paragraph, are (i) \code{0.5}, (ii)
#' \code{1}, (iii) \code{1}, (iv) \code{0}, and (v) \code{Inf}.
#'
#' Exponentiated regression coefficient hyperparameter specifications must be
#' provided as follows. If the input to \code{hypGam} is \code{NULL}, then
#' every hyperparameter is taken to be the default value. If some of the
#' hyperparameters are to be specified, then \code{hypGam} must be a
#' \code{list} containing a sub-hierarchy of \code{list}s; each of these
#' second-level \code{list}s must have the name of one of model design matrix
#' variables. Thus if non-\code{NULL}, then \code{hypGam} is a \code{list}
#' containing between \code{1} and \code{q} \code{list}s, where \code{q} is
#' the number of covariates in the model (after recoding categorical variables
#' to dummy-variable form).
#'
#' Each second-level \code{list} in \code{hypGam} must contain between
#' \code{1} and \code{5} \code{numeric} values with possible names (i)
#' \code{p}, (ii) \code{a}, (iii) \code{b}, (iv) \code{bndL}, or (v)
#' \code{bndU} corresponding to the hyperparameter description from before
#' with matching index. The order of the objects in either level of
#' \code{hypGam} does not matter.
#'
#' @param tuningGam Either \code{NULL} or a list containing one or more
#' \code{numeric} objects each with the name of one of the model design matrix
#' variables; the values are used as tuning parameters for the Metropolis step
#' for regression coeffients corresponding to continuous predictor variables.
#' Categorical variables do not have a Metropolis step and values provided for
#' them are ignored. If \code{NULL}, then default values are provided for
#' every regression coefficient (that corresponds to a continuous predictor
#' variable). If some but not all of the tuning parameters for regression
#' coefficients corresponding to continuous predictor variables are provided,
#' then default values are provided for the remaining. The default tuning
#' value for any regression coefficient is \code{0.25}.
#'
#' @param hypPhi Either \code{NULL} or a \code{list} containing one or two
#' \code{numeric} objects with names \code{c1} and/or \code{c2}. If supplied,
#' then these values correspond (respectively) to the shape and rate
#' parameters of the prior (gamma) distribution for the variance parameter of
#' the woman-specific fecundability multipliers. If \code{NULL}, then default
#' values are provided for both \code{c1} and \code{c2}, and if exactly one of
#' either \code{c1} or \code{c2} are provided, then a default value is
#' provided for the other. The default values for \code{c1} and \code{c2} are
#' \code{1} and \code{1}, respectively.
#'
#' @param tuningPhi Metropolis tuning parameter for the variance parameter of
#' the woman-specific fecundability mulitpliers. The proposal value for this
#' variance parameter is sampled from a uniform distribution with support as
#' determined by the tuning parameter.
#'
#' @param trackProg One of either \code{"none"}, \code{"percent"}, or
#' \code{"verbose"}; partial matching is supported.
#'
#' @param progQuants Vector with values in (0,1]. Ignored if \code{trackProg}
#' is specified as \code{"none"}. If \code{trackProg} is one of
#' \code{"percent"} or \code{"verbose"} then the specified output is printed
#' whenever the post-burn-in progress of the sampler reaches one of the
#' quantiles specified by \code{progQuants}.
#'
#' @param saveToFile \code{logical} specifying whether the samples from the
#' post-burn-in phase are to be either written to file or returned as
#' \code{data.frame} objects. Note that in either case output characterizing
#' the model is returned by the sampler.
#'
#' @param outPath String specifying the local path into which output files
#' containing the MCMC samples are to be placed. Ignored if \code{saveToFile}
#' is \code{FALSE}.
#'
#'
#' @details Takes preprocessed fertility data in the form of a
#' \code{\link{dspDat}} object and performs an MCMC sampling algorithm for the
#' Dunson and Stanford day-specific probabilities of conception methodology.
#'
#' Selection of the covariates to include in the model is performed when
#' creating the \code{dspDat} object.
#'
#'
#' @return \code{dsp} returns a list containing the following objects
#'
#' \describe{ \item{\code{formula}}{Model \code{formula}, as passed to the
#' \code{dsp} sampler through the input to the \code{dspDat} parameter.}
#'
#' \item{\code{hypGam}}{\code{list} containing a sub-hierarchy of
#' \code{list}s, each containing the hyperparameter values used for the
#' sampler for the regression coefficients.}
#'
#' \item{\code{tuningGam}}{********}
#'
#' \item{\code{hypPhi}}{Hyperparameters for the variance parameter of the
#' woman-specific fecundability multipliers.}
#'
#' \item{\code{tuningPhi}}{Metropolis tuning parameter used for sampling the
#' variance parameter of the woman-specific fecundability multipliers.}
#'
#' \item{\code{nSamp}}{Input to \code{nSamp} parameter.}
#'
#' \item{\code{nBurn}}{Input to \code{nBurn} parameter.}
#'
#' \item{\code{nThin}}{Input to \code{nThin} parameter.}
#'
#' \item{\code{outPath}}{If \code{saveToFile} specified as \code{TRUE}, then
#' the input to \code{outPath} parameter.}
#'
#' \item{\code{phi}}{If \code{saveToFile} specified as \code{FALSE}, then a
#' vector containing the post-burn-in samples for the variance parameter of
#' the woman-specific fecundability multipliers.}
#'
#' \item{\code{xi}}{If \code{saveToFile} specified as \code{FALSE}, then a
#' \code{data.frame} containing the post-burn-in samples for the
#' woman-specific fecundability multipliers.}
#'
#' \item{\code{gam}}{If \code{saveToFile} specified as \code{FALSE}, then a
#' \code{data.frame} containing the post-burn-in samples for the regression
#' coefficients.}
#'
#' }
#'
#' @author David A. Pritchard and Sam Berchuck, 2015
#'
#' @references Dunson, David B., and Joseph B. Stanford. "Bayesian inferences on
#' predictors of conception probabilities." \emph{Biometrics} 61.1 (2005):
#' 126-133.
dsp <- function(dspDat, nSamp=1e4, nBurn=0, nThin=1, hypGam=NULL, tuningGam=NULL,
hypPhi=NULL, tuningPhi=0.3, trackProg="percent", progQuants=seq(0.1, 1.0, 0.1),
saveToFile=FALSE, outPath=NULL) {
# TODO: check if valid input
# TODO: does it work if formula entered directly into fcn? I think yes..
# TODO: work out details for 'format_outPath' function
# TODO: verbose print not yet written for 'saveToFile == TRUE' case
# TODO: 'getHypGam' not yet written (properly)
# TODO: add deviance to verbose print
# TODO: update 'getHypGam' using code suggested in comments
list2env(dspDat$samplerObj, envir=environment())
rm(dspDat)
# Objects related to burn-in and thinning
nBurn <- as.integer(nBurn)
burnPhaseBool <- !identical(nBurn, 0L)
nThin <- as.integer(nThin)
thinIsOneBool <- identical(nThin, 1L)
# Add a backslash to 'outPath' if necessary. How does this work for Windows OS?
if (saveToFile)
outPath <- format_outPath(outPath)
# Objects for progress statistics
printProgBool <- !identical(trackProg, "none")
# 'bbl': short for "burn bar length" (in chars); choice of 50 is arbitrary
bbl <- 50
burnQuants <- seq(1 / bbl, 1, 1 / bbl)
if (printProgBool) {
# 'trackVals': sampler iterations at which we print the percentage of progress
trackVals <- list()
trackVals$prog <- sapply(progQuants, function(x) tail(which(1:nSamp <= x * nSamp), 1))
trackVals$burn <- sapply(burnQuants, function(x) tail(which(1:nBurn <= x * nBurn), 1) - nBurn)
if (burnPhaseBool) {
# Write first line of burn-in progress bar
cat("Burn-in progress: |", pasteC(rep(" ", bbl)), "|", sep="")
}
else if (identical(trackProg, "percent")) {
# Write first line of 'trackProg="percent"' option
cat("Sampler progress: ")
}
}
# Combine default hyperparameters with custom user input hyperparameters
hypPhi <- getHypPhi(hypPhi)
hypGam <- getHypGam(varNames, hypGam)
tuningGam <- getTuningGam(q)
gamIsTrunBool <- sapply(1:q, function(j)
with(!isTRUE(all.equal(c(bndL, bndU), c(0, Inf))), data=hypGam[[j]]))
# Set initial values: uses mean of prior dists for phi and gamma
Wfull <- integer(nObs)
phi <- hypPhi$c1 / hypPhi$c2
gamCoef <- theta <- getGamInit(hypGam, gamIsTrunBool)
uProdBeta <- drop(U %*% log(gamCoef))
xi <- rep(1, n)
xiDay <- xi[idDayExpan]
# Metropolis acceptance rate counters
metCtr <- list( phiAccept = 0L,
gamAccept = integer(q),
gamTotal = integer(q) )
# Inititalize MCMC output files / objects
if (saveToFile) {
write(varNames, file=paste0(outPath, "GAMMA.csv"), sep=",", ncolumns=q)
write(subjId, file=paste0(outPath, "XI.csv"), sep=",", ncolumns=n)
write("phi", file=paste0(outPath, "PHI.csv"), sep=",", ncolumns=1)
}
else {
phiOut <- numeric(nSamp)
xiOut <- setNames(data.frame(matrix(nrow=nSamp, ncol=n)), subjId)
gamOut <- setNames(data.frame(matrix(nrow=nSamp, ncol=q)), varNames)
}
# Begin MCMC sampler ==========================================================
# Subtract 'nBurn' from 's' to prevent work later checking for thinning
for (s in (1 - nBurn):nSamp) {
# Sample missing intercourse values and update data
if (useNaSexBool) idx <- sampIdx(uProdBeta, xiDay, naSexDat, nObs)
# Sample latent variable W
W <- sampW(uProdBeta, xiDay, idx$preg, idx$pregCyc)
# 'Wfull': W's for every sex day, even those that are always 0 (i.e. cyc's w/o pregnancy)
Wfull <- replace(integer(nObs), idx$preg, W)
# Sample regression coefficients gamma
for (h in 1:q) {
# Binary case has closed-form full conditional
if (gamBinBool[h]) {
uProdBetaNoH <- getUProdBetaNoH(uProdBeta, drop(U[, h]), gamCoef[h])
gamCoef[h] <- sampGam(Wfull, uProdBetaNoH, xiDay,
hypGam[[h]], idx$U$all[[h]], idx$U$preg[[h]], gamIsTrunBool[h])
uProdBeta <- getUProdBeta(uProdBetaNoH, drop(U[, h]), gamCoef[h])
}
# Continuous case requires sampling via Metropolis algorithm
else {
# 'uProdTheta': list with uProdBeta for point mass and continuous value of theta
uProdTheta <- getUProdTheta(uProdBeta, drop(U[, h]), gamCoef[h], theta[h])
# M is the state part of the mixture distribution
Mbool <- sampM(Wfull, xiDay, uProdTheta, hypGam[[h]]$p)
# Corresponds to the point mass part of the mixture distribution
if (Mbool) {
gamCoef[h] <- 1
theta[h] <- with(rgamma(1, shape=a, rate=b), data=hypGam[[h]])
uProdBeta <- uProdTheta$point
}
# Corresponds to the continuous part of the mixture distribution
else {
propTheta <- sampPropTheta(theta[h], tuningGam[h])
uProdTheta$prop <- uProdTheta$point + drop(U[, h] * log(propTheta))
logR <- getGamLogR(Wfull, xiDay, uProdTheta, theta[h], propTheta, hypGam[[h]])
if (log(runif(1)) < logR) {
theta[h] <- propTheta
uProdBeta <- uProdTheta$prop
metCtr$gamAccept[h] <- metCtr$gamAccept[h] + 1L
}
else {
uProdBeta <- uProdTheta$cont
}
gamCoef[h] <- theta[h]
metCtr$gamTotal[h] <- metCtr$gamTotal[h] + 1L
}
}
} # End gamma update
# Sample woman-specific fecundability multiplier xi
xi <- sampXi(W, uProdBeta, phi, idx$subj$obs, idx$subj$preg, idx$pregCyc, n)
xiDay <- xi[idDayExpan]
# Metropolis step for phi, the variance parameter for xi
phiProp <- sampPhiProp(phi, tuningPhi)
phiLogR <- getPhiLogR(xi, phi, phiProp, hypPhi)
if (log(runif(1)) < phiLogR) {
phi <- phiProp
if (!burnPhaseBool)
metCtr$phiAccept <- metCtr$phiAccept + 1L
}
# Write samples to output
if (burnPhaseBool) {
if (identical(s, 0L)) {
burnPhaseBool <- FALSE
if (printProgBool) {
printBurn(which(s == trackVals$burn), bbl)
if (identical(trackProg, "percent"))
cat("\nPost-burn-in progress: ")
else if (identical(trackProg, "verbose"))
cat("\nEntering post-burn-in phase of sampler..\n")
}
}
}
else if (thinIsOneBool || identical(s %% nThin, 0L)) {
if (saveToFile) {
write(phi, file=paste0(outPath, "PHI.csv"), sep=",", ncolumns=1, append=TRUE)
write(xi, file=paste0(outPath, "XI.csv"), sep=",", ncolumns=n, append=TRUE)
write(gamCoef, file=paste0(outPath, "GAMMA.csv"), sep=",", ncolumns=q, append=TRUE)
}
else {
phiOut[s] <- phi
xiOut[s, ] <- xi
gamOut[s, ] <- gamCoef
}
}
# Print progress / verbose info
if (printProgBool) {
if (burnPhaseBool) {
if (s %in% trackVals$burn) {
printBurn(which(s == trackVals$burn), bbl)
}
}
else if (s %in% trackVals$prog)
printProg(trackProg, nSamp, s, gamOut, varNames, gamBinBool, metCtr)
}
} # End DSP sampler ----------------------------------------------------------
# Construct and return sampler output
outObj <- list( formula = formula,
hypGam = hypGam,
tuningGam = tuningGam,
hypPhi = hypPhi,
tuningPhi = tuningPhi,
nSamp = nSamp,
nBurn = nBurn,
nThin = nThin )
if (!saveToFile) {
outObj$phi <- phiOut
outObj$xi <- xiOut
outObj$gam <- gamOut
}
else
outObj$outPath <- outPath
return (outObj)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.