Nothing
##########################################################################
## sample from the posterior distribution of a logistic regression
## model in R using linked C++ code in Scythe
##
## KQ 1/23/2003
##
## This software is distributed under the terms of the GNU GENERAL
## PUBLIC LICENSE Version 2, June 1991. See the package LICENSE
## file for more information.
##
## Modified to meet new developer specification 7/15/2004 KQ
## Modified for new Scythe and rngs 7/25/2004 KQ
## note: B0 is now a precision
## Modified to allow user-specified prior density 8/17/2005 KQ
## Modified to handle marginal likelihood calculation 1/27/2006 KQ
##
## Copyright (C) 2003-2007 Andrew D. Martin and Kevin M. Quinn
## Copyright (C) 2007-present Andrew D. Martin, Kevin M. Quinn,
## and Jong Hee Park
##########################################################################
#' Markov Chain Monte Carlo for Logistic Regression
#'
#' This function generates a sample from the posterior distribution of a
#' logistic regression model using a random walk Metropolis algorithm. The user
#' supplies data and priors, and a sample from the posterior distribution is
#' returned as an mcmc object, which can be subsequently analyzed with
#' functions provided in the coda package.
#'
#' \code{MCMClogit} simulates from the posterior distribution of a logistic
#' regression model using a random walk Metropolis algorithm. The simulation
#' proper is done in compiled C++ code to maximize efficiency. Please consult
#' the coda documentation for a comprehensive list of functions that can be
#' used to analyze the posterior sample.
#'
#' The model takes the following form:
#'
#' \deqn{y_i \sim \mathcal{B}ernoulli(\pi_i)}
#'
#' Where the inverse link function:
#'
#' \deqn{\pi_i = \frac{\exp(x_i'\beta)}{1 + \exp(x_i'\beta)}}
#'
#' By default, we assume a multivariate Normal prior on \eqn{\beta}:
#'
#' \deqn{\beta \sim \mathcal{N}(b_0,B_0^{-1})}
#'
#' Additionally, arbitrary user-defined priors can be specified with
#' the \code{user.prior.density} argument.
#'
#' If the default multivariate normal prior is used, the Metropolis proposal
#' distribution is centered at the current value of \eqn{\beta} and has
#' variance-covariance \eqn{V = T (B_0 + C^{-1})^{-1} T }, where \eqn{T} is a the
#' diagonal positive definite matrix formed from the \code{tune}, \eqn{B_0}
#' is the prior precision, and \eqn{C} is the large sample
#' variance-covariance matrix of the MLEs. This last calculation is done via an
#' initial call to \code{glm}.
#'
#' If a user-defined prior is used, the Metropolis proposal distribution is
#' centered at the current value of \eqn{\beta} and has
#' variance-covariance \eqn{V = T C T}, where
#' \eqn{T} is a the diagonal positive definite matrix formed from the
#' \code{tune} and \eqn{C} is the large sample variance-covariance matrix of
#' the MLEs. This last calculation is done via an initial call to \code{glm}.
#'
#' @param formula Model formula.
#'
#' @param data Data frame.
#'
#' @param burnin The number of burn-in iterations for the sampler.
#'
#' @param mcmc The number of Metropolis iterations for the sampler.
#'
#' @param thin The thinning interval used in the simulation. The number of
#' mcmc iterations must be divisible by this value.
#'
#' @param tune Metropolis tuning parameter. Can be either a positive scalar or
#' a \eqn{k}-vector, where \eqn{k} is the length of
#' \eqn{\beta}.Make sure that the acceptance rate is satisfactory
#' (typically between 0.20 and 0.5) before using the posterior sample for
#' inference.
#'
#' @param verbose A switch which determines whether or not the progress of the
#' sampler is printed to the screen. If \code{verbose} is greater than 0 the
#' iteration number, the current beta vector, and the Metropolis acceptance
#' rate are printed to the screen every \code{verbose}th iteration.
#'
#' @param seed The seed for the random number generator. If NA, the Mersenne
#' Twister generator is used with default seed 12345; if an integer is passed
#' it is used to seed the Mersenne twister. The user can also pass a list of
#' length two to use the L'Ecuyer random number generator, which is suitable
#' for parallel computation. The first element of the list is the L'Ecuyer
#' seed, which is a vector of length six or NA (if NA a default seed of
#' \code{rep(12345,6)} is used). The second element of list is a positive
#' substream number. See the MCMCpack specification for more details.
#'
#' @param beta.start The starting value for the \eqn{\beta} vector. This
#' can either be a scalar or a column vector with dimension equal to the number
#' of betas. If this takes a scalar value, then that value will serve as the
#' starting value for all of the betas. The default value of NA will use the
#' maximum likelihood estimate of \eqn{\beta} as the starting value.
#'
#' @param b0 If \code{user.prior.density==NULL} \code{b0} is the prior mean of
#' \eqn{\beta} under a multivariate normal prior. This can either be a
#' scalar or a column vector with dimension equal to the number of betas. If
#' this takes a scalar value, then that value will serve as the prior mean for
#' all of the betas.
#'
#' @param B0 If \code{user.prior.density==NULL} \code{B0} is the prior
#' precision of \eqn{\beta} under a multivariate normal prior. This can
#' either be a scalar or a square matrix with dimensions equal to the number of
#' betas. If this takes a scalar value, then that value times an identity
#' matrix serves as the prior precision of \eqn{\beta}. Default value of
#' 0 is equivalent to an improper uniform prior for beta.
#'
#' @param user.prior.density If non-NULL, the prior (log)density up to a
#' constant of proportionality. This must be a function defined in R whose
#' first argument is a continuous (possibly vector) variable. This first
#' argument is the point in the state space at which the prior (log)density is
#' to be evaluated. Additional arguments can be passed to
#' \code{user.prior.density()} by inserting them in the call to
#' \code{MCMClogit()}. See the Details section and the examples below for more
#' information.
#'
#' @param logfun Logical indicating whether \code{use.prior.density()} returns
#' the natural log of a density function (TRUE) or a density (FALSE).
#'
#' @param marginal.likelihood How should the marginal likelihood be calculated?
#' Options are: \code{none} in which case the marginal likelihood will not be
#' calculated or \code{Laplace} in which case the Laplace approximation (see
#' Kass and Raftery, 1995) is used.
#'
#' @param ... further arguments to be passed
#'
#' @return An mcmc object that contains the posterior sample. This object can
#' be summarized by functions provided by the coda package.
#'
#' @export
#'
#' @seealso \code{\link[coda]{plot.mcmc}},\code{\link[coda]{summary.mcmc}},
#' \code{\link[stats]{glm}}
#'
#' @references Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park. 2011.
#' ``MCMCpack: Markov Chain Monte Carlo in R.'', \emph{Journal of Statistical
#' Software}. 42(9): 1-21. \doi{10.18637/jss.v042.i09}.
#'
#' Daniel Pemstein, Kevin M. Quinn, and Andrew D. Martin. 2007. \emph{Scythe
#' Statistical Library 1.0.} \url{http://scythe.wustl.edu.s3-website-us-east-1.amazonaws.com/}.
#'
#' Martyn Plummer, Nicky Best, Kate Cowles, and Karen Vines. 2006. ``Output
#' Analysis and Diagnostics for MCMC (CODA)'', \emph{R News}. 6(1): 7-11.
#' \url{https://CRAN.R-project.org/doc/Rnews/Rnews_2006-1.pdf}.
#'
#' @keywords models
#'
#' @examples
#'
#' \dontrun{
#' ## default improper uniform prior
#' data(birthwt)
#' posterior <- MCMClogit(low~age+as.factor(race)+smoke, data=birthwt)
#' plot(posterior)
#' summary(posterior)
#'
#'
#' ## multivariate normal prior
#' data(birthwt)
#' posterior <- MCMClogit(low~age+as.factor(race)+smoke, b0=0, B0=.001,
#' data=birthwt)
#' plot(posterior)
#' summary(posterior)
#'
#'
#' ## user-defined independent Cauchy prior
#' logpriorfun <- function(beta){
#' sum(dcauchy(beta, log=TRUE))
#' }
#'
#' posterior <- MCMClogit(low~age+as.factor(race)+smoke,
#' data=birthwt, user.prior.density=logpriorfun,
#' logfun=TRUE)
#' plot(posterior)
#' summary(posterior)
#'
#'
#' ## user-defined independent Cauchy prior with additional args
#' logpriorfun <- function(beta, location, scale){
#' sum(dcauchy(beta, location, scale, log=TRUE))
#' }
#'
#' posterior <- MCMClogit(low~age+as.factor(race)+smoke,
#' data=birthwt, user.prior.density=logpriorfun,
#' logfun=TRUE, location=0, scale=10)
#' plot(posterior)
#' summary(posterior)
#'
#'
#' }
#'
"MCMClogit" <-
function(formula, data=NULL, burnin = 1000, mcmc = 10000,
thin=1, tune=1.1, verbose = 0, seed = NA, beta.start = NA,
b0 = 0, B0 = 0, user.prior.density=NULL, logfun=TRUE,
marginal.likelihood = c("none", "Laplace"), ...) {
## checks
check.offset(list(...))
check.mcmc.parameters(burnin, mcmc, thin)
cl <- match.call()
## seeds
seeds <- form.seeds(seed)
lecuyer <- seeds[[1]]
seed.array <- seeds[[2]]
lecuyer.stream <- seeds[[3]]
## form response and model matrices
holder <- parse.formula(formula, data=data)
Y <- holder[[1]]
X <- holder[[2]]
xnames <- holder[[3]]
K <- ncol(X) # number of covariates
## starting values and priors
beta.start <- coef_start(beta.start, K, formula, family=binomial, data)
mvn.prior <- form.mvn.prior(b0, B0, K)
b0 <- mvn.prior[[1]]
B0 <- mvn.prior[[2]]
## get marginal likelihood argument
marginal.likelihood <- match.arg(marginal.likelihood)
B0.eigenvalues <- eigen(B0)$values
if (min(B0.eigenvalues) < 0){
stop("B0 is not positive semi-definite.\nPlease respecify and call again.\n")
}
if (isTRUE(all.equal(min(B0.eigenvalues), 0))){
if (marginal.likelihood != "none"){
warning("Cannot calculate marginal likelihood with improper prior\n")
marginal.likelihood <- "none"
}
}
logmarglike <- NULL
## setup the environment so that fun can see the things passed as ...
userfun <- function(ttt) user.prior.density(ttt, ...)
my.env <- environment(fun = userfun)
## check to make sure user.prior.density returns a numeric scalar and
## starting values have positive prior mass
if (is.function(user.prior.density)){
funval <- userfun(beta.start)
if (length(funval) != 1){
cat("user.prior.density does not return a scalar.\n")
stop("Respecify and call MCMClogit() again. \n")
}
if (!is.numeric(funval)){
cat("user.prior.density does not return a numeric value.\n")
stop("Respecify and call MCMClogit() again. \n")
}
if (identical(funval, Inf)){
cat("user.prior.density(beta.start) == Inf.\n")
stop("Respecify and call MCMClogit() again. \n")
}
if (logfun){
if (identical(funval, -Inf)){
cat("user.prior.density(beta.start) == -Inf.\n")
stop("Respecify and call MCMClogit() again. \n")
}
}
else{
if (funval <= 0){
cat("user.prior.density(beta.start) <= 0.\n")
stop("Respecify and call MCMClogit() again. \n")
}
}
}
else if (!is.null(user.prior.density)){
cat("user.prior.density is neither a NULL nor a function.\n")
stop("Respecify and call MCMClogit() again. \n")
}
## form the tuning parameter
tune <- vector.tune(tune, K)
V <- vcov(glm(formula=formula, data=data, family=binomial))
## y \in {0, 1} error checking
if (sum(Y!=0 & Y!=1) > 0) {
cat("Elements of Y equal to something other than 0 or 1.\n")
stop("Check data and call MCMClogit() again. \n")
}
propvar <- tune %*% V %*% tune
posterior <- NULL
## call C++ code to draw sample
if (is.null(user.prior.density)){
## define holder for posterior density sample
sample <- matrix(data=0, mcmc/thin, dim(X)[2] )
auto.Scythe.call(output.object="posterior", cc.fun.name="cMCMClogit",
sample.nonconst=sample, Y=Y, X=X,
burnin=as.integer(burnin),
mcmc=as.integer(mcmc), thin=as.integer(thin),
tune=tune, lecuyer=as.integer(lecuyer),
seedarray=as.integer(seed.array),
lecuyerstream=as.integer(lecuyer.stream),
verbose=as.integer(verbose), betastart=beta.start,
b0=b0, B0=B0, V=V)
## marginal likelihood calculation if Laplace
if (marginal.likelihood == "Laplace"){
theta.start <- beta.start
optim.out <- optim(theta.start, logpost.logit, method="BFGS",
control=list(fnscale=-1),
hessian=TRUE, y=Y, X=X, b0=b0, B0=B0)
theta.tilde <- optim.out$par
beta.tilde <- theta.tilde[1:K]
Sigma.tilde <- solve(-1*optim.out$hessian)
logmarglike <- (length(theta.tilde)/2)*log(2*pi) +
log(sqrt(det(Sigma.tilde))) +
logpost.logit(theta.tilde, Y, X, b0, B0)
}
## put together matrix and build MCMC object to return
output <- form.mcmc.object(posterior, names=xnames,
title="MCMClogit Posterior Sample",
y=Y, call=cl, logmarglike=logmarglike)
}
else {
sample <- .Call("MCMClogituserprior_cc",
userfun, as.integer(Y), as.matrix(X),
as.double(beta.start),
my.env, as.integer(burnin), as.integer(mcmc),
as.integer(thin),
as.integer(verbose),
lecuyer=as.integer(lecuyer),
seedarray=as.integer(seed.array),
lecuyerstream=as.integer(lecuyer.stream),
as.logical(logfun),
as.matrix(propvar),
PACKAGE="MCMCpack")
## marginal likelihood calculation if Laplace
if (marginal.likelihood == "Laplace"){
theta.start <- beta.start
optim.out <- optim(theta.start, logpost.logit.userprior, method="BFGS",
control=list(fnscale=-1),
hessian=TRUE, y=Y, X=X, userfun=userfun,
logfun=logfun, my.env=my.env)
theta.tilde <- optim.out$par
beta.tilde <- theta.tilde[1:K]
Sigma.tilde <- solve(-1*optim.out$hessian)
logmarglike <- (length(theta.tilde)/2)*log(2*pi) +
log(sqrt(det(Sigma.tilde))) +
logpost.logit.userprior(theta.tilde, Y, X, userfun=userfun,
logfun=logfun, my.env=my.env)
}
output <- mcmc(data=sample, start=burnin+1,
end=burnin+mcmc, thin=thin)
varnames(output) <- as.list(xnames)
attr(output, "title") <- "MCMClogit Posterior Sample"
attr(output, "y") <- Y
attr(output, "call") <- cl
attr(output, "logmarglike") <- logmarglike
}
return(output)
}
##########################################################################
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.