Nothing
#' @title
#' Parameter Estimation of (Zero-Truncated) Poisson Regression Model with
#' Multivariate Normal Prior Distributions.
#'
#' @description
#' The function \code{cpef2reg} estimates regression parameters of
#' (zero-truncated) Poisson regression model with multivariate normal prior
#' distributions.
#' See \sQuote{Details}.
#'
#' @param y
#' a numeric vector of length \code{n}.
#'
#' @param X
#' a model matrix of size \code{n}-by-\code{p}.
#'
#' @param b
#' a mean vector of length \code{p} for \code{apriori}.
#'
#' @param B
#' a variance-covariance matrix of size \code{p}-by-\code{p} for \code{apriori}.
#'
#' @param ztrunc
#' a logical value; Defults to \code{FALSE}; Is the zero truncated?
#'
#' @param method
#' a name of numerical method to be used for fitting the \code{formula} in
#' the \code{\link{model}}.
#' Three options of \code{MH}, \code{LA}, and \code{IS} are available.
#' See \sQuote{Details} for more information.
#'
#' @param xi
#' a numeric vector of length \code{p} for prediction.
#'
#' @param verbose
#' a logical value; Be more verbose about the process by displaying messages;
#' Defaults to \code{FALSE}.
#'
#' @param initrun
#' a logical value; Would you like to run the function \code{cpef} for other
#' numerical method in order to have the best guess about \code{start}?
#'
#' @param control
#' a list of parameters for controlling the fitting process.
#' See \sQuote{Details}.
#'
#' @param proposal
#' a list of parameters for controlling the fitting process.
#' See \sQuote{Details}.
#'
#' @param start
#' a list of parameters for controlling the fitting process.
#' See \sQuote{Details}.
#'
#' @details
#' Numericall methods are implemented for both cases of \code{ztrunc=TRUE} and
#' \code{ztrunc=FALSE}.
#'
#' \code{LA} (Laplace approximation) is the implementation of that in the
#' study \cite{Tierney, Kass, and Kadane 1989}.
#' The correction \code{const} defaults to \code{2e1} (log-scaled value).
#'
#' \code{MH} (Metropolis-Hasting algorithm) is the implementation of that
#' in the study \cite{Chib and Greenberg 1995}.
#' \code{nchains} and \code{nburns} defaults to \code{2e3} and \code{5e2}.
#' Normal distribution with \code{mu=log(mean(y))} and
#' \code{sigma=sd(y)} is chosen for a proposal distribution
#' to generate a candidate sample.
#'
#' \code{IS} (Importance sampling) is the implementation of that in the study
#' \cite{Denny 2001}.
#' \code{ess} (effective sample size) is computed with an importance sample of
#' size \code{n=1e3}.
#' \code{cpef} is recursively used with \code{method=LA} for having
#' the best guess for a mean value \code{mu} of a proposal distribution.
#'
#' @return
#' The list of components returned from \code{cpef2reg} depends on
#' what numerical method is employed.
#' All input paramters are inherited.
#' The commonly returned components are:
#'
#' \item{theta}{The estimated canonical parameter of length \code{p}.}
#' \item{ess}{The effective sample size if \code{method=IS}.}
#' \item{sratio}{The ratio of determinants of two hessian matrices
#' if \code{method=LA}}
#' \item{accept.rate}{The proportion of candidate samples that are accepted
#' in the chain of length \code{n=2e3} if \code{method=MH}.}
#'
#' @note
#' Estimation with \code{method=AQ} frequently fails on the cases of
#' \code{ztrunc=FALSE} and \code{ztrunc=TRUE} if \eqn{n<50}.
#'
#' Estimation with \code{method=LA} frequently fails on the case of
#' \code{ztrunc=TRUE} if \eqn{n<5e2}.
#'
#' \code{method=MH} is the most safe choice for various conditions.
#'
#' @keywords
#' Bayesian (zero-truncated) Poisson regression with a normal prior,
#' (Zero-truncated) Poisson regression with an imprecise prior
#'
#' @seealso
#' \code{\link{optim}}, \code{\link{integrate}}, \code{\link{update}},
#' \code{\link{model}}, \code{\link{iprior}}
#'
#' @references
#' Tierney L., Kass R. and Kadane J. (1989). ``Fully Exponential Laplace
#' Approximations to Expectations and Variances of Nonpositive Functions.''
#' _Journal of the American Statistical Association_, *84*(407), pp.
#' 710-716.
#'
#' Chib S. and Greenberg E. (1995). ``Understanding the Metropolis-Hastings
#' Algorithm.'' _The American Statistician_, *49*(4), pp. 327-335.
#'
#' Denny M. (2001). ``Introduction to importance sampling in rare-event
#' simulations.'' _European Journal of Physics_, *22*(4), pp. 403.
#'
#' Lee (2013) ``Imprecise inferential framework'', PhD thesis.
#'
#' @examples
#' \dontrun{
#' # cpef2reg
#' }
#'
#' @author Chel Hee Lee <\email{gnustats@@gmail.com}>
#' @export
cpef2reg <- function(y, X, ztrunc=FALSE, method=c("MH", "IS", "LA", "LAF"),
xi, b, B, apriori=c("normal"), start, verbose=FALSE,
initrun=FALSE, control=list(), proposal=list()){
# initial run for other numerical approximation
if (initrun){
xid <- mc <- NULL
} else {
xid <- eval(expr=expression(xtms.i), envir=parent.frame(n=1))
mc <- match.call(expand.dots=TRUE)
if (verbose) {
message(sQuote(xid), " is passed to ", sQuote(mc[[1]]),
" with the options of \nmethod=", sQuote(method),
", apriori=", sQuote(apriori),
", and ztrunc=", sQuote(ztrunc), ".")
}
}
# sanity check
method <- match.arg(method)
apriori <- match.arg(apriori)
stopifnot(!missing(y), is.numeric(y), is.vector(y))
stopifnot(!missing(X), is.matrix(X), is.numeric(X))
stopifnot(!missing(ztrunc), is.logical(ztrunc), length(ztrunc)==1)
stopifnot(!missing(method), is.character(method), length(method)==1)
stopifnot(!missing(b), is.numeric(b), is.vector(b))
stopifnot(!missing(B), is.numeric(B), is.matrix(B))
stopifnot(!missing(apriori), is.character(apriori), length(apriori)==1)
stopifnot(is.logical(verbose), length(verbose)==1)
stopifnot(is.logical(initrun), length(initrun)==1)
stopifnot(all.equal(length(b), ncol(X), nrow(B), ncol(B), ncol(X)))
stopifnot(!missing(start), is.list(start))
stopifnot(is.list(control), is.list(proposal))
if (missing(xi)) {
xi <- NULL
} else {
stopifnot(is.numeric(xi), is.vector(xi), length(xi)==ncol(X))
names(xi) <- colnames(X)
}
# naming convention
n <- length(y)
p <- ncol(X)
# control variables for numerical approximation
ctl <- list()
if (method == "MH") {
ctl$len.chain <- 2e3
ctl$len.burnin <- 5e2
}
if (method == "IS") {
ctl$ess <- 5
ctl$maxiter <- 5e1
}
if (method == "LA") {
ctl$bst.guess <- start$cfs
ctl$const <- 20 # or 15, 20, 25
ctl$tol <- 2e-3
ctl$maxiter <- 5e1
ctl$qc <- FALSE
}
if (all(names(control) %in% names(ctl))) {
ctl[names(control)] <- control
} else {
stop("Unknown names in ", sQuote("control"))
}
# proposal distribution specification
ppsl <- list()
if (method == "MH") {
ppsl$mu <- start$cfs
ppsl$sigma <- start$vcv
}
if (method == "IS") {
ppsl$n <- 1e3
bst.guess <- cpef2reg(y=y, X=X, ztrunc=ztrunc, method="LA", apriori="normal", b=b, B=B, start=start, initrun=TRUE, ...)$cfs
# print(bst.guess)
# print(start$cfs)
if (all(is.na(bst.guess))) {
ppsl$mu <- start$cfs
} else {
ppsl$mu <- bst.guess
}
ppsl$sigma <- start$vcv
}
if (all(names(proposal) %in% names(ppsl))) {
ppsl[names(proposal)] <- proposal
} else {
stop("Unknown names in ", sQuote("proposal"))
}
# returning basic objects
rvals <- list(xid=xid, y=y, X=X, ztrunc=ztrunc, method=method, apriori=apriori, b=b, B=B, xi=xi, control=ctl, proposal=ppsl)
if (verbose) {
message("Input sanity check .................... PASSED!\n")
}
lp.kernel <- function(beta){
# The kernel of log-posterior in terms of regression parameters
#
# Args:
# beta: Regression parameters
#
# Return:
# The evaluated scalar value
beta <- as.vector(beta)
mu <- as.vector(exp(X%*%beta))
llk <- -mu + y*log(mu)
if (ztrunc) {
llk <- sum(llk-ppois(q=0, lambda=mu, lower.tail=FALSE, log.p=TRUE))
} else {
llk <- sum(llk)
}
lp <- llk - 0.5*crossprod(x=(beta-b), y=ginv(B))%*%(beta-b)
return(lp)
}
fn.MHalg <- function(...){
# Implementation of Metropolis Hastings algorithm
#
# Args: ...
#
# Return:
# theta : The posterior means of 'beta'
# MHchain : The generated Markov chain of length 'ctl$len.chain'
# accept.rate: The acceptance ratio
len <- ctl$len.chain + ctl$len.burnin
BETA <- matrix(numeric(length=p*len), ncol=p)
accept <- logical(len)
if (verbose) {
cat("Progress ")
}
for (i in seq_len(len)) {
# i <- 1
# while (i<len) {
if (i==1) {
curr <- mvtnorm::rmvnorm(n=1, mean=as.vector(ppsl$mu), sigma=as.matrix(ppsl$sigma))
} else {
curr <- BETA[i-1, ]
}
cand <- mvtnorm::rmvnorm(n=1, mean=curr, sigma=ppsl$sigma)
lr <- lp.kernel(beta=cand) - lp.kernel(beta=curr)
if (is.nan(lr)) {
next
}
if (accept[i] <- (log(runif(1))<lr)){
BETA[i, ] <- cand
} else {
BETA[i, ] <- curr
}
if (verbose & (i%%100 == 0)) {
cat(".")
}
# i <- i + 1
}
BETA <- BETA[-seq_len(ctl$len.burnin), ]
colnames(BETA) <- colnames(X)
cfs <- colMeans(BETA)
accept.rate <- mean(as.numeric(accept[-seq_len(ctl$len.burnin)]))
if (verbose) {
message("\n", "Estimates = ", paste(round(cfs, 4), collapse=","), "\n",
"Acceptance Rate = ", round(accept.rate, 4), "\n")
}
rvals <- c(rvals, list(cfs=cfs, MHchain=BETA, accept.rate=accept.rate, ftag=match.call()[[1]]))
return(rvals)
}
fn.ISampler <- function(...){
# Implementation of Importance Sampling for variance reduction
#
# Args: ...
#
# Return:
# theta : The posterior means
# isample: Importance samples
# lweight: Log-valued importance weight
# ess : The effcective sample size
niter <- 0
withintol <- FALSE
while (!withintol & (niter < ctl$maxiter)) {
isample <- mvtnorm::rmvnorm(n=ppsl$n, mean=ppsl$mu, sigma=ppsl$sigma)
lp <- unlist(apply(isample, 1, lp.kernel))
lweight <- lp - unlist(apply(isample, 1, dmvnorm, mean=ppsl$mu, sigma=ppsl$sigma, log=TRUE))
lweight <- lweight - max(lweight)
cfs <- colSums(isample*exp(lweight))/sum(exp(lweight))
ess <- mean(((exp(lweight)/mean(exp(lweight)))-1)^2)
withintol <- (ess < ctl$ess)
niter <- niter + 1
}
rvals <- c(rvals, list(cfs=cfs, niter=niter, isample=isample,
lweight=lweight, ess=ess, ftag=match.call()[[1]]))
return(rvals)
}
fn.LApprox <- function(...){
# Implementation of Laplace approximation
#
# Args: ...
#
# Return:
# theta : The posterior means of 'beta'.
# sratio: The ratio of det(Sigma) to det(Sigma0).
# fdiff : The value of likelihood ratio between numerator and denominator.
hfn <- function(beta, i=NULL, flip=FALSE, pred.xi=FALSE, pred.N=FALSE, ...){
# The objective function in terms of
#
# Args:
# theta: A canonical parameter
# numer: Is this computation for the part of numerator?
#
# Return:
# The evaluated scalar value
beta <- as.vector(beta)
mu <- as.vector(exp(X%*%beta))
llk <- -mu + y*log(mu)
if (ztrunc) {
lp <- sum(llk - ppois(q=0, lambda=mu, lower.tail=FALSE, log.p=TRUE))
} else {
lp <- sum(llk)
}
rval <- lp - 0.5*crossprod(x=(beta-b), y=ginv(B))%*%(beta-b)
if (!is.null(i)) {
rval <- suppressWarnings(log(beta[i]+ctl$const)) + rval
}
if (pred.xi) {
rval <- xi%*%beta + rval
}
if (pred.N) {
rval <- suppressWarnings(log(sum(1/exp(ppois(q=0, lambda=mu, lower.tail=FALSE, log.p=TRUE))))) + rval
}
return(rval)
}
hgr <- function(beta, i=NULL, flip=FALSE, pred.xi=FALSE, pred.N=FALSE, ...){
# The gradient function for both cases of normal and log-gamma priors
#
# Args:
# theta: A canonical parameter
# numer: Is this computation for the part of numerator?
#
# Return:
#
beta <- as.vector(beta)
mu <- as.vector(exp(X%*%beta))
if (ztrunc) {
delta <- ppois(q=0, lambda=mu, lower.tail=FALSE, log.p=FALSE)
} else {
delta <- 1
}
rval <- colSums((-mu/delta + y)*X) + ginv(B)%*%(beta-b)
if (!is.null(i)) {
rval <- 1/(beta[i]+ctl$const) + rval
}
if (pred.xi) {
rval <- xi + rval
}
if (pred.N) {
rval <- colSums((1/ppois(q=0, lambda=mu, lower.tail=FALSE, log.p=FALSE))^2*ppois(q=0, lambda=mu, lower.tail=TRUE, log.p=FALSE)*X)/sum(1/ppois(q=0, lambda=mu, lower.tail=FALSE, log.p=FALSE)) + rval
}
return(rval)
}
nh0 <- optim(par=ctl$bst.guess, fn=hfn, gr=hgr, control=list(fnscale=-1),
hessian=TRUE)
if (nh0$convergence==0) {
Sigma0 <- -ginv(nh0$hessian)
ev0 <- eigen(Sigma0)$values
if ( any(ev0<=0) ) {
detSigma0 <- NA
nh0value <- NA
} else {
detSigma0 <- prod(ev0)
nh0value <- nh0$value
}
} else {
detSigma0 <- NA
nh0value <- NA
}
fdiff <- sratio <- cfs <- numeric(length=p)
fitted.nh <- list(nh0=nh0)
names(cfs) <- colnames(X)
for (j in seq_len(p)) {
niter <- 0
while (any( (sratio[j]<(1-ctl$tol)), (sratio[j]>(1+ctl$tol)), is.na(sratio[j]), is.na(fdiff[j])) & (niter < ctl$maxiter) ) {
nh <- optim(par=ctl$bst.guess, fn=hfn, gr=hgr, i=j, control=list(fnscale=-1), hessian=TRUE)
if (nh$convergence == 0) {
Sigma <- -ginv(nh$hessian)
ev <- eigen(Sigma)$values
if (any(ev <= 0)) {
detSigma <- NA
nhvalue <- NA
} else {
detSigma <- prod(ev)
nhvalue <- nh$value
}
} else {
detSigma <- NA
nhvalue <- NA
}
sratio[j] <- detSigma/detSigma0
fdiff[j] <- exp(nhvalue-nh0value)
ctl$bst.guess <- rnorm(n=p)
niter <- niter + 1
} # end of while
if ((niter >= ctl$maxiter) & verbose) {
message(mc[[1]], "reached the maximum number of trials at",
xid, ";\nmay try to use 'qc()'.\n")
}
fitted.nh[[j+1]] <- nh
niter[j] <- niter # from while
cfs[j] <- sqrt(detSigma/detSigma0) * exp(nhvalue-nh0value) - ctl$const
} # end of for
names(fitted.nh) <- paste("nh", seq(0, j), sep="")
if (verbose) {
message("Ratio of Determinants = ", paste(round(sratio,4), collapse=","))
}
if (verbose) {
message("Likelihood Ratio = ", paste(round(fdiff,4), collapse=","))
}
if (verbose) {
message("Estimates = ", paste(round(cfs, 4), collapse=","),"\n")
}
rvals$cfs <- cfs
rvals$fitted.nh <- fitted.nh
rvals$sratio <- sratio
rvals$niter <- niter
rvals$fdiff <- fdiff
rvals$ftag <- match.call()[[1]]
if (!is.null(xi)) { # estimation of g(beta) = exp(xi'beta) = mui for a single 'xi'
nh1mui <- optim(par=ctl$bst.guess, fn=hfn, gr=hgr, pred.xi=TRUE,
control=list(fnscale=-1), hessian=TRUE)
if (nh1mui$convergence==0) {
Sigma1mui <- -ginv(nh1mui$hessian)
ev1mui <- eigen(Sigma1mui)$values
if (any(ev1mui<=0)) {
detSigma1mui <- NA
nh1muivalue <- NA
} else {
detSigma1mui <- prod(ev1mui)
nh1muivalue <- nh1mui$value
}
} else {
detSigma1mui <- NA
nh1muivalue <- NA
}
rvals$mui <- sqrt(detSigma1mui/detSigma0) * exp(nh1muivalue - nh0value)
}
if(ztrunc){ # estimation of N
nh1N <- optim(par=ctl$bst.guess, fn=hfn, gr=hgr, pred.N=TRUE, control=list(fnscale=-1), hessian=TRUE)
if (nh1N$convergence==0) {
Sigma1N <- -ginv(nh1N$hessian)
ev1N <- eigen(Sigma1N)$values
if (any(ev1N<=0)){
detSigma1N <- NA
nh1Nvalue <- NA
} else {
detSigma1N <- prod(ev1N)
nh1Nvalue <- nh1N$value
}
} else {
detSigma1N <- NA
nh1Nvalue <- NA
}
rvals$N <- sqrt(detSigma1N/detSigma0) * exp(nh1Nvalue-nh0value)
}
return(rvals)
} # end of fn.LApprox
rvals <- switch(method,
"MH"=fn.MHalg(),
"IS"=fn.ISampler(),
"LA"=fn.LApprox())
if (verbose) {
message("Done!\n")
}
return(rvals)
}
NULL
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.