Nothing
#' @title
#' Parameter estimation of Three-Parameter Exponential Family
#'
#' @description
#' The function \code{cpef} numerically estimates a canonical parameter of
#' the three-parameter exponential family of distributions that includes
#' two different types of prior distributions (log-gamma and normal).
#' See \sQuote{Details} and \cite{Lee 2013} for more information.
#'
#' @param y
#' a numeric vector of length \code{n}.
#'
#' @param hparam
#' a numeric vector of length \code{2} for \code{apriori}.
#'
#' @param apriori
#' a character string of length \code{1} to specify a prior distribution;
#' Defaults to \code{normal}. Alternative is \code{lgamma}.
#'
#' @param method
#' a name of numerical method to be used for fitting the \code{formula} in
#' the \code{\link{model}}.
#' Five options of \code{MH}, \code{LA}, \code{IS}, \code{AH}, \code{AS} are
#' available.
#' See \sQuote{Details} for more information.
#'
#' @param ztrunc
#' a logical value; Is the zero truncated?; Defaults to \code{FALSE}.
#'
#' @param verbose
#' a logical value; Be more verbose about the process by displaying messages;
#' Defaults to \code{FALSE}.
#'
#' @param start
#' a starting value for a canonical parameter to be fitted.
#'
#' @param initrun
#' a logical value; Would you like to run the function \code{cpef} for other
#' numerical method in order to have the best guess about \code{start}?
#'
#' @param control
#' a list of parameters for controlling the fitting process.
#' See \sQuote{Details}.
#'
#' @param proposal
#' a list of parameters for controlling the fitting process.
#' See \sQuote{Details}.
#'
#' @return
#' The list of components returned from \code{cpef} depends on what numerical
#' method is employed.
#' All input paramters are inherited.
#' The commonly returned components are:
#'
#' \item{theta}{The estimated canonical parameter of length \code{1}.}
#' \item{ess}{The effective sample size if \code{method=IS}.}
#' \item{sratio}{The ratio of determinants of two hessian matrices
#' if \code{method=LA}}
#' \item{accept.rate}{The proportion of candidate samples that are accepted
#' in the chain of length \code{n=2e3} if \code{method=MH}.}
#'
#' @details
#' The function \code{cpef} stands for a three-parameter exponenital family of
#' distributions of a parameter which is canonically parameterized.
#' This family of distributions has a conjugacy of log-gamma and normal priors
#' to a (zero-truncated) Poisson sampling model.
#'
#' Numericall methods are implemented for both cases of \code{ztrunc=TRUE} and
#' \code{ztrunc=FALSE} on this family of distributions that is a general
#' form including both \code{apriori=lgamma} and \code{apriori=normal}.
#'
#' \code{LA} (Laplace approximation) is the implementation of that in the
#' study \cite{Tierney, Kass, and Kadane 1989}.
#' The correction \code{const} defaults to \code{2e1} (log-scaled value).
#'
#' \code{MH} (Metropolis-Hasting algorithm) is the implementation of that
#' in the study \cite{Chib and Greenberg 1995}.
#' \code{nchains} and \code{nburns} defaults to \code{2e3} and \code{5e2}.
#' Normal distribution with \code{mu=log(mean(y))} and
#' \code{sigma=sd(y)} is chosen for a proposal distribution
#' to generate a candidate sample.
#'
#' \code{IS} (Importance sampling) is the implementation of that in the study
#' \cite{Denny 2001}.
#' \code{ess} (effective sample size) is computed with an importance sample of
#' size \code{n=1e3}.
#' \code{cpef} is recursively used with \code{method=LA} for having
#' the best guess for a mean value \code{mu} of a proposal distribution.
#'
#' \code{AS} (Analytic soluation) is available only and only if
#' \code{ztrunc=FALSE} and \code{apriori=lgamma}.
#'
#' \code{AQ} (Adaptive quadrture) is the direct integration approach in
#' terms of the canonical parameter \code{theta} using the R function
#' \code{integrate} on infinite interval.
#'
#' @examples
#' \dontrun{
#' ## try to change the options of numerical methods
#' cpef(y=rpois(1e5, 1), hparam=c(1,2), ztrunc=TRUE, method="LA")
#' }
#'
#' @keywords
#' Three-parameter exponential family of distributions,
#' Canonical parametrization,
#' Zero-truncated Poisson model with an imprecise prior,
#' Bayesian zero-truncated Poisson with a normal prior,
#' Bayesian zero-truncated Poisson with a log-gamma prior
#'
#' @seealso
#' \code{\link{optim}}, \code{\link{integrate}}, \code{\link{update}},
#' \code{\link{model}}, \code{\link{iprior}}
#'
#' @references
#' Tierney L., Kass R. and Kadane J. (1989). ``Fully Exponential Laplace
#' Approximations to Expectations and Variances of Nonpositive Functions.''
#' _Journal of the American Statistical Association_, *84*(407), pp.
#' 710-716.
#'
#' Chib S. and Greenberg E. (1995). ``Understanding the Metropolis-Hastings
#' Algorithm.'' _The American Statistician_, *49*(4), pp. 327-335.
#'
#' Denny M. (2001). ``Introduction to importance sampling in rare-event
#' simulations.'' _European Journal of Physics_, *22*(4), pp. 403.
#'
#' Lee (2013) ``Imprecise inferential framework'', PhD thesis.
#'
#' R Core Team (2013). R: A language and environment for statistical
#' computing. R Foundation for Statistical Computing, Vienna, Austria.
#' URL \url{http://www.R-project.org/}.
#'
#' @author Chel Hee Lee <\email{gnustats@@gmail.com}>
#' @export
cpef <- function(y, hparam, apriori=c("lgamma", "normal"), ztrunc=FALSE,
method=c("LA", "MH", "IS", "AQ", "AS"), verbose=FALSE, # nutau2=TRUE,
initrun=FALSE, start, control=list(), proposal=list()) {
# sanity check
method <- match.arg(method)
apriori <- match.arg(apriori)
# initial run for other approximation methods
if (initrun) {
xid <- mc <- NULL
} else {
xid <- eval(expr=expression(xtms.i), envir=parent.frame(n=1))
mc <- match.call(expand.dots=TRUE)
if (verbose) {
message(sQuote(xid), " is passed to ", sQuote(mc[[1]]),
" with the options of method=", sQuote(method),
", apriori=", sQuote(apriori),
", and ztrunc=", sQuote(ztrunc), ".")
}
}
stopifnot(!missing(y), is.numeric(y), is.vector(y))
stopifnot(!missing(hparam), is.numeric(hparam), is.vector(hparam), length(hparam)==2)
stopifnot(!missing(ztrunc), is.logical(ztrunc), length(ztrunc)==1)
stopifnot(!missing(method), is.character(method), length(method)==1)
stopifnot(!missing(apriori), is.character(apriori), length(apriori)==1)
stopifnot(is.logical(verbose), length(verbose)==1)
stopifnot(is.logical(initrun), length(initrun)==1)
stopifnot(is.list(control), is.list(proposal))
# naming conventions
sumy <- sum(y)
n <- length(y)
h1 <- hparam[1]
h2 <- hparam[2]
if (apriori == "normal") {
# if(nutau2) eta <- c(0.5/h2, sumy+h1/h2, n)
# else eta <- c(h2, h1+sumy, n)
eta <- c(h2, h1+sumy, n)
}
if (apriori == "lgamma") {
eta <- c(0, sumy+h1, n+h2)
}
# control variables for numerical approximation
ctl <- list()
if (method == "MH") {
ctl$len.chain <- 2e3
ctl$len.burnin <- 5e2
}
if (method == "IS") {
ctl$ess <- 5
ctl$maxiter <- 5e1
}
if (method == "LA") {
ctl$const <- 2e1
ctl$tol <- 2e-3 # TODO: remove it.
stopifnot(!missing(start))
ctl$bst.guess <- start # TODO: try 'ctl$bst.guess <- digamma(h1)-log(h2)'
}
if (all(names(control) %in% names(ctl))) {
ctl[names(control)] <- control
} else {
stop("Unknown names in ", sQuote("control"))
}
# proposal distribution specification
ppsl <- list()
if (method == "MH") {
ppsl$mu <- log(mean(y)) + rnorm(1)
ppsl$sigma <- ifelse(sd(y)>0, sd(y), 1)
}
if (method == "IS") {
ppsl$n <- 1e3
ppsl$mu <- cpef(y=y, hparam=hparam, method="LA", ztrunc=ztrunc, apriori=apriori, initrun=TRUE, start=start)$theta
ppsl$sigma <- ifelse(sd(y)>0, sd(y), 1)
}
if (all(names(proposal) %in% names(ppsl))) {
ppsl[names(proposal)] <- proposal
} else {
stop("Unknown names in ", sQuote("proposal"))
}
# basic returning objects
rvals <- list(xid=xid, y=y, hparam=hparam, ztrunc=ztrunc, method=method, apriori=apriori, eta=eta, control=ctl, proposal=ppsl)
# print(rvals)
if (verbose) {
message("Input sanity check .................... PASSED!")
}
lp.kernel <- function(theta, ...) {
# The kernel of log-posterior in the form of three-parameter exponential
# family of distributions that include both 'log-gamma' and 'normal' priors
# in terms of a canonical parameter 'theta'.
#
# Args:
# theta: A canonical parameter
#
# Returns:
# The evaluated scalar value
val <- eta[2]*theta - eta[3]*exp(theta)
if (apriori == "normal") {
val <- -eta[1]*theta^2 + val
}
if (ztrunc) {
val <- val - n*ppois(q=0, lambda=exp(theta), lower.tail=FALSE, log.p=TRUE)
}
return(val)
}
fn.MHalg <- function(...){
# Implementation of Metropolis-Hastings algorithm
#
# Args: ...
#
# Return:
# theta : The posterior mean of 'theta'
# MHchain : The generated Markov chain of length 'ctl$len.chain'
# accept.rate: The acceptance ratio
len <- with(ctl, sum(len.chain, len.burnin))
THETA <- numeric(len)
accept <- logical(len)
if (verbose) {
cat("progress ")
}
for (i in seq_len(len)) {
if (i==1) {
curr <- rnorm(1, mean=ppsl$mu, sd=ppsl$sigma)
} else {
curr <- THETA[i-1]
}
cand <- rnorm(n=1, mean=curr, sd=ppsl$sigma)
lr <- lp.kernel(theta=cand, ...) - lp.kernel(theta=curr, ...)
if (accept[i] <- (log(runif(1)) < lr)) {
THETA[i] <- cand
} else {
THETA[i] <- curr
}
if (verbose & i%%100==0) {
cat(".")
}
}
THETA <- THETA[-seq_len(ctl$len.burnin)]
accept.rate <- mean(as.numeric(accept[-seq_len(ctl$len.burnin)]))
rvals <- c(rvals, list(MHchain=THETA, theta=mean(THETA), accept.rate=accept.rate))
return(rvals)
}
fn.ISampler <- function(...){
# Implementation of Importance Sampling for variance reduction
#
# Args: ...
#
# Return:
# theta : The posterior mean
# isample: Importance samples
# lweight: Log-valued importance weight
# ess : The effcective sample size
withintol <- FALSE
niter <- 0
while(!withintol & (niter < ctl$maxiter)){
isample <- rnorm(n=ppsl$n, mean=ppsl$mu, sd=ppsl$sigma)
lp <- unlist(lapply(as.list(isample), lp.kernel))
lweight <- lp - unlist(lapply(as.list(isample), dnorm, mean=ppsl$mu, sd=ppsl$sigma, log=TRUE))
lweight <- lweight - max(lweight)
theta <- sum(isample*exp(lweight))/sum(exp(lweight))
ess <- mean(((exp(lweight)/mean(exp(lweight)))-1)^2)
withintol <- (ess < ctl$ess)
niter <- niter + 1
}
rvals <- c(rvals, list(niter=niter, isample=isample, lweight=lweight, theta=theta, ess=ess))
return(rvals)
}
fn.LApprox <- function(...){
# Implementation of Laplace approximation
#
# Args: ...
#
# Return:
# theta : The posterior mean of 'theta'.
# sratio: The difference of sigma (numerator) and sigma0 (denominator).
# fdiff : The value of likelihood ratio between numerator and denominator.
fn <- function(theta, numer=FALSE, ...){
# The objective function for both cases of normal and log-gamma priors
#
# Args:
# theta: A canonical parameter
# numer: Is this computation for the part of numerator?
#
# Return:
# The evaluated scalar value
val <- eta[2]*theta - eta[3]*exp(theta)
if (apriori=="normal") {
val <- val - eta[1]*theta^2
}
if (numer) {
val <- suppressWarnings(log(theta+ctl$const)) + val
}
if (ztrunc) {
val <- val - n*suppressWarnings(ppois(q=0, lambda=exp(theta), lower.tail=FALSE, log.p=TRUE))
}
return(val)
}
gr <- function(theta, numer=FALSE, ...){
# The gradient function for both cases of normal and log-gamma priors
#
# Args:
# theta: A canonical parameter
# numer: Is this computation for the part of numerator?
#
# Return:
# The evaluated scalar value
val <- eta[2] - eta[3]*exp(theta)
if (apriori=="normal") {
val <- -2*eta[1]*theta + val
}
if (numer) {
val <- 1/(theta+ctl$const) + val
}
if (ztrunc) {
val <- val - n*exp(theta-exp(theta))/ppois(q=0, lambda=exp(theta), lower.tail=FALSE, log.p=FALSE)
}
return(val)
}
fit0 <- optim(par=ctl$bst.guess, fn=fn, gr=gr, method="BFGS",
control=list(fnscale=-1), hessian=TRUE)
fit1 <- optim(par=ctl$bst.guess, fn=fn, gr=gr, numer=TRUE, method="BFGS",
control=list(fnscale=-1), hessian=TRUE)
if (fit0$convergence == 0) {
sigma0 <- -1/fit0$hessian
} else {
sigma0 <- NaN
}
if (fit1$convergence == 0) {
sigma1 <- -1/fit1$hessian
} else {
sigma0 <- NaN
}
sratio <- sigma1/sigma0
fdiff <- exp(fit1$value - fit0$value)
theta <- suppressWarnings(sqrt(sratio))*fdiff - ctl$const
rvals <- c(rvals, list(sratio=sratio, fdiff=fdiff, theta=theta))
return(rvals)
}
fn.AdapQuad <- function(...){
# Integration method of Adaptive Qaudrature for computing three-parameter
# exponenital family of distribution (3P-EPD)
#
# Args:
# ...: To be passed
#
# Return:
# theta: The posterior mean of 'theta'
if(apriori=="lgamma") eta[1] <- 0
f1 <- function(theta, ...) {
# The kernel of 3P-EPD in the numerator part
#
# Args:
# theta: A canonical parameter
#
# Return:
# The evaluated scalar value
val <- exp(-eta[1]*theta^2 + eta[2]*theta - exp(log(eta[3])+theta) - as.numeric(ztrunc)*n*ppois(q=0, lambda=exp(theta), lower.tail=FALSE, log.p=TRUE))
return(val)
}
k1 <- tryCatch(integrate(f1, lower=-Inf, upper=Inf)$value, error=function(e) return(NaN))
f2 <- function(theta, ...) {
# The kernel of 3P-EPD in the denominator part
#
# Args:
# theta: A canonical parameter
#
# Return:
# The evaluated scalar value
val <- theta*exp(-eta[1]*theta^2 + eta[2]*theta - exp(log(eta[3])+theta) - as.numeric(ztrunc)*n*ppois(q=0, lambda=exp(theta), lower.tail=FALSE, log.p=TRUE))
return(val)
}
k2 <- tryCatch(integrate(f2, lower=-Inf, upper=Inf)$value, error=function(e) return(NaN))
rvals$theta <- tryCatch(k2/k1, error=function(e) return(NaN))
return(rvals)
}
fn.AnalSol <- function(...) {
# The Analystic solution for the posterior mean of 'theta'
# with a log-gamma prior when the zero is not truncated.
#
# Args:
# ...: To be passed
#
# Return:
# The posterior mean of 'theta'.
if(apriori=="lgamma"){
rvals$theta <- digamma(eta[2]) - log(eta[3])
} else {
stop("Analytic solution doesn't exist.\n")
}
return(rvals)
}
rvals <- switch(method,
"LA"=fn.LApprox(),
"MH"=fn.MHalg(),
"IS"=fn.ISampler(),
"AQ"=fn.AdapQuad(),
"AS"=fn.AnalSol())
# print(rvals)
if (verbose) {
message("Done!\n")
}
return(rvals)
}
NULL
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.