R/cpef2reg.R
In ipeglim: Imprecise Inferential Framework on Statistical Reasoning

Documented in cpef2reg

#' @title 
#' Parameter Estimation of (Zero-Truncated) Poisson Regression Model with
#' Multivariate Normal Prior Distributions.
#' 
#' @description 
#' The function \code{cpef2reg} estimates regression parameters of 
#' (zero-truncated) Poisson regression model with multivariate normal prior
#' distributions. 
#' See \sQuote{Details}.
#'
#' @param y 
#' a numeric vector of length \code{n}.
#'
#' @param X 
#' a model matrix of size \code{n}-by-\code{p}.
#'
#' @param b 
#' a mean vector of length \code{p} for \code{apriori}.
#'
#' @param B 
#' a variance-covariance matrix of size \code{p}-by-\code{p} for \code{apriori}.
#'
#' @param ztrunc 
#' a logical value; Defults to \code{FALSE}; Is the zero truncated?
#'
#' @param method
#' a name of numerical method to be used for fitting the \code{formula} in 
#' the \code{\link{model}}.  
#' Three options of \code{MH}, \code{LA}, and \code{IS} are available. 
#' See \sQuote{Details} for more information.
#'
#' @param xi 
#' a numeric vector of length \code{p} for prediction.
#'
#' @param verbose
#' a logical value; Be more verbose about the process by displaying messages;
#' Defaults to \code{FALSE}.
#'
#' @param initrun
#' a logical value; Would you like to run the function \code{cpef} for other 
#' numerical method in order to have the best guess about \code{start}?
#'
#' @param control
#' a list of parameters for controlling the fitting process. 
#' See \sQuote{Details}.
#' 
#' @param proposal
#' a list of parameters for controlling the fitting process. 
#' See \sQuote{Details}.
#'
#' @param start 
#' a list of parameters for controlling the fitting process. 
#' See \sQuote{Details}.
#'
#' @details
#' Numericall methods are implemented for both cases of \code{ztrunc=TRUE} and 
#' \code{ztrunc=FALSE}.
#'
#' \code{LA} (Laplace approximation) is the implementation of that in the 
#' study \cite{Tierney, Kass, and Kadane 1989}.
#' The correction \code{const} defaults to \code{2e1} (log-scaled value).
#' 
#' \code{MH} (Metropolis-Hasting algorithm) is the implementation of that 
#' in the study \cite{Chib and Greenberg 1995}.
#' \code{nchains} and \code{nburns} defaults to \code{2e3} and \code{5e2}.
#' Normal distribution with \code{mu=log(mean(y))} and 
#' \code{sigma=sd(y)} is chosen for a proposal distribution 
#' to generate a candidate sample.
#'
#' \code{IS} (Importance sampling) is the implementation of that in the study
#' \cite{Denny 2001}. 
#' \code{ess} (effective sample size) is computed with an importance sample of
#' size \code{n=1e3}. 
#' \code{cpef} is recursively used with \code{method=LA} for having 
#' the best guess for a mean value \code{mu} of a proposal distribution.
#'
#' @return 
#' The list of components returned from \code{cpef2reg} depends on 
#' what numerical method is employed.  
#' All input paramters are inherited.
#' The commonly returned components are:
#'
#' \item{theta}{The estimated canonical parameter of length \code{p}.}
#' \item{ess}{The effective sample size if \code{method=IS}.}
#' \item{sratio}{The ratio of determinants of two hessian matrices 
#' if \code{method=LA}} 
#' \item{accept.rate}{The proportion of candidate samples that are accepted 
#' in the chain of length \code{n=2e3} if \code{method=MH}.}
#'
#' @note
#' Estimation with \code{method=AQ} frequently fails on the cases of 
#' \code{ztrunc=FALSE} and \code{ztrunc=TRUE} if \eqn{n<50}.
#' 
#' Estimation with \code{method=LA} frequently fails on the case of
#' \code{ztrunc=TRUE} if \eqn{n<5e2}.
#' 
#' \code{method=MH} is the most safe choice for various conditions.
#' 
#' @keywords 
#' Bayesian (zero-truncated) Poisson regression with a normal prior, 
#' (Zero-truncated) Poisson regression with an imprecise prior
#'
#' @seealso 
#' \code{\link{optim}}, \code{\link{integrate}}, \code{\link{update}}, 
#' \code{\link{model}}, \code{\link{iprior}}
#' 
#' @references 
#' Tierney L., Kass R. and Kadane J. (1989). ``Fully Exponential Laplace
#' Approximations to Expectations and Variances of Nonpositive Functions.''
#' _Journal of the American Statistical Association_, *84*(407), pp.
#' 710-716.
#' 
#' Chib S. and Greenberg E. (1995). ``Understanding the Metropolis-Hastings
#' Algorithm.'' _The American Statistician_, *49*(4), pp. 327-335. 
#'
#' Denny M. (2001). ``Introduction to importance sampling in rare-event
#' simulations.'' _European Journal of Physics_, *22*(4), pp. 403.
#'
#' Lee (2013) ``Imprecise inferential framework'', PhD thesis.
#'
#' @examples
#' \dontrun{
#' # cpef2reg
#' }
#'
#' @author Chel Hee Lee <\email{gnustats@@gmail.com}>
#' @export
cpef2reg <- function(y, X, ztrunc=FALSE, method=c("MH", "IS", "LA", "LAF"), 
                     xi, b, B, apriori=c("normal"), start, verbose=FALSE, 
                     initrun=FALSE, control=list(), proposal=list()){

  # initial run for other numerical approximation
  if (initrun){
    xid <- mc <- NULL
  } else {
    xid <- eval(expr=expression(xtms.i), envir=parent.frame(n=1))
    mc <- match.call(expand.dots=TRUE)
    
    if (verbose) {
      message(sQuote(xid), " is passed to ", sQuote(mc[[1]]), 
              " with the options of \nmethod=", sQuote(method), 
              ", apriori=", sQuote(apriori),
              ", and ztrunc=", sQuote(ztrunc), ".")
    }
  }
  
  # sanity check
  method <- match.arg(method)
  apriori <- match.arg(apriori)
  
  stopifnot(!missing(y), is.numeric(y), is.vector(y))
  stopifnot(!missing(X), is.matrix(X), is.numeric(X))
  stopifnot(!missing(ztrunc), is.logical(ztrunc), length(ztrunc)==1)
  stopifnot(!missing(method), is.character(method), length(method)==1)
  stopifnot(!missing(b), is.numeric(b), is.vector(b))
  stopifnot(!missing(B), is.numeric(B), is.matrix(B))
  stopifnot(!missing(apriori), is.character(apriori), length(apriori)==1)
  stopifnot(is.logical(verbose), length(verbose)==1)
  stopifnot(is.logical(initrun), length(initrun)==1)
  stopifnot(all.equal(length(b), ncol(X), nrow(B), ncol(B), ncol(X)))
  stopifnot(!missing(start), is.list(start))
  stopifnot(is.list(control), is.list(proposal))

  if (missing(xi)) {
    xi <- NULL
  } else {
    stopifnot(is.numeric(xi), is.vector(xi), length(xi)==ncol(X))
    names(xi) <- colnames(X)
  }
    
  # naming convention
  n <- length(y)
  p <- ncol(X)

  # control variables for numerical approximation
  ctl <- list()

  if (method == "MH") {
    ctl$len.chain <- 2e3
    ctl$len.burnin <- 5e2
  }

  if (method == "IS") {
    ctl$ess <- 5
    ctl$maxiter <- 5e1
  }

  if (method == "LA") {
    ctl$bst.guess <- start$cfs
    ctl$const <- 20 # or 15, 20, 25
    ctl$tol <- 2e-3
    ctl$maxiter <- 5e1
    ctl$qc <- FALSE
  }

  if (all(names(control) %in% names(ctl))) {
    ctl[names(control)] <- control
  } else {
    stop("Unknown names in ", sQuote("control"))
  }

  # proposal distribution specification
  ppsl <- list()

  if (method == "MH") {
    ppsl$mu <- start$cfs
    ppsl$sigma <- start$vcv
  }

  if (method == "IS") {
    ppsl$n <- 1e3
    bst.guess <- cpef2reg(y=y, X=X, ztrunc=ztrunc, method="LA", apriori="normal", b=b, B=B, start=start, initrun=TRUE, ...)$cfs
#    print(bst.guess)
#    print(start$cfs)
    if (all(is.na(bst.guess))) {
      ppsl$mu <- start$cfs
    } else {
      ppsl$mu <- bst.guess
    }
    ppsl$sigma <- start$vcv
  }
  
  if (all(names(proposal) %in% names(ppsl))) {
    ppsl[names(proposal)] <- proposal
  } else {
    stop("Unknown names in ", sQuote("proposal"))
  }
  
  # returning basic objects
  rvals <- list(xid=xid, y=y, X=X, ztrunc=ztrunc, method=method, apriori=apriori, b=b, B=B, xi=xi, control=ctl, proposal=ppsl)
  
  if (verbose) {
    message("Input sanity check .................... PASSED!\n")
  }

  lp.kernel <- function(beta){
    # The kernel of log-posterior in terms of regression parameters
    # 
    # Args:
    #   beta: Regression parameters
    #
    # Return:
    #   The evaluated scalar value
    
    beta <- as.vector(beta)
    mu <- as.vector(exp(X%*%beta))
    llk <- -mu + y*log(mu)
    
    if (ztrunc) {
      llk <- sum(llk-ppois(q=0, lambda=mu, lower.tail=FALSE, log.p=TRUE))
    } else {
      llk <- sum(llk)
    }
    
    lp <- llk - 0.5*crossprod(x=(beta-b), y=ginv(B))%*%(beta-b)
    return(lp)  
  }
  
  fn.MHalg <- function(...){
    # Implementation of Metropolis Hastings algorithm
    # 
    # Args: ...
    #
    # Return:
    #   theta      : The posterior means of 'beta'
    #   MHchain    : The generated Markov chain of length 'ctl$len.chain' 
    #   accept.rate: The acceptance ratio
    
    len <- ctl$len.chain + ctl$len.burnin
    BETA <- matrix(numeric(length=p*len), ncol=p)
    accept <- logical(len)
    
    if (verbose) {
      cat("Progress ")
    }

    for (i in seq_len(len)) {
#		i <- 1
#		while (i<len) {
    
      if (i==1) {
        curr <- mvtnorm::rmvnorm(n=1, mean=as.vector(ppsl$mu), sigma=as.matrix(ppsl$sigma))
      } else {
        curr <- BETA[i-1, ]
      }
      
      cand <- mvtnorm::rmvnorm(n=1, mean=curr, sigma=ppsl$sigma)
      lr <- lp.kernel(beta=cand) - lp.kernel(beta=curr)
      
      if (is.nan(lr)) {
				next
			}
      
      if (accept[i] <- (log(runif(1))<lr)){
        BETA[i, ] <- cand
      } else {
        BETA[i, ] <- curr
      }
      
      if (verbose & (i%%100 == 0)) {
        cat(".")
      }
#      i <- i + 1
    }
    
    BETA <- BETA[-seq_len(ctl$len.burnin), ]
    colnames(BETA) <- colnames(X)
    cfs <- colMeans(BETA)
    accept.rate <- mean(as.numeric(accept[-seq_len(ctl$len.burnin)]))
    
    if (verbose) {
      message("\n", "Estimates = ", paste(round(cfs, 4), collapse=","), "\n",
              "Acceptance Rate = ", round(accept.rate, 4), "\n")
    }
    
    rvals <- c(rvals, list(cfs=cfs, MHchain=BETA, accept.rate=accept.rate, ftag=match.call()[[1]]))
    return(rvals)
  }

  fn.ISampler <- function(...){
    # Implementation of Importance Sampling for variance reduction
    # 
    # Args: ...
    # 
    # Return:
    #   theta  : The posterior means
    #   isample: Importance samples
    #   lweight: Log-valued importance weight
    #   ess    : The effcective sample size
    
    niter <- 0
    withintol <- FALSE
    
    while (!withintol & (niter < ctl$maxiter)) {  
    
      isample <- mvtnorm::rmvnorm(n=ppsl$n, mean=ppsl$mu, sigma=ppsl$sigma)
      lp <- unlist(apply(isample, 1, lp.kernel))
      lweight <- lp - unlist(apply(isample, 1, dmvnorm, mean=ppsl$mu, sigma=ppsl$sigma, log=TRUE))
      lweight <- lweight - max(lweight)
      cfs <- colSums(isample*exp(lweight))/sum(exp(lweight))
      ess <- mean(((exp(lweight)/mean(exp(lweight)))-1)^2)
      withintol <- (ess < ctl$ess)
      niter <- niter + 1
      
    }
    
    rvals <- c(rvals, list(cfs=cfs, niter=niter, isample=isample, 
                           lweight=lweight, ess=ess, ftag=match.call()[[1]]))
    return(rvals)
  }

  fn.LApprox <- function(...){
    # Implementation of Laplace approximation
    # 
    # Args: ...
    # 
    # Return:
    #   theta : The posterior means of 'beta'.
    #   sratio: The ratio of det(Sigma) to det(Sigma0).
    #   fdiff : The value of likelihood ratio between numerator and denominator.
  
    hfn <- function(beta, i=NULL, flip=FALSE, pred.xi=FALSE, pred.N=FALSE, ...){
      # The objective function in terms of 
      # 
      # Args: 
      #   theta: A canonical parameter
      #   numer: Is this computation for the part of numerator?
      # 
      # Return:
      #   The evaluated scalar value
      
      beta <- as.vector(beta)
      mu <- as.vector(exp(X%*%beta))
      llk <- -mu + y*log(mu)
      
      if (ztrunc) {
        lp <- sum(llk - ppois(q=0, lambda=mu, lower.tail=FALSE, log.p=TRUE))
      } else {
        lp <- sum(llk)
      }
      
      rval <- lp - 0.5*crossprod(x=(beta-b), y=ginv(B))%*%(beta-b)
      
      if (!is.null(i)) {
        rval <- suppressWarnings(log(beta[i]+ctl$const)) + rval
      }

      if (pred.xi) {
        rval <- xi%*%beta + rval
      }
      
      if (pred.N) {
        rval <- suppressWarnings(log(sum(1/exp(ppois(q=0, lambda=mu, lower.tail=FALSE, log.p=TRUE))))) + rval
      }
      
      return(rval)
    }

    hgr <- function(beta, i=NULL, flip=FALSE, pred.xi=FALSE, pred.N=FALSE, ...){
      # The gradient function for both cases of normal and log-gamma priors
      # 
      # Args:
      #   theta: A canonical parameter
      #   numer: Is this computation for the part of numerator?
      # 
      # Return:
      #   
      
      beta <- as.vector(beta)
      mu <- as.vector(exp(X%*%beta))
      
      if (ztrunc) {
        delta <- ppois(q=0, lambda=mu, lower.tail=FALSE, log.p=FALSE)
      } else {
        delta <- 1
      }
      
      rval <- colSums((-mu/delta + y)*X) + ginv(B)%*%(beta-b)

      if (!is.null(i)) {
        rval <- 1/(beta[i]+ctl$const) + rval
      }
      
      if (pred.xi) {
        rval <- xi + rval
      }
      
      if (pred.N) {
        rval <- colSums((1/ppois(q=0, lambda=mu, lower.tail=FALSE, log.p=FALSE))^2*ppois(q=0, lambda=mu, lower.tail=TRUE, log.p=FALSE)*X)/sum(1/ppois(q=0, lambda=mu, lower.tail=FALSE, log.p=FALSE)) + rval
      }
        
      return(rval)
    }
  
    nh0 <- optim(par=ctl$bst.guess, fn=hfn, gr=hgr, control=list(fnscale=-1), 
                 hessian=TRUE)
                 
    if (nh0$convergence==0) {
      Sigma0 <- -ginv(nh0$hessian)
      ev0 <- eigen(Sigma0)$values
      
      if ( any(ev0<=0) ) {
        detSigma0 <- NA
        nh0value <- NA
      } else {
        detSigma0 <- prod(ev0)
        nh0value <- nh0$value
      }
      
    } else {
      detSigma0 <- NA
      nh0value <- NA
    }
    
    fdiff <- sratio <- cfs <- numeric(length=p)
    fitted.nh <- list(nh0=nh0)
    names(cfs) <- colnames(X)
    
    for (j in seq_len(p)) {
      niter <- 0
      
      while (any( (sratio[j]<(1-ctl$tol)), (sratio[j]>(1+ctl$tol)), is.na(sratio[j]), is.na(fdiff[j])) & (niter < ctl$maxiter) ) {
      
        nh <- optim(par=ctl$bst.guess, fn=hfn, gr=hgr, i=j, control=list(fnscale=-1), hessian=TRUE)
        
        if (nh$convergence == 0) {
          Sigma <- -ginv(nh$hessian)
          ev <- eigen(Sigma)$values
          
          if (any(ev <= 0)) {
            detSigma <- NA
            nhvalue <- NA
          } else {
            detSigma <- prod(ev)
            nhvalue <- nh$value
          }
          
        } else {
          detSigma <- NA
          nhvalue <- NA
        }
        
        sratio[j] <- detSigma/detSigma0 
        fdiff[j] <- exp(nhvalue-nh0value)
        ctl$bst.guess <- rnorm(n=p)
        niter <- niter + 1
        
      } # end of while
      
      if ((niter >= ctl$maxiter) & verbose) {
        message(mc[[1]], "reached the maximum number of trials at", 
                xid, ";\nmay try to use 'qc()'.\n")
      }
      
      fitted.nh[[j+1]] <- nh
      niter[j] <- niter # from while
      cfs[j] <- sqrt(detSigma/detSigma0) * exp(nhvalue-nh0value) - ctl$const      
    } # end of for
    
    names(fitted.nh) <- paste("nh", seq(0, j), sep="")
    if (verbose) {
      message("Ratio of Determinants = ", paste(round(sratio,4), collapse=","))
    }
    if (verbose) {
      message("Likelihood Ratio = ", paste(round(fdiff,4), collapse=","))
    }
    if (verbose) {
      message("Estimates = ", paste(round(cfs, 4), collapse=","),"\n")
    }
    
    rvals$cfs <- cfs
    rvals$fitted.nh <- fitted.nh
    rvals$sratio <- sratio
    rvals$niter <- niter
    rvals$fdiff <- fdiff
    rvals$ftag <- match.call()[[1]]

    if (!is.null(xi)) { # estimation of g(beta) = exp(xi'beta) = mui for a single 'xi'
      nh1mui <- optim(par=ctl$bst.guess, fn=hfn, gr=hgr, pred.xi=TRUE, 
                      control=list(fnscale=-1), hessian=TRUE)
      
      if (nh1mui$convergence==0) {
        Sigma1mui <- -ginv(nh1mui$hessian)
        ev1mui <- eigen(Sigma1mui)$values
        
        if (any(ev1mui<=0)) {
          detSigma1mui <- NA
          nh1muivalue <- NA
        } else {
          detSigma1mui <- prod(ev1mui)
          nh1muivalue <- nh1mui$value
        }
        
      } else {
        detSigma1mui <- NA
        nh1muivalue <- NA
      }
      
      rvals$mui <- sqrt(detSigma1mui/detSigma0) * exp(nh1muivalue - nh0value)
    }

    if(ztrunc){ # estimation of N
      nh1N <- optim(par=ctl$bst.guess, fn=hfn, gr=hgr, pred.N=TRUE, control=list(fnscale=-1), hessian=TRUE)
      
      if (nh1N$convergence==0) {
        Sigma1N <- -ginv(nh1N$hessian)
        ev1N <- eigen(Sigma1N)$values
        
        if (any(ev1N<=0)){
          detSigma1N <- NA
          nh1Nvalue <- NA
        } else {
          detSigma1N <- prod(ev1N)
          nh1Nvalue <- nh1N$value
        }
        
      } else {
        detSigma1N <- NA
        nh1Nvalue <- NA
      }
      
      rvals$N <- sqrt(detSigma1N/detSigma0) * exp(nh1Nvalue-nh0value)
    }
    
    return(rvals)
  } # end of fn.LApprox

  rvals <- switch(method, 
    "MH"=fn.MHalg(),
    "IS"=fn.ISampler(),
    "LA"=fn.LApprox())
  
  if (verbose) {
    message("Done!\n")
  }
  
  return(rvals)
}
NULL