R/MCMCprobitChange.R
In MCMCpack: Markov Chain Monte Carlo (MCMC) Package

#########################################################
##
## sample from the posterior distribution
## of a probit regression model with multiple changepoints
##
## JHP 07/01/2007
## JHP 03/03/2009
#########################################################

#' Markov Chain Monte Carlo for a linear Gaussian Multiple Changepoint Model
#'
#' This function generates a sample from the posterior distribution of a linear
#' Gaussian model with multiple changepoints. The function uses the Markov
#' chain Monte Carlo method of Chib (1998).  The user supplies data and priors,
#' and a sample from the posterior distribution is returned as an mcmc object,
#' which can be subsequently analyzed with functions provided in the coda
#' package.
#'
#' \code{MCMCprobitChange} simulates from the posterior distribution of a
#' probit regression model with multiple parameter breaks. The simulation is
#' based on Chib (1998) and Park (2011).
#'
#' The model takes the following form:
#'
#' \deqn{\Pr(y_t = 1) = \Phi(x_i'\beta_m) \;\; m = 1, \ldots, M}
#'
#' Where \eqn{M} is the number of states, and \eqn{\beta_m}
#' is a parameter when a state is \eqn{m} at \eqn{t}.
#'
#' We assume Gaussian distribution for prior of \eqn{\beta}:
#'
#' \deqn{\beta_m \sim \mathcal{N}(b_0,B_0^{-1}),\;\; m = 1, \ldots, M}
#'
#' And:
#'
#' \deqn{p_{mm} \sim \mathcal{B}eta(a, b),\;\; m = 1, \ldots, M}
#'
#' Where \eqn{M} is the number of states.
#'
#' @param formula Model formula.
#'
#' @param data Data frame.
#'
#' @param m The number of changepoints.
#'
#' @param burnin The number of burn-in iterations for the sampler.
#'
#' @param mcmc The number of MCMC iterations after burnin.
#'
#' @param thin The thinning interval used in the simulation.  The number of
#' MCMC iterations must be divisible by this value.
#'
#' @param verbose A switch which determines whether or not the progress of the
#' sampler is printed to the screen.  If \code{verbose} is greater than 0 the
#' iteration number, the \eqn{\beta} vector, and the error variance are
#' printed to the screen every \code{verbose}th iteration.
#'
#' @param seed The seed for the random number generator.  If NA, the Mersenne
#' Twister generator is used with default seed 12345; if an integer is passed
#' it is used to seed the Mersenne twister.  The user can also pass a list of
#' length two to use the L'Ecuyer random number generator, which is suitable
#' for parallel computation.  The first element of the list is the L'Ecuyer
#' seed, which is a vector of length six or NA (if NA a default seed of
#' \code{rep(12345,6)} is used).  The second element of list is a positive
#' substream number. See the MCMCpack specification for more details.
#'
#' @param beta.start The starting values for the \eqn{\beta} vector.
#' This can either be a scalar or a column vector with dimension equal to the
#' number of betas.  The default value of of NA will use the MLE estimate of
#' \eqn{\beta} as the starting value.  If this is a scalar, that value
#' will serve as the starting value mean for all of the betas.
#'
#' @param P.start The starting values for the transition matrix.  A user should
#' provide a square matrix with dimension equal to the number of states.  By
#' default, draws from the \code{Beta(0.9, 0.1)} are used to construct a proper
#' transition matrix for each raw except the last raw.
#'
#' @param b0 The prior mean of \eqn{\beta}.  This can either be a scalar
#' or a column vector with dimension equal to the number of betas. If this
#' takes a scalar value, then that value will serve as the prior mean for all
#' of the betas.
#'
#' @param B0 The prior precision of \eqn{\beta}.  This can either be a
#' scalar or a square matrix with dimensions equal to the number of betas.  If
#' this takes a scalar value, then that value times an identity matrix serves
#' as the prior precision of beta. Default value of 0 is equivalent to an
#' improper uniform prior for beta.
#'
#' @param a \eqn{a} is the shape1 beta prior for transition probabilities.
#' By default, the expected duration is computed and corresponding a and b
#' values are assigned. The expected duration is the sample period divided by
#' the number of states.
#'
#' @param b \eqn{b} is the shape2 beta prior for transition probabilities.
#' By default, the expected duration is computed and corresponding a and b
#' values are assigned. The expected duration is the sample period divided by
#' the number of states.
#'
#' @param marginal.likelihood How should the marginal likelihood be calculated?
#' Options are: \code{none} in which case the marginal likelihood will not be
#' calculated, and \code{Chib95} in which case the method of Chib (1995) is
#' used.
#'
#' @param ... further arguments to be passed
#'
#' @return An mcmc object that contains the posterior sample.  This object can
#' be summarized by functions provided by the coda package.  The object
#' contains an attribute \code{prob.state} storage matrix that contains the
#' probability of \eqn{state_i} for each period, the log-likelihood of
#' the model (\code{loglike}), and the log-marginal likelihood of the model
#' (\code{logmarglike}).
#'
#' @export
#'
#' @seealso \code{\link{plotState}}, \code{\link{plotChangepoint}}
#'
#' @references Jong Hee Park. 2011. ``Changepoint Analysis of Binary and
#' Ordinal Probit Models: An Application to Bank Rate Policy Under the Interwar
#' Gold Standard."  \emph{Political Analysis}. 19: 188-204. <doi:10.1093/pan/mpr007>
#'
#' Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park. 2011.  ``MCMCpack:
#' Markov Chain Monte Carlo in R.'', \emph{Journal of Statistical Software}.
#' 42(9): 1-21.  \doi{10.18637/jss.v042.i09}.
#'
#' Siddhartha Chib. 1998. ``Estimation and comparison of multiple change-point
#' models.'' \emph{Journal of Econometrics}. 86: 221-241.
#'
#' Albert, J. H. and S. Chib. 1993. ``Bayesian Analysis of Binary and
#' Polychotomous Response Data.'' \emph{J. Amer. Statist. Assoc.} 88, 669-679
#'
#' @keywords models
#'
#' @examples
#'
#' \dontrun{
#' set.seed(1973)
#' x1 <- rnorm(300, 0, 1)
#' true.beta <- c(-.5, .2, 1)
#' true.alpha <- c(.1, -1., .2)
#' X <- cbind(1, x1)
#'
#' ## set two true breaks at 100 and 200
#' true.phi1 <- pnorm(true.alpha[1] + x1[1:100]*true.beta[1])
#' true.phi2 <- pnorm(true.alpha[2] + x1[101:200]*true.beta[2])
#' true.phi3 <-  pnorm(true.alpha[3] + x1[201:300]*true.beta[3])
#'
#' ## generate y
#' y1 <- rbinom(100, 1, true.phi1)
#' y2 <- rbinom(100, 1, true.phi2)
#' y3 <- rbinom(100, 1, true.phi3)
#' Y <- as.ts(c(y1, y2, y3))
#'
#' ## fit multiple models with a varying number of breaks
#' out0 <- MCMCprobitChange(formula=Y~X-1, data=parent.frame(), m=0,
#'                          mcmc=1000, burnin=1000, thin=1, verbose=1000,
#'                          b0 = 0, B0 = 0.1, a = 1, b = 1,  marginal.likelihood = c("Chib95"))
#' out1 <- MCMCprobitChange(formula=Y~X-1, data=parent.frame(), m=1,
#'                          mcmc=1000, burnin=1000, thin=1, verbose=1000,
#'                          b0 = 0, B0 = 0.1, a = 1, b = 1,  marginal.likelihood = c("Chib95"))
#' out2 <- MCMCprobitChange(formula=Y~X-1, data=parent.frame(), m=2,
#'                          mcmc=1000, burnin=1000, thin=1, verbose=1000,
#'                          b0 = 0, B0 = 0.1, a = 1, b = 1,  marginal.likelihood = c("Chib95"))
#' out3 <- MCMCprobitChange(formula=Y~X-1, data=parent.frame(), m=3,
#'                          mcmc=1000, burnin=1000, thin=1, verbose=1000,
#'                          b0 = 0, B0 = 0.1, a = 1, b = 1,  marginal.likelihood = c("Chib95"))
#'
#' ## find the most reasonable one
#' BayesFactor(out0, out1, out2, out3)
#'
#' ## draw plots using the "right" model
#' plotState(out2)
#' plotChangepoint(out2)
#' }
#'
"MCMCprobitChange"<-
  function(formula, data=parent.frame(),  m = 1,
           burnin = 10000, mcmc = 10000, thin = 1, verbose = 0,
           seed = NA, beta.start = NA, P.start = NA,
           b0 = NULL, B0 = NULL, a = NULL, b = NULL,
           marginal.likelihood = c("none", "Chib95"),
           ...){

    ## form response and model matrices
    holder <- parse.formula(formula, data)
    y <- holder[[1]]
    X <- holder[[2]]
    xnames <- holder[[3]]
    k <- ncol(X)
    n <- length(y)
    ns <- m + 1

    ## check iteration parameters
    check.mcmc.parameters(burnin, mcmc, thin)
    totiter <- mcmc + burnin
    cl <- match.call()
    nstore <- mcmc/thin

    ## seeds
    seeds <- form.seeds(seed)
    lecuyer <- seeds[[1]]
    seed.array <- seeds[[2]]
    lecuyer.stream <- seeds[[3]]
    if(!is.na(seed)) set.seed(seed)

    ## prior
    mvn.prior <- form.mvn.prior(b0, B0, k)
    b0 <- mvn.prior[[1]]
    B0 <- mvn.prior[[2]]

    chib <- 0
    if (marginal.likelihood == "Chib95"){
      chib <- 1
    }

    if (m == 0){
      if (marginal.likelihood == "Chib95"){
        output <- MCMCprobit(formula=Y~X-1, burnin = burnin, mcmc = mcmc,
                             thin = thin, verbose = verbose,
                             b0 = b0, B0 = B0,
                             marginal.likelihood = "Laplace")
        cat("\n Chib95 method is not yet available for m = 0. Laplace method is used instead.")
      }
      else {
        output <- MCMCprobit(formula=Y~X-1, burnin = burnin, mcmc = mcmc,
                             thin = thin, verbose = verbose,
                             b0 = b0, B0 = B0)
      }
      attr(output, "y") <- y
    }
    else{
      A0 <- trans.mat.prior(m=m, n=n, a=a, b=b)
      Pstart  <-  check.P(P.start, m, a=a, b=b)
      betastart  <- beta.change.start(beta.start, ns, k, formula, family=binomial, data)
      ## call C++ code to draw sample
      posterior <- .C("cMCMCprobitChange",
                      betaout = as.double(rep(0.0, nstore*ns*k)),
                      Pout = as.double(rep(0.0, nstore*ns*ns)),
                      psout = as.double(rep(0.0, n*ns)),
                      sout = as.double(rep(0.0, nstore*n)),
                      Ydata = as.double(y),
                      Yrow = as.integer(nrow(y)),
                      Ycol = as.integer(ncol(y)),
                      Xdata = as.double(X),
                      Xrow = as.integer(nrow(X)),
                      Xcol = as.integer(ncol(X)),
                      m = as.integer(m),
                      burnin = as.integer(burnin),
                      mcmc = as.integer(mcmc),
                      thin = as.integer(thin),
                      verbose = as.integer(verbose),

                      lecuyer=as.integer(lecuyer),
                      seedarray=as.integer(seed.array),
                      lecuyerstream=as.integer(lecuyer.stream),

                      betastart = as.double(betastart),
                      Pstart = as.double(Pstart),

                      a = as.double(a),
                      b = as.double(b),
                      b0data = as.double(b0),
                      B0data = as.double(B0),
                      A0data = as.double(A0),
                      logmarglikeholder = as.double(0.0),
                      loglikeholder = as.double(0.0),
                      chib = as.integer(chib))

      ## get marginal likelihood if Chib95
      if (chib==1){
        logmarglike <- posterior$logmarglikeholder
        loglike <- posterior$loglikeholder
      }
      else{
        logmarglike <- loglike <- 0
      }

      ## pull together matrix and build MCMC object to return
      beta.holder <- matrix(posterior$betaout, nstore, ns*k)
      P.holder    <- matrix(posterior$Pout, nstore, )
      s.holder    <- matrix(posterior$sout, nstore, )
      ps.holder   <- matrix(posterior$psout, n, )

      output <- mcmc(data=beta.holder, start=burnin+1, end=burnin + mcmc, thin=thin)
      varnames(output)  <- sapply(c(1:ns),
                                  function(i){
                                    paste(c(xnames), "_regime", i, sep = "")
                                  })
      attr(output, "title") <- "MCMCprobitChange Posterior Sample"
      attr(output, "formula") <- formula
      attr(output, "y")       <- y
      attr(output, "X")       <- X
      attr(output, "m")       <- m
      attr(output, "call")    <- cl
      attr(output, "logmarglike") <- logmarglike
      attr(output, "loglike") <- loglike
      attr(output, "prob.state") <- ps.holder/nstore
      attr(output, "s.store") <- s.holder
    }
    return(output)

}## end of MCMC function