R/dic.fit.mcmc.R
In coarseDataTools: Analysis of Coarsely Observed Data

Documented in dic.fit.mcmc mcmc.erlang mcmcpack.ll

##' Fits the distribution to the passed-in data using MCMC
##' as implemented in MCMCpack.
##'
##' Similar to \code{\link{dic.fit}} but uses MCMC instead of a direct likelihood optimization routine to fit the model. Currently, four distributions are supported: log-normal, gamma, Weibull, and Erlang. See Details for prior specification.
##'
##' The following models are used:
##' \deqn{Log-normal model: f(x) = \frac{1}{x*\sigma \sqrt{2 * \pi}} exp\{-\frac{(\log x - \mu)^2}{2 * \sigma^2}\}}
##' \deqn{Log-normal Default Prior: \mu ~ N(0, 1000), log(\sigma) ~ N(0,1000)}
##' \deqn{Weibull model: f(x) = \frac{\alpha}{\beta}(\frac{x}{\beta})^{\alpha-1} exp\{-(\frac{x}{\beta})^{\alpha}\}}
##' \deqn{Weibull Default Prior Specification: log(\alpha) ~ N( 0, 1000), \beta ~ Gamma(0.001,0.001)}
##' \deqn{Gamma model: f(x) = \frac{1}{\theta^k \Gamma(k)} x^{k-1} exp\{-\frac{x}{\theta}\}}
##'
##' \deqn{Gamma Default Prior Specification: p(k,\theta) \propto \frac{1}{\theta} * \sqrt{k*TriGamma(k)-1}}
##' (Note: this is Jeffery's Prior when both parameters are unknown), and
##' \deqn{Trigamma(x) = \frac{\partial}{\partial x^2} ln(\Gamma(x))}.)
##' \deqn{Erlang model: f(x) = \frac{1}{\theta^k (k-1)!} x^{k-1} exp\{-\frac{x}{\theta}\}}
##' \deqn{Erlang Default Prior Specification: k \sim NBinom(100,1), log(\theta) \sim N(0,1000)}
##' (Note: parameters in the negative binomial distribution above represent mean and size, respectively)
##'
##' @param dat the data
##' @param prior.par1 vector of first prior parameters for each model parameter. If \code{NULL} then default parameters are used (as described in Details section).
##' @param prior.par2 vector of second prior parameters for each model parameter. If \code{NULL} then default parameters are used (as described in Details section).
##' @param init.pars the initial parameter values (vector length = 2 )
##' @param ptiles returned percentiles of the survival survival distribution
##' @param verbose how often do you want a print out from MCMCpack on iteration number and M-H acceptance rate
##' @param burnin number of burnin samples
##' @param n.samples number of samples to draw from the posterior (after the burnin)
##' @param dist distribution to be used (L for log-normal,W for weibull, G for Gamma, and E for erlang, off1G for 1 day right shifted gamma)
##' @param seed seed for the random number generator for MCMC
##' @param ... additional parameters to \link[MCMCpack]{MCMCmetrop1R}
##' @return a cd.fit.mcmc S4 object
##' @importFrom MCMCpack MCMCmetrop1R
##' @export
dic.fit.mcmc <- function(dat,
                         prior.par1 = NULL,
                         prior.par2 = NULL,
                         init.pars = c(1, 1),
                         ptiles = c(0.05, 0.95, 0.99),
                         verbose = 1000, # how often to print update
                         burnin = 3000,
                         n.samples = 5000,
                         dist = "L",
                         seed = NULL,
                         ...) {
  ## check to make sure data is well formed for CDT use:
  check.data.structure(dat)

  ## check to make sure distribution is supported
  if (!dist %in% c("G", "W", "L", "E", "off1G", "off1W", "off1L")) stop("Please use one of the following distributions Log-Normal (L) , Weibull (W), Gamma (G), Erlang (E), offset Log-Normal (off1L), offset Weibull (off1W) or offset Gamma (off1G)")

  ##        ## don't need MCMCpack for Erlang
  ##        if (dist != "E") require(MCMCpack)

  ## check that percentiles are valid
  if (any(ptiles >= 1, ptiles <= 0)) stop("Sorry the percentiles you are requesting are not valid.")

  ## default prior parameters if none specified
  if (is.null(prior.par1)) {
    if (dist %in% c("L", "off1L")) {
      prior.par1 <- c(0, 0)
    } else if (dist %in% c("W", "off1W", "G", "off1G")) {
      prior.par1 <- c(0, 0.001)
    } else if (dist == "E") {
      prior.par1 <- c(100, 0)
    }
  }

  ## default prior parameters if none specified
  if (is.null(prior.par2)) {
    if (dist %in% c("L", "off1L")) {
      prior.par2 <- c(1000, 1000)
    } else if (dist %in% c("W", "off1W", "G", "off1G")) {
      prior.par2 <- c(1000, 0.001)
    } else if (dist == "E") {
      prior.par2 <- c(1, 1000)
    }
  }

  ## random seed based off current time if none specified
  if (is.null(seed)) {
    t <- as.numeric(Sys.time())
    seed <- 1e8 * (t - floor(t))
  }

  cat(sprintf("Running %.0f MCMC iterations \n", n.samples + burnin))

  msg <- NULL
  fail <- FALSE

  ## run the MCMC chains

  ## initial parameters set on reporting scale not estimation scale
  init.pars.trans <- dist.optim.transform(dist, init.pars)


  if (dist != "E") {
    # use MCMC pack for effieciency for distibutions with 2 continuous parameters
    tryCatch(
      mcmc.run <- MCMCmetrop1R(
        fun = mcmcpack.ll,
        theta.init = init.pars.trans,
        burnin = burnin,
        mcmc = n.samples,
        dat = dat,
        prior.par1 = prior.par1,
        prior.par2 = prior.par2,
        verbose = verbose,
        dist = dist,
        logfun = TRUE,
        seed = seed,
        ...
      ),
      error = function(e) {
        msg <<- e$message
        fail <<- TRUE
      },
      warning = function(w) {
        msg <<- w$message
        fail <<- TRUE
      }
    )
  } else {
    # Use the custom metropolis hastings algorithm for erlang
    mcmc.run <- mcmc.erlang(
      dat,
      prior.par1,
      prior.par2,
      init.pars,
      verbose,
      burnin,
      n.samples,
      ...
    )
  }



  if (!fail) {
    ## untransform MCMC parameter draws to natural scale
    untrans.mcmcs <- t(apply(mcmc.run[, 1:2], 1, function(x) dist.optim.untransform(dist = dist, pars = x)))

    ## append median to the percentiles in case it isn't there
    ptiles.appended <- sort(union(0.5, ptiles))
    est.pars <- matrix(nrow = length(ptiles.appended) + 2, ncol = 3)

    if (dist == "L") {
      par1.name <- "meanlog"
      par2.name <- "sdlog"
      mcmc.quantiles <- apply(untrans.mcmcs, 1, function(x) qlnorm(ptiles.appended, meanlog = x[1], sdlog = x[2]))
    } else if (dist == "off1L") {
      par1.name <- "meanlog"
      par2.name <- "sdlog"
      mcmc.quantiles <- apply(untrans.mcmcs, 1, function(x) {
        qlnorm(ptiles.appended, meanlog = x[1], sdlog = x[2]) + 1
      })
    } else if (dist == "G") {
      par1.name <- "shape"
      par2.name <- "scale"
      mcmc.quantiles <- apply(untrans.mcmcs, 1, function(x) qgamma(ptiles.appended, shape = x[1], scale = x[2]))
    } else if (dist == "off1G") {
      par1.name <- "shape"
      par2.name <- "scale"
      mcmc.quantiles <- apply(untrans.mcmcs, 1, function(x) {
        qgamma(ptiles.appended, shape = x[1], scale = x[2]) + 1
      })
    } else if (dist == "W") {
      par1.name <- "shape"
      par2.name <- "scale"
      mcmc.quantiles <- apply(untrans.mcmcs, 1, function(x) qweibull(ptiles.appended, shape = x[1], scale = x[2]))
    } else if (dist == "off1W") {
      par1.name <- "shape"
      par2.name <- "scale"
      mcmc.quantiles <- apply(untrans.mcmcs, 1, function(x) {
        qweibull(ptiles.appended, shape = x[1], scale = x[2]) + 1
      })
    } else if (dist == "E") {
      par1.name <- "shape"
      par2.name <- "scale"
      mcmc.quantiles <- apply(untrans.mcmcs, 1, function(x) qgamma(ptiles.appended, shape = x[1], scale = x[2]))
    } else {
      stop("Sorry, unknown distribution type. Check the 'dist' option.")
      ## not actually needed but just in case
    }

    ## make the return matrix
    colnames(est.pars) <- c("est", "CIlow", "CIhigh")
    rownames(est.pars) <- c(par1.name, par2.name, paste0("p", 100 * ptiles.appended))

    # making the matrix with the actual estimates.
    est.pars[1, ] <- quantile(untrans.mcmcs[, 1], c(0.5, 0.025, 0.975))
    est.pars[2, ] <- quantile(untrans.mcmcs[, 2], c(0.5, 0.025, 0.975))
    cis.ptiles <- t(apply(mcmc.quantiles, 1, function(x) quantile(x, c(0.5, 0.025, 0.975))))
    est.pars[3:nrow(est.pars), 1:3] <- cis.ptiles

    ## finally get tbhe log-likelihood evaluated at the mean posterior for each parameter
    ll <- -loglikhd(pars = dist.optim.transform(dist = dist, est.pars[1:2, 1]), dat = data.frame(dat), dist = dist)

    rc <- new("cd.fit.mcmc",
      ests = round(est.pars, 3),
      conv = numeric(),
      MSG = "",
      loglik = ll,
      samples = data.frame(untrans.mcmcs),
      data = data.frame(dat),
      dist = dist,
      inv.hessian = matrix(),
      est.method = "MCMC",
      ci.method = "MCMC"
    )

    return(rc)
  } else {
    if (msg != "") msg <- paste0("Error: ", msg)
    stop(sprintf("\n%s\nTry adjusting the starting parameters (init.pars), or changing the optimization method (opt.method).", msg))
  }
}



##' posterior log likelihood function to pass to MCMCpack sampler
##'
##' @param pars the parameters to calculate the ll at
##' @param dat the date to base it on
##' @param prior.par1 first parameter of each prior
##' @param prior.par2 second parameter of each prior
##' @param dist the distribution the likelihood is being calculated for
##'
##' @return the posterior log likelihood
mcmcpack.ll <- function(pars,
                        dat,
                        prior.par1,
                        prior.par2,
                        dist) {
  ## get parameters on untransformed scale
  pars.untrans <- dist.optim.untransform(dist, pars)



  if (dist == "L" || dist == "off1L") {
    ## default gamma on scale param and (inproper) uniform on location
    ll <- tryCatch(
      -loglikhd(pars, dat, dist) +
        ## dgamma(pars.untrans[2],shape=par.prior.param1[2],
        ## rate=par.prior.param2[2],log=T),
        sum(dnorm(pars,
          prior.par1,
          prior.par2,
          log = TRUE
        )),
      error = function(e) {
        warning("Loglik failure, returning -Inf")
        return(-Inf)
      }
    )
  } else if (dist == "W" || dist == "off1W") {
    ## using normal prior on the log-shape param and gamma on scale
    ll <- tryCatch(
      -loglikhd(pars, dat, dist) +
        dnorm(pars[1],
          prior.par1[1],
          prior.par2[1],
          log = TRUE
        ) +
        dgamma(pars.untrans[2],
          shape = prior.par1[2],
          rate = prior.par2[2], log = TRUE
        ),
      error = function(e) {
        warning("Loglik failure, returning -Inf")
        return(-Inf)
      }
    )
  } else if (dist == "G" || dist == "off1G") {
    ## using Jeffery's prior
    ll <- tryCatch(
      -loglikhd(pars, dat, dist)
      +
        log(1 / pars.untrans[2] * sqrt(pars.untrans[1] * trigamma(pars.untrans[1]) - 1)),
      error = function(e) {
        warning("Loglik failure, returning -Inf")
        return(-Inf)
      }
    )
  } else {
    stop("Sorry, unknown distribution type. Check the 'dist' option.")
  }
  return(ll)
}

##' Does a metropolis hastings for the Erlang distribution
##'
##' @param dat the data to fit
##' @param prior.par1 mean of priors. A negative binomial (for shape) and a normal for log(scale)
##' @param prior.par2 dispersion parameters for priors, dispersion for negative binomial, log scale sd for normal
##' @param init.pars the starting parameters on the reporting scale
##' @param verbose how often to print an update
##' @param burnin how many burnin iterations to do
##' @param n.samples the number of samples to keep and report back
##' @param sds the standard deviations for the proposal distribution
##'
##' @return a matrix of n.samples X 2 parameters, on the estimation scale
##'
mcmc.erlang <- function(dat, prior.par1, prior.par2,
                        init.pars, verbose, burnin, n.samples, sds = c(1, 1)) {
  # make a logging return matrix
  states <- matrix(nrow = burnin + n.samples, ncol = 4)
  colnames(states) <- c("shape", "scale", "ll", "accept")

  # calculate the LL for the initial parameters.
  shape.cur <- init.pars[1]
  scale.cur <- log(init.pars[2])
  ll.cur <- -loglikhd(c(log(shape.cur), scale.cur), dat, dist = "G") +
    dnbinom(shape.cur, mu = prior.par1[1], size = prior.par2[1], log = TRUE) +
    dnorm(scale.cur, prior.par1[2], prior.par2[2], log = TRUE)

  if (ll.cur == -Inf) {
    stop("zero starting likelihood. check priors are appropriate for Erlang distribution")
  }

  states[1, ] <- c(shape.cur, scale.cur, ll.cur, 1)

  for (i in 2:(burnin + n.samples)) {
    # propose new parameters
    shape.prop <- shape.cur + round(rnorm(1, 0, sds[1])) # discretized normal
    scale.prop <- scale.cur + rnorm(1, 0, sds[2])

    # calculate the liklihood using the new parameters...go ahead and do -Inf if shape is -
    if (shape.prop < 0) {
      ll.prop <- -Inf
    } else {
      ll.prop <- -loglikhd(c(log(shape.prop), scale.prop), dat, dist = "G") +
        dnbinom(shape.prop, mu = prior.par1[1], size = prior.par2[1], log = TRUE) +
        dnorm(scale.prop, prior.par1[2], prior.par2[2], log = TRUE)
    }

    # accept or reject
    l.alpha <- log(runif(1))


    if ((ll.prop - ll.cur) > l.alpha) {
      # accept
      shape.cur <- shape.prop
      scale.cur <- scale.prop
      ll.cur <- ll.prop
      states[i, ] <- c(shape.cur, scale.cur, ll.cur, 1)
    } else {
      # reject
      states[i, ] <- c(shape.cur, scale.cur, ll.cur, 0)
    }

    if (verbose != 0 && i %% verbose == 0) {
      cat("Erlang MCMC iteration ", i, " of ", burnin + n.samples, "\n")
      cat("LL = ", ll.cur, "\n")
      cat("theta = ", c(shape.cur, scale.cur), "\n")
      cat("acceptance rate = ", sum(states[1:i, 4]) / i, "\n")
    }
  }

  rc <- states[(burnin + 1):(burnin + n.samples), 1:2]
  return(rc)
}