R/MCMC_spCP.R
In spCP: Spatially Varying Change Points

Documented in spCP

#' MCMC sampler for spatially varying change point model.
#'
#' \code{spCP} is a Markov chain Monte Carlo (MCMC) sampler for a spatially varying change point
#'  model with spatially varying slopes, intercepts, and unique variances at each spatial-temporal
#'  location. The model is implemented using a Bayesian hierarchical framework.
#'
#' @param Y An \code{N} dimensional vector containing the observed outcome data.
#'  Here, \code{N = M * Nu}, where \code{M} represents the number of spatial locations
#'  and \code{Nu} the number of temporal visits. The observations in \code{Y} must be first
#'  ordered spatially and then temporally, meaning the first \code{M} observations
#'  in \code{Y} should come from the initial time point.
#'
#' @param DM An \code{M} dimensional vector containing a dissimilarity metric
#'  for each spatial location. The order of the spatial locations must match the order from
#'  \code{Y}.
#'
#' @param W An \code{M x M} dimensional binary adjacency matrix for dictating the
#'  spatial neighborhood structure.
#'
#' @param Time A \code{Nu} dimensional vector containing the observed time points for each
#'  vector of outcomes in increasing order.
#'
#' @param Starting Either \code{NULL} or a \code{list} containing starting values
#'  to be specified for the MCMC sampler. If \code{NULL} is not chosen then none, some or all
#'  of the starting values may be specified.
#'
#'  When \code{NULL} is chosen then default starting values are automatically generated.
#'  Otherwise a \code{list} must be provided with names \code{Delta}, \code{Alpha} or
#'  \code{Sigma} containing appropriate objects. \code{Delta} must be a \code{5} dimensional
#'  vector, \code{Sigma} a \code{5 x 5} dimensional matrix and \code{Alpha} a scalar.
#'
#' @param Hypers Either \code{NULL} or a \code{list} containing hyperparameter values
#'  to be specified for the MCMC sampler. If \code{NULL} is not chosen then none, some or all
#'  of the hyperparameter values may be specified.
#'
#'  When \code{NULL} is chosen then default hyperparameter values are automatically
#'  generated. These default hyperparameters are described in detail in (Berchuck et al.).
#'  Otherwise a \code{list} must be provided with names \code{Delta}, \code{Sigma} or
#'  \code{Alpha} containing further hyperparameter information. These objects are themselves
#'  \code{lists} and may be constructed as follows.
#'
#'  \code{Delta} is a \code{list} with one object, \code{Kappa2}. \code{Kappa2} represents
#'  the prior variance and must be a scalar. \code{Sigma} is a \code{list} with two objects,
#'  \code{Xi} and \code{Psi}. \code{Xi} represents the degrees of freedom parameter for the
#'  inverse-Wishart hyperprior and must be a real number scalar, while \code{Psi} represents
#'  the scale matrix and must be a \code{5 x 5} dimensional positive definite matrix.
#'
#'  \code{Alpha} is a \code{list} with two objects, \code{AAlpha} and \code{BAlpha}. \code{AAlpha}
#'  represents the lower bound for the uniform hyperprior, while \code{BAlpha} represents
#'  the upper bound. The bounds must be specified carefully.
#'
#' @param Tuning Either \code{NULL} or a \code{list} containing tuning values
#'  to be specified for the MCMC Metropolis steps. If \code{NULL} is not chosen then all
#'  of the tuning values must be specified.
#'
#'  When \code{NULL} is chosen then default tuning values are automatically generated to
#'  \code{1}. Otherwise a \code{list} must be provided with names \code{Lambda0Vec},
#'  \code{Lambda1Vec}, \code{EtaVec} and \code{Alpha}. \code{Lambda0Vec}, \code{Lambda1Vec},
#'  and \code{EtaVec} must be \code{M} dimensional vectors and \code{Alpha} a scalar.
#'  Each containing tuning variances for their corresponding Metropolis updates.
#'
#' @param MCMC Either \code{NULL} or a \code{list} containing input values to be used
#'  for implementing the MCMC sampler. If \code{NULL} is not chosen then all
#'  of the MCMC input values must be specified.
#'
#'  \code{NBurn}: The number of sampler scans included in the burn-in phase. (default =
#'  \code{10,000})
#'
#'  \code{NSims}: The number of post-burn-in scans for which to perform the
#'   sampler. (default = \code{100,000})
#'
#'  \code{NThin}: Value such that during the post-burn-in phase, only every
#'  \code{NThin}-th scan is recorded for use in posterior inference (For return values
#'  we define, NKeep = NSims / NThin (default = \code{10}).
#'
#'  \code{NPilot}: The number of times during the burn-in phase that pilot adaptation
#'  is performed (default = \code{20})
#'
#' @param Family Character string indicating the distribution of the observed data. Options
#'  include: \code{"normal"}, \code{"probit"}, \code{"tobit"}.
#'
#' @param Distance Character string indicating the distance metric for computing the
#'  dissimilarity metric. Options include: \code{"euclidean"} and \code{"circumference"}.
#'
#' @param Weights Character string indicating the type of weight used. Options include:
#'  \code{"continuous"} and \code{"binary"}.
#'
#' @param Rho A scalar in \code{(0,1)} that dictates the magnitude of local spatial sharing.
#'  By default it is fixed at \code{0.99} as suggested by Lee and Mitchell (2012).
#'
#' @param ScaleY A positive scalar used for scaling the observed data, \code{Y}. This is
#'  used to aid numerically for MCMC convergence, as scaling large observations often
#'  stabilizes chains. By default it is fixed at \code{10}.
#'
#' @param ScaleDM A positive scalar used for scaling the dissimilarity metric distances,
#'  \code{DM}. This is used to aid numerically for MCMC convergence. as scaling spatial
#'  distances is often used for improved MCMC convergence. By default it is fixed at \code{100}.
#'
#' @param Seed An integer value used to set the seed for the random number generator
#'  (default = 54).
#'
#' @details Details of the underlying statistical model proposed by proposed by
#'  Berchuck et al. 2018. are forthcoming.
#'
#' @return \code{spCP} returns a list containing the following objects
#'
#'   \describe{
#'
#'   \item{\code{beta0}}{\code{NKeep x M} \code{matrix} of posterior samples for \code{beta0}.
#'   The s-th column contains posterior samples from the the s-th location.}
#'
#'   \item{\code{beta1}}{\code{NKeep x M} \code{matrix} of posterior samples for \code{beta1}.
#'   The s-th column contains posterior samples from the the s-th location.}
#'
#'   \item{\code{lambda0}}{\code{NKeep x M} \code{matrix} of posterior samples for \code{lambda0}.
#'   The s-th column contains posterior samples from the the s-th location.}
#'
#'   \item{\code{lambda1}}{\code{NKeep x M} \code{matrix} of posterior samples for \code{lambda1}.
#'   The s-th column contains posterior samples from the the s-th location.}
#'
#'   \item{\code{eta}}{\code{NKeep x M} \code{matrix} of posterior samples for \code{eta}.
#'   The s-th column contains posterior samples from the the s-th location.}
#'
#'   \item{\code{theta}}{\code{NKeep x M} \code{matrix} of posterior samples for \code{theta}.
#'   The s-th column contains posterior samples from the the s-th location.}
#'
#'   \item{\code{delta}}{\code{NKeep x 5} \code{matrix} of posterior samples for \code{delta}.
#'   The columns have names that describe the samples within them.}
#'
#'   \item{\code{sigma}}{\code{NKeep x 15} \code{matrix} of posterior samples for \code{Sigma}. The
#'   columns have names that describe the samples within them. The row is listed first, e.g.,
#'   \code{Sigma32} refers to the entry in row \code{3}, column \code{2}.}
#'
#'   \item{\code{alpha}}{\code{NKeep x 1} \code{matrix} of posterior samples for \code{Alpha}.}
#'
#'   \item{\code{metropolis}}{\code{(3 * M + 1) x 3} \code{matrix} of metropolis
#'   acceptance rates, updated tuners, and original tuners that result from the pilot
#'   adaptation. The first \code{M} correspond to the \code{Lambda0Vec} parameters,
#'   the next \code{M} correspond to the \code{Lambda1Vec}, the next \code{M} correspond
#'   to the \code{EtaVec} parameters and the last row give the \code{Alpha} values.}
#'
#'   \item{\code{runtime}}{A \code{character} string giving the runtime of the MCMC sampler.}
#'
#'   \item{\code{datobj}}{A \code{list} of data objects that are used in future \code{spCP} functions
#'   and should be ignored by the user.}
#'
#'   \item{\code{dataug}}{A \code{list} of data augmentation objects that are used in future
#'   \code{spCP} functions and should be ignored by the user.}
#'
#'   }
#'
# @author Samuel I. Berchuck
#' @references Reference for Berchuck et al. 2018 is forthcoming.
#' @export
spCP <- function(Y, DM, W, Time, Starting = NULL, Hypers = NULL, Tuning = NULL, MCMC = NULL,
                 Family = "tobit", Weights = "continuous", Distance = "circumference",
                 Rho = 0.99, ScaleY = 10, ScaleDM = 100, Seed = 54) {

  ###Function Inputs
  # Y = Y
  # DM = DM
  # W = W
  # Time = Time
  # Starting = Starting
  # Hypers = Hypers
  # Tuning = Tuning
  # MCMC = MCMC
  # Family = "tobit"
  # Weights = "continuous"
  # Distance = "circumference"
  # Rho = 0.99
  # ScaleY = 1
  # ScaleDM = 1
  # Seed = 54

  ###Check for missing objects
  if (missing(Y)) stop("Y: missing")
  if (missing(DM)) stop("DM: missing")
  if (missing(W)) stop("W: missing")
  if (missing(Time)) stop("Time: missing")

  ###Check model inputs
  CheckInputs(Y, DM, W, Time, Starting, Hypers, Tuning, MCMC, Family, Distance, Weights, Rho, ScaleY, ScaleDM)

  ####Set seed for reproducibility
  set.seed(Seed)

  ###Check to see if the job is interactive
  Interactive <- interactive()

  ###Create objects for use in sampler
  DatObj <- CreateDatObj(Y, DM, W, Time, Rho, ScaleY, ScaleDM, Family, Weights, Distance)
  HyPara <- CreateHyPara(Hypers, DatObj)
  MetrObj <- CreateMetrObj(Tuning, DatObj)
  Para <- CreatePara(Starting, DatObj, HyPara)
  DatAug <- CreateDatAug(DatObj)
  McmcObj <- CreateMcmc(MCMC, DatObj)
  RawSamples <- CreateStorage(DatObj, McmcObj)

  ###Time MCMC sampler
  BeginTime <- Sys.time()

  ###Run MCMC sampler in Rcpp
  RegObj <- spCP_Rcpp(DatObj, HyPara, MetrObj, Para, DatAug, McmcObj, RawSamples, Interactive)

  ###Set regression objects
  RawSamples <- RegObj$rawsamples
  MetropRcpp <- RegObj$metropolis

  ###End time
  FinishTime <- Sys.time()
  RunTime <- FinishTime - BeginTime

  ###Collect output to be returned
  DatObjOut <- OutputDatObj(DatObj)
  DatAugOut <- OutputDatAug(DatAug)
  Metropolis <- SummarizeMetropolis(DatObj, MetrObj, MetropRcpp, McmcObj)
  Samples <- FormatSamples(DatObj, RawSamples)

  ###Return spBDwDM object
  spCP <- list(alpha = Samples$Alpha,
               delta = Samples$Delta,
               sigma = Samples$Sigma,
               beta0 = Samples$Beta0,
               beta1 = Samples$Beta1,
               lambda0 = Samples$Lambda0,
               lambda1 = Samples$Lambda1,
               eta = Samples$Eta,
               theta = Samples$Theta,
               metropolis = Metropolis,
               datobj = DatObjOut,
               dataug = DatAugOut,
               runtime = paste0("Model runtime: ",round(RunTime, 2), " ",attr(RunTime, "units")))
  spCP <- structure(spCP, class = "spCP")
  return(spCP)

###End sampler
}