Nothing
#' Run the case study in KLTG (2021), or a smaller version thereof
#' @param data_df data frame in the same format as the \link{gdp} data set in this package.
#' @param burnin_size length of the burn-in period used for each forecast.
#' @param max_mcmc_sample_size maximal number of MCMC draws to consider (integer, must equal either 1000, 5000, 10000, 20000 or 40000). Defaults to 5000.
#' @param nr_of_chains number of parallel MCMC for each forecast date (integer, defaults to 3).
#' @param first_vint,last_vint first and last data vintage (= time point at which forecasts are made). Default to "19962Q2" and "2014Q3", respectively.
#' @param forecast_horizon forecast horizon to be analyzed (integer, defaults to 1).
#' @param random_seed seed for random numbers used during the MCMC sampling process. Defaults to 816.
#' @return Object of class "casestudy", containing the results of the analysis. This object can be passed to \code{plot} for plotting, see the documentation for \link{plot.casestudy}.
#' @details The full results in Section 5 of KLTG (2021) are based on the following setup: \code{burnin_size = 10000},
#' \code{max_mcmc_sample_size = 50000}, \code{nr_of_chains = 16}, \code{data_df = gdp}, \code{first_vint = "1996Q2"},
#' \code{last_vint = "2014Q3"}, and \code{forecast_horizon = 1}. Since running this full configuration is very
#' time consuming, the default setup offers the possibility to run a small-scale study which reproduces the
#' qualitative outcomes of the analysis. Running the small-scale study implied by the defaults of \code{run_study} as well as the GDP data (\code{data_df = gdp}) takes about 40 minutes on an Intel i7 processor.
#' @references Krueger, F., Lerch, S., Thorarinsdottir, T.L. and T. Gneiting (2021): `Predictive inference based on Markov chain Monte Carlo output', \emph{International Statistical Review} 89, 274-301. \doi{10.1111/insr.12405}
#' @seealso \link{plot.casestudy} produces a summary plot of the results generated by \link{run_casestudy}
#' \link{run_casestudy} uses \link{ar_ms} to fit a Bayesian Markov Switching model, recursively for several time periods.
#' @keywords replication
#' @export
#' @author Fabian Krueger
#' @examples
#' \dontrun{
#' data(gdp)
#' cs <- run_casestudy(data_df = gdp, last_vint = "1999Q4")
#' plot(cs)
#' }
run_casestudy <- function(data_df, burnin_size = 5000,
max_mcmc_sample_size = 5000, nr_of_chains = 3,
first_vint = "1996Q2", last_vint = "2014Q3",
forecast_horizon = 1,
random_seed = 816){
# get inputs
input <- as.list(environment())
# input check
if (max_mcmc_sample_size %in% c(1, 5, 10, 20, 40)*1000){
stop("Please set max_mcmc_sample_size to 1000, 5000, 10000, 20000 or 40000")
}
# fix random seed
set.seed(random_seed)
# mcmc sample sizes
mcmc_sample_sizes <- (c(1, 5, 10, 20, 40)*1000)[c(1, 5, 10, 20, 40)*1000 <= max_mcmc_sample_size]
# Vintages (starting in 1996:Q2)
vints <- sort(unique(data_df[qdiff(first_vint, data_df$vint) >= 0 &
qdiff(data_df$vint, last_vint) >= 0, "vint"]))
# Initialize data frame
df <- data.frame()
# Initial print to screen
print(paste(Sys.time(), "- now starting run_casestudy"))
# Loop over vintages
for (j in 1:length(vints)){
if ((j %% 10) == 0){
print(paste(Sys.time(), "- now running date", j, "out of", length(vints)))
}
# Pick data
cv <- vints[j]
dat <- data_df[data_df$vint == cv, c("dt", "val")]
dat <- sel.complete(dat[order(dat$dt), ])
# Some checks
check1 <- all(dat$dt == plusq(dat$dt[1], 0:(nrow(dat)-1)))
check2 <- plusq(dat$dt[length(dat$dt)], 1) == cv
if (!check1 | !check2) stop()
# Get realization
rlz <- data_df[qdiff(data_df$dt, data_df$vint) == 2 &
qdiff(data_df$dt, cv) == (forecast_horizon - 1), "val"]
if (length(rlz) != 1) stop()
# Loop over parallel chains
for (cc in 1:nr_of_chains){
# Fit forecasting model
fit <- ar_ms(dat$val, forecast_periods = 2,
n_burn = burnin_size, n_rep = max_mcmc_sample_size)
# Get forecast mean and sds
dat_m <- fit$fcMeans[, forecast_horizon]
dat_s <- fit$fcSds[, forecast_horizon]
dat_x <- dat_m + dat_s * rnorm(length(dat_m))
# Loop over sample sizes
for (nn in mcmc_sample_sizes){
# Loop over scoring rules
for (rule in c("logs", "crps")){
tmp_sc <- scores(dat = dat_x[1:nn], m = dat_m[1:nn], s = dat_s[1:nn],
y = rlz, sc = rule)
if (rule == "crps"){
tmp_df <- data.frame(od = vints[j], td = vints[j], n = nn, thin = 1,
method = c("norm", "mixp", "kdens", "ecdf"), rule = rule,
val = tmp_sc, chain_id = cc)
} else {
tmp_df <- data.frame(od = vints[j], td = vints[j], n = nn, thin = 1,
method = c("norm", "mixp", "kdens"), rule = rule,
val = tmp_sc, chain_id = cc)
}
# Expand data frame
df <- rbind(df, tmp_df)
}
}
}
}
# Make output list
out <- list(input = input, output = df)
# Final print to screen
print(paste(Sys.time(), "- now finishing run_casestudy"))
# Return object of class "casestudy"
structure(out, class = "casestudy")
}
#' Plot the output of run_casestudy
#' @param x object of class \code{casestudy}, generated by \link{run_casestudy}
#' @param ... additional parameters, see details below.
#' @return none, used for the effect of drawing a plot.
#' @details The plot is in the same format as Figure 3 in KLTG (2021). Its content (nr of MCMC chains,
#' maximal sample size, etc) depends on the parameters used to generate \link{run_casestudy}. In terms of
#' additional inputs (\code{...}), the following are currently implemented:
#' \itemize{\item \code{scoring_rule}, the scoring rule for which results are to be plotted,
#' either \code{"crps"} or \code{"logs"}. Defaults to \code{"crps"}. \item \code{add_main_title}, logical,
#' whether to add main title to plot. Defaults to \code{TRUE}.}
#' @export
#' @references Krueger, F., Lerch, S., Thorarinsdottir, T.L. and T. Gneiting (2021): `Predictive inference based on Markov chain Monte Carlo output', \emph{International Statistical Review} 89, 274-301. \doi{10.1111/insr.12405}
#' @seealso \link{run_casestudy} produces the forecast results summarized by \link{plot.casestudy}
#' @keywords replication
#' @author Fabian Krueger
plot.casestudy <- function(x, ...){
# Get output component
x <- x$output
# nr of sample sizes covered by input
n_sample_sizes <- sum(max(x$n) >=
c(1000, 5000, 10000, 20000, 40000))
# Dotted inputs
auxlist <- list(...)
if (is.null(auxlist$scoring_rule)){
scoring_rule <- "crps"
} else {
scoring_rule <- auxlist$scoring_rule
}
if (is.null(auxlist$add_main_title)){
add_main_title <- TRUE
} else {
add_main_title <- auxlist$add_main_title
}
# colors and methods
colors <- c("black", "blue", "red", "green")
methods <- c("mixp", "kdens", "norm")
if (scoring_rule == "crps") methods <- c(methods, "ecdf")
# mean scores over time, for each of the parallel chains
mean_scores <- aggregate(x$val, by = list(x$method, x$rule,
x$n, x$chain_id),
mean)
names(mean_scores) <- c("method", "rule", "n", "chain_id", "mean_val")
# plot scores against sample size, no thinning
sel <- mean_scores[mean_scores$rule == scoring_rule, ]
sel$n <- n_helper(sel$n)
for (mm in methods){
sel$n[sel$method == mm] <- sel$n[sel$method == mm] - 0.1 +
0.1*(which(methods == mm) - 1)
}
# plot
rule_print <- ifelse(scoring_rule == "crps", "CRPS", "Log Score")
plot(x = sel$n, y = sel$mean_val, bty = "n", ylab = "", xlab = "",
type = "n", axes = FALSE, xlim = c(0.5, 5),
main = "")
if (add_main_title){
title(main = paste0(rule_print, ", ", min(as.character(x$td)), " to ", max(as.character(x$td))),
cex.main = 1.6)
}
axis(2, cex.axis = 1.6)
for (mm in methods){
tmp <- sel[sel$method == mm, ]
points(x = tmp$n, y = tmp$mean_val, col = colors[which(methods == mm)], pch = 20, cex = 1.5)
tmp2 <- aggregate(tmp$mean_val, by = list(tmp$n), mean)
names(tmp2) <- c("n", "mean2")
lines(x = tmp2$n, y = tmp2$mean2,
col = colors[which(methods == mm)], lwd = 2)
}
axis(1, labels = c(1000, 5000, 10000, 20000, 40000),
at = 1:5, cex.axis = 1.6)
mtext("Sample size", at = 3,
side = 1, cex = 2, line = 4)
legend("topright", c("Mixture", "Kernel", "Normal", "ECDF")[1:length(methods)], lty = 1, lwd = 3,
col = colors[1:length(methods)], bty = "n", cex = 1.4)
}
#' Simple print method for object of class casestudy
#' @param x Object of class casestudy, generated via \link{run_casestudy}
#' @param ... Additional specifications (presently not in use)
#' @export
print.casestudy <- function(x, ...){
print("Object of class casestudy, generated via run_casestudy (scoringRules package).")
}
#' Summary method for class casestudy
#' @param object Object of class casestudy, generated via \link{run_casestudy}
#' @param ... Additional specifications (presently not in use)
#' @export
summary.casestudy <- function(object, ...){
# Get output component
x <- object$output
# Compute summary stats
for (rr in c("logs", "crps")){
sel1 <- which(x$rule == rr & x$thin == 1)
df1 <- data.frame(approx = x$method[sel1], sample_size = x$n[sel1],
value = x$val[sel1])
aux1 <- sort(unique(df1$approx))
aux2 <- sort(unique(df1$sample_size))
res <- matrix("", length(aux1), length(aux2))
row.names(res) <- aux1
colnames(res) <- paste0("m = ", aux2)
for (aa in aux1){
sel2 <- which(df1$approx == aa)
df2 <- data.frame(approx = df1$approx[sel2], sample_size = df1$sample_size[sel2],
value = df1$value[sel2])
dd <- tapply(df2$value, df2$sample_size, mean)
res[which(aux1 == aa), ] <- formatC(as.numeric(dd), format = "e", digits = 3)
}
# Print mean scores for current scoring rule
cat(paste("Mean score:", rr))
print(kable(res))
cat("\n")
}
}
#' Run the Monte Carlo study by KLTG (2021), or a smaller version thereof
#' @param s,a,n parameters characterizing the process from which data are simulated (see Section 4 and Table 4 of KLTG, 2021). Defaults to the values reported in the main text of the paper.
#' @param nr_iterations number of Monte Carlo iterations (defaults to 50).
#' @param zoom set to \code{TRUE} to produce results for a fine grid of small (MCMC) sample sizes, as in Figure 2 of KLTG (2021).
#' @param random_seed seed used for running the simulation experiment. Defaults to 816.
#' @return Object of class "mcstudy", containing the results of the analysis. This object can be passed to \code{plot} for plotting, see the documentation for \link{plot.mcstudy}.
#' @seealso \link{plot.mcstudy} produces a summary plot of the results generated by \link{run_mcstudy}
#' @details The full results in Section 4 of KLTG (2021) are based on \code{s = 2}, \code{a = 0.5},
#' \code{n = 12} and \code{nr_iterations = 1000}. Producing these results takes about 140 minutes on an
#' Intel i7 processor.
#' @references Krueger, F., Lerch, S., Thorarinsdottir, T.L. and T. Gneiting (2021): `Predictive inference based on Markov chain Monte Carlo output', \emph{International Statistical Review} 89, 274-301. \doi{10.1111/insr.12405}
#' @keywords replication
#' @export
#' @author Fabian Krueger
run_mcstudy <- function(s = 2, a = 0.5, n = 12, nr_iterations = 50,
zoom = FALSE, random_seed = 816){
# Collect inputs
input <- as.list(environment())
# Set random seed
set.seed(random_seed)
# True distribution
td <- ergdist(s, n)
# Vector of sample sizes to be considered
if (zoom){
smps <- seq(from = 50, to = 1000, by = 50)
add_z <- "_zoom"
} else {
smps <- c(1000, 5000, 20000, 50000)
add_z <- ""
}
n_smps <- length(smps)
# Maximal sample size
T <- max(smps)
# Result data frame
n_entries <- nr_iterations*n_smps*7
res_df <- data.frame(iteration = integer(n_entries), sample_size = integer(n_entries),
rule = character(n_entries), approx = character(n_entries),
value = numeric(n_entries), stringsAsFactors = FALSE)
aux_rules <- c(rep(c("logs", "crps"), 3), "crps")
aux_approx <- c(rep(c("norm", "mixp", "kdens"), each = 2), "ecdf")
ct <- 0
# Initial print
print(paste(Sys.time(), "- now starting run_mcstudy"))
# Loop over training samples
for (i in 1:nr_iterations) {
if (i %% 10 == 0) print(paste(Sys.time(), "- now running iteration", i, "out of", nr_iterations))
# Draw 'MCMC' training sample
v.fw <- sim.fw(a, s, n, T)$sim
x <- rnorm(T, mean = 0, sd = sqrt(v.fw))
# Loop over subsample sizes
for (k in 1:n_smps) {
# Select sample
smp <- x[1:smps[k]]
# Temporary vector for score divergences
tmp <- rep(0, 7)
# Estimator 1: Normal
m1 <- mean(smp)
s1 <- sd(smp)
try(tmp[1] <- div.ls.t.mixn(td$s, td$df, m1, s1)$kl)
tmp[2] <- div.crps.t.mixn(td$s, td$df, m1, s1)
# Estimator 2: Mixture
m2 <- rep(0, smps[k])
s2 <- sqrt(v.fw[1:smps[k]])
try(tmp[3] <- div.ls.t.mixn(td$s, td$df, m2, s2)$kl)
tmp[4] <- div.crps.t.mixn(td$s, td$df, m2, s2)
# Estimator 3: Kernel
s.tmp <- try(make.bw(smp, choice = 1))
if (!is(s.tmp, "try-error")){
try(tmp[5] <- div.ls.t.mixn(td$s, td$df, smp, s.tmp)$kl)
tmp[6] <- div.crps.t.mixn(td$s, td$df, smp, s.tmp)
}
# Estimator 4: EDF estimation
tmp[7] <- div.crps.t.edf(td$s, td$df, smp)
# Add to data frame
ind1 <- ct + 1
ind2 <- ct + 7
res_df$iteration[ind1:ind2] <- i
res_df$sample_size[ind1:ind2] <- smps[k]
res_df$rule[ind1:ind2] <- aux_rules
res_df$approx[ind1:ind2] <- aux_approx
res_df$value[ind1:ind2] <- tmp
# Move counter
ct <- ct + 7
}
}
# Final print
print(paste(Sys.time(), "- now finishing run_mcstudy"))
# Make output list
out <- list(input = input, output = res_df)
# Output result dataframe
structure(out, class = "mcstudy")
}
#' Plot the output of run_mcstudy
#' @param x object of class \code{mcstudy}, generated by \link{run_mcstudy}
#' @param ... additional parameters, see details below.
#' @return none, used for the effect of drawing a plot.
#' @details The plot is in the same format as Figure 1 or 2 in KLTG (2021), depending on the
#' parameters set when running \link{run_mcstudy}. These parameters also determine the plot content
#' (nr of MCMC chains, maximal sample size, etc). In terms of
#' additional inputs (\code{...}), the following are currently implemented:
#' \itemize{\item \code{scoring_rule}, the scoring rule for which results are to be plotted,
#' either \code{"crps"} or \code{"logs"}. Defaults to \code{"crps"}. \item \code{add_main_title}, logical,
#' whether to add main title to plot. Defaults to \code{TRUE}.}
#' @references Krueger, F., Lerch, S., Thorarinsdottir, T.L. and T. Gneiting (2021): `Predictive inference based on Markov chain Monte Carlo output', \emph{International Statistical Review} 89, 274-301. \doi{10.1111/insr.12405}
#' @keywords replication
#' @export
#' @seealso \link{run_mcstudy} produces the simulation results summarized by \link{plot.mcstudy}
#' @author Fabian Krueger
plot.mcstudy <- function(x, ...){
# Get output component of 'mcstudy' object
x <- x$output
# Dotted inputs
auxlist <- list(...)
if (is.null(auxlist$scoring_rule)){
scoring_rule <- "crps"
} else {
scoring_rule <- auxlist$scoring_rule
}
if (is.null(auxlist$add_main_title)){
add_main_title <- TRUE
} else {
add_main_title <- auxlist$add_main_title
}
# general stuff
n.methods <- ifelse(scoring_rule == "crps", 4, 3)
methods <- c("mixp", "kdens", "norm", "ecdf")
colors <- c("black", "blue", "red", "green")
# check whether input data frame contains zoom results
zoom <- max(x$sample_size) == 1000
if (zoom){
df2 <- data.frame(sample_size = x$sample_size[x$rule == scoring_rule &
x$approx == "mixp"],
value = x$value[x$rule == scoring_rule & x$approx == "mixp"])
# Compute quantiles for each sample size
dat <- as.matrix(aggregate(df2$val, by = list(df2$sample_size),
function(z) quantile(z, c(0.1, 0.5, 0.9))))
# Plot
plot(x = dat[,1], y = dat[,3], col = colors[1], pch = 20, cex = 1.4,
ylab = "Score divergence",
ylim = range(dat[,-1]), xlab = "Sample size",
cex.lab = 1.4, bty = "n")
for (j in 1:nrow(dat)){
segments(x0 = dat[j, 1], y0 = dat[j, 2], y1 = dat[j, 4], lwd = 3)
}
} else {
slct <- x$rule == scoring_rule
df2 <- data.frame(iteration = x$iteration[slct], sample_size = x$sample_size[slct],
approx = x$approx[slct], value = x$value[slct])
sizes <- c(1000, 5000, 20000, 50000)
# Figure 1 in paper
tune <- 44 # graphical tuning parameter, must be bigger than 20
plot(x = 1:tune, ylim = c(0, quantile(df2$value, 0.99)), type = "n",
ylab = "Score divergence", xlab = "Sample size", bty = "n",
xaxt = "n", cex.lab = 1.4)
for (s in 1:4){
for (m in 1:n.methods){
tmp <- df2$value[df2$approx == methods[m] & df2$sample_size == sizes[s]]
vv <- quantile(tmp, c(0.1, 0.5, 0.9))
xloc <- (s-1)*(tune/4) + m # horizontal location of entry
segments(x0 = xloc, y0 = vv[1], y1 = vv[3], col = colors[m], lwd = 3)
points(x = xloc, y = vv[2], col = colors[m], pch = 15, cex = 1.4)
}
}
axis(1, at = (tune/4)*(0:3) + 2.5, labels = sizes, cex.axis = 1.4)
legend("topright", c("Mixture", "Kernel", "Normal", "ECDF")[1:n.methods], lty = 1, lwd = 3,
col = colors[1:n.methods], bty = "n", cex = 1.4)
}
if (add_main_title){
rule_print <- ifelse(scoring_rule == "crps", "CRPS", "Log Score")
nr_iter <- max(x$iteration)
title(main = paste0(rule_print, ", based on ", nr_iter, " MC iterations"),
cex.main = 1.6)
}
}
#' Simple print function for object of class mcstudy
#' @param x Object of class mcstudy, generated via \link{run_mcstudy}
#' @param ... Additional specifications (presently not in use)
#' @export
print.mcstudy <- function(x, ...){
print("Object of class mcstudy, generated via run_mcstudy (scoringRules package).")
}
#' Simple summary method for class mcstudy
#' @param object Object of class mcstudy, generated via \link{run_mcstudy}
#' @param ... Additional specifications (presently not in use)
#' @export
summary.mcstudy <- function(object, ...){
# Get output component of mcstudy
x <- object$output
# Compute summary stats
for (rr in c("logs", "crps")){
sel1 <- which(x$rule == rr)
df1 <- data.frame(approx = x$approx[sel1], sample_size = x$sample_size[sel1],
value = x$value[sel1])
aux <- sort(unique(df1$approx))
res <- matrix("", length(aux), 4)
row.names(res) <- aux
colnames(res) <- paste0("m = ", sort(unique(df1$sample_size)))
for (aa in aux){
sel2 <- which(df1$approx == aa)
df2 <- data.frame(approx = df1$approx[sel2], sample_size = df1$sample_size[sel2],
value = df1$value[sel2])
dd <- tapply(df2$value, df2$sample_size, mean)
res[which(aux == aa), ] <- formatC(as.numeric(dd), format = "e", digits = 2)
}
# Print mean scores for current scoring rule
cat(paste("Mean score divergence:", rr))
print(kable(res))
cat("\n")
}
}
#' Bayesian analysis of a Markov Switching autoregressive model
#' @param y numeric vector (time series to be analyzed).
#' @param nlag integer, number of autoregressive lags (defaults to one).
#' @param beta_switch,variance_switch logicals, indicating whether there should be Markovian state
#' dependence in regression parameters and residual variance, respectively. Defaults to
#' \code{beta_switch = FALSE}, \code{variance_switch = TRUE}.
#' @param identification_constraint character, indicating how to identify latent states. Possible values:
#' \code{"variance"}, \code{"mean"} and \code{"persistence"}. Defaults to \code{"variance"}.
#' @param n_burn,n_rep integers, number of MCMC iterations for burn-in and main analysis.
#' @param forecast_periods number of future periods for which forecasts are computed.
#' @param printout logical, whether to print progress report during MCMC (defaults to \code{FALSE}).
#' @param Hm1_delta,mu_delta,s_,nu_,R prior parameters as described in KLTG (2021, Appendix E and Table 4).
#' @return List containing parameter estimates and forecasts, with the following elements:
#' \itemize{
#' \item \code{pars}, matrix of posterior draws for parameters (rows are MCMC iterations, columns are parameters)
#' \item \code{fcMeans} and \code{fcSds}, matrices of forecast means and standard deviations (rows are MCMC iterations, columns are forecast horizons)
#' \item \code{probs}, matrix of filtered probabilities for first latent state (rows are MCMC iterations, columns are time periods, excluding the first \code{nlag} values for initialization).
#' \item \code{count}, integer, counter for the number of states that were relabeled based on \code{identification_constraint}.
#' }
#' @details The default parameters are as set by KLTG (2021, Section 5). The output matrices \code{fcMeans} and \code{fcSds} can be used to construct
#' the mixture-of-parameters estimator analyzed by KLTG. While many of the model features can be changed as described above, the number of Markov regimes is always fixed at two.
#'
#' \link{ar_ms} is an R/C++ implementation of Matlab code kindly shared by Gianni Amisano via his website (\url{https://sites.google.com/site/gianniamisanowebsite/}). See Amisano and Giacomini (2007) who analyze a similar model.
#' @references Amisano, G. and R. Giacomini (2007), `Comparing density forecasts via weighted likelihood ratio tests', Journal of Business and Economic Statistics 25, 177-190. \doi{10.1198/073500106000000332}
#'
#' Krueger, F., Lerch, S., Thorarinsdottir, T.L. and T. Gneiting (2021): `Predictive inference based on Markov chain Monte Carlo output', \emph{International Statistical Review} 89, 274-301. \doi{10.1111/insr.12405}
#' @seealso \link{run_casestudy} uses \link{ar_ms} to replicate the results of KLTG (2021, Section 5).
#' @author Fabian Krueger, based on Matlab code by Gianni Amisano (see details section)
#' @export
#' @examples
#' \dontrun{
#' # Use GDP data from 2014Q4 edition
#' data(gdp)
#' dat <- subset(gdp, vint == "2014Q4")
#' y <- dat$val[order(dat$dt)]
#'
#' # Fit model, using the default settings
#' set.seed(816)
#' fit <- ar_ms(y)
#'
#' # Histograms of parameter draws
#' par(mfrow = c(2, 2))
#' hist(fit$pars[,1], main = "Intercept (state-invariant)", xlab = "")
#' hist(fit$pars[,2], main = "AR(1) term (state-invariant)", xlab = "")
#' hist(1/fit$pars[,3], main = "Residual variance in 1st state", xlab = "")
#' hist(1/fit$pars[,4], main = "Residual variance in 2nd state", xlab = "")
#'
#' # By construction, the residual variance is smaller in the 1st than in the 2nd state:
#' print(mean(1/fit$pars[,3] < 1/fit$pars[,4]))
#' }
ar_ms <- function(y, nlag = 1, beta_switch = FALSE, variance_switch = TRUE, identification_constraint = "variance",
n_burn = 5000, n_rep = 20000, forecast_periods = 5, printout = FALSE,
Hm1_delta = 25, mu_delta = 0, s_ = 0.3, nu_ = 3,
R = matrix(c(8, 2, 2, 8), nrow = 2)){
check <- (beta_switch == FALSE & identification_constraint == "mean") | (variance_switch == FALSE & identification_constraint == "variance")
if (check) stop("Please choose an identification constraint that matches the specification")
# Fix number of states at two
nm <- 2
# Make regressor matrix, adjust sample for regressand
x <- matrix(1, length(y) - nlag, nlag + 1)
for (ll in 1:nlag) x[, ll + 1] <- y[(nlag-ll+1):(length(y)-ll)]
y <- y[(nlag+1):length(y)]
# Update sample size
T <- length(y)
# Adjust some switches according to varying/constant betas and variances
if (beta_switch == TRUE){
k1 <- 0
k2 <- nlag + 1
} else {
k1 <- nlag + 1
k2 <- 0
}
indSigma <- variance_switch
indCount <- 0
# Prior Stuff
c1 <- Hm1_delta
betaPriorPrec <- diag(rep(1, k1 + k2*nm)) * c1
betaPriorMean <- rep(mu_delta, k1 + k2*nm)
Sv <- rep(s_, 1 + indSigma)
nuv <- rep(nu_, 1 + indSigma)
rm <- R
betaPriorPrecMean <- betaPriorPrec %*% matrix(betaPriorMean)
# Gibbs settings
nskip1 <- 1
M1 <- M2 <- n_burn + n_rep
printseq <- seq(M2/10, M2, M2/10)
# Parameter counts etc
k <- k1 + k2
npar <- k1 + (k2+1)*nm + (nm^2)
nk <- dim(x)[2]
# Initialize matrices
fcMeans <- fcSds <- matrix(0, M1, forecast_periods)
thetaMat <- matrix(0, M1, npar)
filprob.all <- matrix(0, M1, T)
sm <- matrix(0, M1, T)
x1 <- x2 <- {}
if (k1 > 0){
x1 <- as.matrix(x[, 1:k1])
}
if (k2 > 0){
x2 <- as.matrix(x[, (k1+1):(k1+k2)])
}
# Draw from prior
aux <- drawP(T, nm, rep(0, T), rm)
P <- aux$Pdraw
pv <- p <- aux$pv
sv <- rep(0, T)
for (t in 1:T){
sv[t] <- drawMultinom(1, pv)
pv <- t(P[sv[t], ])
}
hv <- rchisq(length(nuv), nuv)/Sv
if (indSigma == 0) hv <- rep(hv[1], nm)
# Gibbs loop
start.time <- Sys.time()
iskip <- 0
for (im in 1:M2){
iskip <- iskip + 1
# beta
aux <- drawBeta(y, x1, x2, hv, sv, T, k1, k2, nm, betaPriorPrec, betaPriorPrecMean)
beta <- aux$betaDraw
epsv <- aux$epsv
# hv
hv <- drawHv(T, nm, indSigma, Sv, nuv, epsv, sv)
# state
aux <- drawState(T, k1, k2, nm, indSigma, p, P, y, x1, x2, beta, hv)
lnl <- aux$lnl
filprob <- aux$filprob
sv <- aux$simstate
# transition matrix
aux <- drawP(T, nm, sv, rm)
P <- aux$P
p <- aux$pv
tm <- aux$tm
# rearranging states to comply with constraint
if (identification_constraint == "variance"){
# Identify states according to residual variances
indexConstraint <- hv[1] < hv[2]
} else if (identification_constraint == "mean"){
# Identify states according to intercepts
indexConstraint <- beta[k1+1] < beta[k1+k2+1]
} else if (identification_constraint == "persistence"){
# Identify states according to persistence of states
indexConstraint <- P[1, 1] < P[2, 2]
}else {
indexConstraint <- FALSE
}
# If constraint is violated: Re-arrange states
if (indexConstraint == TRUE){
indCount <- indCount + 1
# Rearrange beta
if (k1 > 0){
betaTmp <- beta[1:k1]
} else {
betaTmp <- {}
}
if (k2 > 0){
betaTmp <- c(betaTmp, beta[(k1+k2+1):(k1+k2*2)], beta[(k1+1):(k1+k2)])
}
beta <- betaTmp
# Relabel state draws (missing in previous version!)
sv <- 1*(sv == 2) + 2*(sv == 1)
# Rearrange variances, transition probabilities etc
ndxv <- c(2, 1)
hv <- hv[ndxv]
P <- P[ndxv, ndxv]
p <- p[ndxv]
tm <- tm[ndxv, ndxv]
filprob <- filprob[, ndxv]
}
theta <- c(beta, hv, as.vector(P))
if (iskip == nskip1){
im2 <- im/nskip1
thetaMat[im2, ] <- theta
filprob.all[im2, ] <- filprob[,1]
sm[im2, ] <- t(sv == 1)
aux <- predDensAR(y, p = nlag, theta = theta, nm = nm, fpv = filprob[T, ], beta.switch = beta_switch,
variance.switch = variance_switch, hmax = forecast_periods)
fcMeans[im2, ] <- aux$m
fcSds[im2, ] <- aux$s
iskip <- 0
}
if (printout & im %in% printseq){
print(paste(round(im/M2, 3) * 100, "percent complete"))
}
}
cputime <- Sys.time() - start.time
inds <- (n_burn+1):M1
return(list(pars = thetaMat[inds, 1:npar], fcMeans = fcMeans[inds, ], fcSds = fcSds[inds, ], probs = filprob.all[inds, ], count = indCount))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.