#' @useDynLib rogali, .registration = TRUE
#' @importFrom Rcpp sourceCpp
#' @importFrom rlang !!
#' @importFrom magrittr %>%
#' @import ggplot2
#' @importMethodsFrom texreg extract
NULL
.onAttach <- function(libname, pkgname) {
packageStartupMessage(
"rogali defined the options gBurn = .2 and gThin = 1.\nYou can change them using options(gBurn = x, gThin = y)"
)
}
.onLoad <- function(libname, pkgname) {
options(gBurn = .2, gThin = 1)
}
#' Estimate a gibbs sampler
#'
#' @export
#'
#' @param formula \code{formula} for the main model
#' @param data \code{data.frame} for the main model
#' @param formulaMun \code{formula} for the municipalities model
#' @param dataMun \code{data.frame} for the municipalities model
#' @param munCol column in both \code{data} and \code{dataMun}
#' containing the municipality id, syntax as in \code{dplyr}
#' @param zCol column in both \code{data} and \code{dataMun}
#' containing the (possibly missing) municipality type, syntax as in \code{dplyr}
#' @param prior call to \code{\link{normal_prior}} to specify the prior
#' @param nSamples number of samples to be drawn per chain
#' @param nChains number of chains to be drawn
#' @param parallel should estimation of the chains be parallelized? if \code{TRUE}, the \code{cl} argument shoud not be \code{NULL}
#' @param cl cluster used for parallelization
#' @details Make sure to check the \link[=gibbsmethods]{methods} man page
#' to see related methods, as well as the \link{posteriorprobs} man page
#' for how to deal with predicted probabilities.
#' @return a \code{gibbs} object with (among others) the following components:
#' \describe{
#' \item{\code{estimates}}{an \code{\link[=estimatesmethods]{estimates}} object of length \code{nChains}.}
#' }
#'
gibbs <- function(formula, data, formulaMun, dataMun,
munCol = j, zCol = Z, yearCol = year,
prior = normal_prior(),
nSamples = 1000,
nChains = 1, parallel = F, cl = NULL) {
munCol <- rlang::enquo(munCol)
zCol <- rlang::enquo(zCol)
yearCol <- rlang::enquo(yearCol)
cl <- match.call()
dataMun <- dplyr::arrange(dataMun, !!munCol)
data <- dplyr::arrange(data, !!munCol)
notObs <- as.integer(is.na(dplyr::pull(dataMun, !!zCol)))
muns <- dplyr::pull(dataMun, !!munCol)
zOrig <- dplyr::pull(dataMun, !!zCol)
names(zOrig) <- muns
index <- match(dplyr::pull(data, !!munCol), unique(dplyr::pull(data, !!munCol)))
index <- index - 1
replace0 <- list(0)
names(replace0) <- rlang::as_name(zCol)
data0 <- dplyr::mutate(data, !!zCol := 0)
data1 <- dplyr::mutate(data, !!zCol := 1)
dataMun <- tidyr::replace_na(dataMun, replace0)
y <- model.response(model.frame(formula, data0))
y <- cbind(y[,2:3], y[,1])
z <- model.response(model.frame(formulaMun, dataMun))
x0 <- model.matrix(delete.response(formula), data0)
x1 <- model.matrix(delete.response(formula), data1)
xMun <- model.matrix(delete.response(formulaMun), dataMun)
nObs <- getNObs(formula, data0, !!munCol, !!yearCol)
rm(data, dataMun, data0, data1)
namesGamma <- colnames(xMun)
namesBeta <- colnames(x0)
kGamma <- length(namesGamma)
kBeta <- length(namesBeta)
protoPrior <- prior
if(class(protoPrior) == "informative_prior") {
protoPrior <- "informative_prior"
} else {
prior <- makePrior(protoPrior, kGamma, kBeta)
}
out <- list()
if(!parallel) {
out$estimates <- lapply(
1:nChains, gibbsFit,
y = y, z = z,
x0 = x0, x1 = x1, xMun = xMun,
index = index,
notObs = notObs,
priorGamma = prior$gamma, priorVGamma = prior$VGamma,
priorBeta = prior$beta, priorVBeta0 = prior$VBeta0, priorVBeta1 = prior$VBeta1,
nSamples = nSamples,
nChains = nChains
)
} else {
out$estimates <- parallel::parLapply(
cl, 1:nChains, gibbsFit,
y = y, z = z,
x0 = x0, x1 = x1, xMun = xMun,
index = index,
notObs = notObs,
priorGamma = prior$gamma, priorVGamma = prior$VGamma,
priorBeta = prior$beta, priorVBeta0 = prior$VBeta0, priorVBeta1 = prior$VBeta1,
nSamples = nSamples,
nChains = nChains
)
}
out$estimates <- purrr::map(out$estimates, function(param, namesGamma, namesBeta, muns) {
rownames(param$gamma) <- namesGamma
dimnames(param$beta) <- list(namesBeta, NULL, NULL)
rownames(param$pZ) <- muns
class(param) <- "estimate"
param
}, namesGamma, namesBeta, muns)
class(out$estimates) <- "estimates"
out$types <- zOrig
names(out$types) <- muns
out$call <- cl
out$prior <- protoPrior
out$nSamples <- nSamples
out$nChains <- nChains
out$nSpell <- nObs$nSpell
out$nObs <- nObs$nObs
out$nMun <- nObs$nMun
out$munCol <- munCol
out$zCol <- zCol
out$zOrig <- zOrig
class(out) <- "gibbs"
out
}
#' Methods for \code{gibbs} objects
#' @param ... use to select variables a la \code{\link[dplyr]{select}}
#' @param level levels for confidence intervals
#' @param digits number of digits to be displayed
#' @param burn number or percent of iterations to discard
#' @param thin thinning interval. Keep every \code{thin}-th iteration.
#' @param what a \code{character} describing the kind of plot to be returned.
#' @param data if \code{TRUE}, return data used to construct the plot instead of plot
#' @param ncol number of columns for layout.
#' @name gibbsmethods
NULL
#' @export
#' @rdname gibbsmethods
coef.gibbs <- function(object, ..., burn = getOption("gBurn"), thin = getOption("gThin")) {
co <- as_tibble.estimates(object$estimates, ..., burn = burn, thin = thin, details = F)
co <- as.matrix(co)
colMeans(co)
}
sd.gibbs <- function(x, ..., burn = getOption("gBurn"), thin = getOption("gThin")) {
co <- as_tibble.estimates(x$estimates, ..., burn = burn, thin = thin, details = F)
co <- as.matrix(co)
apply(co, 2, sd)
}
#' @export
#' @rdname gibbsmethods
confint.gibbs <- function(object, ..., level = c(.9, .95), burn = getOption("gBurn"), thin = getOption("gThin")) {
coefs <- as_tibble.estimates(object$estimates, ..., burn = burn, thin = thin, details = F)
coefs <- as.matrix(coefs)
alpha <- (1-level)/2
alpha <- sort(unique(c(alpha, 1-alpha)))
t(apply(coefs, 2, quantile, probs = alpha))
}
#' @export
#' @rdname gibbsmethods
summary.gibbs <- function(
object,
...,
burn = getOption("gBurn"), thin = getOption("gThin"),
level = c(0, .5, .95),
digits = 1) {
out <- list(
coefficients = coef(object, ... = ..., burn = burn, thin = thin),
sd = sd.gibbs(object, ... = ..., burn = burn, thin = thin),
ci = confint(object, ... = ..., burn = burn, thin = thin, level = level),
nSamples = object$nSamples,
nChains = object$nChains,
call = object$call,
nObs = object$nObs,
nMun = object$nMun,
nSpell = object$nSpell,
prior = object$prior,
digits = digits
)
estimates <- as.matrix.estimates(object$estimates)
out$k <- ncol(estimates)
out$totSamples <- nrow(estimates)
estimates <- bt(estimates, burn = burn, thin = getOption("gThin"))
out$nBurn <- out$totSamples - nrow(estimates)
estimates <- bt(estimates, burn = 0, thin = thin)
out$nThin <- out$totSamples - out$nBurn - nrow(estimates)
out$nRemaining <- nrow(estimates)
out$kselect <- length(out$coefficients)
mlist <- as.mcmc.list.estimates(object$estimates, ... = ..., burn = burn, thin = thin)
if(object$nChains == 1) {
out$geweke <- coda::geweke.diag(mlist, frac1 = .1, frac2 = .5)[[1]]$z
out$gewekePval <- 2 * stats::pnorm(abs(out$geweke), lower.tail = F)
} else {
out$Rhat <- coda::gelman.diag(mlist, confidence = .95, autoburnin = F, multivariate = F)$psrf[,2]
}
out$n_eff <- round(coda::effectiveSize(mlist))
out$mcse <- summary(mlist)$statistics[,4]
class(out) <- "summary.gibbs"
out
}
#' @export
print.summary.gibbs <- function(x) {
cat("Call:\n")
print(x$call)
cat("\nModel Info: \n\n")
if(class(x$prior) == "normal_prior") {
sprintf(" %-13s gamma ~ N(%.*f, %.*f^2), beta1 ~ N(%.*f, %.*f^2), beta2 ~ N(%.*f, %.*f^2)\n",
"prior:",
x$digits, x$prior$mean$gamma, x$digits, x$prior$sd$gamma,
x$digits, x$prior$mean$beta1, x$digits, x$prior$sd$beta1,
x$digits, x$prior$mean$beta2, x$digits, x$prior$sd$beta2) %>% cat()
} else {
sprintf(" %-13s informative",
"prior:") %>% cat()
}
sprintf(" %-13s %i (= %i total - %i burn-in - %i thinning; %i chains)\n",
"sample:", x$nRemaining, x$totSamples, x$nBurn, x$nThin, x$nChains) %>% cat()
sprintf(" %-13s %i spells, %i individuals, %i municipalities\n",
"observations:", x$nSpell, x$nObs, x$nMun) %>% cat()
sprintf(" %-13s %i\n\n",
"predictors:", x$k) %>% cat()
if(x$k == x$kselect) {
cat("Estimates: \n")
} else {
sprintf("Estimates: (showing %i/%i predictors)\n", x$kselect, x$k) %>% cat()
}
comat <- cbind(x$coefficients, x$sd, x$ci)
colnames(comat)[1:2] <- c("mean", "sd")
comat <- round(comat, x$digits)
print(comat)
cat("\nDiagnostics: \n")
diagmat <- cbind(mcse = round(x$mcse, x$digits), n_eff = x$n_eff)
if(x$nChains == 1) {
diagmat <- cbind(diagmat, gwk = round(x$geweke, x$digits), `p-v` = x$gewekePval)
printCoefmat(diagmat, has.Pvalue = T, P.values = T)
} else {
diagmat <- cbind(diagmat, Rhat = round(x$Rhat, x$digits))
print(diagmat)
}
}
#' @rdname gibbsmethods
#' @export
plot.gibbs <- function(x, ...,
what = c("trace", "density", "coef"),
burn = getOption("gBurn"),
thin = getOption("gThin"),
data = F, ncol = 2) {
if(what[1] == "trace") {
tracePlot(x = x, ... = ..., burn = burn, thin = thin, data = data, ncol = ncol)
} else if(what[1] == "density") {
densityPlot(x = x, ... = ..., burn = burn, thin = thin, data = data, ncol = ncol)
} else {
coefPlot(x = x, ... = ..., burn = burn, thin = thin, data = data, ncol = ncol)
}
}
captionSentence <- function(x, burn, thin) {
sm <- summary(x, burn = burn, thin = thin)
sprintf(
"%s iterations after %s burn-in and thinning of %i over %i chains",
scales::comma(sm$nRemaining),
scales::comma(sm$nBurn),
thin,
x$nChains
)
}
tracePlot <- function(x, ..., burn, thin, data, ncol) {
pl <- as_tibble.estimates(x$estimates, ... = ..., burn = burn, thin = thin)
pl <- tidyr::gather(pl, predictor, estimate, -chain, -iteration) %>%
dplyr::mutate(chain = as.factor(chain))
if(data) {
pl
} else {
if(x$nChains == 1) {
out <- ggplot(pl, aes(x = iteration, y = estimate)) +
geom_line() +
geom_smooth(se = F)
} else {
out <- ggplot(pl, aes(x = iteration, y = estimate, color = chain)) +
geom_line(alpha = .5)
}
out <- out +
labs(
title = "Trace of estimates",
caption = captionSentence(x, burn = burn, thin = thin)
) +
facet_wrap(~ predictor, ncol = ncol, scales = "free_y")
out
}
}
densityPlot <- function(x, ..., burn, thin, data, ncol) {
pl <- as_tibble.estimates(x$estimates, ... = ..., burn = burn, thin = thin, details = F)
pl <- tidyr::gather(pl, predictor, estimate)
pl2 <- tibble::enframe(coef(x, ... = ..., burn = burn, thin = thin)) %>%
dplyr::rename(predictor = name, mean = value)
if(data) {
list(pl, pl2)
} else {
ggplot(pl, aes(estimate)) +
geom_density() +
geom_vline(xintercept = 0, lty = "dotted") +
geom_vline(data = pl2, aes(xintercept = mean), color = "red") +
labs(
title = "Density of estimates",
caption = captionSentence(x, burn = burn, thin = thin)) +
facet_wrap(~ predictor, ncol = ncol, scales = "free")
}
}
coefPlot <- function(x, ..., burn, thin, data, ncol) {
pl <- tibble::enframe(coef(x, ... = ..., burn = burn, thin = thin)) %>%
dplyr::rename(predictor = name, estimate = value)
ci <- tibble::as_tibble(confint(x, ... = ..., burn = burn, thin = thin))
pl <- dplyr::bind_cols(pl, ci) %>%
dplyr::mutate(predictor = forcats::fct_rev(predictor))
if(data) {
pl
} else {
ggplot(pl, aes(x = predictor, y = estimate, ymin = `2.5%`, ymax = `97.5%`)) +
geom_point() +
geom_linerange() +
geom_linerange(aes(ymin = `5%`, ymax = `95%`), lwd = 1) +
geom_hline(yintercept = 0, lty = "dotted") +
coord_flip() +
labs(
title = "Coefficient plot",
caption = captionSentence(x, burn = burn, thin = thin))
}
}
#' @export
#' @rdname posteriorprobs
posterior_probs.gibbs <- function(
object, burn = getOption("gBurn"), thin = getOption("gThin")
) {
pr <- as.matrix.estimates(object$estimates, burn = burn, thin = thin, what = "probs")
pr <- colMeans(pr)
tibble(
!!object$munCol := names(object$zOrig),
!!object$zCol := object$zOrig,
prob = pr
)
}
#' @rdname posteriorprobs
#' @export
confusion_table.gibbs <- function(
object, cutoff = .5, burn = getOption("gBurn"), thin = getOption("gThin")
) {
pr <- posterior_probs(object, burn = burn, thin = thin) %>% na.omit() %>%
dplyr::mutate(prob = as.integer(prob > cutoff))
table(predicted = pr$prob, true = dplyr::pull(pr, !!object$zCol))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.