#' glmBayes: Bayesian Generalized Linear Models
#'
#' This implements the Zellner-Siow Cauchy g-prior for generalized linear models
#'
#' @title glmBayes: Bayesian Generalized Linear Models
#' @param x argument
#' @param ... other arguments
#' @examples
#' glmBayes()
#' @rdname glmBayes
#' @export glmBayes
glmBayes <- function(x, ...){
UseMethod("glmBayes")
}
#' glmBayes: Bayesian Generalized Linear Models
#'
#' @description This implements the Zellner-Siow Cauchy g-prior for generalized linear models.
#'
#'
#' @param formula model formula
#' @param data a data frame
#' @param family "gaussian", "poisson", "negbinom", "binomial", "gamma", "invgauss"
#' @param link the link function. see details for available options.
#' @param g a custom value for g prior. If left as NULL, the default is the ratio of sample size to predictors.
#' @param threshold tolerance for convergence
#' @param max_iter max iterations for IRWLS algoritm
#' @param sim if TRUE, draws the specified number of samples from the posterior distribution. defaults to FALSE.
#' @param nsim the number of posterior samples. defaults to 4000.
#' @details
#' The following distributions and corresponding link functions are available: \cr
#' \cr
#' Gaussian: "identity" \cr
#' Binomial: "logit", "probit", "cauchit", "robit" (Student T with 3 df), and "cloglog" \cr
#' Poisson & Negative Binomial: "log" \cr
#' Gamma: "inverse" (1 / x) \cr
#' Inverse Gaussian: "invsquare" (1/x^2)
#'
glmBayes = function(formula, data, family = "gaussian", link = NULL, g = NULL, max_iter = 9000, sim = FALSE, nsim = 4000, threshold = 1e-9){
thiscall = match.call()
mf = thiscall
m = match(c("formula", "data"), names(mf), 0)
mf = mf[c(1,m)]
mf[[1]] = as.name("model.frame")
mf = eval(mf, parent.frame())
mt = attr(mf, "terms")
X <- model.matrix(formula, data)
y <- as.vector(model.frame(formula,data)[,1])
if (is.null(g)){
g <- nrow(X)/ncol(X)
} else {
if (is.function(g)){
g = g(nrow(X), ncol(X))
} else {
g = g
}
}
testInteger <- function(x){
test <- all.equal(x, as.integer(x), check.attributes = FALSE)
if(test == TRUE){ return(TRUE) }
else { return(FALSE) }
}
if (is.null(family)){
if (all(y >= 0)){
if (testInteger(y)){
if (length(unique(y)) == 2){
family <- "binomial"
}
else {
family <- "poisson"
}
}
else {
family <- "gamma"
}
}
else {
family <- "gaussian"
}
}
if (is.null(link)) {
if (family=="gaussian") link <- "identity"
else if (family=="binomial") link <- "logit"
else if (family=="poisson") link <- "log"
else if (family=="negbinom") link <- "log"
else if (family=="gamma") link <- "inverse"
else if (family=="invgauss") link <- "invsquare"
else stop("unknown family", family)
}
X <- as.matrix(X)
dx <- dim(X)
if (family == "binomial"){
if (is.factor(y)) {
levs = levels(y)
if( length(levs) > 2 )
levs = c(levs[1], "Other")
y <- y != levels(y)[1]
}
else levs = unique(y)
if(length(levs) > 2)
warning("y has more than two levels")
if (is.factor(y)){
y <- as.numeric(y) - 1
}
if (is.numeric(y)){
maxy <- max(y)
miny <- min(y)
y[which(y > miny)] <- 1
y[which(y == miny)] <- 0
}
if (is.character(y)){
y <- as.numeric(as.factor(y)) - 1
}
if (any(y < 0 | y > 1))
stop("y values must be 0 <= y <= 1")
}
#initial value bigger than threshold so that we can enter our while loop
diff = 10000
#counter to ensure we're not stuck in an infinite loop
iter_count = 0
calc_p = function(X,beta)
{
beta = as.vector(beta)
return(X%*%beta)
}
beta = rep(0, ncol(X))
#### iteratively reweighted least squares ####
while(diff > threshold ) {
#calculate linear predictor and fitted values using current estimate of beta
eta = as.vector(calc_p(X, beta))
p = as.vector(linkinv(eta, family, link))
#calculate matrix of weights W
W = diag(mu.eta(p, family, link)^2/as.vector(variance.gpls(p, family, link)))
#calculate the standard errors, coefficients, hat matrix, sum of squared residuals
Vbeta <- g*cvreg:::ridgepow(t(X)%*%(W%*%X),-1)/(g+1)
beta_change = Vbeta %*% (t(X) %*% (y - p))
hat.matrix <- (g/(g+1)) * X %*% cvreg:::ridgepow(t(X) %*% (W %*% X),-1) %*% t(X)
SSR <- t(y) %*% ( diag(1,nrow=nrow(X)) - hat.matrix ) %*% y
#update beta
beta = beta + beta_change
#calculate how much we changed beta by in this iteration
#if this is less than threshold, we'll break the while loop
diff = sum(beta_change^2)
#see if we've hit the maximum number of iterations
iter_count = iter_count + 1
if(iter_count > max_iter) {
stop("Convergence failed :( ")
}
}
linear.predictor <- as.vector(as.vector(beta * (g/(g+1))) %*% t(X))
fitted <- linkinv(eta, family, link)
resids <- (y - fitted) * sqrt(W)/sqrt(variance.gpls(fitted, family, link))
resids <- as.vector(diag(resids))
if (family == "gaussian"){
sigma2 <- mean(resids^2)
std.errors <- sqrt(sigma2 * diag(Vbeta))
} else {
std.errors <- sqrt(diag(Vbeta))
}
coefficients <- as.vector(beta * (g/(g+1)))
names(coefficients) <- colnames(X)
out <- structure(list(coefficients = coefficients, std.err = std.errors, fitted = fitted, linear.predictor = linear.predictor, family = family, link = link, call = thiscall, model.matrix = X, model.frame = model.frame(formula, data), terms = mt, g = g, residuals = resids, vcov = Vbeta), class = "glmBayes")
if (sim){
if (family == "gaussian"){
df = max(nrow(X) - ncol(X), 3)
sims <- mvtnorm::rmvt(nsim, df = df, delta = coefficients, sigma = sigma2 * Vbeta * ((df+g-2)/(df+g+1)), method = "svd", type = "shifted")
colnames(sims) <- colnames(X)
} else {
df = max(nrow(X) - ncol(X), 3)
sims <- mvtnorm::rmvt(nsim, df = df, delta = coefficients, sigma = Vbeta * ((df+g-2)/(df+g+1)), method = "svd", type = "shifted")
colnames(sims) <- colnames(X)
}
out$sims <- sims
}
if (family == "binomial"){
out$levs <- levs
}
out$hat <- hat.matrix
out$weights <- W
return(out)
}
#' Print method for glmBayes models
#'
#' @param x the model
#' @param digits digits to round
#' @method print glmBayes
#' @return none
#' @export
#'
print.glmBayes = function(x, digits = 3) {
cat("Call:\n", deparse(x$call), "\n", sep = "")
cat(crayon::green("\nFamily: "), noquote(x$family), "(link = ", noquote(x$link), ")", "\n", sep = "")
if (length(coef(x))) {
cat(crayon::magenta("\nCoefficients:\n"))
print.default(x$coefficients, print.gap = 1, quote = FALSE, digits = digits)
}
if (x$family == "negbinom"){
purpletext <- crayon::make_style(rgb(.51, .098, .635, 0.95), bg = FALSE)
cat(purpletext("\n\nNegative Binomial Excess Dispersion Parameter", " : ", round(noquote(x$theta), digits), "\n"))
}
invisible(x)
}
#' Summary for glmBayes models
#'
#' @param x the model
#' @param digits digits to round
#' @param cred.level the width of the credible interval. Defaults to 0.95 for a 95 pct Credible Interval.
#' @method summary glmBayes
#' @return none
#' @export
#'
summary.glmBayes = function(x, digits = 3, cred.level = 0.95) {
cat("Call:\n", deparse(x$call), "\n", sep = "")
cat(crayon::green("\nFamily: "), noquote(x$family), "(link = ", noquote(x$link), ")", "\n", sep = "")
cat(crayon::blue("\nCoefficients: \n\n"))
low = (1 - cred.level) / 2
high = 1 - low
df <- nrow(x$model.matrix) - length(x$coefficients)
if (df <= 0){
df <- length(x$coefficients)
}
g <- x$g
results <- data.frame(estimates = round(x$coefficients, 3), "Std.Error" = round(x$std.err, 4), "lowerCI" = round(qst(low, df, x$coefficients, x$std.err), 3), "upperCI" = round(qst(high, df, x$coefficients, x$std.err), 3))
colnames(results) <- c("Estimate", "Std. Error", "Lower CI", "Upper CI")
return(results)
}
#' Predict method for glmBayes models
#'
#' @param model the model
#' @param newdata the new data
#' @param type either "link" (untransformed linear predictor), "response" (transformed prediction),
#' or "class" (for binomial models only)
#'
#' @return none
#' @method predict glmBayes
#' @export
#'
predict.glmBayes <- function(object, newdata = NULL, type = c("response", "link", "class")){
if (!inherits(object, "glmBayes"))
stop("object not of class glmBayes")
tt <- terms(object)
type <- match.arg(type)
family <- object$family
link <- object$link
formula <- object$formula
coefs <- coef(object)
if (is.null(newdata)){
x <- object$model.matrix
} else{
if (!is.data.frame(newdata)){
newdata <- as.data.frame(newdata)
}
x <- model.matrix(formula, newdata)
}
eta <- as.vector(coefs %*% t(x))
preds <- linkinv(eta, family=family, link = link)
if (type == "class"){
if (family != "binomial"){
stop("type 'predict' is only valid for predicting class labels for binomial models.")
}
else {
levs <- object$levs
classification <- preds > 0.50
labels <- ifelse(classification, levs[1], levs[2])
return(labels)
}
}
else if (type == "link"){
return(eta)
}
else if (type == "response"){
return(preds)
}
}
#' Credible Intervals for Model Parameters
#'
#' @param object a glmBayes fitted object
#' @param conf.level the probability level. The default is 95 pct.
#' @param tol tolerance for optimization of the posterior density function
#' @param ... other
#' @method confint glmBayes
#' @return A matrix with columns giving lower and upper credibility limits for each parameter.
#' @export
#'
confint.glmBayes <- function (object, conf.level = 0.95, tol, ...) {
if (missing(tol))
tol <- 1e-10
se <- object$std.err
coef <- object$coefficients
n <- length(object$fitted)
p <- length(coef)
df <- max(1, n - p)
qst_factory <- function(df, mu, sigma){
function(p) qst(p, nu = df, mu = mu, scale = sigma)
}
ci <- matrix(0, nrow = p, ncol = 2)
calculate_ci <- function(ICDF, conf.level, tol){
intervalWidth <- function(lowTailPr, ICDF, conf.level , ...) {
ICDF(conf.level + lowTailPr, ...) - ICDF(lowTailPr, ...)
}
optInfo <- optimize(intervalWidth, c(0, 1 - conf.level ), ICDF = ICDF,
conf.level = conf.level , tol = tol, ...)
HDIlowTailPr <- optInfo$minimum
result <- c(lower = ICDF(HDIlowTailPr, ...), upper = ICDF(conf.level +
HDIlowTailPr, ...))
attr(result, "conf.level ") <- conf.level
return(result)
}
for (i in 1:p){
q <- qst_factory(df = df, mu = coef[i], sigma = se[i])
ci[i, ] <- calculate_ci(q, conf.level, tol)
}
colnames(ci) <- c(
stats:::format.perc((1 - (conf.level)) / 2, 3),
stats:::format.perc(conf.level + (1 - (conf.level)) / 2, 3)
)
rownames(ci) <- names(coef)
return(ci)
}
#' Plot posterior densities of glmBayes models
#'
#' @param object a fitted glmBayes model object
#' @method plot glmBayes
#' @return a ggplot
#' @export
#'
plot.glmBayes = function(object){
if (!inherits(object, "glmBayes"))
stop("object not of class glmBayes")
post = cbind.data.frame(term = names(coef(object)), theta = coef(object), se = object$std.err)
df = nrow(object$model.matrix) - length(coef(object))
post.dens = function(term, df, theta, se){
cbind.data.frame(term = term, x = seq(theta - (se*5) , theta + (se*5), len = 256), dens = dst(seq(theta - (se*5) , theta + (se*5), len = 256), nu = max(3, df), theta, se))
}
listdf = lapply(1:nrow(post), function(i) post.dens(term = post[i, 1], df = df, theta = post[i,2], se = post[i, 3]))
post.gg = do.call("rbind", listdf)
ggplot(post.gg, aes(x = x, y = dens, color = term)) +
facet_wrap(~ term, scales = "free") +
geom_line(size = 1.25, alpha = 0.80) +
theme(legend.position = "none")
}
#' Calculate model log likelihood for glmBayes models
#'
#' @param model a fitted model
#'
#' @return numeric value
#' @export
#' @method logLik glmBayes
#'
logLik.glmBayes <- function(model) {
y <- model$model.frame[,1]
mu <- model$fitted
family = model$family
wts <- rep(1, length(y))
if (family == "binomial") {
if (!is.numeric(y)){
y <- as.numeric(y) - 1
y <- y == max(y)
y <- as.numeric(y)
}
logLik <- dbinom(y, size = 1, prob = mu, log = TRUE)
} else if (family == "poisson") {
logLik <- dpois(y, mu, log = TRUE)
} else if (family == "gaussian") {
logLik <- dnorm(y, mu, sd(y - mu), log = TRUE)
} else if (family == "gamma"){
sigma2 <- var(y - mu)
logLik <- dgamma(y, mu^2/sigma2, mu/sigma2, log = TRUE)
} else if (family == "invgauss"){
dinvgauss <- function (x, mu = 1, sigma = 1, log = FALSE){
if (any(mu < 0))
stop(paste("mu must be positive", "\n", ""))
if (any(sigma < 0))
stop(paste("sigma must be positive", "\n", ""))
if (any(x < 0))
stop(paste("x must be positive", "\n", ""))
log.lik <- (-0.5 * log(2 * pi) - log(sigma) - (3/2) * log(x) -
((x - mu)^2)/(2 * sigma^2 * (mu^2) * x))
if (log == FALSE)
fy <- exp(log.lik)
else fy <- log.lik
fy
}
logLik <- dinvgauss(y, mu, 1, log = TRUE)
} else if (family == "negbinom"){
theta = model$theta
dnegbinom <- function (x, mu = 1, theta = 1, log = FALSE) {
if (any(mu <= 0))
stop(paste("mu must be greater than 0 ", "\n", ""))
if (any(theta <= 0))
stop(paste("theta must be greater than 0 ", "\n", ""))
if (length(theta) > 1)
fy <- ifelse(theta > 1e-04, dnbinom(x, size = mu/theta,
mu = mu, log = log), dpois(x = x, lambda = mu, log = log))
else fy <- if (theta < 1e-04)
dpois(x = x, lambda = mu, log = log)
else dnbinom(x, size = mu/theta, mu = mu, log = log)
fy
}
logLik <- dnegbinom(round(x), round(mu), theta, log = TRUE)
}
return(sum(logLik))
}
#' Calculate observation-wise log likelihood values for glmBayes models
#'
#' @param model a fitted model
#'
#' @return numeric value
#' @export
#' @method logLikFun glmBayes
#'
logLikFun.glmBayes <- function(model) {
y <- model$model.frame[,1]
mu <- model$fitted
family = model$family
wts <- rep(1, length(y))
if (family == "binomial") {
if (!is.numeric(y)){
y <- as.numeric(y) - 1
y <- y == max(y)
y <- as.numeric(y)
}
logLik <- dbinom(y, size = 1, prob = mu, log = TRUE)
} else if (family == "poisson") {
logLik <- dpois(y, mu, log = TRUE)
} else if (family == "gaussian") {
logLik <- dnorm(y, mu, sd(y - mu), log = TRUE)
} else if (family == "gamma"){
sigma2 <- var(y - mu)
logLik <- dgamma(y, mu^2/sigma2, mu/sigma2, log = TRUE)
} else if (family == "invgauss"){
dinvgauss <- function (x, mu = 1, sigma = 1, log = FALSE){
if (any(mu < 0))
stop(paste("mu must be positive", "\n", ""))
if (any(sigma < 0))
stop(paste("sigma must be positive", "\n", ""))
if (any(x < 0))
stop(paste("x must be positive", "\n", ""))
log.lik <- (-0.5 * log(2 * pi) - log(sigma) - (3/2) * log(x) -
((x - mu)^2)/(2 * sigma^2 * (mu^2) * x))
if (log == FALSE)
fy <- exp(log.lik)
else fy <- log.lik
fy
}
logLik <- dinvgauss(y, mu, mu^3, log = TRUE)
} else if (family == "negbinom"){
theta = model$theta
dnegbinom <- function (x, mu = 1, theta = 1, log = FALSE) {
if (any(mu <= 0))
stop(paste("mu must be greater than 0 ", "\n", ""))
if (any(theta <= 0))
stop(paste("theta must be greater than 0 ", "\n", ""))
if (length(theta) > 1)
fy <- ifelse(theta > 1e-04, dnbinom(x, size = mu/theta,
mu = mu, log = log), dpois(x = x, lambda = mu, log = log))
else fy <- if (theta < 1e-04)
dpois(x = x, lambda = mu, log = log)
else dnbinom(x, size = mu/theta, mu = mu, log = log)
fy
}
logLik <- dnegbinom(round(x), round(mu), theta, log = TRUE)
}
return(logLik)
}
#' Calculate Akaike Information Criterion for glmBayes models
#'
#' @param model a fitted model
#' @param correction whether or not to calculate the small sample-bias corrected AIC (AICc).
#'
#' @return numeric value
#' @export
#' @method AIC glmBayes
#' @references Sugiura, N. (1978). Further analysis of the data by Akaike's information criterion and the finite corrections. Comm. Statist. A 7, 13-26.
AIC.glmBayes = function(model, correction = F){
logLik = logLik.glmBayes(model)
k = ncol(model$model.matrix)
n = nrow(model$model.matrix)
aic <- (-2*logLik) + (2 * k)
if (correction){
aic <- aic + (((2*k)*(k + 1)) / (n - k - 1))
}
return(aic)
}
#' Calculate Bayesian Information Criterion for glmBayes models
#'
#' @param model a fitted model
#' @param correction whether or not to calculate the small sample-bias corrected BIC (BICc).
#' @return numeric value
#' @export
#' @method BIC glmBayes
#'
#' @references McQuarrie, A.D. (1999). A small-sample correction for the Schwarz BIC model selection criterion, Statistics & Probability Letters 44 pp.79-86. doi: 10.1016/S0167-7152(98)00294-6
#'
BIC.glmBayes = function(model, correction = F){
logLik = logLik.glmBayes(model)
k = ncol(model$model.matrix)
n = nrow(model$model.matrix)
if (correction){
p = (k * log(n) * n) / (n - k - 1)
} else {
p = k * log(n)
}
(-2*logLik) + p
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.