#' Bayesian inference for simple linear regression
#'
#' This function is used to find the posterior distribution of the simple
#' linear regression slope variable \eqn{\beta}{beta} when we have a random
#' sample of ordered pairs \eqn{(x_{i}, y_{i})} from the simple linear
#' regression model:
#' \deqn{y_{i} = \alpha_{\bar{x}} + \beta x_{i}+\epsilon_{i}} where the
#' observation errors are, \eqn{\epsilon_i}, independent
#' \eqn{normal(0,\sigma^{2})} with known variance.
#'
#'
#' @param y the vector of responses.
#' @param x the value of the explantory variable associated with each response.
#' @param slope.prior use a ``flat'' prior or a ``normal'' prior. for
#' \eqn{\beta}{beta}
#' @param intcpt.prior use a ``flat'' prior or a ``normal'' prior. for
#' \eqn{\alpha_[\bar{x}]}{alpha_xbar}
#' @param mb0 the prior mean of the simple linear regression slope variable
#' \eqn{\beta}{beta}. This argument is ignored for a flat prior.
#' @param sb0 the prior std. deviation of the simple linear regression slope
#' variable \eqn{\beta}{beta} - must be greater than zero. This argument is
#' ignored for a flat prior.
#' @param ma0 the prior mean of the simple linear regression intercept variable
#' \eqn{\alpha_{\bar{x}}}{alpha_xbar}. This argument is ignored for a flat
#' prior.
#' @param sa0 the prior std. deviation of the simple linear regression variable
#' \eqn{\alpha_{\bar{x}}}{alpha_xbar} - must be greater than zero. This
#' argument is ignored for a flat prior.
#' @param sigma the value of the std. deviation of the residuals. By default,
#' this is assumed to be unknown and the sample value is used instead. This
#' affects the prediction intervals.
#' @param alpha controls the width of the credible interval.
#' @param plot.data if true the data are plotted, and the posterior regression
#' line superimposed on the data.
#' @param pred.x a vector of x values for which the predicted y values are
#' obtained and the std. errors of prediction
#' @return A list will be returned with the following components:
#' \item{post.coef}{the posterior mean of the intecept and the slope}
#' \item{post.coef}{the posterior standard deviation of the intercept the
#' slope} \item{pred.x}{the vector of values for which predictions have been
#' requested. If pred.x is NULL then this is not returned} \item{pred.y}{the
#' vector predicted values corresponding to pred.x. If pred.x is NULL then this
#' is not returned} \item{pred.se}{The standard errors of the predicted values
#' in pred.y. If pred.x is NULL then this is not returned}
#' @param \dots additional arguments that are passed to \code{Bolstad.control}
#' @keywords misc
#' @examples
#'
#' ## generate some data from a known model, where the true value of the
#' ## intercept alpha is 2, the true value of the slope beta is 3, and the
#' ## errors come from a normal(0,1) distribution
#' set.seed(123)
#' x = rnorm(50)
#' y = 2 + 3*x + rnorm(50)
#'
#' ## use the function with a flat prior for the slope beta and a
#' ## flat prior for the intercept, alpha_xbar.
#'
#' bayes.lin.reg(y,x)
#'
#' ## use the function with a normal(0,3) prior for the slope beta and a
#' ## normal(30,10) prior for the intercept, alpha_xbar.
#'
#' bayes.lin.reg(y,x,"n","n",0,3,30,10)
#'
#' ## use the same data but plot it and the credible interval
#'
#' bayes.lin.reg(y,x,"n","n",0,3,30,10, plot.data = TRUE)
#'
#' ## The heart rate vs. O2 uptake example 14.1
#' O2 = c(0.47,0.75,0.83,0.98,1.18,1.29,1.40,1.60,1.75,1.90,2.23)
#' HR = c(94,96,94,95,104,106,108,113,115,121,131)
#' plot(HR,O2,xlab="Heart Rate",ylab="Oxygen uptake (Percent)")
#'
#' bayes.lin.reg(O2,HR,"n","f",0,1,sigma=0.13)
#'
#' ## Repeat the example but obtain predictions for HR = 100 and 110
#'
#' bayes.lin.reg(O2,HR,"n","f",0,1,sigma=0.13,pred.x=c(100,110))
#'
#' @export bayes.lin.reg
bayes.lin.reg = function(y, x, slope.prior = c("flat", "normal"),
intcpt.prior = c("flat", "normal"),
mb0 = 0, sb0 = 0, ma0 = 0, sa0 = 0,
sigma = NULL, alpha = 0.05, plot.data = FALSE,
pred.x = NULL,
...) {
if(sum(is.na(y)) > 0 || sum(is.na(x)) > 0)
stop("Error: x and y may not contain missing values")
if(length(y) != length(x))
stop("Error: x and y are unequal lengths")
if(!is.null(sigma) && sigma <= 0){
stop("Error: the std. deviation of the resisuals, sigma, must be greater than or equal to zero")
}
intcpt.prior = match.arg(intcpt.prior, c("flat", "normal"))
slope.prior = match.arg(slope.prior, c("flat", "normal"))
if(!grepl("^(flat|normal)$", slope.prior))
stop("The slope prior must be one of \"normal\" or \"flat\"")
if(!grepl("^(flat|normal)$", intcpt.prior))
stop("The intercept prior must be one of \"normal\" or \"flat\"")
if(slope.prior == "normal" && sb0 <= 0)
stop("Error: the prior std. devation sb0 must be greater than zero")
if(intcpt.prior == "normal" && sa0 <= 0)
stop("Error: the prior std. devation sa0 must be greater than zero")
if(alpha <= 0 || alpha > 0.5)
stop("Error: alpha must be in the range (0, 0.5]")
if(length(y) <= 2)
stop("Error: you really should have more than 2 points for a regression!")
n = length(y)
x.bar = mean(x)
y.bar = mean(y)
x2.bar = mean(x^2)
xy.bar = mean(x * y)
y2.bar = mean(y^2)
b.ls = (xy.bar - x.bar * y.bar) / (x2.bar - x.bar^2)
fitted = y.bar + b.ls * (x - x.bar)
residuals = y - fitted
A0 = y.bar - b.ls * x.bar
Ax.bar = y.bar
quiet = Bolstad.control(...)$quiet
drawPlot = Bolstad.control(...)$plot
sigma.known = TRUE
if(is.null(sigma)){
sigma.known = FALSE
sigma = sqrt(sum((y - (Ax.bar + b.ls * (x - x.bar)))^2)/ (n - 2))
if(!quiet){cat(paste("Standard deviation of residuals: ", signif(sigma, 3), "\n"))}
} else {
if(!quiet){
cat(paste("Known standard deviation: ", signif(sigma, 3), "\n"))
}
}
SSx = n * (x2.bar - x.bar^2)
lb = 0
ub = 0
prior.b = rep(0, 1001)
beta = prior.b
likelihood.b = prior.b
posterior.b = prior.b
d = as.data.frame(cbind(y, x))
if (slope.prior == "flat") {
prior.prec.b = 0
mb0 = 0
bnd.mult.b = 4
} else {
prior.prec.b = 1 / sb0^2
bnd.mult.b = 3
}
if (intcpt.prior == "flat") {
prior.prec.a = 0
ma0 = 0
bnd.mult.a = 4
} else {
prior.prec.a = 1 / sa0^2
bnd.mult.a = 3
}
################
# SLOPE
################
prec.ls = SSx / sigma^2
sd.ls = sqrt(1 / prec.ls)
post.prec.b = prior.prec.b + prec.ls
post.var.b = 1 / post.prec.b
post.sd.b = sqrt(post.var.b)
post.mean.b = (prior.prec.b * mb0 + SSx / sigma^2 * b.ls) / post.prec.b
lb = post.mean.b - bnd.mult.b * post.sd.b
ub = post.mean.b + bnd.mult.b * post.sd.b
beta = seq(lb, ub, length = 1001)
if (slope.prior == "flat") {
prior.b = rep(1, 1001)
norm.const = 0.5 * (2 * sum(prior.b) - prior.b[1] - prior.b[1001] * ((ub - lb) * 0.001))
prior.b = prior.b / norm.const
} else {
prior.b = dnorm(beta, mb0, sb0)
}
likelihood.b = dnorm(beta, b.ls, sd.ls)
posterior.b = dnorm(beta, post.mean.b, post.sd.b)
if(drawPlot){
old.par = par(mfrow = c(2, 2))
y.max = max(c(prior.b, likelihood.b, posterior.b))
plot(beta, prior.b, type = "l", col = "black", lty = 1,
ylim = c(0, 1.1 * y.max), xlab = expression(beta),
ylab = "",
main = expression(paste("Prior, likelihood and posterior for ", beta,
sep = "")),
sub = "(slope)")
lines(beta, likelihood.b, lty = 2, col = "red")
lines(beta, posterior.b, lty = 3, col = "blue")
legend("topleft", bty = "n", cex = 0.7,
lty = 1:3, col = c("black", "red", "blue"),
legend = c("Prior", "Likelihood", "Posterior"))
}
####################################################################################
alpha.xbar = rep(0, 1001)
prior.a = alpha.xbar
likelihood.a = alpha.xbar
posterior.a = alpha.xbar
prec.ls = n / (sigma^2)
sd.ls = sqrt(1 / prec.ls)
post.prec.a = prior.prec.a + prec.ls
post.var.a = 1 / post.prec.a
post.sd.a = sqrt(post.var.a)
post.mean.a = (prior.prec.a * ma0 + n / sigma^2 * Ax.bar) / post.prec.a
lb = post.mean.a - bnd.mult.a * post.sd.a
ub = post.mean.a + bnd.mult.a * post.sd.a
alpha.xbar = seq(lb, ub, length = 1001)
if(intcpt.prior == "flat") {
prior.a = rep(1, 1001)
norm.const = (2 * sum(prior.a) - prior.a[1] - prior.a[1001] * ((ub - lb) / 1000)) / 2
prior.a = prior.a / norm.const
} else {
prior.a = dnorm(alpha.xbar, ma0, sa0)
}
likelihood.a = dnorm(alpha.xbar, y.bar, sd.ls)
posterior.a = dnorm(alpha.xbar, post.mean.a, post.sd.a)
if(!quiet){
cat(sprintf("%-11s %-14s %-24s\n", " ", "Posterior Mean", "Posterior Std. Deviation"))
cat(sprintf("%-11s %-14s %-24s\n", " ", "--------------", "------------------------"))
cat(sprintf("Intercept: %-14.6g %-24.6g\n", signif(post.mean.a, 4), signif(post.sd.a, 5)))
cat(sprintf("Slope: %-14.6g %-24.6g\n", signif(post.mean.b, 4), signif(post.sd.b, 5)))
}
y.max = max(c(prior.a, likelihood.a, posterior.a))
if(drawPlot){
plot(alpha.xbar, prior.a, type = "l", col = "black", lty = 1,
ylim = c(0, 1.1 * y.max), xlab = expression(alpha),
ylab = "",
main = expression(paste("Prior, likelihood and posterior for ", alpha[bar(x)],
sep = "")),
sub = "(intercept)")
lines(alpha.xbar, likelihood.a, lty = 2, col = "red")
lines(alpha.xbar, posterior.a, lty = 3, col = "blue")
legend("topleft", cex = 0.7, lty = 1:3, col = c("black", "red", "blue"),
legend = c("Prior", "Likelihood", "Posterior"), bty = "n")
}
if(sigma.known){
s.e = sqrt(x2.bar - x.bar^2)
x.lwr = x.bar - 3 * s.e
x.upr = x.bar + 3 * s.e
x.values = seq(x.lwr, x.upr, length = 1001)
pred.y = post.mean.b * (x.values - x.bar) + post.mean.a
se.pred = sqrt(post.var.a + (x.values - x.bar)^2 * post.var.b + sigma^2)
t.crit = qt(1 - alpha * .5, n - 2)
pred.lb = pred.y - t.crit * se.pred
pred.ub = pred.y + t.crit * se.pred
} else{
s.e = sqrt(x2.bar - x.bar^2)
x.lwr = x.bar - 3 * s.e
x.upr = x.bar + 3 * s.e
x.values = seq(x.lwr, x.upr, length = 1001)
pred.y = post.mean.b * (x.values - x.bar) + post.mean.a
se.pred = sqrt(post.var.a + (x.values - x.bar)^2 * post.var.b + sigma^2)
z.crit = qnorm(1 - alpha * 0.5)
pred.lb = pred.y - z.crit * se.pred
pred.ub = pred.y + z.crit * se.pred
}
y.max = max(pred.ub)
y.min = min(pred.lb)
if(drawPlot){
if(plot.data){
plot(y~x, main = paste("Predicitions with ", round(100 * (1 - alpha))
,"% bounds", sep = ""), xlab = "x", ylab = "y", ylim = 1.1 * c(y.min, y.max))
lines(x.values, pred.y, lty = 1, col = "black")
} else if (drawPlot && !plot.data){
plot(x.values, pred.y, type = "l", lty = 1, col = "black",
main = paste("Predicitions with ", round(100 * (1 - alpha)),
"% bounds", sep = ""), xlab = "x", ylab = "y",
ylim = 1.1 * c(y.min, y.max))
}
lines(x.values, pred.lb, lty = 2, col = "red")
lines(x.values, pred.ub, lty = 3, col = "blue")
legend("topleft", lty = 1:3, col = c("black", "red", "blue"),
legend = c("Predicted value",
paste(round(100 * (1 - alpha)), "% lower bound", sep = ""),
paste(round(100 * (1 - alpha)), "% upper bound", sep = "")),
cex = 0.7, bty = "n")
}
pred.y = NULL
pred.se = NULL
if(!is.null(pred.x)){
pred.y = post.mean.a + post.mean.b * (pred.x - x.bar)
pred.se = sqrt(post.var.a + (pred.x - x.bar)^2 * post.var.b + sigma^2)
predicted.values = cbind(pred.x, pred.y, pred.se)
fmt = "%-8.4g %-12.4g %-11.5g\n"
fmtS = "%-6s %-12s %-11s\n"
if(!quiet){
cat(sprintf(fmtS, "x", "Predicted y", "SE"))
cat(sprintf(fmtS, "------", "-----------", "-----------"))
n.pred.x = length(pred.x)
for(i in 1:n.pred.x){
cat(sprintf(fmt, signif(predicted.values[i, 1], 4),
signif(predicted.values[i, 2], 4),
signif(predicted.values[i, 3], 5)))
}
}
}
if(drawPlot){
par(old.par)
}
interceptResults = list(name = 'alpha[0]',
param.x = alpha.xbar,
prior = prior.a, likelihood = likelihood.a, posterior = posterior.a,
mean = post.mean.a,
var = post.var.a)
slopeResults = list(name = 'beta',
param.x = beta,
prior = prior.b, likelihood = likelihood.b, posterior = posterior.b,
mean = post.mean.b,
var = post.var.b)
class(interceptResults) = "Bolstad"
class(slopeResults) = "Bolstad"
if(!is.null(pred.x)){
invisible(list(intercept = interceptResults,
slope = slopeResults,
post.coef = c(post.mean.a, post.mean.b),
post.coef.sd = c(post.sd.a, post.sd.b),
pred.x = pred.x, pred.y = pred.y, pred.se = pred.se))
} else{
invisible(list(intercept = interceptResults,
slope = slopeResults,
post.coef = c(post.mean.a, post.mean.b),
post.coef.sd = c(post.sd.a, post.sd.b)))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.