# Bayesian linear model function blm is a wrapper for the different functions
# I wrote: rbr (Bayesian Regression in R), cppbr (Bayesian Regression in C++),
# rgs (Gibbs Sampling in R), rmhs (Metropolis-Hastings Sampling in R).
# It uses the same syntax as the lm and glm functions, using a formula
# to specify a model with at most 1 predicted value (univariate).
# It has options to specify method (rbr / cppbr / rgs / rmhs),
# verbosity (T / F) to return information, priors in the form "beta(1,1)", as
# well as several options such as burnin, inits, and iterations. Before running,
# check that the above mentioned functions are loaded in the environment.
# Erik-Jan van Kesteren
blim <- function(formula, data, iter = 9999, burnin = 50, inits = NULL,
thin = 0, prior_tau = "dgamma(0.001,0.001)",
prior_b = "dnorm(0,10000)", method = "cppbr", mtsprior = F,
verbose = T, dtuning = F){
# Extract response vector y and data matrix X
mf <- model.frame(formula,data)
y <- model.response(mf, "numeric") # assigns the response vector
X <- model.matrix(formula, mf) # assigns the data matrix
# If mtsprior is indicated, calculate a minimum-training-sample prior for b
if (mtsprior == T){
means <- tausq <- numeric(9999)
for (i in 1:9999){
v <- sample(y,2,replace = T)
means[i] <- mean(v)
tausq[i] <- ((v[1]-means[i])^2+(v[2]-means[i])^2)/4
}
prior_b <- rep(paste("dnorm(",mean(means),",",(mean(tausq)+var(means)),
")",sep = ""), ncol(X))
cat("MTS-prior calculated:", prior_b[1], "\n")
}
# Parse functions for priors and check if they exist
fun_prior_tau <- strsplit(prior_tau, "(", fixed = T)[[1]][1]
fun_prior_b <- unlist(strsplit(prior_b, "(",
fixed = T))[seq(1,2*length(prior_b), 2)]
# If not all functions inside the priors for b exist then stop
if (!all(apply(X = as.matrix(fun_prior_b),
MARGIN = 1,
FUN = function(x) existsFunction(x))) ||
!existsFunction(fun_prior_tau)){
stop(paste(fun_prior_b, "or", fun_prior_tau, "does not exist!"))
}
# Parse arguments for the priors
args_tau <- lapply(strsplit(gsub("[()]", "",
unlist(strsplit(prior_tau,
"(", fixed = T))[c(F,T)]),
","),
as.numeric)
args_b <- lapply(strsplit(gsub("[()]", "",
unlist(strsplit(prior_b,
"(", fixed = T))[c(F,T)]),
","),
as.numeric)
# Use only first prior if there are not enough or too many priors.
if (length(fun_prior_b)!=ncol(X)){
fun_prior_b <- rep(fun_prior_b[1],ncol(X))
args_b <- rep(args_b[1],ncol(X))
prior_b <- rep(prior_b[1],ncol(X))
cat("Using", prior_b[1], "as prior for all beta coefficients. \n")
}
# Call the appropriate algorithm from the samplers.r file. Use the data
# from the parsing functions above to call the functions and return the trace.
if (toupper(method) == "RBR"){
if (fun_prior_tau != "dgamma" || any(fun_prior_b != "dnorm")){
stop("Method RBR only applicable with conjugate priors")
}
result <- rbr(iter = iter, y = y, X = X,
prior.a = args_tau[[1]][1], prior.b = args_tau[[1]][2],
prior.mean = unlist(args_b)[c(T,F)],
prior.prec = 1/unlist(args_b)[c(F,T)])
colnames(result) <- c("Variance", colnames(X), "Rsquared")
algo <- "R-based Bayesian Linear Regression"
} else if (toupper(method) == "CPPBR") {
if (fun_prior_tau != "dgamma" || any(fun_prior_b != "dnorm")){
stop("Method CPPBR only applicable with conjugate priors")
}
result <- cppbr(iter = iter, y = y, X = X,
prior_a = args_tau[[1]][1], prior_b = args_tau[[1]][2],
prior_mu = unlist(args_b)[c(T,F)],
prior_prec = 1/unlist(args_b)[c(F,T)])
colnames(result) <- c("Variance", colnames(X), "Rsquared")
algo <- "C++ based Bayesian Linear Regression"
} else if (toupper(method) == "RGS"){
if (fun_prior_tau != "dgamma" || any(fun_prior_b != "dnorm")){
stop("Method RGS only applicable with conjugate priors")
}
result <- rgs(iter = iter, y = y, X = X,
prior_a = args_tau[[1]][1], prior_b = args_tau[[1]][2],
prior_mean = unlist(args_b)[c(T,F)],
prior_prec = 1/unlist(args_b)[c(F,T)],
verbose = verbose, inits = inits)
colnames(result) <- c("Variance", colnames(X), "Rsquared")
algo <- "R based Gibbs Sampling Regression"
} else if (toupper(method) == "RMHS"){
if (fun_prior_tau != "dgamma"){
stop("Method RMHS only applicable with conjugate variance prior")
}
result <- rmhs(iter = iter, y = y, X = X,
prior_a = args_tau[[1]][1], prior_b = args_tau[[1]][2],
prior_beta = prior_b, verbose = verbose, inits = inits,
dtuning = dtuning)
colnames(result) <- c("Variance", colnames(X), "Rsquared")
algo <- "R based Metropolis-Hastings Sampling Regression"
} else {
stop("Unknown method: ", toupper(method))
}
if (verbose == T){cat(algo, "\n")}
# Remove burnin and apply thinning to trace
trace <- result[-(1:(burnin+1)),]
if (thin > 0) {trace <- trace[(seq(from = 1, to = nrow(trace), by = thin)),]}
# Create summary statistics
summary <- cbind(mean = apply(X = trace, MARGIN = 2,
FUN = mean),
s.e. = apply(X = trace, MARGIN = 2,
FUN = sd),
cred2.5 = apply(X = trace, MARGIN = 2,
FUN = function(x) quantile(x, 0.025)),
cred97.5 = apply(X = trace, MARGIN = 2,
FUN = function(x) quantile(x, 0.975)),
mcerror = apply(X = trace, MARGIN = 2,
FUN = function(x) var(x)/sqrt(iter)))
# Create trace autocorrelation matrix
autocor <- matrix(numeric((ncol(X)+2)*40), ncol = 40)
for (i in 1:40){
autocor[,i] <- apply(X = trace, MARGIN = 2,
function(x) abs(cor(x[-(1:i)],
x[-((length(x)+1-i):length(x))]))
)
}
# Give the priors for the betas names before the output
names(prior_b) <- paste("b",0:(ncol(X)-1),sep = "")
# Create an output object with all the relevant information
blimfit <- list(algorithm = algo,
summary = summary,
auto = autocor,
y = y, X = X,
trace = trace,
priors = c(prior_tau,prior_b))
# Give it a classy class.
class(blimfit) <- "blimfit"
return(blimfit)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.