Nothing
#' Create a model component object for a regression (fixed effects) component
#' in the linear predictor
#'
#' This function is intended to be used on the right hand side of the
#' \code{formula} argument to \code{\link{create_sampler}} or
#' \code{\link{generate_data}}. It creates an additive regression term in the
#' model's linear predictor. By default, the prior for the regression
#' coefficients is improper uniform. A proper normal prior can be set up
#' using function \code{\link{pr_normal}}, and passed to argument \code{prior}.
#' It should be noted that \code{\link{pr_normal}} expects a precision matrix
#' as input for its second argument, and that the prior variance (matrix) is
#' taken to be the inverse of this precision matrix, where in case the
#' model's family is \code{"gaussian"} this matrix is additionally
#' multiplied by the residual scalar variance parameter \code{sigma_^2}.
#'
#' @examples
#' \donttest{
#' data(iris)
#' # default: flat priors on regression coefficients
#' sampler <- create_sampler(Sepal.Length ~
#' reg(~ Petal.Length + Species, name="beta"),
#' data=iris
#' )
#' sim <- MCMCsim(sampler, burnin=100, n.iter=400)
#' summary(sim)
#' # (weakly) informative normal priors on regression coefficients
#' sampler <- create_sampler(Sepal.Length ~
#' reg(~ Petal.Length + Species, prior=pr_normal(precision=1e-2), name="beta"),
#' data=iris
#' )
#' sim <- MCMCsim(sampler, burnin=100, n.iter=400)
#' summary(sim)
#' # binary regression
#' sampler <- create_sampler(Species == "setosa" ~
#' reg(~ Sepal.Length, prior=pr_normal(precision=0.1), name="beta"),
#' family="binomial", data=iris)
#' sim <- MCMCsim(sampler, burnin=100, n.iter=400)
#' summary(sim)
#' pred <- predict(sim)
#' str(pred)
#' # example with equality constrained regression effects
#' n <- 500
#' df <- data.frame(x=runif(n))
#' df$y <- rnorm(n, 1 + 2*df$x)
#' R <- matrix(1, 2, 1)
#' r <- 3
#' sampler <- create_sampler(y ~ reg(~ 1 + x, R=R, r=r, name="beta"), data=df)
#' sim <- MCMCsim(sampler)
#' summary(sim)
#' plot(sim, "beta")
#' summary(transform_dc(sim$beta, fun=function(x) crossprod_mv(R, x) - r))
#' }
#'
#' @export
#' @param formula a formula specifying the predictors to be used in the model,
#' in the same way as the right hand side of the \code{formula} argument of
#' R's \code{lm} function. Variable names are looked up in the data frame
#' passed as \code{data} argument to \code{\link{create_sampler}} or
#' \code{\link{generate_data}}, or in \code{environment(formula)}.
#' @param remove.redundant whether redundant columns should be removed from the
#' design matrix. Default is \code{FALSE}. But note that treatment contrasts are
#' automatically applied to all factor variables in \code{formula}.
# TODO allow to pass contrasts.arg to model_matrix (e.g. "contr.none", and later "contr.sparse")
#' @param sparse whether the model matrix associated with \code{formula} should
#' be sparse. The default is to base this on a simple heuristic.
#' @param X a (possibly sparse) design matrix can be specified directly, as an
#' alternative to the creation of one based on \code{formula}. If \code{X} is
#' specified \code{formula} is ignored.
#' @param prior prior specification for the regression coefficients. Supported
#' priors can be specified using functions \code{\link{pr_normal}},
#' \code{\link{pr_fixed}}, or \code{\link{pr_MLiG}}. The latter prior is only
#' available in conjunction with a gamma family sampling distribution.
#' @param Q0 prior precision matrix for the regression effects. The default is a
#' zero matrix corresponding to a noninformative improper prior.
#' It can be specified as a scalar value, as a numeric vector of appropriate
#' length, or as a matrix object. DEPRECATED, please use argument \code{prior}
#' instead, i.e. \code{prior = pr_normal(mean = b0.value, precision = Q0.value)}.
#' @param b0 prior mean for the regression effect. Defaults to a zero vector.
#' It can be specified as a scalar value or as a numeric vector of
#' appropriate length. DEPRECATED, please use argument \code{prior}
#' instead, i.e. \code{prior = pr_normal(mean = b0.value, precision = Q0.value)}.
#' @param R optional constraint matrix for equality restrictions R'x = r where
#' \code{x} is the vector of regression effects.
#' @param r right hand side for the equality constraints.
#' @param S optional constraint matrix for inequality constraints S'x >= s where
#' \code{x} is the vector of regression effects.
#' @param s right hand side for the inequality constraints.
#' @param lower as an alternative to \code{s}, \code{lower} and \code{upper} may be specified
#' for two-sided constraints lower <= S'x <= upper.
#' @param upper as an alternative to \code{s}, \code{lower} and \code{upper} may be specified
#' for two-sided constraints lower <= S'x <= upper.
#' @param name the name of the model component. This name is used in the output of the
#' MCMC simulation function \code{\link{MCMCsim}}. By default the name will be 'reg'
#' with the number of the model term attached.
#' @param debug if \code{TRUE} a breakpoint is set at the beginning of the posterior
#' draw function associated with this model component. Mainly intended for developers.
#' @returns An object with precomputed quantities and functions for sampling from
#' prior or conditional posterior distributions for this model component. Intended
#' for internal use by other package functions.
reg <- function(formula = ~ 1, remove.redundant=FALSE, sparse=NULL, X=NULL,
prior=NULL, Q0=NULL, b0=NULL,
R=NULL, r=NULL, S=NULL, s=NULL, lower=NULL, upper=NULL,
name="", debug=FALSE) {
e <- sys.frame(-2L)
type <- "reg"
if (name == "") stop("missing model component name")
if (e$family$family == "gamma") {
modus <- "gamma" # model for log(mean) of gamma
} else if (any(name == names(e$Vmod))) {
if (e$family$family == "gaussian_gamma")
modus <- "vargamma" # model for log(var) of gaussian and log(mean) of gamma
else
modus <- "var" # model for log(var) of gaussian
} else {
modus <- "regular" # model for mean of gaussian/binomial/...
}
if (is.null(X)) {
if (e$family$family == "multinomial") {
edat <- new.env(parent = .GlobalEnv)
X <- NULL
for (k in seq_len(e$Km1)) {
edat$cat_ <- factor(rep.int(e$cats[k], e[["n0"]]), levels=e$cats[-length(e$cats)])
X <- rbind(X, model_matrix(formula, data=e$data, sparse=sparse, enclos=edat))
}
rm(edat)
} else {
X <- model_matrix(formula, e$data, sparse=sparse)
}
if (remove.redundant) X <- remove_redundancy(X)
}
X <- economizeMatrix(X, sparse=sparse, strip.names=FALSE, check=TRUE)
if (nrow(X) != e[["n"]]) stop("design matrix with incompatible number of rows")
e$coef.names[[name]] <- colnames(X)
X <- unname(X)
q <- ncol(X)
in_block <- any(name == unlist(e$block, use.names=FALSE)) || any(name == unlist(e$block.V, use.names=FALSE))
if (!is.null(b0) || !is.null(Q0)) {
warn("arguments 'b0' and 'Q0' are deprecated; please use argument 'prior' instead")
if (is.null(prior)) {
if (modus == "regular")
prior <- pr_normal(mean = if (is.null(b0)) 0 else b0, precision = if (is.null(Q0)) 0 else Q0)
else
prior <- pr_MLiG(mean = if (is.null(b0)) 0 else b0, precision = if (is.null(Q0)) 0 else Q0)
}
} else {
if (is.null(prior)) {
if (modus == "regular")
prior <- pr_normal(mean=0, precision=0)
else
prior <- pr_MLiG(mean=0, precision=0)
}
}
switch(prior$type,
fixed = {
prior$init(q)
informative.prior <- TRUE
zero.mean <- all(prior$value == 0)
rprior <- function(p) prior$rprior()
},
normal = {
prior$init(q, e$coef.names[[name]], sparse=if (in_block) TRUE else NULL, sigma=!e$sigma.fixed)
informative.prior <- prior$informative
if (modus != "regular" && informative.prior) stop("please use 'pr_MLiG' to specify a (conjugate) prior for gamma family or variance modelling")
Q0 <- prior$precision
zero.mean <- !informative.prior || all(prior$mean == 0)
if (zero.mean) {
prior$mean <- 0
Q0b0 <- 0
} else {
if (length(prior$mean) == 1L)
Q0b0 <- Q0 %m*v% rep.int(prior$mean, q)
else
Q0b0 <- Q0 %m*v% prior$mean
}
rprior <- function(p) prior$rprior(p)
# TODO TMVN prior
},
MLiG = {
if (modus == "regular") stop("MLiG prior only available in combination with gamma family or variance modelling")
if (!is.null(R) || !is.null(S)) stop("constraints not supported in combination with MLiG prior")
prior$init(q, e$coef.names[[name]])
informative.prior <- prior$informative
zero.mean <- all(prior$mean == 0)
rprior <- function(p) prior$rprior()
if (in_block) { # block sampler sparse Q0 for template
if (length(prior$precision) == 1L)
Q0 <- Cdiag(rep.int(prior$precision/prior$a, q))
else
Q0 <- Cdiag(prior$precision/prior$a)
}
},
stop("'", prior$type, "' is not a supported prior for regression coefficients '", name, "'")
)
if (e$compute.weights) {
# TODO see if following restriction can be relaxed
if (prior$type != "normal") stop("weights computation not supported for priors of type ", prior$type)
if (!zero.mean) stop("weights cannot be computed if some coefficients have non-zero prior means")
}
if (modus == "var" || modus == "vargamma") {
if (is_ind_matrix(X) && q < e[["n"]])
compute_Qfactor <- function(p) X %m*v% exp(-p[[name]])
else
compute_Qfactor <- function(p) exp(X %m*v% (-p[[name]]))
}
linpred <- function(p) X %m*v% p[[name]]
# for regression components assume that categorical variables in newdata
# carry the exact same levels as those in the training data
make_predict <- function(newdata=NULL, Xnew, verbose=TRUE) {
if (modus == "vargamma") stop("prediction for 'gaussian_gamma' family not supported")
if (is.null(newdata)) {
if (missing(Xnew)) stop("one of 'newdata' and 'Xnew' should be supplied")
if (is.null(colnames(Xnew))) {
if (ncol(Xnew) != q) stop("wrong number of columns for Xnew matrix of component ", name)
}
} else {
nnew <- nrow(newdata)
if (e$family$family == "multinomial") {
edat <- new.env(parent = .GlobalEnv)
Xnew <- NULL
for (k in seq_len(e$Km1)) {
edat$cat_ <- factor(rep.int(e$cats[k], nnew), levels=e$cats[-length(e$cats)])
Xnew <- rbind(Xnew, model_matrix(formula, data=newdata, sparse=sparse, enclos=edat))
}
rm(edat)
} else {
Xnew <- model_matrix(formula, newdata, sparse=sparse)
}
}
if (remove.redundant && !is.null(colnames(Xnew))) {
# TODO check that any columns removed are redundant
Xnew <- Xnew[, e$coef.names[[name]], drop=FALSE]
} else {
if (ncol(Xnew) != q) stop("'newdata' yields ", ncol(Xnew), " predictor column(s) for model term '", name, "' versus ", q, " originally")
if (!is.null(colnames(Xnew)) && !identical(colnames(Xnew), e$coef.names[[name]])) {
if (!all(e$coef.names[[name]] %in% colnames(Xnew))) {
stop("the following model matrix columns could not be derived from Xnew: ",
paste(setdiff(e$coef.names[[name]], colnames(Xnew)), collapse=", "))
}
Xnew <- Xnew[, e$coef.names[[name]], drop=FALSE]
}
}
Xnew <- economizeMatrix(Xnew, sparse=sparse, strip.names=TRUE, check=TRUE)
rm(newdata, verbose)
function(p) Xnew %m*v% p[[name]]
}
if (!in_block && !e$prior.only && prior$type != "fixed") {
draw <- if (debug) function(p) {browser()} else function(p) {}
if (modus == "var" || modus == "vargamma") {
if (!e$single.V.block) {
if (is_ind_matrix(X) && q < e[["n"]])
draw <- add(draw, bquote(p[["Q_"]] <- p[["Q_"]] * (X %m*v% exp(p[[.(name)]]))))
else
draw <- add(draw, bquote(p[["Q_"]] <- p[["Q_"]] * exp(X %m*v% p[[.(name)]])))
}
} else {
if (e$single.block) {
if (e$use.offset || e$e.is.res)
draw <- add(draw, quote(p$e_ <- e$y_eff()))
} else {
if (e$e.is.res)
draw <- add(draw, bquote(mv_update(p[["e_"]], plus=TRUE, X, p[[.(name)]])))
else
draw <- add(draw, bquote(mv_update(p[["e_"]], plus=FALSE, X, p[[.(name)]])))
}
}
if (e$single.block && e$family$link != "probit" && modus == "regular" &&
(!e$modeled.Q || (all(e$family$family != c("gaussian", "gaussian_gamma")) && !e$use.offset && !e$modeled.r))) {
# single regression component, no variance modeling, Xy fixed
if (e$family$family == "gaussian")
Xy <- crossprod_mv(X, e$Q0 %m*v% e$y_eff()) + Q0b0
else
Xy <- crossprod_mv(X, e$Q_e(e$y_eff())) + Q0b0
} else if (modus == "regular") {
# Xy updated in each iteration
if (all(Q0b0 == 0))
draw <- add(draw, quote(Xy <- crossprod_mv(X, e$Q_e(p))))
else
draw <- add(draw, quote(Xy <- crossprod_mv(X, e$Q_e(p)) + Q0b0))
} else { # modus gamma, var, vargamma
if (modus == "var" || modus == "vargamma") { # variance modelling
if (e$single.V.block)
draw <- add(draw, bquote(vkappa <- .(if (e$sigma.fixed) 0.5 else quote(0.5/p[["sigma_"]]^2)) * p[["e_"]]^2))
else
draw <- add(draw, bquote(vkappa <- .(if (e$sigma.fixed) 0.5 else quote(0.5/p[["sigma_"]]^2)) * p[["e_"]]^2 * p[["Q_"]]))
draw <- add(draw, bquote(Hz <- crossprod_mv(X, rMLiG(.(e[["n"]]), 0.5, vkappa))))
}
if (modus == "gamma") {
if (e$family$alpha.fixed) {
alpha <- e$family$get_shape()
if (e$single.block && !e$use.offset)
kappa <- alpha * e[["y"]]
else {
kappa0 <- alpha * e[["y"]]
draw <- add(draw, quote(kappa <- kappa0 * exp(-p[["e_"]])))
}
} else {
draw <- add(draw, quote(alpha <- e$family$get_shape(p)))
if (e$single.block && !e$use.offset) {
draw <- add(draw, quote(kappa <- alpha * e[["y"]]))
} else {
draw <- add(draw, quote(kappa <- alpha * e[["y"]] * exp(-p[["e_"]])))
}
}
draw <- add(draw, bquote(Hz <- crossprod_mv(X, rMLiG(.(e[["n"]]), alpha, kappa))))
} else if (modus == "vargamma") {
if (e$family$alpha.fixed) {
alpha <- e$family$get_shape()
if (e$single.V.block)
kappa <- alpha * e$family[["sigmasq"]]
else {
kappa0 <- alpha * e$family[["sigmasq"]]
draw <- add(draw, quote(kappa <- kappa0 * p[["Q_"]]))
}
} else {
draw <- add(draw, quote(alpha <- e$family$get_shape(p)))
if (e$single.V.block) {
draw <- add(draw, quote(kappa <- alpha * e$family[["sigmasq"]]))
} else {
draw <- add(draw, quote(kappa <- alpha * e$family[["sigmasq"]] * p[["Q_"]]))
}
}
draw <- add(draw, bquote(Hz <- Hz + crossprod_mv(X, rMLiG(.(e[["n"]]), alpha, kappa))))
}
if (informative.prior) {
if (zero.mean)
draw <- add(draw, bquote(Hz <- Hz + sqrt(prior$precision/prior$a) * rMLiG(.(q), prior$a, prior$a)))
else
draw <- add(draw, bquote(Hz <- Hz + sqrt(prior$precision/prior$a) * rMLiG(.(q), prior$a, prior$a * exp(sqrt(prior$precision/prior$a) * prior$mean))))
}
}
if (e$modeled.Q && modus == "regular") { # in this case both XX and Xy must be updated in each iteration
if (e$Q0.type == "symm")
draw <- add(draw, quote(XX <- crossprod_sym(X, p[["QM_"]])))
else
draw <- add(draw, quote(XX <- crossprod_sym(X, p[["Q_"]])))
if (informative.prior) {
# TODO instead of runif(n, ...) account for the structure in Qmod; lambda may not vary per unit!
mat_sum <- make_mat_sum(M0=Q0, M1=crossprod_sym(X, crossprod_sym(Diagonal(x=runif(e[["n"]], 0.9, 1.1)), e$Q0)))
MVNsampler <- create_TMVN_sampler(
Q=mat_sum(crossprod_sym(X, crossprod_sym(Diagonal(x=runif(e[["n"]], 0.9, 1.1)), e$Q0))),
update.Q=TRUE, name=name, R=R, r=r, S=S, s=s, lower=lower, upper=upper,
chol.control=e$control$chol.control
)
draw <- add(draw, bquote(p <- MVNsampler$draw(p, .(if (e$sigma.fixed) 1 else quote(p[["sigma_"]])), Q=mat_sum(XX), Xy=Xy)))
} else {
MVNsampler <- create_TMVN_sampler(
Q=crossprod_sym(X, crossprod_sym(Diagonal(x=runif(e[["n"]], 0.9, 1.1)), e$Q0)),
update.Q=TRUE, name=name, R=R, r=r, S=S, s=s, lower=lower, upper=upper,
chol.control=e$control$chol.control
)
draw <- add(draw, bquote(p <- MVNsampler$draw(p, .(if (e$sigma.fixed) 1 else quote(p[["sigma_"]])), Q=XX, Xy=Xy)))
}
} else { # precision matrix XX + Q0 not updated
if (modus == "regular") {
if (e$single.block && e$family$link != "probit") {
MVNsampler <- create_TMVN_sampler(
Q=crossprod_sym(X, e$Q0) + Q0, Xy=Xy,
R=R, r=r, S=S, s=s, lower=lower, upper=upper,
name=name, chol.control=e$control$chol.control
)
draw <- add(draw, bquote(p[[.(name)]] <- MVNsampler$draw(p, .(if (e$sigma.fixed) 1 else quote(p[["sigma_"]])))[[.(name)]]))
rm(Xy)
} else {
MVNsampler <- create_TMVN_sampler(
Q=crossprod_sym(X, e$Q0) + Q0,
update.mu=TRUE,
R=R, r=r, S=S, s=s, lower=lower, upper=upper,
name=name, chol.control=e$control$chol.control
)
draw <- add(draw, bquote(p[[.(name)]] <- MVNsampler$draw(p, .(if (e$sigma.fixed) 1 else quote(p[["sigma_"]])), Xy=Xy)[[.(name)]]))
}
} else {
if (modus == "vargamma")
cholHH <- build_chol(economizeMatrix(2 * crossprod_sym(X, CdiagU(e[["n"]])) + (prior$precision/prior$a) * CdiagU(q), symmetric=TRUE))
else
cholHH <- build_chol(economizeMatrix(crossprod_sym(X, CdiagU(e[["n"]])) + (prior$precision/prior$a) * CdiagU(q), symmetric=TRUE))
draw <- add(draw, bquote(p[[.(name)]] <- cholHH$solve(Hz)))
}
}
if (modus == "var" || modus == "vargamma") {
if (e$single.V.block) {
if (is_ind_matrix(X) && q < e[["n"]])
draw <- add(draw, bquote(p[["Q_"]] <- X %m*v% exp(-p[[.(name)]])))
else
draw <- add(draw, bquote(p[["Q_"]] <- exp(X %m*v% (-p[[.(name)]]))))
} else {
if (is_ind_matrix(X) && q < e[["n"]])
draw <- add(draw, bquote(p[["Q_"]] <- p[["Q_"]] * (X %m*v% exp(-p[[.(name)]]))))
else
draw <- add(draw, bquote(p[["Q_"]] <- p[["Q_"]] * exp(X %m*v% (-p[[.(name)]]))))
}
} else {
if (e$e.is.res)
draw <- add(draw, bquote(mv_update(p[["e_"]], plus=FALSE, X, p[[.(name)]])))
else {
if (e$use.offset || !e$single.block)
draw <- add(draw, bquote(mv_update(p[["e_"]], plus=TRUE, X, p[[.(name)]])))
else
draw <- add(draw, bquote(p$e_ <- X %m*v% p[[.(name)]]))
}
}
draw <- add(draw, quote(p))
if (modus == "regular") {
start <- function(p) MVNsampler$start(p)
} else {
start <- function(p) {
if (is.null(p[[name]])) {
# account for the scale of covariates to avoid over/underflow of exp(linpred)
p[[name]] <- rnorm(q) / colwise_maxabs(X)
} else {
p[[name]] <- as.numeric(p[[name]])
if (length(p[[name]]) != q) stop("start value for '", name, "' has wrong length")
}
p
}
}
} else if (!e$prior.only && prior$type == "fixed") {
if (in_block) stop("'pr_fixed' not supported for components in a Gibbs block")
draw <- function(p) {
if (debug) browser()
p[[name]] <- prior$rprior()
p
}
start <- function(p) {
if (is.null(p[[name]]))
p[[name]] <- prior$rprior()
else {
p[[name]] <- as.numeric(p[[name]])
if (length(p[[name]]) != q) stop("start value for '", name, "' has wrong length")
}
if (e$single.block) {
# compute residuals here once and for all
p[["e_"]] <- e$compute_e(p)
}
p
}
}
if (in_block && (!is.null(e$control$CG) || e$control$expanded.cMVN.sampler)) {
# TODO account for b0
if (informative.prior) {
cholQV <- build_chol(Q0, control=e$control$chol.control)
drawMVNvarQ <- function(p) {
y2 <- Crnorm(q)
cholQV$Ltimes(y2, transpose=FALSE)
}
} else {
drawMVNvarQ <- function(p) numeric(q)
}
}
if (!in_block) rm(R, r, S, s, lower, upper)
environment()
}
# # experimental: display the reg component's prior
# show_reg <- function(mc) {
# mod_str <- paste(mc$name, "~")
# mod_str <- paste(mod_str,
# if (mc$informative.prior) {
# if (mc$e$sigma.fixed) {
# "N(b0, Q0^-1)"
# } else {
# "N(b0, sigma_^2 Q0^-1)"
# }
# } else {
# "1"
# }
# )
# if (!is.null(mc$MVNsampler)) {
# # TODO MVNsampler currently only used for posterior sampling
# if (mc$MVNsampler$eq) mod_str <- paste0(mod_str, ", R'", mc$name, " = r")
# if (mc$MVNsampler$ineq) mod_str <- paste0(mod_str, ", S'", mc$name, " >= s")
# }
# cat(mod_str, "\n")
# }
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.