Nothing
#' Prepare Predictor Data
#'
#' Prepare data related to predictor variables in \pkg{brms}.
#' Only exported for use in package development.
#'
#' @param x An \R object.
#' @param ... Further arguments passed to or from other methods.
#'
#' @return A named list of data related to predictor variables.
#'
#' @keywords internal
#' @export
data_predictor <- function(x, ...) {
UseMethod("data_predictor")
}
#' @export
data_predictor.mvbrmsterms <- function(x, data, sdata = NULL, ...) {
out <- list(N = nrow(data))
for (r in names(x$terms)) {
c(out) <- data_predictor(x$terms[[r]], data = data, sdata = sdata, ...)
}
out
}
#' @export
data_predictor.brmsterms <- function(x, data, data2, prior, sdata = NULL, ...) {
out <- list()
data <- subset_data(data, x)
resp <- usc(combine_prefix(x))
args_eff <- nlist(data, data2, prior, sdata, ...)
for (dp in names(x$dpars)) {
args_eff_spec <- list(x = x$dpars[[dp]])
c(out) <- do_call(data_predictor, c(args_eff_spec, args_eff))
}
for (dp in names(x$fdpars)) {
if (is.numeric(x$fdpars[[dp]]$value)) {
out[[paste0(dp, resp)]] <- x$fdpars[[dp]]$value
}
}
for (nlp in names(x$nlpars)) {
args_eff_spec <- list(x = x$nlpars[[nlp]])
c(out) <- do_call(data_predictor, c(args_eff_spec, args_eff))
}
c(out) <- data_gr_local(x, data = data)
c(out) <- data_mixture(x, data2 = data2, prior = prior)
out
}
# prepare data for all types of effects for use in Stan
# @param data the data passed by the user
# @param prior an object of class brmsprior
# @param ... currently ignored
# @return a named list of data to be passed to Stan
#' @export
data_predictor.btl <- function(x, data, data2 = list(), prior = brmsprior(),
sdata = NULL, ...) {
out <- c(
data_fe(x, data),
data_sp(x, data, data2 = data2, prior = prior),
data_re(x, data),
data_cs(x, data),
data_sm(x, data),
data_gp(x, data),
data_ac(x, data, data2 = data2),
data_offset(x, data),
data_bhaz(x, data, data2 = data2, prior = prior)
)
c(out) <- data_special_prior(x, data, prior = prior, sdata = c(sdata, out))
out
}
# prepare data for non-linear parameters for use in Stan
#' @export
data_predictor.btnl <- function(x, data, data2 = list(), prior = brmsprior(),
...) {
out <- list()
c(out) <- data_cnl(x, data)
c(out) <- data_ac(x, data, data2 = data2)
c(out) <- data_bhaz(x, data, data2 = data2, prior = prior)
out
}
# prepare data of fixed effects
data_fe <- function(bframe, data) {
stopifnot(is.btl(bframe))
if (!is.null(bframe$sdata$fe)) {
# standata was already precomputed
return(bframe$sdata$fe)
}
out <- list()
p <- usc(combine_prefix(bframe))
# the intercept is removed inside the Stan code for non-ordinal models
is_ord <- is_ordinal(bframe)
cols2remove <- if (is_ord) "(Intercept)"
X <- get_model_matrix(rhs(bframe$fe), data, cols2remove = cols2remove)
avoid_dpars(colnames(X), bframe)
out[[paste0("K", p)]] <- ncol(X)
if (stan_center_X(bframe)) {
# relevant if the intercept is treated separately to enable centering
out[[paste0("Kc", p)]] <- ncol(X) - ifelse(is_ord, 0, 1)
}
out[[paste0("X", p)]] <- X
out
}
# data preparation for splines
data_sm <- function(bframe, data) {
stopifnot(is.btl(bframe))
if (!is.null(bframe$sdata$sm)) {
# standata was already precomputed
return(bframe$sdata$sm)
}
out <- list()
smterms <- all_terms(bframe[["sm"]])
if (!length(smterms)) {
return(out)
}
p <- usc(combine_prefix(bframe))
# basis contains information on the smooths from the original data
basis <- bframe$basis$sm
new <- length(basis) > 0L
knots <- get_knots(data)
diagonal.penalty <- !require_old_default("2.8.7")
bylevels <- named_list(smterms)
ns <- 0
lXs <- list()
for (i in seq_along(smterms)) {
if (new) {
sm <- basis[[i]]$sm
} else {
sm <- smoothCon(
eval2(smterms[i]), data = data,
knots = knots, absorb.cons = TRUE,
diagonal.penalty = diagonal.penalty
)
}
# may contain multiple terms when 'by' is a factor
for (j in seq_along(sm)) {
ns <- ns + 1
if (length(sm[[j]]$by.level)) {
bylevels[[i]][j] <- sm[[j]]$by.level
}
if (new) {
# prepare smooths for use with new data
# mgcv smooths are based on machine-specific SVD (#1465)
re <- s2rPred(sm[[j]], re = basis[[i]]$re[[j]], data = data)
} else {
re <- mgcv::smooth2random(sm[[j]], names(data), type = 2)
}
lXs[[ns]] <- re$Xf
if (NCOL(lXs[[ns]])) {
colnames(lXs[[ns]]) <- paste0(sm[[j]]$label, "_", seq_cols(lXs[[ns]]))
}
Zs <- re$rand
sfx <- paste0(p, "_", ns)
out[[paste0("nb", sfx)]] <- length(Zs)
if (length(Zs)) {
names(Zs) <- paste0("Zs", sfx, "_", seq_along(Zs))
c(out) <- Zs
out[[paste0("knots", sfx)]] <- as.array(ulapply(Zs, ncol))
} else {
out[[paste0("knots", sfx)]] <- integer(0)
}
}
}
Xs <- do_call(cbind, lXs)
avoid_dpars(colnames(Xs), bframe)
smcols <- lapply(lXs, function(x) which(colnames(Xs) %in% colnames(x)))
Xs <- structure(Xs, smcols = smcols, bylevels = bylevels)
colnames(Xs) <- rename(colnames(Xs))
out[[paste0("Ks", p)]] <- ncol(Xs)
out[[paste0("Xs", p)]] <- Xs
out
}
# prepare data for group-level effects for use in Stan
data_re <- function(bframe, data) {
stopifnot(is.bframel(bframe))
out <- list()
px <- check_prefix(bframe)
reframe <- subset2(bframe$frame$re, type = "sp", fun = "%notin%")
if (!has_rows(reframe)) {
return(out)
}
gn <- unique(reframe$gn)
for (i in seq_along(gn)) {
r <- subset2(reframe, gn = gn[i])
Z <- get_model_matrix(r$form[[1]], data = data, rename = FALSE)
idp <- paste0(r$id[1], usc(combine_prefix(px)))
Znames <- paste0("Z_", idp, "_", r$cn)
if (r$gtype[1] == "mm") {
ng <- length(r$gcall[[1]]$groups)
if (r$type[1] == "cs") {
stop2("'cs' is not supported in multi-membership terms.")
}
if (r$type[1] == "mmc") {
# see issue #353 for the general idea
mmc_expr <- "^mmc\\([^:]*\\)"
mmc_terms <- get_matches_expr(mmc_expr, colnames(Z))
for (t in mmc_terms) {
pos <- which(grepl_expr(escape_all(t), colnames(Z)))
if (length(pos) != ng) {
stop2("Invalid term '", t, "': Expected ", ng,
" coefficients but found ", length(pos), ".")
}
for (j in seq_along(Znames)) {
for (k in seq_len(ng)) {
out[[paste0(Znames[j], "_", k)]] <- as.array(Z[, pos[k]])
}
}
}
} else {
for (j in seq_along(Znames)) {
out[paste0(Znames[j], "_", seq_len(ng))] <- list(as.array(Z[, j]))
}
}
} else {
if (r$type[1] == "cs") {
ncatM1 <- nrow(r) / ncol(Z)
Z_temp <- vector("list", ncol(Z))
for (k in seq_along(Z_temp)) {
Z_temp[[k]] <- replicate(ncatM1, Z[, k], simplify = FALSE)
}
Z <- do_call(cbind, unlist(Z_temp, recursive = FALSE))
}
if (r$type[1] == "mmc") {
stop2("'mmc' is only supported in multi-membership terms.")
}
for (j in seq_cols(Z)) {
out[[Znames[j]]] <- as.array(Z[, j])
}
}
}
out
}
# compute data for each group-level-ID per univariate model
data_gr_local <- function(bframe, data) {
stopifnot(is.brmsframe(bframe))
out <- list()
reframe <- subset2(bframe$frame$re, resp = bframe$resp)
resp <- usc(bframe$resp)
for (id in unique(reframe$id)) {
id_reframe <- subset2(reframe, id = id)
idresp <- paste0(id, resp)
nranef <- nrow(id_reframe)
group <- id_reframe$group[1]
levels <- get_levels(reframe)[[group]]
if (id_reframe$gtype[1] == "mm") {
# multi-membership grouping term
gs <- id_reframe$gcall[[1]]$groups
ngs <- length(gs)
weights <- id_reframe$gcall[[1]]$weights
if (is.formula(weights)) {
scale <- isTRUE(attr(weights, "scale"))
weights <- as.matrix(eval_rhs(weights, data))
if (!identical(dim(weights), c(nrow(data), ngs))) {
stop2(
"Grouping structure 'mm' expects 'weights' to be ",
"a matrix with as many columns as grouping factors."
)
}
if (scale) {
if (isTRUE(any(weights < 0))) {
stop2("Cannot scale negative weights.")
}
weights <- sweep(weights, 1, rowSums(weights), "/")
}
} else {
# all members get equal weights by default
weights <- matrix(1 / ngs, nrow = nrow(data), ncol = ngs)
}
for (i in seq_along(gs)) {
gdata <- get(gs[i], data)
J <- match(gdata, levels)
if (anyNA(J)) {
# occurs for new levels only
new_gdata <- gdata[!gdata %in% levels]
new_levels <- unique(new_gdata)
J[is.na(J)] <- match(new_gdata, new_levels) + length(levels)
}
out[[paste0("J_", idresp, "_", i)]] <- as.array(J)
out[[paste0("W_", idresp, "_", i)]] <- as.array(weights[, i])
}
} else {
# ordinary grouping term
g <- id_reframe$gcall[[1]]$groups
gdata <- get(g, data)
J <- match(gdata, levels)
if (anyNA(J)) {
# occurs for new levels only
new_gdata <- gdata[!gdata %in% levels]
new_levels <- unique(new_gdata)
J[is.na(J)] <- match(new_gdata, new_levels) + length(levels)
}
out[[paste0("J_", idresp)]] <- as.array(J)
}
}
out
}
# prepare global data for each group-level-ID
data_gr_global <- function(bframe, data2) {
stopifnot(is.anybrmsframe(bframe))
out <- list()
reframe <- bframe$frame$re
for (id in unique(reframe$id)) {
tmp <- list()
id_reframe <- subset2(reframe, id = id)
nranef <- nrow(id_reframe)
group <- id_reframe$group[1]
levels <- attr(reframe, "levels")[[group]]
tmp$N <- length(levels)
tmp$M <- nranef
tmp$NC <- as.integer(nranef * (nranef - 1) / 2)
# prepare number of levels of an optional 'by' variable
if (nzchar(id_reframe$by[1])) {
stopifnot(!nzchar(id_reframe$type[1]))
bylevels <- id_reframe$bylevels[[1]]
Jby <- match(attr(levels, "by"), bylevels)
tmp$Nby <- length(bylevels)
tmp$Jby <- as.array(Jby)
}
# prepare within-group covariance matrices
cov <- id_reframe$cov[1]
if (nzchar(cov)) {
# validation is only necessary here for compatibility with 'cov_ranef'
cov_mat <- validate_recov_matrix(data2[[cov]])
found_levels <- rownames(cov_mat)
found <- levels %in% found_levels
if (any(!found)) {
stop2("Levels of the within-group covariance matrix for '", group,
"' do not match names of the grouping levels.")
}
cov_mat <- cov_mat[levels, levels, drop = FALSE]
tmp$Lcov <- t(chol(cov_mat))
}
names(tmp) <- paste0(names(tmp), "_", id)
c(out) <- tmp
}
out
}
# prepare data for special effects for use in Stan
data_sp <- function(bframe, data, data2, prior) {
stopifnot(is.bframel(bframe))
if (!is.null(bframe$sdata$sp)) {
# standata was already precomputed
return(bframe$sdata$sp)
}
out <- list()
spframe <- bframe$frame$sp
if (!has_rows(spframe)) {
return(out)
}
basis <- bframe$basis$sp
px <- check_prefix(bframe)
p <- usc(combine_prefix(px))
# prepare general data
out[[paste0("Ksp", p)]] <- nrow(spframe)
Csp <- sp_model_matrix(bframe$sp, data)
avoid_dpars(colnames(Csp), bframe)
Csp <- Csp[, spframe$Ic > 0, drop = FALSE]
Csp <- lapply(seq_cols(Csp), function(i) as.array(Csp[, i]))
if (length(Csp)) {
Csp_names <- paste0("Csp", p, "_", seq_along(Csp))
out <- c(out, setNames(Csp, Csp_names))
}
if (any(lengths(spframe$Imo) > 0)) {
# prepare data specific to monotonic effects
out[[paste0("Imo", p)]] <- max(unlist(spframe$Imo))
Xmo <- lapply(unlist(spframe$calls_mo), get_mo_values, data = data)
Xmo_names <- paste0("Xmo", p, "_", seq_along(Xmo))
c(out) <- setNames(Xmo, Xmo_names)
if (!is.null(basis$Jmo)) {
# take information from original data
Jmo <- basis$Jmo
} else {
Jmo <- as.array(ulapply(Xmo, attr, "max"))
}
out[[paste0("Jmo", p)]] <- Jmo
# prepare prior concentration of simplex parameters
simo_coef <- get_simo_labels(spframe, use_id = TRUE)
ids <- unlist(spframe$ids_mo)
for (j in seq_along(simo_coef)) {
# index of first ID appearance
j_id <- match(ids[j], ids)
if (is.na(ids[j]) || j_id == j) {
# only evaluate priors without ID or first appearance of the ID
# all other parameters will be copied over in the Stan code
simo_prior <- subset2(prior,
class = "simo", coef = simo_coef[j], ls = px
)
con_simo <- eval_dirichlet(simo_prior$prior, Jmo[j], data2)
out[[paste0("con_simo", p, "_", j)]] <- as.array(con_simo)
}
}
}
uni_mi <- attr(spframe, "uni_mi")
index <- bframe$frame$index
for (j in seq_rows(uni_mi)) {
if (!is.na(uni_mi$idx[j])) {
idxl <- get(uni_mi$idx[j], data)
if (is.null(index[[uni_mi$var[j]]])) {
# the 'idx' argument needs to be mapped against 'index' addition terms
stop2("Response '", uni_mi$var[j], "' needs to have an 'index' addition ",
"term to compare with 'idx'. See ?mi for examples.")
}
idxl <- match(idxl, index[[uni_mi$var[j]]])
if (anyNA(idxl)) {
stop2("Could not match all indices in response '", uni_mi$var[j], "'.")
}
idxl_name <- paste0("idxl", p, "_", uni_mi$var[j], "_", uni_mi$idx2[j])
out[[idxl_name]] <- as.array(idxl)
} else if (isTRUE(attr(index[[uni_mi$var[j]]], "subset"))) {
# cross-formula referencing is required for subsetted variables
stop2("mi() terms of subsetted variables require ",
"the 'idx' argument to be specified.")
}
}
out
}
# prepare data for category specific effects
data_cs <- function(bframe, data) {
stopifnot(is.btl(bframe))
if (!is.null(bframe$sdata$cs)) {
# standata was already precomputed
return(bframe$sdata$cs)
}
out <- list()
if (length(all_terms(bframe[["cs"]]))) {
p <- usc(combine_prefix(bframe))
Xcs <- get_model_matrix(bframe$cs, data)
avoid_dpars(colnames(Xcs), bframe)
out <- c(out, list(Kcs = ncol(Xcs), Xcs = Xcs))
out <- setNames(out, paste0(names(out), p))
}
out
}
# prepare global data for noise free variables
data_Xme <- function(bframe, data) {
stopifnot(is.anybrmsframe(bframe))
meframe <- bframe$frame$me
stopifnot(is.meframe(meframe))
out <- list()
groups <- unique(meframe$grname)
for (i in seq_along(groups)) {
g <- groups[i]
K <- which(meframe$grname %in% g)
Mme <- length(K)
out[[paste0("Mme_", i)]] <- Mme
out[[paste0("NCme_", i)]] <- Mme * (Mme - 1) / 2
if (nzchar(g)) {
levels <- get_levels(meframe)[[g]]
gr <- get_me_group(meframe$term[K[1]], data)
Jme <- match(gr, levels)
if (anyNA(Jme)) {
# occurs for new levels only
# replace NAs with unique values; fixes issue #706
gr[is.na(gr)] <- paste0("new_", seq_len(sum(is.na(gr))), "__")
new_gr <- gr[!gr %in% levels]
new_levels <- unique(new_gr)
Jme[is.na(Jme)] <- length(levels) + match(new_gr, new_levels)
}
ilevels <- unique(Jme)
out[[paste0("Nme_", i)]] <- length(ilevels)
out[[paste0("Jme_", i)]] <- Jme
}
for (k in K) {
Xn <- get_me_values(meframe$term[k], data)
noise <- get_me_noise(meframe$term[k], data)
if (nzchar(g)) {
for (l in ilevels) {
# validate values of the same level
take <- Jme %in% l
if (length(unique(Xn[take])) > 1L ||
length(unique(noise[take])) > 1L) {
stop2(
"Measured values and measurement error should be ",
"unique for each group. Occured for level '",
levels[l], "' of group '", g, "'."
)
}
}
Xn <- get_one_value_per_group(Xn, Jme)
noise <- get_one_value_per_group(noise, Jme)
}
out[[paste0("Xn_", k)]] <- as.array(Xn)
out[[paste0("noise_", k)]] <- as.array(noise)
}
}
out
}
# prepare data for Gaussian process terms
# @param internal store some intermediate data for internal post-processing?
# @param ... passed to '.data_gp'
data_gp <- function(bframe, data, internal = FALSE, ...) {
stopifnot(is.bframel(bframe))
if (!is.null(bframe$sdata$gp)) {
# standata was already precomputed
return(bframe$sdata$gp)
}
out <- list()
internal <- as_one_logical(internal)
px <- check_prefix(bframe)
p <- usc(combine_prefix(px))
basis <- bframe$basis$gp
gpframe <- bframe$frame$gp
for (i in seq_rows(gpframe)) {
pi <- paste0(p, "_", i)
Xgp <- lapply(gpframe$covars[[i]], eval2, data)
D <- length(Xgp)
out[[paste0("Dgp", pi)]] <- D
invalid <- ulapply(Xgp, function(x)
!is.numeric(x) || isTRUE(length(dim(x)) > 1L)
)
if (any(invalid)) {
stop2("Predictors of Gaussian processes should be numeric vectors.")
}
Xgp <- do_call(cbind, Xgp)
cmc <- gpframe$cmc[i]
scale <- gpframe$scale[i]
gr <- gpframe$gr[i]
k <- gpframe$k[i]
c <- gpframe$c[[i]]
if (!isNA(k)) {
out[[paste0("NBgp", pi)]] <- k ^ D
Ks <- as.matrix(do_call(expand.grid, repl(seq_len(k), D)))
}
byvar <- gpframe$byvars[[i]]
byfac <- length(gpframe$cons[[i]]) > 0L
bynum <- !is.null(byvar) && !byfac
if (byfac) {
# for categorical 'by' variables prepare one GP per level
# as.factor will keep unused levels needed for new data
byval <- as.factor(get(byvar, data))
byform <- str2formula(c(ifelse(cmc, "0", "1"), "byval"))
con_mat <- model.matrix(byform)
cons <- colnames(con_mat)
out[[paste0("Kgp", pi)]] <- length(cons)
Ngp <- Nsubgp <- vector("list", length(cons))
for (j in seq_along(cons)) {
# loop along contrasts of 'by'
Cgp <- con_mat[, j]
sfx <- paste0(pi, "_", j)
tmp <- .data_gp(
Xgp, k = k, gr = gr, sfx = sfx, Cgp = Cgp, c = c,
scale = scale, internal = internal, basis = basis,
...
)
Ngp[[j]] <- attributes(tmp)[["Ngp"]]
Nsubgp[[j]] <- attributes(tmp)[["Nsubgp"]]
c(out) <- tmp
}
out[[paste0("Ngp", pi)]] <- unlist(Ngp)
if (gr) {
out[[paste0("Nsubgp", pi)]] <- unlist(Nsubgp)
}
} else {
out[[paste0("Kgp", pi)]] <- 1L
c(out) <- .data_gp(
Xgp, k = k, gr = gr, sfx = pi, c = c,
scale = scale, internal = internal, basis = basis,
...
)
if (bynum) {
Cgp <- as.numeric(get(byvar, data))
out[[paste0("Cgp", pi)]] <- as.array(Cgp)
}
}
}
if (length(basis)) {
# original covariate values are required in new GP prediction
Xgp_old <- basis[grepl("^Xgp", names(basis))]
names(Xgp_old) <- paste0(names(Xgp_old), "_old")
out[names(Xgp_old)] <- Xgp_old
}
out
}
# helper function to preparae GP related data
# @inheritParams data_gp
# @param Xgp matrix of covariate values
# @param k, gr, c see 'frame_gp'
# @param sfx suffix to put at the end of data names
# @param Cgp optional vector of values belonging to
# a certain contrast of a factor 'by' variable
.data_gp <- function(Xgp, k, gr, sfx, Cgp = NULL, c = NULL,
scale = TRUE, internal = FALSE, basis = NULL) {
out <- list()
if (!is.null(Cgp)) {
Cgp <- unname(Cgp)
Igp <- which(Cgp != 0)
Xgp <- Xgp[Igp, , drop = FALSE]
out[[paste0("Igp", sfx)]] <- as.array(Igp)
out[[paste0("Cgp", sfx)]] <- as.array(Cgp[Igp])
attr(out, "Ngp") <- length(Igp)
}
if (gr) {
groups <- factor(match_rows(Xgp, Xgp))
ilevels <- levels(groups)
Jgp <- match(groups, ilevels)
Nsubgp <- length(ilevels)
if (!is.null(Cgp)) {
attr(out, "Nsubgp") <- Nsubgp
} else {
out[[paste0("Nsubgp", sfx)]] <- Nsubgp
}
out[[paste0("Jgp", sfx)]] <- as.array(Jgp)
not_dupl_Jgp <- !duplicated(Jgp)
Xgp <- Xgp[not_dupl_Jgp, , drop = FALSE]
}
if (scale) {
# scale predictor for easier specification of priors
if (length(basis)) {
# scale Xgp based on the original data
dmax <- basis[[paste0("dmax", sfx)]]
} else {
dmax <- sqrt(max(diff_quad(Xgp)))
}
if (!isTRUE(dmax > 0)) {
stop2("Could not scale GP covariates. Please set 'scale' to FALSE in 'gp'.")
}
if (internal) {
# required for scaling of GPs with new data
out[[paste0("dmax", sfx)]] <- dmax
}
Xgp <- Xgp / dmax
}
if (length(basis)) {
# center Xgp based on the original data
cmeans <- basis[[paste0("cmeans", sfx)]]
} else {
cmeans <- colMeans(Xgp)
}
if (internal) {
# required for centering of approximate GPs with new data
out[[paste0("cmeans", sfx)]] <- cmeans
# required to compute inverse-gamma priors for length-scales
out[[paste0("Xgp_prior", sfx)]] <- Xgp
}
if (!isNA(k)) {
# basis function approach requires centered variables
Xgp <- sweep(Xgp, 2, cmeans)
D <- NCOL(Xgp)
L <- choose_L(Xgp, c = c)
Ks <- as.matrix(do_call(expand.grid, repl(seq_len(k), D)))
XgpL <- matrix(nrow = NROW(Xgp), ncol = NROW(Ks))
slambda <- matrix(nrow = NROW(Ks), ncol = D)
for (m in seq_rows(Ks)) {
XgpL[, m] <- eigen_fun_laplacian(Xgp, m = Ks[m, ], L = L)
slambda[m, ] <- sqrt(eigen_val_laplacian(m = Ks[m, ], L = L))
}
out[[paste0("Xgp", sfx)]] <- XgpL
out[[paste0("slambda", sfx)]] <- slambda
} else {
out[[paste0("Xgp", sfx)]] <- as.array(Xgp)
}
out
}
# data for autocorrelation variables
data_ac <- function(bframe, data, data2, ...) {
if (!is.null(bframe$sdata$ac)) {
# standata was already precomputed
return(bframe$sdata$ac)
}
out <- list()
N <- nrow(data)
basis <- bframe$basis$ac
acframe <- bframe$frame$ac
stopifnot(is.acframe(acframe))
if (has_ac_subset(bframe, dim = "time")) {
gr <- get_ac_vars(acframe, "gr", dim = "time")
if (isTRUE(nzchar(gr))) {
tgroup <- as.numeric(factor(data[[gr]]))
} else {
tgroup <- rep(1, N)
}
}
if (has_ac_class(acframe, "arma")) {
# ARMA correlations
acframe_arma <- subset2(acframe, class = "arma")
out$Kar <- acframe_arma$p
out$Kma <- acframe_arma$q
if (!use_ac_cov_time(acframe_arma)) {
# data for the 'predictor' version of ARMA
max_lag <- max(out$Kar, out$Kma)
out$J_lag <- as.array(rep(0, N))
for (n in seq_len(N)[-N]) {
ind <- n:max(1, n + 1 - max_lag)
# indexes errors to be used in the n+1th prediction
out$J_lag[n] <- sum(tgroup[ind] %in% tgroup[n + 1])
}
}
}
if (use_ac_cov_time(acframe)) {
# data for the 'covariance' versions of time-series structures
# TODO: change begin[i]:end[i] notation to slice[i]:(slice[i+1] - 1)
# see comment on PR #1435
out$N_tg <- length(unique(tgroup))
out$begin_tg <- as.array(ulapply(unique(tgroup), match, tgroup))
out$nobs_tg <- as.array(with(out,
c(if (N_tg > 1L) begin_tg[2:N_tg], N + 1) - begin_tg
))
out$end_tg <- with(out, begin_tg + nobs_tg - 1)
if (has_ac_class(acframe, "unstr")) {
time <- get_ac_vars(bframe, "time", dim = "time")
time_data <- get(time, data)
new_times <- extract_levels(time_data)
if (length(basis)) {
times <- basis$times
# unstr estimates correlations only for given time points
invalid_times <- setdiff(new_times, times)
if (length(invalid_times)) {
stop2("Cannot handle new time points in UNSTR models.")
}
} else {
times <- new_times
}
out$n_unique_t <- length(times)
out$n_unique_cortime <- out$n_unique_t * (out$n_unique_t - 1) / 2
Jtime <- match(time_data, times)
out$Jtime_tg <- matrix(0L, out$N_tg, max(out$nobs_tg))
for (i in seq_len(out$N_tg)) {
out$Jtime_tg[i, seq_len(out$nobs_tg[i])] <-
Jtime[out$begin_tg[i]:out$end_tg[i]]
}
}
}
if (has_ac_class(acframe, "sar")) {
acframe_sar <- subset2(acframe, class = "sar")
M <- data2[[acframe_sar$M]]
rmd_rows <- attr(data, "na.action")
if (!is.null(rmd_rows)) {
class(rmd_rows) <- NULL
M <- M[-rmd_rows, -rmd_rows, drop = FALSE]
}
if (!is_equal(dim(M), rep(N, 2))) {
stop2("Dimensions of 'M' for SAR terms must be equal to ",
"the number of observations.")
}
out$Msar <- as.matrix(M)
out$eigenMsar <- eigen(M)$values
# simplifies code of choose_N
out$N_tg <- 1
}
if (has_ac_class(acframe, "car")) {
acframe_car <- subset2(acframe, class = "car")
locations <- NULL
if (length(basis)) {
locations <- basis$locations
}
M <- data2[[acframe_car$M]]
if (acframe_car$gr != "NA") {
loc_data <- get(acframe_car$gr, data)
new_locations <- extract_levels(loc_data)
if (is.null(locations)) {
locations <- new_locations
} else {
invalid_locations <- setdiff(new_locations, locations)
if (length(invalid_locations)) {
stop2("Cannot handle new locations in CAR models.")
}
}
Nloc <- length(locations)
Jloc <- as.array(match(loc_data, locations))
if (is.null(rownames(M))) {
stop2("Row names are required for 'M' in CAR terms.")
}
found <- locations %in% rownames(M)
if (any(!found)) {
stop2("Row names of 'M' for CAR terms do not match ",
"the names of the grouping levels.")
}
M <- M[locations, locations, drop = FALSE]
} else {
warning2(
"Using CAR terms without a grouping factor is deprecated. ",
"Please use argument 'gr' even if each observation ",
"represents its own location."
)
Nloc <- N
Jloc <- as.array(seq_len(Nloc))
if (!is_equal(dim(M), rep(Nloc, 2))) {
if (length(basis)) {
stop2("Cannot handle new data in CAR terms ",
"without a grouping factor.")
} else {
stop2("Dimensions of 'M' for CAR terms must be equal ",
"to the number of observations.")
}
}
}
edges_rows <- (Matrix::tril(M)@i + 1)
edges_cols <- sort(Matrix::triu(M)@i + 1) ## sort to make consistent with rows
edges <- cbind("rows" = edges_rows, "cols" = edges_cols)
c(out) <- nlist(
Nloc, Jloc, Nedges = length(edges_rows),
edges1 = as.array(edges_rows),
edges2 = as.array(edges_cols)
)
if (acframe_car$type %in% c("escar", "esicar")) {
Nneigh <- Matrix::colSums(M)
if (any(Nneigh == 0) && !length(basis)) {
stop2(
"For exact sparse CAR, all locations should have at ",
"least one neighbor within the provided data set. ",
"Consider using type = 'icar' instead."
)
}
inv_sqrt_D <- diag(1 / sqrt(Nneigh))
eigenMcar <- t(inv_sqrt_D) %*% M %*% inv_sqrt_D
eigenMcar <- eigen(eigenMcar, TRUE, only.values = TRUE)$values
c(out) <- nlist(Nneigh, eigenMcar)
} else if (acframe_car$type %in% "bym2") {
c(out) <- list(car_scale = .car_scale(edges, Nloc))
}
}
if (has_ac_class(acframe, "fcor")) {
acframe_fcor <- subset2(acframe, class = "fcor")
M <- data2[[acframe_fcor$M]]
rmd_rows <- attr(data, "na.action")
if (!is.null(rmd_rows)) {
class(rmd_rows) <- NULL
M <- M[-rmd_rows, -rmd_rows, drop = FALSE]
}
if (nrow(M) != N) {
stop2("Dimensions of 'M' for FCOR terms must be equal ",
"to the number of observations.")
}
out$Mfcor <- M
# simplifies code of choose_N
out$N_tg <- 1
}
if (length(out)) {
resp <- usc(combine_prefix(bframe))
out <- setNames(out, paste0(names(out), resp))
}
out
}
# prepare data of offsets for use in Stan
data_offset <- function(bframe, data) {
stopifnot(is.btl(bframe))
if (!is.null(bframe$sdata$offset)) {
# standata was already precomputed
return(bframe$sdata$offset)
}
out <- list()
px <- check_prefix(bframe)
if (is.formula(bframe$offset)) {
p <- usc(combine_prefix(px))
mf <- rm_attr(data, "terms")
mf <- model.frame(bframe$offset, mf, na.action = na.pass)
offset <- model.offset(mf)
if (length(offset) == 1L) {
offset <- rep(offset, nrow(data))
}
# use 'offsets' as 'offset' will be reserved in stanc3
out[[paste0("offsets", p)]] <- as.array(offset)
}
out
}
# data for covariates in non-linear models
# @param x a btnl object
# @return a named list of data passed to Stan
data_cnl <- function(bframe, data) {
stopifnot(is.btnl(bframe))
if (!is.null(bframe$sdata$cnl)) {
# standata was already precomputed
return(bframe$sdata$cnl)
}
out <- list()
covars <- all.vars(bframe$covars)
if (!length(covars)) {
return(out)
}
p <- usc(combine_prefix(bframe))
for (i in seq_along(covars)) {
cvalues <- get(covars[i], data)
if (is_like_factor(cvalues)) {
# need to apply factor contrasts
cform <- str2formula(covars[i])
cvalues <- get_model_matrix(cform, data, cols2remove = "(Intercept)")
if (NCOL(cvalues) == 1L) {
dim(cvalues) <- NULL
}
}
if (isTRUE(dim(cvalues) > 2L)) {
stop2("Non-linear covariates should be vectors or matrices.")
}
out[[paste0("C", p, "_", i)]] <- as.array(cvalues)
}
out
}
# compute the spatial scaling factor of CAR models
# @param edges matrix with two columns defining the adjacency of the locations
# @param Nloc number of locations
# @return a scalar scaling factor
.car_scale <- function(edges, Nloc) {
# amended from Imad Ali's code of CAR models in rstanarm
stopifnot(is.matrix(edges), NCOL(edges) == 2)
# Build the adjacency matrix
adj_matrix <- Matrix::sparseMatrix(
i = edges[, 1], j = edges[, 2], x = 1,
symmetric = TRUE
)
# The ICAR precision matrix (which is singular)
Q <- Matrix::Diagonal(Nloc, Matrix::rowSums(adj_matrix)) - adj_matrix
# Add a small jitter to the diagonal for numerical stability
Q_pert <- Q + Matrix::Diagonal(Nloc) *
max(Matrix::diag(Q)) * sqrt(.Machine$double.eps)
# Compute the diagonal elements of the covariance matrix subject to the
# constraint that the entries of the ICAR sum to zero.
.Q_inv <- function(Q) {
Sigma <- Matrix::solve(Q)
A <- matrix(1, 1, NROW(Sigma))
W <- Sigma %*% t(A)
Sigma <- Sigma - W %*% solve(A %*% W) %*% Matrix::t(W)
return(Sigma)
}
Q_inv <- .Q_inv(Q_pert)
# Compute the geometric mean of the variances (diagonal of Q_inv)
exp(mean(log(Matrix::diag(Q_inv))))
}
# data for special priors such as horseshoe and R2D2
data_special_prior <- function(bframe, data, prior, sdata = NULL) {
out <- list()
px <- check_prefix(bframe)
p <- usc(combine_prefix(px))
if (!has_special_prior(prior, px)) {
return(out)
}
# number of coefficients affected by the shrinkage prior
# fully compute this here to avoid having to pass the prior around
# to all the individual data preparation functions
# the order of adding things to Kscales doesn't matter but for consistency
# it is still the same as the order in the Stan code
Kscales <- 0
if (has_special_prior(prior, px, class = "b")) {
Kscales <- Kscales +
sdata[[paste0("Kc", p)]] %||% sdata[[paste0("K", p)]] %||% 0 +
sdata[[paste0("Ksp", p)]] %||% 0 +
sdata[[paste0("Ks", p)]] %||% 0
}
if (has_special_prior(prior, px, class = "sds")) {
take <- grepl(paste0("^nb", p, "_"), names(sdata))
Kscales <- Kscales + sum(unlist(sdata[take]))
}
if (has_special_prior(prior, px, class = "sdgp")) {
take <- grepl(paste0("^Kgp", p, "_"), names(sdata))
Kscales <- Kscales + sum(unlist(sdata[take]))
}
if (has_special_prior(prior, px, class = "ar")) {
Kscales <- Kscales + sdata[[paste0("Kar", p)]]
}
if (has_special_prior(prior, px, class = "ma")) {
Kscales <- Kscales + sdata[[paste0("Kma", p)]]
}
if (has_special_prior(prior, px, class = "sderr")) {
Kscales <- Kscales + 1
}
if (has_special_prior(prior, px, class = "sdcar")) {
Kscales <- Kscales + 1
}
if (has_special_prior(prior, px, class = "sd")) {
ids <- unique(bframe$frame$re$id)
Kscales <- Kscales + sum(unlist(sdata[paste0("M_", ids)]))
}
out[[paste0("Kscales", p)]] <- Kscales
special <- get_special_prior(prior, px, main = TRUE)
if (special$name == "horseshoe") {
# data for the horseshoe prior
hs_names <- c("df", "df_global", "df_slab", "scale_global", "scale_slab")
hs_data <- special[hs_names]
if (!is.null(special$par_ratio)) {
hs_data$scale_global <- special$par_ratio / sqrt(nrow(data))
}
names(hs_data) <- paste0("hs_", hs_names, p)
c(out) <- hs_data
} else if (special$name == "R2D2") {
# data for the R2D2 prior
R2D2_names <- c("mean_R2", "prec_R2", "cons_D2")
R2D2_data <- special[R2D2_names]
if (length(R2D2_data$cons_D2) == 1L) {
R2D2_data$cons_D2 <- rep(R2D2_data$cons_D2, Kscales)
}
if (length(R2D2_data$cons_D2) != Kscales) {
stop2("Argument 'cons_D2' of the R2D2 prior must be of length 1 or ", Kscales)
}
R2D2_data$cons_D2 <- as.array(R2D2_data$cons_D2)
names(R2D2_data) <- paste0("R2D2_", R2D2_names, p)
c(out) <- R2D2_data
}
out
}
# Construct design matrices for brms models
# @param formula a formula object
# @param data A data frame created with model.frame.
# If another sort of object, model.frame is called first.
# @param cols2remove names of the columns to remove from
# the model matrix; mainly used for intercepts
# @param rename rename column names via rename()?
# @param ... passed to stats::model.matrix
# @return
# The design matrix for the given formula and data.
# For details see ?stats::model.matrix
get_model_matrix <- function(formula, data = environment(formula),
cols2remove = NULL, rename = TRUE, ...) {
stopifnot(is_atomic_or_null(cols2remove))
terms <- validate_terms(formula)
if (is.null(terms)) {
return(NULL)
}
if (no_int(terms)) {
cols2remove <- union(cols2remove, "(Intercept)")
}
X <- stats::model.matrix(terms, data, ...)
cols2remove <- which(colnames(X) %in% cols2remove)
if (length(cols2remove)) {
X <- X[, -cols2remove, drop = FALSE]
}
if (rename) {
colnames(X) <- rename(colnames(X), check_dup = TRUE)
}
X
}
# convenient wrapper around mgcv::PredictMat
PredictMat <- function(object, data, ...) {
data <- sm_prepare_data(object, data)
out <- mgcv::PredictMat(object, data = data, ...)
if (length(dim(out)) < 2L) {
# fixes issue #494
out <- matrix(out, nrow = 1)
}
out
}
# convenient wrapper around mgcv::smoothCon
smoothCon <- function(object, data, ...) {
data <- sm_prepare_data(object, data)
mgcv::smoothCon(object, data = data, ...)
}
# mgcv doesn't handle a lot of special data types well
# need to prepare these variables manually beforehand
sm_prepare_data <- function(object, data) {
data <- rm_attr(data, "terms")
vars <- setdiff(c(object$term, object$by), "NA")
for (v in vars) {
if (is_like_factor(data[[v]])) {
# allow factor-like variables #562
data[[v]] <- as.factor(data[[v]])
} else if (inherits(data[[v]], "difftime")) {
# mgcv cannot handle 'difftime' variables
data[[v]] <- as.numeric(data[[v]])
}
}
data
}
# Aid prediction from smooths represented as 'type = 2'
# code obtained from the doc of ?mgcv::smooth2random
# @param sm output of mgcv::smoothCon
# @param re output of mgcv::smooth2random
# @param data new data supplied for prediction
# @return A list of the same structure as returned by mgcv::smooth2random
s2rPred <- function(sm, re, data) {
# prediction matrix for new data
X <- PredictMat(sm, data)
# transform to RE parameterization
if (!is.null(re$trans.U)) {
X <- X %*% re$trans.U
}
if (is.null(re$trans.D)) {
# regression spline without penalization
out <- list(Xf = X)
} else {
X <- t(t(X) * re$trans.D)
# re-order columns according to random effect re-ordering
X[, re$rind] <- X[, re$pen.ind != 0]
# re-order penalization index in same way
pen.ind <- re$pen.ind
pen.ind[re$rind] <- pen.ind[pen.ind > 0]
# start returning the object
Xf <- X[, which(re$pen.ind == 0), drop = FALSE]
out <- list(rand = list(), Xf = Xf)
for (i in seq_along(re$rand)) {
# loop over random effect matrices
out$rand[[i]] <- X[, which(pen.ind == i), drop = FALSE]
attr(out$rand[[i]], "s.label") <- attr(re$rand[[i]], "s.label")
}
names(out$rand) <- names(re$rand)
}
out
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.