Nothing
#' Fitting Generalized Linear STAP models
#'
#'@template args-adapt_delta
#'@param y n length vector or n x 2 matrix of outcomes
#'@param z n x p design matrix of subject specific covariates
#'@param dists_crs (q_s+q_st) x M matrix of distances between outcome
#'observations and built environment features with a hypothesized spatial scale
#'@param u_s n x (q *2) matrix of compressed row storage array indices for dists_crs
#'@param times_crs (q_t+q_st) x M matrix of times where the outcome observations
#' were exposed to the built environment features with a hypothesized temporal scale
#'@param u_t n x (q*2) matrix of compressed row storage array indices for times_crs
#'@param weight_functions a Q x 2 matrix with integers coding the appropriate weight function for each STAP
#'@param stap_data object of class "stap_data" that contains information on all the spatial-temporal predictors in the model
#'@param max_distance the upper bound on any and all distances included in the model
#'@param max_time the upper bound on any and all times included in the model
#'@param weights weights to be added to the likelihood observation for a given subject
#'@param offset offset term to be added to the outcome for a given subject
#'@param family distributional family - only binomial gaussian or poisson currently allowed
#'@param prior,prior_intercept,prior_stap,prior_theta,prior_aux see \code{stap_glm} for more information
#'@param group list of of group terms from \code{lme4::glmod}
#'@param ... optional arguments passed to the sampler - e.g. iter,warmup, etc.
#'@export stap_glm
stap_glm.fit <- function(y, z, dists_crs, u_s,
times_crs, u_t,
weight_functions,
stap_data,
max_distance = max(dists_crs),
max_time = max(times_crs),
weights = rep(1,NROW(y)),
offset = rep(0, NROW(y)),
family = stats::gaussian(),
...,
prior = normal(),
prior_intercept = normal(),
prior_stap = normal(),
group = list(),
prior_theta = list(theta_one = normal()),
prior_aux = cauchy(location = 0L, scale = 5L),
adapt_delta = NULL){
family <- validate_family(family)
supported_families <- c("binomial","gaussian","poisson")
fam <- which(pmatch(supported_families, family$family, nomatch = 0L) == 1L)
if(!length(fam))
stop("'family' must be one of ", paste(supported_families, collapse = ', '))
if(max_distance < max(dists_crs))
stop("max_distance must be the maximum possible distance amongst all distances in dists_crs")
supported_links <- supported_glm_links(supported_families[fam])
link <- which(supported_links == family$link)
if(!length(link))
# stop("'link' must be one of", paste( supported_links, collapse = ', '))
if (binom_y_prop(y, family, weights))
stop("To specify 'y' as proportion of successes and 'weights' as ",
"number of trials please use stan_glm rather than calling ",
"stan_glm.fit directly.")
y <- validate_glm_outcome_support(y,family)
if(is.binomial(family$family) && NCOL(y) == 2L){
trials <- as.integer(y[,1L] + y[,2L])
y <- as.integer(y[,1L])
}
# useless assignments to pass R CMD check
has_intercept <-
prior_df <- prior_df_for_intercept <- prior_df_for_aux <-
prior_dist <- prior_dist_for_intercept <- prior_dist_for_aux <- prior_mean <-
prior_mean_for_intercept <- prior_mean_for_aux <- prior_scale <-
prior_scale_for_intercept <- prior_scale_for_aux <- prior_autoscale <-
prior_autoscale_for_intercept <- prior_autoscale_for_aux <- ztemp <-
zbar <- prior_dist_for_stap <- prior_mean_for_stap <-
prior_scale_for_stap <- prior_df_for_stap <- prior_dist_for_theta <-
prior_scale_for_theta <- prior_df_for_theta <- prior_mean_for_theta <- NULL
z_stuff <- center_z(z)
for (i in names(z_stuff)) # ztemp, zbar, has_intercept
assign(i, z_stuff[[i]])
nvars <- ncol(ztemp)
ok_dists <- nlist("normal", student_t = "t", "cauchy",
"laplace", "lasso", "product_normal")
ok_intercept_dists <- ok_dists[1:3]
ok_aux_dists <- c(ok_dists[1:3], exponential = "exponential")
prior_stuff <- handle_glm_prior(
prior,
nvars,
link = family$link,
default_scale = 2.5,
ok_dists = ok_dists
)
for (i in names(prior_stuff))
assign(i, prior_stuff[[i]])
prior_intercept_stuff <- handle_glm_prior(
prior_intercept,
nvars = 1,
default_scale = 10,
link = family$link,
ok_dists = ok_intercept_dists
)
names(prior_intercept_stuff) <- paste0(names(prior_intercept_stuff), "_for_intercept")
for (i in names(prior_intercept_stuff))
assign(i, prior_intercept_stuff[[i]])
prior_stap_stuff <- handle_glm_prior(
prior_stap,
nvars = stap_data$Q,
link = family$link,
default_scale = 2.5,
ok_dists = ok_dists
)
names(prior_stap_stuff) <- paste0(names(prior_stap_stuff), "_for_stap")
for(i in names(prior_stap_stuff))
assign(i, prior_stap_stuff[[i]])
prior_aux_stuff <-
handle_glm_prior(
prior_aux,
nvars = 1,
default_scale = 1,
link = NULL, # don't need to adjust scale based on logit vs probit
ok_dists = ok_aux_dists
)
# prior_{dist, mean, scale, df, dist_name, autoscale}_for_aux
names(prior_aux_stuff) <- paste0(names(prior_aux_stuff), "_for_aux")
if (is.null(prior_aux)) {
prior_aux_stuff$prior_scale_for_aux <- Inf
}
for (i in names(prior_aux_stuff))
assign(i, prior_aux_stuff[[i]])
#prior_{dist, mean, scale, df, dist_name, autoscale}_for_theta
if(is.null(prior_theta$dist))
prior_theta <- handle_theta_stap_prior(prior_theta,
ok_dists = nlist("normal","lognormal"),
stap_code = stap_data$stap_code,
coef_names = grep("_scale",coef_names(stap_data),value = T, invert = T)
)
else{
prior_theta_stuff <-
handle_glm_prior(
prior_theta,
nvars = nrow(dists_crs),
default_scale = 1,
link = NULL,
ok_dists = nlist("normal","lognormal")
)
names(prior_theta_stuff) <- paste0(names(prior_theta_stuff),"_for_theta")
for(i in names(prior_theta_stuff))
assign(i, prior_theta_stuff[[i]])
}
famname <- supported_families[fam]
is_bernoulli <- is.binomial(famname) && all(y %in% 0:1)
is_nb <- is.nb(famname)
is_gaussian <- is.gaussian(famname)
is_gamma <- is.gamma(famname)
is_ig <- is.ig(famname)
is_continuous <- is_gaussian || is_gamma || is_ig
# require intercept for certain family and link combinations
if (!has_intercept) {
linkname <- supported_links[link]
needs_intercept <- !is_gaussian && linkname == "identity" ||
is_gamma && linkname == "inverse" ||
is.binomial(famname) && linkname == "log"
if (needs_intercept)
stop("To use this combination of family and link ",
"the model must have an intercept.")
}
if (is_gaussian) {
ss <- sd(y)
if (prior_dist > 0L && prior_autoscale)
prior_scale <- ss * prior_scale
if (prior_dist_for_intercept > 0L && prior_autoscale_for_intercept)
prior_scale_for_intercept <- ss * prior_scale_for_intercept
if (prior_dist_for_aux > 0L && prior_autoscale_for_aux)
prior_scale_for_aux <- ss * prior_scale_for_aux
}
if (prior_dist > 0L && prior_autoscale) {
min_prior_scale <- 1e-12
prior_scale <- pmax(min_prior_scale, prior_scale /
apply(ztemp, 2L, FUN = function(x) {
num.categories <- length(unique(x))
z.scale <- 1
if (num.categories == 2) {
z.scale <- diff(range(x))
} else if (num.categories > 2) {
z.scale <- sd(x)
}
return(z.scale)
}))
}
prior_scale <-
as.array(pmin(.Machine$double.xmax, prior_scale))
prior_scale_for_intercept <-
min(.Machine$double.xmax, prior_scale_for_intercept)
if (length(weights) > 0 && all(weights == 1)) weights <- double()
if (length(offset) > 0 && all(offset == 0)) offset <- double()
if(all(is.na(u_s))){
u_s <- array(double(),dim=c(0,0))
dists_crs <- array(double(),dim=c(0,0))
max_distance <- 0
}
if(all(is.na(u_t))){
u_t <- array(double(),dim=c(0,0))
times_crs <- array(double(),dim=c(0,0))
max_time <- 0
}
# create entries in the data block of the .stan file
standata <- nlist(
N = nrow(ztemp),
K = ncol(ztemp),
Q = stap_data$Q,
Q_s = stap_data$Q_s,
Q_t = stap_data$Q_t,
Q_st = stap_data$Q_st,
weight_mat = stap_data$weight_mats,
log_ar = stap_data$log_switch,
stap_code = stap_data$stap_code,
M = ncol(dists_crs),
zbar = as.array(zbar),
family = stan_family_number(famname),
link,
max_distance = max_distance / get_space_constraint(stap_data,quantile= 0.975),
max_time = max_time / get_time_constraint(stap_data,0.975),
u_s = u_s,
u_t = u_t,
dists_crs = dists_crs,
times_crs = times_crs,
has_weights = length(weights) > 0,
has_offset = length(offset) > 0,
has_intercept,
prior_dist,
prior_mean,
prior_scale,
prior_df,
prior_dist_for_intercept,
prior_scale_for_intercept = c(prior_scale_for_intercept),
prior_mean_for_intercept = c(prior_mean_for_intercept),
prior_df_for_intercept = c(prior_df_for_intercept),
prior_dist_for_intercept,
prior_dist_for_stap, prior_mean_for_stap,
prior_scale_for_stap = array(prior_scale_for_stap),
prior_df_for_stap,
prior_dist_for_aux = prior_dist_for_aux,
num_normals = if(prior_dist == 7) as.integer(prior_df) else integer(0),
num_normals_for_stap = if(prior_dist_for_stap == 7) as.integer(prior_df_for_stap) else integer(0)
# mean,df,scale for aux added below depending on family
)
# create entries in the data block of the .stan file for prior theta
if(is.null(prior_theta$dist)){
if(stap_data$Q_st>0){
standata$prior_dist_for_theta_s <- array(prior_theta$theta_s_dist)
standata$prior_scale_for_theta_s <- array(prior_theta$theta_s_scale)
standata$prior_df_for_theta_s <- array(prior_theta$theta_s_df)
standata$prior_mean_for_theta_s <- array(prior_theta$theta_s_mean)
standata$prior_dist_for_theta_t <- array(prior_theta$theta_t_dist)
standata$prior_scale_for_theta_t <- array(prior_theta$theta_t_scale)
standata$prior_mean_for_theta_t <- array(prior_theta$theta_t_mean)
standata$prior_df_for_theta_t <- array(prior_theta$theta_t_df)
} else if(stap_data$Q_s>0){
standata$prior_dist_for_theta_s <- array(prior_theta$theta_s_dist)
standata$prior_scale_for_theta_s <- array(prior_theta$theta_s_scale)
standata$prior_df_for_theta_s <- array(prior_theta$theta_s_df)
standata$prior_mean_for_theta_s <- array(prior_theta$theta_s_mean)
#null entries
standata$prior_dist_for_theta_t <- double()
standata$prior_scale_for_theta_t <- double()
standata$prior_df_for_theta_t <- double()
standata$prior_mean_for_theta_t <- double()
} else if(stap_data$Q_t>0){
standata$prior_dist_for_theta_t <- array(prior_theta$theta_t_dist)
standata$prior_scale_for_theta_t <- array(prior_theta$theta_t_scale)
standata$prior_mean_for_theta_t <- array(prior_theta$theta_t_mean)
standata$prior_df_for_theta_t <- array(prior_theta$theta_t_df)
#null entries
standata$prior_mean_for_theta_s <- double()
standata$prior_dist_for_theta_s <- double()
standata$prior_scale_for_theta_s <- double()
standata$prior_df_for_theta_s <- double()
}
} else{
if(stap_data$Q_st>0){
standata$prior_dist_for_theta_s <- array(prior_dist_for_theta)
standata$prior_scale_for_theta_s <- array(prior_scale_for_theta)
standata$prior_df_for_theta_s <- array(prior_df_for_theta)
standata$prior_mean_for_theta_s <- array(prior_mean_for_theta)
standata$prior_dist_for_theta_t <- array(as.integer(prior_dist_for_theta))
standata$prior_scale_for_theta_t <- array(prior_scale_for_theta)
standata$prior_mean_for_theta_t <- array(prior_mean_for_theta)
standata$prior_df_for_theta_t <- array(prior_df_for_theta)
} else if(stap_data$Q_t>0){
standata$prior_dist_for_theta_t <- array(as.integer(prior_dist_for_theta))
standata$prior_scale_for_theta_t <- array(prior_scale_for_theta)
standata$prior_mean_for_theta_t <- array(prior_mean_for_theta)
standata$prior_df_for_theta_t <- array(prior_df_for_theta)
#null entries
standata$prior_mean_for_theta_s <- double()
standata$prior_dist_for_theta_s <- double()
standata$prior_scale_for_theta_s <- double()
standata$prior_df_for_theta_s <- double()
} else if(stap_data$Q_s>0){
standata$prior_dist_for_theta_s <- array(as.integer(prior_dist_for_theta))
standata$prior_scale_for_theta_s <- array(prior_scale_for_theta)
standata$prior_df_for_theta_s <- array(prior_df_for_theta)
standata$prior_mean_for_theta_s <- array(prior_mean_for_theta)
#null entries
standata$prior_dist_for_theta_t <- double()
standata$prior_scale_for_theta_t <- double()
standata$prior_df_for_theta_t <- double()
standata$prior_mean_for_theta_t <- double()
}
}
#make a copy of user specification before modifying 'group' (used for keeping
# track of priors)
user_covariance <- if (!length(group)) NULL else group[["decov"]]
if (length(group)) {
check_reTrms(group)
decov <- group$decov
W <- t(group$Zt)
group <-
pad_reTrms(Ztlist = group$Ztlist,
cnms = group$cnms,
flist = group$flist)
W <- group$Z
p <- sapply(group$cnms, FUN = length)
l <- sapply(attr(group$flist, "assign"), function(i)
nlevels(group$flist[[i]]))
t <- length(l)
b_nms <- make_b_nms(group)
g_nms <- unlist(lapply(1:t, FUN = function(i) {
paste(group$cnms[[i]], names(group$cnms)[i], sep = "|")
}))
standata$t <- t
standata$p <- as.array(p)
standata$l <- as.array(l)
standata$q <- ncol(W)
standata$len_theta_L <- sum(choose(p, 2), p)
if (is_bernoulli) {
parts0 <- rstan::extract_sparse_parts(W[y == 0, , drop = FALSE])
parts1 <- rstan::extract_sparse_parts(W[y == 1, , drop = FALSE])
standata$num_non_zero <- c(length(parts0$w), length(parts1$w))
standata$w0 <- parts0$w
standata$w1 <- parts1$w
standata$v0 <- parts0$v
standata$v1 <- parts1$v
standata$u0 <- parts0$u
standata$u1 <- parts1$u
} else {
parts <- rstan::extract_sparse_parts(W)
standata$num_non_zero <- length(parts$w)
standata$w <- parts$w
standata$v <- parts$v
standata$u <- parts$u
}
standata$shape <- as.array(maybe_broadcast(decov$shape, t))
standata$scale <- as.array(maybe_broadcast(decov$scale, t))
standata$len_concentration <- sum(p[p > 1])
standata$concentration <-
as.array(maybe_broadcast(decov$concentration, sum(p[p > 1])))
standata$len_regularization <- sum(p > 1)
standata$regularization <-
as.array(maybe_broadcast(decov$regularization, sum(p > 1)))
standata$special_case <- all(sapply(group$cnms, FUN = function(x) {
length(x) == 1 && x == "(Intercept)" }))
} else { # not multilevel
standata$t <- 0L
standata$p <- integer(0)
standata$l <- integer(0)
standata$q <- 0L
standata$len_theta_L <- 0L
if (is_bernoulli) {
standata$num_non_zero <- rep(0L, 2)
standata$w0 <- standata$w1 <- double(0)
standata$v0 <- standata$v1 <- integer(0)
standata$u0 <- standata$u1 <- integer(0)
} else {
standata$num_non_zero <- 0L
standata$w <- double(0)
standata$v <- integer(0)
standata$u <- integer(0)
}
standata$special_case <- 0L
standata$shape <- standata$scale <- standata$concentration <-
standata$regularization <- rep(0, 0)
standata$len_concentration <- 0L
standata$len_regularization <- 0L
}
if (!is_bernoulli) {
standata$Z <- array(ztemp, dim = dim(ztemp))
standata$y <- y
standata$weights <- weights
standata$offset <- offset
}
if (is_continuous) {
standata$ub_y <- Inf
standata$lb_y <- if (is_gaussian) -Inf else 0
standata$prior_scale_for_aux <- prior_scale_for_aux %ORifINF% 0
standata$prior_df_for_aux <- c(prior_df_for_aux)
standata$prior_mean_for_aux <- c(prior_mean_for_aux)
standata$len_y <- length(y)
stanfit <- stanmodels$stap_continuous
} else if (is.binomial(famname)) {
standata$prior_scale_for_aux <-
if (!length(group) || prior_scale_for_aux == Inf)
0 else prior_scale_for_aux
standata$prior_mean_for_aux <- 0
standata$prior_df_for_aux <- 0
if (is_bernoulli) {
y0 <- y == 0
y1 <- y == 1
standata$y_0 <- which(y0)
standata$y_1 <- which(y1)
standata$N <- c(sum(y0), sum(y1))
standata$NN <- nrow(ztemp)
standata$Z0 <- ztemp[y0, , drop = FALSE]
standata$Z1 <- ztemp[y1, , drop = FALSE]
standata$w_W0 = double(0)
standata$v_W0 = integer(0)
standata$u_W0 = integer(0)
standata$w_W1 = double(0)
standata$v_W1 = integer(0)
standata$u_W1 = integer(0)
if (length(weights)) {
# nocov start
# this code is unused because weights are interpreted as number of
# trials for binomial glms
standata$weights0 <- weights[y0]
standata$weights1 <- weights[y1]
# nocov end
} else {
standata$weights0 <- double(0)
standata$weights1 <- double(0)
}
if (length(offset)) {
# nocov start
standata$offset0 <- offset[y0]
standata$offset1 <- offset[y1]
# nocov end
} else {
standata$offset0 <- double(0)
standata$offset1 <- double(0)
}
stanfit <- stanmodels$stap_bernoulli
} else {
standata$trials <- trials
stanfit <- stanmodels$stap_binomial
}
} else if (is.poisson(famname)) {
standata$prior_scale_for_aux <- prior_scale_for_aux %ORifINF% 0
standata$prior_mean_for_aux <- 0
standata$prior_df_for_aux <- 0
stanfit <- stanmodels$stap_count
} else if (is_nb) {
standata$prior_scale_for_aux <- prior_scale_for_aux %ORifINF% 0
standata$prior_df_for_aux <- c(prior_df_for_aux)
standata$prior_mean_for_aux <- c(prior_mean_for_aux)
stanfit <- stanmodels$count
} else if (is_gamma) {
# nothing
} else { # nocov start
# family already checked above
stop(paste(famname, "is not supported."))
} # nocov end
prior_info <- summarize_glm_prior(
user_prior = prior_stuff,
user_prior_intercept = prior_intercept_stuff,
user_prior_stap = prior_stap_stuff,
user_prior_theta = if(!is.null(prior_theta$dist)) prior_theta_stuff else prior_theta,
user_prior_aux = prior_aux_stuff,
has_intercept = has_intercept,
has_predictors = nvars > 0,
adjusted_prior_scale = prior_scale,
adjusted_prior_intercept_scale = prior_scale_for_intercept,
adjusted_prior_aux_scale = prior_scale_for_aux,
family = family
)
pars <- c(if (has_intercept) "alpha",
"delta",
"adj_beta",
if(stap_data$Q_s + stap_data$Q_st>0) "theta_s",
if(stap_data$Q_t + stap_data$Q_st >0) "theta_t",
if(length(group)) "b",
if(standata$len_theta_L) "theta_L",
if (is_continuous | is_nb) "aux",
"mean_PPD")
sampling_args <- set_sampling_args(
object = stanfit,
prior = prior,
user_dots = list(...),
user_adapt_delta = adapt_delta,
data = standata,
pars = pars,
show_messages = FALSE)
stapfit <- do.call(sampling, sampling_args)
check <- try(check_stanfit(stapfit))
if (!isTRUE(check)) return(standata)
if(standata$len_theta_L){
thetas_ref <- rstan::extract(stapfit, pars = "theta_L", inc_warmup = FALSE,
permuted = FALSE)
cnms <- group$cnms
nc <- sapply(cnms, FUN = length)
nms <- names(cnms)
Sigma <- apply(thetas_ref, 1:2, FUN = function(theta) {
Sigma <- lme4::mkVarCorr(sc = 1, cnms, nc, theta, nms)
unlist(sapply(Sigma, simplify = FALSE,
FUN = function(x) x[lower.tri(x,TRUE)]))
})
l <- length(dim(Sigma))
end <- utils::tail(dim(Sigma), 1L)
shift <- grep("^theta_L", names(stapfit@sim$samples[[1]]))[1] - 1L
if(l==3) for (chain in 1:end) for (param in 1:nrow(Sigma)) {
stapfit@sim$samples[[chain]][[shift + param]] <- Sigma[param, , chain]
}
else for (chain in 1:end) {
stapfit@sim$samples[[chain]][[shift + 1]] <- Sigma[, chain]
}
Sigma_nms <- lapply(cnms, FUN = function(grp) {
nm <- outer(grp, grp, FUN = paste, sep = ",")
nm[lower.tri (nm , diag = TRUE)]
})
for(j in seq_along(Sigma_nms)){
Sigma_nms[[j]] <- paste0(nms[j], ":", Sigma_nms[[j]])
}
Sigma_nms <- unlist(Sigma_nms)
}
new_names <- c(if (has_intercept) "(Intercept)",
colnames(ztemp),
if((stap_data$Q_s + stap_data$Q_st)>0) rownames(dists_crs),
if((stap_data$Q_t)>0) rownames(times_crs), ## only count Q_st once
if((stap_data$Q_s + stap_data$Q_st)>0) paste0(rownames(dists_crs),'_spatial_scale'),
if((stap_data$Q_t + stap_data$Q_st)>0) paste0(rownames(times_crs),'_temporal_scale'),
if(length(group) && length(group$flist)) c (paste0("b[", b_nms, "]")),
if(standata$len_theta_L) paste0("Sigma[", Sigma_nms, "]"),
if (is_gaussian) "sigma",
if (is_gamma) "shape",
if (is_ig) "lambda",
if (is_nb) "reciprocal_dispersion",
"mean_PPD",
"log-posterior")
stapfit@sim$fnames_oi <- new_names
return(structure(stapfit, prior.info = prior_info))
}
# internal ---------------------------------------------------------------------------------------------------------
# @param family_name: string naming the family
# @return character vector of supported link functions for the family
supported_glm_links <- function(family_name){
switch(
family_name,
binomial = c("logit","probit","cauchit", "log","cloglog"),
gaussian = c("identity", "log", "inverse"),
poisson = c("log", "identity", "sqrt"),
stop("unsupported family")
)
}
# Family number to pass to Stan
# @param famname string naming the family
# @return an integer family code
stan_family_number <- function(famname) {
switch(
famname,
"gaussian" = 1L,
"Gamma" = 2L,
"inverse.gaussian" = 3L,
"beta" = 4L, ## need to get rid of this eventually
"Beta regression" = 4L,
"binomial" = 5L,
"poisson" = 6L,
"neg_binomial_2" = 7L,
stop("Family not valid.")
)
}
# Add extra level _NEW_ to each group
#
# @param Ztlist ranef indicator matrices
# @param cnms group$cnms
# @param flist group$flist
pad_reTrms <- function(Ztlist, cnms, flist) {
stopifnot(is.list(Ztlist))
l <- sapply(attr(flist, "assign"), function(i) nlevels(flist[[i]]))
p <- sapply(cnms, FUN = length)
n <- ncol(Ztlist[[1]])
for (i in attr(flist, "assign")) {
levels(flist[[i]]) <- c(gsub(" ", "_", levels(flist[[i]])),
paste0("_NEW_", names(flist)[i]))
}
for (i in 1:length(p)) {
Ztlist[[i]] <- rbind(Ztlist[[i]], Matrix(0, nrow = p[i], ncol = n, sparse = TRUE))
}
Z <- t(do.call(rbind, args = Ztlist))
return(nlist(Z, cnms, flist))
}
# Drop the extra reTrms from a matrix x
#
# @param x A matrix or array (e.g. the posterior sample or matrix of summary
# stats)
# @param columns Do the columns (TRUE) or rows (FALSE) correspond to the
# variables?
unpad_reTrms <- function(x, ...) UseMethod("unpad_reTrms")
unpad_reTrms.default <- function(x, ...) {
if (is.matrix(x) || is.array(x))
return(unpad_reTrms.array(x, ...))
keep <- !grepl("_NEW_", names(x), fixed = TRUE)
x[keep]
}
unpad_reTrms.array <- function(x, columns = TRUE, ...) {
ndim <- length(dim(x))
if (ndim > 3)
stop("'x' should be a matrix or 3-D array")
nms <- if (columns)
last_dimnames(x) else rownames(x)
keep <- !grepl("_NEW_", nms, fixed = TRUE)
if (length(dim(x)) == 2) {
x_keep <- if (columns)
x[, keep, drop = FALSE] else x[keep, , drop = FALSE]
} else {
x_keep <- if (columns)
x[, , keep, drop = FALSE] else x[keep, , , drop = FALSE]
}
return(x_keep)
}
make_b_nms <- function(group, stub = "Long") {
group_nms <- names(group$cnms)
b_nms <- character()
for (i in seq_along(group$cnms)) {
nm <- group_nms[i]
nms_i <- paste(group$cnms[[i]], nm)
levels(group$flist[[nm]]) <- gsub(" ", "_", levels(group$flist[[nm]]))
if (length(nms_i) == 1) {
b_nms <- c(b_nms, paste0(nms_i, ":", levels(group$flist[[nm]])))
} else {
b_nms <- c(b_nms, c(t(sapply(paste0(nms_i), paste0, ":",
levels(group$flist[[nm]])))))
}
}
return(b_nms)
}
# Create "prior.info" attribute needed for prior_summary()
#
# @param user_* The user's prior, prior_intercept, prior_covariance, and
# prior_aux specifications. For prior and prior_intercept these should be
# passed in after broadcasting the df/location/scale arguments if necessary.
# @param has_intercept T/F, does model have an intercept?
# @param has_predictors T/F, does model have predictors?
# @param adjusted_prior_*_scale adjusted scales computed if using autoscaled priors
# @param family Family object.
# @return A named list with components 'prior', 'prior_intercept', and possibly
# 'prior_covariance' and 'prior_aux' each of which itself is a list
# containing the needed values for prior_summary.
summarize_glm_prior <-
function(user_prior,
user_prior_intercept,
user_prior_aux,
user_prior_stap,
user_prior_theta,
user_prior_cov,
has_intercept,
has_predictors,
adjusted_prior_scale,
adjusted_prior_intercept_scale,
adjusted_prior_aux_scale,
family) {
rescaled_coef <-
user_prior$prior_autoscale &&
has_predictors &&
!is.na(user_prior$prior_dist_name) &&
!all(user_prior$prior_scale == adjusted_prior_scale)
rescaled_int <-
user_prior_intercept$prior_autoscale_for_intercept &&
has_intercept &&
!is.na(user_prior_intercept$prior_dist_name_for_intercept) &&
(user_prior_intercept$prior_scale_for_intercept != adjusted_prior_intercept_scale)
rescaled_aux <- user_prior_aux$prior_autoscale_for_aux &&
!is.na(user_prior_aux$prior_dist_name_for_aux) &&
(user_prior_aux$prior_scale_for_aux != adjusted_prior_aux_scale)
rescaled_stap <- user_prior_stap$prior_autoscale &&
has_predictors &&
!is.na(user_prior$prior_dist_name) &&
!(all(user_prior$prior_scale == adjusted_prior_scale))
if (has_predictors && user_prior$prior_dist_name %in% "t") {
if (all(user_prior$prior_df == 1)) {
user_prior$prior_dist_name <- "cauchy"
} else {
user_prior$prior_dist_name <- "student_t"
}
}
if (has_intercept &&
user_prior_intercept$prior_dist_name_for_intercept %in% "t") {
if (all(user_prior_intercept$prior_df_for_intercept == 1)) {
user_prior_intercept$prior_dist_name_for_intercept <- "cauchy"
} else {
user_prior_intercept$prior_dist_name_for_intercept <- "student_t"
}
}
if (user_prior_aux$prior_dist_name_for_aux %in% "t") {
if (all(user_prior_aux$prior_df_for_aux == 1)) {
user_prior_aux$prior_dist_name_for_aux <- "cauchy"
} else {
user_prior_aux$prior_dist_name_for_aux <- "student_t"
}
}
prior_list <- list(
prior =
if (!has_predictors) NULL else with(user_prior, list(
dist = prior_dist_name,
location = prior_mean,
scale = prior_scale,
adjusted_scale = if (rescaled_coef)
adjusted_prior_scale else NULL,
df = if (prior_dist_name %in% c
("student_t", "hs", "hs_plus", "lasso", "product_normal"))
prior_df else NULL
)),
prior_stap = with(user_prior_stap, list(dist = prior_dist_name_for_stap,
location = prior_mean_for_stap,
scale = prior_scale_for_stap,
adjusted_scale = if(rescaled_stap)
adjusted_prior_scale else NULL,
df = if(prior_dist_name_for_stap %in% c("student_t","hs","hs_plus","lasso","product_normal"))
prior_df else NULL )),
prior_theta = if(!is.null(user_prior_theta$dist)) with(user_prior_theta, list(dist = prior_dist_name_for_theta,
location = prior_mean_for_theta,
scale = prior_scale_for_theta)) else NULL,
prior_intercept =
if (!has_intercept) NULL else with(user_prior_intercept, list(
dist = prior_dist_name_for_intercept,
location = prior_mean_for_intercept,
scale = prior_scale_for_intercept,
adjusted_scale = if (rescaled_int)
adjusted_prior_intercept_scale else NULL,
df = if (prior_dist_name_for_intercept %in% "student_t")
prior_df_for_intercept else NULL
))
)
aux_name <- .rename_aux(family)
prior_list$prior_aux <- if (is.na(aux_name))
NULL else with(user_prior_aux, list(
dist = prior_dist_name_for_aux,
location = if (!is.na(prior_dist_name_for_aux) &&
prior_dist_name_for_aux != "exponential")
prior_mean_for_aux else NULL,
scale = if (!is.na(prior_dist_name_for_aux) &&
prior_dist_name_for_aux != "exponential")
prior_scale_for_aux else NULL,
adjusted_scale = if (rescaled_aux)
adjusted_prior_aux_scale else NULL,
df = if (!is.na(prior_dist_name_for_aux) &&
prior_dist_name_for_aux %in% "student_t")
prior_df_for_aux else NULL,
rate = if (!is.na(prior_dist_name_for_aux) &&
prior_dist_name_for_aux %in% "exponential")
1 / prior_scale_for_aux else NULL,
aux_name = aux_name
))
return(prior_list)
}
# rename aux parameter based on family
.rename_aux <- function(family) {
fam <- family$family
if (is.gaussian(fam)) "sigma" else
if (is.gamma(fam)) "shape" else
if (is.ig(fam)) "lambda" else
if (is.nb(fam)) "reciprocal_dispersion" else NA
}
# Verify that outcome values match support implied by family object
#
# @param y outcome variable
# @param family family object
# @return y (possibly slightly modified) unless an error is thrown
#
validate_glm_outcome_support <- function(y, family){
.is_count <- function(x) {
all(x >= 0) && all(abs(x - round(x)) < .Machine$double.eps^0.5)
}
fam <- family$family
if (fam!='binomial') {
# make sure y has ok dimensions (matrix only allowed for binomial models)
if (length(dim(y)) > 1) {
if (NCOL(y) == 1) {
y <- y[, 1]
} else {
stop("Except for binomial models the outcome variable ",
"should not have multiple columns.",
call. = FALSE)
}
}
# check that values match support for non-binomial models
if (fam!='gaussian') {
return(y)
} else if (fam=='poisson' && !.is_count(y)) {
stop("All outcome values must be counts for Poisson models",
call. = FALSE)
}
} else { # binomial models
if (NCOL(y) == 1L) {
if (is.numeric(y) || is.logical(y))
y <- as.integer(y)
if (is.factor(y))
y <- fac2bin(y)
if (!all(y %in% c(0L, 1L)))
stop("All outcome values must be 0 or 1 for Bernoulli models.",
call. = FALSE)
} else if (isTRUE(NCOL(y) == 2L)) {
if (!.is_count(y))
stop("All outcome values must be counts for binomial models.",
call. = FALSE)
} else {
stop("For binomial models the outcome should be a vector or ",
"a matrix with 2 columns.",
call. = FALSE)
}
}
return(y)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.