R/stan-predictor.R

Defines functions stan_sigma_transform stan_dpar_transform stan_dpar_tmp_types stan_dpar_types stan_assign_b_tpar stan_center_X stan_eta_ilink stan_eta_transform stan_eta_rsp stan_eta_re stan_eta_fe stan_eta_combine stan_Xme stan_nl stan_offset stan_ac stan_gp stan_sp stan_cs stan_sm .stan_re stan_re stan_fe stan_predictor.mvbrmsterms stan_predictor.brmsterms stan_predictor.btnl stan_predictor.btl stan_predictor

# unless otherwise specified, functions return a named list 
# of Stan code snippets to be pasted together later on

# generate stan code for predictor terms
stan_predictor <- function(x, ...) {
  UseMethod("stan_predictor")
}

# combine effects for the predictors of a single (non-linear) parameter
# @param ... arguments passed to the underlying effect-specific functions
#' @export
stan_predictor.btl <- function(x, ...) {
  out <- collapse_lists(
    stan_fe(x, ...),
    stan_thres(x, ...),
    stan_sp(x, ...),
    stan_cs(x, ...),
    stan_sm(x, ...),
    stan_gp(x, ...),
    stan_ac(x, ...),
    stan_offset(x, ...),
    stan_bhaz(x, ...),
    stan_special_prior_global(x, ...)
  )
  stan_eta_combine(out, bterms = x, ...) 
}

# prepare Stan code for non-linear terms
#' @export
stan_predictor.btnl <- function(x, ...) {
  collapse_lists(
    stan_nl(x, ...),
    stan_thres(x, ...),
    stan_bhaz(x, ...),
    stan_ac(x, ...)
  )
}

#' @export
stan_predictor.brmsterms <- function(x, data, prior, normalize, ...) {
  px <- check_prefix(x)
  resp <- usc(combine_prefix(px))
  data <- subset_data(data, x)
  out <- list()
  str_add_list(out) <- stan_response(x, data = data, normalize = normalize)
  valid_dpars <- valid_dpars(x)
  args <- nlist(data, prior, normalize, nlpars = names(x$nlpars), ...)
  args$primitive <- use_glm_primitive(x)
  for (nlp in names(x$nlpars)) {
    nlp_args <- list(x$nlpars[[nlp]])
    str_add_list(out) <- do_call(stan_predictor, c(nlp_args, args))
  }
  for (dp in valid_dpars) {
    dp_terms <- x$dpars[[dp]]
    if (is.btl(dp_terms) || is.btnl(dp_terms)) {
      # distributional parameter is predicted
      ilink <- stan_eta_ilink(dp, bterms = x, resp = resp)
      dp_args <- list(dp_terms, ilink = ilink)
      str_add_list(out) <- do_call(stan_predictor, c(dp_args, args))
    } else if (is.numeric(x$fdpars[[dp]]$value)) {
      # distributional parameter is fixed to a numeric value
      dp_type <- stan_dpar_types(dp, resp, family = x$family, fixed = TRUE)
      if (nzchar(dp_type)) {
        dp_value <- x$fdpars[[dp]]$value
        dp_comment <- stan_comment(attr(dp_type, "comment"))
        str_add(out$tpar_def) <- glue(
          "  {dp_type} {dp}{resp} = {dp_value};{dp_comment}\n"
        )
        str_add(out$pll_args) <- glue(", real {dp}{resp}")
      }
    } else if (is.character(x$fdpars[[dp]]$value)) {
      # distributional parameter is fixed to another distributional parameter
      if (!x$fdpars[[dp]]$value %in% valid_dpars) {
        stop2("Parameter '", x$fdpars[[dp]]$value, "' cannot be found.")
      }
      dp_type <- stan_dpar_types(dp, resp, family = x$family)
      if (nzchar(dp_type)) {
        dp_value <- x$fdpars[[dp]]$value
        dp_comment <- stan_comment(attr(dp_type, "comment"))
        str_add(out$tpar_def) <- glue(
          "  {dp_type} {dp}{resp};{dp_comment}\n"
        )
        str_add(out$tpar_comp) <- glue(
          "  {dp}{resp} = {dp_value}{resp};\n"
        )
        str_add(out$pll_args) <- glue(", real {dp}{resp}")
      }
    } else {
      # distributional parameter is estimated as a scalar
      dp_type <- stan_dpar_types(dp, resp, family = x$family)
      dp_tmp_type <- stan_dpar_tmp_types(dp, resp, family = x$family)
      if (nzchar(dp_tmp_type)) {
        # distributional parameter has a temporary definition
        dp_comment <- attr(dp_tmp_type, "comment")
        str_add_list(out) <- stan_prior(
          prior, dp, type = dp_tmp_type, prefix = "tmp_", 
          suffix = resp, header_type = "real", px = px, 
          comment = dp_comment, normalize = normalize
        )
      } else if (nzchar(dp_type)) {
        # distributional parameter has a regular definition
        dp_comment <- attr(dp_type, "comment")
        str_add_list(out) <- stan_prior(
          prior, dp, type = dp_type, suffix = resp, 
          header_type = "real", px = px, 
          comment = dp_comment, normalize = normalize
        )
      }
    }
  }
  str_add_list(out) <- stan_mixture(
    x, data = data, prior = prior, normalize = normalize, ...
  )
  str_add_list(out) <- stan_dpar_transform(x, ...)
  out$model_log_lik <- stan_log_lik(x, data = data, normalize = normalize, ...) 
  list(out)
}

#' @export
stan_predictor.mvbrmsterms <- function(x, prior, threads, normalize, ...) {
  out <- lapply(x$terms, stan_predictor, prior = prior, threads = threads,
                normalize = normalize, ...)
  out <- unlist(out, recursive = FALSE)
  if (!x$rescor) {
    return(out)
  }
  resp_type <- out[[1]]$resp_type
  out <- collapse_lists(ls = out)
  out$resp_type <- "vector"
  adforms <- lapply(x$terms, "[[", "adforms")
  adnames <- unique(ulapply(adforms, names))
  adallowed <- c("se", "weights", "mi")
  if (!all(adnames %in% adallowed))  {
    stop2("Only ", collapse_comma(adallowed), " are supported ", 
          "addition arguments when 'rescor' is estimated.")
  }
  # we already know at this point that all families are identical
  family <- family_names(x)[1]
  stopifnot(family %in% c("gaussian", "student"))
  resp <- x$responses
  nresp <- length(resp)
  str_add(out$model_def) <- glue( 
    "  // multivariate predictor array\n",
    "  vector[nresp] Mu[N];\n"
  )
  str_add(out$model_comp_mvjoin) <- glue(
    "    Mu[n] = {stan_vector(glue('mu_{resp}[n]'))};\n"
  )
  str_add(out$data) <- glue(
    "  int<lower=1> nresp;  // number of responses\n",   
    "  int nrescor;  // number of residual correlations\n"
  )
  str_add(out$pll_args) <- glue(", data int nresp")
  str_add(out$tdata_def) <- glue(
    "  vector[nresp] Y[N];  // response array\n"
  )
  str_add(out$tdata_comp) <- glue(
    "  for (n in 1:N) {{\n",
    "    Y[n] = {stan_vector(glue('Y_{resp}[n]'))};\n",
    "  }}\n"
  )
  str_add(out$pll_args) <- ", data vector[] Y"
  if (any(adnames %in% "weights")) {
    str_add(out$tdata_def) <- glue(
      "  // weights of the pointwise log-likelihood\n",
      "  vector<lower=0>[N] weights = weights_{resp[1]};\n" 
    )
    str_add(out$pll_args) <- glue(", data vector weights")
  }
  miforms <- rmNULL(lapply(adforms, "[[", "mi"))
  if (length(miforms)) {
    str_add(out$model_no_pll_def) <- "  vector[nresp] Yl[N] = Y;\n"
    str_add(out$pll_args) <- ", vector[] Yl"
    for (i in seq_along(miforms)) {
      j <- match(names(miforms)[i], resp)
      # needs to happen outside of reduce_sum 
      # to maintain consistency of indexing Yl
      str_add(out$model_no_pll_comp_mvjoin) <- glue(
        "    Yl[n][{j}] = Yl_{resp[j]}[n];\n"
      )
    }
  }
  str_add_list(out) <- stan_prior(
    prior, class = "Lrescor", 
    type = "cholesky_factor_corr[nresp]", header_type = "matrix",
    comment = "parameters for multivariate linear models",
    normalize = normalize
  )
  if (family == "student") {
    str_add_list(out) <- stan_prior(
      prior, class = "nu", type = stan_dpar_types("nu"),
      header_type = "real", normalize = normalize
    )
  } 
  sigma <- ulapply(x$terms, stan_sigma_transform, threads = threads)
  if (any(grepl(stan_nn_regex(), sigma))) {
    str_add(out$model_def) <- "  vector[nresp] sigma[N];\n"
    str_add(out$model_comp_mvjoin) <- glue(
      "    sigma[n] = {stan_vector(sigma)};\n"
    )
    if (family == "gaussian") {
      str_add(out$model_def) <- glue(
        "  // cholesky factor of residual covariance matrix\n",
        "  matrix[nresp, nresp] LSigma[N];\n"
      )
      str_add(out$model_comp_mvjoin) <- glue(
        "    LSigma[n] = diag_pre_multiply(sigma[n], Lrescor);\n"
      )
    } else if (family == "student") {
      str_add(out$model_def) <- glue(
        "  // residual covariance matrix\n",
        "  matrix[nresp, nresp] Sigma[N];\n"
      )
      str_add(out$model_comp_mvjoin) <- glue(
        "    Sigma[n] = multiply_lower_tri_self_transpose(", 
        "diag_pre_multiply(sigma[n], Lrescor));\n" 
      )
    }
  } else {
    str_add(out$model_def) <- glue(
      "  vector[nresp] sigma = {stan_vector(sigma)};\n"
    )
    if (family == "gaussian") {
      str_add(out$model_def) <- glue(
        "  // cholesky factor of residual covariance matrix\n",
        "  matrix[nresp, nresp] LSigma = ",
        "diag_pre_multiply(sigma, Lrescor);\n"
      )
    } else if (family == "student") {
      str_add(out$model_def) <- glue(
        "  // residual covariance matrix\n",
        "  matrix[nresp, nresp] Sigma = ",
        "multiply_lower_tri_self_transpose(", 
        "diag_pre_multiply(sigma, Lrescor));\n"
      )
    }
  }
  str_add(out$gen_def) <- glue(
    "  // residual correlations\n",
    "  corr_matrix[nresp] Rescor",
    " = multiply_lower_tri_self_transpose(Lrescor);\n",
    "  vector<lower=-1,upper=1>[nrescor] rescor;\n"
  )
  str_add(out$gen_comp) <- stan_cor_gen_comp("rescor", "nresp")
  out$model_comp_mvjoin <- paste0(
    "  // combine univariate parameters\n",
    "  for (n in 1:N) {\n", 
    out$model_comp_mvjoin, 
    "  }\n"
  )
  if (isTRUE(nzchar(out$model_no_pll_comp_mvjoin))) {
    out$model_no_pll_comp_mvjoin <- paste0(
      "  // combine univariate parameters\n",
      "  for (n in 1:N) {\n", 
      out$model_no_pll_comp_mvjoin, 
      "  }\n"
    )
  }
  out$model_log_lik <- stan_log_lik(
    x, threads = threads, normalize = normalize, ...
  )
  list(out)
}

# Stan code for population-level effects
stan_fe <- function(bterms, data, prior, stanvars, threads, primitive, 
                    normalize, ...) {
  out <- list()
  family <- bterms$family
  fixef <- colnames(data_fe(bterms, data)$X)
  sparse <- is_sparse(bterms$fe)
  decomp <- get_decomp(bterms$fe)
  if (length(fixef) < 2L) {
    # decompositions require at least two predictors
    decomp <- "none"
  }
  center_X <- stan_center_X(bterms)
  ct <- str_if(center_X, "c")
  # remove the intercept from the design matrix?
  if (center_X) {
    fixef <- setdiff(fixef, "Intercept")
  }
  px <- check_prefix(bterms)
  p <- usc(combine_prefix(px))
  resp <- usc(px$resp)
  if (length(fixef)) {
    str_add(out$data) <- glue( 
      "  int<lower=1> K{p};",
      "  // number of population-level effects\n", 
      "  matrix[N{resp}, K{p}] X{p};",
      "  // population-level design matrix\n"
    )
    if (decomp == "none") {
      str_add(out$pll_args) <- glue(", data matrix X{ct}{p}") 
    }
    if (sparse) {
      if (decomp != "none") {
        stop2("Cannot use ", decomp, " decomposition for sparse matrices.")
      }
      if (use_threading(threads)) {
        stop2("Cannot use threading and sparse matrices at the same time.")
      }
      str_add(out$tdata_def) <- glue(
        "  // sparse matrix representation of X{p}\n",
        "  vector[rows(csr_extract_w(X{p}))] wX{p}", 
        " = csr_extract_w(X{p});\n",
        "  int vX{p}[size(csr_extract_v(X{p}))]",
        " = csr_extract_v(X{p});\n",
        "  int uX{p}[size(csr_extract_u(X{p}))]",
        " = csr_extract_u(X{p});\n"
      )
    }
    # prepare population-level coefficients
    b_bound <- get_bound(prior, class = "b", px = px)
    b_type <- glue("vector{b_bound}[K{ct}{p}]")
    b_coef_type <- glue("real{b_bound}")
    assign_b_tpar <- stan_assign_b_tpar(bterms, prior)
    if (decomp == "none") {
      b_suffix <- ""
      b_comment <- "population-level effects"
      if (assign_b_tpar) {
        str_add(out$tpar_def) <- glue("  {b_type} b{p};  // {b_comment}\n")
        str_add(out$pll_args) <- glue(", vector b{p}")
      } else {
        str_add_list(out) <- stan_prior(
          prior, class = "b", coef = fixef, type = b_type, 
          coef_type = b_coef_type, px = px, suffix = p,
          header_type = "vector", comment = b_comment,
          normalize = normalize
        )
      }
    } else {
      stopifnot(decomp == "QR")
      if (nzchar(b_bound)) {
        stop2("Cannot impose bounds on decomposed coefficients.")
      }
      b_suffix <- "Q"
      b_comment <- "regression coefficients at QR scale"
      if (assign_b_tpar) {
        str_add(out$tpar_def) <- glue("  {b_type} bQ{p};  // {b_comment}\n")
        str_add(out$pll_args) <- glue(", vector bQ{p}")
      } else {
        str_add_list(out) <- stan_prior(
          prior, class = "b", coef = fixef, type = b_type,
          coef_type = b_coef_type, px = px, suffix = glue("Q{p}"), 
          header_type = "vector", comment = b_comment, 
          normalize = normalize
        )
      }
      str_add(out$gen_def) <- glue(
        "  // obtain the actual coefficients\n",
        "  vector[K{ct}{p}] b{p} = XR{p}_inv * bQ{p};\n"
      )
    }
    str_add_list(out) <- stan_special_prior_local(
      prior, class = "b", ncoef = length(fixef), 
      px = px, center_X = center_X, suffix = b_suffix,
      normalize = normalize
    )
  }
  
  order_intercepts <- order_intercepts(bterms)
  if (order_intercepts && !center_X) {
    stop2(
      "Identifying mixture components via ordering requires ",
      "population-level intercepts to be present.\n",
      "Try setting order = 'none' in function 'mixture'."
    )
  }
  if (center_X) {
    # centering the design matrix improves convergence
    sub_X_means <- ""
    if (length(fixef)) {
      sub_X_means <- glue(" - dot_product(means_X{p}, b{p})")
      if (is_ordinal(family)) {
        # the intercept was already removed during the data preparation
        str_add(out$tdata_def) <- glue(
          "  int Kc{p} = K{p};\n",
          "  matrix[N{resp}, Kc{p}] Xc{p};", 
          "  // centered version of X{p}\n",
          "  vector[Kc{p}] means_X{p};",
          "  // column means of X{p} before centering\n"
        )
        str_add(out$tdata_comp) <- glue(
          "  for (i in 1:K{p}) {{\n",
          "    means_X{p}[i] = mean(X{p}[, i]);\n",
          "    Xc{p}[, i] = X{p}[, i] - means_X{p}[i];\n",
          "  }}\n"
        )
      } else {
        str_add(out$tdata_def) <- glue(
          "  int Kc{p} = K{p} - 1;\n",
          "  matrix[N{resp}, Kc{p}] Xc{p};", 
          "  // centered version of X{p} without an intercept\n",
          "  vector[Kc{p}] means_X{p};",
          "  // column means of X{p} before centering\n"
        )
        str_add(out$tdata_comp) <- glue(
          "  for (i in 2:K{p}) {{\n",
          "    means_X{p}[i - 1] = mean(X{p}[, i]);\n",
          "    Xc{p}[, i - 1] = X{p}[, i] - means_X{p}[i - 1];\n",
          "  }}\n"
        )
      }
    }
    if (!is_ordinal(family)) {
      # intercepts of ordinal models are handled in 'stan_thres'
      intercept_type <- "real"
      if (order_intercepts) {
        # identify mixtures via ordering of the intercepts
        dp_id <- dpar_id(px$dpar)
        str_add(out$tpar_def) <- glue(
          "  // identify mixtures via ordering of the intercepts\n",                   
          "  real Intercept{p} = ordered_Intercept{resp}[{dp_id}];\n"
        )
        str_add(out$pll_args) <- glue(", real Intercept{p}")
        # intercept parameter needs to be defined outside of 'stan_prior'
        intercept_type <- ""
      }
      str_add(out$eta) <- glue(" + Intercept{p}")
      str_add(out$gen_def) <- glue(
        "  // actual population-level intercept\n",
        "  real b{p}_Intercept = Intercept{p}{sub_X_means};\n"
      )
      str_add_list(out) <- stan_prior(
        prior, class = "Intercept", type = intercept_type,
        suffix = p, px = px, header_type = "real", 
        comment = "temporary intercept for centered predictors",
        normalize = normalize
      )
    }
  }
  if (decomp == "QR") {
    str_add(out$tdata_def) <- glue(
      "  // matrices for QR decomposition\n",
      "  matrix[N{resp}, K{ct}{p}] XQ{p};\n",
      "  matrix[K{ct}{p}, K{ct}{p}] XR{p};\n",
      "  matrix[K{ct}{p}, K{ct}{p}] XR{p}_inv;\n"
    )
    str_add(out$tdata_comp) <- glue(
      "  // compute and scale QR decomposition\n",
      "  XQ{p} = qr_thin_Q(X{ct}{p}) * sqrt(N{resp} - 1);\n",
      "  XR{p} = qr_thin_R(X{ct}{p}) / sqrt(N{resp} - 1);\n",
      "  XR{p}_inv = inverse(XR{p});\n"
    )
    str_add(out$pll_args) <- glue(", data matrix XQ{p}")
  }
  str_add(out$eta) <- stan_eta_fe(fixef, bterms, threads, primitive)
  out
}

# Stan code for group-level effects
stan_re <- function(ranef, prior, normalize, ...) {
  lpdf <- ifelse(normalize, "lpdf", "lupdf")
  IDs <- unique(ranef$id)
  out <- list()
  # special handling of student-t group effects as their 'df' parameters
  # are defined on a per-group basis instead of a per-ID basis
  tranef <- get_dist_groups(ranef, "student")
  if (has_rows(tranef)) {
    str_add(out$par) <- 
      "  // parameters for student-t distributed group-level effects\n"
    for (i in seq_rows(tranef)) {
      g <- usc(tranef$ggn[i])
      id <- tranef$id[i]
      str_add_list(out) <- stan_prior(
        prior, class = "df", group = tranef$group[i], 
        type = "real<lower=1>", suffix = g, normalize = normalize
      )
      str_add(out$par) <- glue(
        "  vector<lower=0>[N_{id}] udf{g};\n"
      )
      str_add(out$prior) <- glue(
        "  target += inv_chi_square_{lpdf}(udf{g} | df{g});\n"
      )
      # separate definition from computation to support fixed parameters
      str_add(out$tpar_def) <- glue(
        "  vector[N_{id}] dfm{g};\n"
      )
      str_add(out$tpar_comp) <- glue(
        "  dfm{g} = sqrt(df{g} * udf{g});\n"
      )
    }
  }
  # the ID syntax requires group-level effects to be evaluated separately
  tmp <- lapply(IDs, .stan_re, ranef = ranef, prior = prior, normalize = normalize, ...)
  out <- collapse_lists(ls = c(list(out), tmp))
  out
}

# Stan code for group-level effects per ID
# @param id the ID of the grouping factor
# @param ranef output of tidy_ranef
# @param prior object of class brmsprior
.stan_re <- function(id, ranef, prior, threads, normalize, ...) {
  lpdf <- ifelse(normalize, "lpdf", "lupdf")
  out <- list()
  r <- subset2(ranef, id = id)
  has_cov <- nzchar(r$cov[1])
  has_by <- nzchar(r$by[[1]])
  Nby <- seq_along(r$bylevels[[1]]) 
  ng <- seq_along(r$gcall[[1]]$groups)
  px <- check_prefix(r)
  uresp <- usc(unique(px$resp))
  idp <- paste0(r$id, usc(combine_prefix(px)))
  # define data needed for group-level effects
  str_add(out$data) <- glue(
    "  // data for group-level effects of ID {id}\n",
    "  int<lower=1> N_{id};  // number of grouping levels\n",
    "  int<lower=1> M_{id};  // number of coefficients per level\n"
  )
  if (r$gtype[1] == "mm") {
    for (res in uresp) {
      str_add(out$data) <- cglue(
        "  int<lower=1> J_{id}{res}_{ng}[N{res}];",
        "  // grouping indicator per observation\n",
        "  real W_{id}{res}_{ng}[N{res}];",
        "  // multi-membership weights\n"
      )
      str_add(out$pll_args) <- cglue(
        ", data int[] J_{id}{res}_{ng}, data real[] W_{id}{res}_{ng}"
      )
    }
  } else {
    str_add(out$data) <- cglue(
      "  int<lower=1> J_{id}{uresp}[N{uresp}];",
      "  // grouping indicator per observation\n"
    )
    str_add(out$pll_args) <- cglue(
      ", data int[] J_{id}{uresp}"
    )
  }
  if (has_by) {
    str_add(out$data) <- glue(
      "  int<lower=1> Nby_{id};  // number of by-factor levels\n",
      "  int<lower=1> Jby_{id}[N_{id}];", 
      "  // by-factor indicator per observation\n" 
    )
  }
  if (has_cov) {
    str_add(out$data) <- glue(
      "  matrix[N_{id}, N_{id}] Lcov_{id};",
      "  // cholesky factor of known covariance matrix\n"
    )
  }
  J <- seq_rows(r)
  reqZ <- !r$type %in% "sp"
  if (any(reqZ)) {
    str_add(out$data) <- "  // group-level predictor values\n"
    if (r$gtype[1] == "mm") {
      for (i in which(reqZ)) {
        str_add(out$data) <- cglue(
          "  vector[N{usc(r$resp[i])}] Z_{idp[i]}_{r$cn[i]}_{ng};\n"
        )
        str_add(out$pll_args) <- cglue(
          ", data vector Z_{idp[i]}_{r$cn[i]}_{ng}"
        )
      }
    } else {
      str_add(out$data) <- cglue(
        "  vector[N{usc(r$resp[reqZ])}] Z_{idp[reqZ]}_{r$cn[reqZ]};\n"
      )
      str_add(out$pll_args) <- cglue(
        ", data vector Z_{idp[reqZ]}_{r$cn[reqZ]}"
      )
    }
  }
  # define standard deviation parameters
  if (has_by) {
    str_add_list(out) <- stan_prior(
      prior, class = "sd", group = r$group[1], coef = r$coef,
      type = glue("matrix<lower=0>[M_{id}, Nby_{id}]"),
      coef_type = glue("row_vector<lower=0>[Nby_{id}]"),
      suffix = glue("_{id}"), px = px, broadcast = "matrix",
      comment = "group-level standard deviations",
      normalize = normalize
    )
  } else {
    str_add_list(out) <- stan_prior(
      prior, class = "sd", group = r$group[1], coef = r$coef,
      type = glue("vector<lower=0>[M_{id}]"), 
      coef_type = "real<lower=0>",
      suffix = glue("_{id}"), px = px, 
      comment = "group-level standard deviations",
      normalize = normalize
    )
  }
  dfm <- ""
  tr <- get_dist_groups(r, "student")
  if (nrow(r) > 1L && r$cor[1]) {
    # multiple correlated group-level effects
    str_add(out$data) <- glue( 
      "  int<lower=1> NC_{id};  // number of group-level correlations\n"
    )
    str_add(out$par) <- glue(
      "  matrix[M_{id}, N_{id}] z_{id};",
      "  // standardized group-level effects\n"
    )
    str_add(out$prior) <- glue(
      "  target += std_normal_{lpdf}(to_vector(z_{id}));\n"
    )
    if (has_rows(tr)) {
      dfm <- glue("rep_matrix(dfm_{tr$ggn[1]}, M_{id}) .* ")
    }
    if (has_by) {
      str_add_list(out) <- stan_prior(
        prior, class = "L", group = r$group[1], coef = Nby, 
        type = glue("cholesky_factor_corr[M_{id}]"),
        coef_type = glue("cholesky_factor_corr[M_{id}]"),
        suffix = glue("_{id}"), dim = glue("[Nby_{id}]"), 
        comment = "cholesky factor of correlation matrix",
        normalize = normalize
      ) 
      # separate definition from computation to support fixed parameters
      str_add(out$tpar_def) <- glue(
        "  matrix[N_{id}, M_{id}] r_{id};  // actual group-level effects\n"
      )
      if (has_cov) {
        rdef <- glue(
          "scale_r_cor_by_cov(z_{id}, sd_{id}, L_{id}, Jby_{id}, Lcov_{id})"
        )
      } else {
        rdef  <- glue("scale_r_cor_by(z_{id}, sd_{id}, L_{id}, Jby_{id})")      
      }
      str_add(out$tpar_comp) <- glue(
        "  // compute actual group-level effects\n",
        "  r_{id} = {dfm}{rdef};\n"
      ) 
      str_add(out$gen_def) <- cglue(
        "  // compute group-level correlations\n",
        "  corr_matrix[M_{id}] Cor_{id}_{Nby}",
        " = multiply_lower_tri_self_transpose(L_{id}[{Nby}]);\n",
        "  vector<lower=-1,upper=1>[NC_{id}] cor_{id}_{Nby};\n"
      )
      str_add(out$gen_comp) <- stan_cor_gen_comp(
        glue("cor_{id}_{Nby}"), glue("M_{id}")
      )
    } else {
      str_add_list(out) <- stan_prior(
        prior, class = "L", group = r$group[1], suffix = usc(id),
        type = glue("cholesky_factor_corr[M_{id}]"), 
        comment = "cholesky factor of correlation matrix",
        normalize = normalize
      )
      if (has_cov) {
        rdef <- glue("scale_r_cor_cov(z_{id}, sd_{id}, L_{id}, Lcov_{id})")
      } else {
        rdef <- glue("scale_r_cor(z_{id}, sd_{id}, L_{id})")
      }
      # separate definition from computation to support fixed parameters
      str_add(out$tpar_def) <- glue(
        "  matrix[N_{id}, M_{id}] r_{id};  // actual group-level effects\n"
      )
      str_add(out$tpar_comp) <- glue(
        "  // compute actual group-level effects\n",
        "  r_{id} = {dfm}{rdef};\n"
      )
      str_add(out$gen_def) <- glue(
        "  // compute group-level correlations\n",
        "  corr_matrix[M_{id}] Cor_{id}",
        " = multiply_lower_tri_self_transpose(L_{id});\n",
        "  vector<lower=-1,upper=1>[NC_{id}] cor_{id};\n"
      )
      str_add(out$gen_comp) <- stan_cor_gen_comp(
        cor = glue("cor_{id}"), ncol = glue("M_{id}")
      )
    }
    # separate definition from computation to support fixed parameters
    str_add(out$tpar_def) <- 
      "  // using vectors speeds up indexing in loops\n"
    str_add(out$tpar_def) <- cglue(
      "  vector[N_{id}] r_{idp}_{r$cn};\n"
    )
    str_add(out$tpar_comp) <- cglue(
      "  r_{idp}_{r$cn} = r_{id}[, {J}];\n"
    )
    str_add(out$pll_args) <- cglue(
      ", vector r_{idp}_{r$cn}"
    )
  } else {
    # single or uncorrelated group-level effects
    str_add(out$par) <- glue(
      "  vector[N_{id}] z_{id}[M_{id}];",
      "  // standardized group-level effects\n"
    )
    str_add(out$prior) <- cglue(
      "  target += std_normal_{lpdf}(z_{id}[{seq_rows(r)}]);\n"
    )
    Lcov <- str_if(has_cov, glue("Lcov_{id} * "))
    if (has_rows(tr)) {
      dfm <- glue("dfm_{tr$ggn[1]} .* ")
    }
    if (has_by) {
      # separate definition from computation to support fixed parameters
      str_add(out$tpar_def) <- cglue(
        "  vector[N_{id}] r_{idp}_{r$cn};  // actual group-level effects\n"
      )
      str_add(out$tpar_comp) <- cglue(
        "  r_{idp}_{r$cn} = {dfm}(transpose(sd_{id}[{J}, Jby_{id}])", 
        " .* ({Lcov}z_{id}[{J}]));\n"
      )
    } else {
      # separate definition from computation to support fixed parameters
      str_add(out$tpar_def) <- cglue(
        "  vector[N_{id}] r_{idp}_{r$cn};  // actual group-level effects\n"
      )
      str_add(out$tpar_comp) <- cglue(
        "  r_{idp}_{r$cn} = {dfm}(sd_{id}[{J}] * ({Lcov}z_{id}[{J}]));\n"
      )
    }
    str_add(out$pll_args) <- cglue(
      ", vector r_{idp}_{r$cn}"
    )
  }
  out
}

# Stan code of smooth terms
stan_sm <- function(bterms, data, prior, threads, normalize, ...) {
  lpdf <- ifelse(normalize, "lpdf", "lupdf")
  out <- list()
  smef <- tidy_smef(bterms, data)
  if (!NROW(smef)) {
    return(out)
  }
  px <- check_prefix(bterms)
  p <- usc(combine_prefix(px))
  resp <- usc(px$resp)
  slice <- stan_slice(threads)
  Xs_names <- attr(smef, "Xs_names")
  if (length(Xs_names)) {
    str_add(out$data) <- glue(
      "  // data for splines\n",
      "  int Ks{p};  // number of linear effects\n",
      "  matrix[N{resp}, Ks{p}] Xs{p};",
      "  // design matrix for the linear effects\n"
    )
    str_add(out$pll_args) <- glue(", data matrix Xs{p}")
    str_add_list(out) <- stan_prior(
      prior, class = "b", coef = Xs_names,
      type = glue("vector[Ks{p}]"), suffix = glue("s{p}"), 
      header_type = "vector", px = px, 
      comment = "spline coefficients", normalize = normalize
    )
    str_add(out$eta) <- glue(" + Xs{p}{slice} * bs{p}")
  }
  for (i in seq_rows(smef)) {
    pi <- glue("{p}_{i}")
    nb <- seq_len(smef$nbases[[i]])
    str_add(out$data) <- glue(
      "  // data for spline {smef$byterm[i]}\n",  
      "  int nb{pi};  // number of bases\n",
      "  int knots{pi}[nb{pi}];  // number of knots\n"
    )
    str_add(out$data) <- "  // basis function matrices\n"
    str_add(out$data) <- cglue(
      "  matrix[N{resp}, knots{pi}[{nb}]] Zs{pi}_{nb};\n"
    )
    str_add(out$pll_args) <- cglue(", data matrix Zs{pi}_{nb}")
    str_add(out$par) <- glue(
      "  // parameters for spline {smef$byterm[i]}\n"
    )
    str_add(out$par) <- cglue(
      "  // standarized spline coefficients\n",
      "  vector[knots{pi}[{nb}]] zs{pi}_{nb};\n"
    )
    for (j in nb) {
      str_add_list(out) <- stan_prior(
        prior, class = "sds", coef = smef$term[i], 
        type = "real<lower=0>", coef_type = "real<lower=0>",
        suffix = glue("{pi}_{j}"), px = px, 
        comment = "standard deviations of spline coefficients",
        normalize = normalize
      ) 
    }
    # separate definition from computation to support fixed parameters
    str_add(out$tpar_def) <- cglue(
      "  // actual spline coefficients\n",
      "  vector[knots{pi}[{nb}]] s{pi}_{nb};\n"
    )
    str_add(out$tpar_comp) <- cglue(
      "  // compute actual spline coefficients\n",
      "  s{pi}_{nb} = sds{pi}_{nb} * zs{pi}_{nb};\n"
    )
    str_add(out$pll_args) <- cglue(", vector s{pi}_{nb}")
    str_add(out$prior) <- cglue(
      "  target += std_normal_{lpdf}(zs{pi}_{nb});\n"
    )
    str_add(out$eta) <- cglue(
      " + Zs{pi}_{nb}{slice} * s{pi}_{nb}"
    )
  }
  out
}

# Stan code for category specific effects
# @note not implemented for non-linear models
stan_cs <- function(bterms, data, prior, ranef, threads, normalize, ...) {
  out <- list()
  csef <- colnames(get_model_matrix(bterms$cs, data))
  px <- check_prefix(bterms)
  p <- usc(combine_prefix(px))
  resp <- usc(bterms$resp)
  slice <- stan_slice(threads)
  ranef <- subset2(ranef, type = "cs", ls = px)
  if (length(csef)) {
    str_add(out$data) <- glue(
      "  int<lower=1> Kcs{p};  // number of category specific effects\n",
      "  matrix[N{resp}, Kcs{p}] Xcs{p};  // category specific design matrix\n"
    )
    str_add(out$pll_args) <- glue(", data matrix Xcs{p}")
    bound <- get_bound(prior, class = "b", px = px)
    str_add_list(out) <- stan_prior(
      prior, class = "b", coef = csef,
      type = glue("matrix{bound}[Kcs{p}, nthres{resp}]"),
      coef_type = glue("row_vector{bound}[nthres{resp}]"),
      suffix = glue("cs{p}"), px = px, broadcast = "matrix",
      header_type = "matrix", comment = "category specific effects",
      normalize = normalize
    )
    str_add(out$model_def) <- glue(
      "  // linear predictor for category specific effects\n",
      "  matrix[N{resp}, nthres{resp}] mucs{p} = Xcs{p}{slice} * bcs{p};\n"
    )
  }
  if (nrow(ranef)) {
    if (!length(csef)) {
      # only group-level category specific effects present
      str_add(out$model_def) <- glue(
        "  // linear predictor for category specific effects\n",               
        "  matrix[N{resp}, nthres{resp}] mucs{p}", 
        " = rep_matrix(0, N{resp}, nthres{resp});\n"
      )
    }
    n <- stan_nn(threads)
    thres_regex <- "(?<=\\[)[[:digit:]]+(?=\\]$)"
    thres <- get_matches(thres_regex, ranef$coef, perl = TRUE)
    nthres <- max(as.numeric(thres))
    mucs_loop <- ""
    for (i in seq_len(nthres)) {
      r_cat <- ranef[grepl(glue("\\[{i}\\]$"), ranef$coef), ]
      str_add(mucs_loop) <- glue(
        "    mucs{p}[n, {i}] = mucs{p}[n, {i}]"
      )
      for (id in unique(r_cat$id)) {
        r <- r_cat[r_cat$id == id, ]
        rpx <- check_prefix(r)
        idp <- paste0(r$id, usc(combine_prefix(rpx)))
        idresp <- paste0(r$id, usc(rpx$resp))
        str_add(mucs_loop) <- cglue(
          " + r_{idp}_{r$cn}[J_{idresp}{n}] * Z_{idp}_{r$cn}{n}"
        )
      }
      str_add(mucs_loop) <- ";\n"
    }
    str_add(out$model_comp_eta_loop) <- glue(
      "  for (n in 1:N{resp}) {{\n", 
      stan_nn_def(threads), mucs_loop, 
      "  }\n"
    )
  }
  out
}

# Stan code for special effects
stan_sp <- function(bterms, data, prior, stanvars, ranef, meef, threads, 
                    normalize, ...) {
  out <- list()
  spef <- tidy_spef(bterms, data)
  if (!nrow(spef)) return(out)
  px <- check_prefix(bterms)
  p <- usc(combine_prefix(px))
  resp <- usc(px$resp)
  n <- stan_nn(threads)
  ranef <- subset2(ranef, type = "sp", ls = px)
  spef_coef <- rename(spef$term)
  invalid_coef <- setdiff(ranef$coef, spef_coef)
  if (length(invalid_coef)) {
    stop2(
      "Special group-level terms require corresponding ", 
      "population-level terms:\nOccured for ", 
      collapse_comma(invalid_coef)
    )
  }
  # prepare Stan code of the linear predictor component
  for (i in seq_rows(spef)) {
    eta <- spef$joint_call[[i]]
    if (!is.null(spef$calls_mo[[i]])) {
      new_mo <- glue("mo(simo{p}_{spef$Imo[[i]]}, Xmo{p}_{spef$Imo[[i]]}{n})")
      eta <- rename(eta, spef$calls_mo[[i]], new_mo)
    }
    if (!is.null(spef$calls_me[[i]])) {
      Kme <- seq_along(meef$term)
      Ime <- match(meef$grname, unique(meef$grname))
      nme <- ifelse(nzchar(meef$grname), glue("[Jme_{Ime}{n}]"), n)
      new_me <- glue("Xme_{Kme}{nme}")
      eta <- rename(eta, meef$term, new_me)
    }
    if (!is.null(spef$calls_mi[[i]])) {
      is_na_idx <- is.na(spef$idx2_mi[[i]])
      idx_mi <- glue("[idxl{p}_{spef$vars_mi[[i]]}_{spef$idx2_mi[[i]]}{n}]")
      idx_mi <- ifelse(is_na_idx, n, idx_mi)
      new_mi <- glue("Yl_{spef$vars_mi[[i]]}{idx_mi}")
      eta <- rename(eta, spef$calls_mi[[i]], new_mi)
      str_add(out$pll_args) <- glue(", vector Yl_{spef$vars_mi[[i]]}")
    }
    if (spef$Ic[i] > 0) {
      str_add(eta) <- glue(" * Csp{p}_{spef$Ic[i]}{n}")
    }
    r <- subset2(ranef, coef = spef_coef[i])
    rpars <- str_if(nrow(r), cglue(" + {stan_eta_rsp(r)}"))
    str_add(out$loopeta) <- glue(" + (bsp{p}[{i}]{rpars}) * {eta}")
  }
  
  # prepare general Stan code
  ncovars <- max(spef$Ic)
  str_add(out$data) <- glue(
    "  int<lower=1> Ksp{p};  // number of special effects terms\n"
  )
  if (ncovars > 0L) {
    str_add(out$data) <- "  // covariates of special effects terms\n"
    str_add(out$data) <- cglue(
      "  vector[N{resp}] Csp{p}_{seq_len(ncovars)};\n"
    )
    str_add(out$pll_args) <- cglue(", data vector Csp{p}_{seq_len(ncovars)}")
  }
  
  # include special Stan code for monotonic effects
  which_Imo <- which(lengths(spef$Imo) > 0)
  if (any(which_Imo)) {
    str_add(out$data) <- glue(
      "  int<lower=1> Imo{p};  // number of monotonic variables\n",
      "  int<lower=1> Jmo{p}[Imo{p}];  // length of simplexes\n"
    )
    ids <- unlist(spef$ids_mo)
    lpdf <- stan_lpdf_name(normalize)
    for (i in which_Imo) {
      for (k in seq_along(spef$Imo[[i]])) {
        j <- spef$Imo[[i]][[k]]
        id <- spef$ids_mo[[i]][[k]]
        # index of first ID appearance
        j_id <- match(id, ids)
        str_add(out$data) <- glue(
          "  int Xmo{p}_{j}[N{resp}];  // monotonic variable\n"
        )
        str_add(out$pll_args) <- glue(
          ", int[] Xmo{p}_{j}, vector simo{p}_{j}"
        )
        if (is.na(id) || j_id == j) {
          # no ID or first appearance of the ID
          str_add(out$data) <- glue(
            "  vector[Jmo{p}[{j}]] con_simo{p}_{j};", 
            "  // prior concentration of monotonic simplex\n"
          )
          str_add(out$par) <- glue(
            "  simplex[Jmo{p}[{j}]] simo{p}_{j};  // monotonic simplex\n"
          )
          str_add(out$prior) <- glue(
            "  target += dirichlet_{lpdf}(simo{p}_{j} | con_simo{p}_{j});\n"
          )
        } else {
          # use the simplex shared across all terms of the same ID
          str_add(out$tpar_def) <- glue(
            "  simplex[Jmo{p}[{j}]] simo{p}_{j} = simo{p}_{j_id};\n"
          )
        }
      }
    }
  }
  
  # include special Stan code for missing value terms
  uni_mi <- na.omit(attr(spef, "uni_mi"))
  for (j in seq_rows(uni_mi)) {
    idxl <- glue("idxl{p}_{uni_mi$var[j]}_{uni_mi$idx2[j]}")
    str_add(out$data) <- glue(
      "  int {idxl}[N{resp}];  // matching indices\n"
    )
    str_add(out$pll_args) <- glue(", data int[] {idxl}")
  }
  
  # prepare special effects coefficients
  bound <- get_bound(prior, class = "b", px = px)
  if (stan_assign_b_tpar(bterms, prior)) {
    str_add(out$tpar_def) <- glue(
      "  // special effects coefficients\n", 
      "  vector{bound}[Ksp{p}] bsp{p};\n"
    )
  } else {
    str_add_list(out) <- stan_prior(
      prior, class = "b", coef = spef$coef, 
      type = glue("vector{bound}[Ksp{p}]"),
      coef_type = glue("real{bound}"), px = px, 
      suffix = glue("sp{p}"), header_type = "vector",
      comment = "special effects coefficients",
      normalize = normalize
    )
  }
  stan_special_priors <- stan_special_prior_local(
    prior, class = "bsp", ncoef = nrow(spef), 
    px = px, center_X = FALSE, normalize = normalize
  )  
  out <- collapse_lists(out, stan_special_priors)
  out
}

# Stan code for latent gaussian processes
stan_gp <- function(bterms, data, prior, threads, normalize, ...) {
  lpdf <- stan_lpdf_name(normalize)
  out <- list()
  px <- check_prefix(bterms)
  p <- usc(combine_prefix(px))
  resp <- usc(px$resp)
  slice <- stan_slice(threads)
  gpef <- tidy_gpef(bterms, data)
  # kernel methods cannot simply be split up into partial sums
  for (i in seq_rows(gpef)) {
    pi <- glue("{p}_{i}")
    byvar <- gpef$byvars[[i]] 
    cons <- gpef$cons[[i]]
    byfac <- length(cons) > 0L
    bynum <- !is.null(byvar) && !byfac 
    k <- gpef$k[i]
    is_approx <- !isNA(k)
    iso <- gpef$iso[i]
    gr <- gpef$gr[i]
    sfx1 <- gpef$sfx1[[i]]
    sfx2 <- gpef$sfx2[[i]]
    str_add(out$data) <- glue(
      "  // data related to GPs\n",
      "  int<lower=1> Kgp{pi};", 
      "  // number of sub-GPs (equal to 1 unless 'by' was used)\n",
      "  int<lower=1> Dgp{pi};  // GP dimension\n"
    )
    if (is_approx) {
      str_add(out$data) <- glue(
        "  // number of basis functions of an approximate GP\n",
        "  int<lower=1> NBgp{pi};\n"
      )
    } 
    str_add_list(out) <- stan_prior(
      prior, class = "sdgp", coef = sfx1, 
      type = glue("vector<lower=0>[Kgp{pi}]"),
      coef_type = "real<lower=0>", px = px, suffix = pi, 
      comment = "GP standard deviation parameters",
      normalize = normalize
    )
    if (gpef$iso[i]) {
      lscale_type <- "vector<lower=0>[1]"
      lscale_dim <- glue("[Kgp{pi}]")
      lscale_comment <- "GP length-scale parameters"
    } else {
      lscale_type <- glue("vector<lower=0>[Dgp{pi}]")
      lscale_dim <- glue("[Kgp{pi}]")
      lscale_comment <- "GP length-scale parameters"
    }
    if (byfac) {
      J <- seq_along(cons)
      Ngp <- glue("Ngp{pi}")
      Nsubgp <- glue("N", str_if(gr, "sub"), glue("gp{pi}"))
      Igp <- glue("Igp{pi}_{J}")
      str_add(out$data) <- glue(
        "  // number of observations relevant for a certain sub-GP\n",
        "  int<lower=1> {Ngp}[Kgp{pi}];\n"
      )
      str_add(out$data) <- 
        "  // indices and contrasts of sub-GPs per observation\n"
      str_add(out$data) <- cglue(
        "  int<lower=1> {Igp}[{Ngp}[{J}]];\n",
        "  vector[{Ngp}[{J}]] Cgp{pi}_{J};\n"
      )
      str_add(out$pll_args) <- cglue(
        ", data int[] {Igp}, data vector Cgp{pi}_{J}"
      )
      str_add_list(out) <- stan_prior(
        prior, class = "lscale", coef = sfx2, 
        type = lscale_type, coef_type = "real<lower=0>",
        dim = lscale_dim, suffix = glue("{pi}"), px = px,
        comment = lscale_comment, normalize = normalize
      )
      if (gr) {
        str_add(out$data) <- glue(
          "  // number of latent GP groups\n",
          "  int<lower=1> Nsubgp{pi}[Kgp{pi}];\n"
        )
        str_add(out$data) <- cglue(
          "  // indices of latent GP groups per observation\n",
          "  int<lower=1> Jgp{pi}_{J}[{Ngp}[{J}]];\n"
        )
        str_add(out$pll_args) <- cglue(", data int[] Jgp{pi}_{J}")
      }
      if (is_approx) {
        str_add(out$data) <- 
          "  // approximate GP basis matrices and eigenvalues\n"
        str_add(out$data) <- cglue(
          "  matrix[{Nsubgp}[{J}], NBgp{pi}] Xgp{pi}_{J};\n",
          "  vector[Dgp{pi}] slambda{pi}_{J}[NBgp{pi}];\n"
        )
        str_add(out$par) <- "  // latent variables of the GP\n"
        str_add(out$par) <- cglue(
          "  vector[NBgp{pi}] zgp{pi}_{J};\n"
        )
        str_add(out$model_no_pll_def) <- "  // scale latent variables of the GP\n"
        str_add(out$model_no_pll_def) <- cglue(
          "  vector[NBgp{pi}] rgp{pi}_{J} = sqrt(spd_cov_exp_quad(", 
          "slambda{pi}_{J}, sdgp{pi}[{J}], lscale{pi}[{J}])) .* zgp{pi}_{J};\n"
        )
        gp_call <- glue("Xgp{pi}_{J} * rgp{pi}_{J}")
      } else {
        # exact GPs
        str_add(out$data) <- "  // covariates of the GP\n"
        str_add(out$data) <- cglue(
          "  vector[Dgp{pi}] Xgp{pi}_{J}[{Nsubgp}[{J}]];\n"
        )
        str_add(out$par) <- "  // latent variables of the GP\n"
        str_add(out$par) <- cglue(
          "  vector[{Nsubgp}[{J}]] zgp{pi}_{J};\n"
        )
        gp_call <- glue(
          "gp(Xgp{pi}_{J}, sdgp{pi}[{J}], lscale{pi}[{J}], zgp{pi}_{J})"
        )
      }
      slice2 <- ""
      Igp_sub <- Igp
      if (use_threading(threads)) {
        str_add(out$model_comp_basic) <- cglue(
          "  int which_gp{pi}_{J}[size_range({Igp}, start, end)] =", 
          " which_range({Igp}, start, end);\n"
        )
        slice2 <- glue("[which_gp{pi}_{J}]")
        Igp_sub <- glue("start_at_one({Igp}{slice2}, start)")
      }
      # TODO: add all GP elements to 'eta' at the same time?
      eta <- combine_prefix(px, keep_mu = TRUE, nlp = TRUE)
      eta <- glue("{eta}[{Igp_sub}]")
      str_add(out$model_no_pll_def) <- cglue(
        "  vector[{Nsubgp}[{J}]] gp_pred{pi}_{J} = {gp_call};\n"
      )
      str_add(out$pll_args) <- cglue(", vector gp_pred{pi}_{J}")
      Cgp <- glue("Cgp{pi}_{J}{slice2} .* ")
      Jgp <- str_if(gr, glue("[Jgp{pi}_{J}{slice2}]"), slice)
      str_add(out$model_comp_basic) <- cglue(
        "  {eta} += {Cgp}gp_pred{pi}_{J}{Jgp};\n"
      )
      str_add(out$prior) <- cglue(
        "{tp()}std_normal_{lpdf}(zgp{pi}_{J});\n"
      )
    } else {
      # no by-factor variable
      str_add_list(out) <- stan_prior(
        prior, class = "lscale", coef = sfx2, 
        type = lscale_type, coef_type = "real<lower=0>",
        dim = lscale_dim, suffix = glue("{pi}"), px = px,
        comment = lscale_comment, normalize = normalize
      )
      Nsubgp <- glue("N{resp}")
      if (gr) {
        Nsubgp <- glue("Nsubgp{pi}")
        str_add(out$data) <- glue(
          "  // number of latent GP groups\n",
          "  int<lower=1> {Nsubgp};\n",
          "  // indices of latent GP groups per observation\n",
          "  int<lower=1> Jgp{pi}[N{resp}];\n"
        )
        str_add(out$pll_args) <- glue(", data int[] Jgp{pi}")
      }
      Cgp <- ""
      if (bynum) {
        str_add(out$data) <- glue(
          "  // numeric by-variable of the GP\n",
          "  vector[N{resp}] Cgp{pi};\n"
        )
        str_add(out$pll_args) <- glue(", data vector Cgp{pi}")
        Cgp <- glue("Cgp{pi}{slice} .* ")
      }
      if (is_approx) {
        str_add(out$data) <- glue(
          "  // approximate GP basis matrices\n",
          "  matrix[{Nsubgp}, NBgp{pi}] Xgp{pi};\n",
          "  // approximate GP eigenvalues\n",
          "  vector[Dgp{pi}] slambda{pi}[NBgp{pi}];\n"
        )
        str_add(out$par) <- glue(
          "  vector[NBgp{pi}] zgp{pi};  // latent variables of the GP\n"
        )
        str_add(out$model_no_pll_def) <- glue(
          "  // scale latent variables of the GP\n",
          "  vector[NBgp{pi}] rgp{pi} = sqrt(spd_cov_exp_quad(", 
          "slambda{pi}, sdgp{pi}[1], lscale{pi}[1])) .* zgp{pi};\n"
        )
        if (gr) {
          # grouping prevents GPs to be computed efficiently inside reduce_sum
          str_add(out$model_no_pll_def) <- glue(
            "  vector[{Nsubgp}] gp_pred{pi} = Xgp{pi} * rgp{pi};\n"
          )
          str_add(out$eta) <- glue(" + {Cgp}gp_pred{pi}[Jgp{pi}{slice}]")
          str_add(out$pll_args) <- glue(", vector gp_pred{pi}")
        } else {
          # efficient computation of approx GPs inside reduce_sum is possible
          str_add(out$model_def) <- glue(
            "  vector[N{resp}] gp_pred{pi} = Xgp{pi}{slice} * rgp{pi};\n"
          )
          str_add(out$eta) <- glue(" + {Cgp}gp_pred{pi}")
          str_add(out$pll_args) <- glue(", data matrix Xgp{pi}, vector rgp{pi}")
        }
      } else {
        # exact GPs
        str_add(out$data) <- glue(
          "  vector[Dgp{pi}] Xgp{pi}[{Nsubgp}];  // covariates of the GP\n"
        ) 
        str_add(out$par) <- glue(
          "  vector[{Nsubgp}] zgp{pi};  // latent variables of the GP\n"
        )
        gp_call <- glue("gp(Xgp{pi}, sdgp{pi}[1], lscale{pi}[1], zgp{pi})")
        # exact GPs are kernel based methods which
        # need to be computed outside of reduce_sum 
        str_add(out$model_no_pll_def) <- glue(
          "  vector[{Nsubgp}] gp_pred{pi} = {gp_call};\n"
        )
        Jgp <- str_if(gr, glue("[Jgp{pi}{slice}]"), slice)
        str_add(out$eta) <- glue(" + {Cgp}gp_pred{pi}{Jgp}")
        str_add(out$pll_args) <- glue(", vector gp_pred{pi}")
      }
      str_add(out$prior) <- glue(
        "{tp()}std_normal_{lpdf}(zgp{pi});\n"
      )
    }
  }
  out
}

# Stan code for the linear predictor of autocorrelation terms 
stan_ac <- function(bterms, data, prior, threads, normalize, ...) {
  lpdf <- stan_lpdf_name(normalize)
  out <- list()
  px <- check_prefix(bterms)
  p <- usc(combine_prefix(px))
  resp <- usc(px$resp)
  n <- stan_nn(threads)
  slice <- stan_slice(threads)
  has_natural_residuals <- has_natural_residuals(bterms)
  has_ac_latent_residuals <- has_ac_latent_residuals(bterms)
  acef <- tidy_acef(bterms, data)
  
  if (has_ac_latent_residuals) {
    # families that do not have natural residuals require latent
    # residuals for residual-based autocor structures
    err_msg <- "Latent residuals are not implemented"
    if (is.btnl(bterms)) {
      stop2(err_msg, " for non-linear models.")
    }
    str_add(out$par) <- glue(
      "  vector[N{resp}] zerr{p};  // unscaled residuals\n"
    )
    str_add_list(out) <- stan_prior(
      prior, class = "sderr", px = px, suffix = p,
      type = "real<lower=0>", comment = "SD of residuals",
      normalize = normalize
    )
    str_add(out$tpar_def) <- glue(
      "  vector[N{resp}] err{p};  // actual residuals\n"
    )
    str_add(out$pll_args) <- glue(", vector err{p}")
    str_add(out$prior) <- glue(
      "  target += std_normal_{lpdf}(zerr{p});\n"
    )
    str_add(out$eta) <- glue(" + err{p}{slice}")
  }

  # validity of the autocor terms has already been checked in 'tidy_acef'
  acef_arma <- subset2(acef, class = "arma")
  if (NROW(acef_arma)) {
    if (use_threading(threads) && (!acef_arma$cov || has_natural_residuals)) {
      stop2("Threading is not supported for this ARMA model.")
    }
    str_add(out$data) <- glue( 
      "  // data needed for ARMA correlations\n",
      "  int<lower=0> Kar{p};  // AR order\n",
      "  int<lower=0> Kma{p};  // MA order\n"
    )
    str_add(out$tdata_def) <- glue( 
      "  int max_lag{p} = max(Kar{p}, Kma{p});\n"
    )
    if (!acef_arma$cov) {
      err_msg <- "Please set cov = TRUE in ARMA structures"
      if (is.formula(bterms$adforms$se)) {
        stop2(err_msg, " when including known standard errors.")
      }
      str_add(out$data) <- glue( 
        "  // number of lags per observation\n",
        "  int<lower=0> J_lag{p}[N{resp}];\n"                
      )
      str_add(out$model_def) <- glue(
        "  // matrix storing lagged residuals\n",
        "  matrix[N{resp}, max_lag{p}] Err{p}",
        " = rep_matrix(0, N{resp}, max_lag{p});\n"
      )
      if (has_natural_residuals) {
        str_add(out$model_def) <- glue(
          "  vector[N{resp}] err{p};  // actual residuals\n"
        )
        Y <- str_if(is.formula(bterms$adforms$mi), "Yl", "Y") 
        comp_err <- glue("    err{p}[n] = {Y}{p}[n] - mu{p}[n];\n")
      } else {
        if (acef_arma$q > 0) {
          # AR and MA structures cannot be distinguished when
          # using a single vector of latent residuals
          stop2("Please set cov = TRUE when modeling MA structures ",
                "for this family.")
        }
        str_add(out$tpar_comp) <- glue(
          "  // compute ctime-series residuals\n",
          "  err{p} = sderr{p} * zerr{p};\n"
        )
        comp_err <- ""
      }
      add_ar <- str_if(acef_arma$p > 0,
        glue("    mu{p}[n] += Err{p}[n, 1:Kar{p}] * ar{p};\n")             
      )
      add_ma <- str_if(acef_arma$q > 0,
        glue("    mu{p}[n] += Err{p}[n, 1:Kma{p}] * ma{p};\n")             
      )
      str_add(out$model_comp_arma) <- glue(
        "  // include ARMA terms\n",
        "  for (n in 1:N{resp}) {{\n",
        add_ma,
        comp_err,
        "    for (i in 1:J_lag{p}[n]) {{\n",
        "      Err{p}[n + 1, i] = err{p}[n + 1 - i];\n",
        "    }}\n",
        add_ar,
        "  }}\n"
      )
    }
    # no boundaries are required in the conditional formulation
    # when natural residuals automatically define the scale
    need_arma_bound <- acef_arma$cov || has_ac_latent_residuals
    if (acef_arma$p > 0) {
      ar_bound <- str_if(need_arma_bound, "<lower=-1,upper=1>")
      str_add_list(out) <- stan_prior(
        prior, class = "ar", px = px, suffix = p,
        coef = seq_along(acef_arma$p),
        type = glue("vector{ar_bound}[Kar{p}]"),
        coef_type = glue("real{ar_bound}"),
        header_type = "vector",
        comment = "autoregressive coefficients",
        normalize = normalize
      )
    }
    if (acef_arma$q > 0) {
      ma_bound <- str_if(need_arma_bound, "<lower=-1,upper=1>")
      str_add_list(out) <- stan_prior(
        prior, class = "ma", px = px, suffix = p,
        coef = seq_along(acef_arma$q),
        type = glue("vector{ma_bound}[Kma{p}]"),
        coef_type = glue("real{ma_bound}"),
        header_type = "vector",
        comment = "moving-average coefficients",
        normalize = normalize
      )
    }
  }
  
  acef_cosy <- subset2(acef, class = "cosy")
  if (NROW(acef_cosy)) {
    # compound symmetry correlation structure
    # most code is shared with ARMA covariance models
    # cosy correlations may be negative in theory but 
    # this causes problems divergent transitions (#878)
    # str_add(out$tdata_def) <- glue(
    #   "  real lb_cosy{p} = -1.0 / (max(nobs_tg{p}) - 1);",
    #   "  // lower bound of the cosy correlation\n"
    # )
    str_add_list(out) <- stan_prior(
      prior, class = "cosy", px = px, suffix = p, 
      type = glue("real<lower=0,upper=1>"), 
      comment = "compound symmetry correlation",
      normalize = normalize
    )
  }
  
  acef_time_cov <- subset2(acef, dim = "time", cov = TRUE)
  if (NROW(acef_time_cov)) {
    # use correlation structures in covariance matrix parameterization
    # optional for ARMA models and obligatory for COSY models
    # can only model one covariance structure at a time
    stopifnot(NROW(acef_time_cov) == 1)
    str_add(out$data) <- glue( 
      "  // see the functions block for details\n",
      "  int<lower=1> N_tg{p};\n",
      "  int<lower=1> begin_tg{p}[N_tg{p}];\n", 
      "  int<lower=1> end_tg{p}[N_tg{p}];\n", 
      "  int<lower=1> nobs_tg{p}[N_tg{p}];\n"
    )
    str_add(out$tdata_def) <- glue(
      "  int max_nobs_tg{p} = max(nobs_tg{p});",
      "  // maximum dimension of the autocorrelation matrix\n"
    )
    if (!is.formula(bterms$adforms$se)) {
      str_add(out$tdata_def) <- glue(
        "  // no known standard errors specified by the user\n",
        "  vector[N{resp}] se2{p} = rep_vector(0.0, N{resp});\n"
      )
    }
    str_add(out$tpar_def) <- glue(
      "  // cholesky factor of the autocorrelation matrix\n",
      "  matrix[max_nobs_tg{p}, max_nobs_tg{p}] chol_cor{p};\n"               
    )
    if (acef_time_cov$class == "arma") {
      if (acef_time_cov$p > 0 && acef_time_cov$q == 0) {
        cor_fun <- "ar1"
        cor_args <- glue("ar{p}[1]")
      } else if (acef_time_cov$p == 0 && acef_time_cov$q > 0) {
        cor_fun <- "ma1"
        cor_args <- glue("ma{p}[1]")
      } else {
        cor_fun <- "arma1"
        cor_args <- glue("ar{p}[1], ma{p}[1]")
      }
    } else if (acef_time_cov$class == "cosy") {
      cor_fun <- "cosy"
      cor_args <- glue("cosy{p}")
    }
    str_add(out$tpar_comp) <- glue(
      "  // compute residual covariance matrix\n",
      "  chol_cor{p} = cholesky_cor_{cor_fun}({cor_args}, max_nobs_tg{p});\n"
    )
    if (has_ac_latent_residuals) {
      str_add(out$tpar_comp) <- glue(
        "  // compute correlated time-series residuals\n",
        "  err{p} = scale_time_err(",
        "zerr{p}, sderr{p}, chol_cor{p}, nobs_tg{p}, begin_tg{p}, end_tg{p});\n"
      )
    }
  }
  
  acef_sar <- subset2(acef, class = "sar")
  if (NROW(acef_sar)) {
    if (!has_natural_residuals) {
      stop2("SAR terms are not implemented for this family.")
    }
    if (use_threading(threads)) {
      stop2("Threading is not supported for SAR models.")
    }
    str_add(out$data) <- glue(
      "  matrix[N{resp}, N{resp}] Msar{p};  // spatial weight matrix\n",
      "  vector[N{resp}] eigenMsar{p};  // eigenvalues of Msar{p}\n"
    )
    str_add(out$tdata_def) <- glue(
      "  // the eigenvalues define the boundaries of the SAR correlation\n",
      "  real min_eigenMsar{p} = min(eigenMsar{p});\n",
      "  real max_eigenMsar{p} = max(eigenMsar{p});\n"
    )
    if (acef_sar$type == "lag") {
      str_add_list(out) <- stan_prior(
        prior, class = "lagsar", px = px, suffix = p,
        type = glue("real<lower=min_eigenMsar{p},upper=max_eigenMsar{p}>"),
        comment = "lag-SAR correlation parameter",
        normalize = normalize
      )
    } else if (acef_sar$type == "error") {
      str_add_list(out) <- stan_prior(
        prior, class = "errorsar", px = px, suffix = p,
        type = glue("real<lower=min_eigenMsar{p},upper=max_eigenMsar{p}>"),
        comment = "error-SAR correlation parameter",
        normalize = normalize
      )
    }
  }
  
  acef_car <- subset2(acef, class = "car")
  if (NROW(acef_car)) {
    if (is.btnl(bterms)) {
      stop2("CAR terms are not implemented for non-linear models.")
    }
    str_add(out$data) <- glue(
      "  // data for the CAR structure\n",
      "  int<lower=1> Nloc{p};\n",
      "  int<lower=1> Jloc{p}[N{resp}];\n",
      "  int<lower=0> Nedges{p};\n",
      "  int<lower=1> edges1{p}[Nedges{p}];\n",
      "  int<lower=1> edges2{p}[Nedges{p}];\n"
    )
    str_add_list(out) <- stan_prior(
      prior, class = "sdcar", px = px, suffix = p,
      type = "real<lower=0>", comment = "SD of the CAR structure",
      normalize = normalize
    )
    str_add(out$pll_args) <- glue(", vector rcar{p}, data int[] Jloc{p}")
    str_add(out$loopeta) <- glue(" + rcar{p}[Jloc{p}{n}]")
    if (acef_car$type %in% c("escar", "esicar")) {
      str_add(out$data) <- glue(
        "  vector[Nloc{p}] Nneigh{p};\n",
        "  vector[Nloc{p}] eigenMcar{p};\n"
      )
    }
    if (acef_car$type == "escar") {
      str_add(out$par) <- glue(
        "  vector[Nloc{p}] rcar{p};\n"
      )
      str_add_list(out) <- stan_prior(
        prior, class = "car", px = px, suffix = p,
        type = "real<lower=0, upper=1>", normalize = normalize
      )
      car_args <- c(
        "car", "sdcar", "Nloc", "Nedges", 
        "Nneigh", "eigenMcar", "edges1", "edges2"
      )
      car_args <- paste0(car_args, p, collapse = ", ")
      str_add(out$prior) <- glue(
        "  target += sparse_car_lpdf(\n", 
        "    rcar{p} | {car_args}\n",
        "  );\n"
      )
    } else if (acef_car$type == "esicar") {
      str_add(out$par) <- glue(
        "  vector[Nloc{p} - 1] zcar{p};\n"
      )
      str_add(out$tpar_def) <- glue(
        "  vector[Nloc{p}] rcar{p};\n"                
      )
      str_add(out$tpar_comp) <- glue(
        "  // sum-to-zero constraint\n",
        "  rcar[1:(Nloc{p} - 1)] = zcar{p};\n",
        "  rcar[Nloc{p}] = - sum(zcar{p});\n"
      )
      car_args <- c(
        "sdcar", "Nloc", "Nedges", "Nneigh", 
        "eigenMcar", "edges1", "edges2"
      )
      car_args <- paste0(car_args, p, collapse = ", ")
      str_add(out$prior) <- glue(
        "  target += sparse_icar_lpdf(\n", 
        "    rcar{p} | {car_args}\n",
        "  );\n"
      )
    } else if (acef_car$type %in% "icar") {
      # intrinsic car based on the case study of Mitzi Morris
      # http://mc-stan.org/users/documentation/case-studies/icar_stan.html
      str_add(out$par) <- glue(
        "  // parameters for the ICAR structure\n",
        "  vector[Nloc{p}] zcar{p};\n"
      )
      # separate definition from computation to support fixed parameters
      str_add(out$tpar_def) <- glue(
        "  // scaled parameters for the ICAR structure\n",
        "  vector[Nloc{p}] rcar{p};\n"
      )
      str_add(out$tpar_comp) <- glue(
        "  // compute scaled parameters for the ICAR structure\n",
        "  rcar{p} = zcar{p} * sdcar{p};\n"
      )
      str_add(out$prior) <- glue(
        "  // improper prior on the spatial CAR component\n",
        "  target += -0.5 * dot_self(zcar{p}[edges1{p}] - zcar{p}[edges2{p}]);\n",
        "  // soft sum-to-zero constraint\n",
        "  target += normal_{lpdf}(sum(zcar{p}) | 0, 0.001 * Nloc{p});\n"
      )
    } else if (acef_car$type == "bym2") {
      # BYM2 car based on the case study of Mitzi Morris
      # http://mc-stan.org/users/documentation/case-studies/icar_stan.html
      str_add(out$data) <- glue(
        "  // scaling factor of the spatial CAR component\n",
        "  real<lower=0> car_scale{p};\n"
      )
      str_add(out$par) <- glue(
        "  // parameters for the BYM2 structure\n",
        "  vector[Nloc{p}] zcar{p};  // spatial part\n",
        "  vector[Nloc{p}] nszcar{p};  // non-spatial part\n",
        "  // proportion of variance in the spatial part\n"
      )
      str_add_list(out) <- stan_prior(
        prior, class = "rhocar", px = px, suffix = p,
        type = "real<lower=0,upper=1>",
        normalize = normalize
      )
      # separate definition from computation to support fixed parameters
      str_add(out$tpar_def) <- glue(
        "  // scaled parameters for the BYM2 structure\n",
        "  vector[Nloc{p}] rcar{p};\n"
      )
      str_add(out$tpar_comp) <- glue(
        "  // join the spatial and the non-spatial CAR component\n",
        "  rcar{p} = (sqrt(1 - rhocar{p}) * nszcar{p}", 
        " + sqrt(rhocar{p} * inv(car_scale{p})) * zcar{p}) * sdcar{p};\n"
      )
      str_add(out$prior) <- glue(
        "  // improper prior on the spatial BYM2 component\n",
        "  target += -0.5 * dot_self(zcar{p}[edges1{p}] - zcar{p}[edges2{p}]);\n",
        "  // soft sum-to-zero constraint\n",
        "  target += normal_{lpdf}(sum(zcar{p}) | 0, 0.001 * Nloc{p});\n",
        "  // proper prior on the non-spatial BYM2 component\n",
        "  target += std_normal_{lpdf}(nszcar{p});\n"
      )
    }
  }
  
  acef_fcor <- subset2(acef, class = "fcor")
  if (NROW(acef_fcor)) {
    if (!has_natural_residuals) {
      stop2("FCOR terms are not implemented for this family.")
    }
    if (use_threading(threads)) {
      stop2("Threading is not supported for FCOR models.")
    }
    str_add(out$data) <- glue( 
      "  matrix[N{resp}, N{resp}] Mfcor{p};  // known residual covariance matrix\n"
    )
    str_add(out$tdata_def) <- glue(
      "  matrix[N{resp}, N{resp}] Lfcor{p} = cholesky_decompose(Mfcor{p});\n"
    )
  }
  out
}

# stan code for offsets
stan_offset <- function(bterms, threads, ...) {
  out <- list()
  if (is.formula(bterms$offset)) {
    p <- usc(combine_prefix(bterms))
    resp <- usc(bterms$resp)
    slice <- stan_slice(threads)
    # use 'offsets' as 'offset' will be reserved in stanc3
    str_add(out$data) <- glue( "  vector[N{resp}] offsets{p};\n")
    str_add(out$pll_args) <- glue(", data vector offsets{p}")
    str_add(out$eta) <- glue(" + offsets{p}{slice}")
  }
  out
}

# Stan code for non-linear predictor terms
# @param nlpars names of the non-linear parameters
# @param ilink character vector of length 2 defining the link to be applied
stan_nl <- function(bterms, data, nlpars, threads, ilink = rep("", 2), ...) {
  stopifnot(length(ilink) == 2L)
  out <- list()
  resp <- usc(bterms$resp)
  par <- combine_prefix(bterms, keep_mu = TRUE, nlp = TRUE)
  # prepare non-linear model
  n <- paste0(str_if(bterms$loop, "[n]"), " ") 
  new_nlpars <- glue(" nlp{resp}_{nlpars}{n}")
  # covariates in the non-linear model
  covars <- all.vars(bterms$covars)
  new_covars <- NULL
  if (length(covars)) {
    p <- usc(combine_prefix(bterms))
    new_covars <- rep(NA, length(covars))
    data_cnl <- data_cnl(bterms, data)
    if (bterms$loop) {
      slice <- stan_nn(threads)
    } else {
      slice <- stan_slice(threads)
    }
    slice <- paste0(slice, " ")
    str_add(out$data) <- "  // covariate vectors for non-linear functions\n"
    for (i in seq_along(covars)) {
      is_integer <- is.integer(data_cnl[[glue("C{p}_{i}")]])
      if (is_integer) {
        str_add(out$data) <- glue(
          "  int C{p}_{i}[N{resp}];\n"
        )
        str_add(out$pll_args) <- glue(", data int[] C{p}_{i}")
      } else {
        str_add(out$data) <- glue(
          "  vector[N{resp}] C{p}_{i};\n"
        )
        str_add(out$pll_args) <- glue(", data vector C{p}_{i}")
      }
      new_covars[i] <- glue(" C{p}_{i}{slice}")
    }
  }
  # add white spaces to be able to replace parameters and covariates
  syms <- c(
    "+", "-", "*", "/", "%", "^", ".*", "./", "'", ")", "(", 
    ",", "==", "!=", "<=", ">=", "<", ">", "!", "&&", "||" 
  )
  regex <- glue("(?<!\\.){escape_all(syms)}(?!=)")
  eta <- rm_wsp(collapse(deparse(bterms$formula[[2]])))
  eta <- wsp(rename(eta, regex, wsp(syms), fixed = FALSE, perl = TRUE)) 
  vars <- c(wsp(nlpars), wsp(covars), " ( ", " ) ")
  new_vars <- c(new_nlpars, new_covars, "(", ")")
  eta <- trimws(rename(eta, vars, new_vars))
  # possibly transform eta in the transformed params block
  str_add(out$model_def) <- glue(
    "  // initialize non-linear predictor term\n",
    "  vector[N{resp}] {par};\n"
  )
  # make sure mu comes last as it might depend on other parameters
  is_mu <- isTRUE("mu" %in% dpar_class(bterms[["dpar"]]))
  position <- str_if(is_mu, "model_comp_mu_link", "model_comp_dpar_link")
  if (bterms$loop) {
    str_add(out[[position]]) <- glue(
      "  for (n in 1:N{resp}) {{\n",
      stan_nn_def(threads),
      "    // compute non-linear predictor values\n",
      "    {par}[n] = {ilink[1]}{eta}{ilink[2]};\n",
      "  }}\n"
    )
  } else {
    str_add(out[[position]]) <- glue(
      "  // compute non-linear predictor values\n",
      "  {par} = {ilink[1]}{eta}{ilink[2]};\n"
    )
  }
  out
}

# global Stan definitions for noise-free variables
# @param meef output of tidy_meef
stan_Xme <- function(meef, prior, threads, normalize) {
  stopifnot(is.meef_frame(meef))
  if (!nrow(meef)) {
    return(list())
  }
  lpdf <- stan_lpdf_name(normalize)
  out <- list()
  coefs <- rename(paste0("me", meef$xname))
  str_add(out$data) <- "  // data for noise-free variables\n"
  str_add(out$par) <- "  // parameters for noise free variables\n"
  groups <- unique(meef$grname)
  for (i in seq_along(groups)) {
    g <- groups[i]
    # K are the global and J the local (within group) indices
    K <- which(meef$grname %in% g)
    J <- seq_along(K)
    if (nzchar(g)) {
      Nme <- glue("Nme_{i}")
      str_add(out$data) <- glue(
        "  int<lower=0> Nme_{i};  // number of latent values\n",
        "  int<lower=1> Jme_{i}[N];  // group index per observation\n"
      )
      str_add(out$pll_args) <- glue(", data int[] Jme_{i}")
    } else {
      Nme <- "N"
    }
    str_add(out$data) <- glue(
      "  int<lower=1> Mme_{i};  // number of groups\n"
    )
    str_add(out$data) <- cglue(
      "  vector[{Nme}] Xn_{K};  // noisy values\n",
      "  vector<lower=0>[{Nme}] noise_{K};  // measurement noise\n"
    )
    str_add_list(out) <- stan_prior(
      prior, "meanme", coef = coefs[K], suffix = usc(i),
      type = glue("vector[Mme_{i}]"), comment = "latent means",
      normalize = normalize
    )
    str_add_list(out) <- stan_prior(
      prior, "sdme", coef = coefs[K], suffix = usc(i),
      type = glue("vector<lower=0>[Mme_{i}]"), 
      coef_type = "real<lower=0>", comment = "latent SDs",
      normalize = normalize
    )
    str_add(out$prior) <- cglue(
      "  target += normal_{lpdf}(Xn_{K} | Xme_{K}, noise_{K});\n"
    )
    if (meef$cor[K[1]] && length(K) > 1L) {
      str_add(out$data) <- glue(
        "  int<lower=1> NCme_{i};  // number of latent correlations\n"
      )
      str_add(out$par) <- glue(
        "  matrix[Mme_{i}, {Nme}] zme_{i};  // standardized latent values\n"
      )
      str_add_list(out) <- stan_prior(
        prior, "Lme", group = g, suffix = usc(i),
        type = glue("cholesky_factor_corr[Mme_{i}]"),
        comment = "cholesky factor of the latent correlation matrix",
        normalize = normalize
      )
      # separate definition from computation to support fixed parameters
      str_add(out$tpar_def) <- glue(
        "  matrix[{Nme}, Mme_{i}] Xme{i};  // actual latent values\n"
      )
      str_add(out$tpar_comp) <- glue(
        "  // compute actual latent values\n",
        "  Xme{i} = rep_matrix(transpose(meanme_{i}), {Nme})", 
        " + transpose(diag_pre_multiply(sdme_{i}, Lme_{i}) * zme_{i});\n"
      )
      str_add(out$tpar_def) <- cglue(
        "  // using separate vectors increases efficiency\n",
        "  vector[{Nme}] Xme_{K};\n"
      )
      str_add(out$tpar_comp) <- cglue(
        "  Xme_{K} = Xme{i}[, {J}];\n"
      )
      str_add(out$pll_args) <- cglue(", vector Xme_{K}")
      str_add(out$prior) <- glue(
        "  target += std_normal_{lpdf}(to_vector(zme_{i}));\n"
      )
      str_add(out$gen_def) <- cglue(
        "  // obtain latent correlation matrix\n",
        "  corr_matrix[Mme_{i}] Corme_{i}", 
        " = multiply_lower_tri_self_transpose(Lme_{i});\n",
        "  vector<lower=-1,upper=1>[NCme_{i}] corme_{i};\n"
      )
      str_add(out$gen_comp) <- stan_cor_gen_comp(
        cor = glue("corme_{i}"), ncol = glue("Mme_{i}")
      )
    } else {
      str_add(out$par) <- cglue(
        "  vector[{Nme}] zme_{K};  // standardized latent values\n"
      )
      # separate definition from computation to support fixed parameters
      str_add(out$tpar_def) <- cglue(
        "  vector[{Nme}] Xme_{K};  // actual latent values\n"
      )
      str_add(out$tpar_comp) <- cglue(
        "  // compute actual latent values\n",
        "  Xme_{K} = meanme_{i}[{J}] + sdme_{i}[{J}] * zme_{K};\n"
      )
      str_add(out$pll_args) <- cglue(", vector Xme_{K}")
      str_add(out$prior) <- cglue(
        "  target += std_normal_{lpdf}(zme_{K});\n"
      )
    }
  }
  out
}

# initialize and compute a linear predictor term in Stan language
# @param out list of character strings containing Stan code
# @param bterms btl object
# @param ranef output of tidy_ranef
# @param primitive use Stan's GLM likelihood primitives?
# @param ilink character vector of length 2 defining the link to be applied
# @param ... currently unused
# @return list of character strings containing Stan code
stan_eta_combine <- function(out, bterms, ranef, threads, primitive,
                             ilink = c("", ""), ...) {
  stopifnot(is.list(out), is.btl(bterms), length(ilink) == 2L)
  if (primitive && !has_special_terms(bterms)) {
    # only overall effects and perhaps an intercept are present
    # which will be evaluated directly in the GLM primitive likelihood
    return(out)
  }
  px <- check_prefix(bterms)
  resp <- usc(bterms$resp)
  eta <- combine_prefix(px, keep_mu = TRUE, nlp = TRUE)
  out$eta <- sub("^[ \t\r\n]+\\+", "", out$eta, perl = TRUE)
  str_add(out$model_def) <- glue(
    "  // initialize linear predictor term\n",
    "  vector[N{resp}] {eta} ={out$eta};\n"
  )
  out$eta <- NULL
  str_add(out$loopeta) <- stan_eta_re(ranef, threads = threads, px = px)
  if (nzchar(out$loopeta)) {
    # parts of eta are computed in a loop over observations
    out$loopeta <- sub("^[ \t\r\n]+\\+", "", out$loopeta, perl = TRUE)
    str_add(out$model_comp_eta_loop) <- glue(
      "  for (n in 1:N{resp}) {{\n",
      "    // add more terms to the linear predictor\n",
      stan_nn_def(threads),
      "    {eta}[n] +={out$loopeta};\n",
      "  }}\n"
    )
  }
  out$loopeta <- NULL
  # possibly transform eta before it is passed to the likelihood
  if (sum(nzchar(ilink))) {
    # make sure mu comes last as it might depend on other parameters
    is_mu <- isTRUE("mu" %in% dpar_class(bterms[["dpar"]]))
    position <- str_if(is_mu, "model_comp_mu_link", "model_comp_dpar_link")
    str_add(out[[position]]) <- glue(
      "  for (n in 1:N{resp}) {{\n",
      "    // apply the inverse link function\n",
      "    {eta}[n] = {ilink[1]}{eta}[n]{ilink[2]};\n",
      "  }}\n"
    )
  }
  out
}

# define Stan code to compute the fixef part of eta
# @param fixef names of the population-level effects
# @param bterms object of class 'btl'
# @param primitive use Stan's GLM likelihood primitives?
# @return a single character string
stan_eta_fe <- function(fixef, bterms, threads, primitive) {
  if (length(fixef) && !primitive) {
    p <- usc(combine_prefix(bterms))
    center_X <- stan_center_X(bterms)
    decomp <- get_decomp(bterms$fe)
    sparse <- is_sparse(bterms$fe)
    if (sparse) {
      stopifnot(!center_X && decomp == "none")
      csr_args <- sargs(
        paste0(c("rows", "cols"), "(X", p, ")"),
        paste0(c("wX", "vX", "uX", "b"), p)
      )
      eta_fe <- glue("csr_matrix_times_vector({csr_args})")
    } else {
      sfx_X <- sfx_b <- ""
      if (decomp == "QR") {
        sfx_X <- sfx_b <- "Q"
      } else if (center_X) {
        sfx_X <- "c"
      }
      slice <- stan_slice(threads)
      eta_fe <- glue("X{sfx_X}{p}{slice} * b{sfx_b}{p}")
    }
  } else { 
    resp <- usc(bterms$resp)
    eta_fe <- glue("rep_vector(0.0, N{resp})")
  }
  glue(" + {eta_fe}")
}

# write the group-level part of the linear predictor
# @return a single character string
stan_eta_re <- function(ranef, threads, px = list()) {
  eta_re <- ""
  n <- stan_nn(threads)
  ranef <- subset2(ranef, type = c("", "mmc"), ls = px)
  for (id in unique(ranef$id)) {
    r <- subset2(ranef, id = id)
    rpx <- check_prefix(r)
    idp <- paste0(r$id, usc(combine_prefix(rpx)))
    idresp <- paste0(r$id, usc(rpx$resp))
    if (r$gtype[1] == "mm") {
      ng <- seq_along(r$gcall[[1]]$groups)
      for (i in seq_rows(r)) {
        str_add(eta_re) <- cglue(
          " + W_{idresp[i]}_{ng}{n}", 
          " * r_{idp[i]}_{r$cn[i]}[J_{idresp[i]}_{ng}{n}]",
          " * Z_{idp[i]}_{r$cn[i]}_{ng}{n}"
        ) 
      }
    } else {
      str_add(eta_re) <- cglue(
        " + r_{idp}_{r$cn}[J_{idresp}{n}] * Z_{idp}_{r$cn}{n}"
      )
    }
  }
  eta_re
}

# Stan code for group-level parameters in special predictor terms
# @param r data.frame created by tidy_ranef
# @return a character vector: one element per row of 'r' 
stan_eta_rsp <- function(r) {
  stopifnot(nrow(r) > 0L, length(unique(r$gtype)) == 1L)
  rpx <- check_prefix(r)
  idp <- paste0(r$id, usc(combine_prefix(rpx)))
  idresp <- paste0(r$id, usc(rpx$resp))
  if (r$gtype[1] == "mm") {
    ng <- seq_along(r$gcall[[1]]$groups)
    out <- rep("", nrow(r))
    for (i in seq_along(out)) {
      out[i] <- glue(
        "W_{idresp[i]}_{ng}[n] * r_{idp[i]}_{r$cn[i]}[J_{idresp[i]}_{ng}[n]]",
        collapse = " + "
      ) 
    }
  } else {
    out <- glue("r_{idp}_{r$cn}[J_{idresp}[n]]")
  }
  out
}

# does eta need to be transformed manually using the link functions
# @param family the model family
# @param cens_or_trunc is the model censored or truncated?
stan_eta_transform <- function(family, bterms) {
  transeta <- "transeta" %in% family_info(family, "specials")
  no_transform <- family$link == "identity" && !transeta || 
    has_joint_link(family) && !is.customfamily(family)
  !no_transform && !stan_has_built_in_fun(family, bterms)
}

# correctly apply inverse link to eta
# @param dpar name of the parameter for which to define the link
# @param bterms object of class 'brmsterms'
# @param resp name of the response variable
# @return a single character string
stan_eta_ilink <- function(dpar, bterms, resp = "") {
  stopifnot(is.brmsterms(bterms))
  out <- rep("", 2)
  family <- bterms$dpars[[dpar]]$family
  if (stan_eta_transform(family, bterms)) {
    dpar_id <- dpar_id(dpar)
    pred_dpars <- names(bterms$dpars)
    shape <- glue("shape{dpar_id}")
    n_shape <- str_if(shape %in% pred_dpars, "[n]")
    shape <- glue("{shape}{resp}{n_shape}")
    nu <- glue("nu{dpar_id}")
    n_nu <- str_if(nu %in% pred_dpars, "[n]")
    nu <- glue("{nu}{resp}{n_nu}")
    
    family_link <- str_if(
      family$family %in% c("gamma", "hurdle_gamma", "exponential"),
      paste0(family$family, "_", family$link), family$family
    )
    ilink <- stan_ilink(family$link)
    out <- switch(family_link,
      c(glue("{ilink}("), ")"),
      gamma_log = c(glue("{shape} * exp(-("), "))"),
      gamma_inverse = c(glue("{shape} * ("), ")"),
      gamma_identity = c(glue("{shape} / ("), ")"),
      hurdle_gamma_log = c(glue("{shape} * exp(-("), "))"),
      hurdle_gamma_inverse = c(glue("{shape} * ("), ")"),
      hurdle_gamma_identity = c(glue("{shape} / ("), ")"),
      exponential_log = c("exp(-(", "))"),
      exponential_inverse = c("(", ")"),
      exponential_identity = c("inv(", ")"),
      weibull = c(glue("{ilink}("), glue(") / tgamma(1 + 1 / {shape})")),
      frechet = c(glue("{ilink}("), glue(") / tgamma(1 - 1 / {nu})"))
    )
  }
  out
}

# indicate if the population-level design matrix should be centered
# implies a temporary shift in the intercept of the model
stan_center_X <- function(x) {
  is.btl(x) && !no_center(x$fe) && has_intercept(x$fe) && 
    !fix_intercepts(x) && !is_sparse(x$fe) && !has_sum_to_zero_thres(x)
}

# indicate if the overall coefficients 'b' should be
# assigned in the transformed parameters block 
stan_assign_b_tpar <- function(bterms, prior) {
  special <- get_special_prior(prior, bterms)
  !is.null(special$horseshoe) || !is.null(special$R2D2)
}

# default Stan definitions for distributional parameters
# @param dpar name of a distributional parameter
# @param suffix optional suffix of the parameter name
# @param family optional brmsfamily object
# @param fixed should the parameter be fixed to a certain value?
stan_dpar_types <- function(dpar, suffix = "", family = NULL, fixed = FALSE) {
  dpar <- as_one_character(dpar)
  suffix <- as_one_character(suffix)
  fixed <- as_one_logical(fixed)
  if (is.mixfamily(family)) {
    if (dpar_class(dpar) == "theta") {
      return("")  # theta is handled in stan_mixture
    }
    family <- family$mix[[as.numeric(dpar_id(dpar))]]
  }
  if (is.customfamily(family)) {
    dpar_class <- dpar_class(dpar)
    lb <- family$lb[[dpar_class]]
    ub <- family$ub[[dpar_class]]
    lb <- if (!is.na(lb)) glue("lower={lb}")
    ub <- if (!is.na(ub)) glue("upper={ub}")
    bounds <- paste0(c(lb, ub), collapse = ",")
    if (nzchar(bounds)) bounds <- glue("<{bounds}>")
    return(glue("real{bounds}"))
  }
  if (fixed) {
    min_Y <- glue("min(Y{suffix})")
  } else {
    min_Y <- glue("min_Y{suffix}")
  }
  default_types <- list(
    sigma = list(
      type = "real<lower=0>", 
      comment = "dispersion parameter"
    ),
    shape = list(
      type = "real<lower=0>", 
      comment = "shape parameter"
    ),
    nu = list(
      type = "real<lower=1>", 
      comment = "degrees of freedom or shape"
    ),
    phi = list(
      type = "real<lower=0>", 
      comment = "precision parameter"
    ),
    kappa = list(
      type = "real<lower=0>", 
      comment = "precision parameter"
    ),
    beta = list(
      type = "real<lower=0>", 
      comment = "scale parameter"
    ),
    zi = list(
      type = "real<lower=0,upper=1>", 
      comment = "zero-inflation probability"
    ), 
    hu = list(
      type = "real<lower=0,upper=1>", 
      comment = "hurdle probability"
    ),
    zoi = list(
      type = "real<lower=0,upper=1>", 
      comment = "zero-one-inflation probability"
    ), 
    coi = list(
      type = "real<lower=0,upper=1>", 
      comment = "conditional one-inflation probability"
    ),
    bs = list(
      type = "real<lower=0>", 
      comment = "boundary separation parameter"
    ),
    ndt = list(
      type = glue("real<lower=0,upper={min_Y}>"), 
      comment = "non-decision time parameter"
    ),
    bias = list(
      type = "real<lower=0,upper=1>", 
      comment = "initial bias parameter"
    ),
    disc = list(
      type = "real<lower=0>", 
      comment = "discrimination parameters"
    ),
    quantile = list(
      type = "real<lower=0,upper=1>", 
      comment = "quantile parameter"
    ),
    xi = list(
      type = "real", 
      comment = "shape parameter"
    ),
    alpha = list(
      type = "real",
      comment = "skewness parameter"
    )
  )
  out <- ""
  types <- default_types[[dpar_class(dpar)]]
  if (!is.null(types)) {
    out <- types$type
    attr(out, "comment") <- types$comment
  }
  out
}

# default Stan definitions for temporary distributional parameters
stan_dpar_tmp_types <- function(dpar, suffix = "", family = NULL) {
  dpar <- as_one_character(dpar)
  suffix <- as_one_character(suffix)
  if (is.mixfamily(family)) {
    family <- family$mix[[as.numeric(dpar_id(dpar))]]
  }
  if (is.customfamily(family)) {
    return("")  # no temporary parameters in custom families
  }
  default_types <- list(
    xi = list(
      type = "real", 
      comment = "unscaled shape parameter"
    )
  )
  out <- ""
  types <- default_types[[dpar_class(dpar)]]
  if (!is.null(types)) {
    out <- types$type
    attr(out, "comment") <- types$comment
  }
  out
}

# Stan code for transformations of distributional parameters
stan_dpar_transform <- function(bterms, threads, ...) {
  stopifnot(is.brmsterms(bterms))
  out <- list()
  families <- family_names(bterms)
  p <- usc(combine_prefix(bterms))
  resp <- usc(bterms$resp)
  if (any(conv_cats_dpars(families))) {
    str_add(out$model_def) <- glue( 
      "  // linear predictor matrix\n",
      "  vector[ncat{p}] mu{p}[N{resp}];\n"
    )
    mu_dpars <- make_stan_names(glue("mu{bterms$family$cats}"))
    mu_dpars <- glue("{mu_dpars}{p}[n]")
    iref <- match(bterms$family$refcat, bterms$family$cats)
    mu_dpars[iref] <- "0" 
    str_add(out$model_comp_catjoin) <- glue(
      "  for (n in 1:N{resp}) {{\n",
      "    mu{p}[n] = {stan_vector(mu_dpars)};\n",
      "  }}\n"
    )
  }
  if (any(families %in% "skew_normal")) {
    # as suggested by Stephen Martin use sigma and mu of CP 
    # but the skewness parameter alpha of DP
    dp_names <- names(bterms$dpars)
    for (i in which(families %in% "skew_normal")) {
      id <- str_if(length(families) == 1L, "", i)
      sigma <- stan_sigma_transform(bterms, id = id, threads = threads)
      ns <- str_if(grepl(stan_nn_regex(), sigma), "[n]")
      na <- str_if(glue("alpha{id}") %in% dp_names, "[n]")
      type_delta <- str_if(nzchar(na), glue("vector[N{resp}]"), "real")
      no <- str_if(any(nzchar(c(ns, na))), "[n]", "")
      type_omega <- str_if(nzchar(no), glue("vector[N{resp}]"), "real")
      str_add(out$model_def) <- glue(
        "  // parameters used to transform the skew-normal distribution\n",
        "  {type_delta} delta{id}{p};  // transformed alpha parameter\n",
        "  {type_omega} omega{id}{p};  // scale parameter\n"
      )
      alpha <- glue("alpha{id}{p}{na}")
      delta <- glue("delta{id}{p}{na}")
      omega <- glue("omega{id}{p}{no}")
      comp_delta <- glue(
        "  {delta} = {alpha} / sqrt(1 + {alpha}^2);\n"
      )
      comp_omega <- glue(
        "  {omega} = {sigma} / sqrt(1 - sqrt(2 / pi())^2 * {delta}^2);\n"
      )
      str_add(out$model_comp_dpar_trans) <- glue(
        "  // use efficient skew-normal parameterization\n",
        str_if(!nzchar(na), comp_delta),
        str_if(!nzchar(no), comp_omega),
        "  for (n in 1:N{resp}) {{\n",
        stan_nn_def(threads),
        str_if(nzchar(na), glue("  ", comp_delta)),
        str_if(nzchar(no), glue("  ", comp_omega)),
        "    mu{id}{p}[n] = mu{id}{p}[n]", 
        " - {omega} * {delta} * sqrt(2 / pi());\n",
        "  }}\n"
      )
    }
  }
  if (any(families %in% "gen_extreme_value")) {
    dp_names <- c(names(bterms$dpars), names(bterms$fdpars))
    for (i in which(families %in% "gen_extreme_value")) {
      id <- str_if(length(families) == 1L, "", i)
      xi <- glue("xi{id}")
      if (!xi %in% dp_names) {
        str_add(out$model_def) <- glue(
          "  real {xi};  // scaled shape parameter\n"
        )
        sigma <- glue("sigma{id}")
        sfx <- str_if(sigma %in% names(bterms$dpars), "_vector")
        args <- sargs(
          glue("tmp_{xi}"), glue("Y{p}"), 
          glue("mu{id}{p}"), glue("{sigma}{p}")
        )
        str_add(out$model_comp_dpar_trans) <- glue(
          "  {xi}{p} = scale_xi{sfx}({args});\n"
        )
      }
    }
  }
  out
}

# Stan code for sigma to incorporate addition argument 'se'
stan_sigma_transform <- function(bterms, id = "", threads = NULL) {
  if (nzchar(id)) {
    # find the right family in mixture models
    family <- family_names(bterms)[as.integer(id)]
  } else {
    family <- bterms$family$family
    stopifnot(!isTRUE(family == "mixture"))
  }
  p <- usc(combine_prefix(bterms))
  ns <- str_if(glue("sigma{id}") %in% names(bterms$dpars), "[n]")
  has_sigma <- has_sigma(family) && !no_sigma(bterms)
  sigma <- str_if(has_sigma, glue("sigma{id}{p}{ns}"))
  if (is.formula(bterms$adforms$se)) {
    nse <- stan_nn(threads)
    sigma <- str_if(nzchar(sigma), 
      glue("sqrt(square({sigma}) + se2{p}{nse})"), 
      glue("se{p}{nse}")
    )
  }
  sigma
}

Try the brms package in your browser

Any scripts or data that you put into this service are public.

brms documentation built on Aug. 23, 2021, 5:08 p.m.