Nothing
# Family-specific helper functions
#
# `extend_family(family)` returns a `family` object augmented with auxiliary
# functions that are needed for computing KL-divergence, log likelihood,
# dispersion projection, etc.
#
# Missing: Quasi-families are not implemented. If dis_gamma is the correct shape
# parameter for projected Gamma regression, everything should be OK for gamma.
#' Extend a family
#'
#' This function adds some internally required elements to an object of class
#' `family` (see, e.g., [family()]). It is called internally by
#' [init_refmodel()], so you will rarely need to call it yourself.
#'
#' @param family An object of class `family`.
#' @param latent A single logical value indicating whether to use the latent
#' projection (`TRUE`) or not (`FALSE`). Note that setting `latent = TRUE`
#' causes all arguments starting with `augdat_` to be ignored.
#' @param latent_y_unqs Only relevant for a latent projection where the original
#' response space has finite support (i.e., the original response values may
#' be regarded as categories), in which case this needs to be the character
#' vector of unique response values (which will be assigned to `family$cats`
#' internally) or may be left at `NULL` (so that \pkg{projpred} will try to
#' infer it from `family$cats`). See also section "Latent projection" below.
#' @param latent_ilink Only relevant for the latent projection, in which case
#' this needs to be the inverse-link function. If the original response family
#' was the [binomial()] or the [poisson()] family, then `latent_ilink` can be
#' `NULL`, in which case an internal default will be used. Can also be `NULL`
#' in all other cases, but then an internal default based on `family$linkinv`
#' will be used which might not work for all families. See also section
#' "Latent projection" below.
#' @param latent_ll_oscale Only relevant for the latent projection, in which
#' case this needs to be the function computing response-scale (not
#' latent-scale) log-likelihood values. If `!is.null(family$cats)` (after
#' taking `latent_y_unqs` into account) or if the original response family was
#' the [binomial()] or the [poisson()] family, then `latent_ll_oscale` can be
#' `NULL`, in which case an internal default will be used. Can also be `NULL`
#' in all other cases, but then downstream functions will have limited
#' functionality (a message thrown by [extend_family()] will state what
#' exactly won't be available). See also section "Latent projection" below.
#' @param latent_ppd_oscale Only relevant for the latent projection, in which
#' case this needs to be the function sampling response values given latent
#' predictors that have been transformed to response scale using
#' `latent_ilink`. If `!is.null(family$cats)` (after taking `latent_y_unqs`
#' into account) or if the original response family was the [binomial()] or
#' the [poisson()] family, then `latent_ppd_oscale` can be `NULL`, in which
#' case an internal default will be used. Can also be `NULL` in all other
#' cases, but then downstream functions will have limited functionality (a
#' message thrown by [extend_family()] will state what exactly won't be
#' available). See also section "Latent projection" below. Note that although
#' this function has the abbreviation "PPD" in its name (which stands for
#' "posterior predictive distribution"), \pkg{projpred} currently only uses it
#' in [proj_predict()], i.e., for sampling from what would better be termed
#' posterior-projection predictive distribution (PPPD).
#' @param augdat_y_unqs Only relevant for augmented-data projection, in which
#' case this needs to be the character vector of unique response values (which
#' will be assigned to `family$cats` internally) or may be left at `NULL` if
#' `family$cats` is already non-`NULL`. See also section "Augmented-data
#' projection" below.
#' @param augdat_link Only relevant for augmented-data projection, in which case
#' this needs to be the link function. Use `NULL` for the traditional
#' projection. See also section "Augmented-data projection" below.
#' @param augdat_ilink Only relevant for augmented-data projection, in which
#' case this needs to be the inverse-link function. Use `NULL` for the
#' traditional projection. See also section "Augmented-data projection" below.
#' @param augdat_args_link Only relevant for augmented-data projection, in which
#' case this may be a named `list` of arguments to pass to the function
#' supplied to `augdat_link`.
#' @param augdat_args_ilink Only relevant for augmented-data projection, in
#' which case this may be a named `list` of arguments to pass to the function
#' supplied to `augdat_ilink`.
#' @param ... Ignored (exists only to swallow up further arguments which might
#' be passed to this function).
#'
#' @details
#'
#' In the following, \eqn{N}, \eqn{C_{\mathrm{cat}}}{C_cat},
#' \eqn{C_{\mathrm{lat}}}{C_lat}, \eqn{S_{\mathrm{ref}}}{S_ref}, and
#' \eqn{S_{\mathrm{prj}}}{S_prj} from help topic [refmodel-init-get] are used.
#' Note that \eqn{N} does not necessarily denote the number of original
#' observations; it can also refer to new observations. Furthermore, let \eqn{S}
#' denote either \eqn{S_{\mathrm{ref}}}{S_ref} or \eqn{S_{\mathrm{prj}}}{S_prj},
#' whichever is appropriate in the context where it is used.
#'
#' # Augmented-data projection
#'
#' As their first input, the functions supplied to arguments `augdat_link` and
#' `augdat_ilink` have to accept:
#' * For `augdat_link`: an \eqn{S \times N \times C_{\mathrm{cat}}}{S x N x
#' C_cat} array containing the probabilities for the response categories. The
#' order of the response categories is the same as in `family$cats` (see
#' argument `augdat_y_unqs`).
#' * For `augdat_ilink`: an \eqn{S \times N \times C_{\mathrm{lat}}}{S x N x
#' C_lat} array containing the linear predictors.
#'
#' The return value of these functions needs to be:
#' * For `augdat_link`: an \eqn{S \times N \times C_{\mathrm{lat}}}{S x N x
#' C_lat} array containing the linear predictors.
#' * For `augdat_ilink`: an \eqn{S \times N \times C_{\mathrm{cat}}}{S x N x
#' C_cat} array containing the probabilities for the response categories. The
#' order of the response categories has to be the same as in `family$cats` (see
#' argument `augdat_y_unqs`).
#'
#' For the augmented-data projection, the response vector resulting from
#' `extract_model_data` (see [init_refmodel()]) is coerced to a `factor` (using
#' [as.factor()]) at multiple places throughout this package. Inside of
#' [init_refmodel()], the levels of this `factor` have to be identical to
#' `family$cats` (*after* applying [extend_family()] inside of
#' [init_refmodel()]). Everywhere else, these levels have to be a subset of
#' `<refmodel>$family$cats` (where `<refmodel>` is an object resulting from
#' [init_refmodel()]). See argument `augdat_y_unqs` for how to control
#' `family$cats`.
#'
#' For ordinal \pkg{brms} families, be aware that the submodels (onto which the
#' reference model is projected) currently have the following restrictions:
#' * The discrimination parameter `disc` is not supported (i.e., it is a
#' constant with value 1).
#' * The thresholds are `"flexible"` (see [brms::brmsfamily()]).
#' * The thresholds do not vary across the levels of a `factor`-like variable
#' (see argument `gr` of [brms::resp_thres()]).
#' * The `"probit_approx"` link is replaced by `"probit"`.
#'
#' For the [brms::categorical()] family, be aware that:
#' * For multilevel submodels, the group-level effects are allowed to be
#' correlated between different response categories.
#' * For multilevel submodels, \pkg{mclogit} versions < 0.9.4 may throw the
#' error \code{'a' (<number> x 1) must be square}. Updating \pkg{mclogit} to a
#' version >= 0.9.4 should fix this.
#'
#' # Latent projection
#'
#' The function supplied to argument `latent_ilink` needs to have the prototype
#' ```{r, eval = FALSE}
#' latent_ilink(lpreds, cl_ref, wdraws_ref = rep(1, length(cl_ref)))
#' ```
#' where:
#' * `lpreds` accepts an \eqn{S \times N}{S x N} matrix containing the linear
#' predictors.
#' * `cl_ref` accepts a numeric vector of length \eqn{S_{\mathrm{ref}}}{S_ref},
#' containing \pkg{projpred}'s internal cluster indices for these draws.
#' * `wdraws_ref` accepts a numeric vector of length
#' \eqn{S_{\mathrm{ref}}}{S_ref}, containing weights for these draws. These
#' weights should be treated as not being normalized (i.e., they don't
#' necessarily sum to `1`).
#'
#' The return value of `latent_ilink` needs to contain the linear predictors
#' transformed to the original response space, with the following structure:
#' * If `is.null(family$cats)` (after taking `latent_y_unqs` into account): an
#' \eqn{S \times N}{S x N} matrix.
#' * If `!is.null(family$cats)` (after taking `latent_y_unqs` into account): an
#' \eqn{S \times N \times C_{\mathrm{cat}}}{S x N x C_cat} array. In that case,
#' `latent_ilink` needs to return *probabilities* (for the response categories
#' given in `family$cats`, after taking `latent_y_unqs` into account).
#'
#' The function supplied to argument `latent_ll_oscale` needs to have the
#' prototype
#' ```{r, eval = FALSE}
#' latent_ll_oscale(ilpreds, y_oscale, wobs = rep(1, length(y_oscale)), cl_ref,
#' wdraws_ref = rep(1, length(cl_ref)))
#' ```
#' where:
#' * `ilpreds` accepts the return value from `latent_ilink`.
#' * `y_oscale` accepts a vector of length \eqn{N} containing response values on
#' the original response scale.
#' * `wobs` accepts a numeric vector of length \eqn{N} containing observation
#' weights.
#' * `cl_ref` accepts the same input as argument `cl_ref` of `latent_ilink`.
#' * `wdraws_ref` accepts the same input as argument `wdraws_ref` of
#' `latent_ilink`.
#'
#' The return value of `latent_ll_oscale` needs to be an \eqn{S \times N}{S x N}
#' matrix containing the response-scale (not latent-scale) log-likelihood values
#' for the \eqn{N} observations from its inputs.
#'
#' The function supplied to argument `latent_ppd_oscale` needs to have the
#' prototype
#' ```{r, eval = FALSE}
#' latent_ppd_oscale(ilpreds_resamp, wobs, cl_ref,
#' wdraws_ref = rep(1, length(cl_ref)), idxs_prjdraws)
#' ```
#' where:
#' * `ilpreds_resamp` accepts the return value from `latent_ilink`, but possibly
#' with resampled (clustered) draws (see argument `nresample_clusters` of
#' [proj_predict()]).
#' * `wobs` accepts a numeric vector of length \eqn{N} containing observation
#' weights.
#' * `cl_ref` accepts the same input as argument `cl_ref` of `latent_ilink`.
#' * `wdraws_ref` accepts the same input as argument `wdraws_ref` of
#' `latent_ilink`.
#' * `idxs_prjdraws` accepts a numeric vector of length `dim(ilpreds_resamp)[1]`
#' containing the resampled indices of the projected draws (i.e., these indices
#' are values from the set \eqn{\{1, ..., \texttt{dim(ilpreds)[1]}\}}{{1, ...,
#' dim(ilpreds)[1]}} where `ilpreds` denotes the return value of
#' `latent_ilink`).
#'
#' The return value of `latent_ppd_oscale` needs to be a
#' \eqn{\texttt{dim(ilpreds\_resamp)[1]} \times N}{dim(ilpreds_resamp)[1] x N}
#' matrix containing the response-scale (not latent-scale) draws from the
#' posterior(-projection) predictive distributions for the \eqn{N} observations
#' from its inputs.
#'
#' If the bodies of these three functions involve parameter draws from the
#' reference model which have not been projected (e.g., for `latent_ilink`, the
#' thresholds in an ordinal model), [cl_agg()] is provided as a helper function
#' for aggregating these reference model draws in the same way as the draws have
#' been aggregated for the first argument of these functions (e.g., `lpreds` in
#' case of `latent_ilink`).
#'
#' In fact, the weights passed to argument `wdraws_ref` are nonconstant only in
#' case of [cv_varsel()] with `cv_method = "LOO"` and `validate_search = TRUE`.
#' In that case, the weights passed to this argument are the PSIS-LOO CV weights
#' for one observation. Note that although argument `wdraws_ref` has the suffix
#' `_ref`, `wdraws_ref` does not necessarily obtain weights for the *initial*
#' reference model's posterior draws: In case of [cv_varsel()] with `cv_method =
#' "kfold"`, these weights may refer to one of the \eqn{K} reference model
#' refits (but in that case, they are constant anyway).
#'
#' If `family$cats` is not `NULL` (after taking `latent_y_unqs` into account),
#' then the response vector resulting from `extract_model_data` (see
#' [init_refmodel()]) is coerced to a `factor` (using [as.factor()]) at multiple
#' places throughout this package. Inside of [init_refmodel()], the levels of
#' this `factor` have to be identical to `family$cats` (*after* applying
#' [extend_family()] inside of [init_refmodel()]). Everywhere else, these levels
#' have to be a subset of `<refmodel>$family$cats` (where `<refmodel>` is an
#' object resulting from [init_refmodel()]).
#'
#' @return The `family` object extended in the way needed by \pkg{projpred}.
#'
#' @export
extend_family <- function(family,
latent = FALSE,
latent_y_unqs = NULL,
latent_ilink = NULL,
latent_ll_oscale = NULL,
latent_ppd_oscale = NULL,
augdat_y_unqs = NULL,
augdat_link = NULL,
augdat_ilink = NULL,
augdat_args_link = list(),
augdat_args_ilink = list(),
...) {
if (has_family_extras(family)) {
# If the family was already extended using this function, then return as-is:
return(family)
}
if (latent) {
family_oscale_tmp <- family$family
link_oscale_tmp <- family$link
linkinv_oscale_tmp <- family$linkinv
cats_oscale_tmp <- family$cats
family <- gaussian()
family$family_oscale <- family_oscale_tmp
family$link_oscale <- link_oscale_tmp
}
aug_data <- !is.null(augdat_link) && !is.null(augdat_ilink) && !latent
if (!aug_data) {
extend_family_specific <- paste0("extend_family_", tolower(family$family))
if (!exists(extend_family_specific, mode = "function")) {
stop("Family '", family$family, "' is not supported by projpred.")
}
extend_family_specific <- get(extend_family_specific, mode = "function")
family <- extend_family_specific(family)
# If `family$cats` weren't `NULL`, then downstream code in projpred would
# have to be adapted:
stopifnot(is.null(family$cats))
if (latent) {
if (!is.null(latent_y_unqs)) {
family$cats <- latent_y_unqs
} else {
family$cats <- cats_oscale_tmp
}
if (is.null(latent_ilink)) {
if (family$family_oscale == "binomial") {
latent_ilink <- function(lpreds, cl_ref,
wdraws_ref = rep(1, length(cl_ref))) {
ilpreds <- ilinkfun_raw(lpreds, link_nm = family$link_oscale)
if (!is.null(family$cats)) {
ilpreds <- abind::abind(1 - ilpreds, ilpreds, rev.along = 0)
}
return(ilpreds)
}
} else {
if (family$family_oscale != "poisson") {
message("Defining `latent_ilink` as a function which calls ",
"`family$linkinv`, but there is no guarantee that this ",
"will work for all families. If relying on ",
"`family$linkinv` is not appropriate or if this raises an ",
"error in downstream functions, supply a custom ",
"`latent_ilink` function (which is also allowed to return ",
"only `NA`s if response-scale post-processing is not ",
"needed).")
}
latent_ilink <- function(lpreds, cl_ref,
wdraws_ref = rep(1, length(cl_ref))) {
return(linkinv_oscale_tmp(lpreds))
}
}
}
if (is.null(latent_ll_oscale)) {
if (!is.null(family$cats)) {
latent_ll_oscale <- latent_ll_oscale_cats
} else if (family$family_oscale == "binomial") {
latent_ll_oscale <- latent_ll_oscale_binom_nocats
} else if (family$family_oscale == "poisson") {
latent_ll_oscale <- latent_ll_oscale_poiss
} else {
latent_ll_oscale <- latent_ll_oscale_NA
message("`latent_ll_oscale` was `NULL` and a suitable internal ",
"default could not be found (other than a function ",
"returning only `NA`s). Thus, cv_varsel() with `cv_method = ",
"\"LOO\"` won't be usable. Furthermore, some features of ",
"predict.refmodel(), summary.vsel(), print.vsel(), ",
"plot.vsel(), suggest_size.vsel(), and proj_linpred() won't ",
"work on response scale (only on latent scale).")
}
}
if (is.null(latent_ppd_oscale)) {
if (!is.null(family$cats)) {
latent_ppd_oscale <- latent_ppd_oscale_cats
} else if (family$family_oscale == "binomial") {
latent_ppd_oscale <- latent_ppd_oscale_binom_nocats
} else if (family$family_oscale == "poisson") {
latent_ppd_oscale <- latent_ppd_oscale_poiss
} else {
latent_ppd_oscale <- latent_ppd_oscale_NA
message("`latent_ppd_oscale` was `NULL` and a suitable internal ",
"default could not be found (other than a function ",
"returning only `NA`s). Thus, proj_predict() won't work on ",
"response scale (only on latent scale).")
}
}
family$latent_ilink <- latent_ilink
family$latent_ll_oscale <- latent_ll_oscale
family$latent_ppd_oscale <- latent_ppd_oscale
}
family$for_latent <- latent
family$for_augdat <- FALSE
} else {
if (!is.null(augdat_y_unqs)) {
family$cats <- augdat_y_unqs
} else if (is.null(family$cats)) {
stop("Please supply argument `augdat_y_unqs` or ensure that ",
"`family$cats` is not `NULL`.")
}
# Checks for special 'brms' features (currently only necessary for argument
# `refcat` of the brms::categorical() family):
if (!(is.null(family$refcat) ||
identical(family$refcat, utils::head(family$cats, 1)))) {
stop("Currently, the first category must be the reference category.")
}
family$linkfun <- function(mu) {
mu_arr <- augmat2arr(mu, margin_draws = 1)
eta_arr <- do.call(augdat_link, c(list(mu_arr), augdat_args_link))
return(arr2augmat(eta_arr, margin_draws = 1))
}
family$linkinv <- function(eta) {
eta_arr <- augmat2arr(eta, margin_draws = 1)
mu_arr <- do.call(augdat_ilink, c(list(eta_arr), augdat_args_ilink))
return(arr2augmat(mu_arr, margin_draws = 1))
}
family$ce_ptwise <- function(mu_ref, mu_sub, w_obs = 1) {
mu_ref_arr <- augmat2arr(mu_ref)
mu_sub_arr <- augmat2arr(mu_sub)
stopifnot(identical(dim(mu_ref_arr), dim(mu_sub_arr)))
n_obs <- dim(mu_ref_arr)[1]
n_prjdraws <- dim(mu_ref_arr)[3]
if (length(w_obs) == 0) {
w_obs <- rep(1, n_obs)
} else if (length(w_obs) == 1) {
w_obs <- rep(w_obs, n_obs)
} else if (length(w_obs) != n_obs) {
stop("Argument `w_obs` needs to be of length 0, 1, or `n_obs`.")
}
return(do.call(rbind, lapply(seq_len(n_obs), function(i_obs) {
do.call(c, lapply(seq_len(n_prjdraws), function(i_prjdraws) {
prbs_ref <- mu_ref_arr[i_obs, , i_prjdraws]
prbs_sub <- mu_sub_arr[i_obs, , i_prjdraws]
# Assign some nonzero value to have a finite log() value (the specific
# value doesn't matter for `prbs_ref` because of the multiplication
# with zero):
prbs_ref[prbs_ref == 0] <- .Machine$double.eps
# Assign some nonzero value to have a finite log() value (for
# `prbs_sub`, the specific value matters, in contrast to `prbs_ref`):
prbs_sub[prbs_sub == 0] <- .Machine$double.eps
# In analogy to `extend_family(binomial())$ce()`,
# `extend_family(poisson())$ce()`, and
# `extend_family(gaussian())$ce()`, multiply by the observation
# weight:
return(w_obs[i_obs] * (-sum(prbs_ref * log(prbs_sub))))
}))
})))
}
family$ce <- function(pref, data, psub) {
ce_sums <- colSums(
family$ce_ptwise(mu_ref = pref$mu,
mu_sub = psub$mu,
w_obs = data$weights)
)
return(ce_sums / sum(data$weights))
}
family$dis_fun <- function(pref, psub, wobs = 1) {
return(rep(NA, ncol(pref$mu)))
}
family$predvar <- function(mu, dis, wdraws = 1) {
return(rep(NA, NROW(mu)))
}
family$ll_fun <- function(mu, dis = NULL, y, weights = 1) {
mu_arr <- augmat2arr(mu)
return(ll_cats(mu_arr, y = y, wobs = weights))
}
family$ppd <- function(mu, dis, weights = 1) {
mu_arr <- augmat2arr(augvec2augmat(mu))
return(ppd_cats(mu_arr, wobs = weights, return_vec = TRUE))
}
family$for_latent <- FALSE
family$for_augdat <- TRUE
}
family$is_extended <- TRUE
return(family)
}
extend_family_binomial <- function(family) {
# Helper function for calculating the log PMF of the binomial distribution,
# but (i) modified to be non-zero at `x` not contained in the support and (ii)
# "reduced" in the sense of lacking the (additive) part
# `log(choose(size, size * x))`:
dbinom_log_reduced <- function(x, size, prob) {
size * (x * log(prob) + (1 - x) * log(1 - prob))
}
ce_binom <- function(pref, data, psub) {
ce_sums <- -colSums(
dbinom_log_reduced(x = pref$mu, size = data$weights, prob = psub$mu)
)
return(ce_sums / sum(data$weights))
}
dis_na <- function(pref, psub, wobs = 1) {
rep(NA, ncol(pref$mu))
}
predvar_na <- function(mu, dis, wdraws = 1) {
rep(NA, NROW(mu))
}
ll_binom <- function(mu, dis, y, weights = 1) {
y <- as.matrix(y)
dbinom(y, weights, mu, log = TRUE)
}
dev_binom <- function(mu, y, weights = 1, dis = NULL) {
if (NCOL(y) < NCOL(mu)) {
y <- matrix(y, nrow = length(y), ncol = NCOL(mu))
}
-2 * dbinom_log_reduced(x = y, size = weights, prob = mu)
}
ppd_binom <- function(mu, dis, weights = 1) {
rbinom(length(mu), weights, mu)
}
initialize_binom <- expression({
if (NCOL(y) == 1) {
if (is.factor(y)) {
y <- y != levels(y)[1L]
}
n <- rep.int(1, nobs)
y[weights == 0] <- 0
if (any(y < 0 | y > 1)) {
stop("y values must be 0 <= y <= 1")
}
mustart <- (weights * y + 0.5) / (weights + 1)
m <- weights * y
if ("binomial" == "binomial" && any(abs(m - round(m)) >
0.001)) {
### Deactivated because in general, this will be the case in 'projpred':
# warning(gettextf("non-integer #successes in a %s glm!",
# "binomial"), domain = NA)
###
}
}
else if (NCOL(y) == 2) {
if ("binomial" == "binomial" && any(abs(y - round(y)) >
0.001)) {
warning(gettextf("non-integer counts in a %s glm!",
"binomial"), domain = NA)
}
n <- (y1 <- y[, 1L]) + y[, 2L]
y <- y1 / n
if (any(n0 <- n == 0)) {
y[n0] <- 0
}
weights <- weights * n
mustart <- (n * y + 0.5) / (n + 1)
} else {
stop(gettextf(paste("for the '%s' family, y must be a vector of 0 and",
"1's\nor a 2 column matrix where col 1 is no.",
"successes and col 2 is no. failures"),
"binomial"), domain = NA)
}
})
family$initialize <- initialize_binom
family$ce <- ce_binom
family$dis_fun <- dis_na
family$predvar <- predvar_na
family$ll_fun <- ll_binom
family$deviance <- dev_binom
family$ppd <- ppd_binom
return(family)
}
extend_family_poisson <- function(family) {
# Helper function for calculating the log PMF of the Poisson distribution,
# but (i) modified to be non-zero at `x` not contained in the support and (ii)
# "reduced" in the sense of lacking the (additive) part
# `- wobs * log(factorial(x))`:
dpois_log_reduced <- function(x, lamb, wobs) {
wobs * (x * log(lamb) - lamb)
}
ce_poiss <- function(pref, data, psub) {
ce_sums <- -colSums(
dpois_log_reduced(x = pref$mu, lamb = psub$mu, wobs = data$weights)
)
return(ce_sums / sum(data$weights))
}
dis_na <- function(pref, psub, wobs = 1) {
rep(NA, ncol(pref$mu))
}
predvar_na <- function(mu, dis, wdraws = 1) {
rep(NA, NROW(mu))
}
ll_poiss <- function(mu, dis, y, weights = 1) {
y <- as.matrix(y)
weights * dpois(y, mu, log = TRUE)
}
dev_poiss <- function(mu, y, weights = 1, dis = NULL) {
if (NCOL(y) < NCOL(mu)) {
y <- matrix(y, nrow = length(y), ncol = NCOL(mu))
}
-2 * dpois_log_reduced(x = y, lamb = mu, wobs = weights)
}
ppd_poiss <- function(mu, dis, weights = 1) {
weights <- parse_wobs_ppd(weights, n_obs = length(mu))
rpois(length(mu), mu)
}
family$ce <- ce_poiss
family$dis_fun <- dis_na
family$predvar <- predvar_na
family$ll_fun <- ll_poiss
family$deviance <- dev_poiss
family$ppd <- ppd_poiss
return(family)
}
extend_family_gaussian <- function(family) {
# ce_gauss() does not give the actual cross-entropy (not even the one which
# would result from dropping terms which would cancel out when calculating the
# KL divergence) but a reasonable surrogate. This additional approximation was
# already made back when this used to be the KL divergence, not the
# cross-entropy.
ce_gauss <- function(pref, data, psub) {
ce_sums <- colSums(data$weights * (-2 * pref$mu * psub$mu + psub$mu^2))
return(ce_sums / sum(data$weights))
}
dis_gauss <- function(pref, psub, wobs = 1) {
sqrt(colSums(wobs / sum(wobs) * (pref$var + (pref$mu - psub$mu)^2)))
}
predvar_gauss <- function(mu, dis, wdraws = 1) {
wdraws <- wdraws / sum(wdraws)
mu_var <- do.call(rbind, lapply(seq_len(nrow(mu)), function(i) {
sum(wdraws * (mu[i, ] - weighted.mean(mu[i, ], wdraws))^2)
}))
as.vector(sum(wdraws * dis^2) + mu_var)
}
ll_gauss <- function(mu, dis, y, weights = 1) {
y <- as.matrix(y)
dis <- matrix(rep(dis, each = length(y)), ncol = NCOL(mu))
weights * dnorm(y, mu, dis, log = TRUE)
}
dev_gauss <- function(mu, y, weights = 1, dis = NULL) {
if (is.null(dis)) {
dis <- 1
} else {
dis <- matrix(rep(dis, each = length(y)), ncol = NCOL(mu))
}
if (NCOL(y) < NCOL(mu)) {
y <- matrix(y, nrow = length(y), ncol = NCOL(mu))
}
-2 * weights * (-0.5 / dis^2 * (y - mu)^2 - log(dis))
}
ppd_gauss <- function(mu, dis, weights = 1) {
weights <- parse_wobs_ppd(weights, n_obs = length(mu))
rnorm(length(mu), mu, dis)
}
family$ce <- ce_gauss
family$dis_fun <- dis_gauss
family$predvar <- predvar_gauss
family$ll_fun <- ll_gauss
family$deviance <- dev_gauss
family$ppd <- ppd_gauss
return(family)
}
extend_family_gamma <- function(family) {
ce_gamma <- function(pref, data, psub) {
stop("Cross-entropy for the Gamma() family not implemented yet.")
### TODO (Gamma()): This commented code stems from a time when this was
### still the actual KL divergence and not the (possibly reduced)
### cross-entropy ("possibly reduced" means: possibly reduced to only those
### terms which would not cancel out when calculating the KL divergence):
## mean(data$weights*(
## p_sub$dis*(log(pref$dis)-log(p_sub$dis)+log(psub$mu)-log(pref$mu)) +
## digamma(pref$dis)*(pref$dis - p_sub$dis) - lgamma(pref$dis) +
## lgamma(p_sub$dis) + pref$mu*p_sub$dis/p_sub$mu - pref$dis))
###
}
dis_gamma <- function(pref, psub, wobs = 1) {
## TODO (Gamma()), IMPLEMENT THIS
stop("Projection of dispersion parameter not yet implemented for family",
" Gamma.")
## mean(wobs*((pref$mu - p_sub$mu)/
## family$mu.eta(family$linkfun(p_sub$mu))^2))
}
predvar_gamma <- function(mu, dis, wdraws = 1) {
stop("Family Gamma not implemented yet.")
}
ll_gamma <- function(mu, dis, y, weights = 1) {
y <- as.matrix(y)
dis <- matrix(rep(dis, each = length(y)), ncol = NCOL(mu))
weights * dgamma(y, dis, dis / matrix(mu), log = TRUE)
}
dev_gamma <- function(mu, dis, y, weights = 1) {
stop("Loss function not implemented for Gamma-family yet.")
## dis <- matrix(rep(dis, each=length(y)), ncol=NCOL(mu))
## weights*dgamma(y, dis, dis/matrix(mu), log= TRUE)
}
ppd_gamma <- function(mu, dis, weights = 1) {
weights <- parse_wobs_ppd(weights, n_obs = length(mu))
rgamma(length(mu), dis, dis / mu)
}
family$ce <- ce_gamma
family$dis_fun <- dis_gamma
family$predvar <- predvar_gamma
family$ll_fun <- ll_gamma
family$deviance <- dev_gamma
family$ppd <- ppd_gamma
return(family)
}
extend_family_student_t <- function(family) {
ce_student_t <- function(pref, data, psub) {
stop("Cross-entropy for the Student_t() family not implemented yet.")
### TODO (Student_t()): This commented code stems from a time when this was
### still the actual KL divergence and not the (possibly reduced)
### cross-entropy ("possibly reduced" means: possibly reduced to only those
### terms which would not cancel out when calculating the KL divergence):
# log(psub$dis)
##- 0.5*log(pref$var) # FIX THIS, NOT CORRECT
###
}
dis_student_t <- function(pref, psub, wobs = 1) {
s2 <- colSums(psub$w / sum(wobs) *
(pref$var + (pref$mu - psub$mu)^2)) # CHECK THIS
sqrt(s2)
## stop('Projection of dispersion not yet implemented for student-t')
}
predvar_student_t <- function(mu, dis, wdraws = 1) {
wdraws <- wdraws / sum(wdraws)
mu_var <- do.call(rbind, lapply(seq_len(nrow(mu)), function(i) {
sum(wdraws * (mu[i, ] - weighted.mean(mu[i, ], wdraws))^2)
}))
as.vector(family$nu / (family$nu - 2) * sum(wdraws * dis^2) + mu_var)
}
ll_student_t <- function(mu, dis, y, weights = 1) {
y <- as.matrix(y)
dis <- matrix(rep(dis, each = length(y)), ncol = NCOL(mu))
weights * (dt((y - mu) / dis, family$nu, log = TRUE) - log(dis))
}
dev_student_t <- function(mu, y, weights = 1, dis = NULL) {
if (is.null(dis)) {
dis <- 1
} else {
dis <- matrix(rep(dis, each = length(y)), ncol = NCOL(mu))
}
if (NCOL(y) < NCOL(mu)) {
y <- matrix(y, nrow = length(y), ncol = NCOL(mu))
}
(-2 * weights * (-0.5 * (family$nu + 1)
* log(1 + 1 / family$nu * ((y - mu) / dis)^2) - log(dis)))
}
ppd_student_t <- function(mu, dis, weights = 1) {
weights <- parse_wobs_ppd(weights, n_obs = length(mu))
rt(length(mu), family$nu) * dis + mu
}
family$ce <- ce_student_t
family$dis_fun <- dis_student_t
family$predvar <- predvar_student_t
family$ll_fun <- ll_student_t
family$deviance <- dev_student_t
family$ppd <- ppd_student_t
return(family)
}
has_dispersion <- function(family) {
# a function for checking whether the family has a dispersion parameter
family$family %in% c("gaussian", "Student_t", "Gamma")
}
# A function for checking whether a `family` object has the required extra
# functions, that is, whether it has already been extended (typically by a call
# to extend_family()):
has_family_extras <- function(family) {
return(isTRUE(family$is_extended))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.