#' Use simulations to compare the observed distribution with the modelled
#' distribution
#' @inheritParams get_observed
#' @inheritParams dispersion_check
#' @inheritParams INLA::inla.posterior.sample
#' @name distribution_check
#' @rdname distribution_check
#' @exportMethod distribution_check
#' @docType methods
#' @importFrom methods setGeneric
#' @family checks
setGeneric(
name = "distribution_check",
def = function(object, nsim = 1000, seed = 0L) {
standardGeneric("distribution_check") # nocov
}
)
#' @rdname distribution_check
#' @importFrom assertthat assert_that is.flag is.count
#' @importFrom dplyr %>% count mutate_all group_by arrange mutate summarise
#' inner_join filter
#' @importFrom ggplot2 ggplot aes_string geom_ribbon geom_line geom_hline ylab
#' geom_text
#' @importFrom INLA inla.hyperpar.sample inla.posterior.sample
#' @importClassesFrom INLA inla
#' @importFrom methods setMethod new
#' @importFrom purrr map_dfc
#' @importFrom rlang .data
#' @importFrom stats quantile rpois rnbinom
#' @importFrom tidyr gather complete
#' @examples
#' \donttest{
#' library(INLA)
#' model <- inla(
#' poisson ~ 1,
#' family = "poisson",
#' data = data.frame(
#' poisson = rpois(20, lambda = 10),
#' base = 1
#' ),
#' control.predictor = list(compute = TRUE),
#' control.compute = list(config = TRUE)
#' )
#' distribution_check(model, seed = 20181202)
#' }
setMethod(
f = "distribution_check",
signature = signature(object = "inla"),
definition = function(object, nsim = 1000, seed = 0L) {
assert_that(is.count(nsim))
if (length(object$.args$family) > 1) {
stop("Only single responses are handled")
}
observed <- get_observed(object)
samples <- inla.posterior.sample(
n = nsim, result = object, seed = seed
)
relevant <- grep("^Predictor:", rownames(samples[[1]]$latent))
eta <- map_dfc(samples, "latent")[relevant, , drop = FALSE]
n_sampled <- switch(
object$.args$family,
poisson = {
mutate_all(exp(eta), rpois, n = nrow(eta)) %>%
gather("run", "x") %>%
count(.data$run, .data$x)
},
nbinomial = {
relevant <- grep(
"size for the nbinomial observations",
names(samples[[1]]$hyperpar)
)
size <- inla.hyperpar.sample(n = nsim, result = object)[, relevant] #nolint
mutate_all(
exp(eta),
function(mu) {
rnbinom(n = length(mu), mu = mu, size = size)
}
) %>%
gather("run", "x") %>%
count(.data$run, .data$x)
},
gpoisson = {
relevant <- grep(
"Overdispersion",
names(samples[[1]]$hyperpar)
)
phi <- inla.hyperpar.sample(n = nsim, result = object)[
, relevant, drop = TRUE
] #nolint
mu <- exp(eta)
sapply(
seq_len(nsim),
function(i) {
rgpoisson(n = 1, mu = as.vector(mu[, i, drop = TRUE]), phi = phi[i])
}
) -> n_sampled
data.frame(
run = rep(seq_len(nsim), each = nrow(mu)),
x = as.vector(n_sampled)
) %>%
count(.data$run, .data$x)
},
zeroinflatedpoisson1 = {
relevant <- grep(
"zero-inflated poisson_1",
names(samples[[1]]$hyperpar)
)
mutate_all(
exp(eta),
function(lambda, prob_zero) {
n <- length(lambda)
rbinom(n = n, size = 1, prob = 1 - prob_zero) *
rpois(n = n, lambda = lambda)
},
prob_zero = inla.hyperpar.sample(
n = nsim, result = object
)[, relevant] #nolint
) %>%
gather("run", "x") %>%
count(.data$run, .data$x)
},
stop(object$.args$family, " is not yet handled")
)
data.frame(x = observed) %>%
count(.data$x) -> n_observed
n_count <- unique(c(n_observed$x, n_sampled$x))
n_sampled %>%
complete(run = .data$run, x = n_count, fill = list(n = 0)) %>%
group_by(.data$run) %>%
arrange(.data$x) %>%
mutate(ecdf = cumsum(.data$n) / sum(.data$n)) %>%
group_by(.data$x) %>%
summarise(
median = quantile(.data$ecdf, probs = 0.5),
lcl = quantile(.data$ecdf, probs = 0.025),
ucl = quantile(.data$ecdf, probs = 0.975)
) %>%
inner_join(
n_observed %>%
complete(x = n_count, fill = list(n = 0)) %>%
arrange(.data$x) %>%
mutate(ecdf = cumsum(.data$n) / sum(.data$n))
,
by = "x"
) -> ecdf
class(ecdf) <- c("distribution_check", class(ecdf))
return(ecdf)
}
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.