Nothing
#' Prepare data stack for INLA
#'
#' @details Creates the formatted input data to be used by the INLA model. For more
#' information about penalized complexity priors, see Daniel Simpson's
#' paper on the subject: \doi{doi:10.1214/16-STS576}
#'
#' @seealso [mbg::fit_inla_model()] [mbg::MbgModelRunner]
#'
#' @param input_data A data.frame with at least the following columns:
#' - 'indicator': number of "hits' per site, e.g. tested positive for malaria
#' - 'samplesize': total population sampled at the site
#' - 'x': x position, often longitude
#' - 'y': y position, often latitude
#' @param id_raster [terra::SpatRaster] with non-NA pixels delineating the extent of the
#' study area
#' @param covariates (list) Named list of all covariate effects included in the model,
#' typically generated by [load_covariates()]. Only used if `use_covariates` is TRUE.
#' @param use_covariates (`boolean(1)`, default TRUE) Should covariate fixed effects be
#' included in the model? If TRUE, includes fixed effects for all covariates in the
#' `covariates` argument. If FALSE, includes only an intercept.
#' @param covariates_sum_to_one (logical, default FALSE) Should the input covariates be
#' constrained to sum to one? Usually FALSE when raw covariates are passed to the model,
#' and TRUE if running an ensemble (stacking) model.
#' @param family (`character(1)`, default 'binomial') Statistical family; used to
#' link stacked covariates with outcomes
#' @param use_spde (`boolean(1)`, default TRUE) Should an SPDE approximation of a Gaussian
#' process be included in the model?
#' @param spde_range_pc_prior (list) A named list specifying the penalized complexity
#' prior for the SPDE range. The two named items are "threshold", the test threshold
#' (set as a proportion of the overall mesh extent), and "prob_below", the prior
#' probability that the value is BELOW that range threshold. The function automatically
#' converts "threshold" from a proportion of the overall mesh extent into a distance.
#' @param spde_sigma_pc_prior (list) A named list specifying the penalized complexity
#' prior for sigma (standard deviation) of the SPDE object. The two named items are
#' "threshold", the test threshold for the standard deviation, and "prob_above",
#' the prior probability that sigma will EXCEED that threshold.
#' @param spde_integrate_to_zero (`boolean(1)`, default TRUE) Should the 'volume' under
#' the SPDE mesh integrate to zero?
#' @param mesh_max_edge (`numeric(2)`, default c(0.2, 5)) Maximum size of the INLA SPDE
#' mesh inside (1) and outside (2) of the modeled region.
#' @param mesh_cutoff (`numeric(1)`, default 0.04) Minimum size of the INLA mesh, usually
#' reached in data-dense areas.
#' @param use_nugget (`boolean(1)`, default TRUE) Should a nugget (IID observation-level
#' error or noise) term be included?
#' @param nugget_pc_prior A named list specifying the penalized complexity prior for the
#' nugget term. The two named items are "threshold", the test threshold for the nugget
#' standard deviation, and "prob_above", the prior probability that the standard
#' deviation will EXCEED that threshold. Only used if `use_nugget` is TRUE
#' @param use_admin_effect (`boolean(1)`, default FALSE) Should an IID random effect by
#' administrative unit be included?
#' @param admin_boundaries ([sf][sf::sf] object, default NULL) admin boundaries spatial
#' object. Only used if `use_admin_effect` is TRUE
#' @param admin_pc_prior (list) A named list specifying the penalized complexity prior
#' for the admin-level IID term. The two named items are "threshold", the test threshold
#' for the standard deviation of admin-level effects, and "prob_above", the prior
#' probability that the standard deviation will EXCEED that threshold. Only used if
#' `use_admin_effect` is TRUE.
#'
#' @return List containing the following items:
#' - "mesh": The mesh used to approximate the latent Gaussian process
#' - "spde": The SPDE object that will be used to fit the INLA model
#' - "inla_data_stack": The data stack to be passed to `INLA::inla()`
#' - "formula_string": The formula specification to be passed to `INLA::inla()`
#'
#' @concept model_fit
#'
#' @importFrom data.table as.data.table
#' @importFrom sf st_sf st_join st_nearest_feature
#' @importFrom stats qlogis
#' @importFrom terra extract
#' @importFrom glue glue
#' @export
prepare_inla_data_stack <- function(
input_data, id_raster, covariates,
use_covariates = TRUE,
covariates_sum_to_one = FALSE,
family = 'binomial',
use_spde = TRUE,
spde_range_pc_prior = list(threshold = 0.1, prob_below = 0.05),
spde_sigma_pc_prior = list(threshold = 3, prob_above = 0.05),
spde_integrate_to_zero = TRUE,
mesh_max_edge = c(0.2, 5),
mesh_cutoff = 0.04,
use_nugget = TRUE,
nugget_pc_prior = list(threshold = 3, prob_above = 0.05),
use_admin_effect = FALSE,
admin_boundaries = NULL,
admin_pc_prior = list(threshold = 3, prob_above = 0.05)
){
# Helper variable for extracting XY data
xy_fields <- c('x','y')
# Prepare the effects list and observations map list
effects_list <- list()
obs_list <- list()
## Optionally add covariate fixed effects
if(use_covariates){
# Extract all covariates
cov_names <- names(covariates)
for(cov_name in cov_names){
input_data[[cov_name]] <- terra::extract(
x = covariates[[cov_name]],
y = as.matrix(input_data[, xy_fields, with = F])
)[, 1]
}
# If sum to one, add a model constraint and convert to logit space
if(covariates_sum_to_one){
constraint_suffix <- glue::glue(
", extraconstr = list(A = matrix(1, ncol = {length(cov_names)}), e = 1)"
)
if(family == 'binomial'){
for(cov_name in cov_names) input_data[[cov_name]] <- qlogis(input_data[[cov_name]])
}
} else {
constraint_suffix <- ""
}
# Append to effects list, observations map list, and formula string
effects_list$covariates <- seq_along(cov_names)
obs_list$covariates <- as.matrix(input_data[, cov_names, with = F])
formula_string <- glue::glue(
"y ~ 0 + f(covariates, model = 'iid', fixed = TRUE{constraint_suffix})"
)
} else {
formula_string <- "y ~ 1"
}
## Optionally add the SPDE approximation to a Gaussian process
if(use_spde){
# Build prediction mesh
id_raster_table <- data.table::as.data.table(id_raster, xy = TRUE) |> na.omit()
mesh <- INLA::inla.mesh.2d(
loc = input_data[, xy_fields, with = F],
loc.domain = id_raster_table[, xy_fields, with = F],
max.edge = mesh_max_edge,
cutoff = mesh_cutoff
)
# The maximum mesh dimension will be used to determine the SPDE range prior
max_d <- apply(X = mesh$loc, MARGIN = 2, FUN = function(x) diff(range(x))) |>
max(na.rm = TRUE)
# SPDE object
spde <- INLA::inla.spde2.pcmatern(
mesh = mesh,
alpha = 2,
constr = !spde_integrate_to_zero,
prior.range = c(max_d * spde_range_pc_prior$threshold, spde_range_pc_prior$prob_below),
prior.sigma = c(spde_sigma_pc_prior$threshold, spde_sigma_pc_prior$prob_above)
)
# Append to effects list, observations map list, and formula string
effects_list$s <- INLA::inla.spde.make.index(name = "space", n.spde = spde$n.spde)
obs_list$s <- INLA::inla.spde.make.A(
mesh = mesh,
loc = as.matrix(input_data[, xy_fields, with = F])
)
formula_string <- glue::glue("{formula_string} + f(space, model = spde)")
} else {
mesh <- NULL
spde <- NULL
}
## Optionally add nugget
if(use_nugget){
obs_list$nugget <- 1
effects_list$nugget <- as.matrix(input_data[, 'cluster_id', with = F])
n_pcp <- nugget_pc_prior
formula_string <- glue::glue(
"{formula_string} +
f(nugget, model = 'iid', hyper = list(
prec = list(prior = 'pc.prec', param = c({n_pcp$threshold}, {n_pcp$prob_above}))
))
"
)
}
## Optionally add admin-level effects
if(use_admin_effect){
# Merge on unique admin ID for each observation
admin_boundaries$admin_id <- seq_len(nrow(admin_boundaries))
input_data_merged <- sf::st_as_sf(input_data, coords = xy_fields, crs = 'EPSG:4326') |>
sf::st_join(y = admin_boundaries[, c('admin_id')], join = sf::st_nearest_feature) |>
data.table::as.data.table()
# Add the effect to the observations list, effects list, and model formula
obs_list$adm_effect <- 1
effects_list$adm_effect <- as.matrix(input_data_merged[, 'admin_id', with = F])
a_pcp <- admin_pc_prior
formula_string <- glue::glue(
"{formula_string} +
f(adm_effect, model = 'iid', hyper = list(
prec = list(prior = 'pc.prec', param = c({a_pcp$threshold}, {a_pcp$prob_above}))
))
"
)
}
## Combine observation and effects lists into an INLA estimation data stack
inla_data_stack <- INLA::inla.stack(
tag = 'est',
data = list(y = input_data$indicator),
A = obs_list,
effects = effects_list
)
# Return a list containing the mesh, SPDE object, INLA data stack, and INLA formula
data_list <- list(
mesh = mesh,
spde = spde,
inla_data_stack = inla_data_stack,
formula_string = formula_string
)
return(data_list)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.