Nothing
#' Fitting a Small Area Model
#'
#' `fit_sae()` is used to fit Beta-based small area models, such as the classical Beta, zero and/or one inflated Beta and Flexible Beta models. The random effect part can incorporate either a temporal and/or a spatial dependency structure devoted to the prior specification settings. In addition, different prior assumptions can be specified for the unstructured random effects, allowing for robust and shrinking priors and different parametrizations can be set up.
#'
#'
#' @param formula_fixed An object of class `"formula"` specifying the linear regression fixed part at the linking level.
#' @param data An object of class `"data.frame"` containing all relevant quantities.
#' @param domains Data column name displaying the domain names. If `NULL` (default), the domains are denoted with a progressive number.
#' @param disp_direct Data column name displaying given values of sampling dispersion for each domain. In out-of-sample areas, dispersion must be `NA`.
#' @param type_disp Parametrization of the dispersion parameter. The choices are variance (`"var"`) or \eqn{\phi_d} + 1 (`"neff"`) parameter.
#' @param domain_size Data column name indicating domain sizes (optional). In out-of-sample areas, sizes must be `NA`.
#' @param household_size Data column name indicating the number of sampled households. Required for the `ExtBeta` likelihood option.
#' @param likelihood Sampling likelihood to be used. The choices are `"beta"` (default), `"flexbeta"`, `ExtBeta`, `"Infbeta0"`, `"Infbeta1"` and `"Infbeta01"`.
#' @param prior_coeff Prior distribution of the regression coefficients. The choices are `"normal` or `HorseShoe`.
#' @param p0_HorseShoe If `prior_coeff = "HorseShoe"`, it requires the expected number of relevant covariates.
#' @param prior_reff Prior distribution of the unstructured random effect. The choices are: `"normal"`, `"t"`, `"VG"`.
#' @param spatial_error Logical indicating whether to include a spatially structured random effect.
#' @param spatial_df Object of class `SpatialPolygonsDataFrame` or `sf` with the shapefile of the studied region. Required if `spatial_error = TRUE`.
#' @param domains_spatial_df Column name of the `spatial_df` object displaying the domain names. Required if `spatial_error = TRUE`.
#' @param temporal_error Logical indicating whether to include a temporally structured random effect.
#' @param temporal_variable Data column name indicating temporal variable. Required if `temporal_error = TRUE`.
#' @param scale_prior List with the values of the prior scales. 4 named elements must be provided: "Unstructured", "Spatial", "Temporal", "Coeff.". Default: all equal to 2.5.
#' @param adapt_delta HMC option: target average proposal acceptance probability. See \code{\link[rstan]{stan}} documentation.
#' @param max_treedepth HMC option: target average proposal acceptance probability. See \code{\link[rstan]{stan}} documentation.
#' @inheritParams rstan::sampling
#' @param ... Arguments passed to \code{\link[rstan]{sampling}} (e.g. iter, chains).
#' @return A list of class `fitsae` containing the following objects:
#' \describe{
#' \item{`model_settings`}{A list summarizing all the assumptions of the model: sampling likelihood, presence of intercept, dispersion parametrization, random effects priors and possible structures.}
#' \item{`data_obj`}{A list containing input objects including in-sample and out-of-sample relevant quantities.}
#' \item{`stanfit`}{A `stanfit` object, outcome of \code{\link[rstan]{sampling}} function containing full posterior draws. For details, see \code{\link[rstan]{stan}} documentation.}
#' \item{`pars_interest`}{A vector containing the names of parameters whose posterior samples are stored.}
#' \item{`call`}{Image of the function call that produced the `fitsae` object.}
#' }
#' @seealso \code{\link[rstan]{sampling}} for sampler options and \code{\link{summary.fitsae}} for handling the output.
#'
#' @examples
#' library(tipsae)
#'
#' # loading toy cross sectional dataset
#' data("emilia_cs")
#'
#' # fitting a cross sectional model
#' fit_beta <- fit_sae(formula_fixed = hcr ~ x, data = emilia_cs, domains = "id",
#' type_disp = "var", disp_direct = "vars", domain_size = "n",
#' # MCMC setting to obtain a fast example. Remove next line for reliable results.
#' chains = 1, iter = 150, seed = 0)
#'
#'
#' # Spatio-temporal model: it might require time to be fitted
#'\dontrun{
#' # loading toy panel dataset
#' data("emilia")
#' # loading the shapefile of the concerned areas
#' data("emilia_shp")
#'
#' # fitting a spatio-temporal model
#' fit_ST <- fit_sae(formula_fixed = hcr ~ x,
#' domains = "id",
#' disp_direct = "vars",
#' type_disp = "var",
#' domain_size = "n",
#' data = emilia,
#' spatial_error = TRUE,
#' spatial_df = emilia_shp,
#' domains_spatial_df = "NAME_DISTRICT",
#' temporal_error = TRUE,
#' temporal_variable = "year",
#' max_treedepth = 15,
#' seed = 0)
#'}
#'
#' @references
#'
#' \insertRef{janicki2020properties}{tipsae}
#'
#' \insertRef{carpenter2017stan}{tipsae}
#'
#' \insertRef{morris2019bayesian}{tipsae}
#'
#' \insertRef{DeNicolo2021}{tipsae}
#'
#' \insertRef{JSS}{tipsae}
#'
#'
#'@export
fit_sae <- function(formula_fixed,
data,
domains = NULL,
disp_direct,
type_disp = c("neff", "var"),
domain_size = NULL,
household_size = NULL,
likelihood = c("beta", "flexbeta", "Infbeta0","Infbeta1","Infbeta01", "ExtBeta"),
prior_coeff = c("normal", "HorseShoe"),
p0_HorseShoe = NULL,
prior_reff = c("normal", "t", "VG"),
spatial_error = FALSE,
spatial_df = NULL,
domains_spatial_df = NULL,
temporal_error = FALSE,
temporal_variable = NULL,
scale_prior = list("Unstructured" = 2.5,
"Spatial" = 2.5,
"Temporal" = 2.5,
"Coeff." = 2.5),
adapt_delta = 0.95,
max_treedepth=10,
init="0",
...) {
call <- match.call()
type_disp <- match.arg(type_disp)
likelihood <- match.arg(likelihood)
prior_reff <- match.arg(prior_reff)
prior_coeff <- match.arg(prior_coeff)
# check parameters
check_par_fit(formula_fixed,
domains,
disp_direct,
type_disp,
domain_size,
household_size,
data,
likelihood,
prior_reff,
prior_coeff,
p0_HorseShoe,
spatial_error,
spatial_df,
domains_spatial_df,
temporal_error,
temporal_variable,
adapt_delta,
max_treedepth,
init)
data <- as.data.frame(data)
# check scale_prior
check_scale_prior(scale_prior)
# order dataset w.r.t. domain
if (!is.null(domains)) {
if (!temporal_error) {
data <- data[order(data[,domains]), ]
}else{# with temporal error: domain nested within times
data <- data[order(data[,temporal_variable], data[,domains]), ]
}
}
if (!is.null(spatial_df)) {
if (inherits(spatial_df, "SpatialPolygonsDataFrame")){
dom_spat <- spatial_df@data[,domains_spatial_df]
} else {
dom_spat <- sf::st_drop_geometry(spatial_df[,domains_spatial_df])[[1]]
}
spatial_df <- spatial_df[order(dom_spat), ]
}
# creation data objects
data_obj <- create_data(formula_fixed, data, domain_size, household_size, domains, disp_direct)
# spatial error
data_spatial <- arrange_spatial_structure(spatial_error, spatial_df, data_obj)
# temporal error
data_temporal <- arrange_temporal_structure(temporal_error, temporal_variable, data_obj, data)
# check data objects
check_data_fit(data_obj, likelihood, prior_coeff, domain_size, household_size)
# Creation stan data object
standata <- list(
y = data_obj$y_is,
X = data_obj$X_scal[!data_obj$is_oos,],
X_oos = data_obj$X_scal[data_obj$is_oos,],
M_is = data_obj$M_is,
M_oos = length(data_obj$y[data_obj$is_oos]),
P = ncol(data_obj$X_scal),
disp = data_obj$dispersion,
prior_coeff = ifelse(prior_coeff == "normal", 0, 1),
indices_is = data_obj$indices_is,
indices_oos = as.array(data_obj$indices_oos),
intercept = data_obj$intercept,
sigma_unstr = scale_prior[["Unstructured"]],
sigma_spatial = scale_prior[["Spatial"]],
sigma_temporal = scale_prior[["Temporal"]],
sigma_coeff = scale_prior[["Coeff."]]
)
# adding intercepts of islands
if (data_spatial$islands > 1) {
standata$X <- cbind(data_spatial$intercept_islands[!data_obj$is_oos,], standata$X)
standata$X_oos <- cbind(data_spatial$intercept_islands[data_obj$is_oos,], standata$X_oos)
standata$P <- ncol(standata$X)
data_obj$X_scal <- cbind(data_spatial$intercept_islands, data_obj$X_scal)
}
# adding temporal and spatial information
data_spatial[["intercept_islands"]] <- NULL
data_spatial[["islands"]] <- NULL
standata <- c(standata, data_spatial, data_temporal)
# consistent definition of matrix
if (!is.matrix(standata$X_oos)) {
standata$X_oos <- matrix(standata$X_oos, ncol = ncol(data_obj$X_scal))
}
if (!is.matrix(standata$X)) {
standata$X <- matrix(standata$X, ncol = ncol(data_obj$X_scal))
}
### creation dummy variables
standata <- dummy_standata(standata, data_obj, type_disp,
likelihood, prior_reff, prior_coeff, p0_HorseShoe)
# parameters of interest
pars_interest <- target_parameters(standata, data_obj, likelihood, prior_reff)
# fit model
out <- rstan::sampling(
stanmodels$global,
data = standata,
control = list(adapt_delta = adapt_delta, max_treedepth = max_treedepth),
pars = pars_interest,
init = init,
...
)
# list with model settings
model_settings <- list(
likelihood = likelihood,
intercept = standata$intercept,
type_disp = type_disp,
prior_reff = prior_reff,
spatial_error = spatial_error,
temporal_error = temporal_error,
spatio_temporal = standata$spatio_temporal
)
# adding time variable
if (temporal_error) {
data_obj[["times"]] <- data[, temporal_variable]
}
output <- list(model_settings = model_settings,
data_obj = data_obj,
stanfit = out,
pars_interest = pars_interest,
call = call)
class(output) <- "fitsae"
return(output)
}
#' Print Method for a `fitsae` Object
#'
#' The generic method `print()` allow to explore relevant outputs of the input object
#'
#' @param x Object of class `fitsae`.
#' @param ... Currently unused.
#'
#' @return Printed information on a `fitsae` object.
#'
#' @export
#'
print.fitsae <- function(x, ...) {
if (!inherits(x, "fitsae"))
stop("Indicated object does not have 'fitsae' class.")
cat("Results of the fitting process for SAE model \n")
cat("\n")
print(x$call)
cat("\n")
cat("--------------------------------------------------------------------------\n")
cat("* Model likelihood:", x$model_settings$likelihood, "\n")
cat("* Dispersion parameter type:", ifelse(x$model_settings$type_disp == "var", "variance", "effective sample size") , "\n")
if (x$model_settings$spatio_temporal == 0) {
cat("* Prior on unstructured random effects:", x$model_settings$prior_reff , "\n")
}
cat("* Spatial error:", as.logical(x$model_settings$spatial_error) , "\n")
cat("* Temporal error:", as.logical(x$model_settings$temporal_error) , "\n")
cat("--------------------------------------------------------------------------")
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.