#' @title Restricted Shared Component model
#'
#' @description Fit a Restricted Shared Component model for two diseases
#'
#' @usage rscm(data, formula1, formula2, family = c("poisson", "poisson"),
#' E1 = NULL, E2 = NULL, area = NULL, neigh = NULL,
#' proj = "none", nsamp = 1000,
#' priors = list(prior_gamma = c(0, 0.35),
#' prior_prec = list(tau_s = c(0.01, 0.01),
#' tau_1 = c(0.01, 0.01),
#' tau_2 = c(0.01, 0.01))),
#' random_effects = list(shared = TRUE,
#' specific_1 = TRUE,
#' specific_2 = TRUE),
#' ...)
#'
#' @param data a data frame or list containing the variables in the model.
#' @param formula1 an object of class "formula" (or one that can be coerced to that class): a symbolic description of the model to be fitted for disease 1.
#' @param formula2 an object of class "formula" (or one that can be coerced to that class): a symbolic description of the model to be fitted for disease 2.
#' @param family a vector of size two with two families. Some allowed families are: poisson, nbinomial, zeroinflatedpoisson0, zeroinflatednbinomial0. See INLA::inla.list.models() for more information.
#' @param E1 known component, for disease 1, in the mean for the Poisson likelihoods defined as E = exp(\eqn{\eta}), where \eqn{\eta} is the linear predictor. Default = 1.
#' @param E2 known component, for disease 2, in the mean for the Poisson likelihoods defined as E = exp(\eqn{\eta}), where \eqn{\eta} is the linear predictor. Default = 1.
#' @param area areal variable name in \code{data}.
#' @param neigh neighborhood structure. A \code{SpatialPolygonsDataFrame} or \code{sf} object.
#' @param proj "none" or "spock".
#' @param nsamp number of samples. Default = 1000.
#' @param priors a list containing:
#' \itemize{
#' \item prior_gamma: a vector of size two containing mean and precision for the normal distribution applied for \eqn{\gamma}
#' \item prior_prec: a list with:
#' \itemize{
#' \item tau_s: a vector of size two containing shape and scale for the gamma distribution applied for \eqn{\tau_s}
#' \item tau_1: a vector of size two containing shape and scale for the gamma distribution applied for \eqn{\tau_1}
#' \item tau_2: a vector of size two containing shape and scale for the gamma distribution applied for \eqn{\tau_2}
#' }
#' }
#' @param random_effects a list determining which effects should we include in the model. Default: list(shared = TRUE, specific_1 = TRUE, specific_2 = TRUE).
#' @param ... other parameters used in ?INLA::inla
#'
#' @details The fitted model is given by
#' \deqn{Y_1 ~ Poisson(E_1\theta_1),}
#' \deqn{Y_2 ~ Poisson(E_2\theta_2),}
#'
#' \deqn{log(\theta_1) = X\beta + \gamma\psi + \phi_1,}
#' \deqn{log(\theta_2) = X\beta + \psi + \phi_2,}
#'
#' \deqn{\psi ~ ICAR(\tau_s); \phi_1 ~ ICAR(\tau_1); \phi_2 ~ ICAR(\tau_2).}
#'
#' \deqn{\delta = \sqrt\gamma}
#'
#' @examples
#' library(spdep)
#'
#' set.seed(123456)
#'
#' ##-- Spatial structure
#' data("neigh_RJ")
#'
#' ##-- Parameters
#' alpha_1 <- 0.5
#' alpha_2 <- 0.1
#' beta_1 <- c(-0.5, -0.2)
#' beta_2 <- c(-0.8, -0.4)
#' tau_s <- 1
#' tau_1 <- tau_2 <- 10
#' delta <- 1.5
#'
#' ##-- Data
#' data <- rshared(alpha_1 = alpha_1, alpha_2 = alpha_2,
#' beta_1 = beta_1, beta_2 = beta_2,
#' delta = delta,
#' tau_1 = tau_1, tau_2 = tau_2, tau_s = tau_s,
#' confounding = "linear",
#' neigh = neigh_RJ)
#'
#' ##-- Models
#' scm_inla <- rscm(data = data,
#' formula1 = Y1 ~ X11 + X12,
#' formula2 = Y2 ~ X21 + X12,
#' family = c("nbinomial", "poisson"),
#' E1 = E1, E2 = E2,
#' area = "reg", neigh = neigh_RJ,
#' priors = list(prior_prec = list(tau_s = c(0.5, 0.05)), prior_gamma = c(0, 0.5)),
#' proj = "none", nsamp = 1000,
#' random_effects = list(shared = TRUE, specific_1 = TRUE, specific_2 = TRUE))
#'
#' rscm_inla <- rscm(data = data,
#' formula1 = Y1 ~ X11 + X12,
#' formula2 = Y2 ~ X21 + X12,
#' family = c("nbinomial", "poisson"),
#' E1 = E1, E2 = E2,
#' area = "reg", neigh = neigh_RJ,
#' priors = list(prior_prec = list(tau_s = c(0.5, 0.05)), prior_gamma = c(0, 0.5)),
#' proj = "spock", nsamp = 1000,
#' random_effects = list(shared = TRUE, specific_1 = TRUE, specific_2 = TRUE))
#'
#' ##-- Summary
#' scm_inla$summary_fixed
#' rscm_inla$summary_fixed
#'
#' scm_inla$summary_hyperpar
#' rscm_inla$summary_hyperpar
#'
#' @return \item{$sample}{a sample of size nsamp for all parameters in the model}
#' \item{$summary_fixed}{summary measures for the coefficients}
#' \item{$summary_hyperpar}{summary measures for hyperparameters}
#' \item{$summary_random}{summary measures for random quantities}
#' \item{$out}{INLA output}
#' \item{$time}{time elapsed for fitting the model}
#'
#' @importFrom stats as.formula terms model.frame update
#' @importFrom INLA inla inla.posterior.sample inla.hyperpar.sample
#'
#' @export
rscm <- function(data, formula1, formula2, family = c("poisson", "poisson"),
E1 = NULL, E2 = NULL, area = NULL, neigh = NULL,
proj = "none", nsamp = 1000,
priors = list(prior_gamma = c(0, 0.35),
prior_prec = list(tau_s = c(0.01, 0.01),
tau_1 = c(0.01, 0.01),
tau_2 = c(0.01, 0.01))),
random_effects = list(shared = TRUE, specific_1 = TRUE, specific_2 = TRUE),
...) {
##-- Time
time_start <- Sys.time()
##-- Tests
if(missing(formula1)) stop("You must provide the formula for disease 1")
if(missing(formula2)) stop("You must provide the formula for disease 2")
if(!proj %in% c("none", "spock")) stop("proj must be 'none' or 'spock'")
if("sf" %in% class(neigh)) neigh <- as(neigh, "Spatial")
if("sf" %in% class(data)) sf::st_geometry(data) <- NULL
random_effects <- append_list(list(shared = TRUE, specific_1 = TRUE, specific_2 = TRUE), random_effects)
priors <- append_list(list(prior_gamma = c(0, 0.35),
prior_prec = list(tau_s = c(0.01, 0.01),
tau_1 = c(0.01, 0.01),
tau_2 = c(0.01, 0.01))),
priors)
shared <- random_effects$shared
specific_1 <- random_effects$specific_1
specific_2 <- random_effects$specific_2
prior_gamma <- priors$prior_gamma
if(!is.list(priors$prior_prec) & length(priors$prior_prec) == 2) {
message(sprintf("priors$prior_prec should be a list of tau_s, tau_1 and tau_2 entries.
Setting all priors as Gamma(%s, %s).", priors$prior_prec[1], priors$prior_prec[2]))
prior_tau_s <- priors$prior_prec
prior_tau_1 <- priors$prior_prec
prior_tau_2 <- priors$prior_prec
} else{
prior_tau_s <- priors$prior_prec$tau_s
prior_tau_1 <- priors$prior_prec$tau_1
prior_tau_2 <- priors$prior_prec$tau_2
}
##-- Setup INLA
Y <- data[, c(deparse(formula1[[2]]), deparse(formula2[[2]]))]
X1 <- all.vars(formula1[-2])
X2 <- all.vars(formula2[-2])
covs_s <- union(X1, X2)
X1 <- data[, X1, drop = FALSE]
X2 <- data[, X2, drop = FALSE]
Xs <- data[, covs_s, drop = FALSE]
n <- nrow(Y)
n_covs1 <- ncol(X1)
n_covs2 <- ncol(X2)
n_covss <- ncol(Xs)
if(!is.null(neigh)) {
W <- spdep::nb2mat(neighbours = spdep::poly2nb(neigh), style = "B", zero.policy = TRUE)
Ws <- W1 <- W2 <- W
}
if(proj == "spock" & !is.null(area) & !is.null(neigh)) {
time_start_correction <- Sys.time()
if(specific_1) {
neigh1 <- spock(X = as.matrix(cbind(1, X1)), map = neigh)
W1 <- spdep::nb2mat(neigh1, style = "B", zero.policy = TRUE)
}
if(specific_2) {
neigh2 <- spock(X = as.matrix(cbind(1, X2)), map = neigh)
W2 <- spdep::nb2mat(neigh2, style = "B", zero.policy = TRUE)
}
if(shared) {
neighs <- spock(X = as.matrix(cbind(1, Xs)), map = neigh)
Ws <- spdep::nb2mat(neighs, style = "B", zero.policy = TRUE)
}
time_end_correction <- Sys.time()
} else {
time_end_correction <- time_start_correction <- Sys.time()
}
E1 <- substitute(E1)
E2 <- substitute(E2)
if(is.null(E1)) {
E1 <- rep(1, n)
} else {
E1 <- data[, deparse(E1), drop = FALSE]
}
if(is.null(E2)) {
E2 <- rep(1, n)
} else {
E2 <- data[, deparse(E2), drop = FALSE]
}
E <- data.frame(E1 = E1, E2 = E2)
E <- c(E[, 1], E[, 2])
inla_list <- list()
inla_list$Y <- cbind(c(Y[, 1], rep(NA, n)),
c(rep(NA, n), Y[, 2]))
inla_list$alpha <- as.factor(rep(c(1, 2), each = n))
if(!is.null(area)) {
inla_list$psi_gamma <- c(data[[area]], rep(NA, n)) ##-- gamma*psi
inla_list$psi <- c(rep(NA, n), data[[area]]) ##-- psi
inla_list$phi1 <- inla_list$psi_gamma ##-- psi_1
inla_list$phi2 <- inla_list$psi ##-- psi_2
}
inla_list$X <- matrix(NA, nrow = 2*n, ncol = n_covs1 + n_covs2)
inla_list$X[1:n, 1:n_covs1] <- as.matrix(X1)
inla_list$X[(n+1):(2*n), (n_covs1+1):(n_covs1 + n_covs2)] <- as.matrix(X2)
colnames(inla_list$X) <- c(paste(colnames(X1), 1, sep = "_"), paste(colnames(X2), 2, sep = "_"))
##-- Fit shared
f_s <- Y ~ -1 + alpha + X
if(!is.null(area) & !is.null(neigh)) {
if(shared) {
f_s <- update(f_s, ~ . + f(psi,
model = "besag",
graph = Ws,
scale.model = TRUE,
hyper = list(prec =
list(prior = "loggamma",
param = prior_tau_s))) +
f(psi_gamma,
copy = "psi",
range = c(0, Inf),
hyper = list(beta =
list(fixed = FALSE,
prior = "normal",
param = prior_gamma)))
)
}
if(specific_1) {
f_s <- update(f_s, ~ . + f(phi1,
model = "besag",
graph = W1,
scale.model = TRUE,
hyper = list(prec =
list(prior = "loggamma",
param = prior_tau_1)))
)
}
if(specific_2) {
f_s <- update(f_s, ~ . + f(phi2,
model = "besag",
graph = W2,
scale.model = TRUE,
hyper = list(prec =
list(prior = "loggamma",
param = prior_tau_2)))
)
}
}
time_start_inla <- Sys.time()
args <- list(...)
args$control.compute$config <- TRUE
args$control.inla$strategy <- "laplace"
W <- parent.frame()$W
inla_aux <- function(...) INLA::inla(formula = f_s, family = family, data = inla_list, E = as.vector(E), ...)
mod <- do.call(what = inla_aux, args = args, envir = parent.frame())
model_sample <- INLA::inla.posterior.sample(result = mod, n = nsamp, use.improved.mean = TRUE)
hyperpar_samp <- INLA::inla.hyperpar.sample(result = mod, n = nsamp, improve.marginals = TRUE)
time_end_inla <- Sys.time()
id_fixed <- paste0(c("alpha1", "alpha2", colnames(inla_list$X)), ":", 1)
if(!is.null(area) & !is.null(neigh)) {
phi1 <- paste0("phi1:", 1:n)
phi2 <- paste0("phi2:", 1:n)
psi <- paste0("psi:", 1:n)
psi_gamma <- paste0("psi_gamma:", 1:n)
gamma <- paste0("gamma:", 1)
}
beta_samp <- do.call(args = lapply(model_sample, select_marginal, ids = id_fixed), what = "rbind")
colnames(beta_samp) <- c("alpha1", "alpha2", colnames(inla_list$X))
latent_sample <- c()
if(!is.null(area) & !is.null(neigh)) {
if(specific_1) {
phi1_sample <- do.call(args = lapply(model_sample, select_marginal, ids = phi1), what = "rbind")
colnames(phi1_sample) <- phi1
latent_sample <- cbind(latent_sample, phi1_sample)
}
if(specific_2) {
phi2_sample <- do.call(args = lapply(model_sample, select_marginal, ids = phi2), what = "rbind")
colnames(phi2_sample) <- phi2
latent_sample <- cbind(latent_sample, phi2_sample)
}
if(shared) {
psi_sample <- do.call(args = lapply(model_sample, select_marginal, ids = psi), what = "rbind")
colnames(psi_sample) <- psi
psi_gamma_sample <- do.call(args = lapply(model_sample, select_marginal, ids = psi_gamma), what = "rbind")
colnames(psi_gamma_sample) <- psi_gamma
delta_sample <- matrix(get_trans_samp(fun = sqrt, marg = mod$marginals.hyperpar$`Beta for psi_gamma`, n = nsamp, trunc = TRUE), ncol = 1)
colnames(delta_sample) <- "Delta"
psi_precision_sample <- hyperpar_samp[, "Precision for psi"]/hyperpar_samp[, "Beta for psi_gamma"]
hyperpar_samp <- cbind(hyperpar_samp, delta_sample, `Precision for psi SC` = psi_precision_sample)
latent_sample <- cbind(latent_sample, psi_sample, psi_gamma_sample, psi_precision_sample)
}
sample <- cbind(hyperpar_samp, beta_samp, latent_sample)
} else {
sample <- cbind(hyperpar_samp, beta_samp)
}
##-- Time
time_tab <- data.frame(INLA = as.numeric(difftime(time1 = time_end_inla, time2 = time_start_inla, units = "secs")),
correction = as.numeric(difftime(time1 = time_end_correction, time2 = time_start_correction, units = "secs")),
total = as.numeric(difftime(time1 = Sys.time(), time2 = time_start, units = "secs")))
##-- Return
out <- list()
out$sample <- sample
out$summary_fixed <- chain_summary(sample = beta_samp)
out$summary_hyperpar <- chain_summary(sample = hyperpar_samp)
if(is.null(area)) `<-`(out$summary_random, chain_summary(sample = latent_sample)) else `<-`(out$summary_random, NULL)
out$out <- mod
out$time <- time_tab
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.