#' Turn sdmTMB model output into a tidy data frame
#'
#' @param x Output from [sdmTMB()].
#' @param effects A character value. One of `"fixed"` ('fixed' or main-effect
#' parameters), `"ran_pars"` (standard deviations, spatial range, and other
#' random effect and dispersion-related terms), or `"ran_vals"` (individual
#' random intercepts, if included; behaves like `ranef()`).
#' @param conf.int Include a confidence interval?
#' @param conf.level Confidence level for CI.
#' @param exponentiate Whether to exponentiate the fixed-effect coefficient
#' estimates and confidence intervals.
#' @param model Which model to tidy if a delta model (1 or 2).
#' @param silent Omit any messages?
#' @param ... Extra arguments (not used).
#'
#' @return A data frame
#' @details
#' Follows the conventions of the \pkg{broom} and \pkg{broom.mixed} packages.
#'
#' Currently, `effects = "ran_pars"` also includes dispersion-related terms
#' (e.g., `phi`), which are not actually associated with random effects.
#'
#' Standard errors for spatial variance terms fit in log space (e.g., variance
#' terms, range, or parameters associated with the observation error) are
#' omitted to avoid confusion. Confidence intervals are still available.
#'
#' @export
#'
#' @importFrom assertthat assert_that
#' @importFrom stats plogis
#' @examples
#' fit <- sdmTMB(density ~ poly(depth_scaled, 2, raw = TRUE),
#' data = pcod_2011, mesh = pcod_mesh_2011,
#' family = tweedie()
#' )
#' tidy(fit)
#' tidy(fit, conf.int = TRUE)
#' tidy(fit, "ran_pars", conf.int = TRUE)
#'
#' pcod_2011$fyear <- as.factor(pcod_2011$year)
#' fit <- sdmTMB(density ~ poly(depth_scaled, 2, raw = TRUE) + (1 | fyear),
#' data = pcod_2011, mesh = pcod_mesh_2011,
#' family = tweedie()
#' )
#' tidy(fit, "ran_vals")
tidy.sdmTMB <- function(x, effects = c("fixed", "ran_pars", "ran_vals"), model = 1,
conf.int = FALSE, conf.level = 0.95, exponentiate = FALSE,
silent = FALSE, ...) {
effects <- match.arg(effects)
assert_that(is.logical(exponentiate))
assert_that(is.logical(conf.int))
if (conf.int) {
assert_that(is.numeric(conf.level),
conf.level > 0, conf.level < 1,
length(conf.level) == 1,
msg = "`conf.level` must be length 1 and between 0 and 1")
}
crit <- stats::qnorm(1 - (1 - conf.level) / 2)
if (exponentiate) trans <- exp else trans <- I
reinitialize(x)
delta <- isTRUE(x$family$delta)
assert_that(is.numeric(model))
assert_that(length(model) == 1L)
if (delta) assert_that(model %in% c(1, 2), msg = "`model` must be 1 or 2.")
if (!delta) assert_that(model == 1, msg = "Only one model: `model` must be 1.")
se_rep <- as.list(x$sd_report, "Std. Error", report = TRUE)
est_rep <- as.list(x$sd_report, "Estimate", report = TRUE)
se <- as.list(x$sd_report, "Std. Error", report = FALSE)
est <- as.list(x$sd_report, "Estimate", report = FALSE)
se <- c(se, se_rep)
est <- c(est, est_rep)
# cleanup:
est$epsilon_st <- NULL
est$zeta_s <- NULL
est$omega_s <- NULL
est$ln_H_input <- NULL
se$epsilon_st <- NULL
se$zeta_s <- NULL
se$omega_s <- NULL
se$ln_H_input <- NULL
subset_pars <- function(p, model) {
p$b_j <- if (model == 1) p$b_j else p$b_j2
p$ln_tau_O <- p$ln_tau_O[model]
p$ln_tau_Z <- p$ln_tau_Z[model]
p$ln_tau_E <- p$ln_tau_E[model]
p$ln_kappa <- as.numeric(p$ln_kappa[,model])
p$ln_phi <- p$ln_phi[model]
p$ln_tau_V <- as.numeric(p$ln_tau_V[,model])
p$ar1_phi <- as.numeric(p$ar1_phi[model])
p$ln_tau_G <- as.numeric(p$ln_tau_G[,model])
p$log_sigma_O <- as.numeric(p$log_sigma_O[1,model])
p$log_sigma_E <- as.numeric(p$log_sigma_E[1,model])
p$log_sigma_Z <- as.numeric(p$log_sigma_Z[,model])
p$log_range <- as.numeric(p$log_range[,model])
p$phi <- p$phi[model]
p$range <- as.numeric(p$range[,model])
p$sigma_E <- as.numeric(p$sigma_E[1,model])
p$sigma_O <- as.numeric(p$sigma_O[1,model])
p$sigma_Z <- as.numeric(p$sigma_Z[,model])
p$sigma_G <- as.numeric(p$sigma_G[,model])
p
}
est <- subset_pars(est, model)
se <- subset_pars(se, model)
if (x$family$family[[model]] %in% c("binomial", "poisson")) {
se$ln_phi <- NULL
est$ln_phi <- NULL
se$phi <- NULL
est$phi <- NULL
}
ii <- 1
# grab fixed effects:
.formula <- x$split_formula[[model]]$form_no_bars
.formula <- remove_s_and_t2(.formula)
if (!"mgcv" %in% names(x)) x[["mgcv"]] <- FALSE
fe_names <- colnames(model.matrix(.formula, x$data))
b_j <- est$b_j[!fe_names == "offset", drop = TRUE]
b_j_se <- se$b_j[!fe_names == "offset", drop = TRUE]
fe_names <- fe_names[!fe_names == "offset"]
out <- data.frame(term = fe_names, estimate = b_j, std.error = b_j_se, stringsAsFactors = FALSE)
if (exponentiate) out$estimate <- trans(out$estimate)
if (x$tmb_data$threshold_func > 0) {
if (x$threshold_function == 1L) {
par_name <- paste0(x$threshold_parameter, c("-slope", "-breakpt"))
} else {
par_name <- paste0(x$threshold_parameter, c("-s50", "-s95", "-smax"))
}
out <- rbind(
out,
data.frame(
term = par_name, estimate = est$b_threshold[,model,drop=TRUE],
std.error = se$b_threshold[,model,drop=TRUE], stringsAsFactors = FALSE
)
)
}
if (conf.int) {
out$conf.low <- as.numeric(trans(out$estimate - crit * out$std.error))
out$conf.high <- as.numeric(trans(out$estimate + crit * out$std.error))
}
out_re <- list()
log_name <- c("log_range")
name <- c("range")
if (!isTRUE(is.na(x$tmb_map$ln_phi))) {
log_name <- c(log_name, "ln_phi")
name <- c(name, "phi")
}
if (x$tmb_data$include_spatial[model]) {
log_name <- c(log_name, "log_sigma_O")
name <- c(name, "sigma_O")
}
if (!x$tmb_data$spatial_only[model]) {
log_name <- c(log_name, "log_sigma_E")
name <- c(name, "sigma_E")
}
if (x$tmb_data$spatial_covariate) {
log_name <- c(log_name, "log_sigma_Z")
name <- c(name, "sigma_Z")
}
if (x$tmb_data$random_walk) {
log_name <- c(log_name, "ln_tau_V")
name <- c(name, "tau_V")
}
if (length(est$ln_tau_G) > 0L) {
log_name <- c(log_name, "ln_tau_G")
name <- c(name, "sigma_G")
}
j <- 0
if (!"log_range" %in% names(est)) {
cli_warn("This model was fit with an old version of sdmTMB. Some parameters may not be available to the tidy() method. Re-fit the model with the current version of sdmTMB if you need access to any missing parameters.")
}
for (i in name) {
j <- j + 1
if (i %in% names(est)) {
.e <- est[[log_name[j]]]
.se <- se[[log_name[j]]]
.e <- if (is.null(.e)) NA else .e
.se <- if (is.null(.se)) NA else .se
non_log_name <- gsub("ln_", "", gsub("log_", "", log_name))
this <- non_log_name[j]
if (this == "tau_G") this <- "sigma_G"
if (this == "tau_V") this <- "sigma_V"
this_se <- as.numeric(se[[this]])
this_est <- as.numeric(est[[this]])
if (length(this_est) && !(all(this_se == 0) && all(this_est == 0))) {
out_re[[i]] <- data.frame(
term = i, estimate = this_est, std.error = this_se,
conf.low = exp(.e - crit * .se),
conf.high = exp(.e + crit * .se),
stringsAsFactors = FALSE
)
}
ii <- ii + 1
}
}
discard <- unlist(lapply(out_re, function(x) length(x) == 1L)) # e.g. old models and phi
out_re[discard] <- NULL
if ("tweedie" %in% x$family$family) {
out_re$tweedie_p <- data.frame(
term = "tweedie_p", estimate = plogis(est$thetaf) + 1,
std.error = se$tweedie_p, stringsAsFactors = FALSE)
out_re$tweedie_p$conf.low <- plogis(est$thetaf - crit * se$thetaf) + 1
out_re$tweedie_p$conf.high <- plogis(est$thetaf + crit * se$thetaf) + 1
ii <- ii + 1
}
if ("ar1_phi" %in% names(est)) {
ar_phi <- est$ar1_phi
ar_phi_se <- se$ar1_phi
rho_est <- 2 * stats::plogis(ar_phi) - 1
rho_lwr <- 2 * stats::plogis(ar_phi - crit * ar_phi_se) - 1
rho_upr <- 2 * stats::plogis(ar_phi + crit * ar_phi_se) - 1
out_re[[ii]] <- data.frame(
term = "rho", estimate = rho_est, std.error = NA,
conf.low = rho_lwr, conf.high = rho_upr, stringsAsFactors = FALSE
)
ii <- ii + 1
}
if (all(!x$tmb_data$include_spatial) && all(x$tmb_data$spatial_only)) out_re$range <- NULL
out_re <- do.call("rbind", out_re)
row.names(out_re) <- NULL
if (identical(est$ln_tau_E, 0)) out_re <- out_re[out_re$term != "sigma_E", ]
if (identical(est$ln_tau_V, 0)) out_re <- out_re[out_re$term != "sigma_V", ]
if (identical(est$ln_tau_G, 0)) out_re <- out_re[out_re$term != "sigma_G", ]
if (identical(est$ln_tau_O, 0)) out_re <- out_re[out_re$term != "sigma_O", ]
if (identical(est$ln_tau_Z, 0)) out_re <- out_re[out_re$term != "sigma_Z", ]
if (is.na(x$tmb_map$ar1_phi[model])) out_re <- out_re[out_re$term != "rho", ]
if (!conf.int) {
out_re[["conf.low"]] <- NULL
out_re[["conf.high"]] <- NULL
}
# random intercepts
n_re_int <- x$split_formula[[model]]$n_bars
if (n_re_int == 0 && effects == "ran_vals") {
cli::cli_abort("effects = 'ran_vals' currently only works with random intercepts (e.g., `+ (1 | g)`).")
}
if (n_re_int > 0) {
out_ranef <- list()
re_est <- as.list(x$sd_report, "Estimate")$RE
re_ses <- as.list(x$sd_report, "Std. Error")$RE
for(jj in 1:n_re_int) {
level_names <- levels(x$data[[x$split_formula[[model]]$barnames[jj]]])
n_levels <- length(level_names)
re_name <- x$split_formula[[model]]$barnames[jj]
if(jj==1) {
start_pos <- 1
end_pos <- n_levels
} else {
start_pos <- end_pos + 1
end_pos <- start_pos + n_levels - 1
}
out_ranef[[jj]] <- data.frame(
term = paste0(re_name,"_",level_names),
estimate = re_est[start_pos:end_pos,model],
std.error = re_ses[start_pos:end_pos,model],
conf.low = re_est[start_pos:end_pos,model] - crit * re_ses[start_pos:end_pos,model],
conf.high = re_est[start_pos:end_pos,model] + crit * re_ses[start_pos:end_pos,model],
stringsAsFactors = FALSE
)
if (!conf.int) {
out_ranef[[jj]][["conf.low"]] <- NULL
out_ranef[[jj]][["conf.high"]] <- NULL
}
}
out_ranef <- do.call("rbind", out_ranef)
row.names(out_ranef) <- NULL
}
out <- unique(out) # range can be duplicated
out_re <- unique(out_re)
if (requireNamespace("tibble", quietly = TRUE)) {
frm <- tibble::as_tibble
} else {
frm <- as.data.frame
}
if (effects == "fixed") {
return(frm(out))
} else if (effects == "ran_vals") {
return(frm(out_ranef))
} else if (effects == "ran_pars") {
return(frm(out_re))
} else {
cli_abort("The specified 'effects' type is not available.")
}
}
#' @importFrom generics tidy
#' @export
generics::tidy
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.