#' Reformats output from a boral fit
#'
#' Reformats estimates from a fitted boral model to be in a slightly tidier
#' format, meaning everything is presented as a long data frame.
#'
#' Formatting the output into a long data frame maybe useful if one wishes to
#' take the estimated parameters (namely the posterior
#' mean/median/interquartile range/standard deviation, and the lower and upper
#' limits of the HPD intervals), and subsequently wrangle them for presentation
#' purposes using packages from the tidyverse e.g., Wickham and Henry (2018),
#' construct plots from them using the ggplot2 package (Wickham, 2016), and so
#' on.
#'
#' It is important to note that this function is solely designed to format
#' output from a fitted boral model relating to the estimated parameters. It
#' does not an additional information like model call and MCMC samples. Please
#' do NOT erase the original fitted model in place of this!
#'
#' @name tidyboral
#' @docType package
#' @param object An object of class "boral".
#' @return A a list containing the following components where applicable:
#' \item{lv.coefs}{A long format data frame containing the mean/median/standard
#' deviation/interquartile range of the posterior distributions of the latent
#' variable coefficients. This also includes the column-specific intercepts,
#' and dispersion parameters if appropriate.}
#'
#' \item{lv}{A long format data frame containing the mean/median/standard
#' deviation/interquartile range of the posterior distributions of the latent
#' variables.}
#'
#' \item{lv.covparams}{A long format data frame containing the
#' mean/median/standard deviation/interquartile range of the posterior
#' distributions for the parameters characterizing the correlation structure of
#' the latent variables when they are assumed to be non-independent across
#' rows.}
#'
#' \item{X.coefs}{A long format data frame containing the mean/median/standard
#' deviation/interquartile range of the posterior distributions of the
#' column-specific coefficients relating to \code{X}.}
#'
#' \item{traits.coefs}{A long format data frame containing the
#' mean/median/standard deviation/interquartile range of the posterior
#' distributions of the coefficients and standard deviation relating to the
#' species traits; please see \code{\link{about.traits}}.}
#'
#' \item{cutoffs}{A long format data frame containing the mean/median/standard
#' deviation/interquartile range of the posterior distributions of the common
#' cutoffs for ordinal responses (please see the not-so-brief tangent on
#' distributions above).}
#'
#' \item{ordinal.sigma}{A long format data frame containing the
#' mean/median/standard deviation/interquartile range of the posterior
#' distributions of the standard deviation for the random intercept normal
#' distribution corresponding to the ordinal response columns.}
#'
#' \item{powerparam}{A long format data frame containing the
#' mean/median/standard deviation/interquartile range of the posterior
#' distributions of the common power parameter for tweedie responses (please
#' see the not-so-brief tangent on distributions above).}
#'
#' \item{row.coefs}{A list with each element being a long format data frame
#' containing the mean/median/standard deviation/interquartile range of the
#' posterior distributions of the row effects. The length of the list is equal
#' to the number of row effects included i.e., \code{ncol(row.ids)}.}
#'
#' \item{row.sigma}{A list with each element being a long format data frame
#' containing the mean/median/standard deviation/interquartile range of the
#' posterior distributions of the standard deviation for the row random effects
#' normal distribution. The length of the list is equal to the number of row
#' effects included i.e., \code{ncol(row.ids)}.}
#'
#' \item{ssvs.indcoefs}{A long format data frame containing posterior
#' probabilities and associated standard deviation for individual SSVS of
#' coefficients in \code{X}.}
#'
#' \item{ssvs.gpcoefs}{A long format data frame containing posterior
#' probabilities and associated standard deviation for group SSVS of
#' coefficients in \code{X}.}
#'
#' \item{ssvs.traitscoefs}{A long format data frame containing posterior
#' probabilities and associated standard deviation for individual SSVS of
#' coefficients relating to species traits.}
#'
#' \item{hpdintervals}{A list containing long format data frames corresponding
#' to the lower and upper bounds of highest posterior density (HPD) intervals
#' for all the parameters indicated above. Please see
#' \code{\link{get.hpdintervals}} for more details.}
#' @section Warnings: \itemize{ \item This function is solely designed to
#' format output from a fitted boral model relating to the estimated
#' parameters. It does not an additional information like model call and MCMC
#' samples. Please do NOT erase the original fitted model in place of this! }
#' @author
#' c("\\Sexpr[results=rd,stage=build]{tools:::Rd_package_author(\"#1\")}",
#' "boral")\Sexpr{tools:::Rd_package_author("boral")}
#'
#' Maintainer:
#' c("\\Sexpr[results=rd,stage=build]{tools:::Rd_package_maintainer(\"#1\")}",
#' "boral")\Sexpr{tools:::Rd_package_maintainer("boral")}
#' @seealso \code{\link{boral}} for the fitting function on which
#' \code{tidyboral} can be applied.
#' @references \itemize{ \item Wickham, H. (2016). ggplot2: elegant graphics
#' for data analysis. Springer.
#'
#' \item Wickham, H., & Henry, L. (2017). Tidyr: Easily tidy data with `spread
#' ()' and `gather ()' functions.}
#' @examples
#'
#' \dontrun{
#' ## NOTE: The values below MUST NOT be used in a real application;
#' ## they are only used here to make the examples run quick!!!
#' example_mcmc_control <- list(n.burnin = 10, n.iteration = 100,
#' n.thin = 1)
#'
#' library(mvabund) ## Load a dataset from the mvabund package
#' data(spider)
#' y <- spider$abun
#'
#' spiderfit_nb <- boral(y, family = "negative.binomial", lv.control = list(num.lv = 2),
#' row.eff = "fixed", mcmc.control = example_mcmc_control)
#'
#' spiderfit_nb_tidy <- tidyboral(spiderfit_nb)
#' }
#'
tidyboral <- function(object) {
message("This function is purely to produce \"tidier\" output for boral. Please do not overwrite the original boral object!")
tidyout_fit <- list()
lv_coefs_arr <- abind(
object$lv.coefs.median,
object$lv.coefs.mean,
object$lv.coefs.iqr,
object$lv.coefs.sd,
along = 3
)
if (object$num.lv > 0) {
lv_arr <- abind(
object$lv.median,
object$lv.mean,
object$lv.iqr,
object$lv.sd,
along = 3
)
dimnames(lv_arr) <- list(rows = rownames(object$y), lv = paste0("lv", 1:object$num.lv), type = c("median", "mean", "iqr", "sd"))
lv_arr <- melt(lv_arr, value.name = "estimate", varnames = names(dimnames(lv_arr)))
tidyout_fit$lv <- lv_arr
rm(lv_arr)
if (dim(lv_coefs_arr)[2] == (object$num.lv + 2)) {
dimnames(lv_coefs_arr) <- list(
cols = colnames(object$y),
coefficients = c("beta0", paste0("theta", 1:object$num.lv), "Dispersion"), type = c("median", "mean", "iqr", "sd")
)
}
if (dim(lv_coefs_arr)[2] == (object$num.lv + 1)) {
dimnames(lv_coefs_arr) <- list(
cols = colnames(object$y),
coefficients = c("beta0", paste0("theta", 1:object$num.lv)), type = c("median", "mean", "iqr", "sd")
)
}
if (object$lv.control$type != "independent") {
lv_params_arr <- cbind(object$lv.covparams.median, object$lv.covparams.mean, object$lv.covparams.iqr, object$lv.covparams.sd)
if (nrow(lv_params_arr) == 1) {
rownames(lv_params_arr) <- c("spatialscale (tau1)")
}
if (nrow(lv_params_arr) == 2) {
rownames(lv_params_arr) <- c("spatialscale (tau1)", "spatialpower (tau2)")
}
colnames(lv_params_arr) <- c("median", "mean", "iqr", "sd")
lv_params_arr <- melt(lv_params_arr, value.name = "estimate", varnames = c("parameters", "type"))
tidyout_fit$lv.covparams <- lv_params_arr
rm(lv_params_arr)
}
}
if (object$num.lv == 0) {
if (dim(lv_coefs_arr)[2] == 2) {
dimnames(lv_coefs_arr) <- list(
cols = colnames(object$y),
coefficients = c("beta0", "Dispersion"), type = c("median", "mean", "iqr", "sd")
)
}
if (dim(lv_coefs_arr)[2] == 1) {
dimnames(lv_coefs_arr) <- list(
cols = colnames(object$y),
coefficients = c("beta0"), type = c("median", "mean", "iqr", "sd")
)
}
}
lv_coefs_arr <- melt(lv_coefs_arr, value.name = "estimate", varnames = names(dimnames(lv_coefs_arr)))
tidyout_fit$lv.coefs <- lv_coefs_arr
rm(lv_coefs_arr)
if (object$row.eff != "none") {
tidyout_fit$row.coefs <- vector("list", ncol(object$row.ids))
names(tidyout_fit$row.coefs) <- colnames(object$row.ids)
for (k in 1:ncol(object$row.ids))
{
row_coefs_arr <- cbind(object$row.coefs[[k]]$median, object$row.coefs[[k]]$mean, object$row.coefs[[k]]$iqr, object$row.coefs[[k]]$sd)
dimnames(row_coefs_arr) <- list(rowID = 1:length(unique(object$row.ids[, k])), type = c("median", "mean", "iqr", "sd"))
row_coefs_arr <- melt(row_coefs_arr, value.name = "estimate", varnames = names(dimnames(row_coefs_arr)))
tidyout_fit$row.coefs[[k]] <- row_coefs_arr
rm(row_coefs_arr)
}
if (object$row.eff == "random") {
tidyout_fit$row.sigma <- vector("list", ncol(object$row.ids))
names(tidyout_fit$row.sigma) <- colnames(object$row.ids)
for (k in 1:ncol(object$row.ids))
{
tidyout_fit$row.sigma[[k]] <- melt(object$row.sigma[[k]], value.name = "estimate")
}
}
}
if (object$num.X > 0) {
X_coefs_arr <- abind(
object$X.coefs.median,
object$X.coefs.mean,
object$X.coefs.iqr,
object$X.coefs.sd,
along = 3
)
dimnames(X_coefs_arr) <- list(cols = colnames(object$y), coefficients = colnames(object$X), type = c("median", "mean", "iqr", "sd"))
X_coefs_arr <- melt(X_coefs_arr, value.name = "estimate", varnames = names(dimnames(X_coefs_arr)))
tidyout_fit$X.coefs <- X_coefs_arr
rm(X_coefs_arr)
if (any(object$prior.control$ssvs.index == 0)) { ## You should not be able to enter this loop if num.traits > 0!
ssvs_indcoefs_arr <- abind(
object$ssvs.indcoefs.mean,
object$ssvs.indcoefs.sd,
along = 3
)
dimnames(ssvs_indcoefs_arr) <- list(cols = colnames(object$y), coefficients = colnames(object$X), type = c("mean", "sd"))
ssvs_indcoefs_arr <- melt(ssvs_indcoefs_arr, value.name = "estimate", varnames = names(dimnames(ssvs_indcoefs_arr)))
tidyout_fit$ssvs.indcoefs <- ssvs_indcoefs_arr
rm(ssvs_indcoefs_arr)
}
if (any(object$prior.control$ssvs.index > 0)) {
ssvs_gpcoefs_arr <- cbind(object$ssvs.gpcoefs.mean, object$ssvs.gpcoefs.sd)
dimnames(ssvs_gpcoefs_arr) <- list(Gp = rownames(object$ssvs.gpcoefs.mean), type = c("mean", "sd"))
ssvs_gpcoefs_arr <- melt(ssvs_gpcoefs_arr, value.name = "estimate", varnames = names(dimnames(ssvs_gpcoefs_arr)))
tidyout_fit$ssvs.gpcoefs <- ssvs_gpcoefs_arr
rm(ssvs_gpcoefs_arr)
}
if (any(unlist(object$prior.control$ssvs.traitsindex) == 0)) {
ssvs_traitcoefs_arr <- abind(
object$ssvs.traitscoefs.mean,
object$ssvs.traitscoefs.sd,
along = 3
)
dimnames(ssvs_traitcoefs_arr) <- list(
X.coefficients = c("beta0", colnames(object$X)),
traits.coefficients = colnames(object$traits), type = c("mean", "sd")
)
ssvs_traitcoefs_arr <- melt(ssvs_traitcoefs_arr, value.name = "estimate", varnames = names(dimnames(ssvs_traitcoefs_arr)))
tidyout_fit$ssvs.traitscoefs <- ssvs_traitcoefs_arr
rm(ssvs_traitcoefs_arr)
}
}
if (object$num.traits > 0) {
traitcoefs_arr <- abind(
object$traits.coefs.median,
object$traits.coefs.mean,
object$traits.coefs.iqr,
object$traits.coefs.sd,
along = 3
)
dimnames(traitcoefs_arr) <- list(X.coefficients = c("beta0", colnames(object$X)), traits.coefficients = c("kappa0", colnames(object$traits), "sigma"), type = c("median", "mean", "iqr", "sd"))
traitcoefs_arr <- melt(traitcoefs_arr, value.name = "estimate", varnames = names(dimnames(traitcoefs_arr)))
tidyout_fit$traits.coefs <- traitcoefs_arr
rm(traitcoefs_arr)
}
if (any(object$family == "ordinal")) {
cutoffs_arr <- cbind(object$cutoffs.median, object$cutoffs.mean, object$cutoffs.iqr, object$cutoffs.sd)
dimnames(cutoffs_arr) <- list(parameters = paste0(1:(object$num.ord.levels - 1), "|", 2:object$num.ord.levels), type = c("median", "mean", "iqr", "sd"))
cutoffs_arr <- melt(cutoffs_arr, value.name = "estimate", varnames = names(dimnames(cutoffs_arr)))
tidyout_fit$cutoffs <- cutoffs_arr
rm(cutoffs_arr)
## If there are traits, then ordinal random intercept is either zero (if there is only 1 ordinal column, or has the trait.sigma (if there are >1 ordinal columns)
if (sum(object$family == "ordinal") > 1 & is.null(traits)) {
ordinal_sigma_vec <- c(object$ordinal.sigma.median, object$ordinal.sigma.mean, object$ordinal.sigma.iqr, object$ordinal.sigma.sd)
names(ordinal_sigma_vec) <- c("median", "mean", "iqr", "sd")
ordinal_sigma_vec <- melt(ordinal_sigma_vec)
colnames(ordinal_sigma_vec) <- c("estimate", "type")
tidyout_fit$ordinal.sigma <- ordinal_sigma_vec
rm(ordinal_sigma_vec)
}
}
if (any(object$family == "tweedie")) {
powerparam_vec <- c(object$powerparam.median, object$powerparam.mean, object$powerparam.iqr, object$powerparam.sd)
names(powerparam_vec) <- c("median", "mean", "iqr", "sd")
powerparam_vec <- melt(powerparam_vec)
colnames(powerparam_vec) <- c("estimate", "type")
tidyout_fit$powerparam <- powerparam_vec
rm(powerparam_vec)
}
tidyout_fit$hpdintervals <- tidyboral.hpdintervals(object)
return(tidyout_fit)
}
tidyboral.hpdintervals <- function(object) {
n <- nrow(object$y)
p <- ncol(object$y)
num.lv <- object$lv.control$num.lv
tidyout_fit <- list()
if (num.lv > 0) {
tidyout_fit$lv <- melt(object$hpdintervals$lv, value.name = "estimate", varnames = names(dimnames(object$hpdintervals$lv)))
if (object$lv.control$type != "independent") {
lv_covparams_arr <- melt(object$hpdintervals$lv.covparams, value.name = "estimate")
colnames(lv_covparams_arr) <- c("parameters", "type", "estimate")
tidyout_fit$lv.covparams <- lv_covparams_arr
rm(lv_covparams_arr)
}
}
tidyout_fit$lv.coefs <- melt(object$hpdintervals$lv.coefs, value.name = "estimate", varnames = names(dimnames(object$hpdintervals$lv.coefs)))
if (!is.null(object$hpdintervals$row.coefs)) {
tidyout_fit$row.coefs <- vector("list", ncol(object$row.ids))
names(tidyout_fit$row.coefs) <- colnames(object$row.ids)
for (k in 1:ncol(object$row.ids))
{
tidyout_fit$row.coefs[[k]] <- melt(object$hpdintervals$row.coefs[[k]], value.name = "estimate")
colnames(tidyout_fit$row.coefs[[k]]) <- c("rowID", "type", "estimate")
}
if (!is.null(object$hpdintervals$row.sigma)) {
tidyout_fit$row.sigma <- vector("list", ncol(object$row.ids))
names(tidyout_fit$row.sigma) <- colnames(object$row.ids)
for (k in 1:ncol(object$row.ids))
{
tidyout_fit$row.sigma[[k]] <- melt(object$hpdintervals$row.sigma[[k]], value.name = "estimate")
}
}
}
if (!is.null(object$hpdintervals$X.coefs)) {
tidyout_fit$X.coefs <- melt(object$hpdintervals$X.coefs, value.name = "estimate", varnames = names(dimnames(object$hpdintervals$X.coefs)))
}
if (!is.null(object$hpdintervals$traits.coefs)) { ## If T.params exists, then X.coefs are regressed against traits
tidyout_fit$traits.coefs <- melt(object$hpdintervals$traits.coefs, value.name = "estimate", varnames = names(dimnames(object$hpdintervals$traits.coefs)))
}
if (!is.null(object$hpdintervals$cutoffs)) { ## If cutoffs exists, then cutoffs are there and some columns involved ordinal responses
dimnames(object$hpdintervals$cutoffs) <- list(parameters = paste0(1:(object$num.ord.levels - 1), "|", 2:object$num.ord.levels), type = c("median", "mean", "iqr", "sd"))
tidyout_fit$cutoffs <- melt(object$hpdintervals$cutoffs, value.name = "estimate", varnames = names(dimnames(object$hpdintervals$cutoffs)))
if (!is.null(object$hpdintervals$ordinal.sigma)) {
tidyout_fit$ordinal.sigma <- melt(object$hpdintervals$ordinal.sigma, value.name = "estimate")
}
}
if (!is.null(object$hpdintervals$powerparam)) { ## If powerparam exists, then power parameters are there and some columns involved tweedie responses
tidyout_fit$powerparam <- melt(object$hpdintervals$powerparam, value.name = "estimate")
}
return(tidyout_fit)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.