R/variance_plots.R

Defines functions gg_varpart gg_varpart_data

Documented in gg_varpart gg_varpart_data

#' Prepare data to plot variance components for a boral model
#'
#' This function calls the boral function \code{\link[boral]{calc.varpart}} to
#' estimate the proportion of variance accounted for by explanatory variables,
#' latent variables and any row effects. It returns the results as a data frame
#' suitable for plotting with ggplot. It is used by \code{\link{gg_varpart}} but
#' you can also use it directly if you are constructing plots 'manually'.
#'
#' This function requires that you fitted the model with explanatory variables
#' and specified \code{save.model = TRUE} when calling boral so that MCMC samples
#' were stored.
#'
#' @param model A boral model fitted with one or more latent variables.
#'
#' @return A data frame to use with ggplot that has the following columns:
#'   \describe{
#'     \item{varpart}{Variance component: one of X (explanatory variables),
#'       lv (latent variables), row (row effects).}
#'     \item{value}{Estimated proportion of variance.}
#'     \item{label}{Name of the response (e.g. species).}
#'   }
#'
#' @seealso \code{\link[boral]{calc.varpart}} \code{\link{gg_varpart}}
#'
#' @examples
#' #' library(boral)
#' library(ggboral)
#'
#' data(spider, package = "mvabund")
#' y <- spider$abun
#' X <- scale(spider$x)
#'
#' # Warning - these settings are only to make the example run quickly.
#' # Don't use them for a real analysis!
#' example.control <- list(n.burnin = 10, n.iteration = 100, n.thin = 1)
#'
#' # Note that we specify save.model = TRUE when calling boral
#' #
#' spiderfit_nb <- boral(y, X,
#'                       family = "negative.binomial",
#'                       lv.control = list(num.lv = 2),
#'                       row.eff = "fixed",
#'                       mcmc.control = example.control,
#'                       save.model = TRUE)
#'
#' dat.varpart <- gg_varpart_data(spiderfit_nb)
#' head(dat.varpart)
#'
#' @importFrom dplyr %>%
#' @export
#'
gg_varpart_data <- function(model) {

  if (model$num.X == 0) stop("Model was fitted with no explanatory variables")

  v <- boral::calc.varpart(model)

  dat <- do.call(cbind, v) %>%
    as.data.frame() %>%

    dplyr::mutate(label = colnames(model$y)) %>%

    # re-order responses by X var part
    dplyr::mutate(label = factor(label, levels = label[order(varpart.X)])) %>%

    # re-shape for plotting
    tidyr::gather(varpart, value, -label) %>%
    dplyr::mutate(varpart = stringr::str_replace(varpart, "^varpart\\.", ""))
}


#' Plot variance components as estimated by calc.varpart
#'
#' Plots estimates of the proportion of variance in each response accounted for
#' by explanatory variables, latent variables and any row effects, as estimated
#' by the boral function \code{calc.varpart}.
#'
#' Proceed with caution! Read the caveats and warnings detailed in the help page
#' for \code{calc.varpart} when interpreting the plot drawn by this function.
#' Also keep in mind that the results can be influenced by data artefacts, e.g.
#' when modelling species occurrence data with a binomial model, the estimate
#' of variance explained by predictor variables will tend to be higher for species
#' with few presences than for more common species.
#'
#' @param model A boral model fitted with one or more latent variables.
#'
#' @param as.percent If \code{TRUE}, format variance axis tick marks and label
#'   as percentages. If \code{FALSE} (default), format as proportions.
#'
#' @param label.means If \code{TRUE}, append the mean value for each variance
#'   component to the legend label. Default is \code{FALSE}.
#'
#' @return A ggplot object.
#'
#' @seealso \code{\link[boral]{calc.varpart}}  \code{\link{gg_varpart_data}}
#'
#' @examples
#' library(boral)
#' library(ggboral)
#'
#' data(spider, package = "mvabund")
#' y <- spider$abun
#' X <- scale(spider$x)
#'
#' # Warning - these settings are only to make the example run quickly.
#' # Don't use them for a real analysis!
#' example.control <- list(n.burnin = 10, n.iteration = 100, n.thin = 1)
#'
#' # Note that we specify save.model = TRUE when calling boral
#' #
#' spiderfit_nb <- boral(y, X,
#'                       family = "negative.binomial",
#'                       lv.control = list(num.lv = 2),
#'                       row.eff = "fixed",
#'                       mcmc.control = example.control,
#'                       save.model = TRUE)
#'
#' gg_varpart(spiderfit_nb,
#'            as.percent = TRUE,
#'            label.means = TRUE) + theme_bw()
#'
#' @importFrom dplyr %>%
#'
#' @export
#'
gg_varpart <- function(model, as.percent = FALSE, label.means = FALSE) {
  dat <- gg_varpart_data(model)
  if (as.percent) dat$value <- dat$value * 100

  Labels <- data.frame(
    varpart = c("X", "row", "lv"),
    label = c("Predictors", "Row effects", "Latent variables"),
    stringsAsFactors = FALSE)


  if (label.means) {
    if (as.percent)
      fn_fmt <- function(x) sprintf("(%.0f%%)", x)
    else
      fn_fmt <- function(x) sprintf("(%0.2f)", x)

    dat.mu <- dat %>%
      dplyr::group_by(varpart) %>%
      dplyr::summarize(mu = mean(value, na.rm = TRUE))

    Labels <- Labels %>%
      dplyr::left_join(dat.mu, by = "varpart") %>%
      dplyr::mutate(label = paste(label, fn_fmt(mu)))
  }


  gg <- ggplot(data = dat) +
    geom_bar(aes(x = label, y = value, fill = varpart),
             stat = "identity", width = 0.5, alpha = 0.6) +

    scale_fill_viridis_d(name = "Variance component",
                         breaks = Labels$varpart,
                         labels = Labels$label,
                         begin = 0, end = 0.8, direction = -1)

  if (as.percent)
    gg <- gg + labs(x = "", y = "Percentage of variance")
  else
    gg <- gg + labs(x = "", y = "Proportion of variance")

  gg +
    coord_flip() +
    theme(legend.position = "right")

}
mbedward/ggboral documentation built on June 27, 2020, 10:15 a.m.