Nothing
#' Partitioning R2 (R-square) in mixed models
#'
#' R2, semi-partial (part) R2 for predictors and their combinations as well as inclusive R2,
#' structure coefficients and beta weights for Gaussian, Poisson and binomial
#' mixed models.
#'
#' @param mod Fitted lme4 model (a merMod object).
#' @param partvars Character vector specifying the predictors (fixed effects) for which to partition the R2.
#' Can be main effects like c("Var1", "Var2") and interactions ("Var1:Var2"). Predictors
#' specified in partvars have to be named precisely like the terms in the formula to
#' fit the model.
#' @param data The data.frame used to fit the lme4 model. If not provided,
#' partR2 will try to fetch it.
#' @param R2_type "marginal" or "conditional" R2. With "marginal", the variance explained
#' by fixed effects is calculated. With "conditional", the variance explained by
#' both fixed and random effects is calculated.
#' @param max_level Level up to which shared semi-partial R2s are calculated.
#' The number of sets for which to calculate R2 increases exponentially,
#' i.e. for 10 variables 2^10 - 1 R2s can be calculated. If you are
#' only interested in the unique but not the shared effects, use max_level = 1.
#' If interested in unique effects and combinations of two terms,
#' use max_level = 2 etc.
#' @param nboot Number of parametric bootstrap iterations for confidence interval estimation
#' (defaults to NULL, i.e. no bootstrapping). Larger numbers of bootstraps give a better
#' asymptotic CI, but may be time-consuming. Bootstrapping can be switched on by setting
#' \code{nboot = 1000}.
#' @param CI Width of the required confidence interval between 0 and 1 (defaults to
#' 0.95).
#' @param parallel If TRUE, computation uses \code{future} within \code{furrr::map} which allows
#' parallelisation. However, it is necessary to specify a plan before running
#' \code{partR2()}. To see which options you have, check \code{?future::plan} and have
#' a look at our vignette for details. When running RStudio,
#' usually \code{plan(multisession, workers = 4)} is a good choice,
#' when you want to use 4 cores. To detect how many cores you have, use
#' \code{parallel::detectCores()}. If no plan is specified, \code{partR2} will simply run
#' sequentially.
#' @param expct A string specifying the method for estimating the expectation in Poisson models
#' with log link and in Binomial models with logit link (in all other cases the argument is ignored).
#' The only valid terms are 'meanobs', 'latent', 'none' (and 'liability for binary and proportion data).
#' With the default 'meanobs', the expectation is estimated as the mean of the observations in the sample.
#' With 'latent', the expectation is estimated from estimates of the intercept and variances on the link scale.
#' While this is a preferred solution, it is susceptible to the distribution of fixed effect covariates and gives
#' appropriate results typically only when all covariances are centered to zero. With 'liability'
#' estimates follow formulae as presented in Nakagawa & Schielzeth (2010). With 'none', R2 is calculated
#' without distribution specific variance in the denominator.
#' @param olre Logical, defaults to TRUE. This argument allows the user to prevent the automatic fitting of
#' an observation level random effect (by setting it to FALSE) in Poisson and binomial models.
#' The OLRE is used to account for overdispersion.
#' @param partbatch List of character vectors with predictors that should be fitted and
#' removed together. For example, partbatch = list(batch1 = c("V1", "V2", "V3"),
#' batch2 = c("V4", "V5", "V6")) would calculate part R2 only for combinations of
#' predictors which contain the variables V1, V2, V3 together or/and V4,V5,V6 together.
#' This is useful when the number of potential subsets gets too large to
#' be computationally practical, for example when dummy coding is used.
#' See our vignette for details. This feature is still experimental and
#' should be used with caution.
#' @param allow_neg_r2 Calculating part R2 involves fitting two models, one with
#' and one without the predictor of interest. In cases where the predictor
#' has little association with the response, the resulting part R2 value
#' can become negative. By default we set negative values to 0, but by
#' setting this parameter to TRUE, R2 values can become negative.
#'
#'
#'
#' @return
#' Returns an object of class \code{partR2} that is a a list with the following elements:
#' \item{call}{model call}
#' \item{R2_type}{Marginal or conditional R2}
#' \item{R2}{R2 and confidence intervals for full model and semi-partial R2 for
#' predictors and their combinations}
#' \item{SC}{Structure coefficients and confidence intervals. SC are the
#' correlation between a predictor and the predicted response.}
#' \item{IR2}{Inclusive R2. This is SC^2 * R2_full.}
#' \item{BW}{Standardised model estimates (beta weights) for fixed effects. Beta weights for Gaussian models
#' are calculated as beta * sd(x)/sd(y), with beta being the estimated
#' slope of a fixed effect for predictor x and response y. Beta weight for Non-Gaussian
#' models are calculated as beta * sd(x). Beta weights for interactions or polynomial
#' terms are not informative at the moment and we recommend users to standardise
#' variables themselves before fitting the model and to look at the model estimates (Ests)
#' instead of beta weights (BW) in the partR2 output. See vignette for details. }
#' \item{Ests}{Model estimates and confidence intervals.}
#' \item{R2_boot}{Parametric bootstrap samples for R2 for full model and partitions}
#' \item{SC_boot}{Parametric bootstrap samples for structure coefficients}
#' \item{IR2_boot}{Parametric bootstrap samples for inclusive R2 values}
#' \item{BW_boot}{Parametric bootstrap samples for beta weights}
#' \item{Ests_boot}{Parametric bootstrap samples for model estimates}
#' \item{partvars}{Predictors to partition}
#' \item{CI}{Coverage of the confidence interval as specified by the \code{CI} argument.}
#' \item{boot_warnings}{Potential warnings from estimating partial R2s during
#' parametric bootstrapping}
#' \item{boot_message}{Potential messages from estimating partial R2s
#' during parametric bootstrapping. Common are for example singularity messages
#' in lme4.}
#'
#' @references
#'
#' Nakagawa, S., & Schielzeth, H. (2013). \emph{A general and simple method for obtaining R2 from
#' generalized linear mixed-effects models}. Methods in Ecology and Evolution, 4(2), 133-142.
#'
#' Nakagawa, S., Johnson, P. C., & Schielzeth, H. (2017). \emph{The coefficient of
#' determination R2 and intra-class correlation coefficient from generalized
#' linear mixed-effects models revisited and expanded}. Journal of the Royal Society Interface, 14(134), 20170213.
#'
#' @examples
#'
#' data(biomass)
#' library(lme4)
#'
#' # scale data
#' biomass[] <- lapply(biomass, function(x) if (is.double(x)) scale(x) else x)
#'
#' # Gaussian data
#' mod <- lmer(Biomass ~ Year + Temperature + Precipitation + SpeciesDiversity + (1 | Population),
#' data = biomass)
#'
#' # R2
#' (R2_1 <- partR2(mod))
#'
#' # R2 with CI
#' (R2_2 <- partR2(mod, R2_type = "marginal", nboot = 15, CI = 0.95))
#'
#' # Part (semi-partial) R2s with CIs
#' (R2_3 <- partR2(mod,
#' partvars = c("SpeciesDiversity", "Temperature", "Precipitation"),
#' R2_type = "marginal", nboot = 10, CI = 0.95
#' ))
#' @export
partR2 <- function(mod, partvars = NULL, data = NULL, R2_type = "marginal", max_level = NULL,
nboot = NULL, CI = 0.95, parallel = FALSE, expct = "meanobs",
olre = TRUE, partbatch = NULL, allow_neg_r2 = FALSE) {
# initial checks
if (!inherits(mod, "merMod")) stop("partR2 only supports merMod objects at the moment")
partition <- ifelse(is.null(partvars) & is.null(partbatch), FALSE, TRUE)
if (!is.null(nboot)) {
if (nboot < 2) stop("nboot has to be greater than 1 or NULL")
}
if (!(R2_type %in% c("marginal", "conditional"))) {
stop("R2_type has to be marginal or conditional")
}
# check if data is there and if not try to get it
if (is.null(data)) {
dat_name <- deparse(mod@call$data)
data <- tryCatch(eval(mod@call$data), error=function(e) return(NULL))
if (is.null(data)) stop(paste0("data ", dat_name,
" cannot be found. Please provide it with the data argument"))
}
# make combinations of predictors from partvars and partbatch
all_comb <- make_combs(partvars, partbatch, max_level)
# get family and response variable
if (!(stats::family(mod)[[1]] %in% c("gaussian", "binomial", "poisson"))) {
stop("partR2 currently only handles gaussian, binomial and poisson models")
}
# overdispersion, will only apply when poisson/proportion
overdisp_out <- model_overdisp(mod = mod, dat = data, olre = olre)
mod <- overdisp_out$mod
data_mod <- overdisp_out$dat
overdisp_name <- overdisp_out$overdisp_name
# point estimates for all statistics
# model estimates
ests_pe <- fixef_simple(mod, intcp = FALSE)
# beta weights
bws_pe <- get_bw(mod)
# calculate R2 and partial R2s
r2s_pe <- part_R2s(
mod = mod, expct = expct, overdisp_name = overdisp_name,
R2_type = R2_type, all_comb = all_comb,
partition = partition, data_mod = data_mod, allow_neg_r2 = allow_neg_r2
)
# structure coefficients
scs_pe <- SC_pe(mod)
# inclusive r2s
ir2s_pe <- scs_pe %>%
dplyr::mutate(estimate = c(.data$estimate)^2 * r2s_pe[r2s_pe$term == "Full",
"estimate", drop = TRUE])
# param. bootstrapping
if (!is.null(nboot)) {
# get bootstrap estimates
boot_all <- bootstrap_all(
nboot = nboot, mod = mod, R2_type = R2_type,
all_comb = all_comb, partition = partition,
data_mod = data_mod, allow_neg_r2 = allow_neg_r2,
parallel = parallel, expct = expct, overdisp_name = overdisp_name
)
# all iterations in one df as list columns and calculating inclusive r2
boot_out <- purrr::map_dfr(boot_all, "result", .id = "iter") %>%
dplyr::mutate(ir2s = purrr::map2(.data$scs, .data$r2s, function(sc, r2) {
tidyr::tibble(term = sc$term, estimate = sc$estimate^2 * unlist(r2[r2$term == "Full", "estimate"]))
})) %>%
dplyr::mutate(warnings = purrr::map(boot_all, "warnings"),
messages = purrr::map(boot_all, "messages"))
}
# if no bootstrap return same data structure with NA
if (is.null(nboot)) {
dfs_pe <- list(r2s_pe, ests_pe, bws_pe, scs_pe, ir2s_pe)
boot_out <- purrr::map(dfs_pe, function(x){
x$estimate <- NA
list(x)
}) %>%
stats::setNames(c("r2s", "ests", "scs", "bws", "ir2s")) %>%
dplyr::as_tibble() %>%
dplyr::mutate(iter = NA, .before = 1) %>%
dplyr::mutate(warnings = NA,
messages = NA)
}
# calculate CIs over list columns
get_cis <- function(stc, df_pe) {
# stc <- dplyr::enquo(stc)
boot_out[[stc]] %>%
dplyr::bind_rows() %>%
dplyr::group_by(.data$term) %>%
dplyr::summarise(estimate = list(.data$estimate)) %>%
dplyr::rowwise() %>%
dplyr::mutate(calc_CI(unlist(.data$estimate), CI)) %>%
dplyr::select(-.data$estimate) %>%
dplyr::right_join(x = df_pe, by = "term")
}
stcs <- list("r2s", "ests", "bws", "scs", "ir2s")
dfs_pe <- list(r2s_pe, ests_pe, bws_pe, scs_pe, ir2s_pe)
pe_cis <- purrr::map2(stcs, dfs_pe, get_cis) %>%
stats::setNames(stcs)
# calculate numerator degrees of freedom and add to partial R2 object
if ((length(all_comb) == 1) & (any(is.na(all_comb)))) {
ndf_terms <- "Full"
} else {
ndf_terms <- c("Full", all_comb)
}
pe_cis$r2s$ndf <- suppressWarnings(purrr::map_int(
ndf_terms, get_ndf,
mod, data_mod
))
# change partbatch with names, if present
if (!is.null(names(partbatch))) {
part_names <- mod_names_partbatch(partbatch, unname(pe_cis$r2s$term))
pe_cis$r2s$term <- part_names
boot_out$r2s <- purrr::map(boot_out$r2s, function(x) {
x$term <- part_names
x
})
}
res <- list(
call = mod@call,
R2_type = R2_type,
R2 = pe_cis$r2s,
SC = pe_cis$scs,
IR2 = pe_cis$ir2s,
BW = pe_cis$bws,
Ests = pe_cis$ests,
R2_boot = boot_to_df(boot_out$r2s),
SC_boot = boot_to_df(boot_out$scs),
IR2_boot = boot_to_df(boot_out$ir2s),
BW_boot = boot_to_df(boot_out$bws),
Ests_boot = boot_to_df(boot_out$ests),
partvars = partvars,
partbatch = ifelse(is.null(partbatch), NA, partbatch),
CI = CI,
boot_warnings = boot_out$warnings,
boot_messages = boot_out$messages
)
class(res) <- "partR2"
return(res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.