knitr::opts_chunk$set(echo = TRUE)
This package provides three R^2^s for statistical models with correlated errors including classes: 'lmerMod' (LMM), 'glmerMod' (GLMM), 'phylolm' (Phylogenetic GLS), and 'binaryPGLMM/phyloglm/communityPGLMM' (Phylogenetic Logistic Regression). Detailed technical descriptions can be found in Ives 2018.
This package can be installed with:
install.packages("rr2") # or install the latest version # install.packages("devtools") devtools::install_github("arives/rr2")
This package has three main functions: R2.resid()
, R2.lik()
, and R2.pred()
. You can use them individually in the form of, e.g., R2.resid(mod, mod.r)
where mod
is the full model and mod.r
is the reduced model for partial R2s. If you do not include the reduced model mod.r
, then the appropriate model with just the intercept is used to give the total R^2^. When using R2.resid
and R2.pred
with PGLS, you need to include the phylo object containing a phylogenetic tree, e.g., R2.resid(mod, mod.r, phy = phy)
.
You can calculate all three R^2^s at the same time with R2(mod, mod.r)
. You can also specify which R^2^(s) to calculate within this function by turning off unwanted methods, e.g., R2(mod, mod.r, resid = FALSE)
or R2(mod, mod.r, pred = FALSE)
.
This package also has some helper functions such as inv.logit()
, partialR2()
, and partialR2adj()
.
| Models | Available.R2s | | :------------------------------- | :------------------------- | | LM | partialR2, partialR2adj | | LM | R2.pred, R2.resid, R2.lik | | GLM | R2.pred, R2.resid, R2.lik | | LMM: lmerMod | R2.pred, R2.resid, R2.lik | | GLMM: glmerMod | R2.pred, R2.resid, R2.lik | | PGLS: phylolm | R2.pred, R2.resid, R2.lik | | PGLMM: binaryPGLMM | R2.pred, R2.resid, ------- | | PGLMM: phyloglm | -------, --------, R2.lik | | PGLMM: communityPGLMM (gaussian) | R2.pred, --------, R2.lik | | PGLMM: communityPGLMM (binomial) | R2.pred, --------, ------- |
First, let's simulate data that will be used to fit various models.
# data set.seed(123) p1 <- 10; nsample <- 10; n <- p1 * nsample d <- data.frame(x1 = rnorm(n = n), x2 = rnorm(n = n), u1 = rep(1:p1, each = nsample), u2 = rep(1:p1, times = nsample)) d$u1 <- as.factor(d$u1); d$u2 <- as.factor(d$u2) # LMM: y with random intercept b1 <- 1; b2 <- -1; sd1 <- 1.5 d$y_re_intercept <- b1 * d$x1 + b2 * d$x2 + rep(rnorm(n = p1, sd = sd1), each = nsample) + # random intercept u1 rep(rnorm(n = p1, sd = sd1), times = nsample) + # random intercept u2 rnorm(n = n) # LMM: y with random slope b1 <- 0; sd1 <- 1; sd.x1 <- 2 d$y_re_slope <- b1 * d$x1 + rep(rnorm(n = p1, sd = sd1), each = nsample) + # random intercept u1 d$x1 * rep(rnorm(n = p1, sd = sd.x1), times = nsample) + # random slope u1 rnorm(n = n) # GLMM b1 <- 1; sd1 <- 1.5 prob <- rr2::inv.logit(b1 * d$x1 + rep(rnorm(n = p1, sd = sd1), each = nsample)) # random intercept u1 d$y_binary <- rbinom(n = n, size = 1, prob = prob) # PGLS b1 <- 1.5; signal <- 0.7 phy <- ape::compute.brlen(ape::rtree(n = n), method = "Grafen", power = 1) phy.x <- ape::compute.brlen(phy, method = "Grafen", power = .0001) x_trait <- ape::rTraitCont(phy.x, model = "BM", sigma = 1) e <- signal^0.5 * ape::rTraitCont(phy, model = "BM", sigma = 1) + (1-signal)^0.5 * rnorm(n=n) d$x_trait <- x_trait[match(names(e), names(x_trait))] d$y_pgls <- b1 * x_trait + e rownames(d) <- phy$tip.label # Phylogenetic Logistic Regression b1 <- 1.5; signal <- 2 e <- signal * ape::rTraitCont(phy, model = "BM", sigma = 1) e <- e[match(phy$tip.label, names(e))] d$y_phy_binary <- rbinom(n = n, size = 1, prob = rr2::inv.logit(b1 * d$x1 + e)) head(d)
Then, let's fit some models and calculate their R^2^s.
library(rr2) z.f.lm <- lm(y_re_intercept ~ x1 + x2, data = d) z.x.lm <- lm(y_re_intercept ~ x1, data = d) z.0.lm <- lm(y_re_intercept ~ 1, data = d) R2(mod = z.f.lm, mod.r = z.x.lm) partialR2(mod = z.f.lm, mod.r = z.x.lm) partialR2adj(mod = z.f.lm, mod.r = z.x.lm)
z.f.lmm <- lme4::lmer(y_re_intercept ~ x1 + x2 + (1 | u1) + (1 | u2), data = d, REML = F) z.x.lmm <- lme4::lmer(y_re_intercept ~ x1 + (1 | u1) + (1 | u2), data = d, REML = F) z.v.lmm <- lme4::lmer(y_re_intercept ~ 1 + (1 | u2), data = d, REML = F) z.0.lmm <- lm(y_re_intercept ~ 1, data = d) R2(mod = z.f.lmm, mod.r = z.x.lmm) R2(mod = z.f.lmm, mod.r = z.v.lmm) R2(mod = z.f.lmm, mod.r = z.0.lmm) R2(mod = z.f.lmm) # if omit mod.r, default will be the simplest model, such as z.0.lmm here.
z.f.glmm <- lme4::glmer(y_binary ~ x1 + (1 | u1), data = d, family = "binomial") z.x.glmm <- lme4::glmer(y_binary ~ 1 + (1 | u1), data = d, family = "binomial") z.v.glmm <- glm(y_binary ~ x1, data = d, family = "binomial") R2(mod = z.f.glmm, mod.r = z.x.glmm) R2(mod = z.f.glmm, mod.r = z.v.glmm) R2(mod = z.f.glmm) # specify sigma2_d for R2.resid() R2.resid(mod = z.f.glmm, mod.r = z.v.glmm, sigma2_d = "s2w") R2.resid(mod = z.f.glmm, mod.r = z.v.glmm, sigma2_d = "NS") R2.resid(mod = z.f.glmm, mod.r = z.v.glmm, sigma2_d = "rNS")
z.f.pgls <- phylolm::phylolm(y_pgls ~ x_trait, phy = phy, data = d, model = "lambda") z.v.lm <- lm(y_pgls ~ x_trait, data = d) # phy is needed for phylogenetic models' R2.resid and R2.pred R2(mod = z.f.pgls, mod.r = z.v.lm, phy = phy) R2(mod = z.f.pgls, phy = phy) # This also works for models fit with nlme::gls() z.f.gls <- nlme::gls(y_pgls ~ x_trait, data = d, correlation = ape::corPagel(1, phy), method = "ML") z.x.gls <- nlme::gls(y_pgls ~ 1, data = d, correlation = ape::corPagel(1, phy), method = "ML") R2(mod = z.f.gls, mod.r = z.v.lm) R2(mod = z.f.gls)
Note: we modified ape::binaryPGLMM
to return necessary components for rr2::R2()
.
z.f.plog <- rr2::binaryPGLMM(y_phy_binary ~ x1, data = d, phy = phy) z.x.plog <- rr2::binaryPGLMM(y_phy_binary ~ 1, data = d, phy = phy) z.v.plog <- glm(y_phy_binary ~ x1, data = d, family = "binomial") # R2.lik can't be used with binaryPGLMM because it is not a ML method R2(mod = z.f.plog, mod.r = z.x.plog) R2(mod = z.f.plog) z.f.plog2 <- phylolm::phyloglm(y_phy_binary ~ x1, data = d, start.alpha = 1, phy = phy) z.x.plog2 <- phylolm::phyloglm(y_phy_binary ~ 1, data = d, phy = phy, start.alpha = min(20, z.f.plog2$alpha)) z.v.plog2 <- glm(y_phy_binary ~ x1, data = d, family = "binomial") # R2.resid and R2.pred do not apply for phyloglm R2(z.f.plog2, z.x.plog2) # alternate R2.lik(z.f.plog2, z.x.plog2)
We can use rr2::R2()
to calculate partial R^2^s and compare contributions of different predictors. Here is an example using phylolm::phyloglm()
. The same comparisons can be also applied to other types of models.
z.f <- phylolm::phyloglm(y_phy_binary ~ x1 + x2, data = d, start.alpha = 1, phy = phy) z.r1 <- phylolm::phyloglm(y_phy_binary ~ x1, data = d, start.alpha = 1, phy = phy) z.r2 <- phylolm::phyloglm(y_phy_binary ~ x2, data = d, start.alpha = 1, phy = phy) # total R2 R2(z.f) # contribution of x1 R2(z.f, z.r2) # contribution of x2 R2(z.f, z.r1)
It is also possible to estimate the "contribution" of correlation structrues in the model. For the above example, we can replace the phylogeny with a star phylogeny and then compare the R^2^s of the two models.
# see the first chunk R code for the build of phy.x, a star phylogeny z.r3 <- phylolm::phyloglm(y_phy_binary ~ x1 + x2, data = d, start.alpha = 1, phy = phy.x) R2(z.f, z.r3)
Please cite the following papers if you find this package useful:
- Anthony R. Ives. 2018. R2s for Correlated Data: Phylogenetic Models, LMMs, and GLMMs. Systematic Biology, Volume 68, Issue 2, March 2019, Pages 234-251.
- Anthony R. Ives and Daijiang Li (2018). rr2: An R package to calculate R^2s for regression models. The Journal of Open Source Software, 3(30), 1028.
Contributions are welcome. You can either provide comments and feedback by filing an issue on Github here or making pull requests. It may be easier if you first open an issue outlining what you will do in the pull request.
Questions about the package can also be posted as issues on Github.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.