This document was generated from a file in the Rmd format. Both the source and target files are under the CC BY-SA license. The source file is versioned online.
This tutorial aims at exploring the partitioning of variance between factors in linear models.
In an simulated example, yield
as the response variable is regressed on two factors, fertilization
and genotype
.
External packages are required:
suppressPackageStartupMessages(library(MuMIn)) suppressPackageStartupMessages(library(lme4)) ## suppressPackageStartupMessages(library(brms)) suppressPackageStartupMessages(library(rsq)) suppressPackageStartupMessages(library(vegan))
Let $y_{ijk}$ be the yield of genotype $i$ with fertilization level $j$ in micro-plot $k$: [ y_{ijk} = \mu + a_i + b_j + d_{ij} + \epsilon_{ijk} ]
In matrix notation, when all effects are modeled as fixed: [ \boldsymbol{y} = X \boldsymbol{\beta} + \boldsymbol{\epsilon} \text{ with } \boldsymbol{\epsilon} \sim \mathcal{N}_N(\boldsymbol{0}, R) ]
In matrix notation, when fertilization effects are modeled as fixed and genotype effects as random: [ \boldsymbol{y} = X \boldsymbol{\beta} + Z \boldsymbol{u} + \boldsymbol{\epsilon} \text{ with } \boldsymbol{u} \sim \mathcal{N}_I(\boldsymbol{0}, G) \text{ and } \boldsymbol{\epsilon} \sim \mathcal{N}_N(\boldsymbol{0}, R) ]
Usually, $G = \sigma_u^2 A$ and $R = \sigma^2 \text{Id}$.
I <- 10 # number of genotypes J <- 3 # number of fertilization level K <- 4 # number of replicates per treatment (N <- I * J * K) # total number of observations
(levGenos <- sprintf("cv%02i", 1:I)) (levFerti <- c("low", "med", "high")) (levReps <- LETTERS[1:K]) dat <- data.frame(geno=rep(levGenos, each=J*K), ferti=rep(rep(levFerti, each=K), I), rep=rep(levReps, I*J), yield=NA) dat$ferti <- factor(dat$ferti, levels=levFerti) # so that levels are ordered head(dat) str(dat)
Set the seed:
set.seed(12345) # to make the simulation reproducible
Make the design matrix:
X <- model.matrix(~ 1 + geno + ferti + geno:ferti, data=dat, contrasts.arg=list(geno="contr.sum", ferti="contr.sum")) dim(X) colnames(X)
Fix the intercept:
mu <- 100
Draw genotype effects:
sd_a <- 10 a <- rnorm(n=I-1, mean=0, sd=sd_a)
Draw fertilization effects:
sd_b <- 10 b <- rnorm(n=J-1, mean=0, sd=sd_b)
Draw interaction effects:
sd_d <- 2 d <- rnorm(n=(I-1)*(J-1), mean=0, sd=sd_d)
Make the effect vector:
beta <- matrix(c(mu, a, b, d), nrow=ncol(X), ncol=1) rownames(beta) <- colnames(X)
Draw errors:
sd_e <- 1 epsilon <- rnorm(n=N, mean=0, sd=sd_e)
Compute the yield:
y <- X %*% beta + epsilon dat$yield <- as.vector(y)
str(dat) (grandMean <- mean(dat$yield)) (genoMeans <- tapply(dat$yield, dat$geno, mean)) (fertiMeans <- tapply(dat$yield, dat$ferti, mean))
hist(dat$yield, col="grey", border="white", las=1) abline(v=grandMean, lty=2) legend("topright", legend="grand mean", lty=2, bty="n")
colsFerti <- c("yellow", "orange", "red") boxplot(yield ~ geno + ferti, data=dat, las=2, varwidth=TRUE, col=rep(colsFerti, each=I), main="Boxplots of dat$yield") abline(h=grandMean, lty=2) legend("bottomright", bty="n", legend=c("grand mean", paste0("ferti.: ", levFerti)), lty=c(2, rep(NA, J)), col=c("black", colsFerti), fill=c(NA, colsFerti), border=c("white", colsFerti), x.intersp=c(2, rep(0.5, J)))
fitFix1 <- lm(yield ~ 1 + geno, data=dat, contrasts=list(geno="contr.sum")) fitFix2 <- lm(yield ~ 1 + ferti, data=dat, contrasts=list(ferti="contr.sum")) fitFix3a <- lm(yield ~ 1 + geno + ferti, data=dat, contrasts=list(geno="contr.sum", ferti="contr.sum")) fitFix3b <- lm(yield ~ 1 + geno + ferti + geno:ferti, data=dat, contrasts=list(geno="contr.sum", ferti="contr.sum"))
extractAIC(fitFix1) extractAIC(fitFix2) extractAIC(fitFix3a) extractAIC(fitFix3b)
Best model:
fitGlobModFix <- lm(yield ~ 1 + geno + ferti + geno:ferti, data=dat, contrasts=list(geno="contr.sum", ferti="contr.sum"), na.action="na.fail") (allModelsFix <- dredge(global.model=fitGlobModFix, rank="AICc"))
fitBestModFix <- get.models(allModelsFix, 1)[[1]]
par(mfrow=c(1,2)) plot(fitBestModFix, which=c(1,2), ask=FALSE)
summary(fitBestModFix)
ANOVA table:
(aovFix <- anova(fitBestModFix))
(tmp <- setNames(aovFix[, "Mean Sq"] / sum(aovFix[, "Mean Sq"]), rownames(aovFix))) sum(tmp) sum(tmp[-length(tmp)])
summary(fitBestModFix)$r.squared summary(fitBestModFix)$adj.r.squared
rsq.partial(objF=fitBestModFix, adj=FALSE, type="v") rsq.partial(objF=fitBestModFix, adj=TRUE, type="v") rsq.partial(objF=fitBestModFix, adj=FALSE, type="sse") rsq.partial(objF=fitBestModFix, adj=TRUE, type="sse")
vpG <- varpart(dat$yield, ~ geno, ~ geno + ferti + geno:ferti, data=dat) vpG plot(vpG)
vpF <- varpart(dat$yield, ~ ferti, ~ geno + ferti + geno:ferti, data=dat) vpF plot(vpF)
vpGF <- varpart(dat$yield, ~ geno + ferti, ~ geno + ferti + geno:ferti, data=dat) vpGF plot(vpGF)
fitMix1 <- lmer(yield ~ 1 + (1|geno), data=dat, REML=FALSE) fitMix2 <- lm(yield ~ 1 + ferti, data=dat, contrasts=list(ferti="contr.sum")) fitMix3a <- lmer(yield ~ 1 + (1|geno) + ferti, data=dat, contrasts=list(ferti="contr.sum"), REML=FALSE) fitMix3b <- lmer(yield ~ 1 + ferti + (ferti-1|geno), data=dat, contrasts=list(ferti="contr.sum"), REML=FALSE)
extractAIC(fitMix1) extractAIC(fitMix2) extractAIC(fitMix3a) extractAIC(fitMix3b)
Best model:
fitGlobModMix <- lmer(yield ~ 1 + ferti + (ferti-1|geno), data=dat, contrasts=list(ferti="contr.sum"), na.action="na.fail", REML=FALSE) (allModelsMix <- dredge(global.model=fitGlobModMix, rank="AICc"))
fitBestModMix <- get.models(allModelsMix, 1)[[1]] fitBestModFix <- update(fitBestModMix, REML=TRUE)
plot(fitBestModMix) qqmath(fitBestModMix)
summary(fitBestModMix)
anova(fitBestModMix) as.data.frame(VarCorr(fitBestModMix))
https://cran.r-project.org/web/packages/rsq/rsq.pdf#page=15
https://cran.r-project.org/web/packages/vegan/vegan.pdf#page=266
Legendre et Legendre
Lebarbier et Robin
Villemereuil et al
print(sessionInfo(), locale=FALSE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.