Nothing
#' Broad-sense heritability in plant breeding
#'
#' Heritability in plant breeding on a genotype difference basis
#'
#' @param data Experimental design data frame with the factors and traits.
#' @param trait Name of the trait.
#' @param gen.name Name of the genotypes.
#' @param rep.n Number of replications in the experiment.
#' @param env.name Name of the environments (default = NULL). See details.
#' @param env.n Number of environments (default = 1). See details.
#' @param year.name Name of the years (default = NULL). See details.
#' @param year.n Number of years (default = 1). See details.
#' @param fixed.model The fixed effects in the model (BLUEs). See examples.
#' @param random.model The random effects in the model (BLUPs). See examples.
#' @param emmeans Use emmeans for calculate the BLUEs (default = FALSE).
#' @param summary Print summary from random model (default = FALSE).
#' @param plot_diag Show diagnostic plots for fixed and random effects (default
#' = FALSE). Options: "base", "ggplot". .
#' @param outliers.rm Remove outliers (default = FALSE). See references.
#' @param weights an optional vector of ‘prior weights’ to be used in the
#' fitting process (default = NULL).
#' @param trial Column with the name of the trial in the results (default =
#' NULL).
#'
#' @details
#'
#' The function allows to made the calculation for individual or
#' multi-environmental trials (MET) using fixed and random model.
#'
#' 1. The variance components based in the random model and the population
#' summary information based in the fixed model (BLUEs).
#'
#' 2. Heritability under three approaches: Standard (ANOVA), Cullis (BLUPs) and
#' Piepho (BLUEs).
#'
#' 3. Best Linear Unbiased Estimators (BLUEs), fixed effect.
#'
#' 4. Best Linear Unbiased Predictors (BLUPs), random effect.
#'
#' 5. Table with the outliers removed for each model.
#'
#' For individual experiments is necessary provide the \code{trait},
#' \code{gen.name}, \code{rep.n}.
#'
#' For MET experiments you should \code{env.n} and \code{env.name} and/or
#' \code{year.n} and \code{year.name} according your experiment.
#'
#' The BLUEs calculation based in the pairwise comparison could be time
#' consuming with the increase of the number of the genotypes. You can specify
#' \code{emmeans = FALSE} and the calculate of the BLUEs will be faster.
#'
#' If \code{emmeans = FALSE} you should change 1 by 0 in the fixed model for
#' exclude the intersect in the analysis and get all the genotypes BLUEs.
#'
#' For more information review the references.
#'
#' @return list
#'
#' @author
#'
#' Maria Belen Kistner
#'
#' Flavio Lozano Isla
#'
#' @references
#'
#' Bernal Vasquez, Angela Maria, et al. “Outlier Detection Methods for
#' Generalized Lattices: A Case Study on the Transition from ANOVA to REML.”
#' Theoretical and Applied Genetics, vol. 129, no. 4, Apr. 2016.
#'
#' Buntaran, H., Piepho, H., Schmidt, P., Ryden, J., Halling, M., and Forkman,
#' J. (2020). Cross validation of stagewise mixed model analysis of Swedish
#' variety trials with winter wheat and spring barley. Crop Science, 60(5).
#'
#' Schmidt, P., J. Hartung, J. Bennewitz, and H.P. Piepho. 2019. Heritability in
#' Plant Breeding on a Genotype Difference Basis. Genetics 212(4).
#'
#' Schmidt, P., J. Hartung, J. Rath, and H.P. Piepho. 2019. Estimating Broad
#' Sense Heritability with Unbalanced Data from Agricultural Cultivar Trials.
#' Crop Science 59(2).
#'
#' Tanaka, E., and Hui, F. K. C. (2019). Symbolic Formulae for Linear Mixed
#' Models. In H. Nguyen (Ed.), Statistics and Data Science. Springer.
#'
#' Zystro, J., Colley, M., and Dawson, J. (2018). Alternative Experimental
#' Designs for Plant Breeding. In Plant Breeding Reviews. John Wiley and Sons,
#' Ltd.
#'
#' @importFrom dplyr filter pull rename mutate all_of
#' @importFrom purrr pluck as_vector
#' @importFrom stringr str_detect str_replace
#' @importFrom tibble rownames_to_column as_tibble tibble
#' @importFrom lme4 lmer ranef VarCorr
#' @importFrom graphics abline par
#' @importFrom stats fitted var as.formula
#' @importFrom ggplot2 ggplot aes geom_point theme_minimal
#' @importFrom emmeans emmeans
#'
#' @export
#'
#' @examples
#'
#' library(inti)
#'
#' dt <- potato
#'
#' hr <- H2cal(data = dt
#' , trait = "stemdw"
#' , gen.name = "geno"
#' , rep.n = 5
#' , fixed.model = "0 + (1|bloque) + geno"
#' , random.model = "1 + (1|bloque) + (1|geno)"
#' , emmeans = TRUE
#' , plot_diag = FALSE
#' , outliers.rm = TRUE
#' )
#'
#' hr$tabsmr
#' hr$blues
#' hr$blups
#' hr$outliers
#'
H2cal <- function(data
, trait
, gen.name
, rep.n
, env.n = 1
, year.n = 1
, env.name = NULL
, year.name = NULL
, fixed.model
, random.model
, summary = FALSE
, emmeans = FALSE
, weights = NULL
, plot_diag = FALSE
, outliers.rm = FALSE
, trial = NULL
){
# -------------------------------------------------------------------------
if (FALSE) {
data = dt
trait = "stemdw"
gen.name = "geno"
rep.n = 5
fixed.model = "0 + (1|bloque) + geno"
random.model = "1 + (1|bloque) + (1|geno)"
emmeans = TRUE
plot_diag = TRUE
outliers.rm = F
weights = NULL
summary = TRUE
}
# -------------------------------------------------------------------------
grp <- emmean <- SE <- Var <- where <- NULL
V.g <- V.gxl <- env <- V.gxy <- year <- NULL
V.gxlxy <- V.e <- V.p <- vdBLUEs <- vdBLUPs <- NULL
# outliers remove ---------------------------------------------------------
if ( outliers.rm == TRUE ) {
out.rm <- data %>% outliers_remove(data = .
, trait = trait
, model = random.model
, drop_na = FALSE
)
dt.rm <- out.rm %>% purrr::pluck(1)
out.fm <- data %>% outliers_remove(data = .
, trait = trait
, model = fixed.model
, drop_na = FALSE
)
dt.fm <- out.fm %>% purrr::pluck(1)
outliers <- list(fixed = out.fm$outliers, random = out.rm$outliers)
} else if (outliers.rm == FALSE) {
dt.rm <- data
dt.fm <- data
outliers <- NULL
}
# fit models --------------------------------------------------------------
#> fixed genotype effect
f.md <- as.formula(paste(trait, paste(fixed.model, collapse = " + "), sep = " ~ "))
g.fix <- eval(bquote(lme4::lmer(.(f.md), weights = weights, data = dt.fm)))
#> random genotype effect
r.md <- as.formula(paste(trait, paste(random.model, collapse = " + "), sep = " ~ "))
g.ran <- eval(bquote(lme4::lmer(.(r.md), weights = weights, data = dt.rm)))
# Print model summary -----------------------------------------------------
if (summary == TRUE) {
summary(g.ran) %>% print()
}
# Plot models -------------------------------------------------------------
fm.title <- paste("Fixed model:", trait)
rm.title <- paste("Random model:", trait)
if (plot_diag == TRUE || plot_diag == "base") {
diag.plot <- NULL
prp <- par(no.readonly = TRUE)
on.exit(par(prp))
par(mfrow=c(2,4))
hist(resid(g.fix), main = fm.title)
qqnorm(resid(g.fix), main = fm.title); qqline(resid(g.fix))
plot(fitted(g.fix), resid(g.fix, type = "pearson"), main = fm.title); abline(h=0)
plot(resid(g.fix), main = fm.title)
#>
hist(resid(g.ran), main = rm.title)
qqnorm(resid(g.ran), main = rm.title); qqline(resid(g.ran))
plot(fitted(g.ran), resid(g.ran, type = "pearson"), main = rm.title); abline(h=0)
plot(resid(g.ran), main = rm.title)
par(mfrow=c(1,1))
} else if (plot_diag == "ggplot") {
diag.fix <- inti::plot_diag(g.fix, title = fm.title)
diag.ran <- inti::plot_diag(g.ran, title = rm.title)
diag.plot <- list(fixed = diag.fix, random = diag.ran)
grid::grid.newpage()
grid::pushViewport(grid::viewport(layout = grid::grid.layout(nrow = 2
, ncol = 4
)))
vplayout <- function(x, y)
grid::viewport(layout.pos.row = x, layout.pos.col = y)
print(diag.fix$histogram, vp = vplayout(1, 1))
print(diag.fix$qqplot, vp = vplayout(1, 2))
print(diag.fix$residual, vp = vplayout(1, 3))
print(diag.fix$homoscedasticity, vp = vplayout(1, 4))
print(diag.ran$histogram, vp = vplayout(2, 1))
print(diag.ran$qqplot, vp = vplayout(2, 2))
print(diag.ran$residual, vp = vplayout(2, 3))
print(diag.ran$homoscedasticity, vp = vplayout(2, 4))
} else {diag.plot <- NULL}
# Model estimates --------------------------------------------------
# number of genotypes
gen.n <- g.ran %>%
summary() %>%
purrr::pluck("ngrps") %>%
.[{{gen.name}}]
# genotypic variance component
vc.g <- c(lme4::VarCorr(g.ran)[[gen.name]])
# environment variance component
if(env.n > 1){
gxl <- paste(gen.name, env.name, sep = ":")
vc.gxl <- g.ran %>%
lme4::VarCorr() %>%
tibble::as_tibble() %>%
dplyr::filter(grp == gxl) %>%
dplyr::pull(vcov)
if( length(strsplit(gxl,":")[[1]]) < 2) {
message("You should include env.name in the arguments")
vc.gxl <- 0
} else if (identical(vc.gxl, numeric(0))) {
message("You should include (1|genotype:environment) interaction")
vc.gxl <- 0
}
} else if (!is.null(env.name) && env.n == 1) {
message("You should include env.n in the arguments")
vc.gxl <- 0
} else if (env.n == 1){
vc.gxl <- 0
}
# year variance component
if(year.n > 1){
gxy <- paste(gen.name, year.name, sep = ":")
vc.gxy <- g.ran %>%
lme4::VarCorr() %>%
tibble::as_tibble() %>%
dplyr::filter(grp == gxy) %>%
dplyr::pull(vcov)
if( length(strsplit(gxy,":")[[1]]) < 2 ) {
message("You should include year.name in the arguments")
vc.gxy <- 0
} else if (identical(vc.gxy, numeric(0))) {
message("You should include (1|genotype:year) interaction")
vc.gxy <- 0
}
} else if (!is.null(year.name) && year.n == 1) {
message("You should include year.n in the arguments")
vc.gxy <- 0
} else if (year.n == 1){
vc.gxy <- 0
}
# environment x year variance component (review in MET)
if(year.n > 1 && env.n > 1){
gxlxy <- paste(gen.name, env.name, year.name, sep = ":")
vc.gxlxy <- g.ran %>%
lme4::VarCorr() %>%
tibble::as_tibble() %>%
dplyr::filter(grp == gxlxy) %>%
dplyr::pull(vcov)
if( length(strsplit(gxlxy,":")[[1]]) < 3 ) {
message("You should include environment/years arguments")
vc.gxlxy <- 0
} else if (identical(vc.gxlxy, numeric(0))) {
message("You should include (1|genotype:environment:year) interaction")
vc.gxlxy <- 0
}
} else if (year.n == 1 && env.n == 1){
vc.gxlxy <- 0
} else {vc.gxlxy <- 0}
# error variance component
vc.e <- g.ran %>%
lme4::VarCorr() %>%
tibble::as_tibble() %>%
dplyr::filter(grp == "Residual") %>%
dplyr::pull(vcov)
# Best Linear Unbiased Estimators (BLUE) :: fixed model -------------------
if(emmeans == TRUE){
BLUE <- g.fix %>%
emmeans::emmeans(as.formula(paste("pairwise", gen.name, sep = " ~ ")))
BLUEs <- BLUE %>%
purrr::pluck("emmeans") %>%
tibble::as_tibble() %>%
dplyr::rename(!!trait := 'emmean')
# mean variance of a difference between genotypes (BLUEs) -----------------
vdBLUE.avg <- BLUE %>%
purrr::pluck("contrasts") %>%
tibble::as_tibble() %>%
dplyr::mutate(Var=SE^2) %>%
dplyr::pull(Var) %>%
mean(.)
} else if (emmeans == FALSE) {
BLUEs <- g.fix %>%
lme4::fixef() %>%
data.frame() %>%
rownames_to_column({{gen.name}}) %>%
dplyr::rename(!!trait := .) %>%
mutate(across({{trait}}, as.numeric)) %>%
mutate(smith.w = diag(solve(vcov(g.fix)))) %>%
dplyr::filter(grepl({{gen.name}}, .data[[gen.name]])) %>%
mutate(across({{gen.name}}, ~stringr::str_replace(., gen.name, "")))
vdBLUE.avg <- g.fix %>%
vcov(.) %>%
base::as.matrix(.) %>%
diag(.) %>%
enframe() %>%
dplyr::filter(grepl({{gen.name}}, .data$name)) %>%
select(!.data$name) %>%
deframe() %>%
mean()
}
# Best Linear Unbiased Predictors (BLUP) :: random model -------------------
BLUPs <- g.ran %>%
stats::coef() %>%
purrr::pluck(gen.name) %>%
tibble::rownames_to_column(gen.name) %>%
dplyr::rename(!!trait := '(Intercept)') %>%
tibble::as_tibble(.) %>%
dplyr::select({{gen.name}}, {{trait}}) %>%
{if (!is.null(trial)) dplyr::mutate(.data = ., trial = trial) else .} %>%
{if (!is.null(trial)) select(.data = ., trial, everything()) else .}
# mean variance of a difference between genotypes (BLUPs) -----------------
vdBLUP.avg <- g.ran %>%
lme4::ranef(condVar = TRUE) %>%
purrr::pluck(gen.name) %>%
attr("postVar") %>%
purrr::as_vector(.) %>%
mean(.)*2
# Summary table of adjusted means (BLUEs) ---------------------------------
smd <- BLUEs %>%
dplyr::summarise(
mean = mean(.data[[trait]], na.rm = T)
, std = sqrt(var(.data[[trait]], na.rm = T))
, min = min(.data[[trait]])
, max = max(.data[[trait]])
)
# -------------------------------------------------------------------------
BLUEs <- BLUEs %>%
{if (!is.null(trial)) dplyr::mutate(.data = ., trial = trial) else .} %>%
{if (!is.null(trial)) select(.data = ., trial, everything()) else .}
# -------------------------------------------------------------------------
## Summary table VC & Heritability
vrcp <- dplyr::tibble(
trait = trait
, rep = rep.n
, geno = gen.n
, env = env.n
, year = year.n
) %>%
merge(., smd) %>%
mutate(V.g = vc.g) %>%
mutate(V.gxl = vc.gxl) %>%
mutate(V.gxy = vc.gxy) %>%
mutate(V.gxlxy = vc.gxlxy) %>%
mutate(V.e = vc.e) %>%
mutate(V.p = (V.g + V.gxl/env + V.gxy/year + V.gxlxy/(env*year) + V.e/(env*year*rep))) %>%
mutate(repeatability = V.g/(V.g + V.e/rep)) %>%
mutate(H2.s = V.g/V.p) %>%
mutate(vdBLUEs = vdBLUE.avg) %>%
mutate(H2.p = V.g/(V.g + vdBLUEs/2)) %>%
mutate(vdBLUPs = vdBLUP.avg) %>%
mutate(H2.c = 1 - (vdBLUPs/2/V.g)) %>%
select(!matches("BLUE|BLUP")) %>%
purrr::discard(~all(is.nan(.))) %>%
{if (env.n == 1) dplyr::select(.data = ., !V.gxl) else .} %>%
{if (year.n == 1) dplyr::select(.data = ., !V.gxy) else .} %>%
{if (env.n && year.n == 1) dplyr::select(.data = ., !V.gxlxy) else .} %>%
{if (!is.null(trial)) dplyr::mutate(.data = ., trial = trial) else .} %>%
{if (!is.null(trial)) dplyr::select(.data = ., trial, everything()) else .}
# result ------------------------------------------------------------------
rsl <- list(
tabsmr = vrcp
, blups = BLUPs
, blues = BLUEs
, model = g.ran
, outliers = outliers
, diagplot = diag.plot
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.