#' Tai's stability analysis
#'
#' This function runs Tai's stability analysis (Tai, G. C. C., 1971).
#' It assumes a RCBD with fixed effects for genotypes and random effects for environments.
#' @param trait The trait to analyze.
#' @param geno The genotypes.
#' @param env The environments.
#' @param rep The replications or blocks. A RCBD is assumed.
#' @param data The name of the data frame containing the data.
#' @param maxp Maximum allowed proportion of missing values to estimate, default is 10\%.
#' @param conf Probability for the Tai limits.
#' @param title Main title for plot.
#' @param color Color for symbols, labels and lines. Ignored; maintained for compatibility with original tai function.
#' @param size Relative size for symbols and labels. Ignored; maintained for compatibility with original tai function.
#' @param scaleSize logical. Whether dots should be scaled by trait value or not. Default is TRUE.
#' @author Raul Eyzaguirre
#' @author Reinhard Simon (transfer to ggplot2)
#' @import st4gi
#' @import ggplot2
#' @import ggrepel
#' @importFrom stats qf
#' @details The limits for alpha and lambda are computed using the mean squares from
#' an ANOVA table for a RCBD with blocks nested into environments. If the data set is
#' unbalanced, a warning is produced.
#' @return It returns the Tai graph for stability analysis but not anymore the values of alpha
#' and lambda for each genotype. Use 'tai' function.
#' @references
#' Tai, G. C. C. (1971). Genotypic Stability Analysis and Its Application to Potato
#' Regional Trials, Crop Science, Vol 11.
#' @export
gg_tai <- function(trait, geno, env, rep, data, maxp = 0.1, conf = 0.95, title = NULL,
color = c("darkorange", "black", "gray"), size = c(1, 1), scaleSize = TRUE) {
check.met <- function(trait, geno, env, rep, data) {
# Everything as factor
data[, geno] <- factor(data[, geno])
data[, env] <- factor(data[, env])
data[, rep] <- factor(data[, rep])
ng <- nlevels(data[, geno])
ne <- nlevels(data[, env])
nr <- nlevels(data[, rep])
# Check frequencies by geno and env
nmis <- sum(is.na(data[, trait]))
pmis <- mean(is.na(data[, trait]))
subdata <- subset(data, is.na(data[, trait]) == 0)
tfreq <- table(subdata[, geno], subdata[, env])
# Controls
c1 <- 1 # Check for zeros. Initial state no zeros which is good
c2 <- 0 # Check for replicates. Initial state only one replicate which is bad
c3 <- 1 # Check for balance. Initial state balanced which is good
if (min(tfreq) == 0) c1 <- 0 # State 0: there are zeros
if (max(tfreq) > 1) c2 <- 1 # State 1: more than one replicate
if (min(tfreq) != max(tfreq)) c3 <- 0 # State 0: unbalanced
# Return
list(c1 = c1, c2 = c2, c3 = c3, nmis = nmis, pmis = pmis, ng = ng, ne = ne, nr = nr)
}
data[, geno] <- factor(data[, geno])
data[, env] <- factor(data[, env])
data[, rep] <- factor(data[, rep])
# Check data
lc <- check.met(trait, geno, env, rep, data)
# Error messages and warnings
if (lc$c1 == 0)
stop("Some GxE cells have zero frequency. Remove genotypes or environments to proceed.")
if (lc$c1 == 1 & lc$c2 == 0)
stop("There is only one replication. Inference is not possible with one replication.")
if (lc$ng < 2 | lc$ne < 2)
stop("This is not a MET experiment.")
if (lc$ng < 3 | lc$ne < 3)
stop("You need at least 3 genotypes and 3 environments to run Tai")
if (lc$c1 == 1 & lc$c2 == 1 & lc$c3 == 0) {
data[, trait] <- st4gi::mve.met(trait, geno, env, rep, data, maxp, tol = 1e-06)[, 5]
warning(paste("The data set is unbalanced, ",
format(lc$pmis * 100, digits = 3),
"% missing values estimated.", sep = ""))
}
# Compute interaction effects matrix
int.mean <- tapply(data[, trait], list(data[, geno], data[, env]), mean, na.rm = TRUE)
overall.mean <- mean(int.mean)
env.mean <- apply(int.mean, 2, mean)
geno.mean <- apply(int.mean, 1, mean)
int.eff <- int.mean + overall.mean
int.eff <- int.eff - geno.mean
int.eff <- t(t(int.eff)- env.mean)
# ANOVA
model <- stats::aov(data[, trait] ~ data[, geno] + data[, env] +
data[, rep] %in% data[, env] + data[, geno]:data[, env])
at <- stats::anova(model)
#print(at)
# Correction for missing values if any
if (lc$nmis > 0) {
at[5, 1] <- at[5, 1] - lc$nmis
at[5, 3] <- at[5, 2] / at[5, 1]
}
# Compute Tai values alpha and lambda
slgl <- int.eff
slgl <- t(t(slgl) * (env.mean - overall.mean) / (lc$ne - 1))
alpha <- apply(slgl, 1, sum) / (at[2, 3] - at[3, 3]) * lc$ng * lc$nr
s2gl <- int.eff
s2gl <- s2gl^2 / (lc$ne - 1)
lambda <- (apply(s2gl, 1, sum) - alpha * apply(slgl, 1, sum)) /
(lc$ng - 1) / at[5, 3] * lc$ng * lc$nr
lambda[lambda < 0] <- 0
# plot lambda limits
lmax <- max(c(lambda, stats::qf(1 - (1 - conf) / 2, lc$ne - 2,
lc$ne * (lc$ng - 1) * (lc$nr - 1)))) * 1.1
# Prediction interval for alpha
lx <- seq(0, lmax, lmax / 100)
ta <- stats::qt(1 - (1 - conf) / 2, lc$ne - 2)
div2 <- (lc$ne - 2) * at[2, 3] - (ta^2 + lc$ne - 2) * at[3, 3]
if (div2 < 0) {
warning("MS for blocks is too big in relation with MS for environments. Cannot compute prediction interval for alpha parameter.")
amax <- max(abs(alpha)) * 1.05
} else {
pi.alpha <- ta * ((lx * (lc$ng - 1) * at[5, 3] * at[2, 3]) / ((at[2, 3] - at[3, 3]) * div2))^0.5
amax <- max(c(abs(alpha), pi.alpha))
}
# Tai plot
if (is.null(title))
title <- paste("Tai stability analysis for: ", trait, sep = "")
dat = as.data.frame(cbind(geno = names(lambda), lambda, alpha),
stringsAsFactors = FALSE)
dat[, 1] = as.character(dat[, 1])
dat[, 2] = as.numeric(dat[, 2])
dat[, 3] = as.numeric(dat[, 3])
if("type" %in% names(data)){
xdat = data[!duplicated(data$geno), c("geno", trait, "type") ]
xdat[, 1] = as.character(xdat[, 1])
dat = merge(dat, xdat)
} else {
xdat = data[!duplicated(data$geno), c("geno", trait) ]
xdat[, 1] = as.character(xdat[, 1])
dat = merge(dat, xdat)
}
dat$geno = as.factor(dat$geno)
gg = ggplot(data = dat, aes(x=lambda, y=alpha)) +
ggtitle(title) +
xlab(expression(lambda)) +
ylab(expression(alpha)) +
coord_cartesian(xlim = c(-0.05 * lmax, lmax), ylim = c(-amax, amax))
if (div2 > 0) {
dt2 = as.data.frame(cbind(lx, pi.alpha))
gg = gg +
geom_path(data = dt2, aes(x = dt2$lx, y = dt2$pi.alpha, col = color[3])) +
geom_path(data = dt2, aes(x = dt2$lx, y =-dt2$pi.alpha, col = color[3]))
}
gg = gg +
geom_vline(xintercept = qf((1 - conf) / 2, lc$ne - 2, lc$ne * lc$ng * (lc$nr - 1)),
col = color[3] ) +
geom_vline(xintercept = qf(1 - (1 - conf) / 2, lc$ne - 2, lc$ne * lc$ng * (lc$nr - 1)),
col = color[3] )
if("type" %in% names(data) & scaleSize){
gg = gg + geom_point( aes(color = factor(dat$type), size = dat[, trait]))
}
if("type" %in% names(data) & !scaleSize){
gg = gg + geom_point( aes(color = factor(dat$type)))
}
if(!("type" %in% names(data)) & scaleSize){
gg = gg + geom_point( aes( size = dat[, trait]))
}
if(!("type" %in% names(data)) & !scaleSize){
gg = gg + geom_point()
}
gg = gg + geom_text_repel( aes(label = dat$geno,col = color[2] )) +
theme(legend.position = 'none')
gg
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.