#' Pull main and interaction effects from a model
#'
#' @param object A fitted model object.
#' @param gen.col The name of the column in \code{data} containing the genotype factor.
#' @param env.col The name of the column in \code{data} containing the environment factor.
#'
#' @importFrom effects Effect allEffects
#'
#' @export
#'
pull_effects <- function(object, gen.col = "gen", env.col = "env") {
UseMethod(generic = "pull_effects", object = object)
}
#' @rdname pull_effects
pull_effects.lm <- function(object, gen.col = "gen", env.col = "env") {
# Get the coefficients
fit_coef <- coef(object)
mf <- model.frame(object)
## Calculate effects and means
# Grand mean
grand_mean <- fit_coef[1]
all_terms <- attr(terms(mf), "term.labels")
## Designate the terms as main or interacting
main_terms <- all_terms[!grepl(pattern = ":", x = all_terms)]
## Stop if one of the terms is not gen.col:env.col
gxe_term <- paste(gen.col, env.col, sep = ":")
if (!gxe_term %in% all_terms) {
stop("The GxE term is not properly specified in the model. It must take the
form 'gen.col:env.col'")
}
## Get the main effects
effs <- lapply(X = main_terms, FUN = Effect, mod = object, confint = FALSE)
# Create vectors of the effects with the level names
effs1 <- lapply(X = effs, FUN = function(eff_obj) {
setNames(object = c(eff_obj$fit), nm = eff_obj$variables[[1]]$levels) })
# Subtract the grand mean
effs2 <- setNames(object = lapply(X = effs1, FUN = `-`, grand_mean), nm = c(gen.col, env.col))
# Convert to data.frames
gen_eff <- setNames(object = as.data.frame(effs2[[gen.col]]), nm = "gen_main_eff")
env_eff <- setNames(object = as.data.frame(effs2[[env.col]]), nm = "env_main_eff")
## Get the interaction effects (gxe)
int_eff <- allEffects(mod = object, confint = FALSE)[[gxe_term]]
int_eff1 <- cbind(int_eff$x, int_eff = int_eff$fit)
## Combine
all_effects <- merge(x = merge(x = int_eff1, y = gen_eff, by.x = gen.col, by.y = "row.names"),
y = env_eff, by.x = env.col, by.y = "row.names")
# Recalculate interaction effects
all_effects$int_eff <- all_effects$int_eff - grand_mean - all_effects$gen_main_eff - all_effects$env_main_eff
# Add grand mean
all_effects$grand_mean <- grand_mean
# Return
return(all_effects)
}
#' The Sites Regression (SREG, or GGE) model
#'
#' @description Fit a sites-regression model (SREG, or GGE) to analyze genotype-
#' environment interactions in plant breeding experiments.
#'
#' @param formula A formula specifying the model.
#' @param data A data frame in which the variables specified in the formula will be found.
#' If missing, the variables are searched for in the standard way.
#' @param gen.col The name of the column in \code{data} containing the genotype factor.
#' @param env.col The name of the column in \code{data} containing the environment factor.
#' @param n.terms The number of multiplicative terms to include in the model.
#' Defaults to 1 (i.e. SREG_1).
#'
#' @examples
#' # Use the gauch.soy dataset
#' data("gauch.soy")
#'
#' # Filter
#' gauch_soy1 <- droplevels(subset(gauch.soy, env %in% c("A77", "A80", "A83")))
#'
#' sreg_fit <- sreg(formula = yield ~ gen + env + gen:env, data = gauch_soy1)
#'
#' @importFrom tidyr spread_
#'
#' @export
#'
sreg <- function(formula, data, gen.col = "gen", env.col = "env", n.terms = 1) {
stopifnot(inherits(formula, "formula"))
# Make sure the env.col is among the names in data
stopifnot(gen.col %in% names(data))
stopifnot(env.col %in% names(data))
# Edit the formula environment
environment(formula) <- environment()
## Create a model.frame
mf <- model.frame(formula = formula, data = data, drop.unused.levels = TRUE)
# Get the model terms
mod_terms <- terms(mf)
pred_vars <- attr(mod_terms, "predvars")
all_terms <- attr(mod_terms, "term.labels")
## Stop if one of the terms is not gen.col:env.col
gxe_term <- paste(gen.col, env.col, sep = ":")
if (!gxe_term %in% all_terms) {
stop("The GxE term is not properly specified in the model. It must take the
form 'gen.col:env.col'")
}
## Vector of predictor variables
## The first two names (for a single response) are "list" and the response variable
var_names <- as.character(pred_vars)[-1]
resp_var_names <- var_names[attr(mod_terms, "response")]
pred_var_names <- setdiff(var_names, resp_var_names)
# Classes of the variables
var_class <- attr(mod_terms, "dataClasses")
## Trim the data for genotypes that are observed at least once in each environment
tabs_form <- as.formula(paste("~ ", gen.col, "+", env.col))
mf_tabs <- xtabs(formula = tabs_form, data = mf)
# Count the observations of genotypes
gen_count <- apply(X = mf_tabs, MARGIN = 1, FUN = sum)
# Remove those with < 2 observations
gen_to_keep <- names(gen_count)[gen_count >= 2]
# If this is length 0, error out
if (length(gen_to_keep) <= 1)
stop("There is not enough data to estimate interactions. Check the balance of the dataset.")
## Filter the mf
mf1 <- mf[mf[[gen.col]] %in% gen_to_keep,, drop = FALSE]
### Model-fitting
## Convert the predictors to factors, if characters, then set the contrasts
## to contr.sum
for (var in pred_var_names) {
# Determine the class of the predictor
if (var_class[var] %in% c("factor", "character")) {
mf1[[var]] <- as.factor(mf1[[var]])
C_mat <- C(object = mf1[[var]], contr = "contr.sum")
# Set the column names of the contrast matrix
contrasts(C_mat) <- `colnames<-`(x = contrasts(C_mat), value = head(levels(C_mat), -1))
mf1[[var]] <- C_mat
}}
fit <- lm(formula = formula, data = mf1)
## Get the effects
all_effects <- pull_effects(object = fit, gen.col = gen.col, env.col = env.col)
##############
##############
## Create a two-way table of genotype + gxe effects
gge_effects <- all_effects
gge_effects$gge <- gge_effects$gen_main_eff + gge_effects$int_eff
gge_mat <- spread_(data = gge_effects[,c(gen.col, env.col, "gge")], key_col = env.col, value_col = "gge")
row.names(gge_mat) <- gge_mat[[1]]
gge_tab <- as.matrix(gge_mat[,-1])
dimnames(gge_tab) <- setNames(dimnames(gge_tab), c(gen.col, env.col))
### Singular value decomp
gge_svd <- svd(x = gge_tab)
# Conver the singular values to a matrix
gge_svd$d <- matrix(gge_svd$d, nrow = 1)
## Split in to a list of length(multi_svd$d) - one for each component
n_comp <- ncol(gge_svd$d)
gge_svd_list <- lapply(X = seq(n_comp), FUN = function(n) lapply(X = gge_svd, "[", ,n, drop = FALSE))
## Pull out the desired components
comp_tokeep <- gge_svd_list[seq(n.terms)]
comp_residual <- gge_svd_list[setdiff(seq(n_comp), seq(n.terms))]
# If n.term is 1, the eigenvectors are the primary effects
if (n.terms == 1) {
gen_prim_eff <- setNames(object = c(comp_tokeep[[1]]$u), nm = paste(rownames(gge_tab), sep = ""))
env_prim_eff <- setNames(object = c(comp_tokeep[[1]]$v), nm = paste(colnames(gge_tab), sep = ""))
} else {
gen_prim_eff <- env_prim_eff <- NULL
}
## For each component, generate PC scores by multplying the eigenvectors by the
## square root of the singular value
pc_score_list_tokeep <- lapply(X = comp_tokeep, FUN = function(comp) {
gen_score <- comp$u * sqrt(c(comp$d))
env_score <- comp$v * sqrt(c(comp$d))
# Multiply them
as.vector(gen_score) %*% t(as.vector(env_score))
})
## Do the same for the residual components
pc_score_list_residual <- lapply(X = comp_residual, FUN = function(comp) {
gen_score <- comp$u * sqrt(c(comp$d))
env_score <- comp$v * sqrt(c(comp$d))
# Multiply them
as.vector(gen_score) %*% t(as.vector(env_score))
})
## Sum the desired PC scores
theta <- Reduce(f = `+`, x = pc_score_list_tokeep)
# Add dimnames
dimnames(theta) <- dimnames(gge_tab)
## Sum the residual scores
rho <- Reduce(f = `+`, x = pc_score_list_residual)
# Add dimnames
dimnames(rho) <- dimnames(gge_tab)
## Create a data.frame with the theta and rho values
theta_df <- as.data.frame(as.table(theta), responseName = "theta")
rho_df <- as.data.frame(as.table(rho), responseName = "rho")
# Combine
gge_svd_df <- cbind(theta_df, rho = rho_df$rho)
## Combine with all_effects
all_effects1 <- merge(x = all_effects, y = gge_svd_df, by = c(gen.col, env.col))
## Add the response values
mf_effects <- merge(x = data, y = all_effects1)
## The fitted values are the main effects + the svd effects
fitted_values <- mf_effects$grand_mean + mf_effects$env_main_eff + mf_effects$theta
# The residuals are the response values - the fitted values
residual_values <- mf_effects[[resp_var_names]] - fitted_values
## Subset the mf_effects df for unique G-E combinations
mf_effects1 <- unique(mf_effects[,c(gen.col, env.col, "grand_mean", "gen_main_eff", "env_main_eff",
"int_eff", "theta")])
## Predicted values
mf_effects1$ybar <- mf_effects1$grand_mean + mf_effects1$gen_main_eff + mf_effects1$env_main_eff +
mf_effects1$int_eff
mf_effects1$fitted <- mf_effects1$grand_mean + mf_effects1$env_main_eff + mf_effects1$theta
mf_effects1$residual <- mf_effects1$ybar - mf_effects1$fitted
## Add the primary genotype and environment effects
mf_effects2 <- merge(mf_effects1, as.data.frame(env_prim_eff), by.x = env.col, by.y = "row.names")
mf_effects3 <- merge(mf_effects2, as.data.frame(gen_prim_eff), by.x = gen.col, by.y = "row.names")
## Assemble an output list
sreg <- list(
anova = anova(fit),
coefficients = coef(fit),
residuals = residual_values,
fitted.values = fitted_values,
primary.effect = setNames(list(gen_prim_eff, env_prim_eff), c(gen.col, env.col)),
effects = mf_effects3,
n.terms = n.terms
)
# Add class and return
structure(sreg, class = "sreg")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.