#' @noRd
#' @keywords internal
"circleFun" <- function(center = c(0, 0), diameter = 1, npoints = 100) {
r <- diameter / 2
tt <- seq(0, 2 * pi, length.out = npoints)
xx <- center[1] + r * cos(tt)
yy <- center[2] + r * sin(tt)
return(data.frame(x = xx, y = yy))
}
#' Correlation Covariance Heatmap
#'
#' @param matrix A numeric matrix.
#' @param corr A logical value indicating if the matrix is in a scaled form
#' (\code{TRUE} by default, correlation matrix)
#' @param size A numeric value to define the letter size.
#' @param digits A numeric integer to define the number of digits to plot.
#' @param legend the position of legends ("none", "left", "right", "bottom",
#' "top", or two-element numeric vector)
#' @param upper_tri A logical value to plot the Lower or Upper Triangular Part of
#' the matrix. FALSE by default.
#' @param reorder A logical value to reorder by a Hierarchical Clustering. FALSE
#' by default.
#'
#' @return A ggplot object showing the upper triangular elements of the matrix.
#' @export
#'
#' @examples
#' library(agriutilities)
#' data(iris)
#' M <- cor(iris[, -5])
#' covcor_heat(matrix = M, corr = TRUE)
covcor_heat <- function(matrix,
corr = TRUE,
size = 4,
digits = 3,
legend = c(0.6, 0.7),
upper_tri = FALSE,
reorder = FALSE) {
matrix <- round(x = matrix, digits = digits)
# Get upper triangle of the correlation matrix
get_upper_tri <- function(cormat, upper_tri = FALSE) {
if (upper_tri) {
cormat[upper.tri(cormat)] <- NA
} else {
cormat[lower.tri(cormat)] <- NA
}
return(cormat)
}
reorder_cormat <- function(cormat) {
# Use correlation between variables as distance
dd <- stats::as.dist((1 - cormat) / 2)
hc <- stats::hclust(dd)
cormat <- cormat[hc$order, hc$order]
}
if (corr && reorder) {
matrix <- reorder_cormat(matrix)
}
upper_tri <- as.data.frame(get_upper_tri(matrix, upper_tri))
col_names <- colnames(upper_tri)
upper_tri[, "col"] <- factor(x = col_names, levels = col_names)
melted_cormat <- tidyr::gather(
data = upper_tri,
key = "row",
value = "value",
-col,
na.rm = TRUE,
factor_key = TRUE
)
colnames(melted_cormat) <- c("Var1", "Var2", "value")
u <- -1
m <- 0
l <- 1
main <- "Correlation"
col_pallete <- c("#db4437", "white", "#4285f4")
col_letter <- "black"
if (isFALSE(corr)) {
u <- min(matrix, na.rm = TRUE)
l <- max(matrix, na.rm = TRUE)
m <- u + (l - u) / 2
main <- "Covariance"
col_pallete <- c("#440154", "#21908C", "#FDE725")
col_letter <- "white"
}
melted_cormat$Var1 <- as.factor(melted_cormat$Var1)
melted_cormat$Var2 <- as.factor(melted_cormat$Var2)
ggheatmap <-
ggplot2::ggplot(
data = melted_cormat,
mapping = ggplot2::aes(Var2, Var1, fill = value)
) +
ggplot2::geom_tile(color = "white") +
ggplot2::scale_fill_gradient2(
low = col_pallete[1],
high = col_pallete[3],
mid = col_pallete[2],
midpoint = m,
limit = c(u, l),
space = "Lab",
name = main
) +
ggplot2::theme_minimal() +
ggplot2::theme(
axis.text.x = ggplot2::element_text(
angle = 45,
vjust = 1,
size = 12,
hjust = 1
),
axis.text.y = ggplot2::element_text(size = 12)
)
plot <- ggheatmap +
ggplot2::geom_text(
mapping = ggplot2::aes(Var2, Var1, label = value),
color = col_letter,
size = size
) +
ggplot2::theme(
axis.title.x = ggplot2::element_blank(),
axis.title.y = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.border = ggplot2::element_blank(),
panel.background = ggplot2::element_blank(),
axis.ticks = ggplot2::element_blank(),
legend.justification = c(1, 0),
legend.position = legend,
legend.direction = "horizontal"
) +
ggplot2::guides(
fill = ggplot2::guide_colorbar(
barwidth = 7,
barheight = 1,
title.position = "top",
title.hjust = 0.5
)
)
return(plot)
}
#' Factor Analytic Summary
#'
#' @param model Factor Analytic Model (ASReml object)
#' @param trial A character string indicating the column in data that contains
#' trials.
#' @param genotype A character string indicating the column in data that
#' contains genotypes.
#' @param BLUEs_trial A data.frame containing BLUEs for each trial.
#' @param mult_fa1 A constant to multiply the first loading. Must be 1 or -1.
#' (-1 by default)
#' @param mult_fa2 A constant to multiply the second loading. Must be 1 or -1.
#' (1 by default)
#' @param filter_score A numeric value to filter genotypes by the distance from
#' the origin.
#' @param k_biplot A numeric value to multiply the scores in the biplot.
#' @param size_label_var A numeric value to define the label size for the
#' variables.
#' @param alpha_label_var A numeric value between (0,1) to define the label for
#' the variables.
#' @param size_label_ind A numeric value to define the label size for the
#' individuals.
#' @param alpha_label_ind A numeric value between (0,1) to define the label for
#' the individuals.
#' @param size_arrow A numeric value to define the arrow size.
#' @param alpha_arrow A numeric value between (0,1) to define the arrow.
#' @param base_size A numeric value to define the base size.
#'
#' @return An object with a list of:
#' \item{loadings}{A data.frame containing the first and second loading for each
#' trial.}
#' \item{loading_star}{A data.frame containing the first and second loading
#' rotated for each trial.}
#' \item{Gvar}{A matrix of the estimated variance-covariance between trials.}
#' \item{Cmat}{A matrix of the correlation between trials.}
#' \item{summary_loading}{A data.frame containing a summary of the loadings.}
#' \item{paf_site}{A data.frame containing the percentage of variance explained
#' for each component and for each trial.}
#' \item{var_tot}{A numeric value of the total variance.}
#' \item{scores}{A data.frame containing the scores for each genotype.}
#' \item{plots}{A list with different plots. Includes a plot for the loadings,
#' biplot, biplot_scaled and loadings_c.}
#' @export
#'
#' @examples
#' \dontrun{
#' library(agridat)
#' library(agriutilities)
#' data(besag.met)
#' dat <- besag.met
#' results <- check_design_met(
#' data = dat,
#' genotype = "gen",
#' trial = "county",
#' traits = c("yield"),
#' rep = "rep",
#' block = "block",
#' col = "col",
#' row = "row"
#' )
#' out <- single_trial_analysis(results, progress = FALSE)
#' met_results <- met_analysis(out, vcov = "fa2", progress = FALSE)
#'
#' pp <- met_results$trial_effects
#' model <- met_results$met_models$yield
#' fa_summary(
#' model = model,
#' trial = "trial",
#' genotype = "genotype",
#' BLUEs_trial = pp,
#' mult_fa1 = -1,
#' mult_fa2 = -1,
#' filter_score = 1,
#' k_biplot = 10,
#' size_label_var = 3,
#' alpha_label_var = 0.5,
#' size_label_ind = 3,
#' alpha_label_ind = 0.8,
#' size_arrow = 0.2,
#' alpha_arrow = 0.1
#' )
#' }
#' @import ggplot2 ggrepel
fa_summary <- function(model = NULL,
trial = "trial",
genotype = "genotype",
BLUEs_trial = NULL,
mult_fa1 = -1,
mult_fa2 = 1,
filter_score = 1.5,
k_biplot = 1,
size_label_var = 2,
alpha_label_var = 0.2,
size_label_ind = 2,
alpha_label_ind = 0.8,
size_arrow = 0.2,
alpha_arrow = 0.2,
base_size = 12) {
vars <- summary(model)$varcomp
vars <- data.frame(effect = rownames(vars), vars, check.names = FALSE)
# Loading by Trial
fa1_loadings <- vars[grepl("!fa1$", vars$effect), "component"]
fa2_loadings <- vars[grepl("!fa2$", vars$effect), "component"]
L <- as.matrix(cbind(fa1_loadings, fa2_loadings))
svd_L <- svd(L)
L_star <- L %*% svd_L$v
psi <- vars[grepl("!var$", vars$effect), "component"]
Gvar <- L_star %*% t(L_star) + diag(psi)
Cmat <- cov2cor(Gvar)
VarTot <- sum(diag(L_star %*% t(L_star))) / sum(diag(Gvar))
ns <- nlevels(model$mf[, trial])
k <- 2
snam <- levels(model$mf[, trial])
paf_site <- matrix(0, nrow = ns, ncol = k)
dimnames(paf_site) <- list(snam, paste("fac", 1:k, sep = "_"))
for (i in 1:k) {
paf_site[, i] <- 100 * diag(L_star[, i] %*% t(L_star[, i])) /
diag(L_star %*% t(L_star) + diag(psi))
}
if (k > 1) {
all <- 100 * diag(L_star %*% t(L_star)) /
diag(L_star %*% t(L_star) + diag(psi))
paf_site <- cbind(paf_site, all)
}
VarTot <- VarTot
VarGenEnv <- diag(L_star %*% t(L_star) + diag(psi))
TotGenVar <- sum(VarGenEnv)
VarFA1 <- sum(VarGenEnv * paf_site[, 1]) / 100
VarFA2 <- sum(VarGenEnv * paf_site[, 2]) / 100
PerVarFA1 <- VarFA1 / TotGenVar
PerVarFA2 <- VarFA2 / TotGenVar
percentg <- round(c(PerVarFA1, PerVarFA2) * 100, 2)
L_star[, 1] <- L_star[, 1] * mult_fa1
L_star[, 2] <- L_star[, 2] * mult_fa2
faComp <- data.frame(
site = snam,
fa1 = L_star[, 1],
fa2 = L_star[, 2],
psi = psi,
Vg = diag(Gvar),
BLUE = BLUEs_trial$predicted.value
)
sqrt_diag <- sqrt(diag(Gvar))
faComp[, c("fa1_scaled", "fa2_scaled")] <- diag(1 / sqrt_diag) %*% L_star
row.names(L) <- row.names(L_star) <- snam
row.names(Gvar) <- colnames(Gvar) <- row.names(Cmat) <- colnames(Cmat) <- snam
# Scores by Genotype
coef_fa <- coef(model)$random
fa1_scores <- coef_fa[grep(paste0("Comp1:", genotype), rownames(coef_fa)), ]
fa2_scores <- coef_fa[grep(paste0("Comp2:", genotype), rownames(coef_fa)), ]
names(fa1_scores) <- sub(
pattern = paste0("fa(", trial, ", 2)_Comp1:", genotype, "_"),
replacement = "",
x = names(fa1_scores),
fixed = TRUE
)
names(fa2_scores) <- sub(
pattern = paste0("fa(", trial, ", 2)_Comp2:", genotype, "_"),
replacement = "",
x = names(fa2_scores),
fixed = TRUE
)
f_scores <- rbind(as.matrix(fa1_scores), as.matrix(fa2_scores))
nGenotype <- nlevels(model$mf[, genotype])
f_star <- kronecker(t(svd_L$v), diag(nGenotype)) %*% f_scores
rownames(f_star) <- rownames(f_scores)
fa12_scores <- merge(
x = f_star[1:nGenotype, 1],
y = f_star[(nGenotype + 1):(2 * nGenotype), 1],
by = "row.names"
)
names(fa12_scores) <- c("Genotype", "fa1", "fa2")
fa12_scores$fa1 <- fa12_scores$fa1 * mult_fa1
fa12_scores$fa2 <- fa12_scores$fa2 * mult_fa2
fa12_scores$distance_orig <- sqrt(fa12_scores$fa1^2 + fa12_scores$fa2^2)
fa12_scores$Score <- ifelse(
test = fa12_scores$distance_orig > filter_score,
yes = 1,
no = 0
)
# Plots
d <- data.frame(
x = rep(0, nrow(L_star)),
y = rep(0, nrow(L_star)),
vx = L_star[, 1],
vy = L_star[, 2]
)
loadings <- faComp %>%
ggplot(aes(x = fa1, y = fa2)) +
geom_point(aes(colour = Vg, size = BLUE)) +
scale_colour_gradient(low = "pink", high = "blue") +
geom_text_repel(
mapping = aes(label = site),
nudge_y = 0.05,
nudge_x = -0.03,
force = 1,
alpha = alpha_label_var,
size = size_label_var
) +
ggtitle(
label = paste0(
"Environment Factor Loadings ",
"(", sum(percentg), "%)"
),
subtitle = NULL
) +
xlab(paste0("FA1 loading ", "(", percentg[1], "%)")) +
ylab(paste0("FA2 loading ", "(", percentg[2], "%)")) +
theme_bw(base_size = base_size) +
geom_vline(xintercept = 0, linetype = 2) +
geom_hline(yintercept = 0, linetype = 2) +
geom_segment(
data = d,
mapping = aes(
x = x,
y = y,
xend = x + vx,
yend = y + vy
),
arrow = arrow(),
color = "black",
size = size_arrow,
alpha = alpha_arrow
)
biplot <- faComp %>%
ggplot(aes(x = fa1, y = fa2)) +
geom_point(aes(colour = Vg, size = BLUE)) +
scale_colour_gradient(low = "pink", high = "blue") +
geom_text_repel(
mapping = aes(label = site),
nudge_y = 0.05,
nudge_x = -0.03,
force = 1,
alpha = alpha_label_var,
size = size_label_var
) +
ggtitle("Environment Factor Loadings") +
xlab("FA1 loading") +
ylab("FA2 loading") +
theme_bw(base_size = base_size) +
geom_vline(xintercept = 0, linetype = 2) +
geom_hline(yintercept = 0, linetype = 2) +
geom_segment(
data = d,
mapping = aes(x = x, y = y, xend = x + vx, yend = y + vy),
arrow = arrow(),
color = "black",
size = size_arrow,
alpha = alpha_arrow
) +
geom_label_repel(
data = subset(fa12_scores, Score == 1),
mapping = aes(label = Genotype, x = k_biplot * fa1, y = k_biplot * fa2),
colour = "red",
segment.colour = "red",
size = size_label_ind,
alpha = alpha_label_ind
)
d_scaled <- data.frame(
x = rep(0, nrow(faComp)),
y = rep(0, nrow(faComp)),
vx = faComp[, "fa1_scaled"],
vy = faComp[, "fa2_scaled"]
)
biplot_scaled <- faComp %>%
ggplot(aes(x = fa1_scaled, y = fa2_scaled)) +
geom_point(aes(colour = Vg, size = BLUE)) +
scale_colour_gradient(low = "pink", high = "blue") +
geom_text_repel(
mapping = aes(label = site),
nudge_y = 0.05,
nudge_x = -0.03,
force = 1,
alpha = alpha_label_var,
size = size_label_var
) +
ggtitle("Environment Factor Loadings") +
xlab("FA1 loading") +
ylab("FA2 loading") +
theme_bw(base_size = base_size) +
geom_vline(xintercept = 0, linetype = 2) +
geom_hline(yintercept = 0, linetype = 2) +
geom_segment(
data = d_scaled,
mapping = aes(x = x, y = y, xend = x + vx, yend = y + vy),
arrow = arrow(),
color = "black",
size = size_arrow,
alpha = alpha_arrow
) +
geom_label_repel(
data = subset(fa12_scores, Score == 1),
mapping = aes(label = Genotype, x = 1 * fa1, y = 1 * fa2),
colour = "red",
segment.colour = "red",
size = size_label_ind,
alpha = alpha_label_ind
)
circle <- circleFun(
center = c(0, 0),
diameter = 2,
npoints = 100
)
var_centered <- faComp %>%
ggplot(aes(x = fa1_scaled, y = fa2_scaled)) +
geom_point(aes(colour = Vg, size = BLUE)) +
scale_colour_gradient(low = "pink", high = "blue") +
geom_text_repel(
mapping = aes(label = site),
nudge_y = 0.05,
nudge_x = -0.03,
force = 1,
size = size_label_var,
alpha = alpha_label_var
) +
ggtitle(label = "Environment Factor Loadings") +
xlab(label = "FA1 loading") +
ylab(label = "FA2 loading") +
theme_bw(base_size = base_size) +
geom_vline(xintercept = 0, linetype = 2) +
geom_hline(yintercept = 0, linetype = 2) +
geom_segment(
data = d_scaled,
mapping = aes(x = x, y = y, xend = x + vx, yend = y + vy),
arrow = arrow(),
color = "black",
size = size_arrow,
alpha = alpha_arrow
) +
geom_path(data = circle, mapping = aes(x, y)) +
coord_fixed()
results <- list(
loadings = L,
loading_star = L_star,
Gvar = Gvar,
Cmat = Cmat,
summary_loadings = faComp,
paf_site = paf_site,
var_tot = percentg,
scores = fa12_scores,
plots = list(
loadings = loadings,
biplot = biplot,
biplot_scaled = biplot_scaled,
loadings_c = var_centered
)
)
return(results)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.