Nothing
#' @name gl.plot.structure
#'
#' @title Plots STRUCTURE analysis results (Q-matrix)
#'
#' @description
#' This function takes a structure run object (output from
#' \code{\link{gl.run.structure}}) and plots the typical structure bar
#' plot that visualize the q matrix of a structure run.
#'
#' @param sr Structure run object from \code{\link{gl.run.structure}} [required].
#' @param K The number for K of the q matrix that should be plotted. Needs to
#' be within you simulated range of K's in your sr structure run object. If
#' NULL, all the K's are plotted [default NULL].
#' @param met_clumpp The algorithm to use to infer the correct permutations.
#' One of 'greedy' or 'greedyLargeK' or 'stephens' [default "greedyLargeK"].
#' @param iter_clumpp The number of iterations to use if running either 'greedy'
#' 'greedyLargeK' [default 100].
#' @param clumpak Whether use the Clumpak method (see details) [default TRUE].
#' @param plot_theme Theme for the plot. See Details for options
#' [default NULL].
#' @param color_clusters A color palette for clusters (K) or a list with
#' as many colors as there are clusters (K) [default NULL].
#' @param ind_name Whether to plot individual names [default TRUE].
#' @param k_name Name of the structure plot to plot. It should be character
#' [default NULL].
#' @param border_ind The width of the border line between individuals
#' [default 0.25].
#' @param den Whether to include a dendrogram. It is necessary to include the
#' original genlight object used in gl.run.structure in the parameter x
#' [default FALSE].
#' @param dis.mat A dis object (distance matrix) to be used to order structure
#' plot which is plotted together with structure plot [default NULL].
#' @param x The original genlight object used in gl.run.structure description
#' [default NULL].
#' @param plot.out Specify if plot is to be produced [default TRUE].
#' @param plot.dir Directory in which to save files [default = working directory]
#' @param plot.file Name for the RDS binary file to save (base name only, exclude
#' extension) [default NULL]
#' @param verbose Verbosity: 0, silent or fatal errors; 1, begin and end; 2,
#' progress log ; 3, progress and results summary; 5, full report [default
#' NULL, unless specified using gl.set.verbosity]
#'
#' @details The function outputs a barplot which is the typical output of
#' structure. For a Evanno plot use gl.evanno.
#'
#' This function is based on the methods of CLUMPP and Clumpak as implemented
#' in the R package starmie (https://github.com/sa-lee/starmie).
#'
#' The Clumpak method identifies sets of highly similar runs among
#' all the replicates of the same K. The method then separates the distinct
#' groups of runs representing distinct modes in the space of possible solutions.
#'
#' The CLUMPP method permutes the clusters output by independent runs of
#' clustering programs such as structure, so that they match up as closely as
#' possible.
#'
#' This function averages the replicates within each mode identified by the
#' Clumpak method.
#'
#' Plots and table are saved to the working directory specified in plot.dir (tempdir )
#' if plot.file is set.
#'
#' Examples of other themes that can be used can be consulted in \itemize{
#' \item \url{https://ggplot2.tidyverse.org/reference/ggtheme.html} and \item
#' \url{https://yutannihilation.github.io/allYourFigureAreBelongToUs/ggthemes/}
#' }
#'
#' @return List of Q-matrices
#'
#' @author Bernd Gruber & Luis Mijangos (Post to \url{https://groups.google.com/d/forum/dartr})
#'
#' @examples
#' # examples need structure to be installed on the system (see above)
#' \dontrun{
#' bc <- bandicoot.gl[,1:100]
#' sr <- gl.run.structure(bc, k.range = 2:5, num.k.rep = 3, exec = './structure')
#' ev <- gl.evanno(sr)
#' ev
#' qmat <- gl.plot.structure(sr, K=3)
#' head(qmat)
#' gl.map.structure(qmat, K=3, bc, scalex=1, scaley=0.5)
#' }
#' @export
#' @importFrom ggdendro ggdendrogram
#' @seealso \code{gl.run.structure}, \code{gl.plot.structure}
#' @references
#' \itemize{
#' \item Pritchard, J.K., Stephens, M., Donnelly, P. (2000) Inference of
#' population structure using multilocus genotype data. Genetics 155, 945-959.
#' \item Kopelman, Naama M., et al. "Clumpak: a program for identifying
#' clustering modes and packaging population structure inferences across K."
#' Molecular ecology resources 15.5 (2015): 1179-1191.
#' \item Mattias Jakobsson and Noah A. Rosenberg. 2007. CLUMPP: a cluster
#' matching and permutation program for dealing with label switching and
#' multimodality in analysis of population structure. Bioinformatics
#' 23(14):1801-1806. Available at
#' \href{http://web.stanford.edu/group/rosenberglab/clumppDownload.html}{clumpp}
#' }
gl.plot.structure <- function(sr,
K = NULL,
met_clumpp = "greedyLargeK",
iter_clumpp = 100,
clumpak = TRUE,
plot_theme = NULL,
color_clusters = NULL,
ind_name = TRUE,
k_name = NULL,
border_ind = 0.15,
den = FALSE,
dis.mat = NULL,
x = NULL,
plot.out = TRUE,
plot.file = NULL,
plot.dir = NULL,
verbose = NULL) {
# SET VERBOSITY
verbose <- gl.check.verbosity(verbose)
# SET WORKING DIRECTORY
plot.dir <- gl.check.wd(plot.dir, verbose = 0)
# FLAG SCRIPT START
funname <- match.call()[[1]]
utils.flag.start(
func = funname,
build = "Jody",
verbose = verbose
)
# DO THE JOB
if (!is(sr, "structure.result")) {
stop(error(
"sr is not a structure result object returned from gl.run.structure.\n"
))
}
if (is.null(K)) {
ks <- range((lapply(sr, function(x) {
x$summary[1]
})))
ks <- ks[1]:ks[2]
} else {
ks <- K
}
res <- list()
for (i in ks) {
eq.k <- sapply(sr, function(x) {
x$summary["k"] == i
})
if (sum(eq.k) == 0) {
stop(error(paste(
"No entries for K =", K, "found in 'sr'.\n"
)))
}
sr_tmp <- sr[eq.k]
Q_list_tmp <- lapply(sr_tmp, function(x) {
as.matrix(x[[2]][, 4:ncol(x[[2]])])
})
# If K = 1
if (ncol(Q_list_tmp[[1]]) == 1) {
res[[length(res) + 1]] <- c(res, as.matrix(Q_list_tmp[1]))
# If K > 1
} else {
# If just one replicate
if (length(Q_list_tmp) == 1) {
res_tmp <- list(Q_list_tmp[[1]])
# if more than 1 replicate
} else {
res_tmp <- clumpp(Q_list=Q_list_tmp,
method = met_clumpp,
iter = iter_clumpp)$Q_list
}
# clumpak method for inferring modes within multiple structure runs as
# implemented in starmie package
if (clumpak) {
# if just one replicate
if (length(res_tmp) == 1) {
# res_tmp_2 <- res_tmp[[1]]
res_tmp_2 <- res_tmp
# if more than one replicate
} else {
simMatrix <- as.matrix(proxy::simil(res_tmp, method = G))
diag(simMatrix) <- 1
t <- calcThreshold(simMatrix)
simMatrix[simMatrix < t] <- 0
clusters <- mcl(simMatrix, addLoops = TRUE)$Cluster
res_tmp_2 <- split(res_tmp, clusters)
}
# averaging replicates
# if there is just one mode
if (length(res_tmp_2) == 1) {
# if there is just one replicate within the mode
if (!is.list(res_tmp_2[[1]]) ){
# length(res_tmp_2[[1]]) == 1) {
res_tmp_3 <- res_tmp_2[[1]]
# if there are more than 1 replicate within the mode
} else {
res_tmp_3 <- as.matrix(Reduce("+", res_tmp_2[[1]]) / length(res_tmp_2[[1]]))
}
# if there are more than 1 mode
} else {
res_tmp_3 <- lapply(res_tmp_2, function(x) {
# if there is just one replicate within the mode
if (length(x[1]) == 1) {
return(x[[1]])
# if there are more than 1 replicate within the mode
} else {
return(Reduce("+", x[1]) / length(x[1]))
}
})
}
} else {
res_tmp_3 <- res_tmp
}
# if the object is a list
if (is.list(res_tmp_3)) {
for (y in 1:length(res_tmp_3)) {
res[[length(res) + 1]] <- res_tmp_3[y]
}
} else {
res[[length(res) + 1]] <- res_tmp_3
}
}
}
# flattening lists
renquote <- function(l) if (is.list(l)) lapply(l, renquote) else enquote(l)
res <- lapply(unlist(renquote(res)), eval)
names(res) <- as.character(1:length(res))
Q_list <- res
# get K labels
Ks <- unlist(lapply(Q_list, ncol))
if (length(unique(Ks)) != length(Ks)) {
# Repeated Ks so label with subnumbering
Ks <- paste(Ks, ave(Ks, Ks, FUN = seq_along), sep = ".")
} else {
Ks <- as.character(Ks)
}
for (i in 1:length(Q_list)) {
Q_list_tmp <- data.frame(
Label = sr[[1]]$q.mat$id,
Q_list[[i]],
K = rep(Ks[[i]], nrow(Q_list[[i]])),
orig.pop = sr[[1]]$q.mat$orig.pop
)
n_col <- ncol(Q_list_tmp) - 3
colnames(Q_list_tmp) <-
c("Label", paste0(rep("cluster", n_col), 1:n_col), "K", "orig.pop")
cols_order <- colnames(Q_list_tmp)
cols_order <- cols_order[grepl("cluster", cols_order)]
Q_list_tmp$orig.pop <- as.factor(Q_list_tmp$orig.pop)
Q_list_tmp_list <- split(Q_list_tmp, Q_list_tmp$orig.pop)
Q_list_tmp_list_2 <- lapply(Q_list_tmp_list, function(x) {
data.table::setorderv(x, cols = cols_order, order = -1)
})
Q_list_tmp <- data.table::rbindlist(Q_list_tmp_list_2)
Q_list_tmp$ord <- 1:nrow(Q_list_tmp)
Q_list[[i]] <- Q_list_tmp
}
order_df <- Q_list[[1]][order(Q_list[[1]]$Label), ]
Q_list <- lapply(Q_list, function(x) {
tmp <- x[order(x$Label), ]
tmp$ord <- order_df$ord
return(tmp)
})
if (is.null(plot_theme)) {
plot_theme <- theme_dartR()
}
if (is.null(color_clusters)) {
color_clusters <- gl.select.colors(ncolors = max(ks), verbose = 0)
}
# #Melt and append Q matrices
Q_melt <-
do.call(
"rbind",
lapply(
Q_list,
reshape2::melt,
id.vars = c("Label", "K", "orig.pop", "ord"),
variable.name = "Cluster"
)
)
if(den){
if(!is.null(dis.mat)){
res <- dis.mat
}else{
res <- gl.dist.ind(x,method = "Manhattan",plot.display = FALSE,verbose = 0)
}
reorderfun <- function(d, w) reorder(d, w, agglo.FUN = mean)
distr <- dist(res)
hcr <- hclust(distr)
ddr <- as.dendrogram(hcr)
ddr <- reorderfun(ddr, TRUE)
p_den <- ggdendro::ggdendrogram(ddr)
rowInd <- order.dendrogram(ddr)
rowInd_2 <- data.frame(Label=indNames(x)[rowInd])
rowInd_2$order_d <- 1:nInd(x)
Q_melt <- merge(Q_melt,rowInd_2,by= "Label")
Q_melt$ord <- Q_melt$order_d
Q_melt$ord <- as.factor( Q_melt$ord)
}
Q_melt_tmp <- Q_melt
Q_melt_tmp <- Q_melt_tmp[order(Q_melt_tmp$ord),]
Q_melt_tmp_pop <- unique(Q_melt_tmp$orig.pop)
Q_melt$orig.pop <-
factor(Q_melt$orig.pop, levels = unique(sr[[1]]$q.mat$orig.pop))
if(!is.null(k_name)){
Q_melt <- Q_melt[which(Q_melt$K==k_name),]
}
p3 <- ggplot(Q_melt, aes_(x = ~ factor(ord), y = ~value, fill = ~Cluster)) +
geom_col(color = "black", size = border_ind, width = 1) +
facet_grid(K ~ orig.pop, scales = "free", space = "free") +
scale_y_continuous(expand = c(0, 0)) +
scale_x_discrete(
breaks = unique(Q_melt$ord),
labels = unique(Q_melt$Label),
expand = c(0, 0)
) +
scale_fill_manual(values = color_clusters) +
plot_theme +
theme(
panel.spacing = unit(0, "lines"),
panel.border = element_rect(
color = "black",
fill = NA,
size = 1
),
strip.background = element_blank(),
strip.text.x = element_text(size = 12, angle = 90),
axis.title.x = element_blank(),
axis.text.x = element_text(
size = 8,
angle = 90,
vjust = 0.5,
hjust = 1
),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.position = "none"
)
if (ind_name == FALSE) {
p3 <- p3 + theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
)
}
if(den){
design <- "#AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA#
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB"
p3 <- p3 / p_den +
plot_layout(design = design)
}
if (plot.out) {
print(p3)
}
# Optionally save the plot ---------------------
if (!is.null(plot.file)) {
tmp <- utils.plot.save(p3,
dir = plot.dir,
file = plot.file,
verbose = verbose
)
}
# FLAG SCRIPT END
if (verbose >= 1) {
cat(report("Completed:", funname, "\n"))
}
return(invisible(Q_list))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.