Nothing
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#
# This software was authored by Zhian N. Kamvar and Javier F. Tabima, graduate
# students at Oregon State University; Jonah C. Brooks, undergraduate student at
# Oregon State University; and Dr. Nik Grünwald, an employee of USDA-ARS.
#
# Permission to use, copy, modify, and distribute this software and its
# documentation for educational, research and non-profit purposes, without fee,
# and without a written agreement is hereby granted, provided that the statement
# above is incorporated into the material, giving appropriate attribution to the
# authors.
#
# Permission to incorporate this software into commercial products may be
# obtained by contacting USDA ARS and OREGON STATE UNIVERSITY Office for
# Commercialization and Corporate Development.
#
# The software program and documentation are supplied "as is", without any
# accompanying services from the USDA or the University. USDA ARS or the
# University do not warrant that the operation of the program will be
# uninterrupted or error-free. The end-user understands that the program was
# developed for research purposes and is advised not to rely exclusively on the
# program for any reason.
#
# IN NO EVENT SHALL USDA ARS OR OREGON STATE UNIVERSITY BE LIABLE TO ANY PARTY
# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
# LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION,
# EVEN IF THE OREGON STATE UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE. USDA ARS OR OREGON STATE UNIVERSITY SPECIFICALLY DISCLAIMS ANY
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY STATUTORY
# WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
# BASIS, AND USDA ARS AND OREGON STATE UNIVERSITY HAVE NO OBLIGATIONS TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#==============================================================================#
#' Internal function to plot the results from ia() and poppr()
#'
#' @param sample either an object of class "ialist" or a list of ialists
#' @param pval a named vector specifying the p values to display
#' @param pop The name of the population
#' @param file The name of the source file
#' @param N The number of samples in the population
#' @param observed observed values of Ia and rbarD
#' @param index The index to plot (defaults to "rbarD")
#' @param labsize size of the in-plot label
#' @param linesize size of the in-plot line
#'
#' @return a ggplot2 object
#' @keywords internal
#'
#' @examples
#' \dontrun{
#' data(Pinf)
#' x <- Pinf %>% seppop() %>% lapply(ia, sample = 99, valuereturn = TRUE, quiet = TRUE, plot = FALSE)
#' x
#' poppr:::poppr.plot(sample = x, file = "hey") # plots multiple populations
#' # plot.ialist takes care of the single populations.
#' for (i in x){
#' print(plot(i))
#' }
#' }
#'
poppr.plot <- function(sample, pval = c(Ia = 0.05, rbarD = 0.05),
pop = NULL, file = NULL, N = NULL,
observed = c(Ia = 0, rbarD = 0),
index = c("rbarD", "Ia"),
labsize = rel(3),
linesize = rel(1)){
INDEX_ARGS <- c("rbarD", "Ia")
index <- match.arg(index, INDEX_ARGS)
tidy_ialist <- function(ialist){
res <- stats::reshape(ialist$samples,
direction = "long",
varying = 1:2,
times = c("Ia", "rbarD"),
timevar = "variable",
v.names = "value")[-3] %>%
dplyr::as_tibble()
res$variable <- as.factor(res$variable)
res
}
if (!class(sample) %in% "ialist" & class(sample) %in% "list"){
ggsamps <- dplyr::bind_rows(lapply(sample, tidy_ialist), .id = "population")
ggvals <- vapply(sample, "[[", numeric(4), "index")
ggvals <- setNames(as.data.frame.table(ggvals, stringsAsFactors = FALSE),
c("index", "population", "value"))
ggsamps <- ggsamps[ggsamps$variable == index, ]
ggvals <- ggvals[grep(ifelse(index == "Ia", "I", "D"), ggvals$index), ]
ggindex <- ggvals[ggvals$index == index, ]
ggpval <- ggvals[ggvals$index == ifelse(index == "Ia", "p.Ia", "p.rD"), ]
ggsamps$population <- factor(ggsamps$population, names(sample))
srange <- range(ggsamps$value, na.rm = TRUE)
binw <- diff(srange/30)
plot_title <- make_poppr_plot_title(sample[[1]][["samples"]][[index]],
file = file)
thePlot <- ggplot(ggsamps, aes_string(x = "value", group = "population")) +
geom_histogram(binwidth = binw, position = "identity") +
geom_rug(alpha = 0.5) +
geom_vline(aes_string(xintercept = "value"), color = "blue", linetype = 2,
data = ggindex) +
facet_wrap(~population, scales = "free_x") +
ggtitle(plot_title)
if (index == "rbarD"){
thePlot <- thePlot + xlab(expression(paste(bar(r)[d])))
} else if (index == "Ia"){
thePlot <- thePlot + xlab(expression(paste(I[A])))
}
return(thePlot)
}
plot_title <- make_poppr_plot_title(sample[[index]], file, N, pop)
if (all(is.nan(sample$rbarD))){
oops <- ggplot(as.data.frame(list(x=-10:9)), aes_string(x = "x")) +
geom_histogram(binwidth=1, fill="orange") +
geom_text(aes(label="Warning:", x=0, y=0.8), color="black", size=rel(15)) +
geom_text(aes(label="Data contains only NaNs and\ncannot be displayed graphically",
x=0, y=0.5, hjust=0.5), color="black", size=rel(10)) +
theme(line = element_blank(),
axis.text = element_blank(),
axis.title = element_blank()) +
ggtitle(plot_title)
return(oops)
}
if (any(is.na(sample[[index]]))){
sample[[index]][is.na(sample[[index]])] <- mean(sample[[index]], na.rm=TRUE)
}
srange <- range(sample[[index]])
labs <- c(Ia = "I[A]", rbarD = "bar(r)[d]")
names(pval) <- names(labs)
binw <- diff(srange/30)
if (is.na(observed[index])){
xval <- srange[2]
just <- 1
vline <- geom_blank()
annot_color <- "red"
obs <- "NaN"
} else if (all(observed[index] > srange)){
xval <- observed[index]
just <- 1.1
} else {
maxobsmin <- c(srange[1], observed[index], srange[2])
xjust <- which.max(abs(diff(maxobsmin)))
xval <- srange[xjust]
just <- ifelse(xjust < 2, 0, 1)
}
if (!exists("annot_color")){
annot_color <- "blue"
vline <- geom_vline(xintercept = observed[index], color = "blue",
linetype = 2, size = linesize)
obs <- signif(observed[index], 3)
}
obslab <- paste(labs[index], ":", obs, sep = "")
plab <- paste("p =", signif(pval[index], 3))
ggdata <- tidy_ialist(list(samples = sample))
ggdata <- ggdata[ggdata$variable == index, ]
thePlot <- ggplot(ggdata, aes_string(x = "value"))
thePlot <- thePlot +
geom_histogram(binwidth = binw, position = "identity") +
geom_rug() +
vline +
annotate(geom = "text", x = xval, y = Inf, label = obslab, color = annot_color,
vjust = 2, hjust = just, parse = TRUE, size = labsize) +
annotate(geom = "text", x = xval, y = Inf, label = plab, color = annot_color,
vjust = 4, hjust = just, size = labsize)
if (index == "rbarD"){
thePlot <- thePlot + xlab(expression(paste(bar(r)[d])))
} else if (index == "Ia"){
thePlot <- thePlot + xlab(expression(paste(I[A])))
}
thePlot <- thePlot + ggtitle(plot_title)
return(thePlot)
}
#==============================================================================#
#
#' Create a minimum spanning network of selected populations using a distance
#' matrix.
#'
#' @param gid a \code{\link{genind}}, \code{\link{genclone}},
#' \code{\link{genlight}}, or \code{\link{snpclone}} object
#'
#' @param distmat a distance matrix that has been derived from your data set.
#'
#' @param mlg.compute if the multilocus genotypes are set to "custom" (see
#' \code{\link{mll.custom}} for details) in your genclone object, this will
#' specify which mlg level to calculate the nodes from. See details.
#'
#' @param palette a \code{vector} or \code{function} defining the color palette
#' to be used to color the populations on the graph. It defaults to
#' \code{\link{topo.colors}}. See examples for details.
#'
#' @param sublist a \code{vector} of population names or indexes that the user
#' wishes to keep. Default to "ALL".
#'
#' @param exclude a \code{vector} of population names or indexes that the user
#' wishes to discard. Default to \code{NULL}.
#'
#' @param blacklist DEPRECATED, use exclude.
#'
#' @param vertex.label a \code{vector} of characters to label each vertex. There
#' are two defaults: \code{"MLG"} will label the nodes with the multilocus
#' genotype from the original data set and \code{"inds"} will label the nodes
#' with the representative individual names.
#'
#' @param gscale "grey scale". If this is \code{TRUE}, this will scale the color
#' of the edges proportional to the observed distance, with the lines becoming
#' darker for more related nodes. See \code{\link{greycurve}} for details.
#'
#' @param glim "grey limit". Two numbers between zero and one. They determine
#' the upper and lower limits for the \code{\link{gray}} function. Default is
#' 0 (black) and 0.8 (20\% black). See \code{\link{greycurve}} for details.
#'
#' @param gadj "grey adjust". a positive \code{integer} greater than zero that
#' will serve as the exponent to the edge weight to scale the grey value to
#' represent that weight. See \code{\link{greycurve}} for details.
#'
#' @param gweight "grey weight". an \code{integer}. If it's 1, the grey scale
#' will be weighted to emphasize the differences between closely related
#' nodes. If it is 2, the grey scale will be weighted to emphasize the
#' differences between more distantly related nodes. See
#' \code{\link{greycurve}} for details.
#'
#' @param wscale "width scale". If this is \code{TRUE}, the edge widths will be
#' scaled proportional to the inverse of the observed distance , with the
#' lines becoming thicker for more related nodes.
#'
#' @param showplot logical. If \code{TRUE}, the graph will be plotted. If
#' \code{FALSE}, it will simply be returned.
#'
#' @param include.ties logical. If \code{TRUE}, the graph will include all edges
#' that were arbitrarily passed over in favor of another edge of equal weight.
#' If \code{FALSE}, which is the default, one edge will be arbitrarily
#' selected when two or more edges are tied, resulting in a pure minimum
#' spanning network.
#'
#' @param threshold numeric. By default, this is \code{NULL}, which will have no
#' effect. Any threshold value passed to this argument will be used in
#' \code{\link{mlg.filter}} prior to creating the MSN. If you have a data set
#' that contains contracted MLGs, this argument will override the threshold in
#' the data set. See Details.
#'
#' @param clustering.algorithm string. By default, this is \code{NULL}. If
#' \code{threshold = NULL}, this argument will have no effect. When supplied
#' with either "farthest_neighbor", "average_neighbor", or "nearest_neighbor",
#' it will be passed to \code{\link{mlg.filter}} prior to creating the MSN. If
#' you have a data set that contains contracted MLGs, this argument will
#' override the algorithm in the data set. See Details.
#'
#' @param ... any other arguments that could go into plot.igraph
#'
#' @return \item{graph}{a minimum spanning network with nodes corresponding to
#' MLGs within the data set. Colors of the nodes represent population
#' membership. Width and color of the edges represent distance.}
#' \item{populations}{a vector of the population names corresponding to the
#' vertex colors} \item{colors}{a vector of the hexadecimal representations of
#' the colors used in the vertex colors}
#'
#'
#' @details The minimum spanning network generated by this function is generated
#' via igraph's \code{\link[igraph:mst]{minimum.spanning.tree}}. The resultant
#' graph produced can be plotted using igraph functions, or the entire object
#' can be plotted using the function \code{\link{plot_poppr_msn}}, which will
#' give the user a scale bar and the option to layout your data.
#' \subsection{node sizes}{
#' The area of the nodes are representative of the number of samples. Because
#' \pkg{igraph} scales nodes by radius, the node sizes in the graph are
#' represented as the square root of the number of samples.}
#' \subsection{mlg.compute}{
#' Each node on the graph represents a different multilocus genotype.
#' The edges on the graph represent genetic distances that connect the
#' multilocus genotypes. In genclone objects, it is possible to set the
#' multilocus genotypes to a custom definition. This creates a problem for
#' clone correction, however, as it is very possible to define custom lineages
#' that are not monophyletic. When clone correction is performed on these
#' definitions, information is lost from the graph. To circumvent this, The
#' clone correction will be done via the computed multilocus genotypes, either
#' "original" or "contracted". This is specified in the \code{mlg.compute}
#' argument, above.}
#' \subsection{contracted multilocus genotypes}{
#' If your incoming data set is of the class \code{\linkS4class{genclone}},
#' and it contains contracted multilocus genotypes, this function will retain
#' that information for creating the minimum spanning network. You can use the
#' arguments \code{threshold} and \code{clustering.algorithm} to change the
#' threshold or clustering algorithm used in the network. For example, if you
#' have a data set that has a threshold of 0.1 and you wish to have a minimum
#' spanning network without a threshold, you can simply add
#' \code{threshold = 0.0}, and no clustering will happen.
#'
#' The \code{threshold} and \code{clustering.algorithm} arguments can also be
#' used to filter un-contracted data sets.
#'
#' All filtering will use the distance matrix supplied in the argument
#' \code{distmat}.
#' }
#'
#' @note The edges of these graphs may cross each other if the graph becomes too
#' large.
#'
#' @seealso \code{\link{plot_poppr_msn}} \code{\link{nancycats}},
#' \code{\link{upgma}}, \code{\link{nj}}, \code{\link{nodelabels}},
#' \code{\link{tab}}, \code{\link{missingno}}, \code{\link{bruvo.msn}},
#' \code{\link{greycurve}}
#'
#' @export
#' @aliases msn.poppr
#' @author Javier F. Tabima, Zhian N. Kamvar, Jonah C. Brooks
#' @examples
#'
#' # Load the data set and calculate the distance matrix for all individuals.
#' data(Aeut)
#' A.dist <- diss.dist(Aeut)
#'
#' # Graph it.
#' A.msn <- poppr.msn(Aeut, A.dist, gadj = 15, vertex.label = NA)
#'
#' # Find the sizes of the nodes (number of individuals per MLL):
#' igraph::vertex_attr(A.msn$graph, "size")^2
#'
#' \dontrun{
#' # Set subpopulation structure.
#' Aeut.sub <- as.genclone(Aeut)
#' setPop(Aeut.sub) <- ~Pop/Subpop
#'
#' # Plot respective to the subpopulation structure
#' As.msn <- poppr.msn(Aeut.sub, A.dist, gadj=15, vertex.label=NA)
#'
#' # Show only the structure of the Athena population.
#' As.msn <- poppr.msn(Aeut.sub, A.dist, gadj=15, vertex.label=NA, sublist=1:10)
#'
#' # Let's look at the structure of the microbov data set
#'
#' library("igraph")
#' data(microbov)
#' micro.dist <- diss.dist(microbov, percent = TRUE)
#' micro.msn <- poppr.msn(microbov, micro.dist, vertex.label=NA)
#'
#' # Let's plot it and show where individuals have < 15% of their genotypes
#' # different.
#'
#' edge_weight <- E(micro.msn$graph)$weight
#' edge_labels <- ifelse(edge_weight < 0.15, round(edge_weight, 3), NA)
#' plot.igraph(micro.msn$graph, edge.label = edge_labels, vertex.size = 2,
#' edge.label.color = "red")
#'
#' }
#'
#==============================================================================#
poppr.msn <- function (gid, distmat, palette = topo.colors, mlg.compute = "original",
sublist = "All", exclude = NULL, blacklist = NULL, vertex.label = "MLG",
gscale=TRUE, glim = c(0,0.8), gadj = 3, gweight = 1,
wscale=TRUE, showplot = TRUE,
include.ties = FALSE, threshold = NULL,
clustering.algorithm = NULL, ...){
# testing gid -------------------------------------------------------------
if (!inherits(gid, c("genclone", "snpclone"))){
if (inherits(gid, "genind")){
gid <- as.genclone(gid)
} else if (inherits(gid, "genlight")){
gid <- as.snpclone(gid)
} else {
stop("gid must be a genind/genclone or genlight/snpclone object.")
}
}
if (!inherits(gid@mlg, "MLG")){
gid@mlg <- new("MLG", gid@mlg)
}
if (!is.null(blacklist)) {
warning(
option_deprecated(
match.call(),
"blacklist",
"exclude",
"2.8.7.",
"Please use `exclude` in the future"
),
immediate. = TRUE
)
exclude <- blacklist
}
# Handling MLG type -------------------------------------------------------
visible_mlg <- visible(gid@mlg)
if (visible_mlg == "custom"){
mll(gid) <- mlg.compute
} else if (visible_mlg == "contracted"){
mll(gid) <- "original"
if (is.null(threshold)){
threshold <- cutoff(gid@mlg)["contracted"]
}
if (is.null(clustering.algorithm)){
clustering.algorithm <- distalgo(gid@mlg)
}
}
# testing dist ------------------------------------------------------------
is_dist <- inherits(distmat, "dist")
is_mat <- inherits(distmat, "matrix")
if (is_dist | is_mat){
n <- nInd(gid)
eq_size <- if (is_dist) n == attr(distmat, "Size") else n == nrow(distmat)
if (!eq_size){
stop("The size of the distance matrix does not match the size of the data.\n")
}
} else {
stop("The distance matrix is neither a dist object nor a matrix.\n")
}
distmat <- as.matrix(distmat)
gadj <- ifelse(gweight == 1, gadj, -gadj)
# Subsetting the population -----------------------------------------------
# This will subset both the population and the matrix.
if (toupper(sublist[1]) != "ALL" | !is.null(exclude)){
sublist_exclude <- sub_index(gid, sublist, exclude)
distmat <- distmat[sublist_exclude, sublist_exclude, drop = FALSE]
gid <- popsub(gid, sublist, exclude)
}
# Clone correcting the matrix ---------------------------------------------
if (!is.null(threshold)){
filtered <- filter_at_threshold(gid,
threshold,
indist = distmat,
clustering.algorithm,
bruvo_args = NULL)
distmat <- filtered$indist
cgid <- filtered$cgid
gid <- filtered$gid
} else {
cgid <- gid[.clonecorrector(gid), ]
singles <- !duplicated(mll(gid))
distmat <- distmat[singles, singles, drop = FALSE]
}
rownames(distmat) <- indNames(cgid) -> colnames(distmat)
poppr_msn_list <- msn_constructor(
gid = gid,
cgid = cgid,
palette = palette,
indist = distmat,
include.ties = include.ties,
mlg.compute = mlg.compute,
vlab = vertex.label,
visible_mlg = visible_mlg,
wscale = wscale,
gscale = gscale,
glim = glim,
gadj = gadj,
showplot = showplot,
...)
return(poppr_msn_list)
}
#==============================================================================#
#' Create a table summarizing missing data or ploidy information of a genind or
#' genclone object
#'
#' @param gen a \linkS4class{genind} or \linkS4class{genclone} object.
#'
#' @param type \code{character}. What information should be returned. Choices
#' are "missing" (Default) and "ploidy". See Description.
#'
#' @param percent \code{logical}. (ONLY FOR \code{type = 'missing'}) If
#' \code{TRUE} (default), table and plot will represent missing data as a
#' percentage of each cell. If \code{FALSE}, the table and plot will represent
#' missing data as raw counts. (See details)
#'
#' @param plot \code{logical}. If \code{TRUE}, a simple heatmap will be
#' produced. If \code{FALSE} (default), no heatmap will be produced.
#'
#' @param df \code{logical}. If \code{TRUE}, the data will be returned as a long
#' form data frame. If \code{FALSE} (default), a matrix with samples in rows
#' and loci in columns will be returned.
#'
#' @param returnplot \code{logical}. If \code{TRUE}, a list is returned with two
#' elements: \code{table} - the normal output and \code{plot} - the ggplot
#' object. If \code{FALSE}, the table is returned.
#'
#' @param low \code{character}. What color should represent no missing data or
#' lowest observed ploidy? (default: "blue")
#'
#' @param high \code{character}. What color should represent the highest amount
#' of missing data or observed ploidy? (default: "red")
#'
#' @param plotlab \code{logical}. (ONLY FOR \code{type = 'missing'}) If
#' \code{TRUE} (default), values of missing data greater than 0\% will be
#' plotted. If \code{FALSE}, the plot will appear un-appended.
#'
#' @param scaled \code{logical}. (ONLY FOR \code{type = 'missing'}) This is for
#' when \code{percent = TRUE}. If \code{TRUE} (default), the color specified
#' in \code{high} will represent the highest observed value of missing data.
#' If \code{FALSE}, the color specified in \code{high} will represent 100\%.
#'
#' @return a matrix, data frame (\code{df = TRUE}), or a list (\code{returnplot
#' = TRUE}) representing missing data per population (\code{type = 'missing'})
#' or ploidy per individual (\code{type = 'ploidy'}) in a \linkS4class{genind}
#' or \linkS4class{genclone} object.
#'
#' @details
#' Missing data is accounted for on a per-population level.\cr
#' Ploidy is accounted for on a per-individual level.
#'
#' \subsection{For type = 'missing'}{
#' This data is potentially useful for identifying areas of systematic missing
#' data. There are a few caveats to be aware of. \itemize{ \item
#' \strong{Regarding counts of missing data}: Each count represents the number
#' of individuals with missing data at each locus. The last column, "mean" can
#' be thought of as the average number of individuals with missing data per
#' locus. \item \strong{Regarding percentage missing data}: This percentage is
#' \strong{relative to the population and locus}, not to the entire data set.
#' The last column, "mean" represents the average percent of the population
#' with missing data per locus. }}
#' \subsection{For type = 'ploidy'}{
#' This option is useful for data that has been imported with mixed ploidies.
#' It will summarize the relative levels of ploidy per individual per locus.
#' This is simply based off of observed alleles and does not provide any
#' further estimates.}
#'
#' @export
#' @keywords missing ploidy
#' @author Zhian N. Kamvar
#' @examples
#' data(nancycats)
#' nancy.miss <- info_table(nancycats, plot = TRUE, type = "missing")
#' data(Pinf)
#' Pinf.ploid <- info_table(Pinf, plot = TRUE, type = "ploidy")
#'
#==============================================================================#
info_table <- function(gen, type = c("missing", "ploidy"), percent = TRUE, plot = FALSE,
df = FALSE, returnplot = FALSE, low = "blue",
high = "red", plotlab = TRUE, scaled = TRUE){
datalabel <- as.character(match.call()[2])
ARGS <- c("missing", "ploidy")
type <- match.arg(type, ARGS)
if (type == "missing"){
valname <- "Missing"
pops <- seppop(gen, drop = FALSE)
pops$Total <- gen
inds <- 1
if (percent){
inds <- c(table(pop(gen)), nInd(gen))
}
data_table <- matrix(0, nrow = nLoc(gen) + 1, ncol = length(pops))
data_table[1:nLoc(gen), ] <- vapply(pops, number_missing_locus, numeric(nLoc(gen)), 1)
data_table[-nrow(data_table), ] <- t(apply(data_table[-nrow(data_table), ], 1, "/", inds))
data_table[nrow(data_table), ] <- colMeans(data_table[-nrow(data_table), ])
rownames(data_table) <- c(locNames(gen), "Mean")
colnames(data_table) <- names(pops)
dimnames(data_table) <- list(Locus = c(locNames(gen), "Mean"), Population = names(pops))
if (all(data_table == 0)){
message("No Missing Data Found!")
return(NULL)
}
if (plot){
data_df <- as.data.frame.table(data_table,
responseName = valname,
stringsAsFactors = FALSE)
leg_title <- valname
data_df[1:2] <- data.frame(lapply(data_df[1:2],
function(x) factor(x, levels = unique(x))))
plotdf <- data_df -> textdf
if (percent) {
plotdf$Missing <- round(plotdf$Missing*100, 2)
textdf$Missing <- paste(plotdf$Missing, "%")
miss <- "0 %"
title <- paste("Percent missing data per locus and population of",
datalabel)
leg_title <- paste("Percent", leg_title)
if(!scaled){
lims <- c(0, 100)
}
} else {
textdf$Missing <- round(textdf$Missing, 2)
miss <- 0
title <- paste("Missing data per locus and population of",
datalabel)
}
if (scaled | !percent){
lims <- c(0, max(plotdf$Missing))
}
linedata <- data.frame(list(yint = 1.5, xint = nrow(data_table) - 0.5))
textdf$Missing <- ifelse(textdf$Missing == miss, "", textdf$Missing)
plotdf$Missing[plotdf$Locus == "Mean" & plotdf$Population == "Total"] <- NA
outplot <- ggplot(plotdf, aes_string(x = "Locus", y = "Population")) +
geom_tile(aes_string(fill = valname)) +
labs(list(title = title, x = "Locus", y = "Population")) +
labs(fill = leg_title) +
scale_fill_gradient(low = low, high = high, na.value = "white",
limits = lims) +
geom_hline(aes_string(yintercept = "yint"), data = linedata) +
geom_vline(aes_string(xintercept = "xint"), data = linedata)
if (plotlab){
outplot <- outplot + geom_text(aes_string(label = valname),
data = textdf)
}
outplot <- outplot +
theme_classic() +
scale_x_discrete(expand = c(0, -1)) +
scale_y_discrete(expand = c(0, -1),
limits = rev(unique(plotdf$Population))) +
theme(axis.text.x = element_text(size = 10, angle = -45,
hjust = 0, vjust = 1))
print(outplot)
}
} else if (type == "ploidy"){
valname <- "Observed_Ploidy"
data_table <- get_local_ploidy(gen)
dimnames(data_table) <- list(Samples = indNames(gen), Loci = locNames(gen))
if (plot){
data_df <- as.data.frame.table(data_table,
responseName = valname,
stringsAsFactors = FALSE)
data_df[1:2] <- data.frame(lapply(data_df[1:2],
function(x) factor(x, levels = unique(x))))
vars <- aes_string(x = "Loci", y = "Samples", fill = valname)
mytheme <- theme_classic() +
theme(axis.text.x = element_text(size = 10, angle = -45,
hjust = 0, vjust = 1))
legvars <- unique(data_df[[valname]])
title <- paste("Observed ploidy of", datalabel)
outplot <- ggplot(data_df) + geom_tile(vars) +
scale_fill_gradient(low = low, high = high, guide = "legend",
breaks = legvars) +
scale_x_discrete(expand = c(0, -1)) +
scale_y_discrete(expand = c(0, -1),
limits = rev(unique(data_df$Samples))) +
labs(list(title = title, x = "Locus", y = "Sample",
fill = "Observed\nPloidy")) +
mytheme
print(outplot)
}
}
if (df){
if (!exists("data_df")){
data_df <- as.data.frame.table(data_table,
responseName = valname,
stringsAsFactors = FALSE)
}
data_table <- data_df
} else {
if (type == "missing"){
data_table <- t(data_table)
}
class(data_table) <- c("locustable", "matrix")
}
if (returnplot & exists("outplot")){
data_table <- list(table = data_table, plot = outplot)
}
return(data_table)
}
#==============================================================================#
#' Display a greyscale gradient adjusted to specific parameters
#'
#' This function has one purpose. It is for deciding the appropriate scaling for
#' a grey palette to be used for edge weights of a minimum spanning network.
#'
#'
#' @param data a sequence of numbers to be converted to greyscale.
#'
#' @param glim "grey limit". Two numbers between zero and one. They determine
#' the upper and lower limits for the \code{\link{gray}} function. Default is 0
#' (black) and 0.8 (20\% black).
#'
#' @param gadj "grey adjust". a positive \code{integer} greater than zero that
#' will serve as the exponent to the edge weight to scale the grey value to
#' represent that weight.
#'
#' @param gweight "grey weight". an \code{integer}. If it's 1, the grey scale
#' will be weighted to emphasize the differences between closely related nodes.
#' If it is 2, the grey scale will be weighted to emphasize the differences
#' between more distantly related nodes.
#'
#' @param scalebar When this is set to \code{TRUE}, two scalebars will be
#' plotted. The purpose of this is for adding a scale bar to minimum spanning
#' networks produced in earlier versions of poppr.
#'
#' @return A plot displaying a grey gradient from 0.001 to 1 with minimum and
#' maximum values displayed as yellow lines, and an equation for the correction
#' displayed in red.
#'
#' @author Zhian N. Kamvar
#'
#' @examples
#' # Normal grey curve with an adjustment of 3, an upper limit of 0.8, and
#' # weighted towards smaller values.
#' greycurve()
#' \dontrun{
#' # 1:1 relationship grey curve.
#' greycurve(gadj=1, glim=1:0)
#'
#' # Grey curve weighted towards larger values.
#' greycurve(gweight=2)
#'
#' # Same as the first, but the limit is 1.
#' greycurve(glim=1:0)
#'
#' # Setting the lower limit to 0.1 and weighting towards larger values.
#' greycurve(glim=c(0.1,0.8), gweight=2)
#' }
#' @export
#==============================================================================#
greycurve <- function(data = seq(0, 1, length = 1000), glim = c(0,0.8),
gadj = 3, gweight = 1, scalebar = FALSE){
gadj <- ifelse(gweight == 1, gadj, -gadj)
adjustcurve(data, glim, correction = gadj, show = TRUE, scalebar = scalebar)
}
#==============================================================================#
#' Plot minimum spanning networks produced in poppr.
#'
#' This function allows you to take the output of poppr.msn and bruvo.msn and
#' customize the plot by labeling groups of individuals, size of nodes, and
#' adjusting the palette and scale bar.
#'
#' @param x a \code{\linkS4class{genind}}, \code{\linkS4class{genclone}},
#' \code{\linkS4class{genlight}}, or \code{\linkS4class{snpclone}} object from
#' which \code{poppr_msn} was derived.
#'
#' @param poppr_msn a \code{list} produced from either \code{\link{poppr.msn}}
#' or \code{\link{bruvo.msn}}. This list should contain a graph, a vector of
#' population names and a vector of hexadecimal color definitions for each
#' population.
#'
#' @inheritParams greycurve
#'
#' @inheritParams poppr.msn
#'
#'
#' @param nodescale a \code{numeric} indicating how to scale the node sizes (scales by area).
#' @param nodebase \strong{deprecated} a \code{numeric} indicating what base logarithm should be
#' used to scale the node sizes. Defaults to 1.15. See details.
#'
#' @param nodelab an \code{integer} specifying the smallest size of node to
#' label. See details.
#'
#' @param inds a \code{character} or \code{numeric} vector indicating which
#' samples or multilocus genotypes to label on the graph. See details.
#'
#' @param mlg \code{logical} When \code{TRUE}, the nodes will be labeled by
#' multilocus genotype. When \code{FALSE} (default), nodes will be labeled by
#' sample names.
#'
#' @param quantiles \code{logical}. When set to \code{TRUE} (default), the scale
#' bar will be composed of the quantiles from the observed edge weights. When
#' set to \code{FALSE}, the scale bar will be composed of a smooth gradient
#' from the minimum edge weight to the maximum edge weight.
#'
#' @param cutoff a number indicating the longest distance to display in your
#' graph. This is performed by removing edges with weights greater than this
#' number.
#'
#' @param palette a function or character corresponding to a specific palette
#' you want to use to delimit your populations. The default is whatever
#' palette was used to produce the original graph.
#'
#' @param layfun a function specifying the layout of nodes in your graph. It
#' defaults to \code{\link[igraph:layout_nicely]{layout.auto}}.
#'
#' @param beforecut if \code{TRUE}, the layout of the graph will be computed
#' before any edges are removed with \code{cutoff}. If \code{FALSE} (Default),
#' the layout will be computed after any edges are removed.
#'
#' @param pop.leg if \code{TRUE}, a legend indicating the populations will
#' appear in the top right corner of the graph, but will not overlap. Setting
#' \code{pop.leg = FALSE} disables this legend. See details.
#'
#' @param size.leg if \code{TRUE}, a legend displyaing the number of samples per
#' node will appear either below the population legend or in the top right
#' corner of the graph. Setting \code{size.leg = FALSE} disables this legend.
#'
#' @param scale.leg if \code{TRUE}, a scale bar indicating the distance will
#' appear under the graph. Setting \code{scale.leg = FALSE} suppresses this
#' bar. See details.
#'
#' @param ... any other parameters to be passed on to
#' \code{\link[igraph]{plot.igraph}}.
#'
#' @details The previous incarnation of msn plotting in poppr simply plotted the
#' minimum spanning network with the legend of populations, but did not
#' provide a scale bar and it did not provide the user a simple way of
#' manipulating the layout or labels. This function allows the user to
#' manipulate many facets of graph creation, making the creation of minimum
#' spanning networks ever so slightly more user friendly.
#'
#' This function must have both the source data and the output msn to work.
#' The source data must contain the same population structure as the graph.
#' Every other parameter has a default setting.
#'
#' \subsection{Parameter details}{ \itemize{
#' \item \code{inds} By default, the graph will label each node (circle) with
#' all of the samples (individuals) that are contained within that node. As
#' each node represents a single multilocus genotype (MLG) or individuals (n
#' >= 1), this argument is designed to allow you to selectively label the
#' nodes based on query of sample name or MLG number. If the option \code{mlg
#' = TRUE}, the multilocus genotype assignment will be used to label the node.
#' If you do not want to label the nodes by individual or multilocus genotype,
#' simply set this to a name that doesn't exist in your data.
#' \item \code{nodescale} The nodes (circles) on the graph represent different
#' multilocus genotypes. The area of the nodes represent the number of
#' individuals. Setting nodescale will scale the area of the nodes.
#' \item \code{nodelab} If a node is not labeled by individual, this will
#' label the size of the nodes greater than or equal to this value. If you
#' don't want to label the size of the nodes, simply set this to a very high
#' number.
#' \item \code{cutoff} This is useful for when you want to investigate groups
#' of multilocus genotypes separated by a specific distance or if you have two
#' distinct populations and you want to physically separate them in your
#' network.
#' \item \code{beforecut} This is an indicator useful if you want to maintain
#' the same position of the nodes before and after removing edges with the
#' \code{cutoff} argument. This works best if you set a seed before you run
#' the function.
#' }}
#'
#' \subsection{mlg.compute}{
#' Each node on the graph represents a different multilocus genotype.
#' The edges on the graph represent genetic distances that connect the
#' multilocus genotypes. In genclone objects, it is possible to set the
#' multilocus genotypes to a custom definition. This creates a problem for
#' clone correction, however, as it is very possible to define custom lineages
#' that are not monophyletic. When clone correction is performed on these
#' definitions, information is lost from the graph. To circumvent this, The
#' clone correction will be done via the computed multilocus genotypes, either
#' "original" or "contracted". This is specified in the \code{mlg.compute}
#' argument, above.}
#'
#' \subsection{legends}{ To avoid drawing the legend over the graph, legends
#' are separated by different plotting areas. This means that if legends are
#' included, you cannot plot multiple MSNs in a single plot. The scale bar (to
#' be added in manually) can be obtained from \code{\link{greycurve}} and the
#' legend can be plotted with \code{\link{legend}}.}
#'
#' @return the modified msn list, invisibly.
#'
#' @seealso \code{\link[igraph:layout_nicely]{layout.auto}} \code{\link[igraph]{plot.igraph}}
#' \code{\link{poppr.msn}} \code{\link{bruvo.msn}} \code{\link{greycurve}}
#' \code{\link[igraph]{delete_edges}} \code{\link{palette}}
#'
#' @author Zhian N. Kamvar
#' @export
#' @examples
#' # Using a data set of the Aphanomyces eutieches root rot pathogen.
#' data(Aeut)
#' adist <- diss.dist(Aeut, percent = TRUE)
#' amsn <- poppr.msn(Aeut, adist, showplot = FALSE)
#'
#' # Default
#' library("igraph") # To get all the layouts.
#' set.seed(500)
#' plot_poppr_msn(Aeut, amsn, gadj = 15)
#'
#' \dontrun{
#'
#' # Different layouts (from igraph) can be used by supplying the function name.
#' set.seed(500)
#' plot_poppr_msn(Aeut, amsn, gadj = 15, layfun = layout_with_kk)
#'
#' # Removing link between populations (cutoff = 0.2) and labelling no individuals
#' set.seed(500)
#' plot_poppr_msn(Aeut, amsn, inds = "none", gadj = 15, beforecut = TRUE, cutoff = 0.2)
#'
#' # Labelling individual #57 because it is an MLG that crosses populations
#' # Showing clusters of MLGS with at most 5% variation
#' # Notice that the Mt. Vernon population appears to be more clonal
#' set.seed(50)
#' plot_poppr_msn(Aeut, amsn, gadj = 15, cutoff = 0.05, inds = "057")
#'
#'
#' data(partial_clone)
#' pcmsn <- bruvo.msn(partial_clone, replen = rep(1, 10))
#'
#' # You can plot using a color palette or a vector of named colors
#' # Here's a way to define the colors beforehand
#' pc_colors <- nPop(partial_clone) %>%
#' RColorBrewer::brewer.pal("Set2") %>%
#' setNames(popNames(partial_clone))
#'
#' pc_colors
#'
#' # Labelling the samples contained in multilocus genotype 9
#' set.seed(999)
#' plot_poppr_msn(partial_clone, pcmsn, palette = pc_colors, inds = 9)
#'
#' # Doing the same thing, but using one of the sample names as input.
#' set.seed(999)
#' plot_poppr_msn(partial_clone, pcmsn, palette = pc_colors, inds = "sim 20")
#'
#' # Note that this is case sensitive. Nothing is labeled.
#' set.seed(999)
#' plot_poppr_msn(partial_clone, pcmsn, palette = pc_colors, inds = "Sim 20")
#'
#' # Something pretty
#' data(microbov)
#' mdist <- diss.dist(microbov, percent = TRUE)
#' micmsn <- poppr.msn(microbov, mdist, showplot = FALSE)
#'
#' plot_poppr_msn(microbov, micmsn, palette = "terrain.colors", inds = "n",
#' quantiles = FALSE)
#' plot_poppr_msn(microbov, micmsn, palette = "terrain.colors", inds = "n",
#' cutoff = 0.3, quantiles = FALSE)
#'
#' ### Utilizing vectors for palettes
#'
#' data(Pram)
#' Pram_sub <- popsub(Pram, exclude = c("Nursery_CA", "Nursery_OR"))
#'
#' # Creating the network for the forest
#' min_span_net_sub <- bruvo.msn(Pram_sub, replen = other(Pram)$REPLEN,
#' add = TRUE, loss = TRUE, showplot = FALSE,
#' include.ties = TRUE)
#'
#' # Creating the network with nurseries
#' min_span_net <- bruvo.msn(Pram, replen = other(Pram)$REPLEN,
#' add = TRUE, loss = TRUE, showplot = FALSE,
#' include.ties = TRUE)
#'
#' # Only forest genotypes
#' set.seed(70)
#' plot_poppr_msn(Pram,
#' min_span_net_sub,
#' inds = "ALL",
#' mlg = TRUE,
#' gadj = 9,
#' nodescale = 5,
#' palette = other(Pram)$comparePal,
#' cutoff = NULL,
#' quantiles = FALSE,
#' beforecut = TRUE)
#'
#' # With Nurseries
#' set.seed(70)
#' plot_poppr_msn(Pram,
#' min_span_net,
#' inds = "ALL",
#' mlg = TRUE,
#' gadj = 9,
#' nodescale = 5,
#' palette = other(Pram)$comparePal,
#' cutoff = NULL,
#' quantiles = FALSE,
#' beforecut = TRUE)
#' }
#==============================================================================#
#' @importFrom igraph layout.auto delete.edges
plot_poppr_msn <- function(x,
poppr_msn,
gscale = TRUE,
gadj = 3,
mlg.compute = "original",
glim = c(0, 0.8),
gweight = 1,
wscale = TRUE,
nodescale = 10,
nodebase = NULL,
nodelab = 2,
inds = "ALL",
mlg = FALSE,
quantiles = TRUE,
cutoff = NULL,
palette = NULL,
layfun = layout.auto,
beforecut = FALSE,
pop.leg = TRUE,
size.leg = TRUE,
scale.leg = TRUE,
...) {
if (!is(x, "genlight") && !is.genind(x)){
stop(paste(substitute(x), "is not a genind or genclone object."))
}
if (!identical(names(poppr_msn), c("graph", "populations", "colors"))){
stop("graph not compatible")
}
if (!is.null(palette)){
pal <- palette
newpal <- palette_parser(pal, length(poppr_msn$populations), poppr_msn$populations)
poppr_msn <- update_poppr_graph(poppr_msn, function(x) newpal)
}
# Making sure incoming data matches so that the individual names match.
if (!all(is.na(poppr_msn$populations))){
if (nPop(x) > 0 && nPop(x) >= length(poppr_msn$populations)){
x <- popsub(x, sublist = poppr_msn$populations)
} else {
warning("populations in graph don't match data. Setting to none.")
}
}
if (beforecut){
LAYFUN <- match.fun(layfun)
lay <- LAYFUN(poppr_msn$graph)
} else {
lay <- match.fun(layfun)
}
# delete.edges <- match.fun(igraph::delete.edges)
if (!is.null(cutoff) && !is.na(cutoff)){
if (all(cutoff < E(poppr_msn$graph)$weight)){
msg <- paste0("Cutoff value (", cutoff, ") is below the minimum observed",
" distance. Edges will not be removed.")
warning(msg)
} else {
E_above_cutoff <- E(poppr_msn$graph)[E(poppr_msn$graph)$weight >= cutoff]
poppr_msn$graph <- delete.edges(poppr_msn$graph, E_above_cutoff)
}
}
# Adjusting color scales. This will replace any previous scaling contained in
# poppr_msn.
weights <- E(poppr_msn$graph)$weight
wmin <- min(weights)
wmax <- max(weights)
gadj <- ifelse(gweight == 1, gadj, -gadj)
poppr_msn$graph <- update_edge_scales(poppr_msn$graph, wscale, gscale, glim, gadj)
x.mlg <- mlg.vector(x)
if (is.factor(x.mlg)){
x.mlg <- mll(x, mlg.compute)
custom <- TRUE
# labs <- unique(x.mlg)
labs <- correlate_custom_mlgs(x, mlg.compute)
} else {
labs <- unique(x.mlg)
custom <- FALSE
}
# Highlighting only the names of the submitted genotypes and the matching
# isolates.
# The labels in the graph are organized by MLG, so we will use that to
# extract the names we need.
if (length(inds) == 1 & toupper(inds[1]) == "ALL"){
x.input <- unique(x.mlg)
} else if (is.character(inds)){
x.input <- unique(x.mlg[indNames(x) %in% inds])
} else if (is.numeric(inds)){
x.input <- unique(x.mlg[x.mlg %in% inds])
}
# Remove labels that are not specified.
if (!custom){
labs[!labs %in% x.input] <- NA
} else {
labs[!labs %in% labs[as.character(x.input)]] <- NA
}
if (!isTRUE(mlg)){
# Combine all the names that match with each particular MLG in x.input.
combined_names <- vapply(x.input, function(mlgname)
paste(rev(indNames(x)[x.mlg == mlgname]),
collapse = "\n"),
character(1))
labs[!is.na(labs)] <- combined_names
} else if (is.numeric(x.mlg) && !custom){
labs[!is.na(labs)] <- paste("MLG", labs[!is.na(labs)], sep = ".")
} else {
labs <- as.character(labs)
}
if (any(is.na(labs))){
sizelabs <- V(poppr_msn$graph)$size
sizelabs <- ifelse(sizelabs >= nodelab, sizelabs * sizelabs, NA)
labs <- ifelse(is.na(labs), sizelabs, labs)
}
if (!is.null(nodebase)){
warnw <- min(.9 * getOption("width"), 80)
nodewarn <- paste("\n",
"The parameter nodebase has been deprecated.",
"It is currently being kept for backwards compatibility,",
"but will be removed in a future version of poppr.",
"\n\nPlease use the new parameter nodescale instead.")
nodewarn <- strwrap(nodewarn, width = warnw)
warning(paste(nodewarn, collapse = "\n"))
# Change the size of the vertices to a log scale.
if (nodebase == 1) {
nodebase <- 1.15
nodewarn <- paste0("Cannot set nodebase = 1:\n",
"Log base 1 is undefined, reverting to nodebase = 1.15")
warning(nodewarn)
}
vsize <- log(V(poppr_msn$graph)$size * V(poppr_msn$graph)$size, base = nodebase) + 3
} else {
vsize <- sqrt(V(poppr_msn$graph)$size * V(poppr_msn$graph)$size * nodescale)
}
# Plotting parameters.
def.par <- par(no.readonly = TRUE)
# Return top level plot to defaults.
on.exit({
graphics::layout(matrix(1, ncol = 1, byrow = TRUE))
graphics::par(def.par)
})
pop.leg <- pop.leg && !all(is.na(poppr_msn$populations))
if (!pop.leg && !size.leg && !scale.leg) {
plot.igraph(poppr_msn$graph, vertex.label = labs, vertex.size = vsize,
layout = lay, ...)
} else {
# Setting up the matrix for plotting. One Vertical panel of width 1 and height
# 5 for the legend, one rectangular panel of width 4 and height 4.5 for the
# graph, and one horizontal panel of width 4 and height 0.5 for the greyscale.
if (pop.leg || size.leg) {
if (scale.leg) {
mat <- matrix(c(1,4,
3,2),
ncol = 2,
byrow = TRUE)
layout(mat, widths = c(1, 4), heights = c(4.5, 0.5))
} else {
layout(matrix(c(1, 2), ncol = 2), widths = c(1, 4))
}
# mar = bottom left top right
## LEGEND
par(mar = c(0, 0, 1, 0) + 0.5)
a <- NULL
main <- if (pop.leg) 'POPULATION' else NULL
too_many_pops <- as.integer(ceiling(nPop(x)/30))
pops_correction <- ifelse(too_many_pops > 1, -1, 1)
yintersperse <- ifelse(too_many_pops > 1, 0.51, 0.62)
graphics::plot(c(-1, 1),
c(-0.5, 0.5),
type = 'n',
axes = FALSE,
xlab = '',
ylab = '',
main = main)
if (pop.leg) {
a <- graphics::legend("topleft",
bty = "n",
cex = 1.2^pops_correction,
legend = poppr_msn$populations,
fill = poppr_msn$color,
border = NULL,
ncol = too_many_pops,
x.intersp = 0.45,
y.intersp = yintersperse)
}
if (size.leg) {
N <- V(poppr_msn$graph)$size * V(poppr_msn$graph)$size
make_circle_legend(a = a,
mlg_number = N,
scale = sqrt(nodescale),
cex = 1.2 ^ pops_correction,
radmult = 4,
xspace = 0.45,
font = 2,
pos = 0,
x = 0,
y = 0.55,
txt = 0.05)
}
} else {
graphics::layout(matrix(c(2, 1), nrow = 2), heights = c(4.5, 0.5))
}
if (scale.leg) {
## SCALE BAR
if (quantiles) {
scales <- sort(weights)
} else {
scales <- seq(wmin, wmax, l = 1000)
}
greyscales <- grDevices::gray(adjustcurve(scales, show=FALSE, glim=glim, correction=gadj))
legend_image <- grDevices::as.raster(matrix(greyscales, nrow=1))
graphics::par(mar = c(0, 1, 0, 1) + 0.5)
graphics::plot.new()
graphics::rasterImage(legend_image, 0, 0.5, 1, 1)
graphics::polygon(c(0, 1, 1), c(0.5, 0.5, 0.8), col = "white", border = "white", lwd = 2)
graphics::axis(3, at = c(0, 0.25, 0.5, 0.75, 1), labels = round(quantile(scales), 3))
graphics::text(0.5, 0, labels = "DISTANCE", font = 2, cex = 1.5, adj = c(0.5, 0))
}
## PLOT
if ((pop.leg || size.leg) && scale.leg) frame()
par(mar = c(0,0,0,0))
plot.igraph(poppr_msn$graph,
vertex.label = labs,
vertex.size = vsize,
layout = lay,
...)
}
return(invisible(poppr_msn))
}
#==============================================================================#
#' Produce a genotype accumulation curve
#'
#' Genotype accumulation curves are useful for determining the minimum number of
#' loci necessary to discriminate between individuals in a population. This
#' function will randomly sample loci without replacement and count the number
#' of multilocus genotypes observed.
#'
#' @param gen a \code{\linkS4class{genclone}}, \code{\linkS4class{genind}}, or
#' \code{\link[pegas:read.loci]{loci}} object.
#'
#' @param sample an \code{integer} defining the number of times loci will be
#' resampled without replacement.
#'
#' @param maxloci the maximum number of loci to sample. By default,
#' \code{maxloci = 0}, which indicates that n - 1 loci are to be used. Note
#' that this will always take min(n - 1, maxloci)
#'
#' @param quiet if \code{FALSE} (default), Progress of the iterations will be
#' displayed. If \code{TRUE}, nothing is printed to screen as the function
#' runs.
#'
#' @param thresh a number from 0 to 1. This will draw a line at that fraction of
#' multilocus genotypes, rounded. Defaults to 1, which will draw a line at the
#' maximum number of observable genotypes.
#'
#' @param plot if \code{TRUE} (default), the genotype curve will be plotted via
#' ggplot2. If \code{FALSE}, the resulting matrix will be visibly returned.
#'
#' @param drop if \code{TRUE} (default), monomorphic loci will be removed before
#' analysis as these loci affect the shape of the curve.
#'
#' @param dropna if \code{TRUE} (default) and \code{drop = TRUE}, NAs will be
#' ignored when determining if a locus is monomorphic. When \code{FALSE},
#' presence of NAs will result in the locus being retained. This argument has
#' no effect when \code{drop = FALSE}
#'
#' @return (invisibly by deafuls) a matrix of integers showing the results of
#' each randomization. Columns represent the number of loci sampled and rows
#' represent an independent sample.
#'
#' @details Internally, this function works by converting the data into a
#' \code{\link[pegas:read.loci]{loci}} object, which represents genotypes as a data
#' frame of factors. Random samples are taken of 1 to n-1 columns of the
#' matrix and the number of unique rows are counted to determine the number of
#' multilocus genotypes in that random sample. This function does not take
#' into account any definitions of MLGs via \code{\link{mlg.filter}} or
#' \code{\link{mll.custom}}.
#'
#' @author Zhian N. Kamvar
#' @export
#' @examples
#' data(nancycats)
#' nan_geno <- genotype_curve(nancycats)
#' \dontrun{
#'
#' # Marker Type Comparison --------------------------------------------------
#' # With AFLP data, it is often necessary to include more markers for resolution
#' data(Aeut)
#' Ageno <- genotype_curve(Aeut)
#'
#' # Many microsatellite data sets have hypervariable markers
#' data(microbov)
#' mgeno <- geotype_curve(microbov)
#'
#' # Adding a trendline ------------------------------------------------------
#'
#' # Trendlines: you can add a smoothed trendline with geom_smooth()
#' library("ggplot2")
#' p <- last_plot()
#' p + geom_smooth()
#'
#' # Producing Figures for Publication ---------------------------------------
#'
#' # This data set has been pre filtered
#' data(monpop)
#' mongeno <- genotype_curve(monpop)
#'
#' # Here, we add a curve and a title for publication
#' p <- last_plot()
#' mytitle <- expression(paste("Genotype Accumulation Curve for ",
#' italic("M. fructicola")))
#' p + geom_smooth() +
#' theme_bw() +
#' theme(text = element_text(size = 12, family = "serif")) +
#' theme(title = element_text(size = 14)) +
#' ggtitle(mytitle)
#' }
#'
#==============================================================================#
#' @importFrom pegas loci2genind
genotype_curve <- function(gen, sample = 100, maxloci = 0L, quiet = FALSE,
thresh = 1, plot = TRUE, drop = TRUE, dropna = TRUE){
datacall <- match.call()
if (!inherits(gen, c("genind", "genclone", "loci"))){
stop(paste(datacall[2], "must be a genind or loci object"))
}
quiet <- should_poppr_be_quiet(quiet)
if (inherits(gen, "loci")){
genloc <- gen
gen <- pegas::loci2genind(gen)
} else {
genloc <- pegas::as.loci(gen)
}
if (!is.genclone(gen)) gen <- as.genclone(gen)
if (nLoc(gen) == 1){
stop("genotype_curve needs data with at least two loci.")
}
the_loci <- attr(genloc, "locicol")
res <- integer(nrow(genloc))
suppressWarnings(genloc <- vapply(genloc[the_loci], as.integer, res))
if (drop){
nas <- if (dropna) !is.na(genloc) else TRUE
polymorphic <- colSums(!apply(genloc, 2, duplicated) & nas) > 1
lnames <- locNames(gen)
gen <- gen[loc = polymorphic]
genloc <- genloc[, polymorphic, drop = FALSE]
if (sum(!polymorphic) > 0){
msg <- paste("Dropping monomorphic loci:", paste(lnames[!polymorphic], collapse = ", "))
message(msg)
}
}
nloci <- min(maxloci, nLoc(gen) - 1)
nloci <- as.integer(ifelse(nloci > 0, nloci, nLoc(gen) - 1))
sample <- as.integer(sample)
report <- ifelse(quiet, 0L, as.integer(sample/100))
out <- .Call("genotype_curve_internal",
mat = genloc,
iter = sample,
maxloci = nloci,
report = report,
PACKAGE = "poppr")
if (!quiet) cat("\n")
colnames(out) <- seq(nloci)
rownames(out) <- seq(sample)
names(dimnames(out)) <- c("sample", "NumLoci")
# Visibly return the data if plot = FALSE
if (!plot) {
return(out)
}
suppressWarnings(max_obs <- nmll(gen, "original"))
threshdf <- data.frame(x = round(max_obs*thresh))
outmelt <- as.data.frame.table(out,
responseName = "MLG",
stringsAsFactors = FALSE)
outmelt$sample <- as.integer(outmelt$sample)
outmelt$NumLoci <- as.integer(outmelt$NumLoci)
aesthetics <- aes_string(x = "NumLoci", y = "MLG")
outplot <- ggplot(outmelt, aesthetics) +
geom_boxplot(aes_string(group = "factor(NumLoci)")) +
labs(list(title = paste("Genotype accumulation curve for", datacall[2]),
y = "Number of multilocus genotypes",
x = "Number of loci sampled")) +
scale_x_continuous(breaks = seq(nloci), expand = c(0, 0.125))
if (!is.null(thresh)) {
outbreaks <- sort(c(pretty(0:max_obs), threshdf$x))
tjust <- ifelse(thresh > 0.9, 1.5, -1)
outplot <- outplot +
geom_hline(aes_string(yintercept = "x"), data = threshdf,
color = "red", linetype = 2) +
annotate("text", x = 1, y = threshdf$x, vjust = tjust,
label = paste0(thresh*100, "%"), color = "red",
hjust = 0) +
scale_y_continuous(breaks = outbreaks, limits = c(0, max_obs))
}
print(outplot)
return(invisible(out))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.