#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#
# This software was authored by Zhian N. Kamvar and Javier F. Tabima, graduate
# students at Oregon State University; Jonah C. Brooks, undergraduate student at
# Oregon State University; and Dr. Nik Grünwald, an employee of USDA-ARS.
#
# Permission to use, copy, modify, and distribute this software and its
# documentation for educational, research and non-profit purposes, without fee,
# and without a written agreement is hereby granted, provided that the statement
# above is incorporated into the material, giving appropriate attribution to the
# authors.
#
# Permission to incorporate this software into commercial products may be
# obtained by contacting USDA ARS and OREGON STATE UNIVERSITY Office for
# Commercialization and Corporate Development.
#
# The software program and documentation are supplied "as is", without any
# accompanying services from the USDA or the University. USDA ARS or the
# University do not warrant that the operation of the program will be
# uninterrupted or error-free. The end-user understands that the program was
# developed for research purposes and is advised not to rely exclusively on the
# program for any reason.
#
# IN NO EVENT SHALL USDA ARS OR OREGON STATE UNIVERSITY BE LIABLE TO ANY PARTY
# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
# LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION,
# EVEN IF THE OREGON STATE UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE. USDA ARS OR OREGON STATE UNIVERSITY SPECIFICALLY DISCLAIMS ANY
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY STATUTORY
# WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
# BASIS, AND USDA ARS AND OREGON STATE UNIVERSITY HAVE NO OBLIGATIONS TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#==============================================================================#
# This function will attempt to convert external files of the following types:
#
# Structure (*.str, *.stru)
# Fstat (*.dat)
# Gentix (*.gtx)
# Genpop (*.gen)
# Genalex (*.csv)
#
# The output is a poppr object and the original filename that was imported.
# Missing data is handled via the function missingno and the ability for clone
# correction is also possible.
# If quiet is set to false and you are importing non-genind or poppr objects,
# you will see many warnings.
#
# Public functions utilizing this function:
# # poppr
#
# Internal functions utilizing this function:
# # new.poppr (in testing)
#==============================================================================#
process_file <- function(input, quiet=TRUE, missing="ignore", cutoff=0.05,
keep=1, clonecorrect=FALSE, strata=1){
if (!is.genind(input)){
x <- input
if (toupper(.readExt(x)) == "CSV"){
try(input <- read.genalex(x), silent=quiet)
try(input <- read.genalex(x, region=TRUE), silent=quiet)
try(input <- read.genalex(x, geo=TRUE), silent=quiet)
try(input <- read.genalex(x, geo=TRUE, region=TRUE), silent=quiet)
} else {
try(input <- import2genind(x, quiet=quiet), silent=quiet)
}
stopifnot(is.genind(input))
input@call[2] <- x
popcall <- input@call
input <- missingno(input, type=missing, cutoff=cutoff, quiet=quiet)
input@call <- popcall
if (clonecorrect == TRUE){
poplist <- clonecorrect(input, strata = strata, keep = keep)
input <- poplist
input@call <- popcall
}
} else if (is.genind(input)) {
x <- as.character(match.call()[2])
popcall <- input@call
input <- missingno(input, type=missing, cutoff=cutoff, quiet=quiet)
if (clonecorrect == TRUE){
poplist <- clonecorrect(input, strata = strata, keep = keep)
input <- poplist
input@call <- popcall
}
}
return(list(X=x, GENIND=input))
}
#==============================================================================#
# .clonecorrector will simply give a list of individuals (rows) that are
# duplicated within a genind object. This can be used for clone correcting a
# single genind object.
#
# Public functions utilizing this function:
# # clonecorrect, bruvo.msn
#
# Internal functions utilizing this function:
# # none
#==============================================================================#
.clonecorrector <- function(x){
if (is.genclone(x) | is(x, "snpclone")){
is_duplicated <- duplicated(x@mlg[])
} else {
is_duplicated <- duplicated(x@tab[, 1:ncol(x@tab)])
}
res <- -which(is_duplicated)
# conditional for the case that all individuals are unique.
if(is.na(res[1])){
res <- which(!is_duplicated)
}
return(res)
}
#==============================================================================#
# This will remove either loci or genotypes containing missing values above the
# cutoff percent.
#
# Public functions utilizing this function:
# # missingno
#
# Internal functions utilizing this function:
# # none.
#==============================================================================#
percent_missing <- function(pop, type="loci", cutoff=0.05){
if (toupper(type) == "LOCI"){
missing_loci <- 1 - propTyped(pop, "loc")
locfac <- locFac(pop)
names(missing_loci) <- levels(locfac)
missing_loci <- missing_loci[missing_loci <= cutoff]
misslist <- 1:length(locfac)
filter <- locfac %in% names(missing_loci)
} else {
missing_geno <- 1 - propTyped(pop, "ind")
misslist <- 1:nInd(pop)
filter <- missing_geno <= cutoff
}
return(misslist[filter])
}
#==============================================================================#
# This implements rounding against the IEEE standard and rounds 0.5 up
# Public functions utilizing this function:
# # none
#
# Internal functions utilizing this function:
# # .PA.pairwise.differences, .pairwise.differences
#==============================================================================#
round.poppr <- Vectorize(function(x){
ix <- as.integer(x)
is_even <- ix %% 2 == 0
if (is_even) {
if (x - ix == 0.5)
x <- round(x) + 1
else if (-x + ix == 0.5)
x <- round(x) - 1
} else {
x <- round(x)
}
return(x)
})
#==============================================================================#
# Subsetting the population and returning the indices.
#
# Public functions utilizing this function:
# ## mlg.crosspop poppr.msn
#
# Internal functions utilizing this function:
# ## none
#==============================================================================#
sub_index <- function(pop, sublist="ALL", exclude=NULL){
numList <- seq(nInd(pop))
if (is.null(pop(pop))){
return(numList)
}
if(toupper(sublist[1]) == "ALL"){
if (is.null(exclude)){
return(numList)
} else {
# filling the sublist with all of the population names.
sublist <- popNames(pop)
}
}
# Treating anything present in exclude.
if (!is.null(exclude)){
# If both the sublist and exclude are numeric or character.
if (is.numeric(sublist) && is.numeric(exclude) || identical(class(sublist), class(exclude))) {
sublist <- sublist[!sublist %in% exclude]
} else if (is.numeric(sublist) && inherits(exclude, "character")){
# if the sublist is numeric and exclude is a character. eg s=1:10, b="USA"
sublist <- sublist[sublist %in% which(!popNames(pop) %in% exclude)]
} else {
# no sublist specified. Ideal situation
if(all(popNames(pop) %in% sublist)){
sublist <- sublist[-exclude]
} else {
# weird situation where the user will specify a certain sublist, yet index
# the exclude numerically. Interpreted as an index of populations in the
# whole data set as opposed to the sublist.
warning("Blacklist is numeric. Interpreting exclude as the index of the population in the total data set.")
sublist <- sublist[!sublist %in% popNames(pop)[exclude]]
}
}
}
# subsetting the population.
if (is.numeric(sublist)){
sublist <- popNames(pop)[sublist]
} else{
sublist <- popNames(pop)[popNames(pop) %in% sublist]
}
sublist <- pop(pop) %in% sublist
if (sum(sublist) == 0){
warning("All items present in sublist are also present in exclude.\nSubsetting not taking place.")
return(seq(nInd(pop)))
}
#cat("Sublist:\n", sublist,"\n")
return(numList[sublist])
}
#==============================================================================#
# Internal function to create mlg.table.
#
# Public functions utilizing this function:
# # mlg.table mlg.crosspop
#
# Internal functions utilizing this function:
# # none
#==============================================================================#
mlg.matrix <- function(x){
visible <- "original"
if (is.genclone(x) | is(x, "snpclone")){
mlgvec <- x@mlg[]
if (is(x@mlg, "MLG")){
visible <- visible(x@mlg)
}
} else {
mlgvec <- mlg.vector(x)
}
if (!is.null(pop(x))){
mlg.mat <- table(pop(x), mlgvec)
} else {
mlg.mat <- t(as.matrix(table(mlgvec)))
rownames(mlg.mat) <- "Total"
}
names(attr(mlg.mat, "dimnames")) <- NULL
if (visible == "custom"){
return(mlg.mat)
}
if (is.null(colnames(mlg.mat))){
mlgs <- length(unique(mlgvec))
colnames(mlg.mat) <- 1:mlgs
}
colnames(mlg.mat) <- paste("MLG", colnames(mlg.mat), sep=".")
return(unclass(mlg.mat))
}
#==============================================================================#
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
#
# The reason for this section of code is for the fact that Presence/Absence
# markers are dealt with in a different way for adegenet (to save memory) and
# so the calculations must be different as implemented in these mostly identical
# functions.
#
# Public functions utilizing this function:
# # ia
#
# Internal functions utilizing this function:
# # .ia
#
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
#==============================================================================#
#' @noRd
.PA.Ia.Rd <- function(pop, missing=NULL){
vard.vector <- NULL
numLoci <- ncol(pop@tab)
numIsolates <- nrow(pop@tab)
# Creating this number is necessary because it is how the variance is
# calculated.
np <- choose(numIsolates, 2)
if(np < 2){
return(as.numeric(c(NaN, NaN)))
}
# Starting the actual calculations.
V <- .PA.pairwise.differences(pop, numLoci, np, missing=missing)
# First, set the variance of D
varD <- ((sum(V$D.vector^2)-((sum(V$D.vector))^2)/np))/np
# Next is to create a vector containing all of the variances of d (there
# will be one for each locus)
vard.vector <- ((V$d2.vector-((V$d.vector^2)/np))/np)
vardpair.vector <- .Call("pairwise_covar", vard.vector, PACKAGE = "poppr")
# The sum of the variances necessary for the calculation of Ia is calculated
sigVarj <- sum(vard.vector)
rm(vard.vector)
# Finally, the Index of Association and the standardized Index of associati-
# on are calculated.
Ia <- (varD/sigVarj)-1
rbarD <- (varD - sigVarj)/(2*sum(vardpair.vector))
return(c(Ia, rbarD))
}
#==============================================================================#
# .PA.pairwise.differences will calculate three vectors that will be used for the
# calculation of the Index of Association and standardized Index of Association
# Later.
# pop = genind object
# numLoci = should read numLoci. This will be fixed later.
# temp.d.vector = temporary vector to store the differences
# d.vector = a vector of the sum of the differences at each locus. The length
# of this vector will be the same as the number of loci.
# d2.vector = the same as d.vector, except it's the sum of the squares
# D.vector = a vector of the the pairwise distances over all loci. The length
# of this vector will be the same as n(n-1)/2, where n is number of
# isolates.
# Public functions utilizing this function:
# # ia
#
# Internal functions utilizing this function:
# # .ia
#
#==============================================================================#
.PA.pairwise.differences <- function(pop, numLoci, np, missing){
temp.d.vector <- matrix(nrow = np, ncol = numLoci, data = NA_real_)
if( missing == "MEAN" ){
# this will round all of the values if the missing indicator is "mean"
temp.d.vector <- vapply(seq(numLoci),
function(x) as.vector(dist(pop@tab[, x])),
temp.d.vector[, 1])
# since the replacement was with "mean", the missing data will not produce
# a binary distance. The way we will handle this is to replace numbers that
# are not one or zero with a rounded value.
tempz <- !temp.d.vector %in% 0:1
temp.d.vector[tempz] <- vapply(temp.d.vector[tempz], round.poppr, 1)
} else {
temp.d.vector <- vapply(seq(numLoci),
function(x) as.vector(dist(pop@tab[, x])),
temp.d.vector[, 1])
# checking for missing data and imputing the comparison to zero.
if(any(is.na(temp.d.vector))){
temp.d.vector[which(is.na(temp.d.vector))] <- 0
}
}
if (max(ploidy(pop)) > 1){
# multiplying by two is the proper way to evaluate P/A diploid data because
# one cannot detect heterozygous loci (eg, a difference of 1).
temp.d.vector <- temp.d.vector * max(ploidy(pop))
d.vector <- as.vector(colSums(temp.d.vector))
d2.vector <- as.vector(colSums(temp.d.vector^2))
D.vector <- as.vector(rowSums(temp.d.vector))
} else {
d.vector <- as.vector(colSums(temp.d.vector))
d2.vector <- d.vector
D.vector <- as.vector(rowSums(temp.d.vector))
}
vectors <- list(d.vector = d.vector, d2.vector = d2.vector, D.vector = D.vector)
return(vectors)
}
#==============================================================================#
# Function for parsing output of poppr function.
#
# Public functions utilizing this function:
# # poppr, poppr.all
#
# Internal functions utilizing this function:
# # none
#
#==============================================================================#
final <- function(Iout, result){
if (is.null(result)){
return(Iout)
} else {
return(result)
}
}
#==============================================================================#
# The internal version of ia.
# Public functions utilizing this function:
# # ia, poppr
#
# Internal functions utilizing this function:
# # none
#
#==============================================================================#
.ia <- function(pop, sample=0, method=1, quiet=FALSE, namelist=NULL,
missing="ignore", hist=TRUE, index = "rbarD"){
METHODS = c("permute alleles", "parametric bootstrap",
"non-parametric bootstrap", "multilocus")
if(pop@type!="PA"){
type <- pop@type
popx <- seploc(pop)
}
else {
type <- pop@type
popx <- pop
.Ia.Rd <- .PA.Ia.Rd
}
# if there are less than three individuals in the population, the calculation
# does not proceed.
if (nInd(pop) < 3){
IarD <- as.numeric(c(NA, NA))
names(IarD) <- c("Ia", "rbarD")
if(sample==0){
return(IarD)
}
else{
IarD <- as.numeric(rep(NA, 4))
names(IarD) <- c("Ia","p.Ia","rbarD","p.rD")
return(IarD)
}
}
IarD <- .Ia.Rd(popx, missing)
# data vomit options.
if (!quiet){
cat("|", namelist$population ,"\n")
}
names(IarD) <- c("Ia", "rbarD")
# no sampling, it will simply return two named numbers.
if (sample == 0){
Iout <- IarD
result <- NULL
}
# sampling will perform the iterations and then return a data frame indicating
# the population, index, observed value, and p-value. It will also produce a
# histogram.
else{
Iout <- NULL
idx <- as.data.frame(list(Index=names(IarD)))
if (quiet) {
oh <- progressr::handlers()
on.exit(progressr::handlers(oh))
progressr::handlers("void")
}
progressr::with_progress({
samp <- .sampling(popx, sample, missing, quiet=quiet, type=type, method=method)
})
p.val <- sum(IarD[1] <= c(samp$Ia, IarD[1]))/(sample + 1)#ia.pval(index="Ia", samp2, IarD[1])
p.val[2] <- sum(IarD[2] <= c(samp$rbarD, IarD[2]))/(sample + 1)#ia.pval(index="rbarD", samp2, IarD[2])
if(hist == TRUE){
print(poppr.plot(samp, observed=IarD, pop=namelist$population, index = index,
file=namelist$File, pval=p.val, N=nrow(pop@tab)))
}
result <- 1:4
result[c(1, 3)] <- IarD
result[c(2, 4)] <- p.val
names(result) <- c("Ia","p.Ia","rbarD","p.rD")
iaobj <- list(index = final(Iout, result), samples = samp)
class(iaobj) <- "ialist"
return(iaobj)
}
return(final(Iout, result))
}
#==============================================================================#
#==============================================================================#
#=====================Index of Association Calculations========================#
#==============================================================================#
#==============================================================================#
# The actual calculation of Ia and rbarD. This allows for multiple populations
# to be calculated.
# pop: A list of genind objects consisting of one locus each over a population.
# Public functions utilizing this function:
# # none
#
# Internal functions utilizing this function:
# # .ia
#
#==============================================================================#
.Ia.Rd <- function (pop, missing = NULL)
{
vard.vector <- NULL
numLoci <- length(pop)
numIsolates <- nInd(pop[[1]])
np <- choose(numIsolates, 2)
if (np < 2) {
return(as.numeric(c(NaN, NaN)))
}
V <- pair_diffs(pop, numLoci, np)
varD <- ((sum(V$D.vector^2) - ((sum(V$D.vector))^2)/np))/np
vard.vector <- ((V$d2.vector - ((V$d.vector^2)/np))/np)
vardpair.vector <- .Call("pairwise_covar", vard.vector, PACKAGE = "poppr")
sigVarj <- sum(vard.vector)
rm(vard.vector)
Ia <- (varD/sigVarj) - 1
rbarD <- (varD - sigVarj)/(2 * sum(vardpair.vector))
return(c(Ia, rbarD))
}
#==============================================================================#
# This creates the results from the pairwise difference matrix used by .Ia.Rd
# to calculate the index of association.
#
# Public functions utilizing this function:
# # none
#
# Internal functions utilizing this function:
# # .Ia.Rd
#
#==============================================================================#
pair_diffs <- function(pop, numLoci, np)
{
temp.d.vector <- pair_matrix(pop, numLoci, np)
d.vector <- colSums(temp.d.vector)
d2.vector <- colSums(temp.d.vector^2)
D.vector <- rowSums(temp.d.vector)
return(list(d.vector = d.vector, d2.vector = d2.vector, D.vector = D.vector))
}
#==============================================================================#
# This creates a pairwise difference matrix via the C function pairdiffs in
# src/poppr_distance.c
#
# Public functions utilizing this function:
# # none
#
# Internal functions utilizing this function:
# # pair_diffs, pair.ia
#
#==============================================================================#
pair_matrix <- function(pop, numLoci, np)
{
temp.d.vector <- matrix(nrow = np, ncol = numLoci, data = as.numeric(NA))
temp.d.vector <- vapply(pop, function(x) .Call("pairdiffs", tab(x),
PACKAGE = "poppr")/2,
FUN.VALUE = temp.d.vector[, 1])
temp.d.vector <- ceiling(temp.d.vector)
return(temp.d.vector)
}
#==============================================================================#
# This will transform the data to be in the range of [0, 1]
#
# Public functions utilizing this function:
# poppr.msn
#
# Internal functions utilizing this function:
# # adjustcurve
#==============================================================================#
rerange <- function(x){
minx <- min(x, na.rm = TRUE)
maxx <- max(x, na.rm = TRUE)
if (!is.finite(minx) || !is.finite(maxx)){
warning("non-finite values found for distances, returning 0.5")
return(rep(0.5, length(x)))
}
if (minx < 0)
x <- x + abs(minx)
maxx <- maxx + abs(minx)
if (maxx > 1)
x <- x/maxx
return(x)
}
#==============================================================================#
# This will scale the edge widths
#
# Public functions utilizing this function:
# none
#
# Internal functions utilizing this function:
# # update_edge_scales
#==============================================================================#
make_edge_width <- function(mst){
edgewidth <- rerange(E(mst)$weight)
if (any(edgewidth < 0.08)){
edgewidth <- edgewidth + 0.08
}
return(1/edgewidth)
}
#==============================================================================#
# This will scale the edge widths and edge color for a graph
#
# Public functions utilizing this function:
# poppr.msn bruvo.msn plot_poppr_msn
#
# Internal functions utilizing this function:
# # singlepop_msn
#==============================================================================#
update_edge_scales <- function(mst, wscale = TRUE, gscale = TRUE, glim, gadj){
if(gscale == TRUE){
E(mst)$color <- gray(adjustcurve(E(mst)$weight, glim=glim, correction=gadj,
show=FALSE))
} else {
E(mst)$color <- rep("black", length(E(mst)$weight))
}
E(mst)$width <- 2
if (wscale==TRUE){
E(mst)$width <- make_edge_width(mst)
}
return(mst)
}
#==============================================================================#
# This will adjust the grey scale with respect to the edge weights for igraph.
# This is needed because the length of the edges do not correspond to weights.
# If show is set to TRUE, it will show a graph giving the equation used for con-
# version from the original scale to the grey scale, the grey scale itself in
# the background, and the curve.
#
# Public functions utilizing this function:
# # bruvo.msn (soon)
#
# Internal functions utilizing this function:
# # new.bruvo.msn
# # new.poppr.msn
#
#==============================================================================#
adjustcurve <- function(weights, glim = c(0, 0.8), correction = 3, show=FALSE,
scalebar = FALSE, smooth = TRUE){
w <- weights
w <- rerange(w)
maxg <- max(glim)
ming <- 1-(min(glim)/maxg)
if (correction < 0){
adj <- (w^abs(correction))/(1/ming)
adj <- (adj + 1-ming) / ((1 / maxg))
} else {
adj <- (1 - (((1-w)^abs(correction))/(1/ming)) )
adj <- adj / (1/maxg)
}
if (!show){
return(adj)
} else if (!scalebar){
with_quantiles <- sort(weights)
wq_raster <- t(as.raster(as.matrix(gray(sort(adj)), nrow = 1)))
xlims <- c(min(weights), max(weights))
graphics::plot(xlims, 0:1, type = "n", ylim = 0:1, xlim = xlims, xlab = "", ylab = "")
graphics::rasterImage(wq_raster, xlims[1], 0, xlims[2], 1)
graphics::points(x = sort(weights), y = sort(adj), col = grDevices::grey(rev(sort(adj))), pch=20)
title(xlab="Observed Value", ylab="Grey Adjusted",
main=paste("Grey adjustment\n min:",
min(glim),
"max:", max(glim),
"adjust:", abs(correction)))
if (correction < 0){
graphics::text(bquote(frac(bgroup("(", frac(scriptstyle(x)^.(abs(correction)),
.(ming)^-1),")") + .(1-ming),
.(maxg)^-1)) ,
x = min(weights) + (0.25*max(weights)), y=0.75, col="red")
} else {
graphics::text(bquote(frac(1-bgroup("(", frac((1-scriptstyle(x))^.(abs(correction)),
.(ming)^-1),")"),
.(maxg)^-1)) ,
x= min(weights) + (0.15*max(weights)), y=0.75, col="red")
}
graphics::lines(x=xlims, y=c(min(glim), min(glim)), col="yellow")
graphics::lines(x=xlims, y=c(max(glim), max(glim)), col="yellow")
} else {
with_quantiles <- sort(weights)
wq_raster <- t(grDevices::as.raster(as.matrix(grDevices::gray(sort(adj)), nrow = 1)))
no_quantiles <- seq(min(weights), max(weights), length = 1000)
nq_raster <- adjustcurve(no_quantiles, glim, correction, show = FALSE)
nq_raster <- t(grDevices::as.raster(as.matrix(grDevices::gray(nq_raster), nrow = 1)))
graphics::layout(matrix(1:2, nrow = 2))
graphics::plot.new()
graphics::rasterImage(wq_raster, 0, 0.5, 1, 1)
graphics::polygon(c(0, 1, 1), c(0.5, 0.5, 0.8), col = "white", border = "white", lwd = 2)
graphics::axis(3, at = c(0, 0.25, 0.5, 0.75, 1), labels = round(quantile(with_quantiles), 3))
graphics::text(0.5, 0, labels = "Quantiles From Data", font = 2, cex = 1.5, adj = c(0.5, 0))
graphics::plot.new()
graphics::rasterImage(nq_raster, 0, 0.5, 1, 1)
graphics::polygon(c(0, 1, 1), c(0.5, 0.5, 0.8), col = "white", border = "white", lwd = 2)
graphics::axis(3, at = c(0, 0.25, 0.5, 0.75, 1), labels = round(quantile(no_quantiles), 3))
graphics::text(0.5, 0, labels = "Quantiles From Smoothing", font = 2, cex = 1.5, adj = c(0.5, 0))
# Return top level plot to defau lts.
graphics::layout(matrix(c(1), ncol=1, byrow=T))
graphics::par(mar=c(5, 4, 4, 2) + 0.1) # number of lines of margin specified.
graphics::par(oma=c(0, 0, 0, 0)) # Figure margins
}
}
#==============================================================================#
# This will guess the repeat lengths of the microsatellites for Bruvo's distance
#
# Public functions utilizing this function:
# # bruvo.boot bruvo.dist
#
# Internal functions utilizing this function:
# # none
#
#==============================================================================#
guesslengths <- function(vec){
if (length(vec) > 1){
lens <- vapply(2:length(vec), function(x) abs(vec[x] - vec[x - 1]), 1)
if (all(lens == 1)){
return(1)
} else {
return(min(lens[lens > 1]))
}
} else {
return(1)
}
}
#==============================================================================#
# Specifically used to find loci with an excessive number of the same genotype.
#
# Public functions utilizing this function:
# # informloci
#
# Internal functions utilizing this function:
# # none
#==============================================================================#
test_table <- function(loc, min_ind, n){
tab <- table(loc)
return(ifelse(any(tab > n - min_ind), FALSE, TRUE))
}
#==============================================================================#
# Normalize negative branch lenght by converting the negative branch to zero
# and adding the negative value to the sibling branch.
#
# This now has a few caveats:
# 1. If the parent branch contains a polytomy, then only the branch that is
# equal to the negative branch will be fixed.
# 2. If there are branches that cannot be fixed (i.e., the absolute value of
# the negative branch is greater than the value of the sibling), then it will
# not be fixed.
#
# Public functions utilizing this function:
# # bruvo.boot
#
# Internal functions utilizing this function:
# # none
#==============================================================================#
fix_negative_branch <- function(tre){
# Creating a matrix from the tree information: Tree edges and edge length
all.lengths <- matrix(c(tre$edge, tre$edge.length), ncol = 3,
dimnames = list(1:length(tre$edge.length),
c("parent", "child", "length")
))
# Looking at the edges that are zero.
zero.edges <- all.lengths[tre$edge.length < 0, , drop = FALSE]
# Checking which negative edges are included in all the edges
all.edges <- all.lengths[all.lengths[, "parent"] %in% zero.edges[, "parent"], , drop = FALSE]
# Ordering all the edges
index.table <- all.edges[order(all.edges[, "parent"]), , drop = FALSE]
# Loop to change the NJ branch length
for (i in (unique(index.table[, "parent"]))){
the_parents <- index.table[, "parent"] == i
fork <- index.table[, "length"][the_parents]
# Check for polytomies
if (length(fork) > 2){
# Fix only siblings of equal magnitude.
forkinds <- abs(fork) == -min(fork)
fork <- fork[forkinds]
fixed_lengths <- abs(fork) + min(fork)
# Check that branches are actually fixed.
if (all(fixed_lengths >= 0)){
index.table[, "length"][the_parents][forkinds] <- abs(fork) + min(fork)
}
} else { # No polytomies
fixed_lengths <- abs(fork) + min(fork)
# Check that branches are actually fixed.
if (all(fixed_lengths >= 0)){
index.table[, "length"][the_parents] <- abs(fork) + min(fork)
}
}
}
# replacing the branch length for each negative value in the total table
name_match <- match(rownames(index.table), rownames(all.lengths))
all.lengths[, "length"][name_match] <- index.table[, "length"]
# replacing the branch lengths to the original tree
tre$edge.length <- all.lengths[, "length"]
return(tre)
}
#==============================================================================#
# Calculate Bruvo's distance from a bruvomat object.
#
# Public functions utilizing this function:
# # bruvo.msn, bruvo.dist, bruvo.boot
#
# Internal functions utilizing this function:
# # singlepop_msn
#==============================================================================#
bruvos_distance <- function(bruvomat, funk_call = match.call(), add = TRUE,
loss = TRUE, by_locus = FALSE){
x <- bruvomat@mat
ploid <- bruvomat@ploidy
if (getOption("old.bruvo.model") && ploid > 2 && (add | loss)){
msg <- paste("The option old.bruvo.model has been set to TRUE, which does",
"not represent every ordered combinations of alleles in the",
"genome addition or loss models. This could result in",
"potentially incorrect results.",
"\n\n To use every ordered combination of alleles for",
"estimating short genotypes, enter the following command in",
"your R console:",
"\n\n\toptions(old.bruvo.model = FALSE)\n")
warning(msg, call. = FALSE, immediate. = TRUE)
}
replen <- bruvomat@replen
x[is.na(x)] <- 0
# Dividing the data by the repeat length of each locus.
x <- x / rep(replen, each = ploid * nrow(x))
x <- matrix(as.integer(round(x)), ncol = ncol(x))
# Getting the permutation vector.
perms <- .Call("permuto", ploid, PACKAGE = "poppr")
# Calculating bruvo's distance over each locus.
distmat <- .Call("bruvo_distance",
x, # data matrix
perms, # permutation vector (0-indexed)
ploid, # maximum ploidy
add, # Genome addition model switch
loss, # Genome loss model switch
getOption("old.bruvo.model"), # switch to use unordered genotypes
PACKAGE = "poppr")
# If there are missing values, the distance returns 100, which means that the
# comparison is not made. These are changed to NA.
distmat[distmat == 100] <- NA
if (!by_locus){
# Obtaining the average distance over all loci.
avg.dist.vec <- apply(distmat, 1, mean, na.rm=TRUE)
# presenting the information in a lower triangle distance matrix.
dist.mat <- matrix(ncol=nrow(x), nrow=nrow(x))
dist.mat[which(lower.tri(dist.mat)==TRUE)] <- avg.dist.vec
dist.mat <- as.dist(dist.mat)
attr(dist.mat, "Labels") <- bruvomat@ind.names
attr(dist.mat, "method") <- "Bruvo"
attr(dist.mat, "call") <- funk_call
return(dist.mat)
} else {
n <- nrow(x)
cols <- seq(ncol(distmat))
labs <- bruvomat@ind.names
meth <- "Bruvo"
return(lapply(cols, function(i) make_attributes(distmat[, i], n, labs, meth, funk_call)))
}
}
#==============================================================================#
# Calculate a subset Bruvo's distances from a bruvomat object, segmented into a
# query and a reference.
#
# Public functions utilizing this function:
# # bruvo.between
#==============================================================================#
bruvos_between <- function(bruvomat, query_length, funk_call = match.call(), add = TRUE,
loss = TRUE, by_locus = FALSE){
x <- bruvomat@mat
ploid <- bruvomat@ploidy
if (getOption("old.bruvo.model") && ploid > 2 && (add | loss)){
msg <- paste("The option old.bruvo.model has been set to TRUE, which does",
"not represent every ordered combinations of alleles in the",
"genome addition or loss models. This could result in",
"potentially incorrect results.",
"\n\n To use every ordered combination of alleles for",
"estimating short genotypes, enter the following command in",
"your R console:",
"\n\n\toptions(old.bruvo.model = FALSE)\n")
warning(msg, call. = FALSE, immediate. = TRUE)
}
replen <- bruvomat@replen
x[is.na(x)] <- 0
# Dividing the data by the repeat length of each locus.
x <- x / rep(replen, each = ploid * nrow(x))
x <- matrix(as.integer(round(x)), ncol = ncol(x))
# Getting the permutation vector.
perms <- .Call("permuto", ploid, PACKAGE = "poppr")
# Calculating bruvo's distance over each locus.
distmat <- .Call("bruvo_between",
x, # data matrix
perms, # permutation vector (0-indexed)
ploid, # maximum ploidy
add, # Genome addition model switch
loss, # Genome loss model switch
getOption("old.bruvo.model"), # switch to use unordered genotypes
query_length, # length of the original query
PACKAGE = "poppr")
# If there are missing values, the distance returns 100, which means that the
# comparison is not made. These are changed to NA.
distmat[distmat == 100] <- NA
if (!by_locus){
# Obtaining the average distance over all loci.
avg.dist.vec <- apply(distmat, 1, mean, na.rm=TRUE)
# presenting the information in a lower triangle distance matrix.
dist.mat <- matrix(ncol=nrow(x), nrow=nrow(x))
dist.mat[which(lower.tri(dist.mat)==TRUE)] <- avg.dist.vec
dist.mat <- as.dist(dist.mat)
attr(dist.mat, "Labels") <- bruvomat@ind.names
attr(dist.mat, "method") <- "Bruvo"
attr(dist.mat, "call") <- funk_call
return(dist.mat)
} else {
n <- nrow(x)
cols <- seq(ncol(distmat))
labs <- bruvomat@ind.names
meth <- "Bruvo"
return(lapply(cols, function(i) make_attributes(distmat[, i], n, labs, meth, funk_call)))
}
}
#==============================================================================#
# match repeat lengths to loci present in data
#
# Public functions utilizing this function:
# ## initialize,bruvomat-method
#
# Internal functions utilizing this function:
# ## none
#==============================================================================#
match_replen_to_loci <- function(gid_loci, replen){
# unnamed loci with the same length
if (is.null(names(replen))){
if (is.character(gid_loci)){
names(replen) <- gid_loci # give them names
}
return(replen)
} else if (!all(gid_loci %in% names(replen))){ # names don't match up
unmatched_repeats <- names(replen[!names(replen) %in% gid_loci])
stop(unmatched_loci_warning(unmatched_repeats, gid_loci), call. = FALSE)
} else {
return(replen[gid_loci])
}
}
#==============================================================================#
# Function for creating the structure data frame needed for ade4's AMOVA
# implementation.
#
# Public functions utilizing this function:
#
# # poppr.amova
#
# Internal functions utilizing this function:
# # none
#==============================================================================#
make_ade_df <- function(hier, df, expanded = FALSE){
if (expanded){
levs <- attr(terms(hier), "term.labels")
} else {
levs <- all.vars(hier)
}
if(length(levs) <= 1){
return(NULL)
}
levs <- gsub(":", "_", levs)
if(!all(levs %in% names(df))){
stop(hier_incompatible_warning(levs, df))
}
smallest <- df[[levs[length(levs)]]]
smallinds <- !duplicated(smallest)
newdf <- df[smallinds, , drop = FALSE]
newdf <- newdf[-length(levs)]
if (length(newdf) > 1){
factlist <- lapply(newdf, function(x) factor(x, unique(x)))
} else {
factlist <- list(factor(newdf[[1]], unique(newdf[[1]])))
names(factlist) <- names(newdf)
}
return(rev(data.frame(factlist)))
}
# Function for determining if a genind object has any heterozygous sites.
# Public functions utilizing this function:
#
# # poppr.amova
#
# Internal functions utilizing this function:
# # none
check_Hs <- function(x){
if (all(ploidy(x) == 1)) return(FALSE)
res <- sweep(tab(x), 1, ploidy(x), function(x, y) any(x < y))
return(res)
}
#==============================================================================#
# Arguments:
# x a diploid genind object.
#
# Returns:
# a matrix with n*2 rows where haplotypes are interleaved. This is to be
# converted into a genind object. To explain what it looks like:
#
# if you have your genotypes arranged like so:
# L1 L2
# A x/y a/b
# B z/z c/a
#
# The result of this function would be to return a matrix that looks like:
# L1 L2
# A.1 x a
# A.2 y b
# B.1 z c
# B.2 z a
#
# External functions utilizing this function:
# # none
#
# Internal functions utilizing this function:
# # separate_haplotypes
#==============================================================================#
separate_haplotypes <- function(x){
if (max(ploidy(x)) > 2){
x <- suppressWarnings(recode_polyploids(x, addzero = TRUE))
}
df <- genind2df(x, sep = "/", usepop = FALSE)
df <- apply(df, 1, strsplit, "/")
df <- lapply(df, lapply, function(i){ i[grepl("^[0]+$", i)] <- NA; i }) # replace zeroes as missing
df <- lapply(df, data.frame, stringsAsFactors = FALSE, check.names = FALSE)
dplyr::bind_rows(df)
}
#==============================================================================#
# The function locus_table_pegas is the internal workhorse. It will process a
# summary.loci object into a nice table utilizing the various diversity indices
# provided by vegan. Note that it has a catch which will remove all allele names
# that are any amount of zeroes and nothing else. The reason for this being that
# these alleles actually represent missing data. The wrapper for this is
# locus_table, which will take in a genind object and send it into
# locus_table_pegas.
# Note: lev argument has only the options of "allele" or "genotype"
#
# UPDATE 2015-07-16: I had noticed in issue #47 that the definition of Hexp was
# completely wrong. It was correcting by the number of classes as opposed to the
# number of samples, artificially increasing the diversity. It has been
# corrected with one exception: polyploids will not be able to utilize this
# correction because the allelic dosage is ambiguous. This is corrected by
# utilizing Müller's index, which is equivalent to the unbiased Simpson's index.
#
# Public functions utilizing this function:
# # locus_table
#
# Internal functions utilizing this function:
# # none
#==============================================================================#
locus_table_pegas <- function(x, index = "simpson", lev = "allele", ploidy = NULL,
type = "codom", hexp_only = FALSE){
unique_types <- x[[lev]]
# Removing any zero-typed alleles that would be present with polyploids.
unique_types <- remove_zeroes(unique_types, type)
n <- sum(unique_types)
Simp <- vegan::diversity(unique_types, "simp")
nei <- (n/(n-1)) * Simp
if (hexp_only){
return(nei)
}
N <- length(unique_types) # Resetting N to be the number of unique types.
H <- vegan::diversity(unique_types)
G <- vegan::diversity(unique_types, "inv")
if (index == "simpson"){
idx <- Simp
names(idx) <- "1-D"
} else if (index == "shannon"){
idx <- H
names(idx) <- "H"
} else {
idx <- G
names(idx) <- "G"
}
E.5 <- (G - 1)/(exp(H) - 1)
names(N) <- lev
return(c(N, idx, Hexp = nei, Evenness = E.5))
}
#==============================================================================#
# This function takes a vector of allele counts and searches for the names
# that appear to be zero-length alleles. These alleles are then removed from
# analysis.
#
# Public functions utilizing this function:
# # none
#
# Internal functions utilizing this function:
# # locus_table_pegas
# # get_hexp_from_loci
#==============================================================================#
remove_zeroes <- function(unique_types, type){
zero_names <- grep("^0+?$", names(unique_types))
if (length(zero_names) > 0 & type == "codom"){
unique_types <- unique_types[-zero_names]
}
return(unique_types)
}
#==============================================================================#
# This function replaces the old definition of Hexp in poppr and will actually
# return a measure of expected heterozygosity. The only catch is that if an
# incoming population has varying ploidy or ploidy greater than two, the
# returned value is the same as the result of Hs (except missing values do not
# corrupt the whole calculation). This is then treated in the poppr function by
# multiplying by the sample size correction to give a corrected simpson's index.
#
# Public functions utilizing this function:
# # poppr
#
# Internal functions utilizing this function:
# # none
#==============================================================================#
get_hexp_from_loci <- function(loci, type = "codom", ploidy = NULL){
loci <- vapply(summary(loci), FUN = locus_table_pegas, FUN.VALUE = numeric(1),
ploidy = ploidy, type = type, hexp_only = TRUE)
return(mean(loci, na.rm = TRUE))
}
#==============================================================================#
# Function to plot phylo objects the way I want to.
#
# Public functions utilizing this function:
# bruvo.boot
#
# Private functions utilizing this function:
# # nei.boot any.boot
#==============================================================================#
poppr.plot.phylo <- function(tree, type = "nj", root = FALSE){
barlen <- min(median(tree$edge.length), 0.1)
if (barlen < 0.1) barlen <- 0.01
if (!root && type != "upgma"){
tree <- ladderize(tree)
}
nodelabs <- round(tree$node.label, 2)
plot.phylo(tree, cex = 0.8, font = 2, adj = 0, xpd = TRUE,
label.offset = 0.0125)
nodelabels(nodelabs, adj = c(1.3, -0.5), frame = "n", cex = 0.8,
font = 3, xpd = TRUE)
if (type != "upgma"){
add.scale.bar(lwd = 5, length = barlen)
} else {
axisPhylo(3)
}
}
#==============================================================================#
# Function to do something with Rodger's distance
#
# Public functions utilizing this function:
# rodger.dist
#
# Private functions utilizing this function:
# # none
#==============================================================================#
# From adegenet dist.genpop
dcano <- function(mat) {
daux <- mat%*%t(mat)
vec <- diag(daux)
daux <- -2*daux + vec[col(daux)] + vec[row(daux)]
diag(daux) <- 0
# if (any(daux == Inf)){
# daux <- infinite_vals_replacement(daux, warning)
# }
daux <- sqrt(daux*0.5)
return(daux)
}
#==============================================================================#
# tabulate the amount of missing data per locus.
#
# Public functions utilizing this function:
# none
#
# Private functions utilizing this function:
# # percent_missing
#==============================================================================#
number_missing_locus <- function(x, divisor){
missing_result <- colSums(1 - propTyped(x, by = "both"))
return(missing_result/divisor)
}
#==============================================================================#
# Replace infinite values with the maximum finite value of a distance matrix.
#
# Public functions utilizing this function:
# nei.dist
#
# Private functions utilizing this function:
# # none
#==============================================================================#
infinite_vals_replacement <- function(D, warning){
if (warning){
warning("Infinite values detected.")
}
maxval <- max(D[!D == Inf])
D[D == Inf] <- maxval*10
return(D)
}
#==============================================================================#
# Given a tree function and a distance function, this will generate an
# automatic tree generating function. This is useful for functions such as
# boot.phylo.
#
# Public functions utilizing this function:
# anyboot
#
# Private functions utilizing this function:
# # none
#==============================================================================#
tree_generator <- function(tree, distance, quiet = TRUE, ...){
TREEFUNK <- match.fun(tree)
DISTFUNK <- match.fun(distance)
distargs <- as.list(formals(distance))
otherargs <- list(...)
matchargs <- names(distargs)[names(distargs) %in% names(otherargs)]
distargs[matchargs] <- otherargs[matchargs]
if (!quiet) cat("\nTREE....... ", tree,"\nDISTANCE... ", distance)
treedist <- function(x){
distargs[[1]] <- x
TREEFUNK(do.call(DISTFUNK, distargs))
}
return(treedist)
}
#==============================================================================#
# This will retrieve a genetic matrix based on genpop status or not.
#
# Public functions utilizing this function:
# *.dist
#
# Private functions utilizing this function:
# # none
#==============================================================================#
get_gen_mat <- function(x){
if (suppressWarnings(is.genpop(x)) && x@type == "codom"){
MAT <- makefreq(x, missing = "mean", quiet = TRUE)
} else {
MAT <- tab(x, freq = TRUE)
}
return(MAT)
}
#==============================================================================#
# This will retrieve the labels for the distance matrix from "gen" objects
#
# Public functions utilizing this function:
# *.dist
#
# Private functions utilizing this function:
# # none
#==============================================================================#
get_gen_dist_labs <- function(x){
if (is.genind(x)){
labs <- indNames(x)
} else if (is.genpop(x)){
labs <- popNames(x)
} else if (is(x, "bootgen")){
labs <- names(x)
} else {
labs <- rownames(x)
}
return(labs)
}
#==============================================================================#
# This will give attributes to genetic distance matrices.
#
# Input:
# - d a vector to be coerced into a distance matrix.
# - nlig number of samples
# - labs the names of the samples
# - method the name of the method used to create the distances
# - matched_call the call that created the matrix.
# Public functions utilizing this function:
# *.dist
#
# Private functions utilizing this function:
# # none
#==============================================================================#
make_attributes <- function(d, nlig, labs, method, matched_call){
attr(d, "Size") <- nlig
attr(d, "Labels") <- labs
attr(d, "Diag") <- FALSE
attr(d, "Upper") <- FALSE
attr(d, "method") <- method
attr(d, "call") <- matched_call
class(d) <- "dist"
return(d)
}
#==============================================================================#
# Parse incoming palettes
#
# Color palettes could come in the form of functions, vectors, or named vectors.
# This internal helper will parse them and return a named vector of colors.
#
# Public functions utilizing this function:
# ## bruvo.msn poppr.msn
#
# Internal functions utilizing this function:
# ## none
#==============================================================================#
palette_parser <- function(inPAL, npop, pnames){
PAL <- try(match.fun(inPAL, descend = FALSE), silent = TRUE)
if ("try-error" %in% class(PAL)){
if (all(pnames %in% names(inPAL))){
color <- inPAL[pnames]
} else if (npop == length(inPAL)){
color <- stats::setNames(inPAL, pnames)
} else if (npop < length(inPAL)){
warning("Number of populations fewer than number of colors supplied. Discarding extra colors.")
color <- stats::setNames(inPAL[1:npop], pnames)
} else {
warning("insufficient color palette supplied. Using topo.colors().")
color <- stats::setNames(topo.colors(npop), pnames)
}
} else {
color <- stats::setNames(PAL(npop), pnames)
}
return(color)
}
#==============================================================================#
# Function used to update colors in poppr msn
#
# Public functions utilizing this function:
# plot_poppr_msn
#
# Private functions utilizing this function:
# # none
#==============================================================================#
update_poppr_graph <- function(graphlist, PALETTE){
PALETTE <- match.fun(PALETTE)
updates <- setNames(PALETTE(length(graphlist$populations)), NULL)
lookup <- data.frame(old = graphlist$populations,
update = updates,
stringsAsFactors = FALSE)
if (nrow(lookup) > 1){
colorlist <- V(graphlist$graph)$pie.color
V(graphlist$graph)$pie.color <- lapply(colorlist, update_colors, lookup)
# Update color vector for circles if present
pie.single <- lengths(V(graphlist$graph)$pie) == 1
if (any(pie.single)) {
the_circles <- unlist(V(graphlist$graph)$pie.color[pie.single])
V(graphlist$graph)$color[pie.single] <- the_circles # set the color palette
names(V(graphlist$graph)$color)[pie.single] <- names(the_circles) # set the population names
}
} else {
colorlist <- V(graphlist$graph)$color
V(graphlist$graph)$color <- rep(PALETTE(1), length(colorlist))
}
graphlist$colors <- lookup[[2]]
names(graphlist$colors) <- graphlist$populations
return(graphlist)
}
#==============================================================================#
# Function used to update colors in poppr msn
#
# Public functions utilizing this function:
# none
#
# Private functions utilizing this function:
# # update_poppr_graph
#==============================================================================#
update_colors <- function(colorvec, lookup){
# x <- vapply(1:length(colorvec), update_single_color, colorvec, lookup, colorvec)
# return(x)
pops <- lookup[[1]]
update <- lookup[[2]]
names(update) <- pops
matching <- match(names(colorvec), pops)
return(update[matching[!is.na(matching)]])
}
#==============================================================================#
# Function used to obtain information about local ploidy in a genind data set
# for polyploids. When polyploids are imported, the entire data set is coded in
# such a way that the whole data set is considered to be the ploidy of the higest
# observed datum. The missing data are coded as "0". This function will simply
# take the difference between the maximum ploidy and the number of zeroes present.
#
# Public functions utilizing this function:
# info_table
#
# Private functions utilizing this function:
# # none
#==============================================================================#
get_local_ploidy <- function(x){
# Microsatellite markers should have zero alleles filtered out. If there are
# missing alleles, then this indicates non-microsatellite alleles.
suppressWarnings({
microsat_test <- as.numeric(unlist(alleles(x), use.names = FALSE))
})
if (!anyNA(microsat_test) && any(microsat_test == 0)) {
x <- recode_polyploids(x, newploidy = TRUE)
}
tabx <- tab(x)
cols <- split(colnames(tabx), locFac(x))
locmat <- vapply(cols, function(i) rowSums(tabx[, i, drop = FALSE]), numeric(nInd(x)))
return(locmat)
}
#==============================================================================#
# Test if a data set is comprised of microsatellites.
#
# Public functions utilizing this function:
# none
#
# Private functions utilizing this function:
# # test_zeroes
#==============================================================================#
test_microsat <- function(x){
allnames <- unlist(lapply(x@all.names, as.numeric))
if (any(is.na(allnames))){
return(FALSE)
} else {
return(TRUE)
}
}
#==============================================================================#
# Test if a polyploid microsatellite data set represent missing data as "0"
#
# Public functions utilizing this function:
# none
#
# Private functions utilizing this function:
# # get_local_ploidy
#==============================================================================#
test_zeroes <- function(x){
if (x@type == "PA") return(FALSE)
if (test_microsat(x)){
allnames <- as.numeric(unlist(alleles(x), use.names = FALSE))
if (any(allnames == 0)){
return(TRUE)
}
}
return(FALSE)
}
#==============================================================================#
# Internal plotting function for mlg.table
#
# Public functions utilizing this function:
# none
#
# Private functions utilizing this function:
# # print_mlg_barplot
#==============================================================================#
#' @param mlgt a table with populations in rows and MLGs in columns
#'
#' @param color logical. Should the output be colored by population?
#' @param background logical. should the data be plotted against the background
#' data?
#' @noRd
#' @keywords internal
#'
#' @importFrom dplyr %>%
#' @importFrom dplyr arrange_ group_by_ ungroup
#' @importFrom dplyr n
#' @importFrom rlang "!!"
mlg_barplot <- function(mlgt, color = FALSE, background = FALSE){
names(dimnames(mlgt)) <- c("Population", "MLG")
mlgt.df <- as.data.frame.table(mlgt, responseName = "count", stringsAsFactors = FALSE)
use_old_dplyr <- utils::packageVersion("dplyr") <= package_version("0.5.0") | getOption("poppr.old.dplyr")
# Ensure that the population is a factor
if (use_old_dplyr) {
mlgt.df <- mlgt.df %>%
dplyr::mutate_(.dots = list(Population = ~factor(Population, unique(Population)))) %>%
dplyr::filter_("count > 0")
} else {
qPop <- quote(Population)
mlgt.df <- mlgt.df %>%
dplyr::mutate(Population = factor(!!qPop, unique(!!qPop))) %>%
dplyr::filter(!!quote(count > 0))
}
if (color | background) {
# summarize with the total counts, and merge with original data
if (use_old_dplyr) {
mlgt.df <- mlgt.df %>%
dplyr::group_by_("MLG") %>%
dplyr::summarize_(.dots = list(n = ~sum(count))) %>%
dplyr::full_join(mlgt.df, by = "MLG") %>%
dplyr::arrange_(~dplyr::desc(n)) %>%
dplyr::mutate_(.dots = list(MLG = ~factor(MLG, levels = unique(MLG))))
} else {
qMLG <- quote(MLG)
mlgt.df <- mlgt.df %>%
dplyr::group_by(!!qMLG) %>%
dplyr::summarize(n = sum(!!quote(count))) %>%
dplyr::full_join(mlgt.df, by = "MLG") %>%
dplyr::arrange(dplyr::desc(!!quote(n))) %>%
dplyr::mutate(MLG = factor(!!qMLG, levels = unique(!!qMLG)))
}
the_breaks <- pretty(mlgt.df$n)
} else {
if (use_old_dplyr) {
mlgt.df <- mlgt.df %>%
dplyr::group_by_("Population") %>%
dplyr::do_(~dplyr::arrange(., dplyr::desc(count)))
} else {
mlgt.df <- mlgt.df %>%
dplyr::group_by(!!quote(Population)) %>%
dplyr::arrange(dplyr::desc(!!quote(count)), .by_group = TRUE)
}
the_breaks <- pretty(mlgt.df$count)
}
if (use_old_dplyr) {
mlgt.df <- mlgt.df %>%
dplyr::ungroup() %>%
unique() %>%
dplyr::filter_("count > 0") %>%
dplyr::mutate_(.dots = list(order = ~seq(nrow(.)))) %>%
dplyr::mutate_(.dots = list(order = ~factor(order, unique(order))))
} else {
mlgt.df <- mlgt.df %>%
dplyr::ungroup() %>%
unique() %>%
dplyr::filter(!!quote(count > 0)) %>%
dplyr::mutate(order = seq(n())) %>%
dplyr::mutate(order = factor(!!quote(order), unique(!!quote(order))))
}
if (color | background) {
if (use_old_dplyr) {
mlgt.df <- mlgt.df %>%
dplyr::select_(~Population, ~MLG, ~count, ~order, ~n)
} else {
mlgt.df <- mlgt.df %>%
dplyr::select(!!quote(Population), !!quote(MLG), !!quote(count), !!quote(order), !!quote(n))
}
}
the_breaks <- the_breaks[the_breaks %% 1 == 0]
# Conditionals for plotting
x <- if (color) "MLG" else "order"
baes <- if (color) aes_string(fill = "Population") else aes_string()
the_plot <- ggplot(mlgt.df, aes_string(x = x, y = "count"))
if (background){
the_plot <- the_plot +
geom_bar(aes_string(y = "n"), color = "grey25", alpha = 0, stat = "identity")
}
the_plot <- the_plot +
geom_bar(baes, stat = "identity") +
theme(panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
axis.text.x = element_text(size = 10, angle = 90,
hjust = 1, vjust = 1)) +
xlab("MLG") +
scale_y_continuous(expand = c(0, 0), breaks = the_breaks)
if (!color || (color && background)){
xbreak <- if (color) mlgt.df$MLG else mlgt.df$order
the_plot <- the_plot +
scale_x_discrete(labels = mlgt.df$MLG, breaks = xbreak) +
facet_wrap(~Population, scales = "free_x", shrink = TRUE, drop = TRUE)
}
return(the_plot)
}
#==============================================================================#
# Internal function for creating title for index of association histogram.
#
# Public functions utilizing this function:
# # none
#
# Private functions utilizing this function:
# # poppr.plot
#==============================================================================#
make_poppr_plot_title <- function(samp, file = NULL, N = NULL, pop = NULL){
plot_title <- ifelse(is.null(pop), "", paste0("Population:", pop))
plot_title <- ifelse(is.null(N), paste0(plot_title, ""),
paste(plot_title, paste("N:", N), sep = "\n"))
plot_title <- ifelse(is.null(file), paste0(plot_title, ""),
paste(plot_title, paste("Data:", file), sep = "\n"))
perms <- paste("Permutations:", length(samp))
plot_title <- paste(plot_title, perms, sep = "\n")
return(plot_title)
}
#' Pad a single locus genotype with zeroes according the maximum ploidy.
#'
#' @param x a vector of alleles for a single individual at a single locus
#' @param maxploid the maximum ploidy to pad
#' @param mat_type if the final output is to be a matrix with one column per
#' allele, what type of matrix should it be. Acceptable are: numeric and character.
#' @noRd
#' @return a vector of length 1 (default) or of length maxploid.
#' @seealso used by: [fill_zero_locus()]
fill_zero <- function(x, maxploid, mat_type = character(0)){
if (length(x) < maxploid){
# If the genotype is less than the max ploidy, fill it with a zero
if (length(mat_type)) {
fill <- as(0L, mat_type)
pad <- rep(fill, maxploid - length(x))
res <- c(pad, as(x, mat_type))
} else {
zeroes <- paste(rep(0, maxploid - length(x)), collapse = "/")
res <- paste(x, collapse = "/")
res <- paste(zeroes, res, sep = "/")
}
} else {
# If the genotype is the right format_type, either collapse it or return it
if (length(mat_type)){
res <- as(x, mat_type)
} else {
res <- paste(x, collapse = "/")
}
}
return(res)
}
#' Fill short genotypes in a character vector with zeroes
#'
#' @param x a character vector of genotypes at a single locus, separated by "/"
#' @param maxploid the maximum ploidy to pad
#' @param mat_type if the final output is to be a matrix with one column per
#' allele, what type of matrix should it be. Acceptable are: numeric and character.
#' @noRd
#' @return a vector of length 1 (default) or of length maxploid.
#' @seealso uses: [fill_zero_locus()], used by: [create_bruvo_mat()]
fill_zero_locus <- function(x, sep = "/", maxploid, mat_type = character(0)){
x <- strsplit(x, sep)
if (length(mat_type)) {
result <- vector(mode = mat_type, length = maxploid)
} else {
result <- character(1)
}
return(t(vapply(x, fill_zero, result, maxploid, mat_type)))
}
#==============================================================================#
# This will fill in zeroes from a data frame with uneven ploidy to force it to
# have uniform ploidy.
#
# Example:
# locus_1 locus_2
# sample_1 25/91/20 15/33/10/55
# sample_2 71/29/34/69 45
# sample_3 24 23/83/25
# sample_4 12/18/94 4
# sample_5 68 15/54/57
# sample_6 36/45/91 32/74/22
# sample_7 72 22/27/70/75
# sample_8 100/54 92
# sample_9 62/50 17/11/62/50
# sample_10 41/31 17/30/57
#
# Will become
#
# locus_1 locus_2
# sample_1 0/25/91/20 15/33/10/55
# sample_2 71/29/34/69 0/0/0/45
# sample_3 0/0/0/24 0/23/83/25
# sample_4 0/12/18/94 0/0/0/4
# sample_5 0/0/0/68 0/15/54/57
# sample_6 0/36/45/91 0/32/74/22
# sample_7 0/0/0/72 22/27/70/75
# sample_8 0/0/100/54 0/0/0/92
# sample_9 0/0/62/50 17/11/62/50
# sample_10 0/0/41/31 0/17/30/57
#
# or, if mat = TRUE
#
# locus_1.1 locus_1.2 locus_1.3 locus_1.4 locus_2.1 locus_2.2 locus_2.3 locus_2.4
# sample_1 0 25 91 20 15 33 10 55
# sample_2 71 29 34 69 0 0 0 45
# sample_3 0 0 0 24 0 23 83 25
# sample_4 0 12 18 94 0 0 0 4
# sample_5 0 0 0 68 0 15 54 57
# sample_6 0 36 45 91 0 32 74 22
# sample_7 0 0 0 72 22 27 70 75
# sample_8 0 0 100 54 0 0 0 92
# sample_9 0 0 62 50 17 11 62 50
# sample_10 0 0 41 31 0 17 30 57
#
#
#' Fill short genotypes in a data frame with zeroes
#'
#' @param x a data frame of character vectors representing genotypes with alleles separated by "/"
#' @param maxploid the maximum ploidy to pad
#' @param mat if the final output is to be a matrix with one column per
#' allele, what type of matrix should it be. Acceptable are: numeric and character.
#' Default is an empty character vector, indicating that alleles should be concatenated.
#' @noRd
#' @return a vector of length 1 (default) or of length maxploid.
#' @seealso uses: [fill_zero_locus()], used by: [genind2genalex()]
generate_bruvo_mat <- function(x, maxploid, sep = "/", mat_type = character(0)){
# --- 2021-01-30 ---
# mat has been renamed to mat_type and recast as a character vector. For
# details, see https://github.com/grunwaldlab/poppr/issues/108
# ------------------
# Create a template for vapply to fill in with the result.
if (length(mat_type)) {
# Each locus will be a matrix with one allele per column
fill <- vector(mode = mat_type, length = nrow(x) * maxploid)
result <- matrix(fill, ncol = maxploid, nrow = nrow(x))
} else {
# Each locus will be a character vector with all the alleles
result <- character(nrow(x))
}
res <- vapply(x, fill_zero_locus, result, sep, maxploid, mat_type)
if (length(dim(res)) > 2){
redim <- dim(res)
dim(res) <- c(redim[1], redim[2]*redim[3])
xcols <- colnames(x)
maxseq <- seq(maxploid)
colnames(res) <- vapply(xcols, FUN = paste,
FUN.VALUE = character(maxploid), maxseq, sep = ".")
} else {
colnames(res) <- colnames(x)
}
if (length(mat_type) == 0) {
res[grep("NA", res)] <- NA_character_
}
rownames(res) <- rownames(x)
return(res)
}
#==============================================================================#
# Function to subset the custom MLGs by the computationally derived MLGs in the
# data set. This is necessary due to the fact that minimum spanning networks
# will clone correct before calculations, but this is performed on the visible
# multilocus genotypes. for custom MLGs that may not be monophyletic, this
# results in observed networks that may be incorrect.
#
# A solution to this would simply be to label the multilocus genotypes with
# their custom labels, but collapse them with the computationally derived
# labels.
#
# In order to parse these out, we have three possible situations we can think
# of:
#
# 1. computational MLGs match the custom MLGs: pretty easy, simply return the
# non-duplicated mlgs
# 2. There are more computational MLG classes than custom MLGs. This is also
# fairly simple: return the custom MLGs censored by the computational MLGs
# 3. More custom MLGs than computational MLGs. For labelling purposes,
# the custom MLGs that occupy a single MLG should be concatenated in a
# string.
# 4. A mix of 2 and 3. Same strategy as 3.
#
# Input: a genclone or snpclone object with an MLG object in the @mlg slot
# Output: A string of clone-censored custom MLGs
#
# Public functions utilizing this function:
# # bruvo.msn poppr.msn plot_poppr_msn
#
# Private functions utilizing this function
# # singlepop_msn
#==============================================================================#
correlate_custom_mlgs <- function(x, by = "original", subset = TRUE){
if (!is.genclone(x) & !is(x, "snpclone")){
stop("needs a genclone or snpclone object")
}
if (!is(x@mlg, "MLG")){
stop("the @mlg slot needs to be of class MLG. This procedure is meaningless otherwise.")
}
the_mlgs <- x@mlg@mlg
customs <- as.character(the_mlgs[["custom"]])
ncustom <- nlevels(the_mlgs[["custom"]])
mlg_dup <- !duplicated(the_mlgs[[by]])
ndup <- sum(mlg_dup)
# Create contingency table with custom genotypes in rows and computed in
# columns
cont_table <- table(customs, the_mlgs[[by]])
# Create true/false table of MLG identity
i_table <- cont_table > 0
# Count up the number of custom MLGs contained within each computed MLG.
check_less_custom <- colSums(i_table)
if ((ndup == ncustom | ndup > ncustom) & all(check_less_custom == 1)){
if (!subset){
return(customs)
}
res <- customs[mlg_dup]
names(res) <- the_mlgs[[by]][mlg_dup]
return(res)
}
cust_names <- rownames(cont_table)
res <- apply(cont_table, 2, function(i) paste(cust_names[i > 0], collapse = "\n"))
if (!subset){
res <- res[as.character(as.character(the_mlgs[[by]]))]
}
return(res)
}
#==============================================================================#
# Function to boostrap a single population from a mlg.matrix
#
# Public functions utilizing this function:
# ## diversity_boot
#
# Internal functions utilizing this function:
# ## none
#==============================================================================#
boot_per_pop <- function(x, rg = multinom_boot, n, mle = NULL, H = TRUE,
G = TRUE, lambda = TRUE, E5 = TRUE, ...){
xi <- extract_samples(x)
res <- boot::boot(xi, boot_stats, R = n, sim = "parametric", ran.gen = rg,
mle = mle, H = H, G = G, lambda = lambda, E5 = E5, ...)
return(res)
}
#==============================================================================#
# multinomial sampler for bootstrapping
#
# Public functions utilizing this function:
# ## diversity_boot
#
# Internal functions utilizing this function:
# ## boot_per_pop
#==============================================================================#
multinom_boot <- function(x, mle = NULL){
if (is.null(mle) || mle < 2){
res <- stats::rmultinom(1, length(x), prob = tabulate(x))
} else {
res <- stats::rmultinom(1, mle, prob = tabulate(x))
}
extract_samples(res)
}
#==============================================================================#
# subsampler for rarefaction bootstrapping
#
# Public functions utilizing this function:
# ## diversity_boot
#
# Internal functions utilizing this function:
# ## boot_per_pop
#==============================================================================#
rare_sim_boot <- function(x, mle = 10){
if (length(x) < mle){
sample(x)
} else {
sample(x, mle)
}
}
#==============================================================================#
# wrapper to calculate statistics from bootstrapped samples
#
# Public functions utilizing this function:
# ## none
#
# Internal functions utilizing this function:
# ## boot_per_pop
#==============================================================================#
boot_stats <- function(x, i, H = TRUE, G = TRUE, lambda = TRUE, E5 = TRUE, ...){
xi <- tabulate(x[i])
res <- diversity_stats(xi, H, G, lambda, E5, ...)
return(res)
}
#==============================================================================#
# Sets up a vector of mlg counts for bootstrapping. Input is a vector where each
# element represents the count of a specific mlg, output is a vector where each
# element represents the mlg assignment of a particular sample.
#
# Public functions utilizing this function:
# ## none
#
# Internal functions utilizing this function:
# ## boot_per_pop, multinom_boot
#==============================================================================#
extract_samples <- function(x) rep(1:length(x), x)
#==============================================================================#
# calculates confidence interval for a single population and a single statistic
# given the result of a bootstrap. Note, around_estimate is always true, and
# will center the CI around the observed estimate. The structure for changing it
# is set up, but currently not used.
#
# Public functions utilizing this function:
# ## none
#
# Internal functions utilizing this function:
# ## get_ci
#==============================================================================#
get_boot_ci <- function(index, x, type = "normal", conf = 0.95,
around_estimate = TRUE, ...){
if (length(unique(x$t[, index])) == 1){
return(c(NA_real_, NA_real_))
} else if (around_estimate){
res <- boot::norm.ci(conf = conf, t0 = x$t0[index], var.t0 = var(x$t[, index]))[-1]
} else {
res <- boot::norm.ci(x, conf = conf, index = index)[1, ]
# res <- boot::boot.ci(x, conf, type, index, ...)[[type]][1, ]
}
return(utils::tail(res, 2))
}
#==============================================================================#
# calculates confidence intervals for all statistics per population. If
# bci = TRUE, then the confidence interval is calculated using norm.ci or
# boot.ci. Otherwise, the CI is calculated using quantiles from the bootstrapped
# data.
#
# Public functions utilizing this function:
# ## none
#
# Internal functions utilizing this function:
# ## get_all_ci
#==============================================================================#
get_ci <- function(x, lb, ub, bci = TRUE, btype = "normal", center = TRUE){
if (bci){
lenout <- length(x$t0)
conf <- diff(c(lb, ub))
res <- vapply(1:lenout, get_boot_ci, numeric(2), x, btype, conf, center)
if (!is.null(dim(res))) rownames(res) <- paste(c(lb, ub)*100, "%")
} else {
res <- apply(x$t, 2, quantile, c(lb, ub), na.rm = TRUE)
if (all(all.equal(res[1, ], res[2, ]) == TRUE)){
res[] <- NA_real_
}
}
return(res)
}
#==============================================================================#
# compiles an array of confidence intervals per statistic, per population.
#
# Public functions utilizing this function:
# ## diversity_ci
#
# Internal functions utilizing this function:
# ## none
#==============================================================================#
get_all_ci <- function(res, ci = 95, index_names = c("H", "G", "Hexp", "E.5"),
center = TRUE, btype = "normal", bci = TRUE){
lower_bound <- (100 - ci)/200
upper_bound <- 1 - lower_bound
n_indices <- length(index_names)
funval <- matrix(numeric(n_indices*2), nrow = 2)
CI <- vapply(res, FUN = get_ci, FUN.VALUE = funval,
lower_bound, upper_bound, bci = bci, btype = btype,
center = center)
dCI <- dimnames(CI)
dimnames(CI) <- list(CI = dCI[[1]],
Index = index_names,
Pop = dCI[[3]])
return(CI)
}
#==============================================================================#
# Takes the output of diversity_ci and converts it to a data frame.
#
# Public functions utilizing this function:
# ## diversity_ci
#
# Internal functions utilizing this function:
# ## none
#==============================================================================#
pretty_info <- function(obs, est, CI, boots = NULL){
pretty_ci <- t(apply(round(CI, 3), 2:3,
function(x){
if (all(is.na(x))) return(NA_character_)
paste0("(", paste(x, collapse = ", "), ")")
}))
colnames(est) <- paste(colnames(est), "est", sep = ".")
out <- vector(mode = "list", length = ncol(est)*3)
colnames(pretty_ci) <- paste(colnames(pretty_ci), "ci", sep = ".")
names(out)[(1:length(out)) %% 3 != 0] <- intersp(colnames(obs), colnames(est))
names(out)[(1:length(out)) %% 3 == 0] <- colnames(pretty_ci)
for (i in names(out)){
out[[i]] <- obs[, 1]
}
out <- data.frame(out)
out[colnames(obs)] <- obs
out[colnames(est)] <- est
out[colnames(pretty_ci)] <- pretty_ci
class(out) <- c("popprtable", "data.frame")
return(out)
}
#==============================================================================#
# Takes two vectors and intersperses their elements
#
# Public functions utilizing this function:
# ## none
#
# Internal functions utilizing this function:
# ## pretty_info
#==============================================================================#
intersp <- function(v1, v2){
v1l <- length(v1)
v2l <- length(v2)
the_evens <- evens(v1l + v2l)
stopifnot(v1l == v2l)
ov <- vector(length = v1l + v2l, mode = class(c(v1[1], v2[1])))
ov[the_evens] <- v2
ov[!the_evens] <- v1
return(ov)
}
#==============================================================================#
# Returns a vector of even elements
#
# Public functions utilizing this function:
# ## none
#
# Internal functions utilizing this function:
# ## intersp
#==============================================================================#
evens <- function(x){
if (length(x) > 1){
if (is.numeric(x)){
res <- x %% 2 == 0
} else {
res <- 1:length(x) %% 2 == 0
}
} else {
res <- seq(x) %% 2 == 0
}
return(res)
}
#==============================================================================#
# Plots the results of bootstrapping and confidence interval calculation in
# facetted barplots
#
# Public functions utilizing this function:
# ## diversity_ci
#
# Internal functions utilizing this function:
# ## none
#==============================================================================#
#' Plot the results of boot_ci
#'
#' @param res a list of "boot" class objects
#' @param orig a table of the observed statistics
#' @param CI
#'
#' @return prints a ggplot2 object
#' @noRd
#' @keywords internal
boot_plot <- function(res, orig, CI){
statnames <- colnames(orig)
popnames <- rownames(orig)
orig <- as.data.frame.table(orig, responseName = "value")
if (!is.null(CI)) {
cidf <- as.data.frame.table(CI, responseName = "v", stringsAsFactors = FALSE) %>%
stats::reshape(idvar = c("Pop", "Index"), timevar = "CI", direction = "wide")
names(cidf) <- gsub("^v\\.", "", names(cidf))
orig <- merge(orig, cidf)
colnames(orig)[4:5] <- c("lb", "ub")
}
samp <- vapply(res, "[[", FUN.VALUE = res[[1]]$t, "t")
dimnames(samp) <- list(NULL,
Index = statnames,
Pop = popnames)
sampmelt <- as.data.frame.table(samp, responseName = "value")
pl <- ggplot(sampmelt, aes_string(x = "Pop", y = "value", group = "Pop")) +
geom_boxplot() +
geom_point(aes_string(color = "Pop", x = "Pop", y = "value"),
size = rel(4), pch = 16, data = orig) +
xlab("Population") + labs(color = "Observed") +
facet_wrap(~Index, scales = "free_y") + myTheme
if (!is.null(CI)){
pl <- pl +
geom_errorbar(aes_string(color = "Pop", x = "Pop", ymin = "lb", ymax = "ub"),
data = orig)
}
print(pl)
}
#==============================================================================#
# Extract the observed bootstrap statistics from a boot object.
#
# Public functions utilizing this function:
# ## diversity_ci
#
# Internal functions utilizing this function:
# ## none
#==============================================================================#
get_boot_stats <- function(bootlist){
npop <- length(bootlist)
bstats <- bootlist[[1]]$t0
nstat <- length(bstats)
resmat <- matrix(nrow = npop, ncol = nstat,
dimnames = list(Pop = names(bootlist), Index = names(bstats)))
resmat[] <- t(vapply(bootlist, FUN = "[[", FUN.VALUE = bstats, "t0"))
return(resmat)
}
#==============================================================================#
# mean and sd methods for boot
#
# Public functions utilizing this function:
# ## none
#
# Internal functions utilizing this function:
# ## get_boot_se
#==============================================================================#
mean.boot <- function(x, ...) apply(x$t, 2, mean, na.rm = TRUE)
sd.boot <- function(x, na.rm = TRUE) apply(x$t, 2, stats::sd, na.rm)
#==============================================================================#
# retrieves standard error or mean from list of boot objects.
#
# Public functions utilizing this function:
# ## diversity_ci
#
# Internal functions utilizing this function:
# ## none
#==============================================================================#
get_boot_se <- function(bootlist, res = "sd"){
npop <- length(bootlist)
bstats <- bootlist[[1]]$t0
nstat <- length(bstats)
THE_FUN <- match.fun(paste0(res, ".boot")) # mean.boot sd.boot
resmat <- matrix(nrow = npop, ncol = nstat,
dimnames = list(Pop = names(bootlist), Index = names(bstats)))
resmat[] <- t(vapply(bootlist, FUN = THE_FUN, FUN.VALUE = bstats))
if (res == "sd"){
colnames(resmat) <- paste(colnames(resmat), res, sep = ".")
}
return(resmat)
}
#==============================================================================#
# Given a maximum and minimum value, this makes an n x 2 matrix of windows that
# encompass these values.
#
# Public functions utilizing this function:
# ## win.ia
#
# Internal functions utilizing this function:
# ## none
#==============================================================================#
make_windows <- function(maxp, minp = 1L, window = 100L){
nwin <- ceiling(maxp/window)
minwin <- ceiling(minp/window)
winmat <- matrix(window * seq(nwin), nrow = nwin, ncol = 2)[minwin:nwin, , drop = FALSE]
winmat[, 1] <- winmat[, 1] - window + 1
return(winmat)
}
#==============================================================================#
# add tied edges to minimum spanning tree
#
# Public functions utilizing this function:
# ## bruvo.msn poppr.msn
#
# Internal functions utilizing this function:
# ## singlepop_msn
#==============================================================================#
add_tied_edges <- function(mst, distmat, tolerance = .Machine$double.eps ^ 0.5){
tied_edges <- .Call("msn_tied_edges", as.matrix(mst[]), as.matrix(distmat), tolerance)
if (length(tied_edges) > 0){
the_edges <- tied_edges[c(TRUE, TRUE, FALSE)]
the_weights <- tied_edges[c(FALSE, FALSE, TRUE)]
mst <- add.edges(mst, dimnames(mst[])[[1]][the_edges], weight = the_weights)
}
return(mst)
}
#==============================================================================#
#' Correct minor allele frequencies derived from rraf (INTERNAL)
#'
#' \strong{This is an internal function. The documentation is for use with
#' \code{\link{rraf}}, \code{\link{pgen}}, and \code{\link{psex}}. Do not
#' attempt to use this function directly.} Minor alleles are often lost when
#' calculating allele frequencies from a round-robin approach, resulting in
#' zero-valued allele frequencies (Arnaud-Haond et al. 2007, Parks and Werth
#' 1993). This can be problematic when calculating values for \code{\link{pgen}}
#' and \code{\link{psex}}. This function gives options for giving a value to
#' these zero-valued frequencies.
#'
#'
#' @param rraf \emph{internal} a list or matrix produced from \code{\link{rraf}}
#' (with uncorrected MAF)
#' @param rrmlg \emph{internal} a matrix containing multilocus genotypes per
#' locus derived from \code{\link{rrmlg}}
#' @param e a numeric epsilon value to use for all missing allele frequencies.
#' @param sum_to_one when \code{TRUE}, the original frequencies will be reduced
#' so that all allele frequencies will sum to one. \strong{Default:
#' \code{FALSE}}
#' @param d the unit by which to take the reciprocal. \code{div = "sample"} will
#' be 1/(n samples), \code{div = "mlg"} will be 1/(n mlg), and \code{div =
#' "rrmlg"} will be 1/(n mlg at that locus). This is overridden by \code{e}.
#' @param mul a multiplier for div. Default is \code{mult = 1}. This parameter
#' is overridden by \code{e}
#' @param mlg \emph{internal} the number of MLGs in the sample. Only required if
#' \code{d = "mlg"}.
#' @param pop \emph{internal} a vector of factors that define the population
#' definition for each observation in \code{rrmlg}. This must be supplied if
#' \code{rraf} is a matrix.
#' @param locfac \emph{internal} a vector of factors that define the columns
#' belonging to the loci.
#'
#' @details Arguments of interest to the user are:
#' \itemize{
#' \item \strong{e}
#' \item \strong{sum_to_one}
#' \item \strong{d}
#' \item \strong{m}
#' }
#' By default (\code{d = "sample", e = NULL, sum_to_one = FALSE, mul = 1}), this
#' will add 1/(n samples) to all zero-value alleles. The basic formula is
#' \strong{1/(d * m)} unless \strong{e} is specified. If \code{sum_to_one =
#' TRUE}, then the frequencies will be scaled as x/sum(x) AFTER correction,
#' indicating that the allele frequencies will be reduced. See the examples for
#' details. The general pattern of correction is that the value of the MAF will
#' be \emph{rrmlg > mlg > sample}
#'
#' @return a matrix or vector the same type as rraf
#' @author Zhian N. Kamvar
#' @noRd
#' @references
#'
#' Arnaud-Haond, S., Duarte, C. M., Alberto, F., & Serrão, E. A. 2007.
#' Standardizing methods to address clonality in population studies.
#' \emph{Molecular Ecology}, 16(24), 5115-5139.
#'
#' Parks, J. C., & Werth, C. R. 1993. A study of spatial features of clones in a
#' population of bracken fern, \emph{Pteridium aquilinum} (Dennstaedtiaceae).
#' \emph{American Journal of Botany}, 537-544.
#'
#' @seealso \code{\link{rraf}},
#' \code{\link{pgen}},
#' \code{\link{psex}},
#' \code{\link{rrmlg}}
#'
#' @examples
#' \dontrun{
#'
#' data(Pram)
#' #-------------------------------------
#'
#' # If you set correction = FALSE, you'll notice the zero-valued alleles
#'
#' rraf(Pram, correction = FALSE)
#'
#' # By default, however, the data will be corrected by 1/n
#'
#' rraf(Pram)
#'
#' # Of course, this is a diploid organism, we might want to set 1/2n
#'
#' rraf(Pram, mul = 1/2)
#'
#' # To set MAF = 1/2mlg
#'
#' rraf(Pram, d = "mlg", mul = 1/2)
#'
#' # Another way to think about this is, since these allele frequencies were
#' # derived at each locus with different sample sizes, it's only appropriate to
#' # correct based on those sample sizes.
#'
#' rraf(Pram, d = "rrmlg", mul = 1/2)
#'
#' # If we were going to use these frequencies for simulations, we might want to
#' # ensure that they all sum to one.
#'
#' rraf(Pram, d = "mlg", mul = 1/2, sum_to_one = TRUE)
#'
#' #-------------------------------------
#' # When we calculate these frequencies based on population, they are heavily
#' # influenced by the number of observed mlgs.
#'
#' rraf(Pram, by_pop = TRUE, d = "rrmlg", mul = 1/2)
#'
#' # This can be fixed by specifying a specific value
#'
#' rraf(Pram, by_pop = TRUE, e = 0.01)
#'
#' }
#'
#==============================================================================#
rare_allele_correction <- function(rraf, rrmlg, e = NULL, sum_to_one = FALSE,
d = c("sample", "mlg", "rrmlg"), mul = 1,
mlg = NULL, pop = NULL, locfac = NULL){
if (identical(parent.frame(), globalenv())){
msg <- paste0("\n\n\n",
" !!! rare_allele_correction() is for internal use only !!!",
"\n\n",
" Input types are not checked within this function and may\n",
" result in an error. USE AT YOUR OWN RISK.\n\n\n")
warning(msg, immediate. = TRUE)
}
d <- match.arg(d, c("sample", "mlg", "rrmlg"))
if (is.list(rraf)){
if (is.null(e)){
e <- get_minor_allele_replacement(rrmlg, d, mul, mlg)
}
if (length(e) == 1){
e <- setNames(rep(e, ncol(rrmlg)), colnames(rrmlg))
}
res <- lapply(names(rraf), replace_zeroes, rraf, e, sum_to_one)
names(res) <- names(rraf)
} else if (is.matrix(rraf)){
# split matrix by population and locus
poplist <- apply(rraf, 1, split, locfac)
# loop over populations and call this function again on the list of loci
res <- lapply(names(poplist), function(i){
prraf <- poplist[[i]]
prrmlg <- rrmlg[pop == i, ]
rare_allele_correction(prraf, prrmlg, mlg = mlg, e = e, d = d, mul = mul,
sum_to_one = sum_to_one)
})
# make this list of loci a matrix again
res <- t(vapply(res, unlist, rraf[1, , drop = TRUE]))
dimnames(res) <- dimnames(rraf)
}
return(res)
}
#==============================================================================#
# round robin clone-correct allele frequency estimates
#
# This will take in
# - i the index of the locus.
# - loclist a list of genind objects each one locus.
# - mlgs a matrix from rrmlg
#
# Public functions utilizing this function:
# ## rraf
#
# Internal functions utilizing this function:
# ## none
#==============================================================================#
rrcc <- function(i, loclist, mlgs){
cc <- !duplicated(mlgs[, i])
mat <- tab(loclist[[i]], freq = TRUE)
res <- colMeans(mat[cc, , drop = FALSE], na.rm = TRUE)
return(res)
}
#==============================================================================#
#' round robin clone-correct allele frequency estimates by population
#'
#' @param i the index of the locus.
#' @param loclist a list of genind objects each one locus.
#' @param mlgs a matrix from rrmlg
#' @param pnames the population names.
#'
#' @note
#' Public functions utilizing this function:
#' ## rraf
#'
#' Internal functions utilizing this function:
#' ## none
#' @noRd
#==============================================================================#
rrccbp <- function(i, loclist, mlgs, pnames){
mat <- tab(loclist[[i]], freq = TRUE)
npop <- length(pnames)
res <- matrix(numeric(ncol(mat)*npop), nrow = npop)
rownames(res) <- pnames
colnames(res) <- colnames(mat)
pops <- pop(loclist[[i]])
for (p in pnames){
psub <- which(pops %in% p)
cc <- psub[!duplicated(mlgs[psub, i])]
out <- colMeans(mat[cc, , drop = FALSE], na.rm = TRUE)
res[p, ] <- out
}
return(res)
}
#' Treat the optional "G" argument for psex
#'
#' @param G either NULL or an integer vector that can be named or not
#' @param N an integer or integer vector
#' @param dat a data set
#' @param population a population name
#' @param method passed from psex
#'
#' @note
#' Public functions: psex
#' Private functions: none
#'
#' @return a value for G
#' @noRd
treat_G <- function(G, N, dat, population, method){
if (is.null(G)){
return(N)
} else if (length(G) == 1){
return(G)
} else if (!is.null(names(G)) && all(names(G) %in% popNames(dat))){
if (method == "multiple"){
G <- G[population]
} else {
G <- G[pop(dat)]
}
} else if (length(G) > 1){
stop("G must be a named vector of integers.")
} else {
stop("G must be NULL or an integer vector of length 1 or nPop(gid)",
call. = FALSE)
}
G
}
#==============================================================================#
# replace missing allele frequencies with a specified value
#
# @param i the name or index of a locus
# @param loci a list of allele frequencies by locus derived from the round-robin
# method
# @param e a value to replace zero-value frequencies with
# @param sum_to_one a boolean indicating whether or not allele frequencies at a
# single locus should sum to one.
#
# Public functions utilizing this function:
# ## rare_allele_correction
#
# Internal functions utilizing this function:
# ## none
#==============================================================================#
replace_zeroes <- function(i, loci, e, sum_to_one = FALSE){
locus <- loci[[i]]
missing_alleles <- locus <= .Machine$double.eps^0.5
locus[missing_alleles] <- e[i]
if (sum_to_one){
locus <- locus/sum(locus)
}
return(locus)
}
#==============================================================================#
# prepare the value to set the minor allele
#
# @param rrmlg a matrix derived from rrmlg
# @param d one of "sample", "mlg", or "rrmlg"
# @param m a multiplier
# @param mlg an integer or NULL specifying the number of unique multilocus
# genotypes in the sample.
#
# Public functions utilizing this function:
# ## rare_allele_correction
#
# Internal functions utilizing this function:
# ## none
#==============================================================================#
get_minor_allele_replacement <- function(rrmlg, d, m, mlg = NULL){
if (d == "sample"){
e <- (1/nrow(rrmlg)) * m
} else if (d == "mlg"){
e <- (1/mlg) * m
} else {
clones <- !apply(rrmlg, 2, duplicated)
e <- setNames((1/colSums(clones)) * m, colnames(rrmlg))
}
return(e)
}
#==============================================================================#
# Get the index of association from sums of distances over loci and samples
#
# This will take in a list called that contains 3 vectors:
# 1. d.vector : a vector containing sums of distances per locus
# 2. d2.vector : like d.vector, but containing sums of squares
# 3. D.vector : a vector containing the distance over all samples.
#
# Public functions utilizing this function:
# ## none
#
# Internal functions utilizing this function:
# ## ia_pair_loc
#==============================================================================#
ia_from_d_and_D <- function(V, np){
varD <- ((sum(V$D.vector^2) - ((sum(V$D.vector))^2)/np))/np
vard.vector <- ((V$d2.vector - ((V$d.vector^2)/np))/np)
vardpair.vector <- .Call("pairwise_covar", vard.vector, PACKAGE = "poppr")
sigVarj <- sum(vard.vector)
rm(vard.vector)
Ia <- (varD/sigVarj) - 1
rbarD <- (varD - sigVarj)/(2 * sum(vardpair.vector))
return(c(Ia, rbarD))
}
#' scans genlight objects for genotypes that have no alternate homozygous sites
#' and manually adds a vector of zeroes in for bitwise functions to work.
#'
#' @param x a genlight or snpclone object
#'
#' @return a genlight object
#' @noRd
#'
fix_uneven_diploid <- function(x){
snplens <- lengths(lapply(x$gen, slot, "snp"))
if (any(snplens == 1)) {
replacement <- as.raw(rep(0, length(x$gen[[1]]$snp[[1]])))
for (i in which(snplens == 1)){
x$gen[[i]]$snp[[2]] <- replacement
}
}
return(x)
}
#' MLG index handler for the "[[" methods
#'
#' This handles the indices for MLGs within the subset methods since they don't
#' know what the sample names are.
#' @param i a vector used to subset data
#' @param gid a genclone or snpclone object
#'
#' @return a vector of integers or logicals
#' @noRd
#'
#' @seealso \code{\link{initialize,genclone-method}}
#' \code{\link{initialize,snpclone-method}}
handle_mlg_index <- function(i, gid){
if (is.logical(i) || is.numeric(i)){
return(i)
}
i <- if (is.factor(i)) as.character(i) else i
i <- match(i, indNames(gid)) # match to the index names
i <- i[!is.na(i)] # remove missing indices
i <- if (length(i) == 0) TRUE else i # ignore if no result
return(i)
}
#' Population index handler for the "[[" methods
#'
#' If the user specifies populations, it takes priority
#'
#' @param pops vector of population indices or names
#' @param gid a genclone or snpclone object
#'
#' @return a vector of logicals for each sample
#' @noRd
#' @seealso \code{\link{initialize,genclone-method}}
#' \code{\link{initialize,snpclone-method}}
handle_pops_index <- function(pops, gid){
if (!is.null(pops) && !is.null(pop(gid))){
pops <- if (is.factor(pops)) as.character(pops) else pops
pops <- if (!is.character(pops)) popNames(gid)[pops] else pops
newi <- pop(gid) %in% pops
}
return(newi)
}
#' Should poppr be quiet?
#'
#' If it's not an interactive session or it's in a knitr document, messages in
#' poppr should be suppressed UNLESS poppr.debug is set to TRUE
#'
#' @return TRUE or FALSE
#' @noRd
#'
should_poppr_be_quiet <- function(quiet){
# Suppress the noise if it's not interactive or in knitr
# This is thanks to @jimhester
# https://github.com/hadley/dplyr/commit/c8beb59217620614b36cd82df0a7e89c556fb374
in_knitr <- !is.null(getOption("knitr.in.progress"))
in_script <- !interactive()
poppr_debug <- getOption("poppr.debug")
if ((in_script || in_knitr) && !poppr_debug){
quiet <- TRUE
}
return(quiet)
}
#' Set the population from a formula or vector
#'
#' @param gid a genind/genclone/genlight/snpclone object
#' @param pop a formula or factor specifying population
#'
#' @return the object with the population set in the pop slot
#' @noRd
set_pop_from_strata_or_vector <- function(gid, pop) {
if (is.language(pop)){ # incoming is a formula, e.g. ~Country/Year
setPop(gid) <- pop
} else { # incoming is a vector
pop(gid) <- pop
}
gid
}
# poppr's theme for ggplot2 (mainly rotating x axis labels)
myTheme <- theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
#' Calculate Psex for a given genotype
#'
#' @param n_encounters integer the number of observations of an MLG
#' @param p_genotype numeric the probability of the given MLG
#' @param sample_ids list the names of the given MLG
#' @param n_samples integer the number of samples observed
#'
#' @return a vector of probabilities
#' @noRd
#'
#' @examples
#' make_psex(10, 0.005, n_samples = 100)
make_psex <- function(n_encounters, p_genotype, sample_ids = NULL, n_samples){
encounters <- seq(n_encounters) - 1
out <- dbinom(encounters, n_samples, p_genotype)
names(out) <- sample_ids
return(out)
}
cromulent_replen <- function(gid, replen){
the_loci <- locNames(gid)
if (length(replen) != nLoc(gid) && is.null(names(replen))){
msg <- mismatched_repeat_length_warning(replen, nLoc(gid))
stop(msg, call. = FALSE)
} else if (length(replen) < nLoc(gid)){
msg <- mismatched_repeat_length_warning(replen, nLoc(gid))
stop(msg, call. = FALSE)
} else if (length(replen) > nLoc(gid)){
msg <- trimmed_repeats_warning(replen, the_loci)
warning(msg, call. = FALSE, immediate. = TRUE)
}
new_replen <- match_replen_to_loci(the_loci, replen)
return(new_replen)
}
#' calculate the distance between two circles
#'
#' @param c1 radius of circle 1
#' @param c2 radius of circle 2
#'
#' @return the distance between two centers on one axis
#' @noRd
#' @details
#' https://stackoverflow.com/a/14830596/2752888
cdist <- function(c1, c2){
a <- (c1 + c2) ^ 2
b <- (c1 - c2) ^ 2
sqrt(a - b)
}
# spread the circles out
#' Arrange circles adjacent to each other
#'
#' @param radii a vector indicating the radii of the circles to be arranged.
#'
#' @return a vector of positions on which to plot the centers of circles
#' @noRd
make_adjacent_circles <- function(radii){
res <- vapply(seq(radii), function(i) {
if (i == 1)
0.0
else
cdist(radii[i], radii[i - 1])
}, numeric(1))
cumsum(res)
}
#' Make labels for circles
#'
#' This function takes in a range of numbers and figures out three circles that
#' exist in these numbers to create a legend using the range and approximate
#' mean of the numbers
#'
#' @param mlg_number a vector of integers
#'
#' @return 1, 2, or 3 integers.
#' @noRd
#'
#' @examples
#' set.seed(40)
#' make_circle_labs(sample(100, 50, replace = TRUE))
make_circle_labs <- function(mlg_number){
labs <- range(mlg_number)
if (diff(labs) > 1) {
m <- mean(labs)
m <- mlg_number[which.min(abs(mlg_number - m))]
labs <- c(labs[1], m, labs[2])
} else if (diff(labs) == 0) {
labs <- labs[1]
}
labs
}
#' Get the correct side of the legend box
#'
#' @param a the result of a "legend" call
#' @param side either "top", "bottom", "right", or "left"
#'
#' @return a vector of at max length 4
#' @noRd
#'
#' @examples
#' plot(seq(-1, 1), seq(-1, 1), asp = 1)
#' tr <- legend("topright", fill = "blue", legend = "blue")
#' tl <- legend("topleft", fill = "blue", legend = "blue")
#' bl <- legend("bottomleft", fill = "blue", legend = "blue")
#' br <- legend("bottomright", fill = "blue", legend = "blue")
#' ce <- legend("center", fill = "blue", legend = "blue")
#' points(x = get_legend_side(ce, 3:4),
#' y = get_legend_side(ce, 1:2),
#' col = c("red", "blue"))
#' points(x = get_legend_side(tl, 3:4),
#' y = get_legend_side(tl, 1:2),
#' col = c("red", "blue"))
#' points(x = get_legend_side(bl, 3:4),
#' y = get_legend_side(bl, 1:2),
#' col = c("red", "blue"))
#' points(x = get_legend_side(br, 3:4),
#' y = get_legend_side(br, 1:2),
#' col = c("red", "blue"))
#' points(x = get_legend_side(tr, 3:4),
#' y = get_legend_side(tr, 1:2),
#' col = c("red", "blue"))
get_legend_side <- function(a, side = NULL) {
res <- c(
top = a$rect$top,
bottom = a$rect$top - a$rect$h,
right = a$rect$left + a$rect$w,
left = a$rect$left
)
return( if (is.null(side)) res else res[side] )
}
#' Create a circle legend
#'
#' @param a the output of a "legend" call. Defaults to `NULL`.
#' @param mlg_number a vector of integers
#' @param scale a number by which to multiply the node sizes
#' @param cex character expansion for the text
#' @param radmult multiplier for the radius (specifically for [plot_poppr_msn])
#' @param xspace the defined xspacer (currently is wonky :/)
#' @param font font for the title
#' @param pos a numeric position for the title or NULL
#' @param x the position for the leftmost part of the legend
#' @param y the position of the topmost part of the legend
#' @param txt The position of the text
#'
#' @details
#' If a legend is provided in `a`, then the position of the circle legend will
#' be below it. If not, the arguments left, top, and txt will control the
#' position of the text within the box.
#'
#' @return a legend with circles
#'
#' @noRd
make_circle_legend <-
function(a = NULL,
mlg_number,
scale = 5,
cex = 0.75,
radmult = 1,
xspace = NULL,
font = 1,
pos = NULL,
x = -1.55,
y = 1,
txt = -1.3) {
# Create example circles for comparison
labs <- make_circle_labs(mlg_number)
rads <- (sqrt(labs) * scale)/200
if (!is.null(a)){
txt <- a$text$x[1]
left <- get_legend_side(a, "left")
bottom <- get_legend_side(a, "bottom")
} else {
left <- x
bottom <- y
}
# Get the space between legend elements
yspace <- if (!is.null(a)) diff(a$text$y) else 0.05
yspace <- if (length(yspace) > 0L) min(abs(yspace)) else 0.05
xspace <- if (is.null(xspace)) 0.25 * yspace else 2 * xspace * yspace
# Create positions of circles horizontally
diam <- max(rads) * 2
big_circles <- diam > abs(txt - left)/2
circlx <- if (big_circles) left + diam + xspace else txt
circlx <- rep(circlx, length(rads))
# shift the y position of the circles
cpos <- make_adjacent_circles(rads)
circly <- bottom - ((2.5 * yspace) + max(cpos) + max(rads)/2)
circly <- cpos + circly
# Create the circle legend
xpos <- if (is.null(pos)) left else pos
tadj <- if (is.null(pos)) c(0, 0.5) else c(0.5, 0.5)
graphics::text(x = xpos,
y = bottom - yspace,
label = "Samples/Node",
adj = tadj,
font = font,
cex = cex)
graphics::symbols(x = circlx - (rads * radmult + xspace),
y = circly,
circles = rads * radmult,
add = TRUE,
inches = FALSE,
asp = 1)
graphics::text(x = circlx,
y = circly,
labels = labs,
adj = c(0, 0.5),
cex = cex)
}
make_progress <- function(reps, steps = 50) {
step <- reps/steps
scale <- if (step < 1) step else 1
p <- progressr::progressor(steps, scale = scale)
list(rog = p, step = if (step < 1) step else round(step))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.