Nothing
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#
# This software was authored by Zhian N. Kamvar and Javier F. Tabima, graduate
# students at Oregon State University; Jonah C. Brooks, undergraduate student at
# Oregon State University; and Dr. Nik Grünwald, an employee of USDA-ARS.
#
# Permission to use, copy, modify, and distribute this software and its
# documentation for educational, research and non-profit purposes, without fee,
# and without a written agreement is hereby granted, provided that the statement
# above is incorporated into the material, giving appropriate attribution to the
# authors.
#
# Permission to incorporate this software into commercial products may be
# obtained by contacting USDA ARS and OREGON STATE UNIVERSITY Office for
# Commercialization and Corporate Development.
#
# The software program and documentation are supplied "as is", without any
# accompanying services from the USDA or the University. USDA ARS or the
# University do not warrant that the operation of the program will be
# uninterrupted or error-free. The end-user understands that the program was
# developed for research purposes and is advised not to rely exclusively on the
# program for any reason.
#
# IN NO EVENT SHALL USDA ARS OR OREGON STATE UNIVERSITY BE LIABLE TO ANY PARTY
# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
# LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION,
# EVEN IF THE OREGON STATE UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE. USDA ARS OR OREGON STATE UNIVERSITY SPECIFICALLY DISCLAIMS ANY
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY STATUTORY
# WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
# BASIS, AND USDA ARS AND OREGON STATE UNIVERSITY HAVE NO OBLIGATIONS TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#==============================================================================#
#' Calculate a distance matrix based on relative dissimilarity
#'
#' diss.dist uses the same discrete dissimilarity matrix utilized by the index
#' of association (see \code{\link{ia}} for details). By default, it returns a
#' distance reflecting the number of allelic differences between two
#' individuals. When \code{percent = TRUE}, it returns a ratio of the number of
#' observed differences by the number of possible differences. Eg. two
#' individuals who share half of the same alleles will have a distance of 0.5.
#' This function can analyze distances for any marker system.
#'
#' @param x a \code{\link{genind}} object.
#'
#' @param percent \code{logical}. Should the distance be represented as a
#' percent? If set to \code{FALSE} (default), the distance will be reflected
#' as the number of alleles differing between to individuals. When set to
#' \code{TRUE}, These will be divided by the ploidy multiplied by the number
#' of loci.
#'
#' @param mat \code{logical}. Return a matrix object. Default set to
#' \code{FALSE}, returning a dist object. \code{TRUE} returns a matrix object.
#'
#' @return Pairwise distances between individuals present in the genind object.
#' @author Zhian N. Kamvar
#'
#' @details The distance calculated here is quite simple and goes by many names,
#' depending on its application. The most familiar name might be the Hamming
#' distance, or the number of differences between two strings.
#'
#' @note When \code{percent = TRUE}, this is exactly the same as
#' \code{\link{provesti.dist}}, except that it performs better for large
#' numbers of individuals (n > 125) at the cost of available memory.
#'
#' @seealso \code{\link{prevosti.dist}},
#' \code{\link{bitwise.dist}} (for SNP data)
#' @examples
#'
#' # A simple example. Let's analyze the mean distance among populations of A.
#' # euteiches.
#'
#' data(Aeut)
#' mean(diss.dist(popsub(Aeut, 1)))
#' \dontrun{
#' mean(diss.dist(popsub(Aeut, 2)))
#' mean(diss.dist(Aeut))
#' }
#' @export
#==============================================================================#
diss.dist <- function(x, percent=FALSE, mat=FALSE){
stopifnot(is(x, "gen"))
ploid <- x@ploidy
if (is(x, "bootgen")){
ind.names <- x@names
} else {
ind.names <- indNames(x)
}
inds <- nrow(x@tab)
np <- choose(inds, 2)
dist.mat <- matrix(data = 0, nrow = inds, ncol = inds)
numLoci <- nLoc(x)
type <- x@type
if (type == "PA"){
dist_by_locus <- matrix(.Call("pairdiffs", x@tab))
ploid <- 1
} else if (is(x, "bootgen")){
dist_by_locus <- vapply(seq(numLoci), function(i){
.Call("pairdiffs", tab(x[, i]))/2
}, numeric(np))
} else {
x <- seploc(x)
dist_by_locus <- vapply(x, function(x) .Call("pairdiffs", x@tab)/2,
numeric(np))
}
if (is.matrix(dist_by_locus)){
dist.mat[lower.tri(dist.mat)] <- rowSums(ceiling(dist_by_locus))
} else {
dist.mat[lower.tri(dist.mat)] <- ceiling(dist_by_locus)
}
colnames(dist.mat) <- ind.names
rownames(dist.mat) <- ind.names
if (percent){
dist.mat <- sweep(dist.mat, 1, ploid * numLoci, "/")
}
dist.mat <- as.dist(dist.mat)
if (mat == TRUE){
dist.mat <- as.matrix(dist.mat)
}
return(dist.mat)
}
#==============================================================================#
#' Calculate Genetic Distance for a genind or genclone object.
#'
#' These functions are modified from the function \link[adegenet]{dist.genpop} to
#' be applicable for distances between individuals.
#'
#' @param x a \linkS4class{genind}, \linkS4class{genclone}, or matrix object.
#'
#' @param warning If \code{TRUE}, a warning will be printed if any infinite
#' values are detected and replaced. If \code{FALSE}, these values will be
#' replaced without warning. See Details below.
#'
#' @return an object of class dist with the same number of observations as the
#' number of individuals in your data.
#'
#' @details It is important to be careful with the interpretation of these
#' distances as they were originally intended for calculation of
#' between-population distance. As Nei's distance is the negative log of 0:1,
#' this means that it is very possible to obtain distances of infinity. When
#' this happens, infinite values are corrected to be 10 * max(D) where D is
#' the distance matrix without infinite values.
#'
#' @note Prevosti's distance is identical to \code{\link{diss.dist}}, except
#' that \code{\link{diss.dist}} is optimized for a larger number of
#' individuals (n > 125) at the cost of required memory. Both
#' \code{prevosti.dist} and \code{provesti.dist} are the same function,
#' \code{provesti.dist} is a spelling error and exists for backwards
#' compatibility.
#'
#' These distances were adapted from the \pkg{adegenet} function
#' \code{\link{dist.genpop}} to work with \code{\linkS4class{genind}} objects.
#'
#' @seealso \code{\link{aboot}} \code{\link{diss.dist}} \code{\link{poppr.amova}}
#' @rdname genetic_distance
#' @author Zhian N. Kamvar (poppr adaptation)
#' Thibaut Jombart (adegenet adaptation)
#' Daniel Chessel (ade4)
#'
#' @keywords nei rogers rodgers reynolds coancestry edwards angular provesti
#' @references
#' Nei, M. (1972) Genetic distances between populations. American Naturalist,
#' 106, 283-292.
#'
#' Nei M. (1978) Estimation of average heterozygosity and genetic
#' distance from a small number of individuals. Genetics, 23, 341-369.
#'
#' Avise, J. C. (1994) Molecular markers, natural history and evolution. Chapman & Hall,
#' London.
#'
#' Edwards, A.W.F. (1971) Distance between populations on the basis of gene
#' frequencies. Biometrics, 27, 873-881.
#'
#' Cavalli-Sforza L.L. and Edwards A.W.F.
#' (1967) Phylogenetic analysis: models and estimation procedures. Evolution,
#' 32, 550-570.
#'
#' Hartl, D.L. and Clark, A.G. (1989) Principles of population
#' genetics. Sinauer Associates, Sunderland, Massachussetts (p. 303).
#'
#' Reynolds, J. B., B. S. Weir, and C. C. Cockerham. (1983) Estimation of the
#' coancestry coefficient: basis for a short-term genetic distance. Genetics,
#' 105, 767-779.
#'
#' Rogers, J.S. (1972) Measures of genetic similarity and genetic distances.
#' Studies in Genetics, Univ. Texas Publ., 7213, 145-153.
#'
#' Avise, J. C. (1994)
#' Molecular markers, natural history and evolution. Chapman & Hall, London.
#'
#' Prevosti A. (1974) La distancia genetica entre poblaciones. Miscellanea
#' Alcobe, 68, 109-118.
#'
#' Prevosti A., Ocana J. and Alonso G. (1975) Distances
#' between populations of Drosophila subobscura, based on chromosome
#' arrangements frequencies. Theoretical and Applied Genetics, 45, 231-241.
#'
#' For more information on dissimilarity indexes:
#'
#' Gower J. and Legendre P. (1986)
#' Metric and Euclidean properties of dissimilarity coefficients. Journal of
#' Classification, 3, 5-48
#'
#' Legendre P. and Legendre L. (1998) Numerical Ecology,
#' Elsevier Science B.V. 20, pp274-288.
#' @export
#' @examples
#'
#' data(nancycats)
#' (nan9 <- popsub(nancycats, 9))
#' (neinan <- nei.dist(nan9))
#' (ednan <- edwards.dist(nan9))
#' (rodnan <- rogers.dist(nan9))
#' (reynan <- reynolds.dist(nan9))
#' (pronan <- prevosti.dist(nan9))
#'
#==============================================================================#
nei.dist <- function(x, warning = TRUE){
if (is(x, "gen")){
MAT <- get_gen_mat(x)
} else if (length(dim(x)) == 2){
MAT <- x
} else {
stop("Object must be a matrix or genind object")
}
IDMAT <- MAT %*% t(MAT)
vec <- sqrt(diag(IDMAT))
IDMAT <- IDMAT/vec[col(IDMAT)]
IDMAT <- IDMAT/vec[row(IDMAT)]
D <- -log(IDMAT)
if (any(D %in% Inf)){
D <- infinite_vals_replacement(D, warning)
}
D <- as.dist(D)
labs <- get_gen_dist_labs(x)
D <- make_attributes(D, length(labs), labs, "Nei", match.call())
return(D)
}
# modified from adegenet dist.genpop
#' @rdname genetic_distance
#' @export
edwards.dist <- function(x){
if (is(x, "gen")){
MAT <- get_gen_mat(x)
nloc <- nLoc(x)
} else if (length(dim(x)) == 2){
MAT <- x
nloc <- ncol(x)
} else {
stop("Object must be a matrix or genind object")
}
MAT <- sqrt(MAT)
D <- MAT%*%t(MAT)
D <- 1 - D/nloc # Negative number are generated here.
diag(D) <- 0
D <- sqrt(D)
D <- as.dist(D)
labs <- get_gen_dist_labs(x)
D <- make_attributes(D, length(labs), labs, "Edwards", match.call())
return(D)
}
#' @rdname genetic_distance
#' @export
rogers.dist <- function(x){
if (is(x, "gen")){
if (is.genind(x) && x@type == "PA"){
MAT <- x@tab
nloc <- nLoc(x)
loc.fac <- factor(locNames(x), levels = locNames(x))
nlig <- nrow(x@tab)
} else {
MAT <- get_gen_mat(x)
nloc <- nLoc(x)
loc.fac <- x@loc.fac
nlig <- nrow(x@tab)
}
} else if (length(dim(x)) == 2){
MAT <- x
nloc <- ncol(x)
loc.fac <- factor(colnames(x), levels = colnames(x))
nlig <- nrow(x)
} else {
stop("Object must be a matrix or genind object")
}
# kX is a list of K=nloc matrices
kX <- lapply(split(MAT, loc.fac[col(MAT)]), matrix, nrow = nlig)
D <- matrix(0, nlig, nlig)
for(i in 1:length(kX)){
D <- D + dcano(kX[[i]])
}
D <- D/length(kX)
D <- as.dist(D)
labs <- get_gen_dist_labs(x)
D <- make_attributes(D, nlig, labs, "Rogers", match.call())
return(D)
}
#' @rdname genetic_distance
#' @export
reynolds.dist <- function(x){
if (is(x, "gen")){
MAT <- get_gen_mat(x)
nloc <- nLoc(x)
} else if (length(dim(x)) == 2){
MAT <- x
nloc <- ncol(x)
} else {
stop("Object must be a matrix or genind object")
}
denomi <- MAT %*% t(MAT)
vec <- apply(MAT, 1, function(x) sum(x*x))
D <- -2*denomi + vec[col(denomi)] + vec[row(denomi)]
diag(D) <- 0
denomi <- 2*nloc - 2*denomi
diag(denomi) <- 1
D <- D/denomi
D <- sqrt(D)
D <- as.dist(D)
labs <- get_gen_dist_labs(x)
D <- make_attributes(D, length(labs), labs, "Reynolds",
match.call())
return(D)
}
#' @rdname genetic_distance
#' @export
provesti.dist <- function(x){
if (is(x, "gen")){
MAT <- get_gen_mat(x)
nlig <- nrow(tab(x))
nloc <- nLoc(x)
ploid <- x@ploidy
} else if (length(dim(x)) == 2){
MAT <- x
nlig <- nrow(x)
nloc <- ncol(x)
} else {
stop("Object must be a matrix or genind object")
}
w0 <- 1:(nlig-1)
loca <- function(k, nlig, MAT, nLoc){
w1 <- (k+1):nlig
resloc <- vapply(w1, function(y) sum(abs(MAT[k, ] - MAT[y, ]), na.rm = TRUE), numeric(1))
if (is(x, "gen") && x@type == "codom"){
# This only applies to codominant data because dominant data can only take
# on a single state. Dividing by two indicates that the observations can
# occupy co-occurring states.
resloc <- resloc/2
}
return(resloc/nloc)
}
d <- unlist(lapply(w0, loca, nlig, MAT, nloc))
labs <- get_gen_dist_labs(x)
d <- make_attributes(d, nlig, labs, "Provesti", match.call())
return(d)
}
# Making an alias to correct the spelling to fix issue #65
prevosti.dist <- provesti.dist
#' @rdname genetic_distance
#' @export
"prevosti.dist"
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.