Nothing
#' Converts a genlight object to nexus format PAUP SVDquartets
#'
#' The output nexus file contains the SNP data in one of two forms, depending
#' upon what you regard as most appropriate. One form, that used by Chifman and
#' Kubatko, has two lines per individual, one providing the reference SNP the
#' second providing the alternate SNP (method=1).
#'
#' A second form, recommended by Dave Swofford, has a single line per
#' individual, resolving heterozygous SNPs by replacing them with standard
#' ambiguity codes (method=2).
#'
#' If the data are tag presence/absence, then method=2 is assumed.
#'
#' Note that the genlight object must contain at least two populations for this
#' function to work.
#'
#' @references Chifman, J. and L. Kubatko. 2014. Quartet inference from SNP data
#' under the coalescent. Bioinformatics 30: 3317-3324
#'
#' @param x Name of the genlight object containing the SNP data or tag P/A data
#' [required].
#' @param outfile File name of the output file (including extension)
#' [default 'svd.nex'].
#' @param outpath Path where to save the output file
#' [default tempdir(), mandated by CRAN]. Use outpath=getwd() when calling this
#' function or set.tempdir <- getwd() elsewhere in your script to direct output
#' files to your working directory.
#' @param method Method = 1, nexus file with two lines per individual; method =
#' 2, nexus file with one line per individual, ambiguity codes for SNP
#' genotypes, 0 or 1 for presence/absence data [default 2].
#' @param verbose Verbosity: 0, silent or fatal errors; 1, begin and end; 2,
#' progress log; 3, progress and results summary; 5, full report
#' [default 2 or as specified using gl.set.verbosity]
#' @return returns no value (i.e. NULL)
#' @export
#' @author Custodian: Arthur Georges (Post to
#' \url{https://groups.google.com/d/forum/dartr})
#' @examples
#' gg <- testset.gl[1:20,1:100]
#' gg@other$loc.metrics <- gg@other$loc.metrics[1:100,]
#' gl2svdquartets(gg)
gl2svdquartets <- function(x,
outfile = "svd.nex",
outpath = tempdir(),
method = 2,
verbose = NULL) {
outfilespec <- file.path(outpath, outfile)
# SET VERBOSITY
verbose <- gl.check.verbosity(verbose)
# FLAG SCRIPT START
funname <- match.call()[[1]]
utils.flag.start(func = funname,
build = "Jody",
verbosity = verbose)
# CHECK DATATYPE
datatype <- utils.check.datatype(x, verbose = verbose)
# Check for monomorphic loci
if (!x@other$loc.metrics.flags$monomorphs) {
cat(warn(
" Warning: genlight object may contain monomorphic loci\n"
))
}
# FUNCTION SPECIFIC ERROR CHECKING
if (method < 0 | method > 2) {
cat(
warn(
" Warning: method must be either 1 or 2, set to 2, one line per individual using ambiguity codes for SNP data, 0 or 1 for tag P/A data \n"
)
)
method <- 2
}
if(nPop(x)<2){
cat(error(" Fatal error: The genlight object contains only one population. At least two populations are needed for this function to work. \n"))
stop()
}
# DO THE JOB
######## SNP data
if (all(x@ploidy == 2)) {
if (verbose >= 2) {
cat(report(
paste(
" Extacting SNP bases and creating records for each individual\n"
)
))
}
# Extract the reference base and the alternate base for each locus snp <- as.character(x@other$loc.metrics$SNP) snp <-
# strsplit(snp,':') snp <- unlist(lapply(snp, `[`, 2)) snp <- strsplit(snp,'>')
ref <-
unname(sapply(x@loc.all, function(x)
strsplit(x, split = "/")[1][[1]][1]))
# ref <- unlist(lapply(snp, `[`, 1))
alt <-
unname(sapply(x@loc.all, function(x)
strsplit(x, split = "/")[1][[1]][2]))
# alt <- unlist(lapply(snp, `[`, 2))
# Sort the data on population
if (verbose >= 2) {
cat(report(paste(" Sorting ....\n")))
}
df <- data.frame(as.matrix(x))
df <- cbind(indNames(x), pop(x), df)
df <- df[order(df$pop), ]
indlabels <- df[, 1]
poplabels <- df[, 2]
m <- df[, 3:(nLoc(x) + 2)]
# Create a lookup table for the ambiguity codes
if (verbose >= 2) {
cat(report(
paste(" Creating lookup table for ambiguity codes\n")
))
}
# A T G C A A W R M) T W T K Y G R K G S C M Y S C
conversion <-
matrix(
c(
"A",
"W",
"R",
"M",
"W",
"T",
"K",
"Y",
"R",
"K",
"G",
"S",
"M",
"Y",
"S",
"C"
),
nrow = 4,
ncol = 4
)
colnames(conversion) <- c("A", "T", "G", "C")
rownames(conversion) <- colnames(conversion)
# Add individual labels to sequences
refseq <- array(data = NA, dim = nInd(x))
altseq <- array(data = NA, dim = nInd(x))
ambseq <- array(data = NA, dim = nInd(x))
if (method == 1) {
for (ind in 1:nInd(x)) {
refseq[ind] <- paste0(indlabels[ind], "_1 ")
altseq[ind] <- paste0(indlabels[ind], "_2 ")
}
} else {
for (ind in 1:nInd(x)) {
ambseq[ind] <- paste0(indlabels[ind], " ")
}
}
# progressively add the bases
if (verbose >= 2) {
cat(report(
paste(
" Applying ambiguity codes (if method=2) and reconstructing sequences\n"
)
))
}
for (ind in 1:nInd(x)) {
for (loc in 1:nLoc(x)) {
if (is.na(m[ind, loc])) {
refseq[ind] <- paste0(refseq[ind], "?")
altseq[ind] <- paste0(altseq[ind], "?")
ambseq[ind] <- paste0(ambseq[ind], "?")
} else if (m[ind, loc] == 0) {
refseq[ind] <- paste0(refseq[ind], ref[loc])
altseq[ind] <- paste0(altseq[ind], ref[loc])
ambseq[ind] <- paste0(ambseq[ind], ref[loc])
} else if (m[ind, loc] == 1) {
refseq[ind] <- paste0(refseq[ind], ref[loc])
altseq[ind] <- paste0(altseq[ind], alt[loc])
ambseq[ind] <-
paste0(ambseq[ind], conversion[ref[loc], alt[loc]])
} else if (m[ind, loc] == 2) {
refseq[ind] <- paste0(refseq[ind], alt[loc])
altseq[ind] <- paste0(altseq[ind], alt[loc])
ambseq[ind] <- paste0(ambseq[ind], alt[loc])
} else {
cat(error(
"Fatal Error: SNP must be scored as 0, 1, 2 or NA\n"
))
stop()
}
}
}
}
if (all(x@ploidy == 1)) {
# progressively add the scores (0 0r 1 or NA)
if (verbose >= 2) {
cat(report(
paste(" Constructing genotypes for each individual\n")
))
}
str <- array(NA, nInd(x))
for (ind in 1:nInd(x)) {
str[ind] <-
paste(as.character(as.matrix(x)[ind, ]),
collapse = "",
sep = "")
str[ind] <- gsub("NA", "?", str[ind])
str[ind] <- paste(indNames(x)[ind], " ", str[ind])
}
ambseq <- str
poplabels <- pop(x)
}
# Create the taxpartition (popname : 25-60)
if (verbose >= 2) {
cat(report(paste(" Creating partition table\n")))
}
a <- array(data = NA, dim = length(poplabels))
b <- array(data = NA, dim = length(poplabels))
a[1] <- 1
b <- table(poplabels)
for (i in 2:length(b)) {
b[i] <-b[i] + b[i - 1]
a[i] <-b[i - 1] + 1
}
plabels <- unique(poplabels)
# Create the svd file
if (verbose > 1) {
cat(report(
paste(" Writing results to svd nexus file", outfilespec, "\n")
))
}
sink(outfilespec)
cat("#nexus\n")
cat("BEGIN DATA;\n")
if (method == 1) {
cat(paste0(" dimensions ntax = ", 2 * nInd(x), " nchar = ", nLoc(x), " ;\n"))
} else {
cat(paste0(" dimensions ntax = ", nInd(x), " nchar = ", nLoc(x), " ;\n"))
}
cat(" format datatype = dna gap = - ;\n\n")
cat("matrix\n")
if (method == 1) {
for (i in 1:nInd(x)) {
cat(paste0(refseq[i], "\n"))
cat(paste0(altseq[i], "\n"))
}
} else {
for (i in 1:nInd(x)) {
cat(paste0(ambseq[i], "\n"))
}
}
cat(";\n")
cat("end;\n\n")
cat("begin sets;\n")
cat(" taxpartition pops =\n")
for (i in 1:(length(plabels) - 1)) {
cat(paste0(" ", plabels[i], " : ", a[i], "-", b[i], ",\n"))
}
cat(" ", paste0(plabels[length(plabels)], " : ", a[length(plabels)], "-", b[length(plabels)], ";\n"))
cat("end;\n\n")
cat("begin paup;\n")
cat("log file=svd.txt;\n")
cat("lset nthreads=3;\n")
cat("SVDQuartets\n")
cat(" evalQuartets=random\n")
cat(" speciesTree=yes\n")
cat(" partition=pops\n")
cat(" bootstrap=standard\n")
cat(" nreps=10000\n")
cat(" ambigs=distribute\n")
cat(" treeFile=svd.tre;\n")
cat("savetrees file=svd_boot.tre from=1 to=1 maxdecimals=2;\n")
cat("log stop;\n")
cat("quit;\n")
cat("end;\n")
sink()
if (verbose > 2) {
cat(report(
paste(" Records written to", outfilespec, ":", nInd(x), "\n")
))
}
# FLAG SCRIPT END
if (verbose > 0) {
cat(report("Completed:", funname, "\n"))
}
return(NULL)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.