##' Concenate marker alignements to generate a single alignment.
##'
##' Given a series of files generated by \sQuote{mergeSeq}, this
##' function creates an alignment from these files. WARNING: the files
##' for the independent loci are assumed to be created using the
##' function mergeSeq, meaning that the loci is indicated in 3rd
##' position, separated by an hyphen e.g. 20120808-101010-COI.fas
##' @title Concatenate alignments
##' @param pattern character string to identify aligned files in folder
##' @param path path where the alignment files are to be found
##' @param output file name (with full path) for the concatenated file
##' @param input.format format of the input DNA alignment (to be
##' passed to ape:::read.dna)
##' @param partition if NULL no partition file is created, otherwise a
##' character string indicating the file name where to write the
##' partitions
##' @param partition.format which format should be used to create the
##' partition file (raxml or nexus)
##' @param create.conc if TRUE create the concatenated file, if FALSE
##' useful for testing (dry run) or to create partition file
##' @param standardize not needed if alignments created by
##' \sQuote{mergeSeq}. Otherwise the alignments names are standardized
##' for each of the individual markers by generating empty sequences.
##' @param drop a character vector indicating the sequences that need
##' to be removed from the output
##' @param ... further arguments to be passed to write.dna for the
##' output file
##' @return This function is used for its side effect of creating an
##' alignment from a set of alignemnts (marker based). It can also
##' create a partition file that can added at the end of the alignment
##' to specify where each marker starts and ends.
##' @author Francois Michonneau
##' @export
##' @examples
##' \dontrun{
##' concatenateAlignments(pattern="^20121017-112755.+afa$", path="/tmp/seq",
##' output="/tmp/seq/20121017.impatiens.fas",
##' partition="/tmp/seq/20121017.impatiens.part",
##' partition.format="nexus", colsep="", colw=10000)
##' }
concatenateAlignments <- function(pattern, path, output,
input.format = "fasta",
partition = NULL,
partition.format = c("raxml", "nexus"),
create.conc = TRUE,
standardize=FALSE,
drop = NULL, ...) {
partition.format <- match.arg(partition.format)
lAlg <- list.files(path = path, pattern = pattern)
if (length(lAlg) < 2) {
stop("only 1 file")
}
else {
firstF <- lAlg[1]
sizeAlg <- numeric(length(lAlg))
firstAlg <- read.dna(file = file.path(path, lAlg[1]), format = input.format, as.character=TRUE)
sizeAlg[1] <- dim(firstAlg)[2]
if (standardize) {
seqNm <- vector("list", length(lAlg))
seqNm[[1]] <- dimnames(firstAlg)[[1]]
for (i in 2:length(lAlg)) {
tmpAlg <- read.dna(file = file.path(path, lAlg[i]), format = input.format)
seqNm[[i]] <- dimnames(tmpAlg)[[1]]
}
seqNm <- unique(unlist(seqNm))
missingInFirst <- setdiff(seqNm, dimnames(firstAlg)[[1]])
if (length(missingInFirst)) {
for (j in 1:length(missingInFirst)) {
firstAlg <- rbind(firstAlg, rep("-", dim(firstAlg)[2]))
dimnames(firstAlg)[[1]][length(dimnames(firstAlg)[[1]])] <- missingInFirst[j]
}
}
}
for (i in 2:length(lAlg)) {
tmpAlg <- read.dna(file = file.path(path, lAlg[i]), format = input.format, as.character=TRUE)
if (standardize) {
missingNm <- setdiff(seqNm, dimnames(tmpAlg)[[1]])
if (length(missingNm)) {
for (j in 1:length(missingNm)) {
tmpAlg <- rbind(tmpAlg, rep("-", dim(tmpAlg)[2]))
dimnames(tmpAlg)[[1]][length(dimnames(tmpAlg)[[1]])] <- missingNm[j]
}
}
}
tmpAlg <- tmpAlg[match(dimnames(firstAlg)[[1]], dimnames(tmpAlg)[[1]]), ]
firstAlg <- cbind(firstAlg, tmpAlg)
sizeAlg[i] <- dim(tmpAlg)[2]
}
if(create.conc) {
if (!is.null(drop)) {
toRm <- match(drop, dimnames(firstAlg)[[1]])
if (any(is.na(toRm))) {
stop("Some of the sequences specified in drop were not found.")
}
firstAlg <- firstAlg[-toRm, ]
}
write.dna(firstAlg, file = output, ...)
}
if (!is.null(partition)) {
cat(character(0), file = partition, append=FALSE)
locNm <- gsub("(.+)-(.+)-(.+)\\.(.+)", "\\3", lAlg)
first <- 1
if (partition.format == "nexus") {
cat("BEGIN ASSUMPTIONS;\n", file = partition, append = TRUE)
}
for (i in 1:length(locNm)) {
last <- first+sizeAlg[i]-1
if (partition.format == "raxml") {
cat(paste("DNA,",locNm[i], "=", first, "-", last, "\n", sep = ""), file = partition, append = TRUE)
}
else if (partition.format == "nexus") {
cat(paste(" charset ", locNm[i], " = ", first, "-", last, ";\n", sep = ""), file = partition, append = TRUE)
}
first <- last+1
}
if (partition.format == "nexus") {
cat("END;\n", file = partition, append = TRUE)
}
}
TRUE
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.