R/splitMultiAlignments.R

##' Split a file containing multiple alignments into multiple files.
##'
##' This function reads a file containing multiple alignments in the
##' phylip format (as generated by seq-gen) and breaks each replicate
##' into its own file. The files are named by putting together a
##' prefix (specified by the user), the string \sQuote{rep} (stands
##' for replicate), the replicate number, the name of the marker/locus
##' (which should be part of the file name between an underscore and
##' \sQuote{.phy}, e.g., \code{myalignment_marker.phy}).
##' @title splitMultiAlignments -- split multiple alignments into
##' individual files
##' @param file the name of the file containing the multiple
##' alignments to be split.
##' @param prefix the prefix used for each of the output file names
##' @param pathin path (i.e., directory) where the replicates for each
##' locus are stored
##' @param pathout path (i.e., directory) where the individual
##' replicates for each locus are going to be stored.
##' @return TRUE, but really used for its side effect of generating
##' individual alignment files.
##' @author Francois Michonneau
##' @export
splitMultiAlignments <- function(file, prefix, pathin, pathout) {
    infile <- file.path(pathin, file)

    ## Read the input as a character vector, one element per line
    ## (scan() skips blank lines by default).
    f <- scan(infile, what="character", sep="\n", quiet=TRUE)

    ## Each replicate starts with a header line made of two integers
    ## separated by a single space (optionally preceded by one
    ## whitespace character).
    begAlg <- grep("^\\s?[0-9]+\\s{1}[0-9]+$", f)
    if (length(begAlg) == 0L) {
        ## Fail with an explicit message instead of the obscure
        ## "NA/NaN argument" error raised by indexing with an empty match.
        stop("no alignment header found in ", infile, call. = FALSE)
    }

    ## A replicate ends on the line before the next header; the last
    ## replicate ends on the last line of the file.
    endAlg <- c(begAlg[-1] - 1, length(f))

    ## The marker name is the part of the file name located between the
    ## last underscore and the first following dot. Using the last
    ## underscore-delimited piece (rather than any piece containing
    ## "phy") avoids picking up an earlier part of the name such as
    ## "phylo" in "phylo_sim_COI.phy".
    pieces <- unlist(strsplit(file, "_"))
    marker <- unlist(strsplit(pieces[length(pieces)], "\\."))[1]

    ## Write each replicate to <pathout>/<prefix>-rep<i>-<marker>.phy
    for (i in seq_along(begAlg)) {
        fileI <- file.path(pathout,
                           paste0(prefix, "-rep", i, "-", marker, ".phy"))
        cat(f[begAlg[i]:endAlg[i]], file=fileI, sep="\n")
    }
    TRUE
}
fmichonneau/chopper documentation built on May 16, 2019, 1:43 p.m.