R/DNA.R

Defines functions solveAmbiguousBases latag2n .buildDegeneracyMatrix dnds rDNAbin as.matrix.AAbin as.list.AAbin as.AAbin.list cbind.AAbin rbind.AAbin c.AAbin all.equal.DNAbin checkAlignment image.AAbin AAsubst dist.aa as.phyDat.AAbin labels.AAbin as.AAbin.character as.AAbin as.character.AAbin print.AAbin trans complement as.AAbin.AAMultipleAlignment as.AAbin.AAStringSet as.AAbin.AAString .AAString2AAbin as.DNAbin.PairwiseAlignmentsSingleSubject as.DNAbin.DNAMultipleAlignment as.DNAbin.DNAStringSet as.DNAbin.DNAString .DNAString2DNAbin where alview image.DNAbin dist.dna seg.sites GC.content Ftab base.freq as.character.DNAbin as.DNAbin.list as.DNAbin.alignment as.DNAbin.character as.DNAbin print.DNAbin c.DNAbin cbind.DNAbin rbind.DNAbin as.list.DNAbin as.matrix.DNAbin as.alignment del.colgapsonly del.rowgapsonly del.gaps labels.DNAbin DNAbin2indel

Documented in AAsubst all.equal.DNAbin alview as.AAbin as.AAbin.AAMultipleAlignment as.AAbin.AAString as.AAbin.AAStringSet as.AAbin.character as.AAbin.list as.alignment as.character.AAbin as.character.DNAbin as.DNAbin as.DNAbin.alignment as.DNAbin.character as.DNAbin.DNAMultipleAlignment as.DNAbin.DNAString as.DNAbin.DNAStringSet as.DNAbin.list as.DNAbin.PairwiseAlignmentsSingleSubject as.list.AAbin as.list.DNAbin as.matrix.AAbin as.matrix.DNAbin as.phyDat.AAbin base.freq c.AAbin cbind.AAbin cbind.DNAbin c.DNAbin checkAlignment complement del.colgapsonly del.gaps del.rowgapsonly dist.aa dist.dna DNAbin2indel dnds Ftab GC.content image.AAbin image.DNAbin labels.AAbin labels.DNAbin latag2n print.AAbin print.DNAbin rbind.AAbin rbind.DNAbin rDNAbin seg.sites solveAmbiguousBases trans where

## DNA.R (2023-02-13)

##   Manipulations and Comparisons of DNA and AA Sequences

## Copyright 2002-2023 Emmanuel Paradis, 2015 Klaus Schliep, 2017 Franz Krah

## This file is part of the R-package `ape'.
## See the file ../COPYING for licensing issues.

DNAbin2indel <- function(x)
{
    if (is.list(x)) x <- as.matrix(x)
    d <- dim(x)
    s <- as.integer(d[2])
    n <- as.integer(d[1])
    if (s * n > 2^31 - 1)
        stop("DNAbin2indel() cannot handle more than 2^31 - 1 bases")
    res <- .C(DNAbin2indelblock, x, n, s, integer(n*s), NAOK = TRUE)[[4]]
    dim(res) <- d
    rownames(res) <- rownames(x)
    res
}

labels.DNAbin <- function(object, ...)
{
    if (is.list(object)) return(names(object))
    if (is.matrix(object)) return(rownames(object))
    NULL
}

del.gaps <- function(x)
{
    deleteGaps <- function(x) {
        i <- which(x == 4)
        if (length(i)) x[-i] else x
    }

    if (!inherits(x, "DNAbin")) x <- as.DNAbin(x)
    if (is.matrix(x)) {
        n <- dim(x)[1]
        y <- vector("list", n)
        for (i in 1:n) y[[i]] <- x[i, ]
        names(y) <- rownames(x)
        x <- y
        rm(y)
    }
    if (!is.list(x)) return(deleteGaps(x))
    x <- lapply(x, deleteGaps)
    class(x) <- "DNAbin"
    x
}

del.rowgapsonly <- function(x, threshold = 1, freq.only = FALSE)
{
    if (!inherits(x, "DNAbin")) x <- as.DNAbin(x)
    if (!is.matrix(x)) stop("DNA sequences not in a matrix")
    foo <- function(x) sum(x == 4)
    g <- apply(x, 1, foo)
    if (freq.only) return(g)
    i <- which(g / ncol(x) >= threshold)
    if (length(i)) x <- x[-i, ]
    x
}

del.colgapsonly <- function(x, threshold = 1, freq.only = FALSE)
{
    if (!inherits(x, "DNAbin")) x <- as.DNAbin(x)
    if (!is.matrix(x)) stop("DNA sequences not in a matrix")
    foo <- function(x) sum(x == 4)
    g <- apply(x, 2, foo)
    if (freq.only) return(g)
    i <- which(g / nrow(x) >= threshold)
    if (length(i)) x <- x[, -i]
    x
}

as.alignment <- function(x)
{
    if (is.list(x)) n <- length(x)
    if (is.matrix(x)) n <- dim(x)[1]
    seq <- character(n)
    if (is.list(x)) {
        nam <- names(x)
        for (i in 1:n)
          seq[i] <- paste(x[[i]], collapse = "")
    }
    if (is.matrix(x)) {
        nam <- dimnames(x)[[1]]
        for (i in 1:n)
          seq[i] <- paste(x[i, ], collapse = "")
    }
    obj <- list(nb = n, seq = seq, nam = nam, com = NA)
    class(obj) <- "alignment"
    obj
}

"[.DNAbin" <- function(x, i, j, drop = FALSE)
{
    ans <- NextMethod("[", drop = drop)
    class(ans) <- "DNAbin"
    ans
}

as.matrix.DNAbin <- function(x, ...)
{
    if (is.matrix(x)) return(x)
    if (!is.list(x)) { # vector
        dim(x) <- c(1, length(x))
        return(x)
    }
    s <- unique(lengths(x, use.names = FALSE))
    if (length(s) != 1)
        stop("DNA sequences in list not of the same length.")
    n <- length(x)
    y <- matrix(raw(), n, s)
    for (i in seq_len(n)) y[i, ] <- x[[i]]
    rownames(y) <- names(x)
    class(y) <- "DNAbin"
    y
}

as.list.DNAbin <- function(x, ...)
{
    if (is.list(x)) return(x)
    if (is.null(dim(x))) obj <- list(x) # cause is.vector() doesn't work
    else { # matrix
        class(x) <- NULL
        n <- nrow(x)
        obj <- vector("list", n)
        for (i in seq_len(n)) obj[[i]] <- x[i, , drop = TRUE]
        names(obj) <- rownames(x)
    }
    class(obj) <- "DNAbin"
    obj
}

rbind.DNAbin <- function(...)
{
    obj <- list(...)
    n <- length(obj)
    if (n == 1) return(obj[[1]])
    for (i in 1:n)
        if (!is.matrix(obj[[1]]))
            stop("the 'rbind' method for \"DNAbin\" accepts only matrices")
    NC <- unlist(lapply(obj, ncol))
    if (length(unique(NC)) > 1)
        stop("matrices do not have the same number of columns.")
    for (i in 1:n) class(obj[[i]]) <- NULL # safe but maybe not really needed
    structure(do.call(rbind, obj), class = "DNAbin")
}

cbind.DNAbin <-
    function(..., check.names = TRUE, fill.with.gaps = FALSE,
             quiet = FALSE)
{
    obj <- list(...)
    n <- length(obj)
    if (n == 1) return(obj[[1]])
    for (i in 1:n)
        if (!is.matrix(obj[[1]]))
            stop("the 'cbind' method for \"DNAbin\" accepts only matrices")
    NR <- unlist(lapply(obj, nrow))
    for (i in 1:n) class(obj[[i]]) <- NULL
    if (check.names) {
        NMS <- lapply(obj, rownames)
        for (i in 1:n)
            if (anyDuplicated(NMS[[i]]))
                stop("Duplicated rownames in matrix ", i, ": see ?cbind.DNAbin")
        nms <- unlist(NMS)
        if (fill.with.gaps) {
            NC <- unlist(lapply(obj, ncol))
            nms <- unique(nms)
            ans <- matrix(as.raw(4), length(nms), sum(NC))
            rownames(ans) <- nms
            from <- 1
            for (i in 1:n) {
                to <- from + NC[i] - 1
                k <- match(NMS[[i]], nms)
                ans[k, from:to] <- obj[[i]]
                from <- to + 1
            }
        } else {
            tab <- table(nms)
            ubi <- tab == n
            nms <- names(tab)[which(ubi)]
            ans <- obj[[1]][nms, , drop = FALSE]
            for (i in 2:n)
                ans <- cbind(ans, obj[[i]][nms, , drop = FALSE])
            if (!quiet && !all(ubi))
                warning("some rows were dropped.")
        }
    } else {
        if (length(unique(NR)) > 1)
            stop("matrices do not have the same number of rows.")
        ans <- matrix(unlist(obj), NR)
        rownames(ans) <- rownames(obj[[1]])
    }
    class(ans) <- "DNAbin"
    ans
}

c.DNAbin <- function(..., recursive = FALSE)
{
    if (!all(unlist(lapply(list(...), is.list))))
        stop("the 'c' method for \"DNAbin\" accepts only lists")
    structure(NextMethod("c"), class = "DNAbin")
}

print.DNAbin <- function(x, printlen = 6, digits = 3, ...)
{
    if (is.list(x)) {
        n <- length(x)
        nms <- names(x)
        if (n == 1) {
            cat("1 DNA sequence in binary format stored in a list.\n\n")
            nTot <- length(x[[1]])
            cat("Sequence length:", nTot, "\n")
        } else {
            cat(n, "DNA sequences in binary format stored in a list.\n\n")
            tmp <- lengths(x, use.names = FALSE)
            nTot <- sum(as.numeric(tmp))
            mini <- min(tmp)
            maxi <- max(tmp)
            if (mini == maxi)
                cat("All sequences of same length:", maxi, "\n")
            else {
                cat("Mean sequence length:", round(mean(tmp), 3), "\n")
                cat("   Shortest sequence:", mini, "\n")
                cat("    Longest sequence:", maxi, "\n")
            }
        }
    } else {
        nTot <- length(x)
        if (is.matrix(x)) {
            nd <- dim(x)
            n <- nd[1]
            nms <- rownames(x)
            if (n == 1) {
                cat("1 DNA sequence in binary format stored in a matrix.\n\n")
                cat("Sequence length:", nd[2], "\n")
            } else {
                cat(n, "DNA sequences in binary format stored in a matrix.\n\n")
                cat("All sequences of same length:", nd[2], "\n")
            }
        } else {
            cat("1 DNA sequence in binary format stored in a vector.\n\n")
            cat("Sequence length:", nTot, "\n\n")
        }
    }
    if (exists("nms")) {
        HEAD <- if (n == 1) "\nLabel:" else "\nLabels:"
        TAIL <- ""
        if (printlen < n) {
            nms <- nms[1:printlen]
            TAIL <- "...\n"
        }
        if (any(longs <- nchar(nms) > 60))
            nms[longs] <- paste0(substr(nms[longs], 1, 60), "...")
        cat(HEAD, nms, TAIL, sep = "\n")
    }
    if (nTot <= 1e7) {
        cat("Base composition:\n")
        print(round(base.freq(x), digits))
    } else {
        cat("More than 10 million bases: not printing base composition.\n")
    }
    if (nTot > 1) {
        k <- floor(log(nTot, 1000))
        units <- c("bases", "kb", "Mb", "Gb", "Tb", "Pb", "Eb")
        cat("(Total: ", round(nTot/1000^k, 2), " ", units[k + 1], ")\n", sep = "")
    }
}

as.DNAbin <- function(x, ...) UseMethod("as.DNAbin")

._cs_ <- c("a", "g", "c", "t", "r", "m", "w", "s", "k",
           "y", "v", "h", "d", "b", "n", "-", "?")

._bs_ <- c(136, 72, 40, 24, 192, 160, 144, 96, 80,
           48, 224, 176, 208, 112, 240, 4, 2)

## by Klaus:
as.DNAbin.character <- function(x, ...)
{
    ans <- as.raw(._bs_)[match(tolower(x), ._cs_)]
    if (is.matrix(x)) {
        dim(ans) <- dim(x)
        dimnames(ans) <- dimnames(x)
    }
    class(ans) <- "DNAbin"
    ans
}

as.DNAbin.alignment <- function(x, ...)
{
    n <- x$nb
    x$seq <- tolower(x$seq)
    ans <- matrix("", n, nchar(x$seq[1]))
    for (i in 1:n)
        ans[i, ] <- strsplit(x$seq[i], "")[[1]]
    rownames(ans) <- gsub(" +$", "", gsub("^ +", "", x$nam))
    as.DNAbin.character(ans)
}

as.DNAbin.list <- function(x, ...)
{
    obj <- lapply(x, as.DNAbin)
    class(obj) <- "DNAbin"
    obj
}

as.character.DNAbin <- function(x, ...)
{
    f <- function(xx) {
        ans <- ._cs_[match(as.numeric(xx), ._bs_)]
        if (is.matrix(xx)) {
            dim(ans) <- dim(xx)
            dimnames(ans) <- dimnames(xx)
        }
        ans
    }
    if (is.list(x)) lapply(x, f) else f(x)
}

base.freq <- function(x, freq = FALSE, all = FALSE)
{
    if (!inherits(x, "DNAbin"))
        stop('base.freq requires an object of class "DNAbin"')

    f <- function(x) .Call(BaseProportion, x)

    if (is.list(x)) {
        BF <- rowSums(sapply(x, f))
        n <- sum(as.double(lengths(x, use.names = FALSE)))
    } else {
        n <- length(x)
        BF <- f(x)
    }

    names(BF) <- c("a", "c", "g", "t", "r", "m", "w", "s",
                   "k", "y", "v", "h", "d", "b", "n", "-", "?")
    if (all) {
        if (!freq) BF <- BF / n
    } else {
        BF <- BF[1:4]
        if (!freq) BF <- BF / sum(BF)
    }
    BF
}

Ftab <- function(x, y = NULL)
{
    if (is.null(y)) {
        if (is.list(x)) {
            y <- x[[2]]
            x <- x[[1]]
            if (length(x) != length(y))
                stop("'x' and 'y' not of the same length")
        } else { # 'x' is a matrix
            y <- x[2, , drop = TRUE]
            x <- x[1, , drop = TRUE]
        }
    } else {
        x <- as.vector(x)
        y <- as.vector(y)
        if (length(x) != length(y))
            stop("'x' and 'y' not of the same length")
    }
    out <- matrix(0, 4, 4)
    k <- c(136, 40, 72, 24)
    for (i in 1:4) {
        a <- x == k[i]
        for (j in 1:4) {
            b <- y == k[j]
            out[i, j] <- sum(a & b)
        }
    }
    dimnames(out)[1:2] <- list(c("a", "c", "g", "t"))
    out
}

GC.content <- function(x) sum(base.freq(x)[2:3])

seg.sites <- function(x, strict = FALSE, trailingGapsAsN = TRUE)
{
    if (is.list(x)) x <- as.matrix(x)
    ## is.vector() returns FALSE because of the class,
    ## so we use a different test
    dx <- dim(x)
    if (is.null(dx)) return(integer())
    if (dx[1] == 1) return(integer())
    if (trailingGapsAsN) x <- latag2n(x)
    ans <- .Call(SegSites, x, strict)
    which(as.logical(ans))
}

dist.dna <- function(x, model = "K80", variance = FALSE, gamma = FALSE,
                     pairwise.deletion = FALSE, base.freq = NULL,
                     as.matrix = FALSE)
{
    MODELS <- c("RAW", "JC69", "K80", "F81", "K81", "F84", "T92", "TN93",
                "GG95", "LOGDET", "BH87", "PARALIN", "N", "TS", "TV",
                "INDEL", "INDELBLOCK")
    imod <- pmatch(toupper(model), MODELS)
    if (is.na(imod))
        stop(paste("'model' must be one of:",
                   paste("\"", MODELS, "\"", sep = "", collapse = " ")))
    if (imod == 11 && variance) {
        warning("computing variance not available for model BH87")
        variance <- FALSE
    }
    if (gamma && imod %in% c(1, 5:7, 9:17)) {
        warning(paste("gamma-correction not available for model", model))
        gamma <- FALSE
    }
    if (is.list(x)) x <- as.matrix(x)
    nms <- dimnames(x)[[1]]
    n <- dim(x)[1] # in case nms is NULL

    if (imod %in% c(4, 6:8)) {
        BF <- if (is.null(base.freq)) base.freq(x) else base.freq
    } else BF <- 0

    if (imod %in% 16:17) pairwise.deletion <- TRUE

    if (!pairwise.deletion) {
        keep <- .Call(GlobalDeletionDNA, x)
        x <- x[, as.logical(keep)]
    }
    if (!gamma) {
        alpha <- 0
    } else {
        alpha <- gamma
        gamma <- 1L
    }
    d <- .Call(dist_dna, x, imod, BF, as.integer(pairwise.deletion),
               as.integer(variance), as.integer(gamma), alpha)
    if (variance) {
        var <- d[[2]]
        d <- d[[1]]
    }
    if (imod == 11) {
        dim(d) <- c(n, n)
        dimnames(d) <- list(nms, nms)
    } else {
        attr(d, "Size") <- n
        attr(d, "Labels") <- nms
        attr(d, "Diag") <- attr(d, "Upper") <- FALSE
        attr(d, "call") <- match.call()
        attr(d, "method") <- model
        class(d) <- "dist"
        if (as.matrix) d <- as.matrix(d)
    }
    if (variance) attr(d, "variance") <- var
    d
}

image.DNAbin <-
    function(x, what, col, bg = "white", xlab = "", ylab = "",
             show.labels = TRUE, cex.lab = 1, legend = TRUE,
             grid = FALSE, show.bases = FALSE, base.cex = 1,
             base.font = 1, base.col = "black", ...)
{
    what <-
        if (missing(what)) c("a", "g", "c", "t", "n", "-") else tolower(what)
    if (missing(col))
        col <- c("red", "yellow", "green", "blue", "grey", "black")
    x <- as.matrix(x) # tests if all sequences have the same length
    n <- (dx <- dim(x))[1] # number of sequences
    s <- dx[2] # number of sites
    y <- integer(N <- length(x))
    ncl <- length(what)
    col <- rep(col, length.out = ncl)
    brks <- 0.5:(ncl + 0.5)
    sm <- 0L
    for (i in ncl:1) {
        k <- ._bs_[._cs_ == what[i]]
        sel <- which(x == k)
        if (L <- length(sel)) {
            y[sel] <- i
            sm <- sm + L
        } else {
            what <- what[-i]
            col <- col[-i]
            brks <- brks[-i]
        }
    }
    dim(y) <- dx
    ## if there's no 0 in y, must drop 'bg' from the cols passed to image:
    if (sm == N) {
        leg.co <- co <- col
        leg.txt <- toupper(what)
    } else {
        co <- c(bg, col)
        leg.txt <- c(toupper(what), "others")
        leg.co <- c(col, bg)
        brks <- c(-0.5, brks)
    }
    yaxt <- if (show.labels) "n" else "s"
    image.default(1:s, 1:n, t(y[n:1, , drop = FALSE]), col = co, xlab = xlab,
                  ylab = ylab, yaxt = yaxt, breaks = brks, ...)
    if (show.labels)
        mtext(rownames(x), side = 2, line = 0.1, at = n:1,
              cex = cex.lab, adj = 1, las = 1)
    if (legend) {
        psr <- par("usr")
        xx <- psr[2]/2
        yy <- psr[4] * (0.5 + 0.5/par("plt")[4])
        legend(xx, yy, legend = leg.txt, pch = 22, pt.bg = leg.co,
               pt.cex = 2, bty = "n", xjust = 0.5, yjust = 0.5,
               horiz = TRUE, xpd = TRUE)
    }
    if (grid) {
        if (is.logical(grid)) grid <- 3L
        if (grid %in% 2:3) abline(v = seq(1.5, s - 0.5, 1), lwd = 0.33, xpd = FALSE)
        if (grid %in% c(1, 3)) abline(h = seq(1.5, n - 0.5, 1), lwd = 0.33, xpd = FALSE)
    }
    if (show.bases) {
        x <- toupper(as.character(x))
        xx <- rep(1:s, each = n)
        yy <- rep(n:1, s)
        text(xx, yy, x, cex = base.cex, font = base.font, col = base.col)
    }
}

alview <- function(x, file = "", uppercase = TRUE, showpos = TRUE)
{
    if (is.list(x)) x <- as.matrix(x)
    taxa <- formatC(labels(x), width = -1)
    x <- as.character(x)
    s <- ncol(x)
    if (nrow(x) > 1) {
        for (j in seq_len(s)) {
            q <- which(x[-1L, j] == x[1L, j]) + 1L
            x[q, j] <- "."
        }
    }
    x <- apply(x, 1L, paste, collapse = "")
    if (uppercase) x <- toupper(x)
    res <- paste(taxa, x)
    if ((is.logical(showpos) && showpos) || is.numeric(showpos)) {
        if (is.logical(showpos)) {
            pos <- 1:s
            digits <- floor(log10(s)) + 1
        } else {
            pos <- showpos
            digits <- floor(log10(max(pos))) + 1
        }
        hdr <- sprintf(paste0("%0", digits, "d"), pos)
        hdr <- unlist(strsplit(hdr, ""))
        dim(hdr) <- c(digits, length(pos))
        hdr <- apply(hdr, 1, paste, collapse = "")
        hdr <- formatC(hdr, width = nchar(res[1]))
        cat(hdr, file = file, sep = "\n")
    }
    cat(res, file = file, sep = "\n", append = TRUE)
}

where <- function(x, pattern)
{
    pat <- strsplit(pattern, NULL)[[1]]
    if (inherits(x, "DNAbin")) {
        pat <- as.DNAbin(pat)
    } else {
        if (inherits(x, "AAbin")) {
            pat <- as.AAbin(toupper(pat))
        } else {
            stop("'x' should inherit class \"DNAbin\" or \"AAbin\"")
        }
    }
    p <- length(pat)

    f <- function(x, pat, p) {
        if (length(x) < p) {
            warning("sequence shorter than the pattern: returning NULL")
            return(NULL)
        }
        .Call(C_where, x, pat)
    }

    if (is.list(x)) return(lapply(x, f, pat = pat, p = p))
    if (is.matrix(x)) {
        n <- nrow(x)
        res <- vector("list", n)
        for (i in seq_len(n))
            res[[i]] <- f(x[i, , drop = TRUE], pat, p)
        names(res) <- rownames(x)
        return(res)
    }
    f(x, pat, p) # if x is a vector
}

## conversions from BioConductor:

## DNA:
.DNAString2DNAbin <- function(from)
    .Call("charVectorToDNAbinVector", as.character(from))

as.DNAbin.DNAString <- function(x, ...)
{
    res <- list(.DNAString2DNAbin(x))
    class(res) <- "DNAbin"
    res
}

as.DNAbin.DNAStringSet <- function(x, ...)
{
    res <- lapply(x, .DNAString2DNAbin)
    class(res) <- "DNAbin"
    res
}

as.DNAbin.DNAMultipleAlignment <- function(x, ...)
    as.matrix(as.DNAbin.DNAStringSet(as(x, "DNAStringSet")))

as.DNAbin.PairwiseAlignmentsSingleSubject <- function(x, ...)
    as.DNAbin.DNAMultipleAlignment(x)

## AA:
.AAString2AAbin <- function(from)
    charToRaw(as.character(from))

as.AAbin.AAString <- function(x, ...)
{
    res <- list(.AAString2AAbin(x))
    class(res) <- "AAbin"
    res
}

as.AAbin.AAStringSet <- function(x, ...)
{
    res <- lapply(x, .AAString2AAbin)
    class(res) <- "AAbin"
    res
}

as.AAbin.AAMultipleAlignment <- function(x, ...)
    as.matrix(as.AAbin.AAStringSet(as(x, "AAStringSet")))

complement <- function(x)
{
    f <- function(x) {
        ## reorder the vector of raws to match the complement:
        comp <- as.raw(._bs_[c(4:1, 10:9, 7:8, 6:5, 14:11, 15:17)])
        ans <- comp[match(as.integer(x), ._bs_)]
        rev(ans) # reverse before returning
    }
    if (is.matrix(x)) {
        for (i in 1:nrow(x)) x[i, ] <- f(x[i, ])
        return(x)
    } else if (is.list(x)) {
        x <- lapply(x, f)
    } else x <- f(x)
    class(x) <- "DNAbin"
    x
}

trans <- function(x, code = 1, codonstart = 1)
{
    f <- function(x, s, code)
        .C(trans_DNA2AA, x, as.integer(s), raw(s/3), as.integer(code),
           NAOK = TRUE)[[3]]
    if (code > 6)
        stop("only the genetic codes 1--6 are available for now")

    if (codonstart > 1) {
        del <- -(1:(codonstart - 1))
        if (is.list(x)) {
            for (i in seq_along(x)) x[[i]] <- x[[i]][del]
        } else {
            x <- if (is.matrix(x)) x[, del] else x[del]
        }
    }

    if (is.list(x)) {
        res <- lapply(x, trans, code = code)
    } else {
        s <- if (is.matrix(x)) ncol(x) else length(x)
        rest <- s %% 3
        if (rest != 0) {
            s <- s - rest
            x <- if (is.matrix(x)) x[, 1:s] else x[1:s]
            msg <- paste("sequence length not a multiple of 3:", rest, "nucleotide")
            if (rest == 2) msg <- paste0(msg, "s")
            warning(paste(msg, "dropped"))
        }
        if (is.matrix(x)) {
            res <- t(apply(x, 1, f, s = s, code = code))
            if (s == 3) {
                res <- t(res)
                rownames(res) <- rownames(x)
            }
        } else {
            res <- f(x, s, code)
        }
    }
    class(res) <- "AAbin"
    res
}

print.AAbin <- function(x, ...)
{
    if (is.list(x)) {
        n <- length(x)
        cat(n, "amino acid sequence")
        if (n > 1) cat("s")
        cat(" in a list\n\n")
        tmp <- lengths(x, use.names = FALSE)
        maxi <- max(tmp)
        mini <- min(tmp)
        if (mini == maxi)
            cat("All sequences of the same length:", maxi, "\n")
        else {
            cat("Mean sequence length:", round(mean(tmp), 3),
                "\n   Shortest sequence:", mini,
                "\n    Longest sequence:", maxi, "\n")
        }
    } else if (is.matrix(x)) {
        n <- nrow(x)
        cat(n, "amino acid sequence")
        if (n > 1) cat("s")
        cat(" in a matrix\n")
        if (n == 1) cat("Sequence length: ")
        else cat("All sequences of the same length: ")
        cat(ncol(x), "\n")
    } else {
        cat("1 amino acid sequence in a vector:\n\n",
            rawToChar(x))
    }
    cat("\n")
}

"[.AAbin" <- function (x, i, j, drop = FALSE)
{
    ans <- NextMethod("[", drop = drop)
    class(ans) <- "AAbin"
    ans
}

as.character.AAbin <- function(x, ...)
{
    f <- function(xx) {
        ans <- strsplit(rawToChar(xx), "")[[1]]
        if (is.matrix(xx)) {
            dim(ans) <- dim(xx)
            dimnames(ans) <- dimnames(xx)
        }
        ans
    }
    if (is.list(x)) lapply(x, f) else f(x)
}

as.AAbin <- function(x, ...) UseMethod("as.AAbin")

as.AAbin.character <- function(x, ...)
{
    f <- function(x) charToRaw(paste(x, collapse = ""))
    res <- if (is.vector(x)) f(x) else t(apply(x, 1, f))
    class(res) <- "AAbin"
    res
}

labels.AAbin <- function(object, ...) labels.DNAbin(object, ...)

## TO BE MOVED TO phangorn LATER
if (getRversion() >= "2.15.1") utils::globalVariables("phyDat")
as.phyDat.AAbin <- function(x, ...) phyDat(as.character(x), type = "AA")
## \alias{as.phyDat.AAbin}
## \method{as.phyDat}{AAbin}(x, \dots)

dist.aa <- function(x, pairwise.deletion = FALSE, scaled = FALSE)
{
    n <- nrow(x)
    d <- numeric(n*(n - 1)/2)
    X <- charToRaw("X")
    k <- 0L
    if (!pairwise.deletion) {
        del <- apply(x, 2, function(y) any(y == X))
        if (any(del)) x <- x[, !del]
        for (i in 1:(n - 1)) {
            for (j in (i + 1):n) {
                k <- k + 1L
                d[k] <- sum(x[i, ] != x[j, ])
            }
        }
        if (scaled) d <- d/ncol(x)
    } else {
        for (i in 1:(n - 1)) {
            a <- x[i, ]
            for (j in (i + 1):n) {
                b <- x[j, ]
                del <- a == X | b == X
                p <- length(b <- b[!del])
                tmp <- sum(a[!del] != b)
                k <- k + 1L
                d[k] <- if (scaled) tmp/p else tmp
            }
        }
    }
    attr(d, "Size") <- n
    attr(d, "Labels") <- rownames(x)
    attr(d, "Diag") <- attr(d, "Upper") <- FALSE
    attr(d, "call") <- match.call()
    class(d) <- "dist"
    d
}

AAsubst <- function(x)
{
    X <- charToRaw("X")
    f <- function(y) length(unique.default(y[y != X]))
    which(apply(x, 2, f) > 1)
}

.AA_3letter <- c("Ala", "Cys", "Asp", "Glu", "Phe", "Gly", "His", "Ile",  "Lys", "Leu",
                 "Met", "Asn", "Pro", "Gln", "Arg", "Ser", "Thr",  "Val", "Trp", "Tyr",
                 "Xaa", "Stp")

.AA_1letter <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                 "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y", "X", "*")

.AA_raw <- sapply(.AA_1letter, charToRaw)

.AA_3cat <- list(Hydrophobic = .AA_raw[c("V", "I", "L", "F", "W", "Y", "M")],
                 Small = .AA_raw[c("P", "G", "A", "C")],
                 Hydrophilic = .AA_raw[c("S", "T", "H", "N", "Q", "D", "E", "K", "R")])

image.AAbin <-
    function(x, what, col, bg = "white", xlab = "", ylab = "",
             show.labels = TRUE, cex.lab = 1, legend = TRUE,
             grid = FALSE, show.aa = FALSE, aa.cex = 1,
             aa.font = 1, aa.col = "black", ...)
{
    if (missing(what))
        what <- c("Hydrophobic", "Small", "Hydrophilic")
    if (missing(col)) col <- c("red", "yellow", "blue")
    n <- (dx <- dim(x))[1]
    s <- dx[2]
    y <- integer(N <- length(x))
    ncl <- length(what)
    col <- rep(col, length.out = ncl)
    brks <- 0.5:(ncl + 0.5)
    sm <- 0L
    for (i in ncl:1) {
        k <- .AA_3cat[[i]]
        sel <- which(x %in% k)
        if (L <- length(sel)) {
            y[sel] <- i
            sm <- sm + L
        }
        else {
            what <- what[-i]
            col <- col[-i]
            brks <- brks[-i]
        }
    }
    dim(y) <- dx
    if (sm == N) {
        leg.co <- co <- col
        leg.txt <- what
    } else {
        co <- c(bg, col)
        leg.txt <- c(what, "Unknown")
        leg.co <- c(col, bg)
        brks <- c(-0.5, brks)
    }
    yaxt <- if (show.labels) "n" else "s"
    image.default(1:s, 1:n, t(y[n:1, ]), col = co, xlab = xlab, ylab = ylab,
                  yaxt = yaxt, breaks = brks, ...)
    if (length(poly <- AAsubst(x))) {
        rect(poly - 0.5, n + 0.5, poly + 0.5, n + 0.5 + yinch(0.2),
             col = "slategrey", border = NA, xpd = TRUE)
        ##rect(0.5, n + 0.5, s + 0.5, n + 0.5 + yinch(0.2), lwd = 0.5)
    }
    if (show.labels)
        mtext(rownames(x), side = 2, line = 0.1, at = n:1, cex = cex.lab,
              adj = 1, las = 1)
    if (legend) {
        psr <- par("usr")
        xx <- psr[2]/2
        yy <- psr[4] * (0.5 + 0.5/par("plt")[4])
        legend(xx, yy, legend = leg.txt, pch = 22, pt.bg = leg.co,
               pt.cex = 2, bty = "n", xjust = 0.5, yjust = 0.5,
               horiz = TRUE, xpd = TRUE)
    }
    if (grid) {
        if (is.logical(grid)) grid <- 3L
        if (grid %in% 2:3) abline(v = seq(1.5, s - 0.5, 1), lwd = 0.33, xpd = FALSE)
        if (grid %in% c(1, 3)) abline(h = seq(1.5, n - 0.5, 1), lwd = 0.33, xpd = FALSE)
    }
    if (show.aa) {
        x <- toupper(as.character(x))
        xx <- rep(1:s, each = n)
        yy <- rep(n:1, s)
        text(xx, yy, x, cex = aa.cex, font = aa.font, col = aa.col)
    }
}

checkAlignment <- function(x, check.gaps = TRUE, plot = TRUE, what = 1:4)
{
    cat("\nNumber of sequences:", n <- nrow(x),
        "\nNumber of sites:", s <- ncol(x), "\n")
if (check.gaps) {
    cat("\n")
    y <- DNAbin2indel(x)
    gap.length <- sort(unique.default(y))[-1]
    if (!length(gap.length)) cat("No gap in alignment.\n")
    else {
        rest <- gap.length %% 3
        if (any(cond <- rest > 0)) {
            cat("Some gap lengths are not multiple of 3:", gap.length[cond])
        } else cat("All gap lengths are multiple of 3.")

        tab <- tabulate(y, gap.length[length(gap.length)])
        tab <- tab[gap.length]
        cat("\n\nFrequencies of gap lengths:\n")
        names(tab) <- gap.length
        print(tab)

        ## find gaps on the borders:
        col1 <- unique(y[, 1])
        if (!col1[1]) col1 <- col1[-1]
        if (length(col1))
            cat("   => length of gaps on the left border of the alignment:", unique(col1), "\n")
        else cat("   => no gap on the left border of the alignment\n")
        i <- which(y != 0, useNames = FALSE)
        jcol <- i %/% nrow(y) + 1
        yi <- y[i]
        j <- yi == s - jcol + 1
        if (any(j))
            cat("   => length of gaps on the right border of the alignment:", yi[j], "\n")
        else cat("   => no gap on the right border of the alignment\n")

        ## find base segments:
        A <- B <- numeric()
        for (i in seq_len(n)) {
            j <- which(y[i, ] != 0) # j: start of each gap in the i-th sequence
            if (!length(j)) next
            k <- j + y[i, j] # k: start of each base segment in the i-th sequence
            if (j[1] != 1) k <- c(1, k) else j <- j[-1]
            if (k[length(k)] > s) k <- k[-length(k)] else j <- c(j, s + 1)
            A <- c(A, j)
            B <- c(B, k)
        }
        AB <- unique(cbind(A, B))
        not.multiple.of.3 <- (AB[, 1] - AB[, 2]) %% 3 != 0
        left.border <- AB[, 2] == 1
        right.border <- AB[, 1] == s + 1
        Nnot.mult3 <- sum(not.multiple.of.3)
        cat("\nNumber of unique contiguous base segments defined by gaps:", nrow(AB), "\n")
        if (!Nnot.mult3) cat("All segment lengths multiple of 3.\n")
        else {
            Nleft <- sum(not.multiple.of.3 & left.border)
            Nright <- sum(not.multiple.of.3 & right.border)
            cat("Number of segment lengths not multiple of 3:", Nnot.mult3, "\n",
                "   => on the left border of the alignement:", Nleft, "\n",
                "   => on the right border                 :", Nright, "\n")
            if (Nright + Nleft < Nnot.mult3) {
                cat("    => positions of these segments inside the alignment: ")
                sel <- not.multiple.of.3 & !left.border & !right.border
                cat(paste(AB[sel, 2], AB[sel, 1] - 1, sep = ".."), "\n")
            }
        }
    }
} else gap.length <- numeric()
    ss <- seg.sites(x)
    cat("\nNumber of segregating sites (including gaps):", length(ss))
    BF.col <- matrix(NA_real_, length(ss), 4)
    for (i in seq_along(ss))
        BF.col[i, ] <- base.freq(x[, ss[i]])#, freq = TRUE)
    tmp <- apply(BF.col, 1, function(x) sum(x > 0))
    cat("\nNumber of sites with at least one substitution:", sum(tmp > 1))
    cat("\nNumber of sites with 1, 2, 3 or 4 observed bases:\n")
    tab2 <- tabulate(tmp, 4L)
    tab2[1] <- s - sum(tab2)
    names(tab2) <- 1:4
    print(tab2)
    cat("\n")
    H <- numeric(s)
    H[ss] <- apply(BF.col, 1, function(x) {x <- x[x > 0]; -sum(x * log(x))})

    G <- rep(1, s)
    G[ss] <- tmp
    if (plot) {
        if (length(what) == 4) {
            mat <- if (length(gap.length)) 1:4 else c(1, 0, 2, 3)
            layout(matrix(mat, 2, 2))
        } else {
            if (length(what) != 1) {
                what <- what[1]
                warning("argument 'what' has length > 1: the first value is taken")
            }
        }
        if (1 %in% what) image(x)
        if (2 %in% what && length(gap.length)) barplot(tab, xlab = "Gap length")
        if (3 %in% what)
            plot(1:s, H, "h", xlab = "Sequence position", ylab = "Shannon index (H)")
        if (4 %in% what)
            plot(1:s, G, "h", xlab = "Sequence position", ylab = "Number of observed bases")
    }
}

all.equal.DNAbin <- function(target, current, plot = FALSE, ...)
{
    if (identical(target, current)) return(TRUE)
    name.target <- deparse(substitute(target))
    name.current <- deparse(substitute(current))
    st1 <- "convert list as matrix for further comparison."
#    st2 <- ""
    st3 <- "Subset your data for further comparison."
    isali1 <- is.matrix(target)
    isali2 <- is.matrix(current)
    if (isali1 && !isali2)
        return(c("1st object is a matrix, 2nd object is a list:", st1))
    if (!isali1 && isali2)
        return(c("1st object is a list, 2nd object is a matrix:", st1))
    if (!isali1 && !isali2)
        return(c("Both objects are lists:",
                 "convert them as matrices for further comparison."))
#    n1 <- if (isali1) nrow(target) else length(target)
#    n2 <- if (isali2) nrow(current) else length(current)
    if (ncol(target) != ncol(current))
        return("Numbers of columns different: comparison stopped here.")

    foo <- function(n) ifelse(n == 1, "sequence", "sequences")
    doComparison <- function(target, current)
        which(target != current, arr.ind = TRUE, useNames = FALSE)

    n1 <- nrow(target)
    n2 <- nrow(current)
    labs1 <- labels(target)
    labs2 <- labels(current)
    if (identical(labs1, labs2)) {
        res <- "Labels in both objects identical."
        res <- list(messages = res,
                    different.sites = doComparison(target, current))
    } else {
        in12 <- labs1 %in% labs2
        in21 <- labs2 %in% labs1
        if (n1 != n2) {
            res <- c("Number of sequences different:",
                     paste(n1, foo(n1), "in 1st object;", n2,
                           foo(n2), "in 2nd object."),
                     st3)
            plot <- FALSE
        } else { # n1 == n2
            if (any(!in12)) {
                res <- c("X: 1st object (target), Y: 2nd object (current).",
                         paste("labels in X not in Y:",
                               paste(labs1[!in12], collapse = ", ")),
                         paste("labels in X not in Y:",
                               paste(labs2[!in21], collapse = ", ")),
                         st3)
                plot <- FALSE
            } else {
                res <- c("Labels in both objects identical but not in the same order.",
                         "Comparing sequences after reordering rows of the second matrix.")
                current <- current[labs1, ]
                if (identical(target, current)) {
                    res <- c(res, "Sequences are identical.")
                    plot <- FALSE
                } else {
                    res <- list(messages = res,
                                different.sites = doComparison(target, current))
                }
            }
	}
    }
    if (plot) {
        cols <- unique(res$different.sites[, 2])
        diff.cols <- diff(cols)
        j <- which(diff.cols != 1)
        end <- c(cols[j], cols[length(cols)])
        start <- c(cols[1], cols[j + 1])
        v <- cumsum(end - start + 1) + 0.5
        f <- function(lab) {
            axis(2, at = seq_len(n1), labels = FALSE)
            axis(1, at = seq_along(cols), labels = cols)
            mtext(lab, line = 1, adj = 0, font = 2)
        }
        layout(matrix(1:2, 2))
        par(xpd = TRUE)
        image(target[, cols], show.labels = FALSE, axes = FALSE, ...)
        f(name.target)
        xx <- c(0.5, v)
        segments(xx, 0.5, xx, n1, lty = 2, col = "white", lwd = 2)
        segments(xx, 0.5, xx, -1e5, lty = 2, lwd = 2)
        image(current[, cols], show.labels = FALSE, axes = FALSE, ...)
        f(name.current)
        segments(xx, 0.5, xx, n2, lty = 2, col = "white", lwd = 2)
        segments(xx, 1e5, xx, n2, lty = 2, lwd = 2)
        #segments(0.5, -5, length(cols) + 0.5, -5, lwd = 5, col = "grey")
        #rect(0.5, -4, length(cols) + 0.5, -3, col = "grey")
        #segments(0.5, 0.5, 10, -3)
    }
    res
}

## From Franz Krah <f.krah@mailbox.org>:

## estensions of the AAbin class to complement the DNAbin class funcitons

c.AAbin <- function(..., recursive = FALSE)
{
  if (!all(unlist(lapply(list(...), is.list))))
    stop("the 'c' method for \"AAbin\" accepts only lists")
  structure(NextMethod("c"), class = "AAbin")
}

rbind.AAbin <- function(...)
{
    obj <- list(...)
    n <- length(obj)
    if (n == 1) return(obj[[1]])
    for (i in 1:n)
        if (!is.matrix(obj[[1]]))
            stop("the 'rbind' method for \"AAbin\" accepts only matrices")
    NC <- unlist(lapply(obj, ncol))
    if (length(unique(NC)) > 1)
        stop("matrices do not have the same number of columns.")
    for (i in 1:n) class(obj[[i]]) <- NULL # safe but maybe not really needed
    structure(do.call(rbind, obj), class = "AAbin")
}

cbind.AAbin <-
    function(..., check.names = TRUE, fill.with.Xs = FALSE,
             quiet = FALSE)
{
    obj <- list(...)
    n <- length(obj)
    if (n == 1) return(obj[[1]])
    for (i in 1:n)
        if (!is.matrix(obj[[1]]))
            stop("the 'cbind' method for \"AAbin\" accepts only matrices")
    NR <- unlist(lapply(obj, nrow))
    for (i in 1:n) class(obj[[i]]) <- NULL
    if (check.names) {
        NMS <- lapply(obj, rownames)
        for (i in 1:n)
            if (anyDuplicated(NMS[[i]]))
                stop("Duplicated rownames in matrix ", i, ": see ?cbind.AAbin")
        nms <- unlist(NMS)
        if (fill.with.Xs) {
            NC <- unlist(lapply(obj, ncol))
            nms <- unique(nms)
            ans <- matrix(charToRaw("X"), length(nms), sum(NC))
            rownames(ans) <- nms
            from <- 1
            for (i in 1:n) {
                to <- from + NC[i] - 1
                k <- match(NMS[[i]], nms)
                ans[k, from:to] <- obj[[i]]
                from <- to + 1
            }
        } else {
            tab <- table(nms)
            ubi <- tab == n
            nms <- names(tab)[which(ubi)]
            ans <- obj[[1]][nms, , drop = FALSE]
            for (i in 2:n)
                ans <- cbind(ans, obj[[i]][nms, , drop = FALSE])
            if (!quiet && !all(ubi))
                warning("some rows were dropped.")
        }
    } else {
        if (length(unique(NR)) > 1)
            stop("matrices do not have the same number of rows.")
        ans <- matrix(unlist(obj), NR)
        rownames(ans) <- rownames(obj[[1]])
    }
    class(ans) <- "AAbin"
    ans
}

as.AAbin.list <- function(x, ...)
{
  obj <- lapply(x, as.AAbin)
  class(obj) <- "AAbin"
  obj
}

as.list.AAbin <- function(x, ...)
{
  if (is.list(x)) return(x)
  if (is.null(dim(x))) obj <- list(x) # cause is.vector() doesn't work
  else { # matrix
    n <- nrow(x)
    obj <- vector("list", n)
    for (i in seq_len(n)) obj[[i]] <- x[i, , drop = TRUE]
    names(obj) <- rownames(x)
  }
  class(obj) <- "AAbin"
  obj
}

as.matrix.AAbin <- function(x, ...)
{
  if (is.matrix(x)) return(x)
  if (!is.list(x)) { # vector
    dim(x) <- c(1, length(x))
    return(x)
  }
  s <- unique(lengths(x, use.names = FALSE))
  if (length(s) != 1)
    stop("AA sequences in list not of the same length.")
  n <- length(x)
  y <- matrix(raw(), n, s)
  for (i in seq_len(n)) y[i, ] <- x[[i]]
  rownames(y) <- names(x)
  class(y) <- "AAbin"
  y
}

rDNAbin <- function(n, nrow, ncol, base.freq = rep(0.25, 4), prefix = "Ind_")
{
    foo <- function(n, prob) {
        vec <- as.raw(._bs_[1:4])
        vec[sample.int(4L, n, TRUE, prob, FALSE)]
    }
    base.freq <- if (all(base.freq == 0.25)) NULL else base.freq[c(1, 3, 2, 4)]
    if (missing(n)) {
        if (missing(nrow) && missing(ncol))
            stop("nrow and ncol should be given if n is missing")
        res <- foo(nrow * ncol, base.freq)
        dim(res) <- c(nrow, ncol)
        rownames(res) <- paste0(prefix, 1:nrow)
    } else {
        res <- lapply(n, foo, prob = base.freq)
        names(res) <- paste0(prefix, seq_along(n))
    }
    class(res) <- "DNAbin"
    res
}

dnds <- function(x, code = 1, codonstart = 1, quiet = FALSE,
                 details = FALSE, return.categories = FALSE)
{
    if (code > 6) stop("only the genetic codes 1--6 are available for now")
    if (is.list(x)) x <- as.matrix(x)
    n <- nrow(x)
    if (nrow(unique.matrix(x)) != n) stop("sequences are not unique")
### if (any(base.freq(x, TRUE, TRUE)[-(1:4)] > 0)) stop("ambiguous bases are not permitted")
    if (codonstart > 1) {
        del <- -(1:(codonstart - 1))
        x <- x[, del]
    }
    p <- ncol(x)
    rest <- p %% 3
    if (rest) {
        p <- p - rest
        x <- x[, 1:p]
        msg <- sprintf("sequence length not a multiple of 3: %d %s dropped",
                       rest, ngettext(rest, "base", "bases"))
        warning(msg)
    }

    degMat <- .buildDegeneracyMatrix(code)
    Lcat <- matrix(0L, n, p)
    V1 <- V2 <- V3 <- integer(136)
    i <- c(136L, 72L, 40L, 24L)
    V1[i] <- c(1L, 17L, 33L, 49L)
    V2[i] <- c(0L, 4L, 8L, 12L)
    V3[i] <- 0:3

    class(x) <- NULL
    z <- as.integer(x)
    N <- length(x)

    SHIFT <- c(0L, n, 2L * n)
    p <- 1L + SHIFT
    while (p[3] <= N) {
        for (i in 1:n) {
            codon <- z[p]
            ii <- V1[codon[1]] + V2[codon[2]] + V3[codon[3]]
            if (!is.na(ii)) Lcat[p] <- degMat[ii, ]
            p <- p + 1L
        }
        p <- p[3] + SHIFT
    }

    if (return.categories) return(Lcat)
    if (details) quiet <- TRUE

    deg <- c(0, 2, 4) # the 3 levels of degeneracy
    nout <- n*(n - 1)/2
    res <- numeric(nout)
    k <- 1L
    for (i in 1:(n - 1)) {
        for (j in (i + 1):n) {
            if (!quiet) cat("\r", round(100*k/nout), "%")
            z <- x[c(i, j), ]
            Lavg <- (Lcat[i, ] + Lcat[j, ])/2
            Lavg[Lavg == 1] <- 2
            Lavg[Lavg == 3] <- 4
            ii <- lapply(deg, function(x) which(x == Lavg))
            L <- lengths(ii)
            S <- lapply(ii, function(id) dist.dna(z[, id, drop = FALSE], "TS"))
            V <- lapply(ii, function(id) dist.dna(z[, id, drop = FALSE], "TV"))
            S <- unlist(S, use.names = FALSE)
            V <- unlist(V, use.names = FALSE)
            if (details) {
                cat(sprintf("\nComparing sequences %d and %d:\n", i, j))
                tmp <- rbind(S, V)
                dimnames(tmp) <- list(c("Transitions", "Transversions"),
                                      c("Nondegenerate", "Twofold", "Fourfold"))
                print(tmp)
            }
            P <- S/L
            Q <- V/L
            a <- 1/(1 - 2*P - Q)
            b <- 1/(1 - 2*Q)
            c <- (a - b)/2
            A <- log(a)/2 - log(b)/4
            B <- log(b)/2
            dS <- (L[2]*A[2] + L[3]*A[3])/sum(L[2:3]) + B[3]
            dN <- A[1] + (L[1]*B[1] + L[2]*B[2])/sum(L[1:2])
            res[k] <- dN/dS
            k <- k + 1L
        }
    }
    if (!quiet) cat("... done\n")
    attr(res, "Size") <- n
    attr(res, "Labels") <- rownames(x)
    attr(res, "Diag") <- attr(res, "Upper") <- FALSE
    attr(res, "call") <- match.call()
    attr(res, "method") <- "dNdS (Li 1993)"
    class(res) <- "dist"
    res
}

.buildDegeneracyMatrix <- function(code) {
    b <- as.raw(._bs_[1:4])
    CODONS <- cbind(rep(b, each = 16), rep(rep(b, each = 4), 4), rep(b, 16))
    AA <- trans(CODONS, code = code)

    degeneracyMatrix <- matrix(0L, 64L, 3L)
    deg <- c(4L, 2L, 2L, 0L)

    ## 1/ find the bases at 3rd positions that are twofold/fourfold degenerate
    s  <- 1:4
    while (s[4L] <= 64) {
        degeneracyMatrix[s, 3L] <- deg[length(unique(AA[s]))]
        s <- s + 4L
    }
    ## 2/ all bases at 2nd positions are nondegenerate: no need to do anything
    ## 3/ are some bases at 1st positions twofold degenerate?
    s <- c(1L, 17L, 33L, 49L)
    while (s[1L] < 17) {
        degeneracyMatrix[s, 1L] <- deg[length(unique(AA[s]))]
        s <- s + 1L
    }
    degeneracyMatrix
}

latag2n <- function(x) {
    if (is.list(x)) x <- as.matrix(x)
    dx <- dim(x)
    clx <- class(x)
    res <- .Call(leading_trailing_gaps_to_N, x)
    ## the order of the next two commands is crucial if 'clx' is
    ## c("matrix", "array") because the dimension must be set before
    ## setting the class (this is the case for pegas::mjn() where the
    ## class "DNAbin" is dropped before calling latag2n())
    dim(res) <- dx
    class(res) <- clx
    res
}

solveAmbiguousBases <- function(x, method = "columnwise", random = TRUE)
{
    if (method == "columnwise") {
        if (is.list(x)) x <- as.matrix(x)
        p <- ncol(x)
        for (j in 1:p) {
            BF <- base.freq(x[, j], TRUE, TRUE)
            ambi <- BF[5:15]
            K <- which(ambi > 0)
            if (length(K)) {
                agct <- BF[c(1, 3, 2, 4)]
                for (b in K) {
                    base <- as.DNAbin(names(ambi[b]))
                    sel <- agct[rev(rawToBits(base))[1:4] == 1]
                    if (!sum(sel)) sel[] <- 1L
                    i <- which(x[, j] == base)
                    tmp <- if (random) sample(names(sel), length(i), TRUE, sel) else names(sel)[which.max(sel)]
                    x[i, j] <- as.DNAbin(tmp)
                }
            }
        }
    }
    x
}

##distK80 <- function(x, pairwise.deletion = FALSE)
##{
##    nms <- dimnames(x)[[1]]
##    n <- length(nms)
##    if (!pairwise.deletion) {
##        keep <- .Call(GlobalDeletionDNA, x)
##        x <- x[, as.logical(keep)]
##        d <- .Call(dist_dna_K80_short, x)
##    } else {
##        d <- .Call(dist_dna_K80_short_pairdel, x)
##    }
##    attr(d, "Size") <- n
##    attr(d, "Labels") <- nms
##    attr(d, "Diag") <- attr(d, "Upper") <- FALSE
##    attr(d, "call") <- match.call()
##    attr(d, "method") <- "K80"
##    class(d) <- "dist"
##    d
##}

Try the ape package in your browser

Any scripts or data that you put into this service are public.

ape documentation built on March 31, 2023, 6:56 p.m.