R/coalescentMCMC.R
In coalescentMCMC: MCMC Algorithms for the Coalescent

Documented in AIC.coalescentMCMC anova.coalescentMCMC BIC.coalescentMCMC cleanMCMCtrees coalescentMCMC getLastTree getMCMCstats getMCMCtrees logLik.coalescentMCMC saveMCMCtrees subset.coalescentMCMC

## coalescentMCMC.R (2019-02-01)

##   Run MCMC for Coalescent Trees

## Copyright 2012-2019 Emmanuel Paradis

## This file is part of the R-package `coalescentMCMC'.
## See the file ../COPYING for licensing issues.

coalescentMCMC <- function(x, ntrees = 3000, model = "constant",
                           tree0 = NULL, printevery = 100, degree = 1,
                           nknots = 0, knot.times = NULL, moves = 1:6)
{
    MODELS <- c("constant", "time", "step", "linear", "bsplines")
    model <- match.arg(model, MODELS)
    if (is.na(model)) stop(paste("model must one of:", MODELS))
    if (packageVersion("phangorn") >= "1.99.5") {
        edQt <- phangorn::edQt
        lli <- phangorn::lli
        pml.fit <- phangorn::pml.fit
        pml.free <- phangorn::pml.free
        pml.init <- phangorn::pml.init
    }

    on.exit({
        if (packageVersion("phangorn") >= "1.99.5") pml.free()
        if (i < ntrees) {
            one2i <- seq_len(i)
            TREES <- TREES[one2i]
            LL <- LL[one2i]
            params <- params[one2i, ]
            LLmod <- LLmod[one2i]
            warning(paste("MCMC interrupted after", i, "generations"))
        }
        ## compress the list of trees:
        attr(TREES, "TipLabel") <- TREES[[1L]]$tip.label
        lapply(TREES, function(x) x$tip.label <- NULL)
        class(TREES) <- "multiPhylo"

        suffix <- 1
        list.trees <- .get.list.trees()
        if (l <- length(list.trees))
            suffix <- 1 + as.numeric(sub("TREES_", "", list.trees[l]))
        suffix <- sprintf("%03d", suffix)
        assign(paste("TREES", suffix, sep = "_"), TREES,
               envir = .coalescentMCMCenv)

        i <- i - 1L

        MCMCstats <- get("MCMCstats", envir = .coalescentMCMCenv)
        MCMCstats[[suffix]] <- c(i, j)
        assign("MCMCstats", MCMCstats, envir = .coalescentMCMCenv)

        ## LLmod stores the half deviances, hence the minus sign:
        LL <- cbind(LL, -LLmod, params)
        colnames(LL) <- c("logLik.tree", "logLik.coal", para.nms)
        LL <- mcmc(LL, start = 1, end = i)
        class(LL) <- c("coalescentMCMC", "mcmc")
        attr(LL, "model") <- model
        attr(LL, "call") <- match.call()
        attr(LL, "nobs") <- n
        if (model == "bsplines") {
            attr(LL, "degree") <- degree
            attr(LL, "nknots") <- nknots
        }
        return(LL)
    })

    verbose <- as.logical(printevery)

    if (is.null(tree0)) {
        d <- dist.dna(x, "JC69")
        tree0 <- upgma(d, "average")
    }

    X <- if (inherits(x, "phyDat")) x else phyDat(x)
    n <- length(tree0$tip.label)
    nodeMax <- 2*n - 1

    ## log-lik of the tree given the genetic data
    getlogLik <- function(phy, X) pml(phy, X)$logLik

    if (packageVersion("phangorn") >= "1.99.5") {
        INV <- Matrix(lli(X, tree0), sparse = TRUE)
        ll.0 <- numeric(attr(X, "nr"))
        ## by Klaus (ensures that tip labels of tree and data have same order):
        X <- subset(X, tree0$tip.label)
        ##
        bf <- rep(0.25, 4)
        eig <- edQt()
        pml.init(X)
        getlogLik <- function(phy, X) {
            phy <- reorder(phy, "postorder")
            pml.fit(phy, X, bf = bf, eig = eig, INV = INV, ll.0 = ll.0)
        }
    }

    TREES <- vector("list", ntrees)
    LL <- LLmod <- numeric(ntrees)
    TREES[[1L]] <- tree0
    lnL0 <- getlogLik(tree0, X)
    LL[1L] <- lnL0

    ## For each model, calculate:
    ## np: number of parameters of the coalescent model
    ## para.nms: names of these parameters (output in the "mcmc" object)
    ## lo: lower bounds of these parameters (except for "constant" model)
    ##     for nlminb()
    ## up: id. for the upper bounds
    ## getparams: a function that returns the coalescent parameters and
    ##            the log-likelihood given the coalescent times (bt)

    switch(model, constant = {
        np <- 1L
        para.nms <- "theta"
        ## quantities to calculate THETA:
        n2two <- n:2
        Ncomb <- n2two * (n2two - 1)/2 # choose(two2n, 2)
        getparams <- function(bt) {
            x <- diff(c(0, sort(bt)))
            par <- sum(x * Ncomb)/(n - 1)
            obj <- sum(log(Ncomb)) - (n - 1) * log(par) - sum(x * Ncomb)/par
            list(par = par, objective = -obj)
        }
    }, time = {
        np <- 2L
        para.nms <- c("theta0", "rho")
        getparams <- function(bt) {
            halfdev <- function(p) {
                if (any(is.nan(p))) return(1e100)
                -dcoal.time(bt, p[1], p[2], log = TRUE)
            }
            lo <- c(1e-8, -1e6)
            up <- c(100, 1000)
            out <- nlminb(c(0.02, 0.1), halfdev, lower = lo, upper = up)
            out[c("par", "objective")]
        }
    }, step = {
        np <- 3L
        para.nms <- c("theta0", "theta1", "tau")
        getparams <- function(bt) {
            halfdev <- function(p) {
                if (any(p <= 0) || any(is.nan(p))) return(1e100)
                -dcoal.step(bt, p[1], p[2], p[3], log = TRUE)
            }
            out <- nlminb(c(0.02, 0.02, bt[1]/2), halfdev)
            out[c("par", "objective")]
        }
    }, linear = {
        np <- 2L
        para.nms <- c("theta0", "thetaT")
        getparams <- function(bt) {
            halfdev <- function(p) {
                if (any(p <= 0) || any(is.nan(p))) return(1e100)
                -dcoal.linear(bt, p[1], p[2], log = TRUE)
            }
            out <- nlminb(c(0.02, 0.02), halfdev)
            out[c("par", "objective")]
        }
    }, bsplines = {
        free.knot.times <- TRUE
        if (!is.null(knot.times)) {
            nknots <- length(knot.times)
            free.knot.times <- FALSE
        }
        np <- nknots + degree + 1L
        if (free.knot.times) np <- np + nknots
        para.nms <- paste0("b", 1:np)
        getparams <- function(bt) {
            up <- rep(1e3, np)
            lo <- -up
            if (nknots) {
                up[1:nknots] <- bt
                lo[1:nknots] <- 0
                ip <- c(1:nknots * bt/(nknots + 1), runif(np))
            } else {
                ip <- runif(np)
            }
            halfdev <- function(p) {
                if (any(p <= 0) || any(is.nan(p))) return(1e100)
                if (nknots) {
                    knots <- p[1:nknots]
                    beta <- p[-(1:nknots)]
                } else {
                    knots <- NULL
                    beta <- p
                }
                -dcoal.bsplines(bt, beta, knots = knots, degree = degree, log = TRUE)
            }
            out <- nlminb(ip, halfdev, lower = lo, upper = up)
            out[c("par", "objective")]
        }
    })

    params <- matrix(0, ntrees, np)

    i <- 2L # number of sampled trees = number of generations
    j <- 0L # number of accepted moves

    if (verbose) {
        cat("Running the Markov chain:\n")
        cat("  Number of generations to run:", ntrees, "\n")
        cat("Generation    Nb of accepted moves\n")
    }

    bt0 <- .branching_times(tree0)
    fitcoal <- getparams(bt0)
    params[1L, ] <- para0 <- fitcoal$par
    LLmod[1L] <- lnLcoal0 <- fitcoal$objective

    getTHETAct <- function(n, bt) {
        x <- diff(c(0, sort(bt)))
        n2two <- n:2
        Ncomb <- n2two * (n2two - 1)/2
        sum(x * Ncomb)/(n - 1)
    }

    while (i <= ntrees) {
        if (verbose) if (! i %% printevery)
            cat("\r  ", i, "                ", j, "           ")

        move <- sample(moves, 1L)
        if (move == 1)
            THETA <- ifelse(model == "constant", para0, getTHETAct(n, bt0))
        tr.b <- switch(move,
                       NeighborhoodRearrangement(tree0, n, THETA, bt0),
                       TipInterchange(tree0, n),
                       ScalingMove(tree0),
                       branchSwapping(tree0, n, bt0),
                       subtreeExchange(tree0, n, bt0),
                       NodeAgeMove(tree0, n, bt0))

        lnL.b <- getlogLik(tr.b, X)
        ## calculate coalescent parameters for the proposed tree:
        bt <- .branching_times(tr.b)
        fitcoal <- getparams(bt)
        para <- fitcoal$par
        lnLcoal.b <- fitcoal$objective
        if (is.na(lnL.b) || is.na(lnLcoal.b)) {
            ACCEPT <- FALSE
        } else {
            ## R <- lnL.b + lnLcoal.b - lnL0 - lnLcoal0
            R <- lnL.b - lnL0
            ACCEPT <- if (R >= 0) TRUE else rbinom(1, 1, exp(R))
        }
        if (ACCEPT) {
            j <- j + 1L
            lnL0 <- lnL.b
            lnLcoal0 <- lnLcoal.b
            tree0 <- tr.b
            para0 <- para
            bt0 <- bt
        }
        TREES[[i]] <- tree0
        LL[i] <- lnL0
        params[i, ] <- para0
        LLmod[i] <- lnLcoal0
        i <- i + 1L
    }
    if (verbose) cat("\nDone.\n")
}

.get.list.trees <- function()
    ls(envir = .coalescentMCMCenv, pattern = "^TREES_")

getMCMCtrees <- function(chain = NULL)
{
    list.trees <- .get.list.trees()
    l <- length(list.trees)
    if (is.null(chain)) {
        if (!l) return(NULL)
        if (l == 1)
            return(get(list.trees, envir = .coalescentMCMCenv))
        ## l > 1:
        cat("Several lists of MCMC trees are stored:\n\n")
        for (i in 1:l) cat(i, ":", list.trees[i], "\n")
        cat("\nReturn which number? ")
        chain <- as.numeric(readLines(n = 1))
    } else {
        if (!l) {
            warning("no list of MCMC trees stored")
            return(NULL)
        }
        if (l < chain) {
            warning("no enough lists of MCMC trees stored")
            return(NULL)
        }
    }
    get(paste("TREES", sprintf("%03d", chain), sep = "_"),
        envir = .coalescentMCMCenv)
}

saveMCMCtrees <- function(destdir = ".", format = "RDS", ...)
{
    format <- match.arg(toupper(format), c("RDS", "NEWICK", "NEXUS"))
    switch(format, RDS = {
        FUN <- saveRDS
        suffix <- ".rds"
    }, NEWICK = {
        FUN <- write.tree
        suffix <- ".tre"
    }, NEXUS = {
        FUN <- write.nexus
        suffix <- ".nex"
    })
    list.trees <- .get.list.trees()
    l <- length(list.trees)
    if (!l) warning("no list of trees to save") else {
        for (i in 1:l) {
            f <- list.trees[i]
            outfile <- paste(destdir, "/", f, suffix, sep = "")
            FUN(get(f, envir = .coalescentMCMCenv), outfile, ...)
        }
    }
}

cleanMCMCtrees <- function()
    rm(list = .get.list.trees(), envir = .coalescentMCMCenv)

getLastTree <- function(X) X[[length(X)]]

getMCMCstats <- function()
{
    cat("MCMC chain summaries (chains as columns):\n\n")
    get("MCMCstats", envir = .coalescentMCMCenv)
}

logLik.coalescentMCMC <- function(object, ...)
{
    model <- attr(object, "model")
    if (model == "bsplines") {
        degree <- attr(object, "degree")
        knots <- attr(object, "knots")
    }

    if (model == "bsplines") {
        beta <- vector("list", nrow(object))
        for (i in 1:nrow(object)) beta[[i]] <- object[i, -1]
    }
    para.nms <- colnames(object)[-(1:2)]
    res <- object[, "logLik.coal"]
    attributes(res) <- NULL
    ##if (useWeights) {
    ##    w <- object[, "logLik.tree"]
    ##    attributes(w) <- NULL
    ##    w <- w - min(w) + 1e-8 # all w > 0
    ##    w <- length(res)*w/sum(w) # sum(w) = length(res)
    ##    res <- w * res
    ##}
    attr(res, "nobs") <- attr(object, "nobs")
    attr(res, "df") <- length(para.nms) # number of parameters
    class(res) <- c("logLik")
    res
}

AIC.coalescentMCMC <- function(object, ..., k = 2)
{
    mod <- c(list(object), list(...))
    ll <- lapply(mod, logLik.coalescentMCMC)
    df <-  sapply(ll, attr, "df")
    -2 * sapply(ll, mean) + k * df
}

BIC.coalescentMCMC <- function(object, ...)
{
    mod <- c(list(object), list(...))
    ll <- lapply(mod, logLik.coalescentMCMC)
    df <-  sapply(ll, attr, "df")
    nobs <-  sapply(ll, attr, "nobs")
    -2 * sapply(ll, mean) + df * nobs
}

anova.coalescentMCMC <- function(object, ...)
{
    mod <- c(list(object), list(...))
    ll <- lapply(mod, logLik.coalescentMCMC)
    df <- sapply(ll, attr, "df")
    ll <- sapply(ll, mean)
    dev <- c(NA, 2 * diff(ll)) # LRT's
    ddf <- c(NA, diff(df))
    res <- data.frame(ll, df, ddf, dev, pchisq(dev, ddf, lower.tail = FALSE))
    dimnames(res) <- list(1:length(mod),
                          c("Log lik.", "Resid. df", "df", "Chisq", "Pr(>|Chi|)"))
    structure(res, heading = "Likelihood Ratio Test Table",
              class = c("anova", "data.frame"))
}

subset.coalescentMCMC <- function(x, burnin = 1000, thinning = 10, end = NULL, ...)
{
    oc <- oldClass(x)
    class(x) <- NULL
    attr.x <- attributes(x)
    mcpar <- attr.x$mcpar
    attr.x$old.call <- attr.x$call
    attr.x$call <-  match.call()

    if (mcpar[2] < burnin)
        stop("'burnin' longer than the number of generations")
    from <- burnin + 1
    n <- nrow(x)
    if (!is.null(end)) {
        if (end < from) stop("argument 'end' too small")
        if (end > n) {
            warning("argument 'end' greater than the number of generations: it was ignored")
            end <- n
        }
    } else  end <- n
    x <- x[from:end, , drop = FALSE]
    if (thinning > 1) {
        i <- seq(thinning, nrow(x), thinning)
        x <- x[i, , drop = FALSE]
        ## mcpar[c(1, 3)] <- thinning
    }

    mcpar[2] <- nrow(x)
    attr.x$mcpar <- mcpar
    attr.x$dim <- dim(x)
    attributes(x) <- attr.x
    class(x) <- oc
    x
}

Any scripts or data that you put into this service are public.

coalescentMCMC documentation built on April 22, 2022, 5:05 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

coalescentMCMC
MCMC Algorithms for the Coalescent

R/coalescentMCMC.R
In coalescentMCMC: MCMC Algorithms for the Coalescent

Defines functions subset.coalescentMCMC anova.coalescentMCMC BIC.coalescentMCMC AIC.coalescentMCMC logLik.coalescentMCMC getMCMCstats getLastTree cleanMCMCtrees saveMCMCtrees getMCMCtrees .get.list.trees coalescentMCMC

Documented in AIC.coalescentMCMC anova.coalescentMCMC BIC.coalescentMCMC cleanMCMCtrees coalescentMCMC getLastTree getMCMCstats getMCMCtrees logLik.coalescentMCMC saveMCMCtrees subset.coalescentMCMC

Try the coalescentMCMC package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

coalescentMCMC MCMC Algorithms for the Coalescent

R/coalescentMCMC.R In coalescentMCMC: MCMC Algorithms for the Coalescent

Defines functions subset.coalescentMCMC anova.coalescentMCMC BIC.coalescentMCMC AIC.coalescentMCMC logLik.coalescentMCMC getMCMCstats getLastTree cleanMCMCtrees saveMCMCtrees getMCMCtrees .get.list.trees coalescentMCMC

Documented in AIC.coalescentMCMC anova.coalescentMCMC BIC.coalescentMCMC cleanMCMCtrees coalescentMCMC getLastTree getMCMCstats getMCMCtrees logLik.coalescentMCMC saveMCMCtrees subset.coalescentMCMC

Try the coalescentMCMC package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

coalescentMCMC
MCMC Algorithms for the Coalescent

R/coalescentMCMC.R
In coalescentMCMC: MCMC Algorithms for the Coalescent