bayesgen.pl
Before running this file, convert MrBayes output into R-readable output in
nexTrees folder using t2nex.pl
Before loading trees, R needs to know where abouts on the computer files are stored.
If you are using RStudio, then R will by default begin in the directory in which the package is installed -- great, no further work is required.
Otherwise you might need to run setwd("C:/path/to/CongreveLamsdell2016/data-raw")
,
substituting in the necessary path on your machine.
Data are saved to the package using the usedata
package, which you'll need to
install using install.packages('usethis')
if you haven't already.
```{R Initialize}
referenceTree <- CongreveLamsdell2016::clReferenceTree
DIR_ROOT = '../data-raw/trees/' MATRIX_DIR = '../data-raw/matrices'
FILE_NUMS <- formatC(1:100, width=3, format='d', flag='0') # Add leading zeroes to numbers SO_NUMS <- formatC(1:20, width=2, format='d', flag='0') # Enumeration of suboptimal trees WEIGHTINGS <- c('eq', 'k1', 'k2', 'k3', 'k5', 'kX')
TREE_FILE <- paste0(DIR_ROOT, '%s/%s.', FILE_NUMS, '%s.con.tre') # Defines the pattern of the file name MATRIX_FILES <- paste0(MATRIX_DIR, '/', FILE_NUMS, '.txt.nex') BAYES_TREE <- paste0(DIR_ROOT, 'MrBayes/%s.nex.run%s.nex') CI_PATH <- paste0(DIR_ROOT, 'consistency_indices.txt') SUBOPTIMAL <- list( mk = seq(1, 0.5, length.out = 21L), mkv = seq(1, 0.5, length.out = 21L), brem = seq(1, 0.5, length.out = 21L), freq = seq(0, 100, length.out = 51L), gc = seq(-100, 100, length.out = 41L) )
TIP_ORDER <- as.character(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 19, 18, 17, 15, 16, 13, 14, 20, 21, 22))
BlankReturn <- function (analysis, nCol) matrix(0, ncol=nCol, nrow=length(SUBOPTIMAL[[analysis]]))
TranslateTntTips <- function(tree) { tree$tip.label <- TIP_ORDER[as.integer(tree$tip.label) + 1L] tree }
LoadSuboptimal <- function (pref) { lapply(TREE_FILE, function (treeFile) { lapply(c(sprintf(treeFile, pref, pref, ''), sprintf(treeFile, pref, pref, paste0('.so', SO_NUMS))), TreeSearch::ReadTntTree, tipLabels = TIP_ORDER) }) }
Resampling <- function (jackFile) { jackLines <- readLines(jackFile) jackTree <- TreeSearch::TNTText2Tree(jackLines[3]) jackTipOrder <- order(as.integer(jackTree$tip.label) + 1L) jackNodeOrder <- unique(unlist(phangorn::Ancestors(jackTree, jackTipOrder)))[-1] nTntNode <- jackTree$Nnode jackTree <- TranslateTntTips(jackTree)
jackScores <- trimws(gsub("ttag \+\d+ (.); ", "\1", jackLines[length(jackLines) - (nTntNode - 2L):0]))[order(jackNodeOrder)] jackScores <- gsub("\?", 0, jackScores) jackScores <- gsub("\[(\d+)\]", "-\1", jackScores) jackScores <- matrix(as.double(unlist(strsplit(jackScores, '/'))), 2, dimnames=list(c('freq', 'gc'), NULL))
attr(jackTree, 'scores') <- jackScores # Return: jackTree }
QuartetScores <- function (X) Quartet::QuartetStatus(X, cf=referenceTree) PartitionScores <- function (X) Quartet::SplitStatus(X, cf=referenceTree)
ReduceTreesNodeByNode <- function(tree, nodeSupport) { nTip <- length(tree$tip.label) rootNode <- nTip + 1L nNode <- tree$Nnode collapseOrder <- order(as.double(nodeSupport)) nodeOrder <- rootNode + collapseOrder nodeChoices <- !upper.tri(matrix(NA, length(nodeOrder), length(nodeOrder)))
# Return: apply(nodeChoices, 1, function (i) TreeSearch::CollapseNode(tree, nodeOrder[i])) }
ReduceTreesBySupport <- function(tree, tag) { nTip <- length(tree$tip.label) rootNode <- nTip + 1L nNode <- tree$Nnode nodeSupport <- attr(tree, 'scores')[tag, ]
nodeNumbers <- rootNode + seq_along(nodeSupport) collapse <- lapply(SUBOPTIMAL[[tag]], function (threshold) nodeNumbers[nodeSupport < threshold])
# Return: lapply(collapse, function (toCollapse) TreeSearch::CollapseNode(tree, toCollapse)) }
SupportSuboptimal <- function (filePath, tag) { resampled <- Resampling(filePath) reduced <- ReduceTreesBySupport(resampled, tag) }
NodeSupportSuboptimal <- function(pref, measure, tag) { lapply(paste0(DIR_ROOT, '', pref, '/', pref, '.', FILE_NUMS, '.', measure), SupportSuboptimal, tag) }
GenerateTrees <- function (method, tag) { result <- lapply(WEIGHTINGS, NodeSupportSuboptimal, method, tag) names(result) <- WEIGHTINGS result[['kC']] <- lapply(seq_along(FILE_NUMS), function(i) lapply(seq_along(result[[1]][[1]]), function (j) ape::consensus(result[['k2']][[i]][[j]], result[['k3']][[i]][[j]], result[['k5']][[i]][[j]], result[['kX']][[i]][[j]]) )) result }
## Load matrices and calcluate consistency ```{R load-matrices} rawMatrices <- lapply(MATRIX_FILES, ape::read.nexus.data) clMatrices <- lapply(rawMatrices, function (mat) lapply(mat, function (row) row[-(56:59)])) clPhyDat <- lapply(clMatrices, phangorn::phyDat, type="USER", levels=1:2) clCI <- vapply(clPhyDat, function (mat) phangorn::CI(referenceTree, mat), double(1)) names(clCI) <- FILE_NUMS # Save data: usethis::use_data(clMatrices, clPhyDat, clCI, overwrite=TRUE)
```{R markov-analyses}
for (NUM in FILE_NUMS) { if (!file.exists(sprintf(TREE_FILE[as.integer(NUM)], 'mk', 'mk', '')) && all(file.exists(sprintf(BAYES_TREE, NUM, 1:4)))) { trees <- unlist(lapply(1:4, function (run) { ape::read.nexus(file=sprintf(BAYES_TREE, NUM, run)) }), recursive=FALSE)
class(trees) <- 'multiPhylo' consi <- lapply(SUBOPTIMAL$mk, function (p) ape::consensus(trees, p=p)) names(consi) <- paste0('consensus_', SUBOPTIMAL$mk) ape::write.nexus(rev(consi), file=sprintf(TREE_FILE[as.integer(NUM)], 'mk', 'mk', ''))
} }
markovTrees <- lapply(sprintf(TREE_FILE, 'mk', 'mk', ''), ape::read.nexus) clMkvQuartets <- vapply(markovTrees, QuartetScores, BlankReturn('mkv', 7L)) clMkvPartitions <- vapply(markovTrees, PartitionScores, BlankReturn('mkv', 8L)) usethis::use_data(clMkvQuartets, clMkvPartitions, overwrite=TRUE)
## Bremer results ```{R Bremer-analyses} bremTrees <- lapply(WEIGHTINGS, LoadSuboptimal) names(bremTrees) <- WEIGHTINGS bremTrees[['kC']] <- lapply(seq_along(FILE_NUMS), function(i) lapply(1:21, function (j) ape::consensus(bremTrees[['k2']][[i]][[j]], bremTrees[['k3']][[i]][[j]], bremTrees[['k5']][[i]][[j]], bremTrees[['kX']][[i]][[j]]) )) clBremQuartets <- lapply(bremTrees, vapply, QuartetScores, BlankReturn('brem', 7L)) clBremPartitions <- lapply(bremTrees, vapply, PartitionScores, BlankReturn('brem', 8L)) usethis::use_data(clBremQuartets, clBremPartitions, overwrite=TRUE)
```{R Load resampling metrics, message=FALSE, warn=FALSE} bootFreqTrees <- GenerateTrees('sym', 'freq') bootGcTrees <- GenerateTrees('sym', 'gc') jackFreqTrees <- GenerateTrees('jak', 'freq') jackGcTrees <- GenerateTrees('jak', 'gc')
clBootFreqQuartets <- lapply(bootFreqTrees, vapply, QuartetScores, FUN.VALUE=BlankReturn('freq', 7L)) clJackFreqQuartets <- lapply(jackFreqTrees, vapply, QuartetScores, FUN.VALUE=BlankReturn('freq', 7L)) clBootFreqPartitions <- lapply(bootFreqTrees, vapply, PartitionScores, FUN.VALUE=BlankReturn('freq', 8L)) clJackFreqPartitions <- lapply(jackFreqTrees, vapply, PartitionScores, FUN.VALUE=BlankReturn('freq', 8L))
clBootGcQuartets <- lapply(bootGcTrees, vapply, QuartetScores, FUN.VALUE=BlankReturn('gc', 7L)) clJackGcQuartets <- lapply(jackGcTrees, vapply, QuartetScores, FUN.VALUE=BlankReturn('gc', 7L)) clBootGcPartitions <- lapply(bootGcTrees, vapply, PartitionScores, FUN.VALUE=BlankReturn('gc', 8L)) clJackGcPartitions <- lapply(jackGcTrees, vapply, PartitionScores, FUN.VALUE=BlankReturn('gc', 8L))
usethis::use_data(clBootFreqQuartets, clBootFreqPartitions, clJackFreqQuartets, clJackFreqPartitions, clBootGcQuartets, clBootGcPartitions, clJackGcQuartets, clJackGcPartitions, overwrite=TRUE)
tools::resaveRdaFiles('data') # Use optimal compression ```
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.