bayesgen.pl
Before running this file, convert MrBayes output into R-readable output in
nexTrees folder using t2nex.pl
Before loading trees, R needs to know where abouts on the computer files are stored.
If you are using RStudio, then R will by default begin in the directory in which the package is installed -- great, no further work is required.
Otherwise you might need to run setwd("C:/path/to/OReillyEtAl2016/data-raw")
,
substituting in the necessary path on your machine.
Data are saved to the package using the usedata
package, which you'll need to
install using install.packages('usethis')
if you haven't already.
```{R Initialize}
referenceTree <- OReillyEtAl2016::orReferenceTree
DIR_ROOT = '../data-raw/trees/'
N_BAYES_RUNS <- 2 DATASET_NUMS <- 1:100 REPL_NUMS <- 1:10 NCHAR_VALUES <- c('100', '350', '1000') FILE_NUMS <- as.character(vapply(DATASET_NUMS, paste0, character(length(REPL_NUMS)), '_', REPL_NUMS))
TIP_ORDER <- c("Osteolepiformes", "Ichthyostega", "Archeria", "Proterogyrinus", "Ariekanerpeton", "Seymouria", "Kotlassia", "Nectridea", "Utaherpeton", "Asaphestera", "Lysorophia", "Cardiocephalus", "Brachystelechidae", "Rhynchonkos", "Pantylus", "Microbrachis", "Adelogyrinidae", "Caeciliidae", "Typhlonectidae", "Ichthyophiidae", "Rhinatrematidae", "Eocaecilia", "Rhyacotritonidae", "Plethodontinae", "Bolitoglossinae", "Spelerpinae", "Hemidactylinae", "Amphiumidae", "Salamandridae", "Dicamptodontidae", "Ambystomatidae", "Proteidae", "Sirenidae", "Cryptobranchidae", "Hynobiidae", "Karaurus", "Leiopelmatidae", "Ascaphidae", "Pelodytidae", "Megophryidae", "Pelobatidae", "Scaphiopodidae", "Ranoidea", "Sooglossidae", "Heleophrynidae", "Myobatrachidae", "Calyptocephallidae", "Hyloidea", "Rhinophrynidae", "Pipidae", "Bombinatoridae", "Discoglossidae", "Triadobatrachus", "Aistopoda", "Westlothiana", "Procolophonidae", "Captorhinidae", "Synapsida", "Diadectes", "Limnoscelis", "Solenodonsaurus", "Gephyrostegidae", "Doleserpeton", "Amphibamus", "Apateon", "Tersomius", "Ecolsonia", "Eryops", "Dendrerpeton", "Colosteidae", "Baphetidae", "Crassigyrinus", "Tulerpeton", "Acanthostega", "Panderichthyidae");
TREE_FILE <- paste0(DIR_ROOT, '%s.%s/%s.sym') # Defines the pattern of the file name BAYES_FILE <- paste0(DIR_ROOT, '%s.%s/R%s.con.nex') # Defines the pattern of the file name BAYES_TREE <- paste0(DIR_ROOT, 'MrBayes/%s_%s.mb.nex.run%s.nex')
SUBOPTIMAL <- list( mk = seq(1, 0.5, length.out = 21L), gc = c(-100, -75, -50, -25, seq(0, 100, length.out = 21L-4L)) )
ReadNexus <- ape::read.nexus WriteNexus <- ape::write.nexus
GenerateScores <- function(analysis, trees) { cat("Generating results for", analysis, "... ") cat("Quartets...") orQuartets[[as.character(NCHAR)]][[analysis]] <<- vapply(trees, Quartet::QuartetStatus, 0L * BLANK_QUARTS[, , 1], cf = referenceTree) cat(" Done. Partitions...") orPartitions[[as.character(NCHAR)]][[analysis]] <<- vapply(trees, Quartet::SplitStatus, 0 * BLANK_SPLITS[, , 1], cf = referenceTree) cat(" Done.\n") }
ANALYSES <- c(mk = 'markov', eq = 'equal', k2 = 'implied2', k3 = 'implied3', k5 = 'implied5', k10 = 'implied10', k20 = 'implied20', k200 ='implied200', kC = 'impliedC')
BLANK_SPLITS <- array(NA, c(21, 8, length(FILE_NUMS)), dimnames=list(NULL, c("N", "P1", "P2", "s", "d1", "d2", "r1", "r2"), FILE_NUMS)) ALL_BLANK_P <- lapply(ANALYSES, function(x) BLANK_SPLITS) names(ALL_BLANK_P) <- ANALYSES
BLANK_QUARTS <- array(NA, c(21, 7, length(FILE_NUMS)), dimnames=list(NULL, c("N", "Q", "s", "d", "r1", "r2", "u"), FILE_NUMS)) ALL_BLANK_Q <- lapply(ANALYSES, function(x) BLANK_QUARTS) names(ALL_BLANK_Q) <- ANALYSES THESE_FILE_NUMS <- as.character(vapply(DATASET_NUMS, paste0, character(length(REPL_NUMS)), '_', REPL_NUMS))
if (!any(file.exists(paste0(c('..', '.'), '/data/orQuartets.rda')))) { # Don't overwrite existing data! orPartitions <- list('100'=ALL_BLANK_S, '350'=ALL_BLANK_S, '1000'=ALL_BLANK_S) orQuartets <- list('100'=ALL_BLANK_Q, '350'=ALL_BLANK_Q, '1000'=ALL_BLANK_Q) usethis::use_data(orQuartets, orPartitions, overwrite=FALSE) remove("orQuartets", "orPartitions") }
TranslateTntTips <- function(tree) { tree$tip.label <- TIP_ORDER[as.integer(tree$tip.label) + 1L] tree }
Resampling <- function (jackFile) { jackLines <- readLines(jackFile) jackTree <- TreeSearch::TNTText2Tree(jackLines[3]) jackTipOrder <- order(as.integer(jackTree$tip.label) + 1L) jackNodeOrder <- unique(unlist(phangorn::Ancestors(jackTree, jackTipOrder)))[-1] nTntNode <- jackTree$Nnode jackTree <- TranslateTntTips(jackTree)
jackScores <- trimws(gsub("ttag \+\d+ (.); ", "\1", jackLines[length(jackLines) - (nTntNode - 2L):0]))[order(jackNodeOrder)] jackScores <- gsub("\?", 0, jackScores) jackScores <- gsub("\[(\d+)\]", "-\1", jackScores) jackScores <- matrix(as.double(unlist(strsplit(jackScores, '/'))), 2, dimnames=list(c('freq', 'gc'), NULL))
attr(jackTree, 'scores') <- jackScores # Return: jackTree }
ReduceTreesBySupport <- function(tree) { nTip <- length(tree$tip.label) rootNode <- nTip + 1L nNode <- tree$Nnode nodeSupport <- attr(tree, 'scores')['gc', ]
nodeNumbers <- rootNode + seq_along(nodeSupport) collapse <- lapply(SUBOPTIMAL[['gc']], function (threshold) nodeNumbers[nodeSupport < threshold])
# Return: lapply(collapse, function (toCollapse) TreeSearch::CollapseNode(tree, toCollapse)) }
SupportSuboptimal <- function (filePath) { resampled <- Resampling(filePath) reduced <- ReduceTreesBySupport(resampled) structure(reduced, class='multiPhylo') }
LoadSuboptimal <- function (fileNums, pref) { ret <- lapply(sprintf(TREE_FILE, pref, NCHAR, fileNums), SupportSuboptimal) names(ret) <- fileNums
# Return: ret }
### Load trees, calcuate and save statistics ```{R calculate-statistics} orQuartets <- OReillyEtAl2016::orQuartets orPartitions <- OReillyEtAl2016::orPartitions for (NCHAR in NCHAR_VALUES) { for (NUM in THESE_FILE_NUMS) { # Generate consensus trees from MrBayes output. if (!file.exists(sprintf(TREE_FILE, 'mk', NCHAR, NUM, '')) && all(file.exists(sprintf(BAYES_TREE, NCHAR, NUM, seq_len(N_BAYES_RUNS))))) { trees <- structure(unlist(lapply(seq_len(N_BAYES_RUNS), function (run) { ReadNexus(file=sprintf(BAYES_TREE, NCHAR, NUM, run)) }), recursive=FALSE), class='multiPhylo') consi <- lapply(BAYES_SUBOPTIMAL, function (p) ape::consensus(trees, p=p)) names(consi) <- paste0('consensus_', BAYES_SUBOPTIMAL) WriteNexus(rev(consi), file=sprintf(TREE_FILE, 'mk', NCHAR, NUM, '')) } } # Load consensus trees from Equal Weights and Markov model analyses markov <- lapply(sprintf(BAYES_FILE, 'mk', NCHAR, THESE_FILE_NUMS), ReadNexusSafely) names(markov) <- THESE_FILE_NUMS SAFE_FILE_NUMS <- THESE_FILE_NUMS[ !vapply(markov, is.null, logical(1)) & file.exists(paste0(DIR_ROOT, 'k2.' , NCHAR, '/', THESE_FILE_NUMS, '.sym')) & file.exists(paste0(DIR_ROOT, 'k5.' , NCHAR, '/', THESE_FILE_NUMS, '.sym')) & file.exists(paste0(DIR_ROOT, 'k10.' , NCHAR, '/', THESE_FILE_NUMS, '.sym')) & file.exists(paste0(DIR_ROOT, 'k20.' , NCHAR, '/', THESE_FILE_NUMS, '.sym')) & file.exists(paste0(DIR_ROOT, 'k200.', NCHAR, '/', THESE_FILE_NUMS, '.sym'))] # Some TNT / MrBayes analyses failed to complete after > 48 hours cat(NCHAR, "characters: Results not available for: ", THESE_FILE_NUMS[!THESE_FILE_NUMS %in% SAFE_FILE_NUMS], "\n") GenerateScores('markov', markov[SAFE_FILE_NUMS]) remove('markov') # Free memory GenerateScores('equal', LoadSuboptimal(SAFE_FILE_NUMS, 'eq')) implied2Trees <- LoadSuboptimal(SAFE_FILE_NUMS, 'k2') implied3Trees <- LoadSuboptimal(SAFE_FILE_NUMS, 'k3') implied5Trees <- LoadSuboptimal(SAFE_FILE_NUMS, 'k5') implied10Trees <- LoadSuboptimal(SAFE_FILE_NUMS, 'k10') implied20Trees <- LoadSuboptimal(SAFE_FILE_NUMS, 'k20') implied200Trees <- LoadSuboptimal(SAFE_FILE_NUMS, 'k200') impliedCTrees <- lapply(SAFE_FILE_NUMS, function(NUM) { i2 <- implied2Trees[[NUM]] i3 <- implied3Trees[[NUM]] i5 <- implied5Trees[[NUM]] i10 <- implied10Trees[[NUM]] i20 <- implied20Trees[[NUM]] i200 <- implied200Trees[[NUM]] # Return: structure(lapply(1:21, function (j) ape::consensus(i2[[j]], i3[[j]], i5[[j]], i10[[j]], i20[[j]], i200[[j]])), class = 'multiPhylo') } ) names(impliedCTrees) <- SAFE_FILE_NUMS ### Calculate tree statistics # Define the expected format of tree statistics (needed for vapply) # (Using lapply or sapply instead of vapply can be simpler, and is only slightly slower) GenerateScores( 'impliedC', impliedCTrees); remove(impliedCTrees) GenerateScores( 'implied2', implied2Trees); remove(implied2Trees) GenerateScores( 'implied3', implied3Trees); remove(implied3Trees) GenerateScores( 'implied5', implied5Trees); remove(implied5Trees) GenerateScores( 'implied10', implied10Trees); remove(implied10Trees) GenerateScores( 'implied20', implied20Trees); remove(implied20Trees) GenerateScores('implied200', implied200Trees); remove(implied200Trees) usethis::use_data(orPartitions, overwrite=TRUE, compress='bzip2') usethis::use_data(orQuartets, overwrite=TRUE, compress='bzip2') # xz better, but errors } tools::resaveRdaFiles('data') # Slow; once optimal compression identified, # specify it in use_data
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.