.onAttach <- function(...)
{
packageStartupMessage("rBEAST 1.0.0\nhttps://github.com/olli0601/rBEAST")
}
######################################################################################
# write xml file for BEAST multilocus.hky run
#' @title Generate multilocus XML file with a codon specific GTR model and fixed starting tree
#' @description See Examples.
# import XML phytools ape data.table reshape2 ggplot2
#' @export
#' @example example/ex.beastxml.multilocus.codon.gtr.R
#' @author Oliver Ratmann
beastxml.multilocus.codon.gtr<- function( file.name, seq.select, phd, verbose=1 )
{
stopifnot(c("CLU_ID", "TAXON_ID", "TAXON_NAME", "SEQ", "SAMPLINGTIME")%in%names(seq.select))
file.name.base <- gsub('.xml','',rev(strsplit(file.name,'/')[[1]])[1])
# write XML file with new sequences
bxml <- newXMLDoc(addFinalizer=T)
bxml.beast <- newXMLNode("beast", doc=bxml, addFinalizer=T)
tmp <- newXMLCommentNode(text=paste("Generated by beastxml.multilocus.codon.gtr"), parent=bxml.beast, doc=bxml, addFinalizer=T)
# add new taxa
beast.add.taxa(bxml, seq.select, beast.date.direction= "forwards", beast.date.units= "years", verbose=1)
# add alignment for every transmission cluster
for(clu in seq.select[, unique(CLU_ID)])
{
beast.add.alignment(bxml, subset(seq.select, CLU_ID==clu), df.seqfield='SEQ', beast.alignment.id=paste("alignment_CLU",clu,sep=''), beast.alignment.dataType= "nucleotide", verbose=1)
}
# add starting tree for every transmission cluster
for(clu in names(phd))
{
beast.add.startingtree(bxml, phd[[clu]], beast.usingDates="true", beast.newickid=paste("startingTree_CLU",clu,sep=''), beast.brlunits="years", verbose=1)
}
# add CODON patterns for every alignment
for(clu in seq.select[, unique(CLU_ID)])
for(k in 1:3)
{
beast.add.patterns(bxml, paste("patterns.CP",k,"_CLU",clu,sep=''), paste("alignment_CLU",clu,sep=''), k, beast.patterns.every=3, beast.patterns.strip='false', verbose=1)
}
# add tree model for every alignment
for(clu in seq.select[, unique(CLU_ID)])
{
beast.add.treemodel(bxml, paste("treeModel_CLU",clu,sep=''), paste("treeModel.rootHeight_CLU",clu,sep=''), paste("treeModel.internalNodeHeights_CLU",clu,sep=''), paste("treeModel.allInternalNodeHeights_CLU",clu,sep=''), newick.id=paste("startingTree_CLU",clu,sep=''), internalNodes='true', rootNode='true', verbose=1)
}
# add skygrid
tmp <- sapply( seq.select[, unique(CLU_ID)], function(x) paste("treeModel_CLU",x,sep='') )
grid.height <- max(sapply(phd, function(ph) max(node.depth.edgelength(ph))))
beast.add.gmrfskygrid(bxml, "skygrid", "skygrid.logPopSize", "skygrid.precision", "skygrid.numGridPoints", "skygrid.cutOff", tmp,
80, -2.396895772465287, 1.0, 0.0, grid.height, verbose=1)
# add uncorrelated relaxed clock for every transmission cluster
tmp <- seq.select[, unique(CLU_ID)]
clu <- tmp[1]
beast.add.discretizedBranchRates(bxml, paste("branchRates_CLU",clu,sep=''), paste("treeModel_CLU",clu,sep=''), paste("branchRates.categories_CLU",clu,sep=''),
mean.id="ucld.mean", mean.value="0.0025", mean.lower="0.0", sd.id="ucld.stdev", sd.value="0.3333333333333333", sd.lower="0.0", meanInRealSpace="true",
rateCategories.dimension= subset(seq.select, CLU_ID==clu)[, 2*(length(unique(TAXON_ID))-1)], verbose=1)
for(clu in tmp[-1])
{
beast.add.discretizedBranchRates(bxml, paste("branchRates_CLU",clu,sep=''), paste("treeModel_CLU",clu,sep=''), paste("branchRates.categories_CLU",clu,sep=''),
mean.idref="ucld.mean", sd.idref="ucld.stdev", meanInRealSpace="true",
rateCategories.dimension= subset(seq.select, CLU_ID==clu)[, 2*(length(unique(TAXON_ID))-1)], verbose=1)
}
# add rate statistic for first cluster
for(clu in seq.select[, unique(CLU_ID)])
{
beast.add.rateStatistics(bxml, paste('branchRatesStat_CLU',clu, sep=''), paste('treeModel_CLU',clu, sep=''), paste('branchRates_CLU',clu, sep=''))
}
# add GTR CP1 2 3 models
for(k in 1:3)
{
clu <- seq.select[, unique(CLU_ID)][1]
beast.add.gtrModel(bxml, paste("gtr.CP",k,"_CLU", clu,sep=""), paste("patterns.CP",k,"_CLU",clu,sep=''), paste("gtr.frequencies.CP",k,"_CLU",clu,sep=''),
rateAC.id=paste("gtr.ac.CP",k,sep=""), rateAC.value="0.3", rateAC.lower="0.0",
rateAG.id=paste("gtr.ag.CP",k,sep=""), rateAG.value="1.0", rateAG.lower="0.0",
rateAT.id=paste("gtr.at.CP",k,sep=""), rateAT.value="0.2", rateAT.lower="0.0",
rateCG.id=paste("gtr.cg.CP",k,sep=""), rateCG.value="0.2", rateCG.lower="0.0",
rateGT.id=paste("gtr.gt.CP",k,sep=""), rateGT.value="0.1", rateGT.lower="0.0",
frequencies.dimension='4', frequencyModel.dataType='nucleotide')
for(clu in seq.select[, unique(CLU_ID)][-1])
beast.add.gtrModel(bxml, paste("gtr.CP",k,"_CLU", clu,sep=""), paste("patterns.CP",k,"_CLU",clu,sep=''), paste("gtr.frequencies.CP",k,"_CLU",clu,sep=''),
rateAC.idref=paste("gtr.ac.CP",k,sep=""),
rateAG.idref=paste("gtr.ag.CP",k,sep=""),
rateAT.idref=paste("gtr.at.CP",k,sep=""),
rateCG.idref=paste("gtr.cg.CP",k,sep=""),
rateGT.idref=paste("gtr.gt.CP",k,sep=""),
frequencies.dimension='4', frequencyModel.dataType='nucleotide')
}
# add CP1 2 3 site models
for(k in 1:3)
{
clu <- seq.select[, unique(CLU_ID)][1]
beast.add.siteModel(bxml, paste("site.CP",k,"_CLU", clu,sep=""), paste("gtr.CP",k,"_CLU", clu,sep=""),
relativeRate.id=paste("site.mu.CP",k,sep=""), relativeRate.value="1.0", relativeRate.lower="0.0",
gamma.id=paste("site.alpha.CP",k,sep=""), gamma.value="0.5", gamma.lower="0.0",
gammaCategories="4", verbose=1)
for(clu in seq.select[, unique(CLU_ID)][-1])
beast.add.siteModel(bxml, paste("site.CP",k,"_CLU", clu,sep=""), paste("gtr.CP",k,"_CLU", clu,sep=""),
relativeRate.idref=paste("site.mu.CP",k,sep=""), gamma.idref=paste("site.alpha.CP",k,sep=""), gammaCategories="4", verbose=1)
}
# copy compoundParameter
beast.add.compoundParameter(bxml, 'all.site.mu', select.id='site.mu', verbose=1)
# add treeLikelihood
for(clu in seq.select[, unique(CLU_ID)])
for(k in 1:3)
{
beast.add.treeLikelihood(bxml, paste("treeLikelihood.CP",k,"_CLU",clu,sep=""),
paste("patterns.CP",k,"_CLU",clu,sep=''),
paste("treeModel_CLU",clu,sep=''),
paste('site.CP',k,'_CLU',clu,sep=''),
paste("branchRates_CLU",clu,sep=''), useAmbiguities='false', verbose=1)
}
# add compound likelihood
tmp <- unique(unlist(xpathApply(bxml, "//treeLikelihood[@id]", xmlGetAttr, "id")))
beast.add.likelihood(bxml, 'likelihood', tmp)
# add prior element
beast.add.priors(bxml, 'prior')
# add priors
beast.add.uniformPrior(bxml, "gtr.ac.CP1", 0, 1.5)
beast.add.gammaPrior(bxml, "gtr.ag.CP1", 0.05, 20, 0)
beast.add.uniformPrior(bxml, "gtr.at.CP1", 0, .5)
beast.add.uniformPrior(bxml, "gtr.cg.CP1", 0, 1)
beast.add.uniformPrior(bxml, "gtr.gt.CP1", 0, .5)
beast.add.uniformPrior(bxml, "gtr.ac.CP2", 0, 1.5)
beast.add.gammaPrior(bxml, "gtr.ag.CP2", 0.05, 20, 0)
beast.add.uniformPrior(bxml, "gtr.at.CP2", 0, .5)
beast.add.uniformPrior(bxml, "gtr.cg.CP2", 0, 1)
beast.add.uniformPrior(bxml, "gtr.gt.CP2", 0, .5)
beast.add.uniformPrior(bxml, "gtr.ac.CP3", 0, 1.5)
beast.add.gammaPrior(bxml, "gtr.ag.CP3", 0.05, 20, 0)
beast.add.uniformPrior(bxml, "gtr.at.CP3", 0, .5)
beast.add.uniformPrior(bxml, "gtr.cg.CP3", 0, 1)
beast.add.uniformPrior(bxml, "gtr.gt.CP3", 0, .5)
for(k in 1:3)
beast.add.uniformPrior(bxml, paste("site.mu.CP",k,sep=''), 0.1, 10)
for(k in 1:3)
beast.add.exponentialPrior(bxml, paste("site.alpha.CP",k,sep=''), 0.5, 0)
beast.add.uniformPrior(bxml, "ucld.mean", 0.0001, 0.006)
beast.add.exponentialPrior(bxml, "ucld.stdev", 0.1, 0)
beast.add.gammaPrior(bxml, "skygrid.precision", 0.001, 1000, 0)
beast.add.gmrfSkyGridPrior(bxml, 'skygrid')
# add mcmc element
beast.add.mcmc(bxml, mcmc.id='mcmc', posterior.id='posterior', prior.idref='prior', likelihood.idref='likelihood',
mcmc.chainLength=20e6, mcmc.autoOptimize='true', mcmc.operatorAnalysis= paste(file.name.base, '.ops',sep=''))
# add operator element
beast.add.operators(bxml, 'operators')
# add scale operators
tmp <- as.vector( sapply( c("gtr.ac.CP","gtr.ag.CP","gtr.at.CP","gtr.cg.CP","gtr.gt.CP","site.alpha.CP"), function(x) paste(x,1:3,sep='') ) )
tmp <- c("skygrid.precision", tmp)
tmp <- data.table( ID= tmp, SF=0.75, W=0.1 )
for(i in seq_len(nrow(tmp)))
beast.add.scaleOperator(bxml, tmp[i,ID], tmp[i,SF], tmp[i,W])
tmp <- data.table( ID= c('ucld.mean',"ucld.stdev"), SF=0.75, W=3 )
for(i in seq_len(nrow(tmp)))
beast.add.scaleOperator(bxml, tmp[i,ID], tmp[i,SF], tmp[i,W])
# add deltaExchange operators
beast.add.deltaExchangeOperator(bxml, 'all.site.mu', delta=0.75, parameterWeights=c(948, 948, 948), weight=9)
# add gmrf block operators
beast.add.gmrfGridBlockUpdateOperator(bxml, 'skygrid', scaleFactor=2, weight=2)
# add branch rate category operators
for(clu in seq.select[, unique(CLU_ID)])
{
beast.add.uniformIntegerOperator(bxml, paste("branchRates.categories_CLU",clu,sep=''), 10)
beast.add.swapOperator(bxml, paste("branchRates.categories_CLU",clu,sep=''), size=1, weight=10, autoOptimize="false")
}
# add screen log
beast.add.screenLog(bxml, log.id='screenLog', posterior.idref='posterior', likelihood.idref="likelihood", prior.idref="prior", logEvery=2e4)
# add file log
beast.add.fileLog(bxml, log.id='fileLog', posterior.idref='posterior', likelihood.idref="likelihood", prior.idref="prior", logEvery=2e4,
log.fileName=paste(file.name.base, '.log',sep=''), log.overwrite='false',
select.id=c('rootHeight', 'ucld', 'gtr', 'alpha', 'skygrid','branchRatesStat'), deselect.id=c('frequencies','categories'))
# add tree log for every transmission cluster
#for(clu in seq.select[, unique(CLU_ID)])
#{
# beast.add.logTree(bxml, paste("logTree_CLU",clu,sep=''), paste("treeModel_CLU",clu,sep=''), paste("branchRates_CLU",clu,sep=''), 'posterior',
# logEvery=2e4, nexusFormat='true', sortTranslationTable='true',
# fileName=paste(file.name.base,'_CLU', clu, '.trees', sep=''), verbose=1)
#}
bxml
}
######################################################################################
#' @title Generate multilocus XML file with HKY model and fixed starting tree
#' @description See Examples.
# import XML phytools ape data.table reshape2 ggplot2
#' @export
#' @author Oliver Ratmann
beastxml.multilocus.hky.fixed.topology<- function( file.name, seq.select, phd, verbose=1 )
{
stopifnot(c("CLU_ID", "TAXON_ID", "TAXON_NAME", "SEQ", "SAMPLINGTIME")%in%names(seq.select))
file.name.base <- gsub('.xml','',rev(strsplit(file.name,'/')[[1]])[1])
# write XML file with new sequences
bxml <- newXMLDoc(addFinalizer=T)
bxml.beast <- newXMLNode("beast", doc=bxml, addFinalizer=T)
tmp <- newXMLCommentNode(text=paste("Generated by beastxml.multilocus.hky.fixed.tree"), parent=bxml.beast, doc=bxml, addFinalizer=T)
# add new taxa
beast.add.taxa(bxml, seq.select, beast.date.direction= "forwards", beast.date.units= "years", verbose=1)
# add alignment for every transmission cluster
for(clu in seq.select[, unique(CLU_ID)])
{
beast.add.alignment(bxml, subset(seq.select, CLU_ID==clu), df.seqfield='SEQ', beast.alignment.id=paste("alignment_CLU",clu,sep=''), beast.alignment.dataType= "nucleotide", verbose=1)
}
# add starting tree for every transmission cluster
for(clu in names(phd))
{
beast.add.startingtree(bxml, phd[[clu]], beast.usingDates="true", beast.newickid=paste("startingTree_CLU",clu,sep=''), beast.brlunits="years", verbose=1)
}
# add CODON patterns for every alignment
for(clu in seq.select[, unique(CLU_ID)])
{
beast.add.patterns(bxml, paste("patterns_CLU",clu,sep=''), paste("alignment_CLU",clu,sep=''), 1, beast.patterns.every=NA, beast.patterns.strip='false', verbose=1)
}
# add tree model for every alignment
for(clu in seq.select[, unique(CLU_ID)])
{
beast.add.treemodel(bxml, paste("treeModel_CLU",clu,sep=''), paste("treeModel.rootHeight_CLU",clu,sep=''), paste("treeModel.internalNodeHeights_CLU",clu,sep=''), paste("treeModel.allInternalNodeHeights_CLU",clu,sep=''), newick.id=paste("startingTree_CLU",clu,sep=''), internalNodes='true', rootNode='true', verbose=1)
}
# add skygrid
tmp <- sapply( seq.select[, unique(CLU_ID)], function(x) paste("treeModel_CLU",x,sep='') )
grid.height <- max(sapply(phd, function(ph) max(node.depth.edgelength(ph))))
beast.add.gmrfskygrid(bxml, "skygrid", "skygrid.logPopSize", "skygrid.precision", "skygrid.numGridPoints", "skygrid.cutOff", tmp,
80, -2.396895772465287, 1.0, 0.0, grid.height, verbose=1)
# add uncorrelated relaxed clock for first cluster
tmp <- seq.select[, unique(CLU_ID)]
clu <- tmp[1]
beast.add.discretizedBranchRates(bxml, paste("branchRates_CLU",clu,sep=''), paste("treeModel_CLU",clu,sep=''), paste("branchRates.categories_CLU",clu,sep=''),
mean.id="ucld.mean", mean.value="0.0025", mean.lower="0.0", sd.id="ucld.stdev", sd.value="0.3333333333333333", sd.lower="0.0", meanInRealSpace="true",
rateCategories.dimension= subset(seq.select, CLU_ID==clu)[, 2*(length(unique(TAXON_ID))-1)], verbose=1)
# link uncorrelated relaxed clock to other clusters
for(clu in tmp[-1])
{
beast.add.discretizedBranchRates(bxml, paste("branchRates_CLU",clu,sep=''), paste("treeModel_CLU",clu,sep=''), paste("branchRates.categories_CLU",clu,sep=''),
mean.idref="ucld.mean", sd.idref="ucld.stdev", meanInRealSpace="true",
rateCategories.dimension= subset(seq.select, CLU_ID==clu)[, 2*(length(unique(TAXON_ID))-1)], verbose=1)
}
# add rate statistic for first cluster
for(clu in seq.select[, unique(CLU_ID)])
{
beast.add.rateStatistics(bxml, paste('branchRatesStat_CLU',clu, sep=''), paste('treeModel_CLU',clu, sep=''), paste('branchRates_CLU',clu, sep=''))
}
# add HKY model for first cluster
clu <- seq.select[, unique(CLU_ID)][1]
beast.add.hkyModel(bxml, paste("hky_CLU", clu,sep=""), paste("patterns_CLU",clu,sep=''), paste("hky.frequencies_CLU",clu,sep=''),
rate.id=paste("hky.kappa",sep=""), rate.value="1.0", rate.lower="0.0",
frequencies.dimension='4', frequencyModel.dataType='nucleotide')
# link HKY model to other clusters
for(clu in seq.select[, unique(CLU_ID)][-1])
{
beast.add.hkyModel(bxml, paste("hky_CLU", clu,sep=""), paste("patterns_CLU",clu,sep=''), paste("hky.frequencies_CLU",clu,sep=''),
rate.idref=paste("hky.kappa",sep=""),
frequencies.dimension='4', frequencyModel.dataType='nucleotide')
}
# add site model for first cluster
clu <- seq.select[, unique(CLU_ID)][1]
beast.add.siteModel(bxml, paste("siteModel_CLU", clu,sep=""), paste("hky_CLU", clu,sep=""),
substitutionModel.class='HKYModel', gamma.id=paste("site.alpha",sep=""), gamma.value="0.5", gamma.lower="0.0", gammaCategories="4", verbose=1)
# link site model to other clusters
for(clu in seq.select[, unique(CLU_ID)][-1])
beast.add.siteModel(bxml, paste("siteModel_CLU", clu,sep=""), paste("hky_CLU", clu,sep=""),
substitutionModel.class='HKYModel', gamma.idref=paste("site.alpha",sep=""), gammaCategories="4", verbose=1)
# add treeLikelihood
for(clu in seq.select[, unique(CLU_ID)])
beast.add.treeLikelihood(bxml, paste("treeLikelihood_CLU",clu,sep=""),
paste("patterns_CLU",clu,sep=''),
paste("treeModel_CLU",clu,sep=''),
paste('siteModel_CLU',clu,sep=''),
paste("branchRates_CLU",clu,sep=''), useAmbiguities='false', verbose=1)
# add compound likelihood
tmp <- as.vector(sapply(seq.select[, unique(CLU_ID)], function(clu) paste("treeLikelihood_CLU",clu,sep="") ))
beast.add.likelihood(bxml, 'likelihood', tmp)
# add prior element
beast.add.priors(bxml, 'prior')
# add priors https://code.google.com/p/beast-mcmc/wiki/ParameterPriors
beast.add.logNormalPrior(bxml, "hky.kappa", 1, 1.25, 0) #default
beast.add.uniformPrior(bxml, "ucld.mean", 0.0001, 0.006)
beast.add.exponentialPrior(bxml, "ucld.stdev", 0.1, 0)
beast.add.exponentialPrior(bxml, "site.alpha", 0.5, 0)
for(clu in seq.select[, unique(CLU_ID)])
beast.add.uniformPrior(bxml, paste("treeModel.rootHeight_CLU",clu,sep=''), 0, 80)
beast.add.gammaPrior(bxml, "skygrid.precision", 0.001, 1000, 0)
beast.add.gmrfSkyGridPrior(bxml, 'skygrid')
# add mcmc element
beast.add.mcmc(bxml, mcmc.id='mcmc', posterior.id='posterior', prior.idref='prior', likelihood.idref='likelihood',
mcmc.chainLength=20e6, mcmc.autoOptimize='true', mcmc.operatorAnalysis= paste(file.name.base, '.ops',sep=''))
# add operator element
beast.add.operators(bxml, 'operators')
# add scale operators
tmp <- data.table( ID= c("hky.kappa","site.alpha","ucld.mean","ucld.stdev","skygrid.precision", sapply( seq.select[, unique(CLU_ID)], function(clu) paste("treeModel.rootHeight_CLU",clu,sep='') )), SF=0.75, W=1)
set(tmp, tmp[,which(grepl('ucld',ID))], 'W', 5)
for(i in seq_len(nrow(tmp)))
beast.add.scaleOperator(bxml, tmp[i,ID], tmp[i,SF], tmp[i,W])
# add internal node height operators
for(clu in seq.select[, unique(CLU_ID)])
beast.add.uniformOperator(bxml, paste("treeModel.internalNodeHeights_CLU",clu,sep=''), weight=1)
# add UpDown operator on ucld.mean and root heights
tmp <- sapply( seq.select[, unique(CLU_ID)], function(clu) paste("treeModel.allInternalNodeHeights_CLU",clu,sep='') )
beast.add.upDownOperator(bxml, "ucld.mean", tmp, scaleFactor=0.75, weight=1)
# add gmrf block operator
beast.add.gmrfGridBlockUpdateOperator(bxml, 'skygrid', scaleFactor=2, weight=1)
# add branch rate category operators
for(clu in seq.select[, unique(CLU_ID)])
{
beast.add.uniformIntegerOperator(bxml, paste("branchRates.categories_CLU",clu,sep=''), 1)
beast.add.swapOperator(bxml, paste("branchRates.categories_CLU",clu,sep=''), size=1, weight=1, autoOptimize="false")
}
# add screen log
beast.add.screenLog(bxml, log.id='screenLog', posterior.idref='posterior', likelihood.idref="likelihood", prior.idref="prior", logEvery=2e4)
# add file log
beast.add.fileLog(bxml, log.id='fileLog', posterior.idref='posterior', likelihood.idref="likelihood", prior.idref="prior", logEvery=2e4,
log.fileName=paste(file.name.base, '.log',sep=''), log.overwrite='false',
select.id=c('rootHeight', 'ucld', 'hky', 'alpha', 'skygrid','branchRatesStat'), deselect.id=c('frequencies','categories'))
# add tree log for every transmission cluster
if(with.logTree)
for(clu in seq.select[, unique(CLU_ID)])
{
beast.add.logTree(bxml, paste("logTree_CLU",clu,sep=''), paste("treeModel_CLU",clu,sep=''), paste("branchRates_CLU",clu,sep=''), 'posterior',
logEvery=2e4, nexusFormat='true', sortTranslationTable='true',
fileName=paste(file.name.base,'_CLU', clu, '.trees', sep=''), verbose=1)
}
bxml
}
######################################################################################
#' @title Generate multilocus XML file with HKY model and fixed starting tree
#' @description See Examples.
# import XML phytools ape data.table reshape2 ggplot2
#' @export
#' @example example/ex.beastxml.multilocus.hky.fixed.tree.R
#' @author Oliver Ratmann
beastxml.multilocus.hky.fixed.tree<- function( file.name, seq.select, phd, verbose=1)
{
stopifnot(c("CLU_ID", "TAXON_ID", "TAXON_NAME", "SEQ", "SAMPLINGTIME")%in%names(seq.select))
file.name.base <- gsub('.xml','',rev(strsplit(file.name,'/')[[1]])[1])
# write XML file with new sequences
bxml <- newXMLDoc(addFinalizer=T)
bxml.beast <- newXMLNode("beast", doc=bxml, addFinalizer=T)
tmp <- newXMLCommentNode(text=paste("Generated by beastxml.multilocus.hky.fixed.tree"), parent=bxml.beast, doc=bxml, addFinalizer=T)
# add new taxa
beast.add.taxa(bxml, seq.select, beast.date.direction= "forwards", beast.date.units= "years", verbose=1)
# add alignment for every transmission cluster
for(clu in seq.select[, unique(CLU_ID)])
{
beast.add.alignment(bxml, subset(seq.select, CLU_ID==clu), df.seqfield='SEQ', beast.alignment.id=paste("alignment_CLU",clu,sep=''), beast.alignment.dataType= "nucleotide", verbose=1)
}
# add starting tree for every transmission cluster
for(clu in names(phd))
{
beast.add.startingtree(bxml, phd[[clu]], beast.usingDates="true", beast.newickid=paste("startingTree_CLU",clu,sep=''), beast.brlunits="years", verbose=1)
}
# add CODON patterns for every alignment
for(clu in seq.select[, unique(CLU_ID)])
{
beast.add.patterns(bxml, paste("patterns_CLU",clu,sep=''), paste("alignment_CLU",clu,sep=''), 1, beast.patterns.every=NA, beast.patterns.strip='false', verbose=1)
}
# add tree model for every alignment
for(clu in seq.select[, unique(CLU_ID)])
{
beast.add.treemodel(bxml, paste("treeModel_CLU",clu,sep=''), paste("treeModel.rootHeight_CLU",clu,sep=''), paste("treeModel.internalNodeHeights_CLU",clu,sep=''), paste("treeModel.allInternalNodeHeights_CLU",clu,sep=''), newick.id=paste("startingTree_CLU",clu,sep=''), internalNodes='true', rootNode='true', verbose=1)
}
# add skygrid
tmp <- sapply( seq.select[, unique(CLU_ID)], function(x) paste("treeModel_CLU",x,sep='') )
grid.height <- max(sapply(phd, function(ph) max(node.depth.edgelength(ph))))
beast.add.gmrfskygrid(bxml, "skygrid", "skygrid.logPopSize", "skygrid.precision", "skygrid.numGridPoints", "skygrid.cutOff", tmp,
80, -2.396895772465287, 1.0, 0.0, grid.height, verbose=1)
# add uncorrelated relaxed clock for first cluster
tmp <- seq.select[, unique(CLU_ID)]
clu <- tmp[1]
beast.add.discretizedBranchRates(bxml, paste("branchRates_CLU",clu,sep=''), paste("treeModel_CLU",clu,sep=''), paste("branchRates.categories_CLU",clu,sep=''),
mean.id="ucld.mean", mean.value="0.0025", mean.lower="0.0", sd.id="ucld.stdev", sd.value="0.3333333333333333", sd.lower="0.0", meanInRealSpace="true",
rateCategories.dimension= subset(seq.select, CLU_ID==clu)[, 2*(length(unique(TAXON_ID))-1)], verbose=1)
# link uncorrelated relaxed clock to other clusters
for(clu in tmp[-1])
{
beast.add.discretizedBranchRates(bxml, paste("branchRates_CLU",clu,sep=''), paste("treeModel_CLU",clu,sep=''), paste("branchRates.categories_CLU",clu,sep=''),
mean.idref="ucld.mean", sd.idref="ucld.stdev", meanInRealSpace="true",
rateCategories.dimension= subset(seq.select, CLU_ID==clu)[, 2*(length(unique(TAXON_ID))-1)], verbose=1)
}
# add rate statistics for all clusters
for(clu in seq.select[, unique(CLU_ID)])
{
beast.add.rateStatistics(bxml, paste('branchRatesStat_CLU',clu, sep=''), paste('treeModel_CLU',clu, sep=''), paste('branchRates_CLU',clu, sep=''))
}
# add HKY model for first cluster
clu <- seq.select[, unique(CLU_ID)][1]
beast.add.hkyModel(bxml, paste("hky_CLU", clu,sep=""), paste("patterns_CLU",clu,sep=''), paste("hky.frequencies_CLU",clu,sep=''),
rate.id=paste("hky.kappa",sep=""), rate.value="1.0", rate.lower="0.0",
frequencies.dimension='4', frequencyModel.dataType='nucleotide')
# link HKY model to other clusters
for(clu in seq.select[, unique(CLU_ID)][-1])
{
beast.add.hkyModel(bxml, paste("hky_CLU", clu,sep=""), paste("patterns_CLU",clu,sep=''), paste("hky.frequencies_CLU",clu,sep=''),
rate.idref=paste("hky.kappa",sep=""),
frequencies.dimension='4', frequencyModel.dataType='nucleotide')
}
# add site model for first cluster
clu <- seq.select[, unique(CLU_ID)][1]
beast.add.siteModel(bxml, paste("siteModel_CLU", clu,sep=""), paste("hky_CLU", clu,sep=""),
substitutionModel.class='HKYModel', gamma.id=paste("site.alpha",sep=""), gamma.value="0.5", gamma.lower="0.0", gammaCategories="4", verbose=1)
# link site model to other clusters
for(clu in seq.select[, unique(CLU_ID)][-1])
beast.add.siteModel(bxml, paste("siteModel_CLU", clu,sep=""), paste("hky_CLU", clu,sep=""),
substitutionModel.class='HKYModel', gamma.idref=paste("site.alpha",sep=""), gammaCategories="4", verbose=1)
# add treeLikelihood
for(clu in seq.select[, unique(CLU_ID)])
beast.add.treeLikelihood(bxml, paste("treeLikelihood_CLU",clu,sep=""),
paste("patterns_CLU",clu,sep=''),
paste("treeModel_CLU",clu,sep=''),
paste('siteModel_CLU',clu,sep=''),
paste("branchRates_CLU",clu,sep=''), useAmbiguities='false', verbose=1)
# add compound likelihood
tmp <- as.vector(sapply(seq.select[, unique(CLU_ID)], function(clu) paste("treeLikelihood_CLU",clu,sep="") ))
beast.add.likelihood(bxml, 'likelihood', tmp)
# add prior element
beast.add.priors(bxml, 'prior')
# add priors https://code.google.com/p/beast-mcmc/wiki/ParameterPriors
beast.add.logNormalPrior(bxml, "hky.kappa", 1, 1.25, 0) #default
beast.add.uniformPrior(bxml, "ucld.mean", 0.0001, 0.006)
beast.add.exponentialPrior(bxml, "ucld.stdev", 0.1, 0)
beast.add.exponentialPrior(bxml, "site.alpha", 0.5, 0)
beast.add.gammaPrior(bxml, "skygrid.precision", 0.001, 1000, 0)
beast.add.gmrfSkyGridPrior(bxml, 'skygrid')
# add mcmc element
beast.add.mcmc(bxml, mcmc.id='mcmc', posterior.id='posterior', prior.idref='prior', likelihood.idref='likelihood',
mcmc.chainLength=20e6, mcmc.autoOptimize='true', mcmc.operatorAnalysis= paste(file.name.base, '.ops',sep=''))
# add operator element
beast.add.operators(bxml, 'operators')
# add scale operators
tmp <- data.table( ID= c("hky.kappa","site.alpha","ucld.mean","ucld.stdev","skygrid.precision"),
SF= 0.75,
W= c(0.1, 0.1, 3, 3, 0.1))
for(i in seq_len(nrow(tmp)))
{
beast.add.scaleOperator(bxml, tmp[i,ID], tmp[i,SF], tmp[i,W])
}
# add gmrf block operator
beast.add.gmrfGridBlockUpdateOperator(bxml, 'skygrid', scaleFactor=2, weight=2)
# add branch rate category operators
for(clu in seq.select[, unique(CLU_ID)])
{
beast.add.uniformIntegerOperator(bxml, paste("branchRates.categories_CLU",clu,sep=''), 10)
beast.add.swapOperator(bxml, paste("branchRates.categories_CLU",clu,sep=''), size=1, weight=10, autoOptimize="false")
}
# add screen log
beast.add.screenLog(bxml, log.id='screenLog', posterior.idref='posterior', likelihood.idref="likelihood", prior.idref="prior", logEvery=2e4)
# add file log
beast.add.fileLog(bxml, log.id='fileLog', posterior.idref='posterior', likelihood.idref="likelihood", prior.idref="prior", logEvery=2e4,
log.fileName=paste(file.name.base, '.log',sep=''), log.overwrite='false',
select.id=c('rootHeight', 'ucld', 'hky', 'alpha', 'skygrid','branchRatesStat'), deselect.id=c('frequencies','categories'))
# add tree log for every transmission cluster
#for(clu in seq.select[, unique(CLU_ID)])
#{
# beast.add.logTree(bxml, paste("logTree_CLU",clu,sep=''), paste("treeModel_CLU",clu,sep=''), paste("branchRates_CLU",clu,sep=''), 'posterior',
# logEvery=2e4, nexusFormat='true', sortTranslationTable='true',
# fileName=paste(file.name.base,'_CLU', clu, '.trees', sep=''), verbose=1)
#}
bxml
}
######################################################################################
#' @title Create BEAST XML file from template
#' @description See Examples.
#' @param btemplate BEAST XML template or file name of BEAST XML template
#' @param df Taxon data.table
#' @param start.tree Starting tree containing the taxa in the taxon data.table (optional)
#' @param xml.taxonsets4clusters If true, add taxon sets for each phylogenetic cluster in the taxon data.table
#' @param xml.monophyly4clusters If true, add monophyly contraints to each phylogenetic cluster in the taxon data.table
#' @param verbose If true, run in verbose mode
#' @example example/ex.beast.from.template.R
#' @export
#' @author Oliver Ratmann
beastxml.from.template<- function( btemplate, df, start.tree=NULL, xml.taxonsets4clusters=1, xml.monophyly4clusters=1, verbose=1)
{
stopifnot(class(btemplate)[1]%in%c('character',"XMLInternalDocument"))
if(class(btemplate)=='character')
{
# load xml template file
btemplate <- xmlTreeParse(btemplate, useInternalNodes=TRUE, addFinalizer = TRUE)
}
bxml <- newXMLDoc(addFinalizer=T)
bxml.beast <- newXMLNode("beast", doc=bxml, addFinalizer=T)
newXMLCommentNode(text=paste("Generated by rBEAST from template"), parent=bxml.beast, doc=bxml, addFinalizer=T)
# add new taxa
beast.add.taxa(bxml, df, beast.date.direction= "forwards", beast.date.units= "years", verbose=verbose)
# add alignment for every transmission cluster
beast.add.alignment(bxml, df, beast.alignment.dataType= "nucleotide", verbose=verbose)
# add startingTree
if(!is.null(start.tree)) # add startingTree if provided
beast.add.startingtree(bxml, start.tree, beast.newickid= "startingTree", beast.usingDates="true", beast.brlunits="years", verbose=verbose)
# copy everything else from template
beast.add.from.template(bxml, btemplate, select.after.elt=NA, select.to.elt=NA, verbose=1)
#
# for clusters, add taxon sets and tmrca statistics
#
if(xml.taxonsets4clusters)
{
tmp <- length(getNodeSet(bxml, "//treeModel"))
if(tmp==0)
stop('Cannot find treeModel.')
treeModel.id <- unique(unlist(xpathApply(bxml, "//treeModel[@id]", xmlGetAttr, "id")))
if(length(treeModel.id)>1)
stop('More than one treeModel currently not supported.')
taxonset.prefix <- 'c'
includeStem <- "false"
# add taxon sets for each cluster
beast.add.taxonsets.for.clusters(bxml, df, taxonset.prefix=taxonset.prefix)
# add tmrcaStatistic for each cluster
beast.add.tmrcaStatistic.for.taxonsets(bxml, treeModel.id=treeModel.id, taxonset.prefix=taxonset.prefix, includeStem=includeStem, verbose=verbose)
# add monophylyStatistic for each cluster
if(xml.monophyly4clusters)
{
if(is.null(start.tree))
warning('BEAST may have zero probabilitiy for initial topology if starting tree not specified and mononphyly constraints set')
beast.add.monophylyStatistic.for.taxonsets(bxml, treeModel.id=treeModel.id, taxonset.prefix=taxonset.prefix)
}
}
#
# if user-provided startingTree, reset <upgmaTree idref="startingTree"/> to <newick idref="startingTree"/>
#
if(!is.null(start.tree))
{
tmp <- getNodeSet(bxml, "//*[@idref='startingTree']")
if(length(tmp)==0)
stop('Cannot find idref startingTree')
if(length(tmp)>1)
stop("More than one idref startingTree")
xmlName(tmp[[1]]) <- "newick"
}
#
bxml
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.