# Simple(simplistic) simulation of trees with outliers. No missing data.
#' Simplistic simulation of gene trees with outliers
#'
#' Simple (simplistic) simulation of trees with outliers.
#'
#' The simulation process is as follows: a first tree is generated with the \code{rtree()} function
#' and is then duplicated and modified according to the parameters chosen by the user.
#'
#' @usage simtrees(Ngn, Nsp, Nsp.out = 0, Ngn.out = 0, Nb.cell.outlier = 0,
#' brlen.sd = 0, out.type="topology",bl.mult=2)
#' @param Ngn Number of gene trees to simulate.
#' @param Nsp Number of species (tips) per tree.
#' @param Nsp.out Number of outlier species (also called rogue taxa). 0 = none.
#' @param Ngn.out Number of outlier genes. 0 = none.
#' @param Nb.cell.outlier Number of times one species in one tree is an outlier. 0 = none. The type of outlier is set by \code{out.type=}.
#' @param brlen.sd Heterogeneity of branch lengths in trees. A value with mean 0 and standard
#' deviation equal to brlen.sd is added to each branch length. If the resulting branch length has a negative value, its absolute value is taken.
#' @param out.type The type of cell outlier. Can be "topology" (the default), where outlier species
#' are simulated by branching them elsewhere (randomly) in the tree, "brlength", where terminal branches of outlier species are
#' multiplied by \code{bl.mult} (see after) or "both" where half of the outliers are "topology" and the other half are "brlength".
#' In the latter case, if the number of outliers (\code{Nb.cell.outlier}) id odd, there is one more topology outlier than brlength outlier simulated.
#' @param bl.mult Multiplier of terminal branches of outlier species when \code{out.type="topology"} or \code{out.type="both"}. Ignored otherwise.
#' @return A list X containing a list of trees in \code{multiPhylo} format (X$trees) and a list of outliers (X$outl).
#' @examples
#' # Very basic simulator, for debugging purpose mainly.
#' # examples: 30 genes, 120 species, 2 outlier species, 3 outlier genes
#' # 4 gene/species outliers of type "topology", branch length variance = 0.6.
#' # The branch length multiplier is set to 2 but this is
#' # ignored if out.type="topology"
#' simu<-simtrees(30,120,2,3,4,0.6, "topology",2)
#' trees<-simu$trees #list of trees
#' outl<-simu$outl #list of simulated outliers and their type
#'
#'
#'
#' @importFrom ape rtree drop.tip Ntip bind.tree multi2di dist.topo unroot
#' @importFrom stats rnorm
#' @export
simtrees<-function(Ngn, Nsp, Nsp.out=0,Ngn.out=0,Nb.cell.outlier=0, brlen.sd=0, out.type="topology", bl.mult=2) {
gen.trees<-function(Ntrees, Ntiptotal, Nspmove, NbweirdGenes, brlen.sd) {
res<-list()
selec<-NULL
anames<-NULL
a<-rtree(Ntiptotal)
TrueTree<-a
if (Nspmove>0) {
anames<-sample(a$tip.label, Nspmove)
a<-drop.tip(a, anames)
}
for (i in 1:Ntrees){
res[[i]]<-a
if (Nspmove>0) {
for (j in 1:Nspmove) {
b<-rtree(2)
nodelabs<-(Ntip(res[[i]])+1):(Ntip(res[[i]])+Ntip(res[[i]])-1)
tiplab<-c(anames[j], "out")
b$tip.label<-tiplab
tr<-bind.tree(res[[i]],b, where=sample(nodelabs, 1))
tr<-drop.tip(tr, tiplab[2])
tr<-multi2di(tr)
tr$edge.length[tr$edge.length==0]<-mean(tr$edge.length)
res[[i]]<-tr
}
}
}
if (NbweirdGenes>0) {
sam<-1:Ntrees
selec<-sample(sam,NbweirdGenes)
for (k in selec) {
res[[k]]<-rtree(Ntiptotal)
}
}
RES<-NULL
RES$TrueTree<-TrueTree
RES$trees<-lapply(res, SlightChangeInTreeBrLen, brlen.sd=brlen.sd)
RES$gn<-selec
RES$sp<-anames
return(RES)
}
add.outliers<-function(trees, nbrep, type, mult) {
move.tip<-function(tree, tip) {
treenew<-tree
cpt<-0
while(dist.topo(unroot(tree),unroot(treenew))==0) {
treesmall<-drop.tip(tree, tip)
temptree<-rtree(2)
temptree$tip.label[1]<-"out"
temptree$tip.label[2]<-tip
node.attach<-sample(unique(treesmall$edge[,1]),1)
treenew<-bind.tree(treesmall, temptree,where=node.attach)
treenew<-drop.tip(treenew,"out")
treenew<-multi2di(treenew)
}
return(treenew)
}
expand.branch<-function(tree,tip, m) {
treenew<-tree
wheretip<-which(tree$tip.label==tip)
whereedge<-which(tree$edge[,2]==wheretip)
treenew$edge.length[whereedge]<-tree$edge.length[whereedge]*m
return(treenew)
}
Ntiptotal<-Ntip(trees[[1]])
Ntrees<-length(trees)
rrr<-c(NA,NA,NA)
names(rrr)<-c("Species","Genes", "type")
if (type=="topology") {
for (w in 1:nbrep) {
which.gn<-sample(1:Ntrees, 1)
which.sp<-paste("t",sample(1:Ntiptotal, 1),sep="")
rrr<-rbind(rrr,c(which.sp,which.gn, type))
trees[[which.gn]]<-move.tip(trees[[which.gn]], which.sp)
}
}
if (type=="brlength") {
for (w in 1:nbrep) {
which.gn<-sample(1:Ntrees, 1)
which.sp<-paste("t",sample(1:Ntiptotal, 1),sep="")
rrr<-rbind(rrr,c(which.sp,which.gn, type))
trees[[which.gn]]<-expand.branch(trees[[which.gn]], which.sp, mult)
}
}
if (type=="both") {
nbrep1<-ceiling(nbrep/2)
nbrep2<-nbrep-nbrep1
for (w in 1:nbrep1) {
which.gn<-sample(1:Ntrees, 1)
which.sp<-paste("t",sample(1:Ntiptotal, 1),sep="")
rrr<-rbind(rrr,c(which.sp,which.gn, "topology"))
trees[[which.gn]]<-move.tip(trees[[which.gn]], which.sp)
}
for (w in 1:nbrep2) {
which.gn<-sample(1:Ntrees, 1)
which.sp<-paste("t",sample(1:Ntiptotal, 1),sep="")
rrr<-rbind(rrr,c(which.sp,which.gn, "brlength"))
trees[[which.gn]]<-expand.branch(trees[[which.gn]], which.sp, mult)
}
}
RES<-NULL
RES$trees<-trees
RES$outl<-rrr[2:nrow(rrr),]
return(RES)
}
SlightChangeInTreeBrLen<-function(tr, brlen.sd) {
tr$edge.length<-abs(tr$edge.length+rnorm(length(tr$edge.length), sd=brlen.sd))
tr
}
tr<-gen.trees(Ngn,Nsp,Nsp.out,Ngn.out, brlen.sd)
if (Nb.cell.outlier>0) {
resu<-add.outliers(tr$trees, Nb.cell.outlier, out.type, bl.mult)
}
else {
resu<-list()
resu$outl<-NULL
resu$trees<-tr$trees
}
class(resu$trees)<-"multiPhylo"
return(resu)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.