`sim.dna` <-
function(treenode,seqlength,model,kappa=2,rate=c(1,1,1,1,1,1),frequency=c(1/4,1/4,1/4,1/4))
{
############################################################################################
# model = 1 : JC model
# model = 2 : H2P model, alpha and beta (two parameters).
# model = 3 : HKY model, alpha, beta, frequencies (five parameters)
# model = 4 : GTR model, six rates, frequencies (nine parameters)
# model = 5 : Methylation 3x3 model
# rate[1] = A <-> C rate
# rate[2] = A <-> G rate
# rate[3] = A <-> T rate
# rate[4] = C <-> G rate
# rate[5] = C <-> T rate
# rate[6] = G <-> T rate
#
# code for constructing rate matrix is copied from Mrbayes (Huelsenbeck and Ronquist)
############################################################################################
nodematrix = treenode$nodes
if(model==1){
rateValues<-rep(1,6)
bs<-rep(1/4,4)
}else if(model == 2)
{
rateValues<-rep(1,6)
rateValues[c(2,5)]<-kappa
bs<-rep(1/4,4)
}else if(model == 3){
rateValues<-rep(1,6)
rateValues[c(2,5)]<-kappa
bs<-frequency
}else if (model == 4){
rateValues <- rate
bs <- frequency
}else{
return("Please select a valid model 1-4")
}
a<-matrix(0,4,4)
scaler <- 0.0
for (i in 1:3){
for (j in (i+1):4){
if (i == 1 && j == 2)
mult <- rateValues[1]
else if (i == 1 && j == 3)
mult <- rateValues[2]
else if (i == 1 && j == 4)
mult <- rateValues[3]
else if (i == 2 && j == 3)
mult <- rateValues[4]
else if (i == 2 && j == 4)
mult <- rateValues[5]
else if (i == 3 && j == 4)
mult <- rateValues[6]
a[i,j] <- bs[j] * mult
a[j,i] <- bs[i] * mult
a[i,i] <- a[i,i] - a[i,j]
a[j,j] <- a[j,j] - a[j,i]
scaler <- scaler + bs[i] * a[i,j]
scaler <- scaler + bs[j] * a[j,i]
}
}
#rescale Q matrix
scaler = 1.0 / scaler;
for (i in 1:4)
for (j in 1:4)
a[i,j] = a[i,j] * scaler
root <- .rootoftree(nodematrix)
if(nodematrix[root,1] == -8){
sonnodes<-nodematrix[root,2:4]
nsonnodes<-3
nsequence<-(dim(nodematrix)[1]+2)/2
} else{
sonnodes<-nodematrix[root,2:3]
nsonnodes<-2
nsequence<-(dim(nodematrix)[1]+1)/2
}
##nucleotides at root node follow the equilibrium distribution defined by parameter frequency.
dna<-matrix(0,nrow=dim(nodematrix)[1],ncol=seqlength)
dna[root, ] <- sample(1:4,size=seqlength,replace=TRUE,prob=frequency)
tranprob<-matrix(0,seqlength,4)
##nucleotides at other nodes
while(nsonnodes>0){
for(i in 1:nsonnodes){
father<-nodematrix[sonnodes[i],1]
dna[sonnodes[i],] <- .sim.nucleotide(dna[father,],nodematrix[sonnodes[i],4], a)
}
sonnodes<-nodematrix[sonnodes,2:3]
sonnodes<-sonnodes[sonnodes>0]
{if(length(sonnodes)>0)
nsonnodes<-length(sonnodes)
else
nsonnodes<-0
}
}
seq = dna[1:nsequence,]
rownames(seq) = treenode$names
return(seq)
}
.rootoftree <- function (nodematrix)
{
if(sum(nodematrix[,1]<0)>1)
stop("more than two roots!")
nodes<-sum((nodematrix[,1]<0)*(1:length(nodematrix[,1])))
return(nodes)
}
.sim.nucleotide <- function(nucleotide, brlens, qmatrix)
{
tranprob<-expm(Matrix(qmatrix*brlens))
new<-tranprob[nucleotide,]
newnucleotide<-apply(new,1,function(x) sample(1:4,1,TRUE,x))
newnucleotide
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.