PhyInformR: Rapid Calculation of Phylogenetic Information Content

##libraries
library(ape)
library(splines)
library(gplots)
library(RColorBrewer)
require(phytools)
library(foreach)
library(iterators)
library(geiger)
library(doParallel)
library(gridExtra)
library(ggplot2)
library(hexbin)
library(PBSmodelling)
#set the number of cores
registerDoParallel(cores=8)

##internal functions, just read these in###########################
###########################
###########################
###########################
###Need this to get individual point at a time of profile
#this function is based on Townsend 2007
site.summer<-function(rate.vector,time)
{
	length(rate.vector)->calculation.length
		at.site<-matrix(ncol=calculation.length)
for(i in 1:calculation.length)
		

	{
		rate.vector[i]->current
		16*current*current*time*exp(-4*current*time)->at.site[i]
		}
		sum(at.site)->inform.at.time
		return(inform.at.time)
		}

##another internal function
###########################
###########################
###########################

get.ind.sites<-function(rate.output,breaks)
{
	
	rate.output->rates
	length(rates)->vector.length
	c(1:vector.length)->numbers
	cbind(numbers,rates)->unsorted.matrix
	length(breaks[,1])->n
	length(rates)->limit
	matrix(ncol=n, nrow=limit)->extracted.sites
	matrix(ncol=n)->names.of.columns
	for(i in 1:n)
	{
###this part looks through the breaks and extracts the site numbers for each user specified bin
		breaks[i,]->upper.lower
		upper.lower[1]->lower
		upper.lower[2]->upper
		which(rates>=lower)->lista
		which(rates<=upper)->listb
		####get the list of sites, which are bigger than lower bound but smaller than upper bound
		lista[(lista%in%listb)]->numbers
		length(numbers)->data.length
		limit-data.length->filler	
		rep("Na",filler)->fill
		c(numbers,fill)->output
		output-> extracted.sites[,i]
		 
	}
	###assign column names
	for(i in 1:n)
	{
		string1="Charset_"
		string2=paste(string1,i,sep="")
		string3=paste(string2,":",sep="")
		names.of.columns[,i]<-string3
		}
		colnames(extracted.sites)<-names.of.columns
		as.data.frame(extracted.sites)->ES
		return(ES)

}	

###internal function #3
###########################
###########################
###########################
###########################

inform.profile.generator2<-function(use.rates,tree)
	{
	branching.times(tree)->btimes
	c(0,btimes)->btimes2
	sort(btimes2)->sorted.btimes
	length(btimes2)->branching.points
	length(use.rates)->calculation.length
	
	inform.at.time<-matrix(ncol=branching.points)
	for(i in 1:branching.points)
	{
		sorted.btimes[i]->btime
		site.summer(use.rates,btime)->inform.at.time[i]

	
}
inform.at.time->close
return(close)
}
#' @export
defined.multi.profile<-function(rate.vector,tree,breaks)
{
	
	length(rate.vector)->n
	branching.times(tree)->btimes
	c(0,btimes)->btimes2
	sort(btimes2)->sorted.btimes
		length(btimes2)->branching.points

	
	length(breaks[,1])->n.parts
	close<-matrix(ncol=branching.points,nrow=n.parts)
	for (i in 1:n.parts)
	{
	
	min(breaks[i,]):max(breaks[i,])->part
	as.matrix(part)->partx
	partx[partx%in%1:n]->part.check
	as.numeric(part.check)->part.check
	rate.vector->rates
	rates[part.check]->part.current
	inform.profile.generator2(part.current,tree)->close[i,]

	}

	

rbind(sorted.btimes,close)->closer
return(closer)
}

#' @export
Approximator<-function(t,t0,rateVector,s)	
{	
rateVector->rv
	currentProbability<-matrix(nrow=length(rv), ncol=1)
	Expectationxinnersum1<-c(0)
	Expectationxinnersum2<-c(0)
	Expectationy<-c(0)
	Expectationy2<-c(0)
	ExpectationX1Y<-c(0)
	ExpectationSQROOTX1Y<-c(0)
	length(rv)->n

###Loop calculations and variance
for(i in 1:n)
{
	rv[i,]->rateVector2
	npnl<-pnl(rateVector2,t,t0,s)
	npro<-prother(rateVector2,t,t0,s)
	npsnr<-psnr(rateVector2,t,t0,s)
	
	Expectationy<-Expectationy+npsnr
	Expectationxinnersum1<-Expectationxinnersum1+npnl
	Expectationxinnersum2<-Expectationxinnersum2+npnl*npnl
	Expectationy2<-Expectationy2+ npsnr* npsnr
	ExpectationX1Y<-ExpectationX1Y+ npsnr* npnl
	ExpectationSQROOTX1Y<-ExpectationSQROOTX1Y+ npsnr*sqrt(npnl)
}


Expectationx<-Expectationxinnersum1+sqrt((Expectationxinnersum1/pi))


Expectation<- Expectationy-Expectationx
variancey<- Expectationy-Expectationy2
variancex<-((pi-1)/pi)*Expectationxinnersum1-Expectationxinnersum2
variance<-variancey+variancex-2*ExpectationX1Y-(2/sqrt(pi)) * ExpectationSQROOTX1Y	
	
rnorm(n, mean=Expectation, sd=sqrt(variance))->ndistr
princtree<-pnorm(-0.5,mean=Expectation, sd=sqrt(variance))
prpolytomy<-pnorm(0.5,mean=Expectation, sd=sqrt(variance))-pnorm(-0.5,mean=Expectation, sd=sqrt(variance))
prcortree=1-pnorm(0.5,mean=Expectation, sd=sqrt(variance))
c("Probabilty Correct", "Probability Polytomy", "Probability Incorrect" )->labels
c(prcortree,prpolytomy,princtree)->values
labels->names(values)
return(values)
}

psnr<-function(lambda,t,t0,s)
{
	(-1/s^3 + 1/s^2 + (3/s^3 -1/s^2 -2/s +1 + (-4/s^2 + 4/s -1)*exp(-t0*lambda)) *exp(-(4*s)/(s-1)*t*lambda) + (-8/s^3+4/s^2  +(8/s^2-4/s) *exp (-t0*lambda)) *exp((-3*s)/(s-1)*t*lambda) +(6/s^3 -4/s^2 +2/s -4/s^2 *exp(-t0*lambda) )*exp((-2*s)/(s-1)*t*lambda) )->psnr.value
	return(psnr.value)}
	
	
pnl<-function(lambda,t,t0,s)
{pnl.value<-( -1/s^3 +1/s^2 + (3/s^3-1/s^2 + (-4/s^2+2/s) * exp(-t0*lambda) ) *exp(((-4*s)/(s-1))*t*lambda) + (-8/s^3+4/s^2+ (8/s^2-4/s) * exp(-t0*lambda)) * exp((-3*s)/(s-1)*t*lambda) + (6/s^3 - 4/s^2 + (-4/s^2+2/s) * exp(-t0*lambda))*exp((-2*s/(s-1))*t*lambda) )
	return(pnl.value)
	}

pnL2<-function(lambda,t,t0,s)
{pnl.value<-(-1/s^3+1/s^2+ (3/s^3-1/s^2+ (-4/s^2+2/s) *exp(-t0*lambda) ) *exp(((-4*s)/(s-1))*t*lambda) + (-8/s^3+4/s^2+(8/s^2-4/s)*exp(-t0*lambda)) * exp((-3*s)/(s-1)*t*lambda) +(6/s^3 - 4/s^2 + (-4/s^2+2/s) *exp(-t0*lambda))*exp((-2*s/(s-1))*t*lambda) )
	return(pnl.value)
	}

	prother<-function(lambda,t,t0,s)
	{prother.value<-1-pnL2(lambda,t,t0,s)-pnl(lambda,t,t0,s)-psnr(lambda,t,t0,s)
		return(prother.value)}
		
pnl2<-function(lambda,t,t0,s)
{pnl.value<-(-1/s^3+1/s^2+ (3/s^3-1/s^2+ (-4/s^2+2/s) *exp(-t0*lambda) ) *exp(((-4*s)/(s-1))*t*lambda) + (-8/s^3+4/s^2+(8/s^2-4/s)*exp(-t0*lambda)) * exp((-3*s)/(s-1)*t*lambda) +(6/s^3 - 4/s^2 + (-4/s^2+2/s) *exp(-t0*lambda))*exp((-2*s/(s-1))*t*lambda) )
	return(pnl.value)
	}

	prother<-function(lambda,t,t0,s)
	{prother.value<-1-pnL2(lambda,t,t0,s)-pnl(lambda,t,t0,s)-psnr(lambda,t,t0,s)
		return(prother.value)}
Approximator.lite<-function(t,t0,rateVector,s)	
{	
rateVector->rv
	currentProbability<-matrix(nrow=length(rv), ncol=1)
	Expectationxinnersum1<-c(0)
	Expectationxinnersum2<-c(0)
	Expectationy<-c(0)
	Expectationy2<-c(0)
	ExpectationX1Y<-c(0)
	ExpectationSQROOTX1Y<-c(0)
	length(rv)->n

###Loop calculations and variance
for(i in 1:n)
{
	rv[i,]->rateVector2
	npnl<-pnl(rateVector2,t,t0,s)
	npro<-prother(rateVector2,t,t0,s)
	npsnr<-psnr(rateVector2,t,t0,s)
	
	Expectationy<-Expectationy+npsnr
	Expectationxinnersum1<-Expectationxinnersum1+npnl
	Expectationxinnersum2<-Expectationxinnersum2+npnl*npnl
	Expectationy2<-Expectationy2+ npsnr* npsnr
	ExpectationX1Y<-ExpectationX1Y+ npsnr* npnl
	ExpectationSQROOTX1Y<-ExpectationSQROOTX1Y+ npsnr*sqrt(npnl)
}


Expectationx<-Expectationxinnersum1+sqrt((Expectationxinnersum1/pi))


Expectation<- Expectationy-Expectationx
variancey<- Expectationy-Expectationy2
variancex<-((pi-1)/pi)*Expectationxinnersum1-Expectationxinnersum2
variance<-variancey+variancex-2*ExpectationX1Y-(2/sqrt(pi)) * ExpectationSQROOTX1Y	
	
rnorm(n, mean=Expectation, sd=sqrt(variance))->ndistr
princtree<-pnorm(-0.5,mean=Expectation, sd=sqrt(variance))
prpolytomy<-pnorm(0.5,mean=Expectation, sd=sqrt(variance))-pnorm(-0.5,mean=Expectation, sd=sqrt(variance))
prcortree=1-pnorm(0.5,mean=Expectation, sd=sqrt(variance))
c("Probabilty Correct", "Probability Polytomy", "Probability Incorrect" )->labels
c(prcortree,prpolytomy,princtree)->values
labels->names(values)
return(prcortree)
}

#' @export
space.maker<-function(rateVector,t,s)
{
	t/20->by.this
	seq(by.this,t-0.0001,by=by.this)->lilts
	rowspace<-matrix(nrow=1,ncol=length(lilts))
	for (i in 1:length(lilts))
	{
	lilts[i]->to
	Approximator.lite(t,to,rateVector,s)->rowspace[i]
	}
	return(rowspace)
}

#' @export
space.maker.narrow<-function(rateVector,t,s)
{
	t/2->halft
	halft/20->by.this
	seq(by.this, halft-0.0001,by=by.this)->lilts
	rowspace<-matrix(nrow=1,ncol=length(lilts))
	for (i in 1:length(lilts))
	{
	lilts[i]->to
	Approximator.lite(t,to,rateVector,s)->rowspace[i]
	}
	return(rowspace)
}




##generates informativeness output like phydesign
inform.profile.generator<-function(rate.vector,tree)
	{
	
	branching.times(tree)->btimes
	c(0,btimes)->btimes2
	sort(btimes2)->sorted.btimes
	length(btimes2)->branching.points
	length(rate.vector)->calculation.length
	
	inform.at.time<-matrix(ncol=branching.points)
	for(i in 1:branching.points)
	{
		sorted.btimes[i]->btime
		site.summer(rate.vector,btime)->inform.at.time[i]

	
}
rbind(sorted.btimes,inform.at.time)->close
return(close)
}

####This part will get all the points with the rate vector already computed from other functions
#' @export
informativeness.profile<-function(rate.vector, tree, codon="FALSE", values="display")
{
  
	branching.times(tree)->btimes
	c(0,btimes)->btimes2
	
	
if (codon=="FALSE"){
inform.profile.generator(rate.vector,tree)->close


close[1,]->sorted.btimes
close[2,]->inform.at.time
round(max(btimes))->upper
upper/5->by.this
round(max(inform.at.time),digits=2)->uppery
uppery/10->by.y

yy <-predict(interpSpline(sorted.btimes, inform.at.time))
mat<- matrix(c(1:2),nrow=2,ncol=1)
layout(mat=mat,heights=c(250,300))
par(mar=c(0,0,0,0), oma=c(5,5,1,1))
#par(bg = "white")   
#split.screen(c(2,1))
#screen(1)
#plot(0,0,type="n",axes=FALSE,xlab="",ylab="")
##coord are left,right,bottom,top from 0 to 1

par(plt=c(0,0.9,0.2,0.99))
plot(tree,show.tip.label=FALSE,direction="l")
####Lower corner, note that the pi is offset to mirror the trees end

par(plt=c(0.027,0.9,0,0.99))

plot(sorted.btimes,inform.at.time,pch=NA_integer_,axes=FALSE, ylim=c(0,uppery+(uppery*.15)), xlim=c(0,upper))
axis(1, at = seq(0, upper, by = by.this), las =1, lwd=1, labels=TRUE, mgp=c(.75,.5,0))
axis(2, at = seq(0, uppery, by = by.y), las =1, lwd=1, labels=TRUE, mgp=c(.75,.5,0))


lines(yy, pch=NA_integer_, col="blue",lty=1,)
legend("topright",y=NULL,c("PI of Locus"),lty=1,col="blue",lwd=2,title="PI Profile")
#plot(0,0,type="n",axes=FALSE,xlab="",ylab="")
#close.screen(all = TRUE)

}
if (codon=="TRUE")
{
	pos1 <- rate.vector[seq(1, length(rate.vector), 3)]
	pos2 <- rate.vector[seq(2, length(rate.vector), 3)]
	pos3 <- rate.vector[seq(3, length(rate.vector), 3)]
	length(btimes2)->branching.points

	close2<-matrix(ncol=branching.points,nrow=3)

	inform.profile.generator2(pos1,tree)->close2[1,]
	inform.profile.generator2(pos2,tree)->close2[2,]
	inform.profile.generator2(pos3,tree)->close2[3,]

	
inform.profile.generator(rate.vector,tree)->close


close[1,]->sorted.btimes
close[2,]->inform.at.time
round(max(btimes))->upper
upper/5->by.this
sort(btimes2)->sortedbtimes2
round(max(close2),digits=2)->uppery
uppery/10->by.y

mat<- matrix(c(1:2),nrow=2,ncol=1)
layout(mat=mat,heights=c(250,300))
par(mar=c(0,0,0,0), oma=c(5,5,1,1))
#par(bg = "white")   
#split.screen(c(2,1))
#screen(1)
#plot(0,0,type="n",axes=FALSE,xlab="",ylab="")
##coord are left,right,bottom,top from 0 to 1

par(plt=c(0,0.9,0.2,0.99))
plot(tree,show.tip.label=FALSE,direction="l")
####Lower corner, note that the pi is offset to mirror the trees end

par(plt=c(0.027,0.9,0,0.99))

plot(sorted.btimes,close2[3,],pch=NA_integer_,axes=FALSE, ylim=c(0,uppery), xlim=c(0,upper))

axis(1, at = seq(0, upper, by = by.this), las =1, lwd=1, labels=TRUE, mgp=c(.75,.5,0),ylab="Time from Present")
axis(2, at = seq(0, uppery, by = by.y), las =1, lwd=1, labels=TRUE, mgp=c(.75,.5,0),  xlab="Phylogenetic Informativeness")
c("black","blue","gray","green","purple","brown","azure","red","yellow")->colors
c("part1","part2","part3","part4","part5","part6","part7","part8","part9")->leglab
c(1,2,3,1,2,3,1,2,3)->style
c(2,2,3,2,2,3,2,2,3)->thickness
legend("topright",y=NULL,leglab[1:length(close2[,1])],lty=style,col=colors,lwd=thickness,title="Partition PI Profile")
for (i in 1:length(close2[,1])){
close2[i,]->inform.at.time	
yy <-predict(interpSpline(sortedbtimes2, inform.at.time))

lines(yy, pch=NA_integer_, col=colors[i],lty=style[i],)

}
resetGraph(reset.mf=TRUE)
rbind(sorted.btimes,close2)->closer
if (values=="display"){
return(closer)} else if (values=="off"){
  return("done")
}
 }
#return(close)
}

####For user defined informativeness profiles, note that this has a maximum limit of X since the plot will become unreadable
#' @export
multi.profile<-function(rate.vector,tree,breaks,values="display")
{
	length(rate.vector)->n
	branching.times(tree)->btimes
	c(0,btimes)->btimes2
	sort(btimes2)->sorted.btimes
		length(btimes2)->branching.points

	get.ind.sites(rate.vector,breaks)->ES
	length(breaks[,1])->n.parts
	close<-matrix(ncol=branching.points,nrow=n.parts)
	for (i in 1:n.parts)
	{
	
	ES[,i]->part
	as.matrix(part)->partx
	partx[partx%in%1:n]->part.check
	as.numeric(part.check)->part.check
	rate.vector->rates
	rates[part.check]->part.current
	inform.profile.generator2(part.current,tree)->close[i,]

	}

	
####now draw the profile######################
########First set the x and y axis bounds##########
round(max(btimes))->upper
upper/5->by.this

round(max(close))->uppery
uppery/10->by.y



mat<- matrix(c(1:2),nrow=2,ncol=1)
layout(mat=mat,heights=c(250,300))
par(mar=c(0,0,0,0), oma=c(5,5,1,1))
#par(bg = "white")   
#split.screen(c(2,1))
#screen(1)
#plot(0,0,type="n",axes=FALSE,xlab="",ylab="")
##coord are left,right,bottom,top from 0 to 1

par(plt=c(0,0.9,0.2,0.99))
plot(tree,show.tip.label=FALSE,direction="l")
####Lower corner, note that the pi is offset to mirror the trees end

par(plt=c(0.027,0.9,0,0.99))

plot(sorted.btimes,close[1,],pch=NA_integer_,axes=FALSE, ylim=c(0,uppery), xlim=c(0,upper))

axis(1, at = seq(0, upper, by = by.this), las =1, lwd=1, labels=TRUE, mgp=c(.75,.5,0),ylab="Time from Present")
axis(2, at = seq(0, uppery, by = by.y), las =1, lwd=1, labels=TRUE, mgp=c(.75,.5,0),  xlab="Phylogenetic Informativeness")
c("black","blue","gray","green","purple","brown","azure","red","yellow")->colors
c("part1","part2","part3","part4","part5","part6","part7","part8","part9")->leglab
c(1,2,3,1,2,3,1,2,3)->style
c(2,2,3,2,2,3,2,2,3)->thickness
legend("topright",y=NULL,leglab[1:n.parts],lty=style,col=colors,lwd=thickness,title="Partition PI Profile")
for (i in 1:n.parts){
close[i,]->inform.at.time	
yy <-predict(interpSpline(sorted.btimes, inform.at.time))

lines(yy, pch=NA_integer_, col=colors[i],lty=style[i],)

}
#resetGraph(reset.mf=TRUE)
rbind(sorted.btimes,close)->closer
if (values=="display"){
  return(closer)} else {
    print("done")
  }
}


###this gets the output of all three positions
#c("times","pos1","pos2","pos3")->rownames

#rbind(close,inform.at.time2)->closer
#rbind(closer,inform.at.time3)->cLoser
#row.names(cLoser)<-rownames
#return(cLoser)

	
	
#}	


####For user defined informativeness profiles, note that this has a maximum limit of X since the plot will become unreadable
#' @export
defined.multi.profile<-function(rate.vector,tree,breaks,values="display")
{
	length(rate.vector)->n
	branching.times(tree)->btimes
	c(0,btimes)->btimes2
	sort(btimes2)->sorted.btimes
		length(btimes2)->branching.points

	
	length(breaks[,1])->n.parts
	close<-matrix(ncol=branching.points,nrow=n.parts)
	for (i in 1:n.parts)
	{
	
	min(breaks[i,]):max(breaks[i,])->part
	as.matrix(part)->partx
	partx[partx%in%1:n]->part.check
	as.numeric(part.check)->part.check
	rate.vector->rates
	rates[part.check]->part.current
	inform.profile.generator2(part.current,tree)->close[i,]

	}

	
####now draw the profile######################
########First set the x and y axis bounds##########
round(max(btimes))->upper
upper/5->by.this

round(max(close))->uppery
uppery/10->by.y



mat<- matrix(c(1:2),nrow=2,ncol=1)
layout(mat=mat,heights=c(250,300))
par(mar=c(0,0,0,0), oma=c(5,5,1,1))
#par(bg = "white")   
#split.screen(c(2,1))
#screen(1)
#plot(0,0,type="n",axes=FALSE,xlab="",ylab="")
##coord are left,right,bottom,top from 0 to 1

par(plt=c(0,0.9,0.2,0.99))
plot(tree,show.tip.label=FALSE,direction="l")
####Lower corner, note that the pi is offset to mirror the trees end

par(plt=c(0.027,0.9,0,0.99))

plot(sorted.btimes,close[1,],pch=NA_integer_,axes=FALSE, ylim=c(0,uppery), xlim=c(0,upper))

axis(1, at = seq(0, upper, by = by.this), las =1, lwd=1, labels=TRUE, mgp=c(.75,.5,0),ylab="Time from Present")
axis(2, at = seq(0, uppery, by = by.y), las =1, lwd=1, labels=TRUE, mgp=c(.75,.5,0),  xlab="Phylogenetic Informativeness")
c("black","blue","gray","green","purple","brown","azure","red","yellow")->colors
c("part1","part2","part3","part4","part5","part6","part7","part8","part9")->leglab
c(1,2,3,1,2,3,1,2,3)->style
c(2,2,3,2,2,3,2,2,3)->thickness
legend("topright",y=NULL,leglab[1:n.parts],lty=style,col=colors,lwd=thickness,title="Partition PI Profile")
for (i in 1:n.parts){
close[i,]->inform.at.time	
yy <-predict(interpSpline(sorted.btimes, inform.at.time))

lines(yy, pch=NA_integer_, col=colors[i],lty=style[i],)

}
#resetGraph(reset.mf=TRUE)
rbind(sorted.btimes,close)->closer
if (values=="display"){
  return(closer)} else {
    print("done")
    
  }
}
NodeWalker<-function(tree)
{

#first figure out which tree$edge rows are just internal nodes
rows <- which(tree$edge[,2]>length(tree$tip.label))
#tree$edge.length[rows]
#this gets the internal node labels (parent and daughter) and internode length
dist <- cbind(tree$edge[rows,],tree$edge.length[rows])
#gets the branchingtimes of parent node by matching column 1
parent_times <- branching.times(tree)[match(dist[,1],names(branching.times(tree)))]
#gets the branchingtimes of daughter node by matching column 1
daughter_times	 <- branching.times(tree)[match(dist[,2],names(branching.times(tree)))]
dist <- cbind(dist,parent_times,daughter_times)
colnames(dist) <- c("parent_node","daughter_node","edge_length","p_node_time","d_node_time")		
return(dist)
}

###Plot the space
#' @export
PlotTreeSI<-function(tree,ratevector,s)
{{
#First get x axis
	NodeWalker(tree)->nodes
	nodes[,4]->parentn
	nodes[,5]->daughtern
probs<-matrix(ncol=3,nrow=length(parentn))
for (i in 1:length(parentn))
{
	parentn[i]->parentValue
	daughtern[i]->t
	parentValue-t->t0
	Approximator(t,t0,ratevector,s)->probs[i,]
	
}		
	probs[,1]->correct
	probs[,3]->incorrect
	
	mat<- matrix(c(1:2),nrow=2,ncol=1)
layout(mat=mat,heights=c(250,300))
par(mar=c(0,0,0,0), oma=c(5,5,1,1))
par(plt=c(0,0.9,0.2,0.99))
plot(tree,show.tip.label=FALSE,direction="l")
par(plt=c(0.027,0.9,0,0.99))

	###upperx is the xlim in case you want to zoom in or expand for another rate vector
	plot(parentn,correct, xlim=c(0, max(parentn)),ylim=c(0,1), col="white", pch=17)
	#points(daughtern,correct, col="blue", pch=17)
	segments(parentn,correct,daughtern,correct, col="blue")
	
	#points(parentn,incorrect, bg=312, pch=25)
	#points(daughtern,incorrect, bg=312, pch=25)
	#segments(parentn,incorrect,daughtern,incorrect, col=312)
	return(probs)
}
	resetGraph(reset.mf=TRUE)
}
#' @export
Plot.Another.TreeSI<-function(tree,ratevector,s,color,type)
{
#First get x axis
	NodeWalker(tree)->nodes
	nodes[,4]->parentn
	nodes[,5]->daughtern
probs<-matrix(ncol=3,nrow=length(parentn))
for (i in 1:length(parentn))
{
	parentn[i]->parentValue
	daughtern[i]->t
	parentValue-t->t0
	Approximator(t,t0,ratevector,s)->probs[i,]
	
}		
	probs[,1]->correct
	probs[,3]->incorrect
	#points(daughtern,correct, col=color, pch=17)
	#points(parentn, correct, bg=312, pch=25)
	par(plt=c(0.027,0.9,0,0.99))

	segments(parentn,correct,daughtern,correct, col=color,lty=type)
	
	#points(parentn,incorrect, bg=312, pch=25)
	#points(daughtern,incorrect, bg=312, pch=25)
	#segments(parentn,incorrect,daughtern,incorrect, col=312)
	return(probs)
resetGraph(reset.mf=TRUE)

}

PlotManyTreeSN<-function(contree,trees,ratevector,s)
{

	sapply(trees,NodeWalker)->nodes
	
	
}

####uses rate output from rate.by.site function and a matrix of breaks to return the site numbers for the break matrix should be in the format lower bound in column one, upper bound in column 2 this can have as many partitions as the user wants. Best idea would be to use the hist(rate.vector[1,]) function to see the frequency distribution of site patterns to design the partitioning strategy	

####Note that this function returns site patterns as a data.frame. Need another function to deal with this output for actual nexus output

get.ind.sites<-function(rate.output,breaks)
{
	
	rate.output->rates
	length(rates)->vector.length
	c(1:vector.length)->numbers
	cbind(numbers,rates)->unsorted.matrix
	length(breaks[,1])->n
	length(rates)->limit
	matrix(ncol=n, nrow=limit)->extracted.sites
	matrix(ncol=n)->names.of.columns
	for(i in 1:n)
	{
###this part looks through the breaks and extracts the site numbers for each user specified bin
		breaks[i,]->upper.lower
		upper.lower[1]->lower
		upper.lower[2]->upper
		which(rates>=lower)->lista
		which(rates<=upper)->listb
		####get the list of sites, which are bigger than lower bound but smaller than upper bound
		lista[(lista%in%listb)]->numbers
		length(numbers)->data.length
		limit-data.length->filler	
		rep("Na",filler)->fill
		c(numbers,fill)->output
		output-> extracted.sites[,i]
		 
	}
	###assign column names
	for(i in 1:n)
	{
		string1="Charset_"
		string2=paste(string1,i,sep="")
		string3=paste(string2,":",sep="")
		names.of.columns[,i]<-string3
		}
		colnames(extracted.sites)<-names.of.columns
		as.data.frame(extracted.sites)->ES
		return(ES)

}	

#to run and log output to cluster, note image name and filename cannot be identical!!!!
###Sample Input: cluster.signal.noise(86, 91, cytBM, 10, filename="tarsius", imagename="tarsius.pdf")
#' @export	
cluster.signal.noise<-function(t, t0, rateVector, nsims,s, filename,imagename, image="FALSE")
{
	signal.noise.multimix(t,t0,rateVector, nsims,s)->currentprobdist
	normdprobdist<-(currentprobdist/nsims)
	###probability of yielding correct tree
	length(rateVector)->n
	2*n+1->max.bound
	n+2->start.right
	n+1->poly
	###probability of getting the right tree
	normdprobdist[start.right:max.bound]->right.signal
	sum(right.signal)->right.signal
	#return(right.signal)
	###probability of polytomy
	normdprobdist[poly]->polytomy
	#return(polytomy)
	###probability of wrong tree
	normdprobdist[1:n]->wrong.tree
	sum(wrong.tree)->false.knowledge
	#return(false.knowledge)
	###odds ratio of correct vs. incorrect tree
	right.signal/false.knowledge->odds.of.recovery
	
	###odds of correct vs incorrect OR polytomy
	wrong.tree+polytomy->bogus
	right.signal/bogus->odds.of.anything
	
	###odds of no polytomy
	1-polytomy->odds.resolving
	###plot using function below
	graph.signal.noise(currentprobdist, rateVector, imagename, image)
	
	##return values
	sig.noise<-cbind(right.signal,polytomy,false.knowledge,odds.of.recovery,odds.of.anything,odds.resolving)
	colnames(sig.noise)<-c("P_correct", "P_poly","P_wrong", "odds_correctvswrong", "odds_corrvswrong/poly", "odds_resolving")
	write.table(sig.noise[1,],  file=filename)
	}


parallel.multimixA<-function (t,t0, ratevector, nsims,s){
	ratevector->rv
	n<-length(rv)
	matrix(nrow=n*2+1, ncol=nsims)->shrill.mess
foreach(q=1:nsims, .combine=cbind)%dopar%
{
CurrentProbabilityDistribution(ratevector, t, t0,s)->shrill.mess[,q]


}	

}

parallel.multimixfull<-function(t,t0,ratevector, nsims, s)
{
	parallel.multimixA(t,t0,ratevector, nsims, s)->temp
rowSums(temp)->currentprobdistro
return(currentprobdistro)
	
	
}
#to run and log output to cluster, note image name and filename cannot be identical!!!!
#' @export
parallel.cluster.signal.noise<-function(t, t0, rateVector, nsims,s, filename,imagename, image="TRUE")
{
	parallel.multimixfull(t,t0,rateVector, nsims,s)->currentprobdist
	normdprobdist<-(currentprobdist/nsims)
	###probability of yielding correct tree
	length(rateVector)->n
	2*n+1->max.bound
	n+2->start.right
	n+1->poly
	###probability of getting the right tree
	normdprobdist[start.right:max.bound]->right.signal
	sum(right.signal)->right.signal
	#return(right.signal)
	###probability of polytomy
	normdprobdist[poly]->polytomy
	#return(polytomy)
	###probability of wrong tree
	normdprobdist[1:n]->wrong.tree
	sum(wrong.tree)->false.knowledge
	#return(false.knowledge)
	###odds ratio of correct vs. incorrect tree
	right.signal/false.knowledge->odds.of.recovery
	
	###odds of correct vs incorrect OR polytomy
	wrong.tree+polytomy->bogus
	right.signal/bogus->odds.of.anything
	
	###odds of no polytomy
	1-polytomy->odds.resolving
	###plot using function below
	graph.signal.noise(currentprobdist, rateVector, imagename, image)
	
	##return values
	sig.noise<-cbind(right.signal,polytomy,false.knowledge,odds.of.recovery,odds.of.anything,odds.resolving)
	colnames(sig.noise)<-c("P_correct", "P_poly","P_wrong", "odds_correctvswrong", "odds_corrvswrong/poly", "odds_resolving")
	write.table(sig.noise[1,],  file=filename)
	
	if (image=="TRUE"){
	  return(sig.noise[1,])
	} else if (image=="FALSE"){
	  return("done")
	}
	}
	
	###Solve for x you need for current probabilty distribution	
##s is the number of states	
ExMaker<-function(t,t0,rateVector,s)
{	
	rateVector->rv
	currentProbability<-matrix(nrow=length(rv), ncol=1)
	nwone<-matrix(nrow=length(rv), ncol=1)
	nwtwo<-matrix(nrow=length(rv), ncol=1)
	snr<-matrix(nrow=length(rv), ncol=1)
	length(rv)->n
		for(i in 1:n)
			{rv[i,]->lambda
			###evaluate lambda	
				npro<-prother(lambda,t,t0,s)
				npsnr<-psnr(lambda,t,t0,s)
				npnl<-pnl(lambda,t,t0,s)
				npnl2<-pnl2(lambda,t,t0,s)
			###empirical test Block
			#npro<-prother(.21,.9,.02)
			#	npsnr<-psnr(.21,.9,.02)
			#	npnl<-pnl(.21,.9,.02)
			#	npnl2<-pnl2(.21,.9,.02)
				
			##draw random Number
			runif(1, min=0, max=1)->randomNumber
			###create columns of values
			###first=null
		###testloop
			#Returner<-function(randomNumber, npsnr,npnl,npnl2,npro,matrix){	
				#n<-length(matrix)
				#	currentProbability<-matrix(nrow=length(matrix), ncol=1)
	#nwone<-0
	#nwtwo<-0
	#snr<-0
			if (randomNumber<npro){
			next}else
			###second=signal noise ratio
			if(randomNumber<npro+npsnr){1->snr[i,]
				}else
			###third=nwone
			if(randomNumber<npro+npsnr+npnl){1->nwone[i,]
			}else			
			###fourth=nwtwo
			if(randomNumber<npro+npsnr+npnl){1->nwtwo[i,] 
				}
			}
		na.omit(snr)->snr
		sum(snr)->snr
		na.omit(nwone)->nwone
		sum(nwone)->nwone
		na.omit(nwtwo)->nwtwo
		sum(nwtwo)->nwtwo
		cbind(nwone, nwtwo)->wrong
		max(wrong)->wronger	
		currentProbability2<-n+1+snr-wronger
		return(currentProbability2)}
		
CurrentProbabilityDistribution<-function(rateVector, t, t0,s)
{
	rateVector->rv
	n<-length(rv)
	ExMaker(t,t0, rateVector,s)->x
	####create zero vector
	mat.or.vec(n*2+1,1)->zero.table
	as.matrix(zero.table)->zero.table.mat
	zero.table.mat[x,1]<-1
	return(zero.table.mat)
	}

###Put all into one matrix
signal.noise.multimix<-function(t,t0,rateVector, nsims,s)
{
	rateVector->rv
	n<-length(rv)
	matrix(nrow=n*2+1, ncol=nsims)->shrill.mess
	for(i in 1:nsims)
	{
		CurrentProbabilityDistribution(rateVector, t, t0,s)->shrill.mess[,i]
		}
	rowSums(shrill.mess)->current.prob.dist
	return(current.prob.dist)}
	
	#internal drawing function for Terminal Runs

graph.signal.noise<-function(currentprobdist, rateVector, filename, image="TRUE")
{
	currentprobdist->matrix.of.noise
	length(rateVector)->n
	2*n+1->max.bound
	n+2->start.right
	n+1->poly
	###isolate_each
	
	matrix.of.noise[poly]->polytomy
	
	###isolate_Non_zero_grey
	matrix.of.noise[start.right:max.bound]->green.side
	green.side!=0->nozero1
	green.side[nozero1]->blue.plot
	###isolate_Non_zero_blue
	matrix.of.noise[1:n]->wrong.side
	wrong.side!=0->nozero2
	wrong.side[nozero2]->grey.plot
	
	###the x y for all
	c(grey.plot,polytomy,blue.plot)->plotter
	1:length(plotter)->plotterx
	
	##the wrong side and poly parts
	length(grey.plot)->wrong.side.length
	1:wrong.side.length->wrong.side.x
	1+wrong.side.length->poly.location
	
	###the prob of correct signal parts
	as.numeric(max(plotterx))->upperbound
	1+poly.location->bluestart
	bluestart:upperbound->correct.x
	
	
if (image=="TRUE"){
  plot(plotterx,plotter, type="h", lwd=6, xaxt="n", bty="l", ylab="Frequency", xlab="Signal Noise Plot")
  if(length(polytomy)>=1)
  {
    lines(poly.location, polytomy, type="h", col="black", xaxt="n", lwd=12)}
  if(length(grey.plot)>=1){
    lines(wrong.side.x, grey.plot, type="h", col="grey", xaxt="n", lwd=12)}
  if(length(blue.plot)>=1)
  {
    lines(correct.x, blue.plot, type="h", col="blue", xaxt="n", lwd=12)}
} else if (image=="FALSE") {

	
	
	##disregard
	#bin<-as.numeric(cut(polytomy,combo.time$breaks))
	#plot(combo.time, col=replace(rep("white", length(combo.time$breaks)-1), bin, "blue"))
	
pdf(file=filename, height=8,width=8)

		plot(plotterx,plotter, type="h", lwd=6, xaxt="n", bty="l", ylab="Frequency", xlab="Signal Noise Plot")
	if(length(polytomy)>=1)
	{
	lines(poly.location, polytomy, type="h", col="black", xaxt="n", lwd=12)}
	if(length(grey.plot)>=1){
	lines(wrong.side.x, grey.plot, type="h", col="grey", xaxt="n", lwd=12)}
	if(length(blue.plot)>=1)
	{
	lines(correct.x, blue.plot, type="h", col="blue", xaxt="n", lwd=12)}
	dev.off()
	}}
#' @export
allmodel.signal.noise<-function(a,b,c,d,e,f,internode,Pi_T,Pi_C,Pi_A,Pi_G, rate_vector)
{
	rate_vector->rr

##Legacy call to Su_et_al.py. 

	#print(internode)
	#paste("-i","--internode",sep=" ")->inttemp
	#paste(inttemp,internode[1],sep=" ")->int1
	#paste("",rr, sep=" ")->rrr
	#as.numeric(rrr)->ra
	#paste("-r","--ratevector", sep=" ")->rtemp
	#paste(rtemp ,ra[1], sep= " ")->rrrr
	#paste("",ra[2:length(ra)], sep= "")->rara
	#paste("python", "./Su_et_al.py", sep=" ")->start
#as.vector(c(a,b,c,d,e,f, Pi_T,Pi_C,Pi_A,Pi_G, int1,internode[2],internode[3],internode[4],internode[5],rrrr,rara, sep=" "))->command
	#c(start, command)->go
	#paste(go, sep=" ", collapse=" ")->go2
	#system(go2, intern=TRUE)

	default_rate_vector<-c(0.003108, 0, 0, 0.015862, 0.000426, 0, 0.005114, 0, 0, 0.00778, 0, 0, 0.001352, 0.000862, 0, 0.000862, 0, 0, 0.001338, 0, 0, 0, 0, 0, 0.005988, 0, 0, 0.001909, 0, 0, 0.000428, 0, 0, 0.000425, 0, 0, 0, 0, 0, 0.004616, 0, 0, 0.000869, 0.000426, 0, 0.000426, 0, 0, 0.004359, 0, 0, 0.001804, 0, 0.000424, 0.003546, 0, 0, 0.003128, 0, 0.000426, 0.015036, 0, 0, 0.005353, 0, 0, 0.004196, 0, 0, 0.002492, 0, 0, 0.005289, 0, 0, 0.004892, 0, 0, 0.005636, 0, 0, 0.002417, 0, 0, 0.003462, 0, 0, 0.001851, 0.000423, 0, 0.000423, 0, 0, 0, 0.000426, 0, 0.004764, 0.001354, 0, 0.00255, 0, 0, 0.004156, 0.001327, 0, 0.004163, 0.001361, 0, 0.001852, 0, 0, 0.000907, 0, 0, 0.002612, 0, 0, 0.001807, 0, 0, 0.003801, 0, 0, 0.001846, 0, 0, 0, 0.000425, 0, 0.004002, 0, 0, 0.009891, 0.000923, 0.001814, 0.002406, 0, 0, 0.000426, 0, 0, 0, 0, 0, 0.00608, 0, 0, 0.001862, 0, 0, 0, 0, 0, 0.003463, 0, 0, 0.003651, 0, 0, 0.000426, 0, 0, 0, 0, 0, 0.003501, 0, 0, 0.000871, 0, 0, 0.005557, 0, 0, 0.001893, 0, 0, 0.000866, 0, 0, 0.001412, 0, 0, 0.004276, 0, 0, 0.002342, 0.001837, 0, 0.001837, 0, 0, 0.001802, 0, 0, 0.005248, 0, 0, 0.001803, 0, 0, 0.001822, 0, 0, 0.001918, 0, 0, 0.003073, 0, 0, 0.001323, 0, 0, 0.009164, 0.002059, 0, 0.002637, 0.000423, 0, 0.002336, 0, 0, 0.003847, 0, 0, 0.004949, 0, 0, 0.002946, 0, 0, 0.001807, 0.000426, 0.000426, 0.000426, 0, 0, 0.003, 0, 0, 0.005688,0, 0, 0.004278, 0.001811, 0.002346, 0.012034, 0, 0, 0.001409, 0.000865, 0, 0.000865, 0, 0, 0.001374, 0, 0, 0.002942, 0, 0, 0, 0, 0.000428, 0.008127, 0, 0, 0.001892, 0, 0, 0.003498, 0.001856, 0, 0.000428, 0, 0, 0.004151, 0, 0, 0.003209, 0, 0, 0.004108, 0, 0, 0.000951, 0, 0, 0.001352, 0, 0, 0.002333, 0, 0, 0.002329, 0, 0, 0.010802, 0, 0, 0.001418, 0, 0, 0.001322, 0, 0, 0.003694, 0.001999, 0.001999, 0.005564, 0, 0, 0.007526, 0, 0, 0.003692, 0, 0, 0.003083, 0.000426, 0, 0.008106, 0.001333, 0.000425, 0.003509, 0, 0, 0.009753,0.001374, 0, 0.006182, 0, 0, 0.001363, 0.000426, 0, 0.00542, 0.001324, 0.001324, 0.004788, 0, 0, 0.000428, 0, 0, 0, 0, 0, 0.006989, 0, 0, 0.006022, 0, 0, 0, 0, 0, 0.004086, 0, 0, 0.003316, 0.000423, 0, 0.003664, 0, 0, 0.005446, 0, 0, 0.005158, 0, 0, 0, 0, 0, 0.002359, 0, 0, 0, 0, 0, 0.002336, 0, 0, 0.003833, 0, 0, 0, 0.000423, 0, 0.002407, 0, 0, 0.003585, 0, 0, 0.002905, 0, 0, 0.005398, 0, 0, 0.001824, 0.000877, 0, 0.005099, 0, 0, 0.000423, 0, 0.000425, 0, 0.000894, 0, 0.010747, 0.002316, 0, 0.005676, 0.000428,0, 0.004035, 0, 0, 0.003574, 0, 0.001347, 0.00183, 0, 0, 0.00385, 0.000876, 0.000424, 0.001835, 0.000428, 0, 0.000428, 0.001382, 0, 0.005137, 0.000423, 0, 0.003118, 0.00087, 0, 0.003728, 0, 0, 0.00405, 0, 0, 0.00087, 0.00134, 0, 0.00134, 0, 0, 0.004536, 0.000425, 0, 0.002412, 0, 0, 0.007825, 0.000874, 0.000424, 0.001347,0.001857, 0, 0.000878, 0.001349, 0, 0.002333, 0.000426, 0.000426, 0.000426, 0.000426, 0.000426, 0.002371, 0, 0, 0.00296, 0.001823, 0, 0.002912, 0.001813, 0.000884, 0.007372, 0, 0, 0.002954, 0.001373, 0,0.001893, 0, 0, 0.001343, 0, 0, 0, 0.003074, 0.000426, 0.006498, 0,0, 0, 0.001854, 0, 0.007631, 0, 0, 0.003719, 0.000874, 0.000426, 0.005504, 0, 0, 0.004131, 0, 0, 0.003597, 0.000869, 0.000428, 0.001836, 0, 0, 0.000423, 0, 0, 0.005095, 0, 0, 0.008057, 0.001426, 0, 0.001426, 0, 0, 0.003061, 0, 0, 0.00459, 0, 0, 0.004175, 0, 0, 0.005326, 0, 0.000441, 0.004984, 0.002378, 0, 0.003662, 0.000428, 0.000435, 0.000423, 0, 0, 0.002364, 0, 0, 0.004486, 0, 0, 0.003928, 0.000871, 0, 0.00354, 0, 0, 0.00568, 0, 0.000874, 0.004861, 0, 0, 0.00283, 0, 0, 0.001818, 0, 0, 0.004818, 0.001412, 0, 0.000881, 0, 0, 0.000423, 0, 0.000426, 0.006653, 0.001377, 0, 0.007102, 0.001848,0, 0.003496, 0, 0, 0.000423, 0, 0, 0.005761, 0, 0, 0.006607, 0.000431, 0, 0.009237, 0.000425, 0, 0.004134, 0, 0, 0.003539, 0, 0, 0.004863, 0, 0, 0.006153, 0, 0, 0.001959, 0, 0, 0.000884, 0.000423, 0, 0.005317, 0, 0, 0.002122, 0, 0, 0, 0, 0, 0.001811, 0.000426, 0.000426, 0.000428, 0, 0, 0.00602, 0, 0, 0.002454, 0, 0, 0.003476, 0, 0, 0.004903, 0, 0, 0.000428, 0, 0, 0.001404, 0, 0, 0.00359, 0.000424, 0, 0.000424, 0, 0, 0.001333, 0, 0, 0, 0.001326, 0, 0.002336, 0.002408, 0.000424, 0.002902, 0, 0, 0.002361, 0, 0, 0.004338, 0.00087, 0, 0.001356, 0, 0, 0.00087, 0.003039, 0.000424, 0.006266, 0, 0, 0.002405, 0.003591, 0.000426, 0.002357, 0.000435, 0.001358, 0.004681, 0.002691, 0, 0.00902, 0.000866, 0, 0.002355, 0.000871, 0, 0.004251, 0, 0, 0.001805, 0.001847, 0, 0.001323, 0.000867, 0.000867, 0.002418, 0, 0, 0, 0, 0, 0.003326, 0, 0, 0.002368, 0, 0, 0.000423, 0.000424, 0.000424, 0.003706, 0, 0, 0.003546, 0, 0.001893, 0.000919, 0, 0, 0.000426, 0, 0, 0.006524, 0, 0, 0.001955, 0.000423, 0.00087, 0.003471, 0, 0, 0.000428, 0.000435, 0.000435, 0.000881, 0, 0, 0.003216, 0.002908, 0, 0.007752, 0, 0, 0.002305, 0, 0, 0.006781, 0.003127, 0, 0.003127, 0, 0.001349, 0.00305, 0, 0, 0.003765, 0.000428, 0, 0.005602, 0, 0, 0.000866, 0.000868, 0.000868, 0.000866, 0, 0, 0.000426, 0, 0, 0.00435, 0, 0, 0.006003, 0.000871, 0, 0.000428, 0, 0, 0.003064, 0, 0, 0.00088, 0, 0, 0, 0.000423, 0, 0.004115, 0, 0, 0.005536, 0.000426, 0, 0.000423, 0, 0.000423, 0, 0, 0, 0.002876, 0, 0, 0.000425, 0, 0, 0.002879, 0, 0, 0.002351, 0, 0, 0.002352, 0.001942, 0, 0.001942, 0, 0, 0, 0, 0, 0.00133, 0, 0, 0.000877, 0, 0, 0.004291, 0, 0, 0.006154, 0, 0, 0.005701, 0.000424, 0, 0.004187, 0.000423, 0, 0.00088, 0, 0, 0.002352, 0, 0, 0.003438, 0, 0, 0.00684, 0, 0, 0.00937, 0, 0, 0, 0, 0, 0.000916, 0, 0.000428, 0.006328, 0, 0, 0.001443, 0, 0, 0.001935, 0, 0, 0.003471, 0, 0, 0.00235, 0, 0, 0.005219, 0, 0, 0.001851, 0, 0,0.005637, 0, 0, 0, 0, 0, 0.002673, 0, 0.000428, 0, 0, 0, 0.001405, 0, 0, 0.002335, 0, 0, 0, 0, 0, 0.007385, 0, 0, 0, 0, 0, 0.000871, 0,0, 0.003282, 0, 0, 0.003464, 0.000423, 0, 0.001809, 0.000426, 0, 0.001938, 0, 0, 0.012756, 0.000428, 0, 0.002343, 0.000427, 0, 0.004977, 0, 0, 0.001794, 0, 0, 0, 0, 0, 0.001827, 0, 0, 0.002322, 0, 0, 0, 0, 0, 0, 0, 0, 0.002388, 0, 0, 0.003643)


	if(length(internode)!=5){
		print("Internode distance list not correct")
		return
	}
	Mu_<- 1/2/(a*Pi_T*Pi_C + b*Pi_T*Pi_A + c*Pi_T*Pi_G +d*Pi_C*Pi_A + e*Pi_C*Pi_G + f*Pi_A*Pi_G)
##Construct Q Matrix
	Q<-matrix(nrow=4,ncol=4)
	Q[1,1]<-((-a)*Pi_C) - (b*Pi_A) - (c*Pi_G)
	Q[1,2]<-a*Pi_C
	Q[1,3]<-b*Pi_A
	Q[1,4]<-c*Pi_G
	Q[2,1]<-a*Pi_T
	Q[2,2]<-((-a)*Pi_T) - (d*Pi_A) - (e*Pi_G)
	Q[2,3]<-d*Pi_A
	Q[2,4]<-e*Pi_G
	Q[3,1]<-b*Pi_T
	Q[3,2]<-d*Pi_C
	Q[3,3]<-((-b)*Pi_T) - (d*Pi_C) - (f*Pi_G)
	Q[3,4]<-f*Pi_G
	Q[4,1]<-c*Pi_T
	Q[4,2]<-e*Pi_C
	Q[4,3]<-f*Pi_A
	Q[4,4]<-((-c)*Pi_T) - (e*Pi_C) - (f*Pi_A)
	Q<-Mu_*Q

#Vectorize the base frequencies
	frequ<-c(Pi_T,Pi_C,Pi_A,Pi_G)

#Obtain the eigenvalues and vectors
	evects<-eigen(Q)
	evalues<-evects$values
	evectors<-evects$vectors

#Reorder in ascending order, swap rows.
	evalues<-evalues[c(4,3,2,1)] #same as mathematica
	evectors<-evectors[c(4,3,2,1),c(4,3,2,1)]


#tev<-t(evectors) #depriciated
	tev<-(evectors)
#Get inverse
	itev<-solve(tev)

#Internal function to evaluate lamda
	evalLambda<-function(lamda){
		p<-list()
		p<-array(,dim=c(5,4,4))
		for(v in 1:length(internode)){
			p[v,,]<-(tev %*% (diag(exp(evalues*lamda*internode[v]))%*%itev))
		}

		correct<-0
		wrong1<-0
		wrong2<-0

		for(original_character in 1:4){
			for(internode_character in 1:4){
				for(leaf_character_1 in 1:4){
					for(leaf_character_2 in 1:4){
						if(leaf_character_1!=leaf_character_2){
							correct <- correct+(frequ[original_character]*
                	                   		p[5,original_character, internode_character]*
                	                   		p[1,original_character, leaf_character_1]*
                	                   		p[2,original_character, leaf_character_1]*
                	                   		p[3,internode_character, leaf_character_2]*
                	                   		p[4,internode_character, leaf_character_2])
							wrong1 <- wrong1+(frequ[original_character]*
                	                 		p[5,original_character, internode_character]*
                	                  		p[1,original_character, leaf_character_1]*
                	                  		p[2,original_character, leaf_character_2]*
                	                  		p[3,internode_character, leaf_character_1]*
                	                  		p[4,internode_character, leaf_character_2])
							wrong2<-wrong2+ (frequ[original_character]*
                	                 		p[5,original_character, internode_character]*
                	                  		p[1,original_character, leaf_character_1]*
                	                  		p[2,original_character, leaf_character_2]*
                	                  		p[3,internode_character, leaf_character_2]*
                	                  		p[4,internode_character, leaf_character_1])
						}
					}
				}
			}
		}
		all<-c(correct,wrong1,wrong2)
		return(all)
	}
#Initialize blanks
	eYsum <- 0
	eX1sum <- 0
	eX2sum <- 0
	eY2sum <- 0
	eX12sum <- 0
	eX22sum <- 0
	eX1Ysum <- 0
	eX2Ysum <- 0
	eX1X2sum <- 0

	for(lmbda in rate_vector){
		all<-evalLambda(lmbda)
		y<-all[1]
		x1<-all[2]
		x2<-all[3]	
		eYsum<-eYsum+y
		eX1sum<-eX1sum+x1
		eX2sum<-eX2sum+x2
	
		eY2sum<-eY2sum+(y^2)
		eX12sum<-eX12sum+(x1^2)
		eX22sum<-eX22sum+(x2^2)
	
		eX1Ysum<-eX1Ysum+(x1*y)
		eX2Ysum<-eX2Ysum+(x2*y)
		eX1X2sum<-eX1X2sum+(x1*x2)
		}

	Mu_1 <- eYsum - eX1sum
	Mu_2 <- eYsum - eX2sum

	
	Sigma_1 <- sqrt(eX1sum + eYsum - eX12sum - eY2sum + 2*eX1Ysum)
	Sigma_2 <-sqrt(eX2sum + eYsum - eX22sum - eY2sum + 2*eX2Ysum)
	Rho_<- (-eX1X2sum + eX1Ysum + eX2Ysum + eYsum - eY2sum)/(Sigma_1*Sigma_2)

#Internal function for integration
	FofT<-function(t){
		F1ofT=((1 / Sigma_1) * dnorm((t - Mu_1)/ Sigma_1)*pnorm(Rho_*(t - Mu_1)/(Sigma_1* sqrt(1 - Rho_*Rho_)) - (t - Mu_2)/(Sigma_2* sqrt(1 - Rho_*Rho_))))
		F2ofT=((1 / Sigma_2) * dnorm((t - Mu_2)/ Sigma_2)*pnorm(Rho_*(t - Mu_2)/(Sigma_2* sqrt(1 - Rho_*Rho_)) - (t - Mu_1)/(Sigma_1* sqrt(1 - Rho_*Rho_))))
		return(F1ofT+F2ofT)
	}

	princtree<-integrate(FofT, -Inf, -.5)
	prpolytomy = integrate(FofT, -.5, .5)
	prcortree  = integrate(FofT, .5, Inf)

	print(paste0("Probablility Correct: ",prcortree$value))
	print(paste0("Probability Incorrect: ",princtree$value))
	print(paste0("Probability Polytomy: ",prpolytomy$value))
	return(c(princtree$value,prpolytomy$value,prcortree$value))
}


get.tree<-function(quart,tree){
	as.matrix(tree$tip.label)->drop
	drop[which(!drop[,1]%in%quart),]->prune
	drop.tip(tree,prune)->four.taxa	
	return(four.taxa)
}
bayes.signal.prep<-function(quart,tree){
	
	get.tree(quart,tree)->four.taxa
	combn(quart,2)->get
	knowledge<-matrix()
	for (i in 1:6){
		is.monophyletic(four.taxa,get[,i])->knowledge[i]
	}

	length(which(knowledge[1:6]=="TRUE"))->pec.or.quart

	if (pec.or.quart==2)	{
	
		##the following line from Liam Revells phytools blog
		 ee<-setNames(four.taxa$edge.length[sapply(1:4,function(x,y) which(y==x), y=four.taxa$edge[,2])],four.taxa$tip.label)
		max(branching.times(four.taxa))-max(ee)->internode
		"internode"->names(internode)
		##arrange
		which(names(ee)==quart[1])->first
			which(names(ee)==quart[2])->second
				which(names(ee)==quart[3])->third
					which(names(ee)==quart[4])->fourth
		c(ee[first],ee[second],ee[third],ee[fourth], internode)->vector
	}	
	else if (pec.or.quart==1){
		ee<-setNames(four.taxa$edge.length[sapply(1:4,function(x,y) which(y==x), y=four.taxa$edge[,2])],four.taxa$tip.label)
		##arrange to get to what the internode is and where to add the BL to T1.
		rev(sort(ee))->ee2
		ee2[1]-ee2[2]->internode
		"internode"->names(internode)
		ee2+c(internode,0,0,0)->newee
		which(names(newee)==quart[1])->first
		which(names(newee)==quart[2])->second
		which(names(newee)==quart[3])->third
		which(names(newee)==quart[4])->fourth
		c(newee[first], newee[second], newee[third], newee[fourth], internode)->vector	
	}	
	
	return(vector)
}



##these are the same inputs as the allmodel.signal.noise, users will use this and save the output for plotting
post.su<-function(a,b,c,d,e,f,Pi_T,Pi_C,Pi_A,Pi_G, rate_vector,quart,tree)
{
  
  ###first get your internodes
  matrix(ncol=5)->stored_ints
  for (i in 1:length(tree))
  {
    bayes.signal.prep(quart,tree[[i]])-> temp
    rbind(stored_ints,temp)->stored_ints
    
  }
  stored_ints[2:length(stored_ints[,1]),]->stored_ints
  length(stored_ints[,1])->loop.length
  matrix(ncol=length(stored_ints[,1]),nrow=3)-> quart.probs
  #foreach(i=1:loop.length, .combine=cbind)%dopar%
  for (i in 2:length(stored_ints[,1]))
  {
    allmodel.signal.noise (a,b,c,d,e,f, stored_ints[i,],Pi_T,Pi_C,Pi_A,Pi_G, rate_vector)-> temp2 #quart.probs[,i]
    #rbind(quart.probs,temp2)->quart.probs#[i,]
    temp2->quart.probs[,i]
  }
  t(quart.probs)->qp2
  cbind(qp2,stored_ints)->final
  return(qp2)	
  
}

##### User function. foreach does not work with downstream manipulations of objects well, so this takes the output of the core post.su function and adds the internode lengths back to have one nice result object
#' @export
su.bayes<-function(a,b,c,d,e,f,Pi_T,Pi_C,Pi_A,Pi_G, rate_vector,quart,tree){
  post.su(a,b,c,d,e,f,Pi_T,Pi_C,Pi_A,Pi_G, rate_vector,quart,tree)->final
  t(final)->qp2
  matrix(ncol=5)->stored_ints
  for (i in 1:length(tree))
  {
    bayes.signal.prep(quart,tree[[i]])-> temp
    rbind(stored_ints,temp)->stored_ints
  }
  cbind(t(qp2),stored_ints[2:length(stored_ints[,1]),])->final.result
  return(final.result)	
  
}


###This will either plot the Quartet internode probs with their internode, or else the violin plots o look at density another way
#' @export
plotPosterior<-function(final, plotType="QIPs")
{
as.data.frame(final)->final2
##Experimental
final2<-final2[2:nrow(final2),]
##  
dim(final)->ll
ll[1]->up
final2[2:up,]->final22
x    <- as.numeric(as.character(final22[,8]))
y1    <- as.numeric(as.character(final22[,3]))
y2    <- as.numeric(as.character(final22[,2])) #polytomy
y3    <- as.numeric(as.character(final22[,1]))

if (plotType=="QIPs")	{
p1<-ggplot(final22,aes(x=x,y=y1)) + stat_binhex(colour="white",na.rm=TRUE)+ xlab("internode length") + ylab("QIRP") + scale_fill_gradientn(colours=c("green1","red"),name = "Frequency",na.value=NA)+ theme_bw()
p2<-ggplot(final22,aes(x=x,y=y2)) + stat_binhex(colour="white",na.rm=TRUE)+ xlab("internode length") + ylab("QIPP") + scale_fill_gradientn(colours=c("green1","red"),name = "Frequency",na.value=NA)+ theme_bw()
p3<-ggplot(final22,aes(x=x,y=y3)) + stat_binhex(colour="white",na.rm=TRUE)+ xlab("internode length") + ylab("QIHP") + scale_fill_gradientn(colours=c("green1","red"),name = "Frequency",na.value=NA)+ theme_bw()
grid.arrange(p1, p2, p3, ncol=1, nrow =3)
} else if (plotType=="violin"){
	c(y1,y2,y3)->stacks
	length(y1)->set
	rep("QIRP",set)->Qirp
		rep("QIPP",set)->Qipp
			rep("QIHP",set)->Qihp
				c(Qirp,Qipp,Qihp)->c2
					cbind(stacks,c2)->newy
						colnames(newy)<-c("Probability","Analysis")
							rep(x,3)->internodes
								#colnames(internodes)<-"internode"
								cbind(internodes,newy)->data
								as.data.frame(data)->data
								as.numeric(as.character(data[,1]))->data[,1]
								as.numeric(as.character(data[,2]))->data[,2]
								Analysis<-data[,"Analysis"]
								Probability<-data[,"Probability"]
			p<-ggplot(data, aes(x= Analysis, y= Probability, fill=Analysis)) 
				p + geom_violin(trim=FALSE)+scale_fill_manual(values=c("firebrick","deepskyblue3","seagreen")) + geom_boxplot(width=0.1, fill= "aliceblue")		
					
}}
carolinafishes/PhyInformR documentation built on May 13, 2019, 12:50 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
carolinafishes/PhyInformR
Rapid Calculation of Phylogenetic Information Content

R/PhyInformR.r
In carolinafishes/PhyInformR: Rapid Calculation of Phylogenetic Information Content

R Package Documentation

Browse R Packages

We want your feedback!

carolinafishes/PhyInformR Rapid Calculation of Phylogenetic Information Content

R/PhyInformR.r In carolinafishes/PhyInformR: Rapid Calculation of Phylogenetic Information Content

R Package Documentation

Browse R Packages

We want your feedback!

carolinafishes/PhyInformR
Rapid Calculation of Phylogenetic Information Content

R/PhyInformR.r
In carolinafishes/PhyInformR: Rapid Calculation of Phylogenetic Information Content