R/consTTree.R

#' Build a consensus transmission tree from a MCMC output
#' @param record Output from inferTTree function
#' @param burnin Proportion of the MCMC output to be discarded as burnin
#' @param minimum Minimum probability for inclusion in consensus
#' @export
consTTree = function(record,burnin=0.5,minimum=0.5)
{
  #Remove burnin
  if (burnin>0) record=record[round(length(record)*burnin):length(record)]
  m=length(record)
  n=sum(record[[1]]$ctree$ctree[,2]==0&record[[1]]$ctree$ctree[,3]==0) #Number of sampled individuals

  #Record partitions in sampled transmission tree
  hash=vector('list',n*m*10)
  a=floor(n*10*runif(n))+1
  for (i in 1:length(record))
  {
    ttree=extractTTree(record[[i]]$ctree)$ttree
    #Find children of nodes
    todo=1:n
    children=matrix(NA,nrow(ttree),n)
    while (length(todo)>0) {
      w=todo[1]
      todo=todo[-1]
      direct=which(ttree[,3]==w)
      children[w,]=colSums(children[direct,,drop=F])>0
      if (is.na(children[w,1])) next
      if (w<=n) children[w,w]=1
      if (ttree[w,3]>0) todo=c(todo,ttree[w,3])
    }
    #Add nodes one by one
    for (j in 1:nrow(ttree)) {
      c=children[j,]
      hh=1+(sum(a[which(c==1)])%%(length(hash)))
      found=0
      while (1) {
        if (is.null(hash[[hh]])) break
        if (all(c==hash[[hh]]$c)) break
        hh=hh+1
        if (hh>length(hash)) hh=1
      }
      if (is.null(hash[[hh]])) {hash[[hh]]$c=c;hash[[hh]]$n=0;hash[[hh]]$w=c();hash[[hh]]$last=0;hash[[hh]]$t=c()}
      if (hash[[hh]]$last==i) {hash[[hh]]$w[1]=hash[[hh]]$w[1]+1;hash[[hh]]$t[1]=max(hash[[hh]]$t[1],ttree[j,1])}
      else {
        hash[[hh]]$last=i
        hash[[hh]]$n=hash[[hh]]$n+1
        hash[[hh]]$w=c(1,hash[[hh]]$w)
        hash[[hh]]$t=c(ttree[j,1],hash[[hh]]$t)
      }
    }
    
    #Add bonus leaves
    for (j in 1:n) {
      if ((sum(children[j,]))==1) next
      c=rep(0,n);c[j]=1
      hh=1+(sum(a[which(c==1)])%%(length(hash)))
      found=0
      while (1) {
        if (is.null(hash[[hh]])) break
        if (all(c==hash[[hh]]$c)) break
        hh=hh+1
        if (hh>length(hash)) hh=1
      }
      if (is.null(hash[[hh]])) {hash[[hh]]$c=c;hash[[hh]]$n=0;hash[[hh]]$w=c();hash[[hh]]$last=0;hash[[hh]]$t=c()}
      hash[[hh]]$n=hash[[hh]]$n+1
      hash[[hh]]$w=c(0,hash[[hh]]$w)
      hash[[hh]]$t=c(ttree[j,1],hash[[hh]]$t)
    }
  }
  
  #Choose partitions to include in consensus transmission tree
  comb=hash[!sapply(hash,is.null)]
  if (minimum>0) comb=comb[sapply(comb,'[[',2)>(minimum*m)]
  comb=comb[order(sapply(comb,'[[',2),decreasing=T)]
  keep=c()
  for (i in 1:n) {
    c=rep(0,n);c[i]=1
    for (j in 1:length(comb)) if (all(c==comb[[j]]$c)) {keep=c(keep,j);break}
  }
  for (i in 1:length(comb)) {
    if (is.element(i,keep)) next
    c1=which(comb[[i]]$c==1)
    exclude=F
    for (j in keep) {
      c2=which(comb[[j]]$c==1)
      if (length(intersect(c1,c2))>0 && length(setdiff(c1,c2))>0 && length(setdiff(c2,c1))>0) {exclude=T;break}
    }
    if (exclude==F) keep=c(keep,i)
  }
  comb=comb[keep]

  #Build list of parents from selected partitions
  parents=rep(NA,length(comb))
  bralen =rep(NA,length(comb))
  inftim =rep(NA,length(comb))
  for (i in 1:length(comb)) {
    bralen[i]=round(mean(comb[[i]]$w))
    inftim[i]=sum(comb[[i]]$t)/comb[[i]]$n
    ci=which(comb[[i]]$c==1)
    bestscore=Inf
    for (j in 1:length(comb)) {
      cj=which(comb[[j]]$c==1)
      if (i==j) next
      if (length(setdiff(ci,cj))>0) next
      if (length(setdiff(cj,ci))<bestscore) {bestscore=length(setdiff(cj,ci));parents[i]=j}
    }
    if (bestscore==Inf) root=i
  }
  
#   #Temporary plot
#   tr=list()
#   tr$Nnode=length(comb)-n
#   tr$tip.label=as.character(1:n)
#   edge=cbind(parents[which(!is.na(parents))],which(!is.na(parents)))
#   tr$edge=edge;tr$edge[edge==n+1]=root;tr$edge[edge==root]=n+1#Make the root the (n+1)-th
#   tr$edge.length=rep(NA,nrow(tr$edge))
#   for (i in 1:nrow(tr$edge)) tr$edge.length[i]=median(comb[[tr$edge[i,2]]]$w)
#   class(tr)<-'phylo'
#   plot(tr)
  
  #Update vectors parents and bralen and inftim so that branches of length zero are removed and branches of length>1 are deduplicated
  i=1
  while (i<length(parents)) {
    if (bralen[i]==0) {
    torem=parents[i]
    parents[i]=parents[torem]
    bralen[i]=bralen[torem]
    inftim[i]=inftim[torem]
    parents[which(parents==torem)]=i;parents=parents[-torem]
    bralen=bralen[-torem]
    inftim=inftim[-torem]
    parents[which(parents>torem)]=parents[which(parents>torem)]-1 
    } 
    if (bralen[i]>1) {
      dt=abs(inftim[i]-inftim[parents[i]])
      inftim=c(inftim,inftim[i]-dt/bralen[i])
      bralen=c(bralen,bralen[i]-1)
      bralen[i]=1
      parents=c(parents,parents[i])
      parents[i]=length(bralen)
    }
    i=i+1}

  #Build transmission tree
  cons=matrix(NA,length(parents),3)
  cons[,1]=inftim
  parents[which(is.na(parents))]=0
  cons[,3]=parents
  cons[1:n,2]=ttree[1:n,2]#copy sampling dates from any ttree
  return(list(ttree=cons,nam=record[[1]]$ctree$nam))
}
yuanwxu/TransPhyloC documentation built on May 4, 2019, 6:35 p.m.