buildPhylo<-function(sp_cbs,outF,treeAlgorithm="bionjs",dm=NA, add="Germline",verbose=T){
out=list("tree"=NULL,"dm"=dm);
ii=grep("SP",colnames(sp_cbs));
cnv=sp_cbs[,ii];
if(is.null(ncol(cnv))){
print("Less than two SPs coexist in this tumor. Aborting phylogeny reconstruction");
return(out);
}
.notifyUser(paste("Building phylogeny using ",treeAlgorithm," algorithm",sep=""),verbose=verbose)
print("Pairwise SP distances calculated as: % segments with identical copy number");
##Add user specified artificial SP, if any
if(!is.null(add)){
if (!any(colnames(cnv)==add,na.rm=T) && (add=="Consensus" || add=="Germline") ){
cnv=cbind(cnv,matrix(matrix(NaN,nrow(cnv),1),nrow=nrow(cnv), ncol=1, dimnames=list(rownames(cnv),add)));
if(add=="Consensus"){
cnv[,add]=round(colMeans(t(cnv),na.rm=T)); ####ploidy:=consensus at all positions
}else if(add=="Germline"){
cnv[,add]=2; ##assuming diploid germline status
}
}
}
##distance matrix from pairwise alignments
# D=.calculateHYpergeometricDistance(cnv);
D=.calculateHammingDistance(cnv)
if(is.null(nrow(D)) || nrow(D)<3){
print("No two SPs found between which distance could be calculated. Aborting phylogeny reconstruction");
return(out);
}
D=D*100;
write.table(D,paste(outF,".dist",sep=""),quote=F,sep="\t");
print(paste("distance-matrix saved under ",outF,".dist",sep=""));
tr =c();
if (treeAlgorithm=="bionjs"){
tr <- bionjs(D);
}else{
tr <- njs(D);
}
tr$root.edge <- 0; ## adds root dummy
outF=paste(outF,".tree",sep="");
write.tree(tr, file = outF);
print(paste("tree saved under ",outF,sep=""));
out$tree=tr;
out$spRelations=NULL;
if(!is.na(dm)){
out1=try(.assignSNVsToMultipleSPs(dm, outF, verbose=verbose),silent=FALSE)
if(class(out1)!="try-error"){
dm=out1$dm;
out$spRelations=out1$spRelations;
}
out$dm=dm;
}
return(out);
}
.assignSNVsToMultipleSPs <-function(dm,outF,verbose){
# dm[, c("SP_cnv","SP")]=round(1000*dm[, c("SP_cnv","SP")])/1000
if (!requireNamespace("phylobase", quietly = TRUE)) {
print("Package \'phylobase\' is needed for assigning SNVs to Multiple SPs but is not installed.")
return(dm);
}
tr=try(phylobase::readNewick(outF,check.names=F),silent=F);
if(class(tr)=="try-error"){
print("Warning! Setting negative edge lengths to 0!")
tr=read.tree(outF);
tr$edge.length[tr$edge.length<0]=0
write.tree(tr, file = "tmp.tree");
tr=phylobase::readNewick("tmp.tree",check.names=F);
}
.notifyUser("Assigning SNVs to SPs...",verbose=verbose)
spsInTree=names(phylobase::getNode(tr,type="tip"));
SPs = sort(unique(c(dm[, "SP_cnv"],dm[,"SP"])))
spNames= paste("SP_", SPs, sep = "");
x = colnames(dm)
x[(length(x) + 1):(length(x) + length(SPs))] =spNames
x=c(x,"Clone");
dm = cbind(dm, matrix(0, nrow(dm), length(SPs)), dm[,"SP"]);
colnames(dm)=x;
##Save ancestor-to-descendant (rows-to-columns) relations:
spRelations=matrix(0,length(spNames),length(spNames));
rownames(spRelations)=spNames; colnames(spRelations)=spNames;
for (k in 1:nrow(dm)) {
thisSP = paste("SP_", dm[k,"SP"],sep="");
if (!is.na(dm[k,"SP"])){
dm[k,gsub(" ","_",thisSP)]=1; #dm[k,"PM_B"]; binary assignment for now
}
if(!thisSP %in% spsInTree){
next;
}
if (mod(k, 100) == 0) {
.notifyUser(paste("Assigning SPs for SNV", k,
"out of ", nrow(dm), "..."),verbose=verbose)
}
dm=.propagateSNVToMultipleSPs(thisSP,dm,k,tr,SPs)
spRelations[thisSP,dm[k,colnames(spRelations)]==1]=1;
spRelations[thisSP,thisSP]=0
}
for (i in (length(spNames)-1):1){
iPhylo=which(sum(t(dm[,spNames]!=0))>length(spNames)-i)
print(paste(length(iPhylo), " SNVs assigned to >",length(spNames)-i," SPs"))
if(length(iPhylo)==0){
break;
}
}
return(list(dm=dm,spRelations=spRelations) )
}
.propagateSNVToMultipleSPs <-function(thisSP,dm,k,tr, spSizes){
thisSPsize=as.numeric(gsub("SP_","",thisSP));
xx=phylobase::getNode(tr,type="tip");
ii_This=match(thisSP,names(xx)); ##Node representing SP which harbors this SNV
sibl=phylobase::siblings(tr,xx[ii_This]); ##Siblings of SP with this SNV
if (length(sibl)==0){
return(dm)
}
for (s in 1:length(sibl)){
desc=.getAllDescendingTips_InclSelf(tr,sibl[s],c());
i_toKEEP=union(grep("SP",names(desc)),which(is.na(names(desc))));
desc=desc[i_toKEEP];
for (sd in names(desc)){
otherSP=gsub(" ","_",sd);
otherSPsize=as.numeric(gsub("SP_","",otherSP))
rootL=0
if(is.na(names(sibl[s])) || sd!=names(sibl[s])){
rootL=phylobase::edgeLength(tr,sibl[s]);
}
if (dm[k,otherSP]==0 && rootL+ phylobase::edgeLength(tr,sd) > 1.25*phylobase::edgeLength(tr,xx[ii_This])){ ##1.4
##This SP is likely ancestor of SP assigned as sibling. TODO: find tree reconstruction algorithm that can assign "living populations" as common ancestors
dm[k,otherSP]=1; #dm[k,"PM_B"]; ##Other SP inherits mutation of this SP.
dm[k,"Clone"]=thisSPsize-otherSPsize;
##TODO: what if the ploidy of the otherSP in this region is < B-allele ploidy of this SP! need to check this and set to minimum. Temporary solution --> binary assignment (above)
dm=.propagateSNVToMultipleSPs(otherSP,dm,k,tr,spSizes)
}
}
}
return(dm);
}
.getAllDescendingTips_InclSelf<-function(tr,nd,desc){
kids=phylobase::children(tr, nd);
if(length(kids)==0){
return(nd)
}
for (i in 1:length(kids)){
if(is.na(names(kids)[i])){
desc=c(desc,.getAllDescendingTips_InclSelf(tr,kids[i],desc));
}else{
desc=c(desc,kids[i]);
}
}
return(desc);
}
## @TODO: test .calculateHYpergeometricDistance(...)
# .calculateHYpergeometricDistance<-function(dmx){
# cols=gsub(" ","",colnames(dmx));
# D=matrix(matrix(1,ncol(dmx),ncol(dmx)), nrow=ncol(dmx), ncol=ncol(dmx), dimnames=list(cols,cols));
# for (SP_A in colnames(dmx)){
# for (SP_B in colnames(dmx)){
# ii1=which(dmx[,SP_A]>0); ##white balls in urn - SNVs in this primary SP
# ij=which(dmx[,SP_A]==0);##black balls in urn - SNVs in other primary SP
# ii2=which(dmx[,SP_B]>0); ##balls drawn from urn - SNVs in this recurrent SP
# o=intersect(ii1,ii2); ##white balls drawn from urn - overlapping SNVs in this recurrent SP
# D[SP_A,SP_B]=1-phyper(length(o)-1, length(ii1), length(ij), length(ii2))
# if (SP_A!=SP_B){
# D[SP_A,SP_B]=D[SP_A,SP_B]+0.3;
# }
# }
# }
#
# toRm=which(apply(is.na(D),1,any))
# #remove NAs
# if (length(toRm)>0){
# print(paste("Insufficient copy number segments for ",rownames(D)[toRm],". SP excluded from phylogeny",sep=""))
# D=D[-toRm,];
# D=D[,-toRm];
# }
# return(D)
# }
.calculateHammingDistance<-function(cnv){
toRm=c();
cols=gsub(" ","",colnames(cnv));
D=matrix(matrix(1,ncol(cnv),ncol(cnv)), nrow=ncol(cnv), ncol=ncol(cnv), dimnames=list(cols,cols));
for (i in 1:ncol(cnv)){
for (j in i:ncol(cnv)){
ii=which(!is.na(cnv[,i]) & !is.na(cnv[,j]));
if (length(ii)==0){
D[i,j]<-D[j,i]<-NA;
next;
}
x=cnv[ii,i]; y=cnv[ii,j];
dd=length(which(x!=y))/length(ii);
if (i!=j){
dd=dd+0.3;
}
D[i,j]<-D[j,i]<-dd
}
if (any(is.na(D[i,1:i]))){
toRm=cbind(toRm,i);#remove NAs
}
}
#remove NAs
if (length(toRm)>0){
print(paste("Insufficient copy number segments for ",rownames(D)[toRm],". SP excluded from phylogeny",sep=""))
D=D[-toRm,];
D=D[,-toRm];
}
return(D)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.