knitr::opts_chunk$set(include = TRUE)
WGPAN (Whole-Genome Pairwise Alignment Noising).
library('knitr') library('reshape') library('dplyr') library('ape') require('devtools') install_github('bradfordcondon/WGPAN') devtools::load_all()
Note that the matrix MUST be identical across the diagonal for the distance calculation to work.
ID <- c("Amanita","Polyporus","Coprinus", "Daldinia", "Entoloma", "Flaviporus") myData<- matrix(sample(100:10000, 36), nrow = 6, ncol = 6, dimnames = list(ID, ID)) diag(myData)<- 0 #set diagonal to 0
clades = c("Agaricales", "Polyporales", "Agaricales", "Xylariales", "Agaricales", "Polyporales") cladeList = data.frame(ID, clades) kable(cladeList)
Calculate the distances between all within-clade comparisons. Make a boxplot of this, then make a summary table with the SD and mean.
x<- myData idist = na.omit(reshape::melt(x)) colnames(idist) <- c("a", "b", "dist") allSelfDists = data.frame() clade_loop <- unique(cladeList$clades) ID_names <- cladeList$ID cladeAssignments <-cladeList$clades WithinDistanceTable <- withinGroupDistances(distanceMatrix = x, cladeAssignments = cladeAssignments, ID_names = ID_names) boxplot(distance ~ clade, data = WithinDistanceTable, las=3 , main = "within-clade distances 1-11-17", ylab = "SNPs/MB" ) withinSDtable<- WithinDistanceTable %>% group_by(clade)%>%summarize(sd = sd(distance), mean = mean(distance)) kable(withinSDtable) #replace NA with 0 withinSDtable[is.na(withinSDtable)]<- 0
Next, we want to noise the tree based on this distance file.
### #Write 100 bootstrap trees, where within-clade distances (only!) are noised by the normal distribution with SD= that clade's within SD. ### bstrees<- generateBootstrapTrees(distanceMatrix = x, numberOfTrees = 100, withinSDtable= withinSDtable, cladeAssignments = cladeAssignments, ID_names = ID_names)
Now that we have 1000 bootstrap trees, add them as bootstraps to original tree using prop.clades. Note also that interclade distances are not noised. As such we are only looking at nodes defining clades, not nodes defining relationships between clades.
On the performance of prop.clades:
Since ape 3.5, prop.clades should return sensible results for all values of rooted: if FALSE, the numbers of bipartitions (or splits); if TRUE, the number of clades (of hopefully rooted trees).
master <- myData main_tree <- nj(as.dist(master)) clade_support <- prop.clades(main_tree, bstrees) #generate BS values based on trees #clade_support = clade_support/10 Divide by 10 for 1,000 bootstrap trees clade_support[is.na(clade_support)]<- 0 clade_support main_tree$node.labels<- clade_support plot(main_tree, type="radial")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.