# library(ggtree)
# library(treeio)
# library(phangorn)
# library(MEDICCquant)
#' plotCNAtree plots phylogenetic trees of CNAs
#'
#' The CNAs trees were constructed by MEDICC.
#'
#' @param dist dist files that generated by MEDICC.
#' @param bootstrap.rep.num number of bootstrap steps.
#' @param group a list that used to indicate the sample groups
#' @param group.colors an array indicates the colors of sample groups.
#' @param title title of the plot.
#' @param normal.node the sample name of normal sample in the tree.
#' @param hexpand_ratio hexpand ratio. see \code{\link[ggtree]{hexpand}}
#'
#' @examples
#' # read samples distances.
#' # This dist file is the output of MEDICC
#' dist <- system.file(package = "MPTevol", "extdata", "tree_final.dist")
#'
#' # set group information
#' group <- list(
#' NORMAL = "NORMAL",
#' Breast = paste0("Breast_", 1:5),
#' Coad = paste0("Coad_", 1:5),
#' Lung = paste0("Lung_", 1:5),
#' OveryLM = paste0("OveryLM_", 1:5),
#' OveryRM = paste0("OveryRM_", 1:6),
#' UterusM = paste0("UterusM_", c(1:7))
#' )
#'
#' # set group colors
#' group.colors <- setNames(set.colors(n = length(group)), nm = names(group))
#'
#' # built trees
#' tree <- plotCNAtree(
#' dist = dist,
#' group = group,
#' group.colors = group.colors
#' )
#'
#' tree$plot
#' @import ggtree
#' @import treeio
#' @import phangorn
#' @import ape
#' @export
plotCNAtree <- function(dist,
bootstrap.rep.num = 500,
group = NULL,
group.colors = NULL,
title = "Cancer",
normal.node = "NORMAL",
hexpand_ratio = 0.3) {
message("Calculate the bootstraps")
# get trees and using NJ to get the bootstraps.
phyloTree <- bootstrap.trees(
dist = dist,
title = title,
bootstrap.rep.num = bootstrap.rep.num
)
mtree <- phyloTree$tree
bootstrap.value <- phyloTree$bootstrap.value
# all.length = mtree$edge.length
root.length <- rev(mtree$edge.length)[1]
# set outgroup and removing the Normal
mtree <- treeio::root(mtree, outgroup = normal.node)
mtree <- treeio::drop.tip(mtree, normal.node)
# combined bootstrap
bp2 <- data.frame(
node = 1:(treeio::Nnode(mtree)) + treeio::Ntip(mtree),
bootstrap = bootstrap.value
)
mtree <- dplyr::full_join(mtree, bp2, by = "node")
p_trees <- ggtree::ggtree(mtree, size = 1) +
ggtree::geom_tiplab(size = 4) +
ggtree::geom_treescale(fontsize = 6, linesize = 1, offset = 1) +
# set root length
ggtree::geom_rootedge(rootedge = root.length, size = 1, colour = "grey40") +
ggtree::hexpand(hexpand_ratio, direction = 1)
p_trees <- p_trees +
ggtree::geom_nodepoint(ggplot2::aes(fill = cut(bootstrap, c(0, 70, 90, 100))),
shape = 21, size = 2.5
) +
ggplot2::scale_fill_manual(
values = c("black", "grey", "white"), guide = "legend",
name = "Bootstrap Percentage(BP)",
breaks = c("(90,100]", "(70,90]", "(0,70]"),
labels = expression(BP >= 90, 70 <= BP * " < 90", BP < 70)
) +
ggtree::theme_tree(legend.position = c(0.8, 0.25))
if (!is.null(group)) {
# check samples ids between trees and group
if (!identical(sort(purrr::reduce(group, c)), sort(mtree@phylo$tip.label))) {
stop("the samplenames in group were not identical to sample names in the tree")
}
p_trees <- ggtree::groupOTU(p_trees, group, "Sites")
# get levels
Sites <- levels(p_trees$data$Sites)
if (!is.null(group.colors)) {
if (!identical(sort(names(group.colors)), sort(levels(p_trees$data$Sites)))) {
# get levels that were not in the group.colors
Site1 <- setdiff(levels(p_trees$data$Sites), names(group.colors))
Site.colors <- setNames(
c(set.colors(n = length(Site1), rev = T), group.colors),
nm = c(Site1, names(group.colors))
)
Site.colors <- Site.colors[levels(p_trees$data$Sites)]
} else {
Site.colors <- group.colors[levels(p_trees$data$Sites)]
}
} else {
Site.colors <- set.colors(n = length(Sites))
}
p_trees <- p_trees +
ggplot2::aes(color = Sites) +
ggplot2::scale_colour_manual(
values = Site.colors
)
}
p_trees <- p_trees +
ggplot2::labs(title = title) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5))
return(
list(
phyloTree = phyloTree,
plot = p_trees
)
)
}
########################################################################################
# Supporting Functions
# functions to get the bootstrap values.
medicc.resample.distance.matrices <- function(D, niter = 100) {
result <- list()
# pb=txtProgressBar(min=0,max=niter,style=3)
for (s in 1:niter) {
# setTxtProgressBar(pb,s)
Dnew <- as.matrix(D)
for (i in 1:nrow(Dnew)) {
for (j in 1:i) {
Dnew[i, j] <- rnorm(1, mean = Dnew[i, j], sd = sqrt(Dnew[i, j]))
Dnew[j, i] <- Dnew[i, j]
}
}
Dnew[Dnew < 0] <- 0
Dnew <- round(Dnew)
Dnew <- as.dist(Dnew)
result[[s]] <- Dnew
}
return(result)
}
bootstrap.trees <- function(dist, bootstrap.rep.num = 1000, title = "Cancer") {
# using NJ to create a new tree with bootstrap values.
getTrees <- function(D) {
matTree <- ape::nj(D)
root_num <- which(matTree$tip.label == "diploid")
matTree <- treeio::root(matTree, root_num)
matTree
}
D <- as.matrix(read.table(dist, row.names = 1, skip = 1))
colnames(D) <- rownames(D)
matTree <- getTrees(D) # getTrees is defined the beginning of this function.
# plot(matTree)
# bootstrap
# resampled trees.
resampled <- medicc.resample.distance.matrices(D, bootstrap.rep.num)
bootstrap.value <- prop.clades(matTree, lapply(resampled, getTrees), rooted = is.rooted(matTree)) / bootstrap.rep.num * 100
# bootstrap.value <- ape::boot.phylo(matTree, mut_dat, function(e){nj(dist.gene(e))},B = bootstrap.rep.num,quiet = TRUE,rooted = TRUE)/(bootstrap.rep.num)*100
# plot( matTree, main = title)
# nodelabels(bootstrap.value)
# for MesKit to plot trees.
matTree$tip.label[which(matTree$tip.label == "diploid")] <- "NORMAL"
phyloTree <- list(
tree = matTree,
bootstrap.value = bootstrap.value[1:(length(bootstrap.value) - 1)],
patientID = title
)
phyloTree
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.