R/plotCNATree.R

Defines functions bootstrap.trees medicc.resample.distance.matrices plotCNAtree

Documented in plotCNAtree

# library(ggtree)
# library(treeio)
# library(phangorn)
# library(MEDICCquant)


#' plotCNAtree plots phylogenetic trees of CNAs
#'
#' The CNAs trees were constructed by MEDICC.
#'
#' @param dist dist files that generated by MEDICC.
#' @param bootstrap.rep.num number of bootstrap steps.
#' @param group a list that used to indicate the sample groups
#' @param group.colors an array indicates the colors of sample groups.
#' @param title title of the plot.
#' @param normal.node the sample name of normal sample in the tree.
#' @param hexpand_ratio hexpand ratio. see \code{\link[ggtree]{hexpand}}
#'
#' @examples
#' # read samples distances.
#' # This dist file is the output of MEDICC
#' dist <- system.file(package = "MPTevol", "extdata", "tree_final.dist")
#'
#' # set group information
#' group <- list(
#'   NORMAL = "NORMAL",
#'   Breast = paste0("Breast_", 1:5),
#'   Coad = paste0("Coad_", 1:5),
#'   Lung = paste0("Lung_", 1:5),
#'   OveryLM = paste0("OveryLM_", 1:5),
#'   OveryRM = paste0("OveryRM_", 1:6),
#'   UterusM = paste0("UterusM_", c(1:7))
#' )
#'
#' # set group colors
#' group.colors <- setNames(set.colors(n = length(group)), nm = names(group))
#'
#' # built trees
#' tree <- plotCNAtree(
#'   dist = dist,
#'   group = group,
#'   group.colors = group.colors
#' )
#'
#' tree$plot
#' @import ggtree
#' @import treeio
#' @import phangorn
#' @import ape
#' @export
plotCNAtree <- function(dist,
                        bootstrap.rep.num = 500,
                        group = NULL,
                        group.colors = NULL,
                        title = "Cancer",
                        normal.node = "NORMAL",
                        hexpand_ratio = 0.3) {
  message("Calculate the bootstraps")

  # get trees and using NJ to get the bootstraps.
  phyloTree <- bootstrap.trees(
    dist = dist,
    title = title,
    bootstrap.rep.num = bootstrap.rep.num
  )

  mtree <- phyloTree$tree
  bootstrap.value <- phyloTree$bootstrap.value


  # all.length = mtree$edge.length
  root.length <- rev(mtree$edge.length)[1]

  # set outgroup and removing the Normal
  mtree <- treeio::root(mtree, outgroup = normal.node)
  mtree <- treeio::drop.tip(mtree, normal.node)

  # combined bootstrap
  bp2 <- data.frame(
    node = 1:(treeio::Nnode(mtree)) + treeio::Ntip(mtree),
    bootstrap = bootstrap.value
  )
  mtree <- dplyr::full_join(mtree, bp2, by = "node")

  p_trees <- ggtree::ggtree(mtree, size = 1) +
    ggtree::geom_tiplab(size = 4) +
    ggtree::geom_treescale(fontsize = 6, linesize = 1, offset = 1) +
    # set root length
    ggtree::geom_rootedge(rootedge = root.length, size = 1, colour = "grey40") +
    ggtree::hexpand(hexpand_ratio, direction = 1)

  p_trees <- p_trees +
    ggtree::geom_nodepoint(ggplot2::aes(fill = cut(bootstrap, c(0, 70, 90, 100))),
      shape = 21, size = 2.5
    ) +
    ggplot2::scale_fill_manual(
      values = c("black", "grey", "white"), guide = "legend",
      name = "Bootstrap Percentage(BP)",
      breaks = c("(90,100]", "(70,90]", "(0,70]"),
      labels = expression(BP >= 90, 70 <= BP * " < 90", BP < 70)
    ) +
    ggtree::theme_tree(legend.position = c(0.8, 0.25))

  if (!is.null(group)) {

    # check samples ids between trees and group
    if (!identical(sort(purrr::reduce(group, c)), sort(mtree@phylo$tip.label))) {
      stop("the samplenames in group were not identical to sample names in the tree")
    }

    p_trees <- ggtree::groupOTU(p_trees, group, "Sites")

    # get levels
    Sites <- levels(p_trees$data$Sites)

    if (!is.null(group.colors)) {
      if (!identical(sort(names(group.colors)), sort(levels(p_trees$data$Sites)))) {

        # get levels that were not in the group.colors
        Site1 <- setdiff(levels(p_trees$data$Sites), names(group.colors))
        Site.colors <- setNames(
          c(set.colors(n = length(Site1), rev = T), group.colors),
          nm = c(Site1, names(group.colors))
        )

        Site.colors <- Site.colors[levels(p_trees$data$Sites)]
      } else {
        Site.colors <- group.colors[levels(p_trees$data$Sites)]
      }
    } else {
      Site.colors <- set.colors(n = length(Sites))
    }

    p_trees <- p_trees +
      ggplot2::aes(color = Sites) +
      ggplot2::scale_colour_manual(
        values = Site.colors
      )
  }


  p_trees <- p_trees +
    ggplot2::labs(title = title) +
    ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5))

  return(
    list(
      phyloTree = phyloTree,
      plot = p_trees
    )
  )
}



########################################################################################

# Supporting Functions


# functions to get the bootstrap values.

medicc.resample.distance.matrices <- function(D, niter = 100) {
  result <- list()
  # pb=txtProgressBar(min=0,max=niter,style=3)
  for (s in 1:niter) {
    # setTxtProgressBar(pb,s)
    Dnew <- as.matrix(D)
    for (i in 1:nrow(Dnew)) {
      for (j in 1:i) {
        Dnew[i, j] <- rnorm(1, mean = Dnew[i, j], sd = sqrt(Dnew[i, j]))
        Dnew[j, i] <- Dnew[i, j]
      }
    }
    Dnew[Dnew < 0] <- 0
    Dnew <- round(Dnew)
    Dnew <- as.dist(Dnew)
    result[[s]] <- Dnew
  }
  return(result)
}


bootstrap.trees <- function(dist, bootstrap.rep.num = 1000, title = "Cancer") {

  # using NJ to create a new tree with bootstrap values.
  getTrees <- function(D) {
    matTree <- ape::nj(D)
    root_num <- which(matTree$tip.label == "diploid")
    matTree <- treeio::root(matTree, root_num)
    matTree
  }

  D <- as.matrix(read.table(dist, row.names = 1, skip = 1))
  colnames(D) <- rownames(D)

  matTree <- getTrees(D) # getTrees is defined the beginning of this function.
  # plot(matTree)
  # bootstrap
  # resampled trees.
  resampled <- medicc.resample.distance.matrices(D, bootstrap.rep.num)

  bootstrap.value <- prop.clades(matTree, lapply(resampled, getTrees), rooted = is.rooted(matTree)) / bootstrap.rep.num * 100

  # bootstrap.value <- ape::boot.phylo(matTree, mut_dat, function(e){nj(dist.gene(e))},B = bootstrap.rep.num,quiet = TRUE,rooted = TRUE)/(bootstrap.rep.num)*100

  # plot( matTree, main = title)
  # nodelabels(bootstrap.value)

  # for MesKit to plot trees.
  matTree$tip.label[which(matTree$tip.label == "diploid")] <- "NORMAL"

  phyloTree <- list(
    tree = matTree,
    bootstrap.value = bootstrap.value[1:(length(bootstrap.value) - 1)],
    patientID = title
  )

  phyloTree
}
qingjian1991/MPTevol documentation built on Jan. 30, 2023, 10:16 p.m.