R/getPhyloTree.R

Defines functions getPhyloTree

Documented in getPhyloTree

#' @title getPhyloTree
#' 
#' @param maf Maf or MafList object generated by \code{\link{readMaf}} function
#' @param patient.id Select the specific patients. Default NULL, all patients are included.
#' @param method Approach to construct phylogenetic trees. 
#' Choose one of "NJ"(Neibor-Joining), "MP"(maximum parsimony), 
#' "ML"(maximum likelihood), "FASTME.ols" or "FASTME.bal".  
#' @param min.vaf The minimum value of vaf. Default 0.
#' @param min.ccf The minimum value of CCF. Default 0
#' @param bootstrap.rep.num Bootstrap iterations. Default 100.
#' @param ... Other options passed to \code{\link{subMaf}}
#' 
#' 
#' @examples
#' maf.File <- system.file("extdata/", "CRC_HZ.maf", package = "MesKit")
#' clin.File <- system.file("extdata/", "CRC_HZ.clin.txt", package = "MesKit")
#' ccf.File <- system.file("extdata/", "CRC_HZ.ccf.tsv", package = "MesKit")
#' maf <- readMaf(mafFile=maf.File, clinicalFile = clin.File, ccfFile=ccf.File, refBuild="hg19")
#' phyloTree <- getPhyloTree(maf)
#' @return PhyloTree or phyloTreeList object
#' @import phangorn ape
#' @export getPhyloTree

getPhyloTree <- function(maf,
                      patient.id = NULL, 
                      method = "NJ",
                      min.vaf = 0, 
                      min.ccf = 0,
                      bootstrap.rep.num = 100,
                      ...){
  
  processGetPhyloTree <- function(m){
    maf_data <- getMafData(m)
    patient <- getMafPatient(m)
    if(nrow(maf_data) == 0){
      message("Warning: there was no mutation found in ", patient, " after filtering.")
      return(NA)
    }
    # print(nrow(maf_data))
    refBuild <- getMafRef(m)
    ## information input
    binary.matrix <- getMutMatrix(maf_data, use.ccf = FALSE)
    
    if(nrow(binary.matrix) == 1){
      message("Warning: ", patient, " should have at least two mutations.")
      return(NA)
    }
    
    if("CCF" %in% colnames(maf_data)){
      ccf.matrix <- getMutMatrix(maf_data, use.ccf = TRUE)
    }else{
      ccf.matrix <- matrix() 
    }
    mut_dat <- t(binary.matrix)
    if(method == "NJ"){
      matTree <- nj(dist.gene(mut_dat))
      # root_num <- which(matTree$tip.label == "NORMAL")
      # matTree <- root(matTree, root_num)
      bootstrap.value <- ape::boot.phylo(matTree, mut_dat, function(e){nj(dist.gene(e))}, B = bootstrap.rep.num,quiet = TRUE, rooted = TRUE)/(bootstrap.rep.num)*100
    }else if(method == "MP"){
      matTree <- byMP(mut_dat)
      # root_num <- which(matTree$tip.label == "NORMAL")
      # matTree <- root(matTree, root_num)
      bootstrap.value <- ape::boot.phylo(matTree, mut_dat, function(e){byMP(e)}, B = bootstrap.rep.num, quiet = TRUE, rooted = TRUE)/(bootstrap.rep.num)*100 
    }else if(method == "ML"){
      matTree <- byML(mut_dat)
      # root_num <- which(matTree$tip.label == "NORMAL")
      # matTree <- root(matTree, root_num)
      bootstrap.value <- ape::boot.phylo(matTree, mut_dat, function(e)byML(e), B = bootstrap.rep.num, quiet = TRUE, rooted = TRUE)/(bootstrap.rep.num)*100
    }else if(method == "FASTME.bal"){
      matTree <- ape::fastme.bal(dist.gene(mut_dat))
      # root_num <- which(matTree$tip.label == "NORMAL")
      # matTree <- root(matTree, root_num)
      bootstrap.value <- ape::boot.phylo(matTree, mut_dat, function(e) ape::fastme.bal(dist.gene(e)),B = bootstrap.rep.num,quiet = TRUE, rooted = TRUE)/(bootstrap.rep.num)*100
    }else if(method == "FASTME.ols"){
      matTree <- ape::fastme.ols(dist.gene(mut_dat))
      # root_num <- which(matTree$tip.label == "NORMAL")
      # matTree <- root(matTree, root_num)
      bootstrap.value <- ape::boot.phylo(matTree, mut_dat, function(e) ape::fastme.ols(dist.gene(e)),B = bootstrap.rep.num,quiet = TRUE, rooted = TRUE)/(bootstrap.rep.num)*100
    }
    branch.id <- readPhyloTree(matTree)
    
    mut.branches_types <- treeMutationalBranches(maf_data, branch.id, binary.matrix)
    mut.branches <- mut.branches_types$mut.branches
    branch.type <- mut.branches_types$branch.type
    
    if("Tumor_Sample_Label" %in% colnames(maf_data)){
      tsb.label <-  maf_data %>% 
        dplyr::select("Tumor_Sample_Barcode", "Tumor_Sample_Label") %>% 
        dplyr::distinct(.data$Tumor_Sample_Barcode, .keep_all = TRUE)
    }else{
      tsb.label <- data.frame()
    }
    
    phylo.tree <- new('phyloTree',
                      patientID = patient, tree = matTree, 
                      binary.matrix = binary.matrix, ccf.matrix = ccf.matrix, 
                      mut.branches = mut.branches, branch.type = branch.type,
                      ref.build = refBuild,
                      bootstrap.value = bootstrap.value, method = method,
                      tsb.label = tsb.label)
    return(phylo.tree)
  }
  
  
  method <- match.arg(method, choices = c("NJ", "MP", "ML", 
                                          "FASTME.ols", "FASTME.bal"), several.ok = FALSE)
  
  maf_input <- subMaf(maf,
                      patient.id = patient.id,
                      min.vaf = min.vaf,
                      min.ccf = min.ccf,
                      mafObj = TRUE,
                      ...)
  
  phyloTree_patient_list <- lapply(maf_input, processGetPhyloTree)
  phyloTree_patient_list <- phyloTree_patient_list[!is.na(phyloTree_patient_list)]
  
  
  if(length(phyloTree_patient_list) > 1){
      phyloTree_list <- new('phyloTreeList',phyloTree_patient_list)
      return(phyloTree_list)
  }else if(length(phyloTree_patient_list) == 1){
      return(phyloTree_patient_list[[1]])
  }else{
      return(NA)
  }

}

Try the MesKit package in your browser

Any scripts or data that you put into this service are public.

MesKit documentation built on March 28, 2021, 6 p.m.