R/mutCluster.R

Defines functions mutCluster

Documented in mutCluster

#' mutCluster
#' @description Cluster mutations based on variant allele frequencies (VAFs) or cancer cell fractions (CCFs).
#'  
#' @param maf Maf or MafList object generated by \code{\link{readMaf}} function.
#' @param patient.id Select the specific patients. Default NULL, all patients are included.
#' @param segFile The segment file.
#' @param use.ccf Cluster CCF. Default FALSE.
#' @param withinTumor Cluster Tumor average CCF within tumors in each patients. Default FALSE.
#' @param use.tumorSampleLabel Logical (Default: FALSE). 
#' Rename the 'Tumor_Sample_Barcode' by 'Tumor_Sample_Label'.
#' @param ... Other options passed to \code{\link{subMaf}}
#' 
#' @examples
#' maf.File <- system.file("extdata/", "CRC_HZ.maf", package = "MesKit")
#' clin.File <- system.file("extdata/", "CRC_HZ.clin.txt", package = "MesKit")
#' ccf.File <- system.file("extdata/", "CRC_HZ.ccf.tsv", package = "MesKit")
#' maf <- readMaf(mafFile=maf.File, clinicalFile = clin.File, ccfFile=ccf.File, refBuild="hg19")
#' mutCluster(maf, patient.id = 'V402')
#' 
#' @import ggridges mclust
#' @importFrom grDevices boxplot.stats
#' @return clustering plots of vaf
#' @export mutCluster

## Main function for VAF plot
mutCluster <- function(maf,
                      patient.id = NULL,
                      use.ccf = FALSE, 
                      segFile = NULL,
                      withinTumor = FALSE,
                      use.tumorSampleLabel = FALSE,
                      ...){
  
  processVafcluster_maf <- function(m){
    maf_data <- getMafData(m)
    if(nrow(maf_data) == 0){
      message("Warning: there was no mutation in ", patient, " after filtering.")
      return(NA)
    }
    patient <- getMafPatient(m)
    
    seg <- NULL
    
    if(withinTumor){
      if(!"CCF" %in% colnames(getMafData(m))){
        stop(paste0("mutCluster requires CCF data when 'withinTumor'." ,
                    "No CCF data was found when generate Maf/MafList object."))
      }
      maf_data$V <- maf_data$Tumor_Average_CCF
      maf_data$ID <- maf_data$Tumor_ID
      xlab <- "CCF"
    }else{
      if(use.ccf){
        if(!"CCF" %in% colnames(getMafData(m))){
          stop(paste0("mutCluster requires CCF data when 'use.CCF'." ,
                      "No CCF data was found when generate Maf/MafList object."))
        }
        maf_data$V <- maf_data$CCF
        xlab <- "CCF"
      }else{
        if(!is.null(segFile)){
          seg <- readSegment(segFile = segFile)
          # maf_data <- copyNumberFilter(maf_data,seg, use.tumorSampleLabel = use.tumorSampleLabel)
        }
        else{
          message("## No segment files specified. Assuming all mutations have a CN of 2.")
        }
        maf_data$V <- maf_data$VAF
        xlab <- "VAF"
      }
      maf_data$ID <- maf_data$Tumor_Sample_Barcode
    }
    maf_data <- maf_data[!is.na(maf_data$V), ]
    id_list <- unique(maf_data$ID)
    
    
    sample_result <- lapply(id_list, processVafcluster_sample,
                            maf_data, patient, xlab, seg = seg, use.tumorSampleLabel)
    na_idx <- which(!is.na(sample_result))
    sample_result <- sample_result[na_idx]
    
    plot_list <- lapply(sample_result, function(x)x$p)
    names(plot_list) <- id_list[na_idx]
    
    rebuild_data_list <- lapply(sample_result, function(x)x$subdata)
    
    rebuild_data <- dplyr::bind_rows(rebuild_data_list)
    
    if(length(plot_list) == 1){
      plot_list <- plot_list[[1]]
    }
    
    return(list(cluster.data = rebuild_data, cluster.plot = plot_list))
    
  }
  
  processVafcluster_sample <- function(id, maf_data, patient, xlab, seg, use.tumorSampleLabel){
    
    message(paste("## Performing one-dimensional clustering for ", id," of ", patient, sep = ""))
    
    subdata <- maf_data[maf_data$ID == id]
    
    if(!is.null(seg)){
      subdata <- copyNumberFilter(subdata, seg, use.tumorSampleLabel = use.tumorSampleLabel)
    }
    ## data cleaning
    if (nrow(subdata) < 3) {
      message(paste0("## Mutation counts of Sample ", id, " are not enough for clustering!"))
      return(NA)
    }
    
    ## infer possible cluster from maf_data
    cluster_result <- mclust::densityMclust(subdata$V, G=seq_len(6), verbose=FALSE)
    subdata$cluster <- as.character(cluster_result$classification)
    
    ## define outfilter
    out_vaf <- boxplot.stats(subdata$V)$out
    subdata[subdata$V %in% out_vaf]$cluster <- "outlier"
    
    ## rename cluster
    cluster_name_order <- c(
      sort(unique(subdata[subdata$cluster != "outlier"]$cluster)),
      "outlier"
    )
    subdata <- subdata %>%
      dplyr::rowwise()%>%
      dplyr::mutate(cluster = dplyr::if_else(
        cluster == "outlier",
        "outlier",
        as.character(which(cluster == cluster_name_order))
      )) %>%
      as.data.frame()
    
    p  <- drawVAFCombine(subdata, xlab)
    subdata <- dplyr::select(subdata, -"V", -"ID")
    return(list(p = p, subdata = subdata))
  }
  
  # plotOption <- match.arg(plotOption, choices = c('combine', 'compare'), several.ok = FALSE)
  ## check input data
  maf_input <- subMaf(maf, 
                      patient.id = patient.id,
                      use.tumorSampleLabel = use.tumorSampleLabel,
                      mafObj = TRUE, ...)
  
  result <- lapply(maf_input, processVafcluster_maf)
  result <- result[!is.na(result)]
  
  if(length(result) > 1){
    return(result)
  }else if(length(result) == 0){
    return(NA)
  }else{
    return(result[[1]])
  }
  
}

Try the MesKit package in your browser

Any scripts or data that you put into this service are public.

MesKit documentation built on March 28, 2021, 6 p.m.