R/subMaf.R

Defines functions subMaf

Documented in subMaf

#' Subset Maf object
#'
#' @param maf Maf or MafList object generated by \code{\link{readMaf}} function.
#' @param mafObj return Maf class. (Default: FALSE).
#' @param patient.id Select the specific patients. Default NULL, all patients are included.
#' @param geneList A list of genes to restrict the analysis. Default NULL.
#' @param chrSilent Chromosomes excluded in the analysis. e.g, 1, 2, X, Y. Default NULL.
#' @param mutType Select Proper variant classification you need. Default "All". Option: "nonSyn".
#' @param use.indel Logical value. Whether to use INDELs besides somatic SNVs. (Default: TRUE).
#' @param min.vaf The minimum VAF for filtering variants. Default 0.
#' @param max.vaf The maximum VAF for filtering variants. Default 1.
#' @param min.average.vaf The minimum tumor average VAF for filtering variants. Default 0.
#' @param min.average.adj.vaf The minimum tumor average ajust VAF for filtering variants. Default 0.
#' @param min.ccf The minimum CCF for filtering variants. Default NULL.
#' @param min.ref.depth The minimum reference allele depth for filtering variants. Default 0.
#' @param min.alt.depth The minimum alteratation allele depth for filtering variants. Default 0.
#' @param min.total.depth The minimum total allele depth for filtering variants. Default 0.
#' @param clonalStatus Subset by clonal status. Default NULL. Option: "Clonal","Subclonal".
#' @param use.adjVAF Use adjusted VAF in analysis when adjusted VAF or CCF is available. Default FALSE. 
#' @param use.tumorSampleLabel Logical (Default: FALSE). Rename the 'Tumor_Sample_Barcode' by 'Tumor_Sample_Label'.
#'
#' @examples
#' maf.File <- system.file("extdata/", "CRC_HZ.maf", package = "MesKit")
#' clin.File <- system.file("extdata/", "CRC_HZ.clin.txt", package = "MesKit")
#' ccf.File <- system.file("extdata/", "CRC_HZ.ccf.tsv", package = "MesKit")
#' maf <- readMaf(mafFile=maf.File, clinicalFile = clin.File, ccfFile=ccf.File, refBuild="hg19")
#' maf_data <- subMaf(maf)
#' @return Maf object or Maf data.
#' @export subMaf

subMaf <- function(maf,
                   mafObj = FALSE,
                   patient.id = NULL,
                   geneList = NULL,
                   chrSilent = NULL,
                   mutType = "All",
                   use.indel = TRUE,
                   min.vaf = 0,
                   max.vaf = 1,
                   min.average.vaf = 0,
                   min.ccf = 0,
                   min.ref.depth = 0,
                   min.alt.depth = 0,
                   min.total.depth = 0,
                   clonalStatus = NULL,
                   use.adjVAF = FALSE,
                   use.tumorSampleLabel = FALSE){
   
   ## check mafInput
   if(is(maf, "Maf")){
      patient <- getMafPatient(maf)
      maf_list <- list(maf)
      names(maf_list) <- patient
   }else if(is(maf, "MafList")){
      maf_list <- maf
   }else{
       stop("Input should be a Maf or MafList object generated by readMaf")
   }
   
   if(!is.null(patient.id)){
      patient.setdiff <- setdiff(patient.id, names(maf_list))
      if(length(patient.setdiff) > 0){
         stop(paste0("Error: patient ", patient.setdiff, " can not be found in MafList."))
      }
      maf_list <- maf_list[names(maf_list) %in% patient.id]
   }
   
   result <- list()
   for(maf in maf_list){
      
      maf_data <- getMafData(maf)
      nonSyn.vc <- getNonSyn_vc(maf)
      patient <- unique(maf_data$Patient_ID)
      
      if(use.tumorSampleLabel){
         if(!"Tumor_Sample_Label" %in% colnames(maf_data)){
            stop("Tumor_Sample_Label was not found. Please check clinical data or let use.tumorSampleLabel be 'FALSE'")
         }
         maf_data <- maf_data %>% 
            dplyr::mutate(Tumor_Sample_Barcode = .data$Tumor_Sample_Label)
      }
      
      if(use.adjVAF){
         if(!"VAF_adj" %in% colnames(maf_data)){
            stop("Adjusted VAF was not found in maf object.")
         }
         else{
            maf_data$VAF <- maf_data$VAF_adj
            # maf_data$Tumor_Average_VAF <- maf_data$Tumor_Average_VAF_adj
         }
      }
      
      
      # ## patient filter
      # if(!is.null(patient.id)){
      #    patient.setdiff <- setdiff(patient.id, unique(maf_data$Patient_ID))
      #    if(length(patient.setdiff) > 0){
      #       stop(paste0("Patient ", patient.setdiff, " can not be found in your data"))
      #    }
      #    maf_data <- maf_data[Patient_ID %in% patient.id]
      # }
      
      ## chromosome filter
      if (!is.null(chrSilent)) {
         chr.setdiff <- setdiff(chrSilent, unique(maf_data$Chromosome))
         if(length(chr.setdiff) > 0){
            stop(paste0("Chromosome ", chr.setdiff, " can not be found in your data"))
         }
         maf_data <- maf_data[!maf_data$Chromosome %in% chrSilent]
      }
      
      ## filter variant classification
      mutType <- match.arg(mutType, choices = c("All","nonSyn"), several.ok = FALSE)
      if (mutType == "nonSyn") {
         maf_data <-  maf_data[maf_data$Variant_Classification %in% nonSyn.vc]
      }
      
      ## use.indel filter
      if (!use.indel) {
         maf_data <- maf_data[maf_data$Variant_Type == "SNP"]
      }
      
      ## Select mutations in selected genes
      if(!is.null(geneList)){
         maf_data <- maf_data[maf_data$Hugo_Symbol %in% geneList]
      }
      
      ## allele depth filter
      # if(min.total.depth <= 1){
      #    stop("'min.total.depth' should be greater than 1!")
      # }
      maf_data$Ref_allele_depth[is.na(maf_data$Ref_allele_depth)] <- 0
      maf_data$Alt_allele_depth[is.na(maf_data$Alt_allele_depth)] <- 0
      maf_data <- maf_data[maf_data$Ref_allele_depth >= min.ref.depth & maf_data$Alt_allele_depth >= min.alt.depth &(maf_data$Ref_allele_depth + maf_data$Alt_allele_depth) >= min.total.depth,]
      
      ## vaf filter
      maf_data <- maf_data[maf_data$VAF >= min.vaf & maf_data$VAF <= max.vaf]
      if(!is.null(min.average.vaf)){
         if(min.average.vaf > 0){
            maf_data <- maf_data[maf_data$Tumor_Average_VAF >= min.average.vaf]
         }
      }
      # 
      # if("VAF_adj" %in% colnames(maf_data)){
      #    if(!is.null(min.average.adj.vaf)){
      #       if(min.average.adj.vaf > 0){
      #          maf_data <- maf_data[maf_data$Tumor_Average_VAF_adj >= min.average.adj.vaf]
      #       }
      #    }
      # }
      
      ## ccf filter
      if("CCF" %in% colnames(maf_data)){
         if(!is.null(min.ccf)){
            if(min.ccf > 0){
               maf_data <- maf_data[maf_data$CCF >= min.ccf]
            }
         }
      }
      
      if(!is.null(clonalStatus)){
         if(!"Clonal_Status" %in% colnames(maf_data)){
            stop("Missing information about clonal status in maf object.")
         }
         clonalStatus <- match.arg(clonalStatus, choices = c("Clonal", "Subclonal"))
         maf_data <- maf_data[maf_data$Clonal_Status == clonalStatus]
      }
      
      if(mafObj){
         maf <- Maf(
            data = maf_data,
            sample.info = getSampleInfo(maf),
            nonSyn.vc = getNonSyn_vc(maf),
            ref.build = getMafRef(maf)
         )
         result[[patient]] <- maf
      }else{
         result[[patient]] <- maf_data
      }
   }
   
   if(mafObj & length(result) > 1){
      return(MafList(result))
   }else{
      return(result)
   }
   # 
   # if(length(result) == 1){
   #    return(result[[1]])
   # }else{
   #    return(result)
   # }
   
   
}

Try the MesKit package in your browser

Any scripts or data that you put into this service are public.

MesKit documentation built on March 28, 2021, 6 p.m.