R/maf2variants.R

Defines functions maf2variants

Documented in maf2variants

#' Convert a Maf/MafList object into per-patient variant tables
#'
#' @description Change the maf object into variants data frame for the
#'   subclonal structures analysis.
#'
#' @param maf Maf or MafList object generated by `readMaf()` function.
#' @param patient.id Select the specific patients. Default `NULL`, all patients
#'   are included.
#' @param ccf.cutoff Removing low-CCF mutations (default: 0.1). The cutoff is
#'   given as a fraction; when `extract.VAF = TRUE` half of this value is used
#'   as the VAF cutoff.
#' @param extract.VAF Whether extract the VAF information. Default `FALSE`:
#'   extract CCF rather than VAF.
#'
#' @details
#'
#' This function extracts the `Cluster` information from the CCF data.
#' Therefore, the `Cluster` column is required in the ccfFile.
#'
#' For the output `variants`, the first five columns are Mutid, Hugo_Symbol,
#' Variant_Classification, Patient_ID, Cluster. The remaining columns (one per
#' `Tumor_Sample_Label`) indicate variant cellular prevalence for each sample,
#' expressed on a 0-100 scale. Sample/mutation combinations that are absent
#' after filtering are filled with 0. Mutations with a missing `Cluster` or a
#' `Cluster` below 1 are removed.
#'
#' @return If `patient.id` is `NULL` or contains several patients, a named list
#'   with one variants data frame per patient. If `patient.id` is a single
#'   patient, the variants data frame of that patient.
#'
#' @examples
#' data.type <- "split1"
#'
#' maf1 <- readMaf(
#'   mafFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.mutation.txt", data.type)),
#'   ccfFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.CCF.txt", data.type)),
#'   clinicalFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.clinical.txt", data.type)),
#'   refBuild = "hg19",
#'   ccf.conf.level = 0.95
#' )
#'
#' ccfs <- maf2variants(maf1, patient.id = "Met1")
#'
#' # extract VAF rather than CCF.
#' vafs <- maf2variants(maf1, patient.id = "Met1", extract.VAF = TRUE)
#'
#' @export
maf2variants <- function(
  maf,
  patient.id = NULL,
  ccf.cutoff = 0.1,
  extract.VAF = FALSE
  ) {

  # Bring a prevalence column onto the 0-100 scale. The scale is decided once
  # for the whole column: deciding it value by value would also multiply small
  # percentages (e.g. 0.8%) by 100.
  to_percent <- function(x) {
    if (all(is.na(x))) {
      return(x)
    }
    if (max(x, na.rm = TRUE) <= 1) x * 100 else x
  }

  # Build the wide variants table for one patient.
  process_patient <- function(m) {
    maf_data <- MesKit::getMafData(m)
    patient <- MesKit::getMafPatient(m)

    # Check whether the Cluster column exists in CCF data.
    if (!"Cluster" %in% colnames(maf_data)) {
      stop("The Cluster column is missing in maf data, stop.")
    }

    if (extract.VAF) {
      message("Extract VAF rather than CCF")
      value_col <- "VAF"
      # The VAF cutoff is half of the CCF cutoff.
      cutoff <- ccf.cutoff * 100 / 2
    } else {
      value_col <- "CCF"
      cutoff <- ccf.cutoff * 100
    }

    standard_cols <- c(
      "Hugo_Symbol", "Chromosome", "Start_Position", "End_Position",
      "Reference_Allele", "Tumor_Seq_Allele2", "Variant_Classification",
      "Tumor_Sample_Barcode", "Tumor_ID", "Patient_ID", "Tumor_Sample_Label",
      value_col, "Cluster"
    )

    vars <- maf_data %>%
      dplyr::select(dplyr::all_of(standard_cols)) %>%
      # Return a tibble whatever data-frame class getMafData() hands back.
      dplyr::as_tibble() %>%
      # Use one working name so the VAF and CCF paths share the same pipeline;
      # the name never reaches the output (values are spread per sample).
      dplyr::rename(Prevalence = dplyr::all_of(value_col)) %>%
      dplyr::mutate(
        Mutid = stringr::str_c(
          Chromosome, Start_Position, End_Position,
          Reference_Allele, Tumor_Seq_Allele2,
          sep = ":"
        ),
        Prevalence = to_percent(Prevalence)
      ) %>%
      # filter() also drops rows where a condition evaluates to NA.
      dplyr::filter(Prevalence >= cutoff, !is.na(Cluster), Cluster >= 1) %>%
      tidyr::pivot_wider(
        id_cols = c(Mutid, Hugo_Symbol, Variant_Classification, Patient_ID, Cluster),
        names_from = Tumor_Sample_Label,
        values_from = Prevalence,
        values_fill = 0
      )

    message(sprintf(
      "Patient %s has the following cluster: %s",
      patient,
      stringr::str_c(sort(unique(vars$Cluster)), collapse = "; ")
    ))

    vars
  }

  # A single-patient object cannot be iterated or indexed like a list, so wrap
  # it into a one-element list named by its patient.
  # NOTE(review): assumes the single-patient S4 class is called "Maf" -
  # confirm against the MesKit class definitions.
  if (methods::is(maf, "Maf")) {
    maf <- stats::setNames(list(maf), MesKit::getMafPatient(maf))
  }

  if (is.null(patient.id)) {
    variants <- lapply(maf, process_patient)
    names(variants) <- names(maf)
    return(variants)
  }

  # Fail early with a readable message instead of passing NULL downstream.
  if (is.character(patient.id)) {
    unknown <- setdiff(patient.id, names(maf))
    if (length(unknown) > 0) {
      stop(
        sprintf(
          "patient.id not found in maf: %s. Available patients: %s",
          paste(unknown, collapse = ", "),
          paste(names(maf), collapse = ", ")
        ),
        call. = FALSE
      )
    }
  }

  # A single patient keeps the historical return value: one data frame.
  if (length(patient.id) == 1L) {
    return(process_patient(maf[[patient.id]]))
  }

  variants <- lapply(patient.id, function(id) process_patient(maf[[id]]))
  names(variants) <- if (is.character(patient.id)) {
    patient.id
  } else {
    names(maf)[patient.id]
  }
  variants
}
qingjian1991/MPTevol documentation built on Jan. 30, 2023, 10:16 p.m.