#' maf2variants
#' @description Convert a Maf/MafList object into variants data frame(s) for
#'   the subclonal structure analysis.
#' @param maf Maf or MafList object generated by the `readMaf()` function.
#' @param patient.id Patient(s) to extract. Default `NULL`: all patients are
#'   included. Character ids must match `names(maf)`.
#' @param ccf.cutoff Remove low-CCF mutations (default: 0.1, i.e. 10%). When
#'   `extract.VAF = TRUE` half of this cutoff is applied to the VAF.
#' @param extract.VAF Whether to extract the VAF information. Default `FALSE`:
#'   extract CCF rather than VAF.
#'
#' @details
#'
#' This function extracts the `Cluster` information from the CCF data.
#' Therefore, the `Cluster` column is required in the ccfFile.
#'
#' For the output `variants`, the first five columns are Mutid, Hugo_Symbol,
#' Variant_Classification, Patient_ID, Cluster. The remaining columns indicate
#' variant cellular prevalence (in percent) for each sample.
#'
#' Prevalence values that are `<= 1` are treated as fractions and multiplied
#' by 100; this test is applied to each value individually. Mutations without
#' a cluster assignment (`NA` or `Cluster < 1`) are dropped, and samples in
#' which a mutation does not pass the cutoff are filled with 0.
#'
#' @return A data frame of variants when a single patient is selected (or when
#'   `maf` is a single Maf object); otherwise a named list with one data frame
#'   per patient.
#'
#' @examples
#' data.type <- "split1"
#'
#' maf1 <- readMaf(
#'   mafFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.mutation.txt", data.type)),
#'   ccfFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.CCF.txt", data.type)),
#'   clinicalFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.clinical.txt", data.type)),
#'   refBuild = "hg19",
#'   ccf.conf.level = 0.95
#' )
#'
#' ccfs <- maf2variants(maf1, patient.id = "Met1")
#'
#' # extract VAF rather than CCF.
#' vafs <- maf2variants(maf1, patient.id = "Met1", extract.VAF = TRUE)
#'
#' @export
#'
maf2variants <- function(
  maf,
  patient.id = NULL,
  ccf.cutoff = 0.1,
  extract.VAF = FALSE
) {
  # Build the wide (mutation x sample) prevalence table for one Maf object.
  processMaf2Vars <- function(m) {
    maf_data <- MesKit::getMafData(m)
    patient <- MesKit::getMafPatient(m)

    # Check whether the Cluster column exists in CCF data.
    if (!"Cluster" %in% colnames(maf_data)) {
      stop("The Cluster column is missing in maf data, stop.")
    }

    # The VAF and CCF paths differ only in the value column and the threshold.
    if (extract.VAF) {
      message("Extract VAF rather than CCF")
      value_col <- "VAF"
      min_value <- ccf.cutoff * 100 / 2
    } else {
      value_col <- "CCF"
      min_value <- ccf.cutoff * 100
    }

    required_cols <- c(
      "Hugo_Symbol", "Chromosome", "Start_Position", "End_Position",
      "Reference_Allele", "Tumor_Seq_Allele2", "Variant_Classification",
      "Tumor_Sample_Barcode", "Tumor_ID", "Patient_ID", "Tumor_Sample_Label",
      value_col, "Cluster"
    )
    missing_cols <- setdiff(required_cols, colnames(maf_data))
    if (length(missing_cols) > 0) {
      stop(
        sprintf(
          "The following columns are missing in maf data: %s",
          paste(missing_cols, collapse = ", ")
        ),
        call. = FALSE
      )
    }

    vars <- maf_data %>%
      # Work on a tibble so the result class does not depend on the class of
      # the table stored in the Maf object.
      dplyr::as_tibble() %>%
      dplyr::select(dplyr::all_of(required_cols)) %>%
      # Temporary common name; it never reaches the output because the pivot
      # names the value columns after the sample labels.
      dplyr::rename(Prevalence = dplyr::all_of(value_col)) %>%
      dplyr::mutate(
        Mutid = stringr::str_c(
          Chromosome, Start_Position, End_Position,
          Reference_Allele, Tumor_Seq_Allele2,
          sep = ":"
        ),
        # Element-wise: values <= 1 are taken as fractions and scaled to
        # percent (same result as the former rowwise max() test).
        Prevalence = ifelse(Prevalence <= 1, Prevalence * 100, Prevalence)
      ) %>%
      dplyr::filter(
        Prevalence >= min_value,
        !is.na(Cluster),
        Cluster >= 1
      ) %>%
      tidyr::pivot_wider(
        id_cols = c(Mutid, Hugo_Symbol, Variant_Classification, Patient_ID, Cluster),
        names_from = c(Tumor_Sample_Label),
        values_from = c(Prevalence),
        values_fill = 0
      )

    message(sprintf(
      "Patient %s has the following cluster: %s", patient,
      stringr::str_c(sort(unique(vars$Cluster)), collapse = "; ")
    ))
    vars
  }

  # A single Maf object (not a MafList) is processed directly.
  if (methods::is(maf, "Maf")) {
    maf_patient <- as.character(MesKit::getMafPatient(maf))
    if (!is.null(patient.id) && !all(as.character(patient.id) %in% maf_patient)) {
      stop(
        sprintf(
          "patient.id (%s) does not match the patient of the Maf object (%s).",
          paste(patient.id, collapse = ", "),
          paste(maf_patient, collapse = ", ")
        ),
        call. = FALSE
      )
    }
    return(processMaf2Vars(maf))
  }

  # No selection: one table per patient, named like the input list.
  if (is.null(patient.id)) {
    Vars <- lapply(maf, processMaf2Vars)
    names(Vars) <- names(maf)
    return(Vars)
  }

  # Fail early with a readable message instead of "subscript out of bounds".
  if (is.character(patient.id)) {
    unknown_ids <- setdiff(patient.id, names(maf))
    if (length(unknown_ids) > 0) {
      stop(
        sprintf(
          "Patient(s) not found in maf: %s. Available patients: %s",
          paste(unknown_ids, collapse = ", "),
          paste(names(maf), collapse = ", ")
        ),
        call. = FALSE
      )
    }
  }

  # One patient keeps the original return shape (a single data frame).
  if (length(patient.id) == 1) {
    return(processMaf2Vars(maf[[patient.id]]))
  }

  # Several patients: `[[` with a vector would index recursively, so iterate.
  Vars <- lapply(patient.id, function(id) processMaf2Vars(maf[[id]]))
  names(Vars) <- if (is.character(patient.id)) patient.id else names(maf)[patient.id]
  Vars
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.