#' mutCluster
#' @description Cluster mutations based on variant allele frequencies (VAFs) or cancer cell fractions (CCFs).
#'
#' @param maf Maf or MafList object generated by \code{\link{readMaf}} function.
#' @param patient.id Select the specific patients. Default NULL, all patients are included.
#' @param segFile The segment file.
#' @param use.ccf Cluster CCF. Default FALSE.
#' @param withinTumor Cluster Tumor average CCF within tumors in each patients. Default FALSE.
#' @param use.tumorSampleLabel Logical (Default: FALSE).
#' Rename the 'Tumor_Sample_Barcode' by 'Tumor_Sample_Label'.
#' @param ... Other options passed to \code{\link{subMaf}}
#'
#' @examples
#' maf.File <- system.file("extdata/", "CRC_HZ.maf", package = "MesKit")
#' clin.File <- system.file("extdata/", "CRC_HZ.clin.txt", package = "MesKit")
#' ccf.File <- system.file("extdata/", "CRC_HZ.ccf.tsv", package = "MesKit")
#' maf <- readMaf(mafFile=maf.File, clinicalFile = clin.File, ccfFile=ccf.File, refBuild="hg19")
#' mutCluster(maf, patient.id = 'V402')
#'
#' @import ggridges mclust
#' @importFrom grDevices boxplot.stats
#' @return clustering plots of vaf
#' @export mutCluster
## Main function for VAF plot
mutCluster <- function(maf,
patient.id = NULL,
use.ccf = FALSE,
segFile = NULL,
withinTumor = FALSE,
use.tumorSampleLabel = FALSE,
...){
processVafcluster_maf <- function(m){
maf_data <- getMafData(m)
if(nrow(maf_data) == 0){
message("Warning: there was no mutation in ", patient, " after filtering.")
return(NA)
}
patient <- getMafPatient(m)
seg <- NULL
if(withinTumor){
if(!"CCF" %in% colnames(getMafData(m))){
stop(paste0("mutCluster requires CCF data when 'withinTumor'." ,
"No CCF data was found when generate Maf/MafList object."))
}
maf_data$V <- maf_data$Tumor_Average_CCF
maf_data$ID <- maf_data$Tumor_ID
xlab <- "CCF"
}else{
if(use.ccf){
if(!"CCF" %in% colnames(getMafData(m))){
stop(paste0("mutCluster requires CCF data when 'use.CCF'." ,
"No CCF data was found when generate Maf/MafList object."))
}
maf_data$V <- maf_data$CCF
xlab <- "CCF"
}else{
if(!is.null(segFile)){
seg <- readSegment(segFile = segFile)
# maf_data <- copyNumberFilter(maf_data,seg, use.tumorSampleLabel = use.tumorSampleLabel)
}
else{
message("## No segment files specified. Assuming all mutations have a CN of 2.")
}
maf_data$V <- maf_data$VAF
xlab <- "VAF"
}
maf_data$ID <- maf_data$Tumor_Sample_Barcode
}
maf_data <- maf_data[!is.na(maf_data$V), ]
id_list <- unique(maf_data$ID)
sample_result <- lapply(id_list, processVafcluster_sample,
maf_data, patient, xlab, seg = seg, use.tumorSampleLabel)
na_idx <- which(!is.na(sample_result))
sample_result <- sample_result[na_idx]
plot_list <- lapply(sample_result, function(x)x$p)
names(plot_list) <- id_list[na_idx]
rebuild_data_list <- lapply(sample_result, function(x)x$subdata)
rebuild_data <- dplyr::bind_rows(rebuild_data_list)
if(length(plot_list) == 1){
plot_list <- plot_list[[1]]
}
return(list(cluster.data = rebuild_data, cluster.plot = plot_list))
}
processVafcluster_sample <- function(id, maf_data, patient, xlab, seg, use.tumorSampleLabel){
message(paste("## Performing one-dimensional clustering for ", id," of ", patient, sep = ""))
subdata <- maf_data[maf_data$ID == id]
if(!is.null(seg)){
subdata <- copyNumberFilter(subdata, seg, use.tumorSampleLabel = use.tumorSampleLabel)
}
## data cleaning
if (nrow(subdata) < 3) {
message(paste0("## Mutation counts of Sample ", id, " are not enough for clustering!"))
return(NA)
}
## infer possible cluster from maf_data
cluster_result <- mclust::densityMclust(subdata$V, G=seq_len(6), verbose=FALSE)
subdata$cluster <- as.character(cluster_result$classification)
## define outfilter
out_vaf <- boxplot.stats(subdata$V)$out
subdata[subdata$V %in% out_vaf]$cluster <- "outlier"
## rename cluster
cluster_name_order <- c(
sort(unique(subdata[subdata$cluster != "outlier"]$cluster)),
"outlier"
)
subdata <- subdata %>%
dplyr::rowwise()%>%
dplyr::mutate(cluster = dplyr::if_else(
cluster == "outlier",
"outlier",
as.character(which(cluster == cluster_name_order))
)) %>%
as.data.frame()
p <- drawVAFCombine(subdata, xlab)
subdata <- dplyr::select(subdata, -"V", -"ID")
return(list(p = p, subdata = subdata))
}
# plotOption <- match.arg(plotOption, choices = c('combine', 'compare'), several.ok = FALSE)
## check input data
maf_input <- subMaf(maf,
patient.id = patient.id,
use.tumorSampleLabel = use.tumorSampleLabel,
mafObj = TRUE, ...)
result <- lapply(maf_input, processVafcluster_maf)
result <- result[!is.na(result)]
if(length(result) > 1){
return(result)
}else if(length(result) == 0){
return(NA)
}else{
return(result[[1]])
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.