################################################################################
#' calFst
#' @description Genetic divergence between regions of subclonal sSNVs using
#' the Weir and Cockerham method
#' @references Sun R, Hu Z, Sottoriva A, et al. Between-region
#' genetic divergence reflects the mode and tempo of tumor evolution.
#' Nat Genet. 2017;49(7):1015-1024.
#'
#' Bhatia G, Patterson N, Sankararaman S, Price AL.
#' Estimating and interpreting FST: the impact of rare variants.
#' Genomic Res. 2013;23(9):1514-1521.
#'
#' @param maf A Maf or MafList object generated by \code{\link{readMaf}} function.
#' @param patient.id Select the specific patients.
#' Default NULL, all patients are included.
#' @param min.vaf Specify The minimum VAF to filter variants. Default 0.
#' @param min.total.depth The minimum total allele depth for filtering variants.
#' Default 2.
#' @param use.adjVAF Use adjusted VAF in analysis when adjusted VAF or CCF is available. Default FALSE.
#' @param plot Logical (Default: TRUE).
#' @param withinTumor Logical (Default: FALSE). Whether calculate between-region heterogeneity within tumors.
#' @param use.circle Logical (Default: TRUE). Whether use "circle" in the plot.
#' as visualization method of correlation matrix
#' @param title The title of the plot. Default "Nei's distance"
#' @param number.cex The size of text shown in correlation plot. Default 8.
#' @param number.col The color of text shown in correlation plot.
#' Default "#C77960".
#' @param use.tumorSampleLabel Logical (Default: FALSE). Rename the 'Tumor_Sample_Barcode' by 'Tumor_Sample_Label'.
#' @param ... Other options passed to \code{\link{subMaf}}
#'
#' @return A list contains Fst value of MRS and Hudson estimator of each sample-pair, respectively.
#'
#' @examples
#' maf.File <- system.file("extdata/", "CRC_HZ.maf", package = "MesKit")
#' clin.File <- system.file("extdata/", "CRC_HZ.clin.txt", package = "MesKit")
#' ccf.File <- system.file("extdata/", "CRC_HZ.ccf.tsv", package = "MesKit")
#' maf <- readMaf(mafFile=maf.File, clinicalFile = clin.File, ccfFile=ccf.File, refBuild="hg19")
#' calFst(maf)
#' @import dplyr circlize tidyr
#' @importFrom utils combn
#' @export calFst
calFst <- function(
maf,
patient.id = NULL,
min.vaf = 0,
min.total.depth = 2,
use.adjVAF = FALSE,
plot = TRUE,
withinTumor = FALSE,
use.circle = TRUE,
title = NULL,
number.cex = 8,
number.col = "#C77960",
use.tumorSampleLabel = FALSE,
...){
processFst <- function(maf_data){
patient <- unique(maf_data$Patient_ID)
if(nrow(maf_data) == 0){
message("Warning: there was no mutation in ", patient, " after filtering.")
return(NA)
}
# if(!"VAF_adj" %in% colnames(maf_data)){
# stop("Adjusted VAFs were not found in the maf object.")
# }
Fst_input <- maf_data %>%
tidyr::unite(
"Mut_ID",
c(
"Chromosome",
"Start_Position",
"Reference_Allele",
"Tumor_Seq_Allele2"
),
sep = ":",
remove = FALSE
) %>%
dplyr::mutate(totalDepth = .data$Ref_allele_depth + .data$Alt_allele_depth) %>%
dplyr::select(
"Mut_ID",
"Tumor_ID",
"Tumor_Sample_Barcode",
"VAF",
"totalDepth") %>%
## remove NA(it may be caused by NA of VAF)
dplyr::filter(!is.na(.data$VAF))
## check data
if(withinTumor){
tumor <- unique(Fst_input$Tumor_ID)
if(length(unique(Fst_input$Tumor_Sample_Barcode)) < 2 ){
message(paste0("Warning: only one sample was found of ", tumor,
" in ", patient,
". If you want to compare CCF between regions, withinTumor should be set as FALSE"))
return(NA)
}
}else{
if(length(unique(Fst_input$Tumor_Sample_Barcode)) < 2 ){
message(paste0("Warning: only one sample was found in ", patient, "."))
return(NA)
}
}
## pairwise heterogeneity
samples <- as.character(unique(Fst_input$Tumor_Sample_Barcode))
pairs <- utils::combn(samples, 2, simplify = FALSE)
dist_mat <- diag(1, nrow=length(samples), ncol=length(samples))
dist_mat <- cbind(dist_mat, samples)
rownames(dist_mat) <- samples
colnames(dist_mat) <- c(samples, "name")
processFstpair <- function(pair){
pair_vaf <- subset(Fst_input, Fst_input$Tumor_Sample_Barcode %in% c(pair[1], pair[2])) %>%
dplyr::select(-"Tumor_ID") %>%
tidyr::pivot_wider(
names_from = "Tumor_Sample_Barcode",
values_from = c("VAF", "totalDepth"),
values_fill = list("VAF" = 0, "totalDepth" = 0)
)
colnames(pair_vaf) <- c("Mut_ID", "vaf1", "vaf2", "depth1", "depth2")
##
pair_vaf <- pair_vaf %>%
dplyr::mutate(
covariance = (.data$vaf1-.data$vaf2)^2-(.data$vaf1*(1-.data$vaf1))/(.data$depth1-1)-(.data$vaf2*(1-.data$vaf2))/(.data$depth2-1),
sd = .data$vaf1*(1-.data$vaf2)+.data$vaf2*(1-.data$vaf1)
)
fst <- mean(pair_vaf$covariance)/mean(pair_vaf$sd)
return(fst)
}
fst.list <- lapply(pairs, processFstpair) %>% unlist()
pairs_name <- lapply(pairs, function(x)paste0(x[1], "_", x[2])) %>% unlist()
names(fst.list) <- pairs_name
processFstMat <- function(j){
row_name <- j["name"]
j1 <- j[-length(j)]
idx <- which(j == "0")
j2 <- names(j[idx])
mat_row <- vapply(j2, function(g){
name1 <- paste0(g, "_", row_name)
name2 <- paste0(row_name, "_", g)
# pos <- which(grepl(name1, names(fst.list)))
pos <- which(names(fst.list) == name1)
if(length(pos) == 0){
# pos <- which(grepl(name2, names(fst.list)))
pos <- which(names(fst.list) == name2)
}
fst <- fst.list[pos]
return(fst)
}, FUN.VALUE = double(1))
j[idx] <- mat_row
return(j)
}
dist_mat <- t(apply(dist_mat, 1, processFstMat))
dist_mat <- dist_mat[,-ncol(dist_mat)] %>% apply(c(1,2),as.numeric)
Fst.avg <- mean(dist_mat[upper.tri(dist_mat, diag = FALSE)])
Fst.pair <- dist_mat
if(plot){
## get significant number
if(is.null(title)){
min_value <- min(dist_mat[dist_mat != 1 & dist_mat != 0])
min_value <- format(min_value, scientific = FALSE)
significant_digit <- gsub(pattern = "0\\.0*", "", as.character(min_value))
digits <- nchar(as.character(min_value)) - nchar(significant_digit)
if(withinTumor){
title_id <- paste0("Fst of ", tumor, " in ", patient, ": ",
round(Fst.avg, digits))
}else{
title_id <- paste0("Fst of patient ", patient, ": ", round(Fst.avg, digits))
}
}else{
title_id <- title
}
Fst.plot <- plotCorr(
dist_mat,
use.circle,
number.cex=number.cex,
number.col=number.col,
title = if(!is.null(title_id)) title_id else{NA}
)
return(list(Fst.avg = Fst.avg, Fst.pair = Fst.pair, Fst.plot = Fst.plot))
}else{
return(list(Fst.avg = Fst.avg, Fst.pair = Fst.pair))
}
}
if(withinTumor){
clonalStatus <- "Subclonal"
}else{
clonalStatus <- NULL
}
maf_input <- subMaf(maf = maf,
min.vaf = min.vaf,
min.total.depth = min.total.depth,
clonalStatus = clonalStatus,
mafObj = TRUE,
use.adjVAF = use.adjVAF,
patient.id = patient.id,
use.tumorSampleLabel = use.tumorSampleLabel,
...)
maf_data_list <- lapply(maf_input, getMafData)
if(withinTumor){
maf_group <- dplyr::bind_rows(maf_data_list) %>%
dplyr::group_by(.data$Patient_ID, .data$Tumor_ID)
group_name <- paste(group_keys(maf_group)$Patient_ID, group_keys(maf_group)$Tumor_ID, sep = "_")
result <- maf_group %>%
dplyr::group_map(~processFst(.), .keep = TRUE)
names(result) <- group_name
}else{
maf_group <- dplyr::bind_rows(maf_data_list) %>%
dplyr::group_by(.data$Patient_ID)
group_name <- group_keys(maf_group)$Patient_ID
result <- maf_group %>%
dplyr::group_map(~processFst(.), .keep = TRUE)
names(result) <- group_name
}
result <- result[!is.na(result)]
if(length(result) > 1){
return(result)
}else if(length(result) == 0){
return(NA)
}else{
return(result[[1]])
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.