#' compareJSI
#'
#' @description The Jaccard similarity index (JSI) is applied to distinguish monoclonal versus polyclonal seeding in metastases.
#' @references Hu, Z., Li, Z., Ma, Z. et al. Multi-cancer analysis of clonality and the timing of systemic spread in paired primary tumors and metastases. Nat Genet (2020).
#' @param maf Maf or MafList object generated by \code{\link{readMaf}} function.
#' @param patient.id Select the specific patients. Default NULL, all patients are included.
#' @param pairByTumor Compare JSI between different tumors. Default FALSE.
#' @param min.ccf The minimum value of CCF. Default 0.
#' @param plot Logical (Default: FALSE).
#' @param use.circle Logical (Default: TRUE). Whether to use "circle" as visualization method of correlation matrix.
#' @param title Title of the plot Default "Jaccard similarity".
#' @param number.cex The size of text shown in correlation plot. Default 8.
#' @param number.col The color of text shown in correlation plot. Default "#C77960".
#' @param use.tumorSampleLabel Logical (Default: FALSE). Rename the 'Tumor_Sample_Barcode' by 'Tumor_Sample_Label'.
#' @param ... Other options passed to \code{\link{subMaf}}
#'
#' @examples
#' maf.File <- system.file("extdata/", "CRC_HZ.maf", package = "MesKit")
#' clin.File <- system.file("extdata/", "CRC_HZ.clin.txt", package = "MesKit")
#' ccf.File <- system.file("extdata/", "CRC_HZ.ccf.tsv", package = "MesKit")
#' maf <- readMaf(mafFile=maf.File, clinicalFile = clin.File, ccfFile=ccf.File, refBuild="hg19")
#' calJSI(maf)
#' @return Correlation matrix and heatmap via Jaccard similarity coefficient method
#' @export calJSI
calJSI <- function(
maf,
patient.id = NULL,
pairByTumor = FALSE,
min.ccf = 0,
plot = FALSE,
use.circle = TRUE,
title = NULL,
number.cex = 8,
number.col = "#C77960",
use.tumorSampleLabel = FALSE,
...) {
processJSI <- function(patient, maf_input){
maf <- maf_input[[patient]]
maf_data <- getMafData(maf)
if(! "CCF" %in% colnames(maf_data)){
stop(paste0("Calculation of Jaccard similarity requires CCF data." ,
"No CCF data was found when generating Maf/MafList object."))
}
maf_data <- maf_data %>%
dplyr::filter(!is.na(.data$Clonal_Status),
!is.na(CCF))
if(nrow(maf_data) == 0){
message("Warning: there was no mutation found in ", patient, " after filtering.")
return(NA)
}
if(pairByTumor){
if(length(unique(maf_data$Tumor_ID)) < 2 ){
message(paste0("Warning: only one tumor was found in ", patient,
" according to 'Tumor_ID'. You may want to compare CCF between regions by setting 'pairByTumor' as 'FALSE'")
)
return(NA)
}
maf_data$CCF <- maf_data$Tumor_Average_CCF
tumors <- as.character(unique(maf_data$Tumor_ID))
pairs <- utils::combn(tumors, 2, simplify = FALSE)
dist_mat <- diag(1, nrow = length(tumors), ncol = length(tumors))
dist_mat <- cbind(dist_mat, tumors)
rownames(dist_mat) <- tumors
colnames(dist_mat) <- c(tumors, "name")
}else{
if(length(unique(maf_data$Tumor_Sample_Barcode)) < 2 ){
message(paste0("Warning: only one sample was found in ", patient, "."))
return(NA)
}
samples <- as.character(unique(maf_data$Tumor_Sample_Barcode))
pairs <- utils::combn(samples, 2, simplify = FALSE)
dist_mat <- diag(1, nrow = length(samples), ncol = length(samples))
dist_mat <- cbind(dist_mat, samples)
rownames(dist_mat) <- samples
colnames(dist_mat) <- c(samples, "name")
}
JSI_input <- maf_data %>%
tidyr::unite(
"Mut_ID",
c(
"Chromosome",
"Start_Position",
"Reference_Allele",
"Tumor_Seq_Allele2"
),
sep = ":",
remove = FALSE
) %>%
dplyr::select(
"Mut_ID",
"Tumor_ID",
"Tumor_Sample_Barcode",
"Clonal_Status",
"CCF")
processJSI2 <- function(pair){
if(pairByTumor){
# name <- paste(pair[1], pair[2], sep = "_")
ccf.pair <- subset(JSI_input, JSI_input$Tumor_ID %in% c(pair[1], pair[2])) %>%
tidyr::unite("Mut_ID2",
c("Mut_ID",
"Tumor_ID"),
sep = ":",
remove = FALSE
) %>%
dplyr::distinct(.data$Mut_ID2, .keep_all = TRUE) %>%
dplyr::select("Mut_ID", "Tumor_ID", "Clonal_Status", "CCF") %>%
tidyr::pivot_wider(
names_from = "Tumor_ID",
values_from = c("CCF", "Clonal_Status"),
values_fill = list("CCF" = 0, "Clonal_Status" = "nostatus")
) %>%
dplyr::ungroup()
colnames(ccf.pair) <- c("Mut_ID", "ccf1", "ccf2", "status1", "status2")
}
else{
# name <- paste(pair[1], pair[2], sep = "_")
ccf.pair <- subset(JSI_input, JSI_input$Tumor_Sample_Barcode %in% c(pair[1], pair[2])) %>%
dplyr::select("Mut_ID", "Tumor_Sample_Barcode", "Clonal_Status", "CCF") %>%
tidyr::pivot_wider(
names_from = "Tumor_Sample_Barcode",
values_from = c("CCF", "Clonal_Status"),
values_fill = list("CCF" = 0, "Clonal_Status" = "nostatus")
) %>%
dplyr::ungroup()
colnames(ccf.pair) <- c("Mut_ID", "ccf1", "ccf2", "status1", "status2")
}
ccf.pair <- ccf.pair %>%
dplyr::filter(.data$ccf1 + .data$ccf2 !=0) %>%
data.table::setDT()
PC_1 <- nrow(ccf.pair[ccf.pair$status1 == "Clonal" &
ccf.pair$ccf1 > 0 &
ccf.pair$ccf2 == 0])
PC_2 <- nrow(ccf.pair[ccf.pair$status2 == "Clonal" &
ccf.pair$ccf1 == 0 &
ccf.pair$ccf2 > 0])
SS_12 = nrow(ccf.pair[ccf.pair$status2 == "Subclonal" &
ccf.pair$status1 == "Subclonal" &
ccf.pair$ccf1>0 &
ccf.pair$ccf2>0 ])
jsi <- SS_12/(PC_1+PC_2+SS_12)
if(is.nan(jsi)){
jsi <- 0
}
return(c(PC_1 = PC_1, PC_2 = PC_2, SS_12 = SS_12, jsi = jsi))
}
pair_result <- lapply(pairs, processJSI2)
PC_1.list <- lapply(pair_result, function(x)x["PC_1"]) %>% unlist()
PC_2.list <- lapply(pair_result, function(x)x["PC_2"]) %>% unlist()
SS_12.list <- lapply(pair_result, function(x)x["SS_12"]) %>% unlist()
multi <- mean(SS_12.list)/(mean(PC_1.list) + mean(PC_2.list) + mean(SS_12.list))
multi <- ifelse(is.nan(multi), 0, multi)
pairs_name <- lapply(pairs, function(x)paste0(x[1], "_", x[2])) %>% unlist()
jsi.list <- lapply(pair_result, function(x)x["jsi"]) %>% unlist()
names(jsi.list) <- pairs_name
processJSI3 <- function(j){
row_name <- j["name"]
j1 <- j[-length(j)]
idx <- which(j == "0")
j2 <- names(j[idx])
mat_row <- vapply(j2, function(g){
name1 <- paste0(g, "_", row_name)
name2 <- paste0(row_name, "_", g)
pos <- which(names(jsi.list) == name1)
if(length(pos) == 0){
pos <- which(names(jsi.list) == name2)
}
jsi <- jsi.list[pos]
return(jsi)
}, FUN.VALUE = double(1))
j[idx] <- mat_row
return(j)
}
dist_mat <- t(apply(dist_mat, 1, processJSI3))
dist_mat <- dist_mat[,-ncol(dist_mat)] %>% apply(c(1,2), as.numeric)
JSI.multi <- multi
JSI.pair <- dist_mat
if(plot){
values <- sort(unique(as.numeric(dist_mat)))
if(length(values[values!=0 &values !=1]) == 0){
message(paste0("Warning: private clonal mutations or shared subclonal mutations were not found in all tumor/sample pairs in ", patient, ".\n",
"Correlation plot of Jaccard similarity coefficient will not be generated for ", patient, "!"))
return(list(JSI.multi = JSI.multi, JSI.pair = JSI.pair, JSI.plot = NA))
}
if(is.null(title)){
min_value <- min(dist_mat[dist_mat!=1 & dist_mat!=0])
significant_digit <- gsub(pattern = "0\\.0*", "", as.character(min_value))
digits <- nchar(as.character(min_value)) - nchar(significant_digit)
title_id <- paste0("JSI of patient ", patient, ": ", round(multi,digits))
}else{
title_id <- title
}
p <- plotCorr(
dist_mat,
use.circle,
number.cex = number.cex,
number.col = number.col,
title = if(!is.null(title_id)) title_id else{NA}
)
JSI.plot <- p
return(list(JSI.multi = JSI.multi, JSI.pair = JSI.pair, JSI.plot = JSI.plot))
}else{
return(list(JSI.multi = JSI.multi, JSI.pair = JSI.pair))
}
}
maf_input <- subMaf(maf,
min.ccf = min.ccf,
patient.id = patient.id,
mafObj = TRUE,
use.tumorSampleLabel = use.tumorSampleLabel,
...)
patient_list <- names(maf_input)
result <- lapply(patient_list, processJSI, maf_input)
patient_list <- patient_list[!is.na(result)]
result <- result[!is.na(result)]
if(length(result) > 1){
names(result) <- patient_list
return(result)
}else if(length(result) == 0){
return(NA)
}else{
return(result[[1]])
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.