Nothing
#' calNeiDist
#' @description Nei's distance of CCF for sample/tumor pair.
#' @references Lee JK, Wang J, Sa JK, et al. Spatiotemporal genomic architecture informs precision oncology in glioblastoma. Nat Genet. 2017;49(4):594-599.
#'
#' @param maf A Maf or MafList object generated by \code{\link{readMaf}} function.
#' @param patient.id Select the specific patients. Default NULL, all patients are included.
#' @param withinTumor Calculate between-region heterogeneity within tumor.
#' (Default: FALSE).
#' @param min.ccf Specify the minimum CCF. Default 0.
#' @param plot Logical (Default: TRUE).
#' @param use.circle Logical (Default: TRUE). Whether to use "circle" as visualization method of correlation matrix.
#' @param title The title of the plot. Default "Nei's distance"
#' @param number.cex The size of text shown in correlation plot. Default 8.
#' @param number.col The color of text shown in correlation plot. Default "#C77960".
#' @param use.tumorSampleLabel Logical (Default: FALSE). Rename the 'Tumor_Sample_Barcode' by 'Tumor_Sample_Label'.
#' @param ... Other options passed to \code{\link{subMaf}}
#'
#' @return Nei's genetic distance matrix and heatmap of sample-pairs from the same patient
#'
#' @examples
#' maf.File <- system.file("extdata/", "CRC_HZ.maf", package = "MesKit")
#' clin.File <- system.file("extdata/", "CRC_HZ.clin.txt", package = "MesKit")
#' ccf.File <- system.file("extdata/", "CRC_HZ.ccf.tsv", package = "MesKit")
#' maf <- readMaf(mafFile=maf.File, clinicalFile = clin.File, ccfFile=ccf.File, refBuild="hg19")
#' calNeiDist(maf)
#' @export calNeiDist
calNeiDist <- function(maf,
patient.id = NULL,
withinTumor = FALSE,
min.ccf = 0,
plot = TRUE,
use.circle = TRUE,
title = NULL,
number.cex = 8,
number.col = "#C77960",
use.tumorSampleLabel = FALSE,
...){
processNei <- function(maf_data){
patient <- unique(maf_data$Patient_ID)
if(nrow(maf_data) == 0){
message("Warning: there was no mutation in ", patient, " after filtering.")
return(NA)
}
if(!"CCF" %in% colnames(maf_data)){
stop(paste0("Calculation of Nei's distance requires CCF data.",
"No CCF data was found when generating Maf object with readMaf function"))
}
Nei_input <- maf_data %>%
tidyr::unite(
"Mut_ID",
c(
"Chromosome",
"Start_Position",
"Reference_Allele",
"Tumor_Seq_Allele2"
),
sep = ":",
remove = FALSE
) %>%
dplyr::select(
"Mut_ID",
"Tumor_ID",
"Tumor_Sample_Barcode",
"CCF"
) %>%
dplyr::filter(!is.na(.data$CCF))
if(withinTumor){
tumor <- unique(Nei_input$Tumor_ID)
if(length(unique(Nei_input$Tumor_Sample_Barcode)) < 2){
message(paste0("Warning: only one sample was found in ", tumor,
" in ", patient, ". If you want to compare CCF between regions, withinTumor should be set as FALSE\n"))
return(NA)
}
}else{
if(length(unique(Nei_input$Tumor_Sample_Barcode)) < 2){
message(paste0("Warning: only one sample was found in ",patient,"."))
return(NA)
}
}
## pairwise heterogeneity
samples <- as.character(unique(Nei_input$Tumor_Sample_Barcode))
pairs <- utils::combn(samples, 2, simplify = FALSE)
dist_mat <- diag(1, nrow=length(samples), ncol=length(samples))
dist_mat <- cbind(dist_mat, samples)
rownames(dist_mat) <- samples
colnames(dist_mat) <- c(samples, "name")
processNeipair <- function(pair){
ccf.pair <- subset(Nei_input, Nei_input$Tumor_Sample_Barcode %in% c(pair[1], pair[2])) %>%
dplyr::mutate(CCF = as.numeric(.data$CCF)) %>%
as.data.frame( ) %>%
tidyr::pivot_wider(names_from = "Tumor_Sample_Barcode",
values_from = "CCF",
values_fill = list("CCF" = 0)) %>%
dplyr::select(-"Mut_ID", -"Tumor_ID")
colnames(ccf.pair) <- c("ccf1", "ccf2")
x <- ccf.pair$ccf1
y <- ccf.pair$ccf2
x_ <- sum(x ^ 2 + (1 - x) ^ 2)
y_ <- sum(y ^ 2 + (1 - y) ^ 2)
xy <- sum(x * y + (1 - x) * (1 - y))
nei_dist <- -log(xy / sqrt(x_ * y_))
# print(paste0(pair[1], " ,", pair[2], ",", nei_dist))
return(nei_dist)
}
nei.list <- lapply(pairs, processNeipair) %>% unlist()
pairs_name <- lapply(pairs, function(x)paste0(x[1], "_", x[2])) %>% unlist()
names(nei.list) <- pairs_name
processNeiMat <- function(j){
row_name <- j["name"]
j1 <- j[-length(j)]
idx <- which(j == "0")
j2 <- names(j[idx])
mat_row <- vapply(j2, function(g){
name1 <- paste0(g, "_", row_name)
name2 <- paste0(row_name, "_", g)
pos <- which(grepl(name1, names(nei.list)))
if(length(pos) == 0){
pos <- which(grepl(name2, names(nei.list)))
}
nei <- nei.list[pos]
return(nei)
}, FUN.VALUE = double(1))
j[idx] <- mat_row
return(j)
}
dist_mat <- t(apply(dist_mat, 1, processNeiMat))
dist_mat <- dist_mat[,-ncol(dist_mat)] %>% apply(c(1,2), as.numeric)
Nei.dist.avg <- mean(dist_mat[upper.tri(dist_mat, diag = FALSE)])
Nei.dist <- dist_mat
if(plot){
if(is.null(title)){
## get significant number
min_value <- min(dist_mat[dist_mat!=1 & dist_mat!=0])
min_value <- format(min_value, scientific = FALSE)
significant_digit <- gsub(pattern = "0\\.0*","", as.character(min_value))
digits <- nchar(as.character(min_value)) - nchar(significant_digit)
if(withinTumor){
title_id <- paste0("Nei's distance of ", tumor," in ", patient, ": ",round(Nei.dist.avg,digits))
}else{
title_id <- paste0("Nei's distance of patient ", patient, ": ",round(Nei.dist.avg,digits))
}
}else{
title_id <- title
}
Nei.plot <- plotCorr(
dist_mat,
use.circle,
number.cex = number.cex,
number.col = number.col,
title = if(!is.null(title_id)) title_id else{NA}
)
return(list(Nei.dist.avg = Nei.dist.avg, Nei.dist = Nei.dist, Nei.plot = Nei.plot))
}else{
return(list(Nei.dist.avg = Nei.dist.avg, Nei.dist = Nei.dist))
}
}
if(withinTumor){
clonalStatus <- "Subclonal"
}else{
clonalStatus <- NULL
}
maf_input <- subMaf(maf,
patient.id = patient.id,
min.ccf = min.ccf,
clonalStatus = clonalStatus,
use.tumorSampleLabel = use.tumorSampleLabel,
mafObj = TRUE, ...)
maf_data_list <- lapply(maf_input, getMafData)
. <- NULL
if(withinTumor){
maf_group <- dplyr::bind_rows(maf_data_list) %>%
dplyr::group_by(.data$Patient_ID, .data$Tumor_ID)
group_name <- paste(group_keys(maf_group)$Patient_ID, group_keys(maf_group)$Tumor_ID, sep = "_")
result <- maf_group %>%
dplyr::group_map(~processNei(.), .keep = TRUE)
names(result) <- group_name
}else{
maf_group <- dplyr::bind_rows(maf_data_list) %>%
dplyr::group_by(.data$Patient_ID)
group_name <- group_keys(maf_group)$Patient_ID
result <- maf_group %>%
dplyr::group_map(~processNei(.), .keep = TRUE)
names(result) <- group_name
}
result <- result[!is.na(result)]
if(length(result) > 1){
return(result)
}else if(length(result) == 0){
return(NA)
}else{
return(result[[1]])
}
return(result)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.