R/calNeiDist.R

Defines functions calNeiDist

Documented in calNeiDist

#' calNeiDist
#' @description Nei's distance of CCF for sample/tumor pair.
#' @references Lee JK, Wang J, Sa JK, et al. Spatiotemporal genomic architecture informs precision oncology in glioblastoma. Nat Genet. 2017;49(4):594-599.
#' 
#' @param maf A Maf or MafList object generated by \code{\link{readMaf}} function.
#' @param patient.id Select the specific patients. Default NULL, all patients are included.
#' @param withinTumor Calculate between-region heterogeneity within tumor. 
#' (Default: FALSE).
#' @param min.ccf Specify the minimum CCF. Default 0.
#' @param plot Logical (Default: TRUE).
#' @param use.circle Logical (Default: TRUE). Whether to use "circle" as visualization method of correlation matrix.
#' @param title The title of the plot. Default "Nei's distance"
#' @param number.cex The size of text shown in correlation plot. Default 8.
#' @param number.col The color of text shown in correlation plot. Default "#C77960".
#' @param use.tumorSampleLabel Logical (Default: FALSE). Rename the 'Tumor_Sample_Barcode' by 'Tumor_Sample_Label'.
#' @param ... Other options passed to \code{\link{subMaf}}
#' 
#' @return Nei's genetic distance matrix and heatmap of sample-pairs from the same patient
#'
#' @examples
#' maf.File <- system.file("extdata/", "CRC_HZ.maf", package = "MesKit")
#' clin.File <- system.file("extdata/", "CRC_HZ.clin.txt", package = "MesKit")
#' ccf.File <- system.file("extdata/", "CRC_HZ.ccf.tsv", package = "MesKit")
#' maf <- readMaf(mafFile=maf.File, clinicalFile = clin.File, ccfFile=ccf.File, refBuild="hg19")
#' calNeiDist(maf)
#' @export calNeiDist

calNeiDist <- function(maf, 
                       patient.id = NULL,
                       withinTumor = FALSE, 
                       min.ccf = 0,
                       plot = TRUE, 
                       use.circle = TRUE, 
                       title = NULL,
                       number.cex = 8, 
                       number.col = "#C77960",
                       use.tumorSampleLabel = FALSE,
                       ...){
  
  processNei <- function(maf_data){
    
    patient <- unique(maf_data$Patient_ID)
    if(nrow(maf_data) == 0){
      message("Warning: there was no mutation in ", patient, " after filtering.")
      return(NA)
    }
    
    if(!"CCF" %in% colnames(maf_data)){
      stop(paste0("Calculation of Nei's distance requires CCF data.",
                  "No CCF data was found when generating Maf object with readMaf function"))
    }
    Nei_input <- maf_data %>%
      tidyr::unite(
        "Mut_ID",
        c(
          "Chromosome",
          "Start_Position",
          "Reference_Allele",
          "Tumor_Seq_Allele2"
        ),
        sep = ":",
        remove = FALSE
      ) %>% 
      dplyr::select(
        "Mut_ID",
        "Tumor_ID",
        "Tumor_Sample_Barcode",
        "CCF"
      ) %>% 
      dplyr::filter(!is.na(.data$CCF))
    
    if(withinTumor){
      tumor <- unique(Nei_input$Tumor_ID)
      if(length(unique(Nei_input$Tumor_Sample_Barcode)) < 2){
        message(paste0("Warning: only one sample was found in ", tumor,
                       " in ", patient, ". If you want to compare CCF between regions, withinTumor should be set as FALSE\n"))
        return(NA)
        
      }
    }else{
      if(length(unique(Nei_input$Tumor_Sample_Barcode)) < 2){
        message(paste0("Warning: only one sample was found in ",patient,"."))
        return(NA)
      }
    }
    
    
    
    ## pairwise heterogeneity
    samples <- as.character(unique(Nei_input$Tumor_Sample_Barcode))
    pairs <- utils::combn(samples, 2, simplify = FALSE)
    
    dist_mat <- diag(1, nrow=length(samples), ncol=length(samples))
    dist_mat <- cbind(dist_mat, samples)
    rownames(dist_mat) <- samples
    colnames(dist_mat) <- c(samples, "name")
    
    processNeipair <- function(pair){
      ccf.pair <- subset(Nei_input, Nei_input$Tumor_Sample_Barcode %in% c(pair[1], pair[2])) %>%
        dplyr::mutate(CCF = as.numeric(.data$CCF)) %>% 
        as.data.frame( ) %>% 
        tidyr::pivot_wider(names_from = "Tumor_Sample_Barcode",
                           values_from = "CCF",
                           values_fill = list("CCF" = 0)) %>%
        dplyr::select(-"Mut_ID", -"Tumor_ID")
      colnames(ccf.pair) <- c("ccf1", "ccf2")
      x <- ccf.pair$ccf1
      y <- ccf.pair$ccf2
      x_ <- sum(x ^ 2 + (1 - x) ^ 2)
      y_ <- sum(y ^ 2 + (1 - y) ^ 2)
      
      xy <- sum(x * y + (1 - x) * (1 - y))
      
      nei_dist <- -log(xy / sqrt(x_ * y_))
      # print(paste0(pair[1], " ,", pair[2], ",", nei_dist))
      return(nei_dist)
    }
    
    nei.list <- lapply(pairs, processNeipair) %>% unlist()
    pairs_name <- lapply(pairs, function(x)paste0(x[1], "_", x[2])) %>% unlist()
    names(nei.list) <- pairs_name
    processNeiMat <- function(j){
      row_name <- j["name"]
      j1 <- j[-length(j)]
      
      idx <- which(j == "0")
      j2 <- names(j[idx])
      
      mat_row <- vapply(j2, function(g){
        name1 <- paste0(g, "_", row_name)
        name2 <- paste0(row_name, "_", g)
        
        pos <- which(grepl(name1, names(nei.list)))
        if(length(pos) == 0){
          pos <- which(grepl(name2, names(nei.list)))
        }
        nei <- nei.list[pos]
        return(nei)
      }, FUN.VALUE = double(1))
      
      j[idx] <- mat_row
      return(j)
    }
    
    dist_mat <- t(apply(dist_mat, 1, processNeiMat))
    dist_mat <- dist_mat[,-ncol(dist_mat)] %>% apply(c(1,2), as.numeric)
    
    Nei.dist.avg <- mean(dist_mat[upper.tri(dist_mat, diag = FALSE)])
    Nei.dist <- dist_mat
    
    if(plot){
      if(is.null(title)){
        ## get significant number
        min_value <- min(dist_mat[dist_mat!=1 & dist_mat!=0])
        min_value <- format(min_value, scientific = FALSE)
        significant_digit <- gsub(pattern =  "0\\.0*","", as.character(min_value))
        digits <- nchar(as.character(min_value)) - nchar(significant_digit)
        if(withinTumor){
          title_id <- paste0("Nei's distance of ", tumor," in ", patient, ": ",round(Nei.dist.avg,digits))
        }else{
          title_id <- paste0("Nei's distance of patient ", patient, ": ",round(Nei.dist.avg,digits))
        }
      }else{
        title_id <- title
      }
      Nei.plot <- plotCorr(
        dist_mat, 
        use.circle, 
        number.cex = number.cex,
        number.col = number.col,
        title = if(!is.null(title_id)) title_id else{NA} 
      )
      return(list(Nei.dist.avg = Nei.dist.avg, Nei.dist = Nei.dist, Nei.plot = Nei.plot))
      
    }else{
      return(list(Nei.dist.avg = Nei.dist.avg, Nei.dist = Nei.dist))
      
    }
    
  }
  
  if(withinTumor){
    clonalStatus <- "Subclonal"
  }else{
    clonalStatus <- NULL
  }
  
  maf_input <- subMaf(maf, 
                      patient.id = patient.id,
                      min.ccf = min.ccf,
                      clonalStatus = clonalStatus, 
                      use.tumorSampleLabel = use.tumorSampleLabel,
                      mafObj = TRUE, ...)
  
  maf_data_list <- lapply(maf_input, getMafData)
  
  . <- NULL
  
  if(withinTumor){
    maf_group <- dplyr::bind_rows(maf_data_list) %>%
      dplyr::group_by(.data$Patient_ID, .data$Tumor_ID)
    group_name <- paste(group_keys(maf_group)$Patient_ID, group_keys(maf_group)$Tumor_ID, sep = "_")
    
    result <- maf_group %>% 
      dplyr::group_map(~processNei(.), .keep = TRUE)
    names(result) <- group_name
  }else{
    maf_group <- dplyr::bind_rows(maf_data_list) %>%
      dplyr::group_by(.data$Patient_ID)
    group_name <- group_keys(maf_group)$Patient_ID
    
    result <- maf_group %>% 
      dplyr::group_map(~processNei(.), .keep = TRUE)
    names(result) <- group_name
  }
  
  result <- result[!is.na(result)]
  
  if(length(result) > 1){
    return(result)
  }else if(length(result) == 0){
    return(NA)
  }else{
    return(result[[1]])
  }
  return(result)
}

Try the MesKit package in your browser

Any scripts or data that you put into this service are public.

MesKit documentation built on March 28, 2021, 6 p.m.