R/ccfAUC.R

Defines functions ccfAUC

Documented in ccfAUC

#' @title ccfAUC
#' @description The tumor heterogeneity was estimated as the area under the curve (AUC) of the cumulative density function from all cancer cell fractions per tumor
#' 
#' @references Charoentong P, Finotello F, et al. Pan-cancer Immunogenomic Analyses Reveal Genotype-Immunophenotype Relationships and Predictors of Response to Checkpoint Blockade. Cell reports 2017, 18:248-262. 
#' 
#' @param maf A Maf or MafList object generated by \code{\link{readMaf}} function.
#' @param patient.id Select the specific patients. Default NULL, all patients are included.
#' @param min.ccf The minimum value of CCF. Default 0.
#' @param withinTumor Calculate between-region heterogeneity within tumor. Default FALSE.
#' @param plot.density Whether to show the density plot. Default TRUE.
#' @param use.tumorSampleLabel Logical (Default: FALSE). Rename the 'Tumor_Sample_Barcode' by 'Tumor_Sample_Label'.
#' @param ... Other options passed to \code{\link{subMaf}}
#' 
#' @return A list containing AUC of CCF and a graph
#' 
#' @examples
#' maf.File <- system.file("extdata/", "CRC_HZ.maf", package = "MesKit")
#' clin.File <- system.file("extdata/", "CRC_HZ.clin.txt", package = "MesKit")
#' ccf.File <- system.file("extdata/", "CRC_HZ.ccf.tsv", package = "MesKit")
#' maf <- readMaf(mafFile=maf.File, clinicalFile = clin.File, ccfFile=ccf.File, refBuild="hg19")
#' ccfAUC(maf)
#' 
#' @importFrom stats approxfun
#' @export ccfAUC

ccfAUC <- function(
  maf, 
  patient.id = NULL, 
  min.ccf = 0, 
  withinTumor = FALSE,
  plot.density = TRUE,
  use.tumorSampleLabel = FALSE,
  ...
){
  
  if(withinTumor){
    id_col <- "Tumor_ID"
    ccf_col <- "Tumor_Average_CCF"
  }else{
    id_col <- "Tumor_Sample_Barcode"
    ccf_col <- "CCF"
  }
  processAUC <- function(m, withinTumor, plot.density){
    
    maf_data <- getMafData(m) %>% 
      dplyr::filter(!is.na(CCF),
                    !is.na(Tumor_Average_CCF))
    
    patient <- getMafPatient(m)
    
    if(nrow(maf_data) == 0){
      message("Warning: there was no mutation in ", patient, " after filtering.")
      return(NA)
    }
    ids <- unique(maf_data[[id_col]])
    processAUCID <- function(id, maf_data, withinTumor){
      subdata <- subset(maf_data, maf_data[[id_col]]  == id)
      
      subdata <-  subdata %>%
        dplyr::arrange(.data[[ccf_col]])
      
      fn <- stats::ecdf(subdata[[ccf_col]])
      auc <- suppressWarnings(stats::integrate(fn,
                                               0,
                                               1,
                                               stop.on.error = FALSE)$value)
      
      subdata[[id_col]] <- paste0(subdata[[id_col]]," (", round(auc,3), ")")
      auc_result <- data.frame(Patient_ID = patient, Tumor_ID = id, AUC = auc)
      colnames(auc_result) <- c("Patient_ID", id_col, "AUC")
      
      subdata$prop <- fn(subdata[[ccf_col]])
      if(min(subdata[[ccf_col]]) > 0){
        min_ccf <- min(subdata[[ccf_col]])
        
        subdata <- rbind(subdata, subdata[1,])
        subdata[nrow(subdata),][[ccf_col]] <- 0
        subdata[nrow(subdata),]$prop<- 0
        
        subdata <- rbind(subdata, subdata[1,])
        subdata[nrow(subdata),][[ccf_col]] <- min_ccf
        subdata[nrow(subdata),]$prop <- 0
      }
      if(max(subdata[[ccf_col]]) < 1){
        subdata <- rbind(subdata, subdata[1,])
        subdata[nrow(subdata),][[ccf_col]] <- 1
        subdata[nrow(subdata),]$prop <- 1
      }
      subdata <-  subdata %>%
        dplyr::arrange(.data[[ccf_col]], .data$prop)
      
      
      return(list(auc_result = auc_result, subdata = subdata))
    }
    
    id_result <- lapply(ids, processAUCID, maf_data, withinTumor)
    CCF.sort <- lapply(id_result, function(x)x$subdata) %>% dplyr::bind_rows()
    AUC.df <- lapply(id_result, function(x)x$auc_result) %>% dplyr::bind_rows()
    

    
    if(plot.density){
      ## initialize variable in ggplot for biocheck error
      Tumor_Average_CCF <- NULL
      prop <- NULL
      Tumor_ID <- NULL
      CCF <- NULL
      Tumor_Sample_Barcode <- NULL

      
      if(withinTumor){
        p <- ggplot2::ggplot(CCF.sort, 
                             aes(x=Tumor_Average_CCF, y = prop,  group=Tumor_ID, color=Tumor_ID))
      }else{
        p <- ggplot2::ggplot(CCF.sort, 
                             aes(x=CCF, y = prop,  group=Tumor_Sample_Barcode, color=Tumor_Sample_Barcode))
      }
      p <- p + 
        #geom_smooth(na.rm = TRUE, se = FALSE, size = 1.2, formula = y ~ s(x, bs = "cs"), method = "gam") +
        theme_bw() + 
        geom_line(size=1.2) +
        # stat_ecdf(geom = "step", pad = T, size = 1.2) +
        xlim(0, 1) + ylim(0, 1) +     
        coord_fixed() +
        theme(
          #legend.position='none', 
          legend.title = element_blank(),
          plot.title =  element_text(size=13.5, face = "bold"), 
          axis.title = element_text(size=13),
          panel.grid=element_blank(), 
          panel.border=element_blank(), 
          axis.line=element_line(size=0.7, colour = "black"),
          axis.text = element_text(size=11, colour = "black"),
          legend.text = element_text(size=11, colour = "black"),
          panel.grid.major = element_line(linetype = 2, color = "grey")
        )+
        labs(x = "CCF", y = "Cumulative density", 
             title = patient)
      
      CCF.density.plot <- p
      return(list(AUC.value = AUC.df, CCF.density.plot = CCF.density.plot))
    }else{
      return(AUC.df)
    }
  }
  
  if(withinTumor){
    clonalStatus <- "Subclonal"
  }else{
    clonalStatus <- NULL
  }
  
  maf_input <- subMaf(maf,
                      patient.id = patient.id,
                      min.ccf = min.ccf,
                      clonalStatus = clonalStatus,
                      use.tumorSampleLabel = use.tumorSampleLabel,
                      mafObj = TRUE,...)
  
  
  result <- lapply(maf_input, processAUC, withinTumor, plot.density)
  result <- result[!is.na(result)]   
  
  
  if(length(result) > 1){
    return(result)
  }else if(length(result) == 0){
    return(NA)
  }else{
    return(result[[1]])
  }
  return(result)
  
}

Try the MesKit package in your browser

Any scripts or data that you put into this service are public.

MesKit documentation built on March 28, 2021, 6 p.m.