# R/calPropDriver.R

# Defines functions: calPropDriver

# Documented in: calPropDriver

#' Calculate the proportion of driver mutations within each mutation class
#'
#' For every patient, non-silent mutations are grouped by tumor (`Tumor_ID`)
#' and by mutation class (`Mutation_Type`), and the fraction of mutations in
#' each group that hit a gene listed in `driverGene` is computed and plotted.
#' The mutations are classified by [MesKit::classifyMut()] internally.
#'
#' @param maf Maf or MafList object generated by `readMaf()` function.
#' @param patient.id Select the specific patients. Default `NULL`, all
#'   patients are included.
#' @param driverGene Character vector of driver gene names (gene symbols,
#'   matched against `Hugo_Symbol`).
#' @param vaf.cutoff Removing mutations of low variant allele frequency (VAF).
#'   NOTE: this argument is accepted for compatibility but is currently not
#'   applied anywhere in the function body.
#' @param class The class which would be represented. One of `"SP"`
#'   (Shared pattern: Public/Shared/Private), `"CS"` (Clonal status:
#'   Clonal/Subclonal) or `"SPCS"`. See [MesKit::classifyMut()].
#' @param classByTumor Logical (Default: `FALSE`). Classify mutations based on
#'   "Tumor_ID".
#' @param silent.columns Values of the `Variant_Classification` field in the
#'   MAF files that indicate silent mutations. Default `NULL`, which is
#'   replaced by `c("Silent", "3'Flank", "IGR", "Intron", "RNA")`.
#'
#' @return A list named by patient. Each element is a list with
#'   \describe{
#'     \item{maf.prop}{A table grouped by `Tumor_ID` and `Mutation_Type` with
#'       the columns `Tumor_ID`, `Mutation_Type`, `is.driver`, `num` (number
#'       of driver mutations), `num_total` (number of non-silent mutations in
#'       the group) and `prop` (`num / num_total`). Groups without any driver
#'       mutation are not listed.}
#'     \item{plot}{A ggplot2 bar chart of `prop` per tumor, filled by
#'       `Mutation_Type`.}
#'   }
#'
#' @examples
#'
#' # Get the driver gene.
#' driverGene <- read.delim(
#'   system.file(package = "MPTevol", "extdata", "IntOGen-Drivers-Cancer_Genes.tsv"),
#'   header = TRUE
#' ) %>%
#'   dplyr::filter(CANCER_TYPE %in% c("BRCA", "COREAD", "LUAD", "LUSC")) %>%
#'   dplyr::pull(SYMBOL) %>%
#'   unique()
#'
#' # `maf` is a Maf/MafList object created beforehand with `readMaf()`.
#' prop <- calPropDriver(maf, patient.id = "BRCA", driverGene = driverGene)
#'
#' prop$BRCA$plot
#'
#'
#' @export
#'
calPropDriver <- function(maf,
                          patient.id = NULL,
                          driverGene,
                          class = "SP",
                          classByTumor = FALSE,
                          vaf.cutoff = 0.01,
                          silent.columns = NULL) {

  # To do: be careful about the samples and tumors.

  # Validate `class` before any expensive work so that a typo fails fast
  # instead of silently producing a plot without ordered levels.
  class.msg <- "`class` must be one of \"SP\", \"CS\" or \"SPCS\"."
  if (!is.character(class) || length(class) != 1L || is.na(class)) {
    stop(class.msg, call. = FALSE)
  }

  # Display order of the mutation classes (used as factor levels below).
  class.levels <- switch(class,
    SP = c("Public", "Shared", "Private"),
    CS = c("Clonal", "Subclonal"),
    SPCS = c(
      "Public_Clonal", "Shared_Clonal",
      "Shared_Subclonal", "Private_Subclonal"
    ),
    stop(class.msg, call. = FALSE)
  )

  if (is.null(silent.columns)) {
    silent.columns <- c("Silent", "3'Flank", "IGR", "Intron", "RNA")
  }

  ######################################################################

  # Summarise and plot the driver proportion for a single patient.
  estProp <- function(patient.id, maf_input, maf_class) {
    message(patient.id)

    # Merge the maf input and mutation class. `Mut_ID` is built here so the
    # mutation table shares a key with the classification table; the join
    # itself is a natural join on all common columns (as before).
    maf_merge <- maf_input[[patient.id]] %>%
      dplyr::mutate(
        Mut_ID = stringr::str_c(
          Hugo_Symbol, Chromosome, Start_Position,
          Reference_Allele, Tumor_Seq_Allele2,
          sep = ":"
        )
      ) %>%
      dplyr::left_join(
        maf_class[[patient.id]]
      ) %>%
      dplyr::select(
        Hugo_Symbol, Chromosome, Start_Position, End_Position,
        Reference_Allele, Tumor_Seq_Allele2, Tumor_Sample_Barcode,
        Mutation_Type, Patient_ID, Tumor_ID, Variant_Classification, VAF
      )

    maf_data <- maf_merge %>%
      # only keep non-silent mutations
      dplyr::filter(!Variant_Classification %in% silent.columns) %>%
      dplyr::mutate(is.driver = Hugo_Symbol %in% driverGene) %>%
      # count driver / non-driver mutations per tumor and mutation class
      dplyr::group_by(Tumor_ID, Mutation_Type, is.driver) %>%
      dplyr::summarise(num = dplyr::n(), .groups = "drop") %>%
      # total number of mutations per tumor and mutation class
      dplyr::group_by(Tumor_ID, Mutation_Type) %>%
      dplyr::mutate(num_total = sum(num)) %>%
      # keep the driver rows only; groups without drivers disappear here
      dplyr::filter(is.driver) %>%
      dplyr::mutate(
        prop = num / num_total,
        Mutation_Type = factor(Mutation_Type, levels = class.levels)
      )

    p1 <- maf_data %>%
      ggplot2::ggplot(
        ggplot2::aes(x = Tumor_ID, y = prop, fill = Mutation_Type)
      ) +
      ggplot2::geom_bar(
        stat = "identity",
        position = ggplot2::position_dodge(width = 0.90)
      ) +
      ggplot2::theme_bw() +
      ggplot2::labs(
        x = NULL,
        y = latex2exp::TeX("Prop of driver mutations")
      ) +
      ggplot2::scale_fill_manual(
        values = set.colors(length(unique(maf_data$Mutation_Type)))
      ) +
      ggplot2::theme(
        axis.title.x = ggplot2::element_blank(),
        axis.text.x = ggplot2::element_text(angle = 0, hjust = 0.5, size = 14),
        axis.title.y = ggplot2::element_text(size = 16)
      )

    list(
      maf.prop = maf_data,
      plot = p1
    )
  }
  ##########################################################################
  # running

  # Get the mutation groups.
  maf_input <- MesKit::subMaf(
    maf,
    patient.id = patient.id,
    mafObj = FALSE,
    use.tumorSampleLabel = TRUE
  )

  # get mutation classifications.
  maf_class <- MesKit::classifyMut(
    maf,
    patient.id = patient.id,
    class = class,
    classByTumor = classByTumor
  )

  # Note the different format between maf_input and maf_class when only a
  # single patient is involved: the classification then comes back as a bare
  # table instead of a per-patient list. Decide on what was actually returned
  # rather than on `patient.id`, so that several patient ids (already a list)
  # and a single-patient object with `patient.id = NULL` both work.
  if (is.data.frame(maf_class)) {
    maf_class <- stats::setNames(list(maf_class), names(maf_input)[1L])
  }

  prop <- lapply(names(maf_input), estProp, maf_input, maf_class)
  names(prop) <- names(maf_input)

  prop
}
# qingjian1991/MPTevol documentation built on Jan. 30, 2023, 10:16 p.m.