R/calKaKs.R

Defines functions getKaKs calKaKs

Documented in calKaKs getKaKs

#' calKaKs calculates the Ka/Ks of each group
#'
#' The mutations are classified by `classifyMut()` internally.
#'
#' @param maf Maf or MafList object generated by `readMaf()` function
#' @param patient.id Select the specific patients. Default `NULL`, all patients are included.
#' @param vaf.cutoff Removing mutations with low variant allele frequency (VAF).
#' @param class The class which would be represented.
#' "SP" (Shared pattern: Public/Shared/Private), other options: "CS" (Clonal status: Clonal/Subclonl)
#' and "SPCS". see [MesKit::classifyMut()].
#' @param parallel If `TRUE` (default), run in parallel.
#'
#' @examples
#' library(MesKit)
#' data.type <- "split1"
#'
#' maf <- readMaf(
#'   mafFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.mutation.txt", data.type)),
#'   ccfFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.CCF.txt", data.type)),
#'   clinicalFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.clinical.txt", data.type)),
#'   refBuild = "hg19",
#'   ccf.conf.level = 0.95
#' )
#'
#' # calKaKas
#' kaks <- calKaKs(maf, patient.id = "Breast", class = "SP", parallel = TRUE, vaf.cutoff = 0.05)
#' kaks
#' kaks <- calKaKs(maf, patient.id = "Breast", class = "CS", parallel = TRUE, vaf.cutoff = 0.05)
#' kaks
#' kaks <- calKaKs(maf, class = "SP", parallel = TRUE, vaf.cutoff = 0.05)
#' kaks
#' @export
calKaKs <- function(maf,
                    patient.id = NULL,
                    class = "SP",
                    classByTumor = FALSE,
                    vaf.cutoff = 0.05,
                    parallel = TRUE) {

  # To do: be careful about the samples and tumors.

  class.levels <- NULL
  if (class == "SP") {
    class.levels <- c("Public", "Shared", "Private")
  } else if (class == "CS") {
    class.levels <- c("Clonal", "Subclonl")
  } else if (class == "SPCS") {
    class.levels <- c("Public_Clonal", "Shared_Clonal", "Shared_Subclonal", "Private_Subclonal")
  }

  ######################################################################

  estKaKs <- function(patient.id, maf_input, maf_class) {
    # Merge the maf input and mutation class
    message(patient.id)

    maf_merge <- maf_input[[patient.id]] %>%
      dplyr::mutate(Mut_ID = stringr::str_c(Hugo_Symbol, Chromosome, Start_Position,
        Reference_Allele, Tumor_Seq_Allele2,
        sep = ":"
      )) %>%
      dplyr::left_join(
        maf_class[[patient.id]]
      ) %>%
      dplyr::select(
        Hugo_Symbol, Chromosome, Start_Position, End_Position,
        Reference_Allele, Tumor_Seq_Allele2, Tumor_Sample_Barcode,
        Mutation_Type, Patient_ID, Tumor_ID, Variant_Classification, VAF
      )

    maf_list <- maf_merge %>%
      split(paste(maf_merge$Tumor_ID, maf_merge$Mutation_Type, sep = ":"))

    # We use the easypar to do parallel calculations.
    if (parallel) {
      maf_KaKs <- easypar::run(
        FUN = getKaKs,
        PARAMS = lapply(1:length(maf_list), function(x) {
          list(df = maf_list[[x]], vaf.cutoff = vaf.cutoff)
        }),
        parallel = TRUE,
        outfile = NULL,
        export = NULL,
        packages = "tidyverse",
        filter_errors = FALSE
      )
    } else {
      maf_KaKs <- lapply(maf_list, getKaKs)
    }

    names(maf_KaKs) <- names(maf_list)

    # Remving groups that fail to calculate the KaKs
    maf_KaKs <- maf_KaKs[lapply(maf_KaKs, function(x) is.numeric(nrow(x))) %>% unlist()]
    KaKs_data <- list()

    for (i in names(maf_KaKs)) {
      KaKs_data[[i]] <- maf_KaKs[[i]] %>%
        dplyr::mutate(type = i)
    }

    KaKs_data <- purrr::reduce(KaKs_data, rbind)

    KaKs_data <- KaKs_data %>%
      dplyr::mutate(
        Tumor_ID = mapply(function(x) x[1], stringr::str_split(type, ":")),
        Type = mapply(function(x) x[2], stringr::str_split(type, ":"))
      ) %>%
      dplyr::mutate(
        Type = factor(Type, levels = class.levels)
      ) %>%
      dplyr::filter(!is.na(Type))


    p1 <- KaKs_data %>%
      dplyr::filter(name %in% c("wall")) %>%
      dplyr::mutate(name = factor(name, levels = c("wall"))) %>%
      # mutate(Type = factor(Type, levels = c("Shared_Clonal","Private_Clonal","Private_Subclonal") ))  %>%
      ggplot2::ggplot(ggplot2::aes(x = Tumor_ID, y = mle, fill = Type)) +
      # ggpubr::theme_pubr() +
      ggplot2::geom_bar(stat = "identity", position = ggplot2::position_dodge(width = 0.90)) +
      # geom_linerange(aes(ymin = cilow, ymax = cihigh), position =  position_dodge(width = 0.90) ) +
      ggplot2::geom_hline(yintercept = 1, linetype = 2, size = 1) +
      ggplot2::labs(x = NULL, y = latex2exp::TeX("Dn/Ds ($\\omega_{all}$)")) +
      ggplot2::scale_fill_manual(values = set.colors(length(unique(KaKs_data$Type)))) +
      ggplot2::theme(
        axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 0, hjust = 0.5, size = 14)
      )

    list(
      KaKs_data = KaKs_data,
      plot = p1
    )
  }
  ##########################################################################
  # running

  # Get the mutation groups.
  maf_input <- MesKit::subMaf(maf, patient.id = patient.id, mafObj = FALSE, use.tumorSampleLabel = TRUE)

  # get mutation classifications.
  maf_class <- MesKit::classifyMut(maf, patient.id = patient.id, class = class, classByTumor = classByTumor)

  # Note the different format between maf_input and maf_class when the patient.id is a single value.
  if (!is.null(patient.id)) {
    maf_class1 <- list()
    maf_class1[[patient.id]] <- maf_class
    maf_class <- maf_class1
  }

  kaks <- lapply(names(maf_input), estKaKs, maf_input, maf_class)
  names(kaks) <- names(maf_input)

  return(
    kaks
  )
}




#' getKaKs compares Ka/Ks between different groups
#'
#' @param df data. Six columns are required to calculate the Ka/Ks,
#' including "Tumor_Sample_Barcode","Chromosome","Start_Position",
#' "Reference_Allele","Tumor_Seq_Allele2" and "VAF".
#' @param vaf.cutoff VAF cutoff. Removing mutations with low variant allele frequency (VAF).
#'
#' @details The Ka/Ks is calculated by [dndscv::dndscv()]
#'
#' @export
getKaKs <- function(df, vaf.cutoff = 0.05) {
  data(list = sprintf("submod_%s", "13r_3w"), package = "dndscv") # TODO not valid operation in package

  mutations <- df %>%
    dplyr::filter(VAF >= vaf.cutoff) %>%
    dplyr::select(c("Tumor_Sample_Barcode", "Chromosome", "Start_Position", "Reference_Allele", "Tumor_Seq_Allele2"))

  if (nrow(mutations) <= 40) {
    stop("without enough mutations")
  }

  kaks <- dndscv::dndscv(
    mutations = mutations,
    max_muts_per_gene_per_sample = Inf,
    max_coding_muts_per_sample = Inf,
    # gene_list = intersect( Genes_Covered, genes),
    sm = submod_13r_3w,
    outp = 1
  )

  kaks$globaldnds
}
qingjian1991/MPTevol documentation built on Jan. 30, 2023, 10:16 p.m.