#' calKaKs calculates the Ka/Ks of each group
#'
#' The mutations are classified by `classifyMut()` internally.
#'
#' @param maf Maf or MafList object generated by `readMaf()` function
#' @param patient.id Select the specific patients. Default `NULL`, all patients are included.
#' @param vaf.cutoff Removing mutations with low variant allele frequency (VAF).
#' @param class The class which would be represented.
#' "SP" (Shared pattern: Public/Shared/Private), other options: "CS" (Clonal status: Clonal/Subclonl)
#' and "SPCS". see [MesKit::classifyMut()].
#' @param parallel If `TRUE` (default), run in parallel.
#'
#' @examples
#' library(MesKit)
#' data.type <- "split1"
#'
#' maf <- readMaf(
#' mafFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.mutation.txt", data.type)),
#' ccfFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.CCF.txt", data.type)),
#' clinicalFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.clinical.txt", data.type)),
#' refBuild = "hg19",
#' ccf.conf.level = 0.95
#' )
#'
#' # calKaKas
#' kaks <- calKaKs(maf, patient.id = "Breast", class = "SP", parallel = TRUE, vaf.cutoff = 0.05)
#' kaks
#' kaks <- calKaKs(maf, patient.id = "Breast", class = "CS", parallel = TRUE, vaf.cutoff = 0.05)
#' kaks
#' kaks <- calKaKs(maf, class = "SP", parallel = TRUE, vaf.cutoff = 0.05)
#' kaks
#' @export
calKaKs <- function(maf,
patient.id = NULL,
class = "SP",
classByTumor = FALSE,
vaf.cutoff = 0.05,
parallel = TRUE) {
# To do: be careful about the samples and tumors.
class.levels <- NULL
if (class == "SP") {
class.levels <- c("Public", "Shared", "Private")
} else if (class == "CS") {
class.levels <- c("Clonal", "Subclonl")
} else if (class == "SPCS") {
class.levels <- c("Public_Clonal", "Shared_Clonal", "Shared_Subclonal", "Private_Subclonal")
}
######################################################################
estKaKs <- function(patient.id, maf_input, maf_class) {
# Merge the maf input and mutation class
message(patient.id)
maf_merge <- maf_input[[patient.id]] %>%
dplyr::mutate(Mut_ID = stringr::str_c(Hugo_Symbol, Chromosome, Start_Position,
Reference_Allele, Tumor_Seq_Allele2,
sep = ":"
)) %>%
dplyr::left_join(
maf_class[[patient.id]]
) %>%
dplyr::select(
Hugo_Symbol, Chromosome, Start_Position, End_Position,
Reference_Allele, Tumor_Seq_Allele2, Tumor_Sample_Barcode,
Mutation_Type, Patient_ID, Tumor_ID, Variant_Classification, VAF
)
maf_list <- maf_merge %>%
split(paste(maf_merge$Tumor_ID, maf_merge$Mutation_Type, sep = ":"))
# We use the easypar to do parallel calculations.
if (parallel) {
maf_KaKs <- easypar::run(
FUN = getKaKs,
PARAMS = lapply(1:length(maf_list), function(x) {
list(df = maf_list[[x]], vaf.cutoff = vaf.cutoff)
}),
parallel = TRUE,
outfile = NULL,
export = NULL,
packages = "tidyverse",
filter_errors = FALSE
)
} else {
maf_KaKs <- lapply(maf_list, getKaKs)
}
names(maf_KaKs) <- names(maf_list)
# Remving groups that fail to calculate the KaKs
maf_KaKs <- maf_KaKs[lapply(maf_KaKs, function(x) is.numeric(nrow(x))) %>% unlist()]
KaKs_data <- list()
for (i in names(maf_KaKs)) {
KaKs_data[[i]] <- maf_KaKs[[i]] %>%
dplyr::mutate(type = i)
}
KaKs_data <- purrr::reduce(KaKs_data, rbind)
KaKs_data <- KaKs_data %>%
dplyr::mutate(
Tumor_ID = mapply(function(x) x[1], stringr::str_split(type, ":")),
Type = mapply(function(x) x[2], stringr::str_split(type, ":"))
) %>%
dplyr::mutate(
Type = factor(Type, levels = class.levels)
) %>%
dplyr::filter(!is.na(Type))
p1 <- KaKs_data %>%
dplyr::filter(name %in% c("wall")) %>%
dplyr::mutate(name = factor(name, levels = c("wall"))) %>%
# mutate(Type = factor(Type, levels = c("Shared_Clonal","Private_Clonal","Private_Subclonal") )) %>%
ggplot2::ggplot(ggplot2::aes(x = Tumor_ID, y = mle, fill = Type)) +
# ggpubr::theme_pubr() +
ggplot2::geom_bar(stat = "identity", position = ggplot2::position_dodge(width = 0.90)) +
# geom_linerange(aes(ymin = cilow, ymax = cihigh), position = position_dodge(width = 0.90) ) +
ggplot2::geom_hline(yintercept = 1, linetype = 2, size = 1) +
ggplot2::labs(x = NULL, y = latex2exp::TeX("Dn/Ds ($\\omega_{all}$)")) +
ggplot2::scale_fill_manual(values = set.colors(length(unique(KaKs_data$Type)))) +
ggplot2::theme(
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 0, hjust = 0.5, size = 14)
)
list(
KaKs_data = KaKs_data,
plot = p1
)
}
##########################################################################
# running
# Get the mutation groups.
maf_input <- MesKit::subMaf(maf, patient.id = patient.id, mafObj = FALSE, use.tumorSampleLabel = TRUE)
# get mutation classifications.
maf_class <- MesKit::classifyMut(maf, patient.id = patient.id, class = class, classByTumor = classByTumor)
# Note the different format between maf_input and maf_class when the patient.id is a single value.
if (!is.null(patient.id)) {
maf_class1 <- list()
maf_class1[[patient.id]] <- maf_class
maf_class <- maf_class1
}
kaks <- lapply(names(maf_input), estKaKs, maf_input, maf_class)
names(kaks) <- names(maf_input)
return(
kaks
)
}
#' getKaKs compares Ka/Ks between different groups
#'
#' @param df data. Six columns are required to calculate the Ka/Ks,
#' including "Tumor_Sample_Barcode","Chromosome","Start_Position",
#' "Reference_Allele","Tumor_Seq_Allele2" and "VAF".
#' @param vaf.cutoff VAF cutoff. Removing mutations with low variant allele frequency (VAF).
#'
#' @details The Ka/Ks is calculated by [dndscv::dndscv()]
#'
#' @export
getKaKs <- function(df, vaf.cutoff = 0.05) {
data(list = sprintf("submod_%s", "13r_3w"), package = "dndscv") # TODO not valid operation in package
mutations <- df %>%
dplyr::filter(VAF >= vaf.cutoff) %>%
dplyr::select(c("Tumor_Sample_Barcode", "Chromosome", "Start_Position", "Reference_Allele", "Tumor_Seq_Allele2"))
if (nrow(mutations) <= 40) {
stop("without enough mutations")
}
kaks <- dndscv::dndscv(
mutations = mutations,
max_muts_per_gene_per_sample = Inf,
max_coding_muts_per_sample = Inf,
# gene_list = intersect( Genes_Covered, genes),
sm = submod_13r_3w,
outp = 1
)
kaks$globaldnds
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.