#' Combines sites and mutations for sites and flanks from each bin in prepared_sites
#'
#' @param this_maf Data frame of mutations prepared by get_mut_trinuc_strand. Contains all if not cofactors are included
#' @param prepared_sites List of coordinates and quadnucleotide contexts generated by prepare_bins_of_sites
#'
#' @return Data frame containing mutation counts for each quadnucleotide context + indel from each bin and sites + up/down flanks
.maf_to_dfr2 = function(this_maf, prepared_sites) {
if (is.null(prepared_sites[[1]])) {
return(NULL)
}
gr_this_maf = GenomicRanges::GRanges(this_maf$chr,
IRanges::IRanges(this_maf$start, this_maf$end))
GenomicRanges::mcols(gr_this_maf)[,"trinuc"] = this_maf$mut_trinuc
GenomicRanges::mcols(gr_this_maf)[,"ID"] = paste(this_maf$chr, this_maf$start, this_maf$end,
this_maf$ref, this_maf$alt, this_maf$patient_id, sep=":")
full_dfr = NULL
for (i in 1:length(prepared_sites)) {
gr_sites = prepared_sites[[i]][['gr_sites']]
gr_flank = prepared_sites[[i]][['gr_flank']]
trinuc_sites = prepared_sites[[i]][['trinuc_sites']]
trinuc_flank = prepared_sites[[i]][['trinuc_flank']]
sites_dfr = .merge_sites_and_muts(gr_sites, gr_this_maf, trinuc_sites,
is_site = TRUE)
flank_dfr = .merge_sites_and_muts(gr_flank, gr_this_maf, trinuc_flank,
is_site = FALSE)
dfr = rbind(sites_dfr, flank_dfr)
dfr$mut_rate = as.numeric(gsub("mutrate__", "", names(prepared_sites)[i]))
dfr$mut_bin = i
full_dfr = rbind(full_dfr, dfr)
}
full_dfr
}
#' Combines quadnucleotide counts and site information
#'
#' @param gr_seq GenomicRanges object of site/flanks chromosomes and positions
#' @param gr_maf GenomicRanges object with mcols containing quadnucleotide context or indel
#' @param trinuc Data frame of counts for each trinucleotide counts extended to each quadnucleotide
#' @param is_site Boolean indicating whether mutation is within site or flanking region
#'
#' @return Data frame containing mutation and trinucleotide counts for each quadnucleotide context + indel. Includes boolean indicator of is_site
.merge_sites_and_muts = function(gr_seq, gr_maf, trinuc, is_site) {
muts_sites = .count_trinuc_muts(gr_seq, gr_maf)
dfr = merge(trinuc, muts_sites, by.x = "quadnuc", by.y = "tag", all = TRUE)
dfr = cbind(is_site = is_site, dfr)
dfr[is.na(dfr$muts_count), "muts_count"] = 0
dfr
}
#' Counts number of mutations per quadnucleotide context
#'
#' @param gr_sites GenomicRanges object of site/flank chromosomes and positions
#' @param gr_maf GenomicRanges object with mcols containing quadnucleotide context or indel
#'
#' @return Data frame containing mutation count and each quadnucleotide context + indel
.count_trinuc_muts = function(gr_sites, gr_maf) {
# remove duplicate mutation calls
gr_maf_in_sites = gr_maf[S4Vectors::subjectHits(GenomicRanges::findOverlaps(gr_sites, gr_maf))]
gr_maf_in_sites = gr_maf_in_sites[!duplicated(GenomicRanges::mcols(gr_maf_in_sites)[,"ID"])]
muts_sites = GenomicRanges::mcols(gr_maf_in_sites)[,"trinuc"]
muts_sites = data.frame(muts_count = c(table(muts_sites)), stringsAsFactors=F)
muts_sites$tag = rownames(muts_sites)
muts_sites
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.