MutSigTools: Mutational signature analysis

Documented in contextSNV

#' @title  Partition mutations according to their genomic contexts
#'
#' @description  Group mutations according to their occurrence within or outside user-annotated genomic segments.
#' @param snv A dataframe having \emph{sample, chr, pos, ref, alt and/or  freq} as its columns.
#' @param file Path of the BED file, having genomic segments and their context information. This snv dataframe can be generated by \code{\link{vcfToSNV}}.
#' @param mode Default is set to \emph{include}: Select mutations lying inside the given genomic contexts (obtained from BED file). 
#' \emph{exclude}: will give mutations outside the given context. 
#' @param bed_file_start_cord  Default: \emph{0}: If BED file has 0-based cordinate system. \emph{1}: If BED file has 1-based cordinate system.
#' @return \emph{snv} dataframe with column names of \emph{sample, chr, pos, ref, alt, freq}. Where, sample name will be concatenated by the its genomic context. Example: if a mutation within a sample of name \emph{sample1}, lies in the context segment of label \emph{C1} given in the input \emph{bed file} , the sample name of mutation is modified to \emph{sample1_C1}. The mutations that don't lie in any context segment given in the \emph{BED} file are discarded.
#' @details When the \emph{include} mode is used, group mutations according to their occurrence in user-specifiedannotated genomic segments; such segments could represent specific genomic or epigenomic contexts (e.g.actively transcribed coding regions). Alternately, in the \emph{exclude} mode, mutations in undesired regions (e.g.black-listed genomic regions) can be removed.
#' @export
#' @examples 
#' BED_file=system.file("extdata", "context_testFile.bed", package = "MutSigTools", mustWork = TRUE)
#' 
#' data(snv_sample.rda)   # load 'snv' dataframe object
#' 
#' context_snv=contextSNV(snv=snv_sample,BED_file, mode='include', bed_file_start_cord=0)
#' 
#' @seealso \code{\link{vcfToSNV}} to generate \emph{snv} dataframe, and \url{https://genome.ucsc.edu/FAQ/FAQformat.html} for BED file format.
#' 
contextSNV <- function(snv, file='empty', mode= 'include', bed_file_start_cord=0) {
      if(file != 'empty' & file_ext(file)=='bed' & file.exists(file)){
          feature_gr <- bed_to_granges(file, start_cord=bed_file_start_cord)
      } else if (file != 'empty' & file_ext(file)!='bed'){
          stop("File not a bed file", call. = TRUE, domain = NULL)
          geterrmessage()
      }else if (file != 'empty' & !file.exists(file)){
          stop("BED File path does not exist", call. = TRUE, domain = NULL)
          geterrmessage()
      } else if(file=='empty') {
          stop("BED File path is not given", call. = TRUE, domain = NULL)
          geterrmessage()
      }
  
      if(ncol(snv)==5)
      {
          snv_df=data.frame(sample=snv$sample, chr=snv$chr, start=snv$pos, end=snv$pos, starnd=rep('.',nrow(snv)), ref=snv$ref, alt=snv$alt)
      }else if(ncol(snv)==6){
          snv_df=data.frame(sample=snv$sample, chr=snv$chr, start=snv$pos, end=snv$pos, starnd=rep('.',nrow(snv)), ref=snv$ref, alt=snv$alt, freq=snv$freq)
      }
      snv_gr=makeGRangesFromDataFrame(snv_df,keep.extra.columns = TRUE)  # validate
      seqlevels(feature_gr)=mapSeqlevels(seqlevels(feature_gr),"UCSC") # or NCBI
      #seqlevels(snv_gr)=mapSeqlevels(seqlevels(snv_gr)[1:24],"UCSC")
      overlaps=findOverlaps(snv_gr,feature_gr)  # query , subject

      if(mode== 'exclude') {
          feature_mutation_gr=snv_gr[-overlaps@from,]
          context_vec=rep('NoContext',length(feature_mutation_gr))
      } else if(mode == 'include'){
          feature_mutation_gr=snv_gr[overlaps@from,]
          context_vec=paste0(snv_gr$sample[overlaps@from],'_',feature_gr$id[overlaps@to])
      }
      if(ncol(snv)==5)
      {
          context_SNV_df=data.frame(sample=context_vec,chr=feature_mutation_gr@seqnames, pos=feature_mutation_gr@ranges@start,ref=feature_mutation_gr$ref, alt=feature_mutation_gr$alt)
      } else if(ncol(snv)==6)
      {
          context_SNV_df=data.frame(sample=context_vec,chr=feature_mutation_gr@seqnames, pos=feature_mutation_gr@ranges@start,ref=feature_mutation_gr$ref, alt=feature_mutation_gr$alt, freq=feature_mutation_gr$freq)
      }
      return(context_SNV_df)  # to snv
}
############