#' @title Partition mutations according to their genomic contexts
#'
#' @description Group mutations according to their occurrence within or outside user-annotated genomic segments.
#' @param snv A dataframe having \emph{sample, chr, pos, ref, alt and/or freq} as its columns.
#' @param file Path of the BED file, having genomic segments and their context information. This snv dataframe can be generated by \code{\link{vcfToSNV}}.
#' @param mode Default is set to \emph{include}: Select mutations lying inside the given genomic contexts (obtained from BED file).
#' \emph{exclude}: will give mutations outside the given context.
#' @param bed_file_start_cord Default: \emph{0}: If BED file has 0-based cordinate system. \emph{1}: If BED file has 1-based cordinate system.
#' @return \emph{snv} dataframe with column names of \emph{sample, chr, pos, ref, alt, freq}. Where, sample name will be concatenated by the its genomic context. Example: if a mutation within a sample of name \emph{sample1}, lies in the context segment of label \emph{C1} given in the input \emph{bed file} , the sample name of mutation is modified to \emph{sample1_C1}. The mutations that don't lie in any context segment given in the \emph{BED} file are discarded.
#' @details When the \emph{include} mode is used, group mutations according to their occurrence in user-specifiedannotated genomic segments; such segments could represent specific genomic or epigenomic contexts (e.g.actively transcribed coding regions). Alternately, in the \emph{exclude} mode, mutations in undesired regions (e.g.black-listed genomic regions) can be removed.
#' @export
#' @examples
#' BED_file=system.file("extdata", "context_testFile.bed", package = "MutSigTools", mustWork = TRUE)
#'
#' data(snv_sample.rda) # load 'snv' dataframe object
#'
#' context_snv=contextSNV(snv=snv_sample,BED_file, mode='include', bed_file_start_cord=0)
#'
#' @seealso \code{\link{vcfToSNV}} to generate \emph{snv} dataframe, and \url{https://genome.ucsc.edu/FAQ/FAQformat.html} for BED file format.
#'
contextSNV <- function(snv, file='empty', mode= 'include', bed_file_start_cord=0) {
if(file != 'empty' & file_ext(file)=='bed' & file.exists(file)){
feature_gr <- bed_to_granges(file, start_cord=bed_file_start_cord)
} else if (file != 'empty' & file_ext(file)!='bed'){
stop("File not a bed file", call. = TRUE, domain = NULL)
geterrmessage()
}else if (file != 'empty' & !file.exists(file)){
stop("BED File path does not exist", call. = TRUE, domain = NULL)
geterrmessage()
} else if(file=='empty') {
stop("BED File path is not given", call. = TRUE, domain = NULL)
geterrmessage()
}
if(ncol(snv)==5)
{
snv_df=data.frame(sample=snv$sample, chr=snv$chr, start=snv$pos, end=snv$pos, starnd=rep('.',nrow(snv)), ref=snv$ref, alt=snv$alt)
}else if(ncol(snv)==6){
snv_df=data.frame(sample=snv$sample, chr=snv$chr, start=snv$pos, end=snv$pos, starnd=rep('.',nrow(snv)), ref=snv$ref, alt=snv$alt, freq=snv$freq)
}
snv_gr=makeGRangesFromDataFrame(snv_df,keep.extra.columns = TRUE) # validate
seqlevels(feature_gr)=mapSeqlevels(seqlevels(feature_gr),"UCSC") # or NCBI
#seqlevels(snv_gr)=mapSeqlevels(seqlevels(snv_gr)[1:24],"UCSC")
overlaps=findOverlaps(snv_gr,feature_gr) # query , subject
if(mode== 'exclude') {
feature_mutation_gr=snv_gr[-overlaps@from,]
context_vec=rep('NoContext',length(feature_mutation_gr))
} else if(mode == 'include'){
feature_mutation_gr=snv_gr[overlaps@from,]
context_vec=paste0(snv_gr$sample[overlaps@from],'_',feature_gr$id[overlaps@to])
}
if(ncol(snv)==5)
{
context_SNV_df=data.frame(sample=context_vec,chr=feature_mutation_gr@seqnames, pos=feature_mutation_gr@ranges@start,ref=feature_mutation_gr$ref, alt=feature_mutation_gr$alt)
} else if(ncol(snv)==6)
{
context_SNV_df=data.frame(sample=context_vec,chr=feature_mutation_gr@seqnames, pos=feature_mutation_gr@ranges@start,ref=feature_mutation_gr$ref, alt=feature_mutation_gr$alt, freq=feature_mutation_gr$freq)
}
return(context_SNV_df) # to snv
}
############
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.