#' Extract mutation contexts in the format compatible with CHORD
#'
#' @description This function is a wrapper for the 3 functions from mutSigExtractor:
#' extractSigsSnv(), extractSigsIndel(), extractSigsSv(). Some post-processing is done to produce
#' compatible input for CHORD
#'
#' @param vcf.snv Path to the vcf file containing SNVs
#' @param vcf.indel Path to the vcf file containing indels. By default vcf.indel=vcf.snv
#' @param vcf.sv Path to the vcf file containing SVs
#' @param df.snv A dataframe containing the columns: chrom, pos, ref, alt.
#' @param df.indel A dataframe containing the columns: chrom, pos, ref, alt.
#' @param df.sv A dataframe with the columns: sv_type, sv_len. sv_type can be DEL, DUP, INV, TRA, BND.
#' @param sample.name The name of the sample as a character. Defaults to 'sample' if none is
#' provided.
#' @param sv.caller SV vcfs are not standardized and therefore need to be parsed differently
#' depending on the caller. Currently supports 'manta' or 'gridss'.
#' @param vcf.filters A list of in the form list(snv=character(),indel=character(),sv=character())
#' indicated which variants to keep, corresponding to the values in the vcf FILTER column. NA can be
#' specified for each list item to ignore filtering of a vcf.
#' @param output.path If a path is specified, the output is written to this path.
#' @param ref.genome A BSgenome reference genome. Default is BSgenome.Hsapiens.UCSC.hg19. If another
#' reference genome is indicated, it will also need to be installed.
#' @param verbose Whether to print progress messages
#'
#' @return A 1-row data frame containing the mutational signature contributions
#' @export
#'
extractSigsChord <- function(
vcf.snv=NULL, vcf.indel=vcf.snv, vcf.sv=NULL,
df.snv=NULL, df.indel=df.snv, df.sv=NULL,
sample.name='sample',
vcf.filters=list(snv=NA,indel=NA,sv=NA),
sv.caller='gridss', output.path=NULL, ref.genome=mutSigExtractor:::DEFAULT_GENOME, verbose=F
){
#vcf.snv='/Users/lnguyen/hpc/cog_bioinf/cuppen/project_data/Luan_projects/CHORD/scripts_main/CHORD/example/vcf/PD4116_snv_indel.vcf.gz'
#vcf.sv='/Users/lnguyen/hpc/cog_bioinf/cuppen/project_data/Luan_projects/CHORD/scripts_main/CHORD/example/vcf/PD4116_sv.vcf.gz'
if(is.null(vcf.snv) & is.null(df.snv)){
stop('Either vcf.snv or df.snv inputs are required')
}
if(is.null(vcf.indel) & is.null(df.indel)){
stop('Either vcf.indel or df.indel inputs are required')
}
if(is.null(vcf.sv) & is.null(df.sv)){
stop('Either vcf.sv or df.sv inputs are required')
}
######### Loading vcfs and/or dataframe inputs #########
if(verbose){ message('\n#====== Loading variants from vcfs ======#') }
variants <- list()
if(verbose){ message('\n## SNVs') }
if(!is.null(vcf.snv)){
variants$snv <- mutSigExtractor::variantsFromVcf(
vcf.snv,
vcf.filter=vcf.filters$snv,
verbose=verbose
)
} else {
variants$snv <- df.snv
}
if(verbose){ message('\n## Indels') }
if(!is.null(vcf.indel)){
if(vcf.indel==vcf.snv){
if(verbose){ message('vcf file is the same for both SNVs and indels. Skipping reading vcf for indels') }
variants$indel <- variants$snv
} else {
variants$indel <- mutSigExtractor::variantsFromVcf(
vcf.indel,
vcf.filter=vcf.filters$indel,
verbose=verbose
)
}
} else {
if(identical(df.indel,df.snv)){
variants$indel <- df.snv
} else {
variants$indel <- df.indel
}
}
if(verbose){ message('\n## SVs') }
if(!is.null(vcf.sv)){
variants$sv <- mutSigExtractor::variantsFromVcf(vcf.sv, vcf.filter=vcf.filters$sv, vcf.fields=c('CHROM','POS','REF','ALT','FILTER','ID','INFO'), verbose=verbose)
variants$sv <- mutSigExtractor::getContextsSv(variants$sv, sv.caller=sv.caller)
} else {
variants$sv <- df.sv
}
######### Count contexts #########
if(verbose){ message('\n#====== Counting mutation contexts ======#') }
sigs <- list()
if(verbose){ message('\n## Single base substitutions') }
sigs$snv <- mutSigExtractor::extractSigsSnv(df=variants$snv, output='contexts', ref.genome=ref.genome, verbose=verbose)
if(verbose){ message('\n## Indel contexts (types x lengths)') }
sigs$indel <- mutSigExtractor::extractSigsIndel(df=variants$indel, method='chord', ref.genome=ref.genome, verbose=verbose)
if(verbose){ message('\n## SV contexts (types x lengths)') }
sigs$sv <- mutSigExtractor::extractSigsSv(
df=variants$sv, sv.caller=sv.caller, output='contexts',
sv.len.cutoffs = c(0, 10^3, 10^4, 10^5, 10^6, 10^7,Inf),
verbose=verbose
)
######### Export #########
if(verbose){ message('\n#====== Exporting output =========#') }
out <- do.call(cbind,lapply(sigs,t))
rownames(out) <- sample.name
if(is.null(output.path)){
if(verbose){ message('output.path not specified. Directly returning output') }
return(out)
} else {
if(verbose){ message('Writing tsv file') }
write.table(out, output.path, sep='\t', quote=F)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.