knitr::opts_chunk$set(warning = FALSE, message = FALSE)

Introduction

In this vignette, we demonstrate how to use annoFuse for single samples to filter out fusions known to be artifactual or with low read support as well as retain high-quality fusion calls from STAR-Fusion and Arriba. We also annotate high quality fusion calls and capture known and putative oncogenic driver fusions previously reported in TCGA or fusions containing gene partners that are known oncogenes, tumor suppressor genes, COSMIC genes, and/or transcription factors.

Generating StarFusion and arriba files

Fusion format requirements

Rsem gene format

Overview of the package

Here, we present annoFuse, an R package developed to annotate and retain expressed known and novel gene fusions, while removing artifactual fusions.

annoFuse_single_sample() runs the following functions for the given sample: fusion_standardization -> fusion_filtering_QC -> expression_filter_fusion -> annotate_fusion_calls. Please refer to the filtering criteria available per the function below:

Additionally, in the vignette we annotate filtered fusions with Pfam domains in Gene1A (Gene 5') and Gene1B (Gene 3') by running get_Pfam_domain(). We visualize the breakpoint in BRAF causing the KIAA1549--BRAF fusion in this sample by running plot_breakpoints().

Single sample Star Fusion and arriba standardization, annotation and filtering.

suppressPackageStartupMessages(library("annoFuse"))
suppressPackageStartupMessages(library("dplyr"))
suppressPackageStartupMessages(library("reshape2"))
suppressPackageStartupMessages(library("ggplot2"))

# Run annoFuse for Single sample with default expression filter and FusionAnnotator red flag artifact filter
standardFusioncalls <- annoFuse::annoFuse_single_sample(
  # Example files are provided in extdata, at-least 1 fusionfile is required along with it's rsem expression file
  fusionfileArriba = system.file("extdata", "arriba_example.tsv", package = "annoFuseData"),
  fusionfileStarFusion = system.file("extdata", "starfusion_example.tsv", package = "annoFuseData"),
  expressionFile = system.file("extdata", "example.rsem.genes.results.gz", package = "annoFuseData"),
  tumorID = "BS_W97QQYKQ",
  # multiple read flag values for filtering using FusionAnnotator values
  artifactFilter = "GTEx_Recurrent|DGD_PARALOGS|Normal|BodyMap|ConjoinG",
  # keep all in-frame , frameshift and other types of Fusion_Type
  readingFrameFilter = "in-frame|frameshift|other",
  # keep all fusions with atleast 1 junction read support
  junctionReadCountFilter = 1,
  # keep only fusions where spanningFragCount-junctionReadCountFilter less than equal to 10
  spanningFragCountFilter = 10,
  # keep read throughs
  readthroughFilter = FALSE
)

# Add domain level information for fusion
# read in pfamDataBioMart with pfam and gene boundaries from UCSC pfam and biomaRt package
bioMartDataPfam <- readRDS(system.file("extdata", "pfamDataBioMart.RDS", package = "annoFuseData"))
annDomain <- annoFuse::get_Pfam_domain(
  standardFusioncalls = standardFusioncalls,
  bioMartDataPfam = bioMartDataPfam,
  # partial overlapping domains are retained == "Partial" with keepPartialAnno=TRUE; if keepPartialAnno=FALSE then domain retained status == "No"
  keepPartialAnno = TRUE
)

# read in exonsToPlot with exon and gene boundaries from gencode.v27.primary_assembly.annotation.gtf.gz
exons <- readRDS(system.file("extdata", "exonsToPlot.RDS", package = "annoFuseData"))

Plot breakpoint

# plot BRAF breakpoint in sample for KIAA1549--BRAF fusion
kiaa1549 <- plot_breakpoints(domainDataFrame = annDomain, exons = exons, geneposition = "Left", fusionname = "KIAA1549--BRAF", base_size = 12)
braf <- plot_breakpoints(domainDataFrame = annDomain, exons = exons, geneposition = "Right", fusionname = "KIAA1549--BRAF", base_size = 12)

ggpubr::ggarrange(kiaa1549, braf, align = "h")

Session info {-}

sessionInfo()


d3b-center/annoFuse documentation built on Feb. 21, 2023, 1:06 a.m.