inst/doc/vignettes.R

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(#echo = TRUE,
  collapse = TRUE,
  comment = "#>")

## ----installation, eval=FALSE-------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  BiocManager::install("StructuralVariantAnnotation")

## ----input, warning=FALSE, message=FALSE--------------------------------------
suppressPackageStartupMessages(require(StructuralVariantAnnotation))
suppressPackageStartupMessages(require(VariantAnnotation))
vcf.file <- system.file("extdata", "gridss.vcf", package = "StructuralVariantAnnotation")
vcf <- VariantAnnotation::readVcf(vcf.file, "hg19")
gr <- breakpointRanges(vcf)

## -----------------------------------------------------------------------------
partner(gr)

## -----------------------------------------------------------------------------
colo829_vcf <- VariantAnnotation::readVcf(system.file("extdata", "COLO829T.purple.sv.ann.vcf.gz", package = "StructuralVariantAnnotation"))
colo829_bpgr <- breakpointRanges(colo829_vcf)
colo829_begr <- breakendRanges(colo829_vcf)
colo829_gr <- sort(c(colo829_begr, colo829_bpgr))
colo829_gr[seqnames(colo829_gr) == "6"]

## -----------------------------------------------------------------------------
colo828_chr6_breakpoints <- colo829_gr[seqnames(colo829_gr) == "6"]
# A call to findBreakpointOverlaps(colo828_chr6_breakpoints, colo828_chr6_breakpoints)
# will fail as there are a) single breakends, and b) breakpoints with missing partners
colo828_chr6_breakpoints <- colo828_chr6_breakpoints[colo828_chr6_breakpoints$partner %in% names(colo828_chr6_breakpoints)]
# As expected, each call on chr6 only overlaps with itself
countBreakpointOverlaps(colo828_chr6_breakpoints, colo828_chr6_breakpoints)

## -----------------------------------------------------------------------------
colo828_chr6_breakpoints <- colo829_gr[
  seqnames(colo829_gr) == "6" |
    seqnames(partner(colo829_gr, selfPartnerSingleBreakends=TRUE)) == "6"]
# this way we keep the chr3<->chr6 breakpoint and don't create any orphans
head(colo828_chr6_breakpoints, 1)

## -----------------------------------------------------------------------------
truth_vcf <- readVcf(system.file("extdata", "na12878_chr22_Sudmunt2015.vcf", package = "StructuralVariantAnnotation"))
truth_svgr <- breakpointRanges(truth_vcf)
truth_svgr <- truth_svgr[seqnames(truth_svgr) == "chr22"]
crest_vcf <- readVcf(system.file("extdata", "na12878_chr22_crest.vcf", package = "StructuralVariantAnnotation"))
# Some SV callers don't report QUAL so we need to use a proxy
VariantAnnotation::fixed(crest_vcf)$QUAL <- info(crest_vcf)$left_softclipped_read_count + info(crest_vcf)$left_softclipped_read_count
crest_svgr <- breakpointRanges(crest_vcf)
crest_svgr$caller <- "crest"
hydra_vcf <- readVcf(system.file("extdata", "na12878_chr22_hydra.vcf", package = "StructuralVariantAnnotation"))
hydra_svgr <- breakpointRanges(hydra_vcf)
hydra_svgr$caller <- "hydra"
svgr <- c(crest_svgr, hydra_svgr)
svgr$truth_matches <- countBreakpointOverlaps(svgr, truth_svgr,
  # read pair based callers make imprecise calls.
  # A margin around the call position is required when matching with the truth set
  maxgap=100,
  # Since we added a maxgap, we also need to restrict the mismatch between the
  # size of the events. We don't want to match a 100bp deletion with a 
  # 5bp duplication. This will happen if we have a 100bp margin but don't also
  # require an approximate size match as well
  sizemargin=0.25,
  # We also don't want to match a 20bp deletion with a 20bp deletion 80bp away
  # by restricting the margin based on the size of the event, we can make sure
  # that simple events actually do overlap
  restrictMarginToSizeMultiple=0.5,
  # HYDRA makes duplicate calls and will sometimes report a variant multiple
  # times with slightly different bounds. countOnlyBest prevents these being
  # double-counted as multiple true positives.
  countOnlyBest=TRUE)

## -----------------------------------------------------------------------------
suppressPackageStartupMessages(require(dplyr))
suppressPackageStartupMessages(require(ggplot2))
ggplot(as.data.frame(svgr) %>%
  dplyr::select(QUAL, caller, truth_matches) %>%
  dplyr::group_by(caller, QUAL) %>%
  dplyr::summarise(
    calls=dplyr::n(),
    tp=sum(truth_matches > 0)) %>%
  dplyr::group_by(caller) %>%
  dplyr::arrange(dplyr::desc(QUAL)) %>%
  dplyr::mutate(
    cum_tp=cumsum(tp),
    cum_n=cumsum(calls),
    cum_fp=cum_n - cum_tp,
    Precision=cum_tp / cum_n,
    Recall=cum_tp/length(truth_svgr))) +
  aes(x=Recall, y=Precision, colour=caller) +
  geom_point() +
  geom_line() +
  labs(title="NA12878 chr22 CREST and HYDRA\nSudmunt 2015 truth set")

## -----------------------------------------------------------------------------
suppressPackageStartupMessages(require(rtracklayer))
# Export to BEDPE
rtracklayer::export(breakpointgr2pairs(gr), con="gridss.bedpe")

# Import to BEDPE
bedpe.gr  <- pairs2breakpointgr(rtracklayer::import("gridss.bedpe"))


## -----------------------------------------------------------------------------
suppressPackageStartupMessages(require(circlize))
colo829_bpgr_with_chr_prefix <- colo829_bpgr
seqlevelsStyle(colo829_bpgr_with_chr_prefix) <- "UCSC"
pairs <- breakpointgr2pairs(colo829_bpgr_with_chr_prefix)
circos.initializeWithIdeogram()
circos.genomicLink(as.data.frame(S4Vectors::first(pairs)), as.data.frame(S4Vectors::second(pairs)))
circos.clear()

## ----add to.gr----------------------------------------------------------------
suppressPackageStartupMessages(require(ggbio))
gr.circos <- colo829_bpgr[seqnames(colo829_bpgr) %in% seqlevels(biovizBase::hg19sub)]
seqlevels(gr.circos) <- seqlevels(biovizBase::hg19sub)
mcols(gr.circos)$to.gr <- granges(partner(gr.circos))

## ----ggbio--------------------------------------------------------------------
p <- ggbio() +
	circle(gr.circos, geom="link", linked.to="to.gr") +
	circle(biovizBase::hg19sub, geom='ideo', fill='gray70') +
	circle(biovizBase::hg19sub, geom='scale', size=2) +
	circle(biovizBase::hg19sub, geom='text', aes(label=seqnames), vjust=0, size=3)
p

## -----------------------------------------------------------------------------
sessionInfo()

Try the StructuralVariantAnnotation package in your browser

Any scripts or data that you put into this service are public.

StructuralVariantAnnotation documentation built on Nov. 8, 2020, 5:43 p.m.