# Chunk defaults for the whole document: collapse source and output into a
# single block and prefix every output line with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

# Turn off crayon's coloured console output. Spelled FALSE rather than F,
# because F is an ordinary variable that can be reassigned.
options(crayon.enabled = FALSE)
library(CNAqc)
# library() instead of require(): stop here if dplyr is missing, since later
# chunks rely on `%>%` and mutate().
library(dplyr)

All CNAqc plotting functions use:

# Load the example dataset that ships with CNAqc
data("example_dataset_CNAqc", package = "CNAqc")

# Build the CNAqc object from the dataset's mutations, copy number segments
# and purity, with hg19 as the reference
x <- CNAqc::init(
  mutations = example_dataset_CNAqc$mutations,
  cna = example_dataset_CNAqc$cna,
  purity = example_dataset_CNAqc$purity,
  ref = "hg19"
)

print(x)

Plotting copy number segments

Segments plots

CNAqc can plot genome-wide segments showing major and minor allele counts as red and blue bars. Bottom circles annotate breakpoints; by default this plot has a limited y-axis, and breakpoints for segments outside the plot (e.g. very high amplifications) are shown in black. Areas of the genome that are mapped to the most prevalent karyotype are shadowed by default.

# Default plot: genome-wide segments for the object x, with all arguments
# other than the input left at their defaults
plot_segments(x)

Note: the colour scheme in CNAqc is fixed for certain segments. The colour scheme is adopted also for other information, e.g., to report the segment where a certain driver is mapped.

An alternative circular layout plot is available.

# Circular layout: same input object, drawn with circular = TRUE instead of
# the default layout
plot_segments(x, circular = TRUE)

Customising segments plot

# Shadow a chosen set of karyotypes via `highlight`
plot_segments(x, highlight = c("2:1", "2:0", "2:2")) +
  ggplot2::labs(title = "Annotate different karyotypes")

# Subset the genome: a single chromosome
plot_segments(x, chromosomes = "chr17") +
  ggplot2::labs(title = "Chromosome 17 zoom")

# Subset the genome: several chromosomes at once
plot_segments(x, chromosomes = c("chr17", "chr13")) +
  ggplot2::labs(title = "Chromosome 17 and 13 zoom")

# Total copy number (cn = "total")
plot_segments(x, cn = "total") +
  ggplot2::labs(title = "Total copy number")

Subclonal CNAs

We use the PCAWG object released with the package

# Print the example_PCAWG object exported by the CNAqc package
CNAqc::example_PCAWG %>% print()

Subclonal segments of each subclone have different colours compared to red/blue clonal segments.

# Segments plot for the PCAWG example object, with default arguments
plot_segments(CNAqc::example_PCAWG)

Note: the CCF of each subclone is not reported in this plot.

Segments summaries

Based on parameter type, function plot_karyotypes reports:

# Arrange two plot_karyotypes() summaries of x in one figure: the default
# `type` and type = 'number'. Both panels share one legend, placed at the bottom.
ggpubr::ggarrange(
  plot_karyotypes(x),
  plot_karyotypes(x, type = 'number'),
  common.legend = TRUE,
  legend = 'bottom'
)

We can also plot the segments length distribution, which is used to detect fragmentation patterns.

# Distribution of segment lengths for x
plot_segment_size_distribution(x)

Plotting mutations

Data histograms

Read count information can be plotted as histograms using plot_data_histogram.

# One histogram per read-count field (VAF, DP, NV), laid out in a single row
ggpubr::ggarrange(
  plot_data_histogram(x, which = "VAF"),
  plot_data_histogram(x, which = "DP"),
  plot_data_histogram(x, which = "NV"),
  nrow = 1,
  ncol = 3
)

Note: if you compute CCFs you can use plot_data_histogram(x, which = 'CCF') as well.

Segment-specific VAFs

VAFs per segment, split by chromosome, can help identify miscalled segments (i.e., for segments with the same major/minor allele copies the VAF distributions should be similar).

# Segment-level inspection of x
inspect_segment(x)

# The VAF histogram faceted with karyotype on rows and chromosome on columns,
# each facet with free scales
plot_data_histogram(x, which = "VAF") +
  ggplot2::facet_grid(karyotype ~ chr, scales = "free")

Genome-wide mutation data

Genome-wide plots that follow the layout of plot_segments are available to view the genome-wide distributions of the number of mutations, their VAF and depth. VAF and depth-plotting functions (scatterplots) can subset the input data to speed up rendering and reduce the size of output files - by default $5000$ points are shown. The counts plot, instead, bins the genome locations by one megabase ($10^6$ bases).

# All genome, all data - counts of mutations per megabase
plot_gw_counts(x)
# An example effect of downsampling the data: the same depth plot drawn with
# N = 1000 points, with the default N, and with N = 10000 points, stacked in
# a single column for comparison
ggpubr::ggarrange(
  plot_gw_depth(x, N = 1000),
  plot_gw_depth(x),
  plot_gw_depth(x, N = 10000),
  ncol = 1
)

Combined plots

The default S3 plot function lays out a panel with segments at the bottom, and all other genome-wide plots on top. This function aligns plots on the vertical axis, and stretches their relative heights to obtain a nice layout.

# Four stacked panels: mutation counts, VAF and depth on top, segments at the
# bottom. Panels are aligned vertically and the segments panel is given the
# largest relative height.
cowplot::plot_grid(
  plot_gw_counts(x),
  plot_gw_vaf(x, N = 10000),
  plot_gw_depth(x, N = 10000),
  plot_segments(x),
  nrow = 4,
  align = "v",
  rel_heights = c(0.15, 0.15, 0.15, 0.8)
)

Cohort plots

One can have multiple CNAqc objects - i.e., work with a cohort of objects - when working with

Assuming all input objects have the same reference, cohort-level plots are possible.

# First cohort member: the example dataset, unchanged
x <- CNAqc::init(
  mutations = example_dataset_CNAqc$mutations,
  cna = example_dataset_CNAqc$cna,
  purity = example_dataset_CNAqc$purity,
  ref = "hg19"
)

# Second member: same data, but every segment on chr1, chr3 and chr4 has
# Major and minor both overwritten with 1
y <- CNAqc::init(
  mutations = example_dataset_CNAqc$mutations,
  cna = example_dataset_CNAqc$cna %>%
    mutate(
      Major = ifelse(chr %in% c("chr3", "chr4", "chr1"), 1, Major),
      minor = ifelse(chr %in% c("chr3", "chr4", "chr1"), 1, minor)
    ),
  purity = example_dataset_CNAqc$purity,
  ref = "hg19"
)

# Third member: the PCAWG sample shipped with the package
z <- CNAqc::example_PCAWG

# Inputs are passed as a named list; x appears twice, as Original and Copy
inputs <- list(Original = x, Copy = x, Faked_diploid = y, PCAWG = z)

Copy number segments

The frequency of deletions and amplifications relative to diploid is shown by using a certain binning (step $\delta$) of the tumour genome. Here by deletion we define anything with 0 copies of the minor allele, and by amplification anything with $\geq 3$ total copies (sum of the minor and major alleles).

# Comparative CNA plot - default delta
# Takes the named list of CNAqc objects; `delta` is left at its default
plot_multisample_CNA(inputs)

The binning $\delta$ of the tumour genome can be changed to get larger/shorter segments.

# Comparative CNA plot - 10 Mb delta
# Same cohort, with the binning step set explicitly to 1e7 bases
plot_multisample_CNA(inputs, delta = 1e7)

A circular layout is also possible, with no binning or transformation of the data.

# Comparative CNA plot - circular layout
# Same cohort, drawn with layout = 'circular'
plot_multisample_CNA(inputs, layout = 'circular')


caravagnalab/CNAqc documentation built on Oct. 31, 2024, 3:54 a.m.