>

BiocStyle::markdown()
library(knitr)
htmltools::tagList(rmarkdown::html_dependency_font_awesome())

# set dpi
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  dpi=60
)

Getting started

ggcoverage is an R package distributed as part of the CRAN. To install the package, start R and enter:

# install via CRAN
install.package("ggcoverage")

# install via Github
# install.package("remotes")   #In case you have not installed it.
remotes::install_github("showteeth/ggcoverage")

In general, it is recommended to install from Github repository (update more timely).

Once ggcoverage is installed, it can be loaded by the following command.

library("rtracklayer")
library("graphics")
library("ggcoverage")

Introduction

The goal of ggcoverage is simplify the process of visualizing genome coverage. It contains three main parts:

ggcoverage utilizes ggplot2 plotting system, so its usage is ggplot2-style!


RNA-seq data

Load the data

The RNA-seq data used here are from Transcription profiling by high throughput sequencing of HNRNPC knockdown and control HeLa cells, we select four sample to use as example: ERR127307_chr14, ERR127306_chr14, ERR127303_chr14, ERR127302_chr14, and all bam files are converted to bigwig file with deeptools.

Load metadata:

# load metadata
meta.file <- system.file("extdata", "RNA-seq", "meta_info.csv", package = "ggcoverage")
sample.meta = read.csv(meta.file)
sample.meta

Load track files:

# track folder
track.folder = system.file("extdata", "RNA-seq", package = "ggcoverage")
# load bigwig file
track.df = LoadTrackFile(track.folder = track.folder, format = "bw",
                         meta.info = sample.meta)
# check data
head(track.df)

Prepare mark region:

# create mark region
mark.region=data.frame(start=c(21678900,21732001,21737590),
                       end=c(21679900,21732400,21737650),
                       label=c("M1", "M2", "M3"))
# check data
mark.region

Load GTF file:

gtf.file = system.file("extdata", "used_hg19.gtf", package = "ggcoverage")
gtf.gr = rtracklayer::import.gff(con = gtf.file, format = 'gtf')

Basic coverage

basic.coverage = ggcoverage(data = track.df, color = "auto", 
                            mark.region = mark.region, range.position = "out")
basic.coverage
knitr::include_graphics("../man/figures/README-basic_coverage-1.png")

You can also change Y axis style:

basic.coverage = ggcoverage(data = track.df, color = "auto", 
                            mark.region = mark.region, range.position = "in")
basic.coverage
knitr::include_graphics("../man/figures/README-basic_coverage_2-1.png")

Add gene annotation

basic.coverage + 
  geom_gene(gtf.gr=gtf.gr)
knitr::include_graphics("../man/figures/README-gene_coverage-1.png")

Add transcript annotation

basic.coverage + 
  geom_transcript(gtf.gr=gtf.gr,label.vjust = 1.5)
knitr::include_graphics("../man/figures/README-transcript_coverage-1.png")

Add ideogram

basic.coverage +
  geom_gene(gtf.gr=gtf.gr) +
  geom_ideogram(genome = "hg19",plot.space = 0)
knitr::include_graphics("../man/figures/README-ideogram_coverage_1-1.png")
basic.coverage +
  geom_transcript(gtf.gr=gtf.gr,label.vjust = 1.5) +
  geom_ideogram(genome = "hg19",plot.space = 0)
knitr::include_graphics("../man/figures/README-ideogram_coverage_2-1.png")

DNA-seq data

CNV

Load the data

The DNA-seq data used here are from Copy number work flow, we select tumor sample, and get bin counts with cn.mops::getReadCountsFromBAM with WL 1000.

# track file
track.file = system.file("extdata", "DNA-seq", "CNV_example.txt", package = "ggcoverage")
track.df = read.table(track.file, header = TRUE)
# check data
head(track.df)

Basic coverage

basic.coverage = ggcoverage(data = track.df,color = NULL, mark.region = NULL,
                            region = 'chr4:61750000-62,700,000', range.position = "out")
basic.coverage
knitr::include_graphics("../man/figures/README-basic_coverage_dna-1.png")

Add annotations

Add GC, ideogram and gene annotations.

# load genome data
library("BSgenome.Hsapiens.UCSC.hg19")
# create plot
basic.coverage +
  geom_gc(bs.fa.seq=BSgenome.Hsapiens.UCSC.hg19) +
  geom_gene(gtf.gr=gtf.gr) +
  geom_ideogram(genome = "hg19")
knitr::include_graphics("../man/figures/README-gc_coverage-1.png")

Single-nucleotide level

Load the data

# prepare sample metadata
sample.meta <- data.frame(
  SampleName = c("tumorA.chr4.selected"),
  Type = c("tumorA"),
  Group = c("tumorA")
)
# load bam file
bam.file = system.file("extdata", "DNA-seq", "tumorA.chr4.selected.bam", package = "ggcoverage")
track.df <- LoadTrackFile(
  track.file = bam.file,
  meta.info = sample.meta,
  single.nuc=TRUE, single.nuc.region="chr4:62474235-62474295"
)
head(track.df)

Default color scheme

For base and amino acid annotation, we have following default color schemes, you can change with nuc.color and aa.color parameters.

Default color scheme for base annotation is Clustal-style, more popular color schemes is available here.

# color scheme
nuc.color = c("A" = "#ff2b08", "C" = "#009aff", "G" = "#ffb507", "T" = "#00bc0d")
opar <- graphics::par() 
# create plot
graphics::par(mar = c(1, 5, 1, 1))
graphics::image(
  1:length(nuc.color), 1, as.matrix(1:length(nuc.color)),
  col = nuc.color,
  xlab = "", ylab = "", xaxt = "n", yaxt = "n", bty = "n"
)
graphics::text(1:length(nuc.color), 1, names(nuc.color))
graphics::mtext(
  text = "Base", adj = 1, las = 1,
  side = 2
)

# reset par default
graphics::par(opar)

Default color scheme for amino acid annotation is from Residual colours: a proposal for aminochromography:

aa.color = c(
  "D" = "#FF0000", "S" = "#FF2400", "T" = "#E34234", "G" = "#FF8000", "P" = "#F28500",
  "C" = "#FFFF00", "A" = "#FDFF00", "V" = "#E3FF00", "I" = "#C0FF00", "L" = "#89318C",
  "M" = "#00FF00", "F" = "#50C878", "Y" = "#30D5C8", "W" = "#00FFFF", "H" = "#0F2CB3",
  "R" = "#0000FF", "K" = "#4b0082", "N" = "#800080", "Q" = "#FF00FF", "E" = "#8F00FF",
  "*" = "#FFC0CB", " " = "#FFFFFF", " " = "#FFFFFF", " " = "#FFFFFF", " " = "#FFFFFF"
)

graphics::par(mar = c(1, 5, 1, 1))
graphics::image(
  1:5, 1:5, matrix(1:length(aa.color),nrow=5),
  col = rev(aa.color),
  xlab = "", ylab = "", xaxt = "n", yaxt = "n", bty = "n"
)
graphics::text(expand.grid(1:5,1:5), names(rev(aa.color)))
graphics::mtext(
  text = "Amino acids", adj = 1, las = 1,
  side = 2
)

# reset par default
graphics::par(opar)

Add base and amino acid annotation

ggcoverage(data = track.df, color = "grey", range.position = "out", single.nuc=T, rect.color = "white") +
  geom_base(bam.file = bam.file,
            bs.fa.seq = BSgenome.Hsapiens.UCSC.hg19) +
  geom_ideogram(genome = "hg19",plot.space = 0)
knitr::include_graphics("../man/figures/README-base_aa_coverage-1.png")

ChIP-seq data

The ChIP-seq data used here are from DiffBind, I select four sample to use as example: Chr18_MCF7_input, Chr18_MCF7_ER_1, Chr18_MCF7_ER_3, Chr18_MCF7_ER_2, and all bam files are converted to bigwig file with deeptools.

Create metadata:

# load metadata
sample.meta = data.frame(SampleName=c('Chr18_MCF7_ER_1','Chr18_MCF7_ER_2','Chr18_MCF7_ER_3','Chr18_MCF7_input'),
                         Type = c("MCF7_ER_1","MCF7_ER_2","MCF7_ER_3","MCF7_input"),
                         Group = c("IP", "IP", "IP", "Input"))
sample.meta

Load track files:

# track folder
track.folder = system.file("extdata", "ChIP-seq", package = "ggcoverage")
# load bigwig file
track.df = LoadTrackFile(track.folder = track.folder, format = "bw",
                         meta.info = sample.meta)
# check data
head(track.df)

Prepare mark region:

# create mark region
mark.region=data.frame(start=c(76822533),
                       end=c(76823743),
                       label=c("Promoter"))
# check data
mark.region

Basic coverage

basic.coverage = ggcoverage(data = track.df, color = "auto", region = "chr18:76822285-76900000", 
                            mark.region=mark.region, show.mark.label = FALSE)
basic.coverage
knitr::include_graphics("../man/figures/README-basic_coverage_chip-1.png")

Add annotations

Add gene, ideogram and peak annotations. To create peak annotation, we first get consensus peaks with MSPC.

# get consensus peak file
peak.file = system.file("extdata", "ChIP-seq", "consensus.peak", package = "ggcoverage")

basic.coverage +
  geom_gene(gtf.gr=gtf.gr) +
  geom_peak(bed.file = peak.file) +
  geom_ideogram(genome = "hg19",plot.space = 0)
knitr::include_graphics("../man/figures/README-peak_coverage-1.png")

Session info

sessionInfo()


Try the ggcoverage package in your browser

Any scripts or data that you put into this service are public.

ggcoverage documentation built on Sept. 6, 2022, 9:06 a.m.