BiocStyle::markdown()
library(knitr) htmltools::tagList(rmarkdown::html_dependency_font_awesome()) # set dpi knitr::opts_chunk$set( collapse = TRUE, comment = "#>", dpi=60 )
ggcoverage
is an R package distributed as part of the CRAN.
To install the package, start R and enter:
# install via CRAN install.package("ggcoverage") # install via Github # install.package("remotes") #In case you have not installed it. remotes::install_github("showteeth/ggcoverage")
In general, it is recommended to install from Github repository (update more timely).
Once ggcoverage
is installed, it can be loaded by the following command.
library("rtracklayer") library("graphics") library("ggcoverage")
The goal of ggcoverage
is simplify the process of visualizing genome coverage. It contains three main parts:
ggcoverage
can load BAM, BigWig (.bw), BedGraph files from various NGS data, including WGS, RNA-seq, ChIP-seq, ATAC-seq, et al.ggcoverage
supports six different annotations:ggcoverage
utilizes ggplot2
plotting system, so its usage is ggplot2-style!
The RNA-seq data used here are from Transcription profiling by high throughput sequencing of HNRNPC knockdown and control HeLa cells, we select four sample to use as example: ERR127307_chr14, ERR127306_chr14, ERR127303_chr14, ERR127302_chr14, and all bam files are converted to bigwig file with deeptools.
Load metadata:
# load metadata meta.file <- system.file("extdata", "RNA-seq", "meta_info.csv", package = "ggcoverage") sample.meta = read.csv(meta.file) sample.meta
Load track files:
# track folder track.folder = system.file("extdata", "RNA-seq", package = "ggcoverage") # load bigwig file track.df = LoadTrackFile(track.folder = track.folder, format = "bw", meta.info = sample.meta) # check data head(track.df)
Prepare mark region:
# create mark region mark.region=data.frame(start=c(21678900,21732001,21737590), end=c(21679900,21732400,21737650), label=c("M1", "M2", "M3")) # check data mark.region
Load GTF file:
gtf.file = system.file("extdata", "used_hg19.gtf", package = "ggcoverage") gtf.gr = rtracklayer::import.gff(con = gtf.file, format = 'gtf')
basic.coverage = ggcoverage(data = track.df, color = "auto", mark.region = mark.region, range.position = "out") basic.coverage
knitr::include_graphics("../man/figures/README-basic_coverage-1.png")
You can also change Y axis style:
basic.coverage = ggcoverage(data = track.df, color = "auto", mark.region = mark.region, range.position = "in") basic.coverage
knitr::include_graphics("../man/figures/README-basic_coverage_2-1.png")
basic.coverage + geom_gene(gtf.gr=gtf.gr)
knitr::include_graphics("../man/figures/README-gene_coverage-1.png")
basic.coverage + geom_transcript(gtf.gr=gtf.gr,label.vjust = 1.5)
knitr::include_graphics("../man/figures/README-transcript_coverage-1.png")
basic.coverage + geom_gene(gtf.gr=gtf.gr) + geom_ideogram(genome = "hg19",plot.space = 0)
knitr::include_graphics("../man/figures/README-ideogram_coverage_1-1.png")
basic.coverage + geom_transcript(gtf.gr=gtf.gr,label.vjust = 1.5) + geom_ideogram(genome = "hg19",plot.space = 0)
knitr::include_graphics("../man/figures/README-ideogram_coverage_2-1.png")
The DNA-seq data used here are from Copy number work flow, we select tumor sample, and get bin counts with cn.mops::getReadCountsFromBAM
with WL
1000.
# track file track.file = system.file("extdata", "DNA-seq", "CNV_example.txt", package = "ggcoverage") track.df = read.table(track.file, header = TRUE) # check data head(track.df)
basic.coverage = ggcoverage(data = track.df,color = NULL, mark.region = NULL, region = 'chr4:61750000-62,700,000', range.position = "out") basic.coverage
knitr::include_graphics("../man/figures/README-basic_coverage_dna-1.png")
Add GC, ideogram and gene annotations.
# load genome data library("BSgenome.Hsapiens.UCSC.hg19") # create plot basic.coverage + geom_gc(bs.fa.seq=BSgenome.Hsapiens.UCSC.hg19) + geom_gene(gtf.gr=gtf.gr) + geom_ideogram(genome = "hg19")
knitr::include_graphics("../man/figures/README-gc_coverage-1.png")
# prepare sample metadata sample.meta <- data.frame( SampleName = c("tumorA.chr4.selected"), Type = c("tumorA"), Group = c("tumorA") ) # load bam file bam.file = system.file("extdata", "DNA-seq", "tumorA.chr4.selected.bam", package = "ggcoverage") track.df <- LoadTrackFile( track.file = bam.file, meta.info = sample.meta, single.nuc=TRUE, single.nuc.region="chr4:62474235-62474295" ) head(track.df)
For base and amino acid annotation, we have following default color schemes, you can change with nuc.color
and aa.color
parameters.
Default color scheme for base annotation is Clustal-style
, more popular color schemes is available here.
# color scheme nuc.color = c("A" = "#ff2b08", "C" = "#009aff", "G" = "#ffb507", "T" = "#00bc0d") opar <- graphics::par() # create plot graphics::par(mar = c(1, 5, 1, 1)) graphics::image( 1:length(nuc.color), 1, as.matrix(1:length(nuc.color)), col = nuc.color, xlab = "", ylab = "", xaxt = "n", yaxt = "n", bty = "n" ) graphics::text(1:length(nuc.color), 1, names(nuc.color)) graphics::mtext( text = "Base", adj = 1, las = 1, side = 2 ) # reset par default graphics::par(opar)
Default color scheme for amino acid annotation is from Residual colours: a proposal for aminochromography:
aa.color = c( "D" = "#FF0000", "S" = "#FF2400", "T" = "#E34234", "G" = "#FF8000", "P" = "#F28500", "C" = "#FFFF00", "A" = "#FDFF00", "V" = "#E3FF00", "I" = "#C0FF00", "L" = "#89318C", "M" = "#00FF00", "F" = "#50C878", "Y" = "#30D5C8", "W" = "#00FFFF", "H" = "#0F2CB3", "R" = "#0000FF", "K" = "#4b0082", "N" = "#800080", "Q" = "#FF00FF", "E" = "#8F00FF", "*" = "#FFC0CB", " " = "#FFFFFF", " " = "#FFFFFF", " " = "#FFFFFF", " " = "#FFFFFF" ) graphics::par(mar = c(1, 5, 1, 1)) graphics::image( 1:5, 1:5, matrix(1:length(aa.color),nrow=5), col = rev(aa.color), xlab = "", ylab = "", xaxt = "n", yaxt = "n", bty = "n" ) graphics::text(expand.grid(1:5,1:5), names(rev(aa.color))) graphics::mtext( text = "Amino acids", adj = 1, las = 1, side = 2 ) # reset par default graphics::par(opar)
ggcoverage(data = track.df, color = "grey", range.position = "out", single.nuc=T, rect.color = "white") + geom_base(bam.file = bam.file, bs.fa.seq = BSgenome.Hsapiens.UCSC.hg19) + geom_ideogram(genome = "hg19",plot.space = 0)
knitr::include_graphics("../man/figures/README-base_aa_coverage-1.png")
The ChIP-seq data used here are from DiffBind, I select four sample to use as example: Chr18_MCF7_input, Chr18_MCF7_ER_1, Chr18_MCF7_ER_3, Chr18_MCF7_ER_2, and all bam files are converted to bigwig file with deeptools.
Create metadata:
# load metadata sample.meta = data.frame(SampleName=c('Chr18_MCF7_ER_1','Chr18_MCF7_ER_2','Chr18_MCF7_ER_3','Chr18_MCF7_input'), Type = c("MCF7_ER_1","MCF7_ER_2","MCF7_ER_3","MCF7_input"), Group = c("IP", "IP", "IP", "Input")) sample.meta
Load track files:
# track folder track.folder = system.file("extdata", "ChIP-seq", package = "ggcoverage") # load bigwig file track.df = LoadTrackFile(track.folder = track.folder, format = "bw", meta.info = sample.meta) # check data head(track.df)
Prepare mark region:
# create mark region mark.region=data.frame(start=c(76822533), end=c(76823743), label=c("Promoter")) # check data mark.region
basic.coverage = ggcoverage(data = track.df, color = "auto", region = "chr18:76822285-76900000", mark.region=mark.region, show.mark.label = FALSE) basic.coverage
knitr::include_graphics("../man/figures/README-basic_coverage_chip-1.png")
Add gene, ideogram and peak annotations. To create peak annotation, we first get consensus peaks with MSPC.
# get consensus peak file peak.file = system.file("extdata", "ChIP-seq", "consensus.peak", package = "ggcoverage") basic.coverage + geom_gene(gtf.gr=gtf.gr) + geom_peak(bed.file = peak.file) + geom_ideogram(genome = "hg19",plot.space = 0)
knitr::include_graphics("../man/figures/README-peak_coverage-1.png")
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.