suppressPackageStartupMessages({ library(trackViewer) library(TxDb.Hsapiens.UCSC.hg19.knownGene) library(org.Hs.eg.db) library(InteractionSet) library(rgl) }) knitr::opts_chunk$set(warning=FALSE, message=FALSE)
The chromatin interactions is involved in precise quantitative and spatiotemporal control of gene expression. The development of high-throughput experimental techniques, such as HiC-seq, HiCAR-seq, and InTAC-seq, for analyzing both the higher-order structure of chromatin and the interactions between protein and their nearby and remote regulatory elements has been developed to reveal how gene expression is controlled in genome-wide.
The interaction data will be saved in the format of paired genome coordinates
with the interaction score. The popular format are .validPairs
, .hic
, and
.cool
. The trackViewer
package can be used to handle those data to plot
the heatmap or the interaction links.
Plot chromatin interactions tracks as heatmap.
library(trackViewer) library(InteractionSet) gi <- readRDS(system.file("extdata", "nij.chr6.51120000.53200000.gi.rds", package="trackViewer")) head(gi) ## hicexplorer:hicConvertFormat tool can be used to convert other formats into GInteractions ## eg: hicConvertFormat -m mESC_rep.hic --inputFormat hic --outputFormat cool -o mESC_rep.mcool ## hicConvertFormat -m mESC_rep.mcool::resolutions/10000 --inputFormat cool --outputFormat ginteractions -o mESC_rep.ginteractions --resolutions 10000 ## please note that metadata:score is used for plot. gi$border_color <- NA ## highlight some regions gi$border_color[sample(seq_along(gi), 20)] <- sample(1:7, 20, replace=TRUE) ## The TADs will be drawn as lines at points start(first), center point, end(second). tads <- GInteractions( GRanges("chr6", IRanges(c(51130001, 51130001, 51450001, 52210001), width = 20000)), GRanges("chr6", IRanges(c(51530001, 52170001, 52210001, 53210001), width = 20000))) range <- GRanges("chr6", IRanges(51120000, 53200000)) heatmap <- gi2track(gi) ctcf <- readRDS(system.file("extdata", "ctcf.sample.rds", package="trackViewer")) viewTracks(trackList(ctcf, heatmap, heightDist = c(1, 3)), gr=range, autoOptimizeStyle = TRUE) ## add TAD information addInteractionAnnotation(tads, "heatmap", grid.lines, gp=gpar(col="#E69F00", lwd=3, lty=3)) ## add highlight interested regions gi_sub <- gi[order(gi$score, decreasing = TRUE)] gi_sub <- head(gi_sub[distance(first(gi_sub), second(gi_sub))>200000], n=5) start(regions(gi_sub)) <- start(regions(gi_sub))-40000 end(regions(gi_sub)) <- end(regions(gi_sub))+40000 addInteractionAnnotation(gi_sub, "heatmap", grid.polygon, gp=gpar(col="red", lwd=2, lty=2, fill=NA)) ## add interesting anchor at giving coordinate. addInteractionAnnotation(52900000, "heatmap", gp=gpar(col="blue", lwd=3)) addInteractionAnnotation(-52900000, "heatmap", gp=gpar(col="cyan", lwd=3, lty=4)) ## view the interaction data back to back. ## Please make sure the data are normalized. gi2 <- gi set.seed(123) gi2$score <- gi$score + rnorm(length(gi), sd = sd(gi$score)) back2back <- gi2track(gi, gi2) ## change the color setTrackStyleParam(back2back, "breaks", c(seq(from=0, to=50, by=10), 200)) setTrackStyleParam(back2back, "color", c("lightblue", "yellow", "red")) ## chang the lim of y-axis (by default, [0, 1]) setTrackStyleParam(back2back, "ylim", c(0, .5)) viewTracks(trackList(ctcf, back2back, heightDist=c(1, 5)), gr=range, autoOptimizeStyle = TRUE) addInteractionAnnotation(tads, "back2back", grid.lines, gp=gpar(col="cyan", lwd=3, lty=2)) addInteractionAnnotation(-52208000, "back2back", gp=gpar(col="blue", lwd=3), panel="top") addInteractionAnnotation(51508000, "back2back", gp=gpar(col="gray", lwd=3, lty=2), panel="bottom")
Plot chromatin interactions track as links.
setTrackStyleParam(heatmap, "tracktype", "link") setTrackStyleParam(heatmap, "breaks", c(seq(from=0, to=50, by=10), 200)) setTrackStyleParam(heatmap, "color", c("lightblue", "yellow", "red")) ## filter the links to simulate the real data keep <- distance(heatmap$dat, heatmap$dat2) > 5e5 & heatmap$dat$score>20 heatmap$dat <- heatmap$dat[keep] heatmap$dat2 <- heatmap$dat2[keep] viewTracks(trackList(heatmap), gr=range, autoOptimizeStyle = TRUE)
To import interactions data from ".hic" (reference to
the script of hic-straw and the
documentation).
The function importGInteractions
(trackViewer version>=1.27.6) can be used to
import data from .hic
format file.
hic <- system.file("extdata", "test_chr22.hic", package = "trackViewer", mustWork=TRUE) if(.Platform$OS.type!="windows"){ importGInteractions(file=hic, format="hic", ranges=GRanges("22", IRanges(50000000, 100000000)), out = "GInteractions") }
Another widely used genomic interaction data format is .cool
files and the
cooler index contains analyzed HiC data
for hg19 and mm9 from many different sources. Those files can be used as data
resources for visualizations and annotations
(see ChIPpeakAnno::findEnhancers).
The importGInteractions
function can also be used to import data from .cool
format (trackViewer version>=1.27.6).
cool <- system.file("extdata", "test.mcool", package = "trackViewer", mustWork=TRUE) importGInteractions(file=cool, format="cool", resolution = 2, ranges=GRanges("chr1", IRanges(10, 28)), out = "GInteractions")
loopBouquet
or MDS
plotThe genomic contact frequency can be converted into spatial distances and then
visualized using optimization-based (such as manifold learning techniques)
or probabilistic approaches (such as Markov Chain Monte Carlo).
Here mdsPlot
can be used to plot the bin-based contact matrix by Kruskal's Non-metric Multidimensional Scaling.
library(TxDb.Hsapiens.UCSC.hg19.knownGene) library(org.Hs.eg.db) set.seed(1) feature.gr <- genes(TxDb.Hsapiens.UCSC.hg19.knownGene) feature.gr <- subsetByOverlaps(feature.gr, range(regions(gi))) symbols <- mget(feature.gr$gene_id, org.Hs.egSYMBOL, ifnotfound=NA) feature.gr$label[lengths(symbols)==1] <- unlist(symbols[lengths(symbols)==1]) feature.gr$col <- sample(1:7, length(feature.gr), replace=TRUE) feature.gr$type <- sample(c("cRE", "gene"), length(feature.gr), replace=TRUE, prob=c(0.1, 0.9)) feature.gr$pch <- rep(NA, length(feature.gr)) feature.gr$pch[feature.gr$type=='cRE'] <- 11 mdsPlot(gi, range = range, feature.gr = feature.gr, atacSig = ctcf)
Or plot it in 3d.
library(rgl) library(manipulateWidget) clear3d() # Remove the earlier display mdsPlot(gi, range = range, feature.gr = feature.gr, atacSig = ctcf, k=3) rglwidget() %>% toggleWidget(tags = "tick_minor") %>% toggleWidget(tags = "tick_major") %>% toggleWidget(tags = "tick_labels") %>% toggleWidget(tags = "atac_signal") %>% toggleWidget(tags = "backbone") %>% toggleWidget(tags = "gene_body") %>% toggleWidget(tags = "tss_labels") %>% toggleWidget(tags = "gene_labels") %>% toggleWidget(tags = "cRE") %>% asRow(last = 9)
Different from most of the available tools, loopBouquetPlot
try to plot the loops with the 2D structure. The nodes indicate the region with interactions and
the edges indicates the interactions. The size of the nodes are relative to the width of the region.
The features could be the cRE or gene. The cRE are shown as
points with symbol 11.
library(TxDb.Hsapiens.UCSC.hg19.knownGene) library(org.Hs.eg.db) library(InteractionSet) gi <- readRDS(system.file("extdata", "gi.rds", package="trackViewer")) range <- GRanges("chr2", IRanges(234300000, 235000000)) gene_hg19 <- suppressMessages(genes(TxDb.Hsapiens.UCSC.hg19.knownGene)) feature.gr <- subsetByOverlaps(gene_hg19, range(regions(gi))) feature.gr$col <- sample(2:7, length(feature.gr), replace=TRUE) feature.gr$type <- sample(c("cRE", "gene"), length(feature.gr), replace=TRUE, prob=c(0.1, 0.9)) feature.gr$pch <- rep(NA, length(feature.gr)) feature.gr$pch[feature.gr$type=='cRE'] <- 11 symbol <- mget(feature.gr$gene_id, org.Hs.egSYMBOL, ifnotfound = NA) symbol <- unlist(lapply(symbol, function(.ele) .ele[1])) feature.gr$label <- symbol loopBouquetPlot(gi, range, feature.gr)
gi <- readRDS(system.file("extdata", "nij.chr6.51120000.53200000.gi.rds", package="trackViewer")) range <- GRanges("chr6", IRanges(51120000, 53200000)) ## filter the links to simulate the real data keep <- distance(first(gi), second(gi)) > 5e5 & gi$score>35 gi <- gi[keep] # narrow the width of anchors to ehance the plots reg <- regions(gi) wr <- floor(width(reg)/4) start(reg) <- start(reg) + wr end(reg) <- end(reg) - wr regions(gi) <- reg feature.gr <- subsetByOverlaps(gene_hg19, range(regions(gi))) feature.gr$col <- sample(2:7, length(feature.gr), replace=TRUE) feature.gr$type <- sample(c("cRE", "gene"), length(feature.gr), replace=TRUE, prob=c(0.1, 0.9)) symbol <- mget(feature.gr$gene_id, org.Hs.egSYMBOL, ifnotfound = NA) symbol <- unlist(lapply(symbol, function(.ele) .ele[1])) feature.gr$label <- symbol feature.gr <- c(feature.gr[sample(seq_along(feature.gr), 5)], feature.gr[feature.gr$type=='cRE'][1]) feature.gr <- unique(sort(feature.gr)) loopBouquetPlot(gi, range, feature.gr, coor_tick_unit = 5e4, coor_mark_interval = 5e5, atacSig = ctcf)
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.