ChIPpeakAnno.R
In ChIPpeakAnno: Batch annotation of the peaks identified from either ChIP-seq, ChIP-chip experiments or any experiments resulted in large number of chromosome ranges

## ---- echo=FALSE, results="hide", warning=FALSE-------------------------------
suppressPackageStartupMessages({
    library(ChIPpeakAnno)
    library(org.Hs.eg.db)
    library(GenomicFeatures)
    library(TxDb.Hsapiens.UCSC.hg38.knownGene)
    library(rtracklayer)
    library(GO.db)
    library(EnsDb.Hsapiens.v75)
    library(BSgenome.Ecoli.NCBI.20080805)
})
knitr::opts_chunk$set(warning=FALSE, message=FALSE)

## ----quickStart---------------------------------------------------------------
library(ChIPpeakAnno)
## import the MACS output
macs <- system.file("extdata", "MACS_peaks.xls", package="ChIPpeakAnno")
macsOutput <- toGRanges(macs, format="MACS")
## annotate the peaks with precompiled ensembl annotation
data(TSS.human.GRCh38)
macs.anno <- annotatePeakInBatch(macsOutput, AnnotationData=TSS.human.GRCh38)
## add gene symbols
library(org.Hs.eg.db)
macs.anno <- addGeneIDs(annotatedPeak=macs.anno, 
                        orgAnn="org.Hs.eg.db", 
                        IDs2Add="symbol")

if(interactive()){## annotate the peaks with UCSC annotation
    library(GenomicFeatures)
    library(TxDb.Hsapiens.UCSC.hg38.knownGene)
    ucsc.hg38.knownGene <- genes(TxDb.Hsapiens.UCSC.hg38.knownGene)
    macs.anno <- annotatePeakInBatch(macsOutput, 
                                     AnnotationData=ucsc.hg38.knownGene)
    macs.anno <- addGeneIDs(annotatedPeak=macs.anno, 
                            orgAnn="org.Hs.eg.db", 
                            feature_id_type="entrez_id",
                            IDs2Add="symbol")
}

## ----workflow19, fig.cap="venn diagram of overlaps for duplicated experiments"----
bed <- system.file("extdata", "MACS_output.bed", package="ChIPpeakAnno")
gr1 <- toGRanges(bed, format="BED", header=FALSE) 
## one can also try import from rtracklayer
library(rtracklayer)
gr1.import <- import(bed, format="BED")
identical(start(gr1), start(gr1.import))
gr1[1:2]
gr1.import[1:2] #note the name slot is different from gr1
gff <- system.file("extdata", "GFF_peaks.gff", package="ChIPpeakAnno")
gr2 <- toGRanges(gff, format="GFF", header=FALSE, skip=3)
ol <- findOverlapsOfPeaks(gr1, gr2)
makeVennDiagram(ol,
                fill=c("#009E73", "#F0E442"), # circle fill color
                col=c("#D55E00", "#0072B2"), #circle border color
                cat.col=c("#D55E00", "#0072B2"))

## ----workflow20,fig.cap="Pie chart of common peaks among features"------------
pie1(table(ol$overlappingPeaks[["gr1///gr2"]]$overlapFeature))

## ----workflow21---------------------------------------------------------------
overlaps <- ol$peaklist[["gr1///gr2"]]
## ============== old style ===========
## data(TSS.human.GRCh37) 
## overlaps.anno <- annotatePeakInBatch(overlaps, AnnotationData=annoData, 
##                                      output="overlapping", maxgap=5000L)
## overlaps.anno <- addGeneIDs(overlaps.anno, "org.Hs.eg.db", "symbol")
## ============== new style ===========
library(EnsDb.Hsapiens.v75) ##(hg19)
## create annotation file from EnsDb or TxDb
annoData <- toGRanges(EnsDb.Hsapiens.v75, feature="gene")
annoData[1:2]
overlaps.anno <- annotatePeakInBatch(overlaps, AnnotationData=annoData, 
                                    output="overlapping", maxgap=5000L)
overlaps.anno$gene_name <- 
    annoData$gene_name[match(overlaps.anno$feature,
                             names(annoData))]
head(overlaps.anno)

## ----workflow22,fig.cap="Distribution of aggregated peak scores or peak numbers around transcript start sites.",fig.width=8,fig.height=6----
gr1.copy <- gr1
gr1.copy$score <- 1
binOverFeature(gr1, gr1.copy, annotationData=annoData,
               radius=5000, nbins=10, FUN=c(sum, length),
               ylab=c("score", "count"), 
               main=c("Distribution of aggregated peak scores around TSS", 
                      "Distribution of aggregated peak numbers around TSS"))

## ----workflow23,fig.cap="Peak distribution over different genomic features.",fig.width=10,fig.height=4----
if(require(TxDb.Hsapiens.UCSC.hg19.knownGene)){
    aCR<-assignChromosomeRegion(gr1, nucleotideLevel=FALSE, 
                           precedence=c("Promoters", "immediateDownstream", 
                                         "fiveUTRs", "threeUTRs", 
                                         "Exons", "Introns"), 
                           TxDb=TxDb.Hsapiens.UCSC.hg19.knownGene)
    barplot(aCR$percentage)
}

## ----findOverlapsOfPeaks3-----------------------------------------------------
peaks1 <- GRanges(seqnames=c("1", "2", "3", "4", "5", "6", 
                              "2", "6", "6", "6", "6", "5"),
                   ranges=IRanges(start=c(967654, 2010897, 2496704, 3075869, 
                                          3123260, 3857501, 201089, 1543200, 
                                          1557200, 1563000, 1569800, 167889600),
                                  end= c(967754, 2010997, 2496804, 3075969, 
                                         3123360, 3857601, 201089, 1555199,
                                         1560599, 1565199, 1573799, 167893599),
                                  names=paste("Site", 1:12, sep="")),
                  strand="+")

peaks2 <- GRanges(seqnames=c("1", "2", "3", "4", "5", "6", "1", "2", "3", 
                                     "4", "5", "6", "6", "6", "6", "6", "5"),
                          ranges=IRanges(start=c(967659, 2010898, 2496700, 
                                                 3075866, 3123260, 3857500, 
                                                 96765, 201089, 249670, 307586, 
                                                 312326, 385750, 1549800, 
                                                 1554400, 1565000, 1569400,
                                                 167888600), 
                                         end=c(967869, 2011108, 2496920, 
                                               3076166,3123470, 3857780, 
                                               96985, 201299, 249890, 307796, 
                                               312586, 385960, 1550599, 1560799,
                                               1565399, 1571199, 167888999), 
                                         names=paste("t", 1:17, sep="")),
                          strand=c("+", "+", "+", "+", "+", "+", "-", "-", "-", 
                                   "-", "-", "-", "+", "+", "+", "+", "+"))

ol <- findOverlapsOfPeaks(peaks1, peaks2, maxgap=1000)
peaklist <- ol$peaklist

## ----overlappingPeaks4,fig.cap="Pie chart of common peaks among features."----
overlappingPeaks <- ol$overlappingPeaks
names(overlappingPeaks)
dim(overlappingPeaks[["peaks1///peaks2"]])
overlappingPeaks[["peaks1///peaks2"]][1:2, ]
pie1(table(overlappingPeaks[["peaks1///peaks2"]]$overlapFeature))

## ----overlappingPeaks5--------------------------------------------------------
peaklist[["peaks1///peaks2"]]

## ----6------------------------------------------------------------------------
peaklist[["peaks1"]]

## ----7------------------------------------------------------------------------
peaklist[["peaks2"]]

## ----findOverlapsOfPeaks8,fig.cap="venn diagram of overlaps",fig.width=6,fig.height=6----
makeVennDiagram(ol, totalTest=1e+2,
                fill=c("#009E73", "#F0E442"), # circle fill color
                col=c("#D55E00", "#0072B2"), #circle border color
                cat.col=c("#D55E00", "#0072B2"))

## ----VennerableFigure---------------------------------------------------------
#     install.packages("Vennerable", repos="http://R-Forge.R-project.org", 
#                     type="source")
#     library(Vennerable)
#     venn_cnt2venn <- function(venn_cnt){
#         n <- which(colnames(venn_cnt)=="Counts") - 1
#         SetNames=colnames(venn_cnt)[1:n]
#         Weight=venn_cnt[,"Counts"]
#         names(Weight) <- apply(venn_cnt[,1:n], 1, base::paste, collapse="")
#         Venn(SetNames=SetNames, Weight=Weight)
#     }
# 
#     v <- venn_cnt2venn(ol$venn_cnt)
#     plot(v)

## ----findOverlapsOfPeaks9,fig.cap="venn diagram of overlaps for three input peak lists",fig.width=6,fig.height=6----
peaks3 <- GRanges(seqnames=c("1", "2", "3", "4", "5", 
                             "6", "1", "2", "3", "4"),
                   ranges=IRanges(start=c(967859, 2010868, 2496500, 3075966,
                                          3123460, 3851500, 96865, 201189, 
                                          249600, 307386),
                                  end= c(967969, 2011908, 2496720, 3076166,
                                         3123470, 3857680, 96985, 201299, 
                                         249890, 307796),
                                  names=paste("p", 1:10, sep="")),
                  strand=c("+", "+", "+", "+", "+", 
                           "+", "-", "-", "-", "-"))

ol <- findOverlapsOfPeaks(peaks1, peaks2, peaks3, maxgap=1000, 
                          connectedPeaks="min")
makeVennDiagram(ol, totalTest=1e+2,
                fill=c("#CC79A7", "#56B4E9", "#F0E442"), # circle fill color
                col=c("#D55E00", "#0072B2", "#E69F00"), #circle border color
                cat.col=c("#D55E00", "#0072B2", "#E69F00"))

## ----annoGRgene---------------------------------------------------------------
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
annoData <- toGRanges(TxDb.Hsapiens.UCSC.hg19.knownGene, feature="gene")
annoData

## ----annotatePeakInBatchByannoGR----------------------------------------------
data(myPeakList)
annotatedPeak <- annotatePeakInBatch(myPeakList[1:6],
                                     AnnotationData = annoData)
annotatedPeak[1:3]

## ----annotatePeakInBatch1-----------------------------------------------------
data(TSS.human.NCBI36)
annotatedPeak <- annotatePeakInBatch(myPeakList[1:6], 
                 AnnotationData=TSS.human.NCBI36)
annotatedPeak[1:3]

## ----annotatePeakInBatch2-----------------------------------------------------
myPeak1 <- GRanges(seqnames=c("1", "2", "3", "4", "5", "6", 
                              "2", "6", "6", "6", "6", "5"),
                   ranges=IRanges(start=c(967654, 2010897, 2496704, 3075869, 
                                          3123260, 3857501, 201089, 1543200, 
                                          1557200, 1563000, 1569800, 167889600),
                                  end= c(967754, 2010997, 2496804, 3075969, 
                                         3123360, 3857601, 201089, 1555199,
                                         1560599, 1565199, 1573799, 167893599),
                                  names=paste("Site", 1:12, sep="")))

TFbindingSites <- GRanges(seqnames=c("1", "2", "3", "4", "5", "6", "1", "2", 
                                     "3", "4", "5", "6", "6", "6", "6", "6",
                                     "5"),
                          ranges=IRanges(start=c(967659, 2010898, 2496700, 
                                                 3075866, 3123260, 3857500, 
                                                 96765, 201089, 249670, 307586, 
                                                 312326, 385750, 1549800, 
                                                 1554400, 1565000, 1569400,
                                                 167888600), 
                                         end=c(967869, 2011108, 2496920, 
                                               3076166,3123470, 3857780, 
                                               96985, 201299, 249890, 307796, 
                                               312586, 385960, 1550599, 1560799,
                                               1565399, 1571199, 167888999), 
                                         names=paste("t", 1:17, sep="")),
                          strand=c("+", "+", "+", "+", "+", "+", "-", "-", "-", 
                                   "-", "-", "-", "+", "+", "+", "+", "+"))

annotatedPeak2 <- annotatePeakInBatch(myPeak1, AnnotationData=TFbindingSites)
annotatedPeak2[1:3]

## ----pie1,fig.cap="Pie chart of peak distribution among features",fig.width=5,fig.height=5----
pie1(table(as.data.frame(annotatedPeak2)$insideFeature))

## ----annotatedPromoter--------------------------------------------------------
library(ChIPpeakAnno)
data(myPeakList)
data(TSS.human.NCBI36)
annotationData <- promoters(TSS.human.NCBI36, upstream=5000, downstream=500)
annotatedPeak <- annotatePeakInBatch(myPeakList[1:6,], 
                                     AnnotationData=annotationData,
                                     output="overlapping")
annotatedPeak[1:3]

## -----------------------------------------------------------------------------
annotatedPeak <- annotatePeakInBatch(myPeakList[1:6],
                                     AnnotationData = annoData,
                                     output="both", maxgap=5000)
head(annotatedPeak)

## ----addGeneIDs18-------------------------------------------------------------
data(annotatedPeak)
library(org.Hs.eg.db)
addGeneIDs(annotatedPeak[1:6], orgAnn="org.Hs.eg.db", IDs2Add=c("symbol"))
addGeneIDs(annotatedPeak$feature[1:6], orgAnn="org.Hs.eg.db", 
           IDs2Add=c("symbol"))

## ----getAllPeakSequence12-----------------------------------------------------
peaks <- GRanges(seqnames=c("NC_008253", "NC_010468"),
                 ranges=IRanges(start=c(100, 500), 
                                end=c(300, 600), 
                                names=c("peak1", "peak2")))
library(BSgenome.Ecoli.NCBI.20080805)
peaksWithSequences <- getAllPeakSequence(peaks, upstream=20, 
                                         downstream=20, genome=Ecoli)

## ----write2FASTA13------------------------------------------------------------
write2FASTA(peaksWithSequences,"test.fa")

## ----heatmap,fig.cap="Heatmap of aligned features",fig.width=4,fig.height=6----
path <- system.file("extdata", package="ChIPpeakAnno")
files <- dir(path, "broadPeak")
data <- sapply(file.path(path, files), toGRanges, format="broadPeak")
names(data) <- gsub(".broadPeak", "", files)
ol <- findOverlapsOfPeaks(data)
#makeVennDiagram(ol)
features <- ol$peaklist[[length(ol$peaklist)]]
wid <- width(features)
feature.recentered <- feature.center <- features
start(feature.center) <- start(features) + floor(wid/2)
width(feature.center) <- 1
start(feature.recentered) <- start(feature.center) - 2000
end(feature.recentered) <- end(feature.center) + 2000
## here we also suggest importData function in bioconductor trackViewer package 
## to import the coverage.
## compare rtracklayer, it will save you time when handle huge dataset.
library(rtracklayer)
files <- dir(path, "bigWig")
if(.Platform$OS.type != "windows"){
    cvglists <- sapply(file.path(path, files), import, 
                       format="BigWig", 
                       which=feature.recentered, 
                       as="RleList")
}else{## rtracklayer can not import bigWig files on Windows
    load(file.path(path, "cvglist.rds"))
}
names(cvglists) <- gsub(".bigWig", "", files)
sig <- featureAlignedSignal(cvglists, feature.center, 
                            upstream=2000, downstream=2000) 
heatmap <- featureAlignedHeatmap(sig, feature.center, 
                                 upstream=2000, downstream=2000,
                                 upper.extreme=c(3,.5,4))

## ----distribution,fig.cap="Distribution of aligned features",fig.width=6,fig.height=6----
featureAlignedDistribution(sig, feature.center, 
                           upstream=2000, downstream=2000,
                           type="l")

## ----summarizePatternInPeaks17------------------------------------------------
peaks <- GRanges(seqnames=c("NC_008253", "NC_010468"),
                 ranges=IRanges(start=c(100, 500), 
                                end=c(300, 600), 
                                names=c("peak1", "peak2")))
filepath <- system.file("extdata", "examplePattern.fa", package="ChIPpeakAnno")
readLines(filepath)
library(BSgenome.Ecoli.NCBI.20080805)
summarizePatternInPeaks(patternFilePath=filepath, format="fasta", skip=0L, 
                        BSgenomeName=Ecoli, peaks=peaks)

## ----getEnriched14------------------------------------------------------------
library(org.Hs.eg.db)
over <- getEnrichedGO(annotatedPeak[1:500], orgAnn="org.Hs.eg.db", 
                    maxP=0.01, minGOterm=10, 
                    multiAdjMethod="BH",
                    condense=FALSE)
head(over[["bp"]][, -3])
head(over[["cc"]][, -3])
head(over[["mf"]][, -3])

## ----egOrgMap15---------------------------------------------------------------
egOrgMap("Mus musculus")
egOrgMap("Homo sapiens")

## ----peaksNearBDP16-----------------------------------------------------------
data(myPeakList)
data(TSS.human.NCBI36)
seqlevelsStyle(TSS.human.NCBI36) <- seqlevelsStyle(myPeakList)
annotatedBDP <- peaksNearBDP(myPeakList[1:10,], 
                             AnnotationData=TSS.human.NCBI36, 
                             MaxDistance=5000)
annotatedBDP$peaksWithBDP
c(annotatedBDP$percentPeaksWithBDP, 
  annotatedBDP$n.peaks, 
  annotatedBDP$n.peaksWithBDP)

## ----peakPermTest-------------------------------------------------------------
if(interactive()){
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
cds <- unique(unlist(cdsBy(txdb)))
utr5 <- unique(unlist(fiveUTRsByTranscript(txdb)))
utr3 <- unique(unlist(threeUTRsByTranscript(txdb)))
set.seed(123)
utr3 <- utr3[sample.int(length(utr3), 1000)]
pt <- peakPermTest(utr3, 
             utr5[sample.int(length(utr5), 1000)], 
             maxgap=500,
             TxDb=txdb, seed=1,
             force.parallel=FALSE)
plot(pt)
## highly relevant peaks
ol <- findOverlaps(cds, utr3, maxgap=1)
pt1 <- peakPermTest(utr3,
             c(cds[sample.int(length(cds), 500)], 
                cds[queryHits(ol)][sample.int(length(ol), 500)]), 
             maxgap=500, 
             TxDb=txdb, seed=1,
             force.parallel=FALSE)
plot(pt1)
}

## ----peakPermTest1------------------------------------------------------------
if(interactive()){
    data(HOT.spots)
    data(wgEncodeTfbsV3)
    hotGR <- reduce(unlist(HOT.spots))
    removeOl <- function(.ele){
        ol <- findOverlaps(.ele, hotGR)
        if(length(ol)>0) .ele <- .ele[-unique(queryHits(ol))]
        .ele
    }
    temp <- tempfile()
    download.file(file.path("http://hgdownload.cse.ucsc.edu", 
                            "goldenPath", "hg19", "encodeDCC", 
                            "wgEncodeRegTfbsClustered", 
                            "wgEncodeRegTfbsClusteredV3.bed.gz"), temp)
    data <- toGRanges(gzfile(temp, "r"), header=FALSE, format="others", 
                      colNames = c("seqnames", "start", "end", "TF"))
    unlink(temp)
    data <- split(data, data$TF)
    TAF1 <- removeOl(data[["TAF1"]])
    TEAD4 <- removeOl(data[["TEAD4"]])
    pool <- new("permPool", grs=GRangesList(wgEncodeTfbsV3), N=length(TAF1))
    pt <- peakPermTest(TAF1, TEAD4, pool=pool, ntimes=1000)
    plot(pt)
}

## ----citation, echo=FALSE-----------------------------------------------------
citation(package="ChIPpeakAnno")

## ----sessionInfo--------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
ChIPpeakAnno documentation built on April 1, 2021, 6:01 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
ChIPpeakAnno
Batch annotation of the peaks identified from either ChIP-seq, ChIP-chip experiments or any experiments resulted in large number of chromosome ranges

inst/doc/ChIPpeakAnno.R
In ChIPpeakAnno: Batch annotation of the peaks identified from either ChIP-seq, ChIP-chip experiments or any experiments resulted in large number of chromosome ranges

Try the ChIPpeakAnno package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ChIPpeakAnno Batch annotation of the peaks identified from either ChIP-seq, ChIP-chip experiments or any experiments resulted in large number of chromosome ranges

inst/doc/ChIPpeakAnno.R In ChIPpeakAnno: Batch annotation of the peaks identified from either ChIP-seq, ChIP-chip experiments or any experiments resulted in large number of chromosome ranges

Try the ChIPpeakAnno package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ChIPpeakAnno
Batch annotation of the peaks identified from either ChIP-seq, ChIP-chip experiments or any experiments resulted in large number of chromosome ranges

inst/doc/ChIPpeakAnno.R
In ChIPpeakAnno: Batch annotation of the peaks identified from either ChIP-seq, ChIP-chip experiments or any experiments resulted in large number of chromosome ranges