knitr::opts_chunk$set(echo = TRUE)
load("Report2.Rdata")

Summary Table

Sequence files below are set as inputs of the pipeline.

Case files:

casefilelist

Control files:

ctrlfilelist

Summerized infomation on sequence files has been shown showed below. You can see details in later sections

knitr::kable(x = wholesummary)

A nucleosome free region (NFR) must be present. A mononucleosome peak must be present in the fragment length distribution. These are reads that span a single nucleosome, so they are longer than 147 bp but shorter than 147*2 bp. Good ATAC-seq datasets have reads that span nucleosomes (which allows for calling nucleosome positions in addition to open regions of chromatin).

library(ggplot2)
readsCounts<-getReportVal(caselist$atacProcs$fragLenDistr,"readsCounts")
ggplot(readsCounts[1:1000,], aes(length,counts))+geom_path(color="Red")+xlab("Fragment length (bp)")+ylab("Read counts") + theme_bw() + theme(panel.grid =element_blank()) + geom_vline(xintercept = c(147,294), linetype=2) + annotate("text", x = 147, y = max(readsCounts[1:1000,2]),label="147bp") + annotate("text", x = 147*2, y = max(readsCounts[1:1000,2]),label="147bp*2") + labs(title = "Fragment Length Distribution") + theme(plot.title = element_text(hjust = 0.5))
library(ggplot2)
readsCounts<-getReportVal(ctrllist$atacProcs$fragLenDistr,"readsCounts")
ggplot(readsCounts[1:1000,], aes(length,counts))+geom_path(color="Red")+xlab("Fragment length (bp)")+ylab("Read counts") + theme_bw() + theme(panel.grid =element_blank()) + geom_vline(xintercept = c(147,294), linetype=2) + annotate("text", x = 147, y = max(readsCounts[1:1000,2]),label="147bp") + annotate("text", x = 147*2, y = max(readsCounts[1:1000,2]),label="147bp*2") + labs(title = "Fragment Length Distribution") + theme(plot.title = element_text(hjust = 0.5))

Sequence Statistics

FastQC

Quality control for the sequence data

QC_path1 <- getReportVal(caselist$atacProcs$atacQC,"pdf")
QC_path2 <- getReportVal(ctrllist$atacProcs$atacQC,"pdf")

Click to Visit Report for case

Click to Visit Report for control

Remove adapter

The adapter sequence are shown below. For paired end reads, if adapters were not setted, the adapters below are identified by AdapterRemoval.

knitr::kable(getReportVal(caselist$atacProcs$removeAdapter,"adapters"))
knitr::kable(getReportVal(ctrllist$atacProcs$removeAdapter,"adapters"))

The statistic of adapter removing are show below.

casetb<-getReportVal(caselist$atacProcs$removeAdapter,"statistics")
ctrltb<-getReportVal(ctrllist$atacProcs$removeAdapter,"statistics")
knitr::kable(data.frame(Items=casetb[["Item"]],
                        Case=casetb[["Value"]],
                        Control=ctrltb[["Value"]]
                        ))

For detail, you can visit Website of AdapterRemoval on Github.

Reads Alignment Statistics

Bowtie2 alignment log

getReportVal(caselist$atacProcs$bowtie2Mapping,"detail")
getReportVal(ctrllist$atacProcs$bowtie2Mapping,"detail")

Library complexity

casetb<-getReportVal(caselist$atacProcs$libComplexQC,"report")
ctrltb<-getReportVal(ctrllist$atacProcs$libComplexQC,"report")
knitr::kable(data.frame(Items=casetb[["Item"]],
                        Case=casetb[["Value"]],
                        Control=ctrltb[["Value"]],
                        Reference=ctrltb[["Reference"]]
                        ))

The annotation you can see in section 1.

Filtering statistics

knitr::kable(x = filtstat)

Fragment size distribution

library(ggplot2)
library(stats)
load("Report2.Rdata")

getFsdG1G2 <- function(readsCounts){



    strength<-Mod(fft(readsCounts$counts))/length(readsCounts$counts)
    periodx<-length(readsCounts$counts)/(1:(length(readsCounts$counts)-1))
    strength<-strength[2:length(strength)]

    rs1<-as.data.frame(cbind(periodx[periodx<20&periodx>2],strength[periodx<20&periodx>2],0))
    rs2<-as.data.frame(cbind(periodx[periodx<400&periodx>2],strength[periodx<400&periodx>2],1))
    rs<-rbind(rs1,rs2)
    colnames(rs)<-c("period","strength","check")

    g1<-ggplot(rs[rs["check"]==0,]) + geom_vline(xintercept = 10.4, linetype=2)+ geom_line(aes(x=period,y=strength),color="Red")+ theme_bw() + theme(panel.grid =element_blank()) + annotate("text", x = 10.4, y = max(rs[rs["check"]==0,2]), label = "10.4bp") +xlab("period") + ylab("strength")+ labs(title = "the Pitch of the DNA Helix") + theme(plot.title = element_text(hjust = 0.5))

    g2<-ggplot(rs[rs["check"]==1,]) + geom_vline(xintercept = 186, linetype=2)+ geom_line(aes(x=period,y=strength),color="Red")+ theme_bw() + theme(panel.grid =element_blank()) + annotate("text", x = 186, y = max(rs[rs["check"]==1,2]), label = "186bp") +xlab("period") + ylab("strength")+ labs(title = "Nucleosome") + theme(plot.title = element_text(hjust = 0.5))
    return(list(g1,g2))
}
readsCounts1<-getReportVal(caselist$atacProcs$fragLenDistr,"readsCounts")
readsCounts2<-getReportVal(ctrllist$atacProcs$fragLenDistr,"readsCounts")
ggplot(readsCounts1[1:1000,], aes(length,counts))+geom_path(color="Red")+xlab("Fragment length (bp)")+ylab("Read counts") + theme_bw() + theme(panel.grid =element_blank()) + labs(title = "Fragment Length Distribution") + theme(plot.title = element_text(hjust = 0.5))
ggplot(readsCounts2[1:1000,], aes(length,counts))+geom_path(color="Red")+xlab("Fragment length (bp)")+ylab("Read counts") + theme_bw() + theme(panel.grid =element_blank()) + labs(title = "Fragment Length Distribution") + theme(plot.title = element_text(hjust = 0.5))
g112<-getFsdG1G2(readsCounts1)
g212<-getFsdG1G2(readsCounts2)

library(gridExtra)
grid.arrange(g112[[1]], g212[[1]],ncol=2)
grid.arrange(g112[[2]], g212[[2]],ncol=2)

TSS enrichment

The nucleosome free reads (<100bp) and monnucleosome span reads (180~247bp) enrichment around transcription starting site (TSS) are shown below.

library(ggplot2)
library(gridExtra)
load("Report2.Rdata")
df<-getReportVal(caselist$atacProcs$tssqc100,"tss")
g11<-ggplot(df,aes(pos,counts))+geom_line()+ geom_vline(xintercept = 0, linetype=2)+xlab("upstream<-TSS->downstream")+ylab("reads count")+theme_bw() + theme(panel.grid =element_blank()) + labs(title = "Nucleosome Free Reads") + theme(plot.title = element_text(hjust = 0.5))
df<-getReportVal(caselist$atacProcs$tssqc180_247,"tss")
g12<-ggplot(df,aes(pos,counts))+geom_line()+ geom_vline(xintercept = 0, linetype=2)+xlab("upstream<-TSS->downstream")+ylab("reads count")+theme_bw() + theme(panel.grid =element_blank()) + labs(title = "Monnucleosome Span Reads") + theme(plot.title = element_text(hjust = 0.5))

df<-getReportVal(caselist$atacProcs$tssqc100,"tss")
g21<-ggplot(df,aes(pos,counts))+geom_line()+ geom_vline(xintercept = 0, linetype=2)+xlab("upstream<-TSS->downstream")+ylab("reads count")+theme_bw() + theme(panel.grid =element_blank()) + labs(title = "Nucleosome Free Reads") + theme(plot.title = element_text(hjust = 0.5))
df<-getReportVal(caselist$atacProcs$tssqc180_247,"tss")
g22<-ggplot(df,aes(pos,counts))+geom_line()+ geom_vline(xintercept = 0, linetype=2)+xlab("upstream<-TSS->downstream")+ylab("reads count")+theme_bw() + theme(panel.grid =element_blank()) + labs(title = "Monnucleosome Span Reads") + theme(plot.title = element_text(hjust = 0.5))
grid.arrange(g11, g21, ncol=2)
grid.arrange(g12, g22, ncol=2)

Peak Statistics

Blacklist ratio

ifelse(is.null(caselist$atacProcs$blacklistQC),"NA",{
casetb<-getReportVal(caselist$atacProcs$blacklistQC,"report")
ctrltb<-getReportVal(ctrllist$atacProcs$blacklistQC,"report")
knitr::kable(data.frame(Items=casetb[["Item"]],
                        Case=casetb[["Value"]],
                        Control=ctrltb[["Value"]]
                        ))
})

DHS ratio

ifelse(is.null(caselist$atacProcs$DHSQC),"NA",{
casetb<-getReportVal(caselist$atacProcs$DHSQC,"report")
ctrltb<-getReportVal(ctrllist$atacProcs$DHSQC,"report")
knitr::kable(data.frame(Items=casetb[["Item"]],
                        Case=casetb[["Value"]],
                        Control=ctrltb[["Value"]]
                        ))
})

Fraction of reads in peaks (FRiP)

casetb<-getReportVal(caselist$atacProcs$fripQC,"report")
ctrltb<-getReportVal(ctrllist$atacProcs$fripQC,"report")
knitr::kable(data.frame(Items=casetb[["Item"]],
                        Case=casetb[["Value"]],
                        Control=ctrltb[["Value"]]
                        ))

Peak annotation

library(ChIPseeker)
peakanno <- getReportVal(caselist$atacProcs$Peakanno, "annoOutput.rds")
plotAnnoPie(x = peakanno)
peakanno <- getReportVal(ctrllist$atacProcs$Peakanno, "annoOutput.rds")
plotAnnoPie(x = peakanno)

Case Control Peak Compare

The following is the peak comparation result for case and control.

library(VennDiagram)
library(grid)
PeakNumInCC <- getReportVal(comp_result$peakCom, "venn.data")
venn.plot <- draw.pairwise.venn(
    area1 = PeakNumInCC[1] + PeakNumInCC[3],
    area2 = PeakNumInCC[2] + PeakNumInCC[3],
    cross.area = PeakNumInCC[3],
    category = c("Case", "Control"),
    fill = c("skyblue", "mediumorchid")
)

The total peaks in case are r PeakNumInCC[4], specific peaks in case are r PeakNumInCC[1].

The total peaks in control are r PeakNumInCC[5], specific peaks in control are r PeakNumInCC[2].

The overlap peaks are merged to r PeakNumInCC[3] peaks.

GO Analysis

Gene ontology analysis for all genes around specific peak regions of case and control.

Significant GO terms of genes associated with case specific peaks

go_path <- getReportVal(comp_result$goAna.case, "goOutput")
go_data <- read.table(file = go_path, header = TRUE, sep = "\t")
go_data <- subset(go_data, select = c("ID", "Description", "GeneRatio", "pvalue", "qvalue"))
go_data$pvalue <- signif(go_data$pvalue, digits = 3)
go_data$pvalue <- as.character(go_data$pvalue)
go_data$qvalue <- signif(go_data$qvalue, digits = 3)
go_data$qvalue <- as.character(go_data$qvalue)
if(nrow(go_data)==0){
    message("No GO terms found: empty table")
}else if(nrow(go_data) < 15){
    knitr::kable(go_data, align = "l")
}else{
    knitr::kable(go_data[1:15, ], align = "l")
}

Click to Visit Case Differential Go Analysis file

Significant GO terms of genes associated with control specific peaks

go_path <- getReportVal(comp_result$goAna.ctrl, "goOutput")
go_data <- read.table(file = go_path, header = TRUE, sep = "\t")
go_data <- subset(go_data, select = c("ID", "Description", "GeneRatio", "pvalue", "qvalue"))
go_data$pvalue <- signif(go_data$pvalue, digits = 3)
go_data$pvalue <- as.character(go_data$pvalue)
go_data$qvalue <- signif(go_data$qvalue, digits = 3)
go_data$qvalue <- as.character(go_data$qvalue)
if(nrow(go_data)==0){
    message("No GO terms found: empty table")
}else if(nrow(go_data) < 15){
    knitr::kable(go_data, align = "l")
}else{
    knitr::kable(go_data[1:15, ], align = "l")
}

Click to Visit Ctrl Differential Go Analysis file

Motif Enrichment Analysis

This function takes case and control specific peaks as foreground respectively, overlap peaks between case and control as background.

Significant motifs of case specific peaks

motif_enrich.case <- getReportVal(comp_result$mout, "rdsOutput.peak1")
motif_enrich.case <- motif_enrich.case[, c(1, 3, 4)]
colnames(motif_enrich.case) <- c("motif", "motif length", "p_value")
motif_enrich.case <- motif_enrich.case[order(motif_enrich.case$p_value), ]
rownames(motif_enrich.case) <- seq(nrow(motif_enrich.case))
motif_enrich.case$p_value <- signif(motif_enrich.case$p_value, digits = 3)
motif_enrich.case$p_value[motif_enrich.case$p_value < 1e-300] <- 0
motif_enrich.case$p_value <- as.character(motif_enrich.case$p_value)
if(nrow(motif_enrich.case) < 15){
    knitr::kable(motif_enrich.case, align = "l")
}else{
    knitr::kable(motif_enrich.case[1:15, ], align = "l")
}

Significant motifs of control specific peaks

motif_enrich.ctrl <- getReportVal(comp_result$mout, "rdsOutput.peak2")
motif_enrich.ctrl <- motif_enrich.ctrl[, c(1, 3, 4)]
colnames(motif_enrich.ctrl) <- c("motif", "motif length", "p_value")
motif_enrich.ctrl <- motif_enrich.ctrl[order(motif_enrich.ctrl$p_value), ]
rownames(motif_enrich.ctrl) <- seq(nrow(motif_enrich.ctrl))
motif_enrich.ctrl$p_value <- signif(motif_enrich.ctrl$p_value, digits = 3)
motif_enrich.ctrl$p_value[motif_enrich.ctrl$p_value < 1e-300] <- 0
motif_enrich.ctrl$p_value <- as.character(motif_enrich.ctrl$p_value)
if(nrow(motif_enrich.ctrl) < 15){
    knitr::kable(motif_enrich.ctrl, align = "l")
}else{
    knitr::kable(motif_enrich.ctrl[1:15, ], align = "l")
}

Genomic Footprint

The following is the footprint for motif occurance of case&control peaks.

par(mfrow=c(1,2))

footprint_data <- getReportVal(caselist$atacProcs$footprint, "footprint.data")
if("CTCF" %in% names(footprint_data)){
    footprint_figure.name <- "CTCF"
    footprint_figure.data <- as.vector(footprint_data$CTCF)
}else{
    footprint_figure.name <- names(footprint_data[1])
    footprint_figure.data <- as.vector(footprint_data[[1]])
}
footprint_figure.length <- length(footprint_figure.data) - 200
footprint_text <- paste(footprint_figure.name, "(Case)", sep = "")
plot(footprint_figure.data, type = "l", col = "blue", lwd = 2,
     main = footprint_text,
    xlab = "Relative Distance From Motif (bp)",
    ylab = "Cut Site Count", xaxt = "n", yaxt = "n")
axis(1, at = seq(1, 100, len = 3),
    labels = -(100 + 1 - seq(1, 100 + 1, len = 3)),
    padj = -1.0, tck = -0.01)
axis(1, at = 100 + footprint_figure.length + seq(1, 100, len = 3),
    labels = seq(0, 100, len = 3),
    padj = -1.0, tck = -0.01)
axis(2, padj = 1.0,tck = -0.02)
abline(v = c(100, 100 + footprint_figure.length + 1), lty = 2)

footprint_data <- getReportVal(ctrllist$atacProcs$footprint, "footprint.data")
if("CTCF" %in% names(footprint_data)){
    footprint_figure.name <- "CTCF"
    footprint_figure.data <- as.vector(footprint_data$CTCF)
}else{
    footprint_figure.name <- names(footprint_data[1])
    footprint_figure.data <- as.vector(footprint_data[[1]])
}
footprint_figure.length <- length(footprint_figure.data) - 200
footprint_text <- paste(footprint_figure.name, "(Control)", sep = "")
plot(footprint_figure.data, type = "l", col = "blue", lwd = 2,
     main = footprint_text,
    xlab = "Relative Distance From Motif (bp)",
    ylab = "Cut Site Count", xaxt = "n", yaxt = "n")
axis(1, at = seq(1, 100, len = 3),
    labels = -(100 + 1 - seq(1, 100 + 1, len = 3)),
    padj = -1.0, tck = -0.01)
axis(1, at = 100 + footprint_figure.length + seq(1, 100, len = 3),
    labels = seq(0, 100, len = 3),
    padj = -1.0, tck = -0.01)
axis(2, padj = 1.0,tck = -0.02)
abline(v = c(100, 100 + footprint_figure.length + 1), lty = 2)

case.dir <- getReportVal(caselist$atacProcs$footprint, "pdf.dir")
ctrl.dir <- getReportVal(ctrllist$atacProcs$footprint, "pdf.dir")

For all footprint of case sample, The absolute path is r R.utils::getAbsolutePath(case.dir).

For all footprint of control sample, The absolute path is r R.utils::getAbsolutePath(ctrl.dir).

The following is the footprint for motif occurance of case&control specific peaks.

par(mfrow=c(1,2))

footprint_data <- getReportVal(comp_result$footprint.case, "footprint.data")
if("CTCF" %in% names(footprint_data)){
    footprint_figure.name <- "CTCF"
    footprint_figure.data <- as.vector(footprint_data$CTCF)
}else{
    footprint_figure.name <- names(footprint_data[1])
    footprint_figure.data <- as.vector(footprint_data[[1]])
}
footprint_figure.length <- length(footprint_figure.data) - 200
footprint_text <- paste(footprint_figure.name, "(Case specific)", sep = "")
plot(footprint_figure.data, type = "l", col = "blue", lwd = 2,
     main = footprint_text,
    xlab = "Relative Distance From Motif (bp)",
    ylab = "Cut Site Count", xaxt = "n", yaxt = "n")
axis(1, at = seq(1, 100, len = 3),
    labels = -(100 + 1 - seq(1, 100 + 1, len = 3)),
    padj = -1.0, tck = -0.01)
axis(1, at = 100 + footprint_figure.length + seq(1, 100, len = 3),
    labels = seq(0, 100, len = 3),
    padj = -1.0, tck = -0.01)
axis(2, padj = 1.0,tck = -0.02)
abline(v = c(100, 100 + footprint_figure.length + 1), lty = 2)

footprint_data <- getReportVal(comp_result$footprint.ctrl, "footprint.data")
if("CTCF" %in% names(footprint_data)){
    footprint_figure.name <- "CTCF"
    footprint_figure.data <- as.vector(footprint_data$CTCF)
}else{
    footprint_figure.name <- names(footprint_data[1])
    footprint_figure.data <- as.vector(footprint_data[[1]])
}
footprint_figure.length <- length(footprint_figure.data) - 200
footprint_text <- paste(footprint_figure.name, "(Control specific)", sep = "")
plot(footprint_figure.data, type = "l", col = "blue", lwd = 2,
     main = footprint_text,
    xlab = "Relative Distance From Motif (bp)",
    ylab = "Cut Site Count", xaxt = "n", yaxt = "n")
axis(1, at = seq(1, 100, len = 3),
    labels = -(100 + 1 - seq(1, 100 + 1, len = 3)),
    padj = -1.0, tck = -0.01)
axis(1, at = 100 + footprint_figure.length + seq(1, 100, len = 3),
    labels = seq(0, 100, len = 3),
    padj = -1.0, tck = -0.01)
axis(2, padj = 1.0,tck = -0.02)
abline(v = c(100, 100 + footprint_figure.length + 1), lty = 2)

casesp.dir <- getReportVal(comp_result$footprint.case, "pdf.dir")
ctrlsp.dir <- getReportVal(comp_result$footprint.ctrl, "pdf.dir")

For all footprint of case specific, The absolute path is r R.utils::getAbsolutePath(casesp.dir).

For all footprint of control specific, The absolute path is r R.utils::getAbsolutePath(ctrlsp.dir).

Annotation of Items in Table

For single end sequencing data, esATAC will counts reads number.

For paired end sequencing data, esATAC will counts read pairs or fragment number.

$NRF$ value range |Complexity :------------------:|:---------: $NRF<0.7$ |Concerning $0.7\le NRF \le 0.9$|Acceptable $NRF>0.7$ |Ideal

$PBC1$ value range |Bottlenecking level :-------------------:|:---------: $PBC1<0.7$ |Severe $0.7\le PBC1 \le 0.9$|Moderate $PBC1>0.7$ |None

$PBC2$ value range |Bottlenecking level :------------------:|:----------: $PBC2<1$ |Severe $1\le PBC2 \le 3$ |Moderate $PBC2>3$ |None

Session Info

sessionInfo()


wzthu/ATACpipe documentation built on Aug. 12, 2022, 7:38 a.m.