## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## -----------------------------------------------------------------------------
suppressPackageStartupMessages({
library(MiniChip)
library(ComplexHeatmap)
library(GenomicFeatures)
library(ggplot2)
library(reshape2)
library(dplyr)
library(BSgenome.Mmusculus.UCSC.mm10)
#library(TxDb.Mmusculus.UCSC.mm10.knownGene)
library(TxDb.Mmusculus.UCSC.mm10.ensGene)
library(EnsDb.Mmusculus.v79)
})
## -----------------------------------------------------------------------------
peaks1.d <- read.table(system.file("extdata", "Adnp_rep1_chr11_peaks.narrowPeak", package = "MiniChip"),header=FALSE)
peaks2.d <- read.table(system.file("extdata", "Adnp_rep2_chr11_peaks.narrowPeak", package = "MiniChip"),header=FALSE)
peaks3.d <- read.table(system.file("extdata", "Adnp_rep3_chr11_peaks.narrowPeak", package = "MiniChip"),header=FALSE)
names(peaks1.d) <- c("chr","start","end","name","score","empty","foldchange","pvalue","qvalue","summit")
names(peaks2.d) <- c("chr","start","end","name","score","empty","foldchange","pvalue","qvalue","summit")
names(peaks3.d) <- c("chr","start","end","name","score","empty","foldchange","pvalue","qvalue","summit")
peaks1 <- makeGRangesFromDataFrame(peaks1.d,
keep.extra.columns=TRUE,
ignore.strand=TRUE,
seqinfo=NULL,
seqnames.field=c("chr"),
start.field=c("start"),
end.field=c("end"),
starts.in.df.are.0based=TRUE)
peaks2 <- makeGRangesFromDataFrame(peaks2.d,
keep.extra.columns=TRUE,
ignore.strand=TRUE,
seqinfo=NULL,
seqnames.field=c("chr"),
start.field=c("start"),
end.field=c("end"),
starts.in.df.are.0based=TRUE)
peaks3 <- makeGRangesFromDataFrame(peaks3.d,
keep.extra.columns=TRUE,
ignore.strand=TRUE,
seqinfo=NULL,
seqnames.field=c("chr"),
start.field=c("start"),
end.field=c("end"),
starts.in.df.are.0based=TRUE)
## -----------------------------------------------------------------------------
# peaks that occur in at least 2/3 replicates
peaks3.1 <- subsetByOverlaps(peaks3,peaks1)
peaks3.2 <- subsetByOverlaps(peaks3,peaks2)
peaks1.2 <- subsetByOverlaps(peaks1,peaks2)
#keep all 3.1 peaks, find 3.2 and 1.2 peaks that do not overlap 3.1 peaks
# peaks which are found in 3 and 2, but not in 3 and 1, and not in 1 and 2
peaks3.2u <- peaks3.2[overlapsAny(peaks3.2,peaks3.1) ==FALSE & overlapsAny(peaks3.2,peaks1.2) == FALSE]
# peaks which are found in 1 and 2, but not in 3 and 1, and not in 3 and 2
peaks1.2u <- peaks1.2[overlapsAny(peaks1.2,peaks3.1) ==FALSE & overlapsAny(peaks1.2,peaks3.2) == FALSE]
# add together all 3.1 peaks as well as unique 3.2 and 1.2 peaks
peaks <- c(peaks3.1,peaks3.2u,peaks1.2u)
## -----------------------------------------------------------------------------
peaknames <- "Adnp_peaks"
peaks
## -----------------------------------------------------------------------------
# randomize peak locations
nSites <- length(peaks)
peak.widths <- width(peaks)
random.peaks <- SimulatePeaks(nSites,peak.widths,chromosomeSizes=system.file("extdata", "chrNameLength_mm10_chr11.txt", package = "MiniChip"))
## -----------------------------------------------------------------------------
#select bam files from Adnp experiment
bamFiles <- list.files(system.file("extdata", package = "MiniChip"), full.names=TRUE,pattern="*bam$")
#all.bamFiles <- list.files("/tachyon/scratch/gbuehler/deepSeqData/bam/", full.names=TRUE,pattern="*bam$")
#all.bamFiles <- grep("test",all.bamFiles,value=TRUE,invert=TRUE)
#bamFiles <- grep("1950F",all.bamFiles,value=TRUE)[1:8]
bamFiles
## -----------------------------------------------------------------------------
bamNames <- gsub(paste(system.file("extdata", package = "MiniChip"),"/",sep=""),"",bamFiles)
bamNames <- gsub("_chr11.bam","",bamNames)
bamNames
## ----fig.height=5, fig.width=10, message = FALSE------------------------------
GCbias(bamFiles[1:2],bamNames[1:2],pe="none", restrict="chr11",GCprob=TRUE, genome=BSgenome.Mmusculus.UCSC.mm10)
## ----message = FALSE----------------------------------------------------------
span <- 3025
step <- 50
summits <- peaks
start(summits) <- start(summits) + summits$summit
end(summits) <- start(summits)
names(summits) <- elementMetadata(summits)[,"name"]
counts <- SummitHeatmap(summits,bamFiles,bamNames,span,step,useCPM=TRUE,readShiftSize=75)
## ----heatmap, fig.height=10, fig.width=8, message = FALSE---------------------
ht_list <- DrawSummitHeatmaps(counts[c(1,2,3)], bamNames[c(1,2,3)],orderSample=1, use.log=TRUE,
bottomCpm=c(0,0,0), medianCpm = c(2,2,2), topCpm = c(4,4,4), orderWindows=2, TargetHeight=500)
draw(ht_list, padding = unit(c(3, 8, 8, 2), "mm"))
## -----------------------------------------------------------------------------
inx.ChIP <- grep("ChIP",names(counts))
Adnp <- grep("Adnp",names(counts[inx.ChIP]),value=TRUE)
Input <- grep("Input",names(counts),value=TRUE)[1:3]
sampleList <- list(Adnp=Adnp,Input=Input)
meanCounts <- SummarizeHeatmaps(counts,sampleList)
## ----fig.height=5, fig.width=10, message = FALSE------------------------------
#select peaks that overlap a TSS (+/- 1kb)
#txdb=loadDb("/tungstenfs/scratch/gbuehler/michi/Annotations/GENCODE/Mouse/release_M23/gencode.vM23.annotation.txdb.sqlite")
txdb=TxDb.Mmusculus.UCSC.mm10.ensGene
TSSs <- promoters(txdb,upstream=1000,downstream=1000)
summit_TSS_overlap <- overlapsAny(summits,TSSs)
summit_TSS_overlap_names <- names(summits[summit_TSS_overlap == TRUE])
#cumulative plots
CumulativePlots(meanCounts,bamNames = names(meanCounts),
span=3025,step=50,summarizing = "mean",overlapNames = summit_TSS_overlap_names)
## ----fig.height=10, fig.width=8, message = FALSE------------------------------
promoters <- promoters(txdb,upstream=150,downstream=150)
annoname <- "promoters"
promoters.overlap <- AnnotationHeatmap(summits,annotation=promoters,annoname,span,step)
annos <- list(promoters.overlap)
names(annos) <- c("promoters")
allCounts <- c(meanCounts,annos)
cols <- c("darkblue","darkred","darkgreen")
upper.cpm <- c(rep(2,2),rep(1,1))
splitHM <- ifelse(summit_TSS_overlap==TRUE,"TSS","no TSS")
heatmap_list <- DrawSummitHeatmaps(allCounts, names(allCounts), plotcol= cols, medianCpm = upper.cpm, orderSample = 1, use.log=TRUE, summarizing = "mean", orderWindows=2,MetaScale=c("all","all","individual"), TargetHeight=500,
splitHM=splitHM)
draw(heatmap_list, padding = unit(c(3, 8, 8, 2), "mm"),show_heatmap_legend=FALSE)
## -----------------------------------------------------------------------------
#bigwig files
bigwigFiles <- list.files(system.file("extdata", package = "MiniChip"), full.names=TRUE,pattern="*bw$")
bigwigNames <- c("Adnp_r1","Adnp_r2")
bigwigGroupNames <- c("Adnp","Adnp")
#region for plotting
ROI <- peaks[peaks$score==max(peaks$score)]
ROI <- resize(ROI,1000L, fix="center")
#gene annotations
genes <- genes(txdb)
gene_anno <- genes(EnsDb.Mmusculus.v79)
mcols(genes) <- dplyr::left_join(data.frame(mcols(genes)),data.frame(mcols(gene_anno)),by=c("gene_id"="gene_id"))
exons <- exons(txdb)
#repeat annotations
data(repeat_annotation, package = "MiniChip")
reps
#calculate data frames for track plotting
trackData <- calcTracks(bigwigFiles, bigwigNames, bigwigGroupNames, ROI, CoverageSmoothing=TRUE, smoothing_window=20,reps=reps,genes=genes,exons=exons)
## ----fig.height=10,fig.width=10-----------------------------------------------
# Define the hex codes for each group
color_map <- c(
"Adnp" = "#3aa3db"
)
plotTracks(trackData, bigwigNames, color_map)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.