Nothing
## ----setup, include = FALSE-----------------------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
options(width=100)
## -------------------------------------------------------------------------------------------------
library(data.table)
data(ChIPreads, package="PeakSegDisk", envir=environment())
(experiments <- ChIPreads[, .(
mean.bases=mean(chromEnd-chromStart),
median.bases=median(chromEnd-chromStart),
chromStart=min(chromStart)
), by=list(experiment)])
## -------------------------------------------------------------------------------------------------
end.counts <- ChIPreads[, list(
count=.N #ignores dup reads, sum(count) would not.
), by=list(experiment, chrom, chromEnd)]
(aligned.dt <- rbind(
ChIPreads[, .(
bases.counted="each", experiment, chrom,
chromStart, chromEnd,
count=1)], #ignore duplicate reads.
end.counts[, .(
bases.counted="end", experiment, chrom,
chromStart=chromEnd-1L, chromEnd,
count)]))
## -------------------------------------------------------------------------------------------------
aligned.dt[, {
as.list(quantile(chromEnd-chromStart))
}, by=.(bases.counted, experiment)]
## -------------------------------------------------------------------------------------------------
(seq.dt <- aligned.dt[, {
event.dt <- rbind(
data.table(count, pos=chromStart+1L),
data.table(count=-count, pos=chromEnd+1L))
edge.vec <- event.dt[, {
as.integer(seq(min(pos), max(pos), l=100))
}]
event.bins <- rbind(
event.dt,
data.table(count=0L, pos=edge.vec))
total.dt <- event.bins[, .(
count=sum(count)
), by=list(pos)][order(pos)]
total.dt[, cum := cumsum(count)]
total.dt[, bin.i := cumsum(pos %in% edge.vec)]
## it is somewhat confusing because total.dt pos is the first base
## with cum, and cum goes all the way up to but not including the
## pos of the next row.
total.dt[, data.table(
chromStart=pos[-.N]-1L,
chromEnd=pos[-1]-1L,
count=cum[-.N],
bin.i=bin.i[-.N])]
}, by=list(bases.counted, experiment, chrom)])
## -------------------------------------------------------------------------------------------------
if(require(ggplot2)){
gg.data <- ggplot()+
theme_bw()+
theme(panel.spacing=grid::unit(0, "lines"))+
facet_grid(
bases.counted ~ experiment,
scales="free",
labeller=label_both)+
geom_step(aes(
chromStart/1e3, count, color=data.type),
data=data.table(seq.dt, data.type="exact"))+
scale_color_manual(values=c(
exact="black",
bins="red",
model="deepskyblue"
))+
scale_x_continuous("Position on hg19 chrom (kb = kilo bases)")
print(gg.data)
}
## -------------------------------------------------------------------------------------------------
bin.dt <- seq.dt[, {
bases <- chromEnd - chromStart
data.table(
binStart=min(chromStart),
binEnd=max(chromEnd),
mean.count=sum(count*bases)/sum(bases),
bases=sum(bases)
)}, by=list(bases.counted, experiment, bin.i)]
if(require(ggplot2)){
gg.bins <- gg.data+
geom_step(aes(
binStart/1e3, mean.count, color=data.type),
alpha=0.75,
linewidth=1,
data=data.table(bin.dt, data.type="bins"))+
scale_y_log10("Aligned DNA sequence reads (log scale)")
print(gg.bins)
}
## -------------------------------------------------------------------------------------------------
if(interactive() && requireNamespace("future"))future::plan("multisession")
segs.dt <- seq.dt[, {
data.dir <- file.path(tempdir(), bases.counted, experiment)
dir.create(data.dir, showWarnings=FALSE, recursive=TRUE)
coverage.bedGraph <- file.path(data.dir, "coverage.bedGraph")
fwrite(
.SD[, .(chrom, chromStart, chromEnd, count)],
coverage.bedGraph,
sep="\t",
quote=FALSE,
col.names=FALSE)
fit <- PeakSegDisk::sequentialSearch_dir(data.dir, 2L, verbose=1)
data.table(fit$segments, data.type="model")
}, by=list(bases.counted, experiment)]
changes.dt <- segs.dt[, {
.SD[-1]
}, by=list(bases.counted, experiment, data.type)]
if(require(ggplot2)){
gg.model <- gg.bins+
geom_segment(aes(
chromStart/1e3, mean,
xend=chromEnd/1e3, yend=mean,
color=data.type),
data=segs.dt)+
geom_vline(aes(
xintercept=chromEnd/1e3,
color=data.type),
data=changes.dt)
print(gg.model)
}
## ---- fig.width=10--------------------------------------------------------------------------------
peaks.dt <- segs.dt[status=="peak"]
peaks.dt[, peak.i := rep(1:2, l=.N)]
peak.pos.tall <- melt(
peaks.dt,
measure.vars=c("chromStart", "chromEnd"))
peak.pos.wide <- dcast(
peak.pos.tall,
experiment + variable + peak.i ~ bases.counted)
peak.pos.wide[, diff.bases := abs(each-end)]
read.size.panel <- "each"
bases.max.dt <- seq.dt[, .(max.count=max(count)), by=list(bases.counted)]
read.size.y <- bases.max.dt[
read.size.panel, max.count, on=list(bases.counted)]
diff.panel <- "end"
diff.y <- bases.max.dt[
diff.panel, max.count, on=list(bases.counted)]
diff.y <- Inf
diff.vjust <- 1.1
if(require(ggplot2)){
gg.model+
geom_text(aes(
chromStart/1e3, read.size.y, label=sprintf(
"Median read size:\n%.0f bases",
median.bases)),
hjust=0,
vjust=1,
data=data.table(experiments, bases.counted=read.size.panel))+
geom_text(aes(
end/1e3, diff.y,
label=diff.bases,
color=data.type),
data=data.table(
bases.counted=diff.panel,
data.type="model",
peak.pos.wide),
vjust=diff.vjust,
hjust=0)+
geom_text(aes(
chromStart/1e3, diff.y,
label="Peak position\ndifference in bases:",
color=data.type,
),
hjust=0,
vjust=diff.vjust,
data=data.table(
data.type="model",
bases.counted=diff.panel,
experiments["H3K36me3", on=list(experiment)]))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.