Sam Buckberry 2016-11-28
This package is currently is it's infantcy. Don't expect any real functionality for a while...
library(devtools)
devtools::install_github("SamBuckberry/RunATAC")
install.packages('data.table')
install.packages('magrittr')
## try http:// if https:// URLs are not supported
source("https://bioconductor.org/biocLite.R")
biocLite(c("Biostrings", "motifRG", "GenomicRanges", "IRanges", "GenomicAlignments", "rtracklayer", "Rsamtools"))
First, load a BAM file. Here we will use a BAM file included in the RunATAC package. This BAM is from ATAC-seq on Saccharomyces cerevisiae and includes data only from chrIV.
library(RunATAC)
bam_fl <- system.file("extdata", "chrIV.bam", package = "RunATAC")
frags <- read_atac_frags(bam_file = bam_fl)
Plot the insert size distribution.
plot_insert_size(frags, xlim=c(0,600))
bam_to_insertions_bw(bam_file = bam_fl, file = "inst/extdata/chrIV.ins.bigwig")
bam_to_centres_bw(bam_file = bam_fl, file = "inst/extdata/chrIV.nuc.bigwig")
Get the Tn5 insertion points from BAM file into GRanges object
ins <- read_atac_insertions(bam_fl)
ins
Load the peak data in the MACS2 .narrowPeak format
peak_fl <- system.file("extdata", "chrIV_peaks.narrowPeak", package = "RunATAC")
peaks <- read_bed(peak_fl)
peaks
Calculate the fraction of reads in peaks
calc_frip(signal = ins, peaks = peaks)
library(GenomicAlignments)
library(stringr)
olap_counts <- summarizeOverlaps(features = peaks, reads = ins)
olap_counts <- assays(olap_counts)$counts
rownames(olap_counts) <- str_c(seqnames(peaks), start(peaks), sep = ":") %>%
str_c(end(peaks), sep = "-")
library(BSgenome.Scerevisiae.UCSC.sacCer3)
dinuc_freq <- calc_dinuc_freq(ins_gr = ins, genome = Scerevisiae)
barplot(dinuc_freq$percentage, names=dinuc_freq$dinucleotide)
#calc_ins_pwm <- function()
In this example, we will plot the Tn5 insertion signal around CTCF motifs in ATAC-seq peaks using a positional weight matrix (PWM).
Get the REB1 motif from the JASPAR database.
#source("https://bioconductor.org/biocLite.R")
#biocLite(c("BSgenome.Scerevisiae.UCSC.sacCer3", "JASPAR2016", "TFBSTools"))
library(JASPAR2016)
library(TFBSTools)
reb1 <- getMatrixByName(JASPAR2016, name=c("REB1"))
reb1 <- reb1@profileMatrix
Get the positions of motif in peaks.
motif_pos <- motif_gr(gr = peaks, pwm = reb1, genome = Scerevisiae)
motif_pos
Get the insertion data for the motif regions
ins_bw <- system.file("extdata", "chrIV.ins.bigwig", package = "RunATAC")
dat <- range_summary(bigwig = ins_bw, gr = motif_pos)
library(reshape2)
library(dplyr)
library(ggplot2)
mdat <- melt(dat)
fp <- mdat %>% group_by(variable) %>%
summarise(yy=mean(value))
fp$variable <- as.character(fp$variable) %>% as.numeric()
gg <- ggplot(fp, aes(x = variable, y = yy, fill = 'blue', title="")) +
#stat_smooth(span = 10) +
#geom_area(aes(fill = 'blue', stat = "bin")) +
geom_line(size=0.3) +
xlab("Distance from motif") +
ylab("Tn5 insertions") +
ggtitle(title) +
theme_bw() +
theme(plot.background = element_blank(),
panel.border = element_blank(),
strip.background = element_rect(size = 0),
panel.grid.minor = element_blank(),
axis.line = element_line(),
axis.text = element_text(size=8))
gg
Generate V-plot for motif regions
v_dat <- calc_v(frags_gr = frags, motif_pos_gr = motif_pos,
flank = 200, max_frag = 600)
plot_v(df = v_dat)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.