Alignment of MNaseSeq-Data

Paired-end data is aligned as follows using bowtie2.

Cleaning up of orphaned reads is performed with this script taken from (https://www.biostars.org/p/95929/) ```{python, eval = FALSE,python.reticulate = FALSE}

!/usr/bin/env python

import csv import sys

f = csv.reader(sys.stdin, dialect="excel-tab") of = csv.writer(sys.stdout, dialect="excel-tab") last_read = None for line in f : #take care of the header if(line[0][0] == "@") : of.writerow(line) continue

if(last_read == None) : 
    last_read = line
else :
    if(last_read[0] == line[0]) :
        of.writerow(last_read)
        of.writerow(line)
        last_read = None
    else :
        last_read = line
```{bash, eval=FALSE}
BOWTIE_INDEX= <dir>/Bowtie2Index/genome
BOWTIE_OPTS="-p 10 -X 250 --no-discordant --no-mixed --no-unal"
bowtie2 $BOWTIE_OPTS -x $BOWTIE_INDEX -1 mate_1.fastq.gz -2 mate_2.fastq.gz > aligned.sam
samtools view -hf 0x2 aligned.sam | grep -v "XS:i:" | filter_orphans.py | samtools view -b -o aligned.bam
samtools sort -n -m 1G -@ 8 -o aligned.s.bam aligned.bam
bamToBed -i aligned.s.bam -bedpe > aligned.bed 2>/dev/null
cut -f 1,2,6 aligned.bed > aligned.s.bed

Single-read data is aligned as follows using bowtie2.

```{bash, eval = FALSE} BOWTIE_INDEX=

/Bowtie2Index/genome BOWTIE_OPTS="-p 10 --no-unal" bowtie2 $bowtie_opts -x $BOWTIE_INDEX ${SRUN}.fastq.gz > aligned.sam samtools view -h aligned.sam | grep -v "XS:i:" | samtools view -b -o aligned.bam samtools sort -m 1G -@ 8 -o aligned.s.bam aligned.bam bamToBed -i aligned.s.bam > aligned.bed 2>/dev/null cut -f 1,2,3,6 aligned.bed > aligned.s.bed

### Conversion to coverage object

```r
library(tsTools)
library(IRanges)
library(GenomicRanges)

As an example we load the data included in the package (S. cerevisiae MNase-Seq, subset to chromosome IV)

fpath <- system.file("extdata", "SRR2154281_IV.s.bed", package="tsTools")
cov <- bed2dyad(fpath,"PAIRED",1)

Autocorrelation Function

The autocorrelation function on the signal vector can provide a hint as to whether we have something nucleosomal in the data. Given the beads on a string arrangement of nucleosomes we expect a periodic autocorrelation pattern.

acf(as.vector(cov[['IV']]), lag.max = 1000, main="", xlab="lag (bp)")

Cumulative Plots

data(ann)
head(ann)
ann <- ann[ann$chr=="IV",]
centers <- GRanges(ann$chr, IRanges(ifelse(ann$strand=="+", ann$start, ann$end)), strand=ann$strand)
names(centers) <- rownames(ann)
mat <- coverageWindowsCenteredStranded(centers, window.size=2000, cov)
x <- seq(-1000,1000)
plot(x, apply(mat, 2, mean), type="l", 
     xlab="position relative to center position", ylab="dyad density")

row-wise normalization

mat.sq <- norm.square(mat)
mat.sum <- norm.sum(mat)

par(mfrow=c(1,2))
plot(x, apply(mat.sq, 2, mean), type="l", 
     xlab="position relative to center position", ylab="dyad density (normalized)", main="sum of squares normalization")
plot(x, apply(mat.sum, 2, mean), type="l", 
     xlab="position relative to center position", ylab="dyad density (normalized)", main="sum of row normalization")
mat.bin <- bin.matrix.rows(mat.sum,25)
x <- as.integer(colnames(mat.bin))
plot(x, apply(mat.bin, 2, mean), type="l", 
     xlab="position relative to center position", ylab="dyad density (normalized)", main="binned")
library(zoo)
mat.bin.smoothed <- t(apply(mat.bin, 1, function(x) {zoo::rollmean(x, 9)}))
x <- as.integer(colnames(mat.bin.smoothed))
plot(x, apply(mat.bin.smoothed, 2, mean), type="l", 
     xlab="position relative to center position", ylab="dyad density (normalized)", main="binned & smoothed")

Cumulative plot with scatter

cumPlot(mat.sum, base.col = "#FF0000")

Heatmaps

library(grid)
hmat <- meanScale(mat)
plotRasterHeatmap(hmat)

Getting Spacing and Phasing along Reference points

ann[1:10,]
result <- ocampo(cov, ann[1:10,])
result
sessionInfo()


musikutiv/tsTools documentation built on July 25, 2024, 2:42 a.m.