inst/doc/PWMEnrich.R

## ----include=FALSE------------------------------------------------------------
library(knitr)
opts_chunk$set(
echo=TRUE,eval=TRUE,cache=FALSE,tidy=FALSE
)

## ----include=FALSE------------------------------------------------------------
library(knitr)
opts_chunk$set(
tidy=FALSE,dev='pdf',
message=FALSE, warning=FALSE
)

## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"--------------------
BiocStyle::latex()

## ----simple,echo=TRUE------------------------------------------------------
library(PWMEnrich)
library(PWMEnrich.Dmelanogaster.background)

# load the pre-compiled lognormal background
data(PWMLogn.dm3.MotifDb.Dmel)

# load the stripe2 sequences from a FASTA file for motif enrichment
sequence = readDNAStringSet(system.file(package="PWMEnrich", 
  dir="extdata", file="stripe2.fa"))
sequence  

# perform motif enrichment!
res = motifEnrichment(sequence, PWMLogn.dm3.MotifDb.Dmel)

report = sequenceReport(res, 1)
report

# plot the motif with P-value < 0.05
plot(report[report$p.value < 0.05], fontsize=7, id.fontsize=6)

## ----stripe2visual,echo=TRUE,fig.width=4,fig.height=4,fig.align='center'----

# extract the 3 PWMs for the TFs we are interested in
ids = c("bcd_FlyReg_FBgn0000166", 
        "gt_FlyReg_FBgn0001150", 
        "Kr")
sel.pwms = PWMLogn.dm3.MotifDb.Dmel$pwms[ids]
names(sel.pwms) = c("bcd", "gt", "Kr")

# scan and get the raw scores
scores = motifScores(sequence, sel.pwms, raw.scores=TRUE)

# raw scores for the first (and only) input sequence
dim(scores[[1]])
head(scores[[1]])

# score starting at position 1 of forward strand
scores[[1]][1, "bcd"]
# score for the reverse complement of the motif, starting at the same position
scores[[1]][485, "bcd"]

# plot
plotMotifScores(scores, cols=c("green", "red", "blue"))

## ----motifEcdf,echo=TRUE,fig.width=4,fig.height=4,fig.align='center'-------
library(BSgenome.Dmelanogaster.UCSC.dm3)

# empirical distribution for the bcd motif
bcd.ecdf = motifEcdf(sel.pwms$bcd, Dmelanogaster, quick=TRUE)[[1]]

# find the score that is equivalent to the P-value of 1e-3
threshold.1e3 = log2(quantile(bcd.ecdf, 1 - 1e-3))
threshold.1e3 

# replot only the bcd motif hits with the P-value cutoff of 1e-3 (0.001)
plotMotifScores(scores, cols="green", sel.motifs="bcd", cutoff=threshold.1e3)

# Convert scores into P-values
pvals = 1 - bcd.ecdf(scores[[1]][,"bcd"])
head(pvals)

## ----tinman,echo=TRUE------------------------------------------------------
library(PWMEnrich.Dmelanogaster.background)

# load the pre-compiled lognormal background
data(PWMLogn.dm3.MotifDb.Dmel)

sequences = readDNAStringSet(system.file(package="PWMEnrich", 
  dir="extdata", file="tinman-early-top20.fa"))
 
res = motifEnrichment(sequences, PWMLogn.dm3.MotifDb.Dmel)

report = groupReport(res)
report

plot(report[1:10], fontsize=7, id.fontsize=5)

## ----tinman-alt,echo=TRUE--------------------------------------------------
report.top = groupReport(res, by.top.motifs=TRUE)

report.top

## ----tinman2,echo=TRUE-----------------------------------------------------
res

# raw scores
res$sequence.nobg[1:5, 1:2]

# P-values
res$sequence.bg[1:5, 1:2]

## ----parallel,echo=TRUE----------------------------------------------------
registerCoresPWMEnrich(4)

## ----parallel-stop,echo=TRUE-----------------------------------------------
registerCoresPWMEnrich(NULL)

## ----bigmem,echo=TRUE------------------------------------------------------
useBigMemoryPWMEnrich(TRUE)

## ----bigmemoff,echo=TRUE---------------------------------------------------
useBigMemoryPWMEnrich(FALSE)

## ----readmotifs,echo=TRUE--------------------------------------------------
library(PWMEnrich.Dmelanogaster.background)

motifs.denovo = readMotifs(system.file(package="PWMEnrich", 
  dir="extdata", file="example.transfac"), remove.acc=TRUE)
  
motifs.denovo  

# convert count matrices into PWMs
genomic.acgt = getBackgroundFrequencies("dm3")
pwms.denovo = toPWM(motifs.denovo, prior=genomic.acgt)

bg.denovo = makeBackground(pwms.denovo, organism="dm3", type="logn", quick=TRUE)

# use new motifs for motif enrichment
res.denovo = motifEnrichment(sequences[1:5], bg.denovo)
groupReport(res.denovo)

## ----bg-investigate,echo=TRUE----------------------------------------------
bg.denovo
bg.denovo$bg.mean

## ----custombg,echo=TRUE----------------------------------------------------
library(PWMEnrich.Dmelanogaster.background)
data(dm3.upstream2000)

# make a lognormal background for the two motifs using only first 20 promoters
bg.seq = dm3.upstream2000[1:20]

# the sequences are split into 100bp chunks and fitted
bg.custom = makeBackground(pwms.denovo, bg.seq=bg.seq, type="logn", bg.len=100,
	bg.source="20 promoters split into 100bp chunks")

bg.custom

## ----sessionInfo,echo=FALSE,results='asis'---------------------------------
toLatex(sessionInfo())

Try the PWMEnrich package in your browser

Any scripts or data that you put into this service are public.

PWMEnrich documentation built on Nov. 8, 2020, 7:45 p.m.