Nothing
## ----include=FALSE------------------------------------------------------------
library(knitr)
opts_chunk$set(
echo=TRUE,eval=TRUE,cache=FALSE,tidy=FALSE
)
## ----include=FALSE------------------------------------------------------------
library(knitr)
opts_chunk$set(
tidy=FALSE,dev='pdf',
message=FALSE, warning=FALSE
)
## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"--------------------
BiocStyle::latex()
## ----simple,echo=TRUE------------------------------------------------------
library(PWMEnrich)
library(PWMEnrich.Dmelanogaster.background)
# load the pre-compiled lognormal background
data(PWMLogn.dm3.MotifDb.Dmel)
# load the stripe2 sequences from a FASTA file for motif enrichment
sequence = readDNAStringSet(system.file(package="PWMEnrich",
dir="extdata", file="stripe2.fa"))
sequence
# perform motif enrichment!
res = motifEnrichment(sequence, PWMLogn.dm3.MotifDb.Dmel)
report = sequenceReport(res, 1)
report
# plot the motif with P-value < 0.05
plot(report[report$p.value < 0.05], fontsize=7, id.fontsize=6)
## ----stripe2visual,echo=TRUE,fig.width=4,fig.height=4,fig.align='center'----
# extract the 3 PWMs for the TFs we are interested in
ids = c("bcd_FlyReg_FBgn0000166",
"gt_FlyReg_FBgn0001150",
"Kr")
sel.pwms = PWMLogn.dm3.MotifDb.Dmel$pwms[ids]
names(sel.pwms) = c("bcd", "gt", "Kr")
# scan and get the raw scores
scores = motifScores(sequence, sel.pwms, raw.scores=TRUE)
# raw scores for the first (and only) input sequence
dim(scores[[1]])
head(scores[[1]])
# score starting at position 1 of forward strand
scores[[1]][1, "bcd"]
# score for the reverse complement of the motif, starting at the same position
scores[[1]][485, "bcd"]
# plot
plotMotifScores(scores, cols=c("green", "red", "blue"))
## ----motifEcdf,echo=TRUE,fig.width=4,fig.height=4,fig.align='center'-------
library(BSgenome.Dmelanogaster.UCSC.dm3)
# empirical distribution for the bcd motif
bcd.ecdf = motifEcdf(sel.pwms$bcd, Dmelanogaster, quick=TRUE)[[1]]
# find the score that is equivalent to the P-value of 1e-3
threshold.1e3 = log2(quantile(bcd.ecdf, 1 - 1e-3))
threshold.1e3
# replot only the bcd motif hits with the P-value cutoff of 1e-3 (0.001)
plotMotifScores(scores, cols="green", sel.motifs="bcd", cutoff=threshold.1e3)
# Convert scores into P-values
pvals = 1 - bcd.ecdf(scores[[1]][,"bcd"])
head(pvals)
## ----tinman,echo=TRUE------------------------------------------------------
library(PWMEnrich.Dmelanogaster.background)
# load the pre-compiled lognormal background
data(PWMLogn.dm3.MotifDb.Dmel)
sequences = readDNAStringSet(system.file(package="PWMEnrich",
dir="extdata", file="tinman-early-top20.fa"))
res = motifEnrichment(sequences, PWMLogn.dm3.MotifDb.Dmel)
report = groupReport(res)
report
plot(report[1:10], fontsize=7, id.fontsize=5)
## ----tinman-alt,echo=TRUE--------------------------------------------------
report.top = groupReport(res, by.top.motifs=TRUE)
report.top
## ----tinman2,echo=TRUE-----------------------------------------------------
res
# raw scores
res$sequence.nobg[1:5, 1:2]
# P-values
res$sequence.bg[1:5, 1:2]
## ----parallel,echo=TRUE----------------------------------------------------
registerCoresPWMEnrich(4)
## ----parallel-stop,echo=TRUE-----------------------------------------------
registerCoresPWMEnrich(NULL)
## ----bigmem,echo=TRUE------------------------------------------------------
useBigMemoryPWMEnrich(TRUE)
## ----bigmemoff,echo=TRUE---------------------------------------------------
useBigMemoryPWMEnrich(FALSE)
## ----readmotifs,echo=TRUE--------------------------------------------------
library(PWMEnrich.Dmelanogaster.background)
motifs.denovo = readMotifs(system.file(package="PWMEnrich",
dir="extdata", file="example.transfac"), remove.acc=TRUE)
motifs.denovo
# convert count matrices into PWMs
genomic.acgt = getBackgroundFrequencies("dm3")
pwms.denovo = toPWM(motifs.denovo, prior=genomic.acgt)
bg.denovo = makeBackground(pwms.denovo, organism="dm3", type="logn", quick=TRUE)
# use new motifs for motif enrichment
res.denovo = motifEnrichment(sequences[1:5], bg.denovo)
groupReport(res.denovo)
## ----bg-investigate,echo=TRUE----------------------------------------------
bg.denovo
bg.denovo$bg.mean
## ----custombg,echo=TRUE----------------------------------------------------
library(PWMEnrich.Dmelanogaster.background)
data(dm3.upstream2000)
# make a lognormal background for the two motifs using only first 20 promoters
bg.seq = dm3.upstream2000[1:20]
# the sequences are split into 100bp chunks and fitted
bg.custom = makeBackground(pwms.denovo, bg.seq=bg.seq, type="logn", bg.len=100,
bg.source="20 promoters split into 100bp chunks")
bg.custom
## ----sessionInfo,echo=FALSE,results='asis'---------------------------------
toLatex(sessionInfo())
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.