Nothing
## ----setup, echo=FALSE--------------------------------------------------------
knitr::opts_chunk$set(collapse=TRUE, comment = "#>")
suppressPackageStartupMessages(library(universalmotif))
suppressPackageStartupMessages(library(Biostrings))
data(ArabidopsisPromoters)
data(ArabidopsisMotif)
## -----------------------------------------------------------------------------
library(universalmotif)
library(Biostrings)
## Create some DNA sequences for use with an external program (default
## is DNA):
sequences.dna <- create_sequences(seqnum = 500,
freqs = c(A=0.3, C=0.2, G=0.2, T=0.3))
## writeXStringSet(sequences.dna, "dna.fasta")
sequences.dna
## Amino acid:
create_sequences(alphabet = "AA")
## Any set of characters can be used
create_sequences(alphabet = paste0(letters, collapse = ""))
## -----------------------------------------------------------------------------
library(universalmotif)
## Background of DNA sequences:
dna <- create_sequences()
get_bkg(dna, k = 1:2)
## Background of non DNA/RNA sequences:
qwerty <- create_sequences("QWERTY")
get_bkg(qwerty, k = 1:2)
## -----------------------------------------------------------------------------
library(universalmotif)
library(Biostrings)
data(ArabidopsisPromoters)
## Potentially starting off with some external sequences:
# ArabidopsisPromoters <- readDNAStringSet("ArabidopsisPromoters.fasta")
euler <- shuffle_sequences(ArabidopsisPromoters, k = 2, method = "euler")
markov <- shuffle_sequences(ArabidopsisPromoters, k = 2, method = "markov")
linear <- shuffle_sequences(ArabidopsisPromoters, k = 2, method = "linear")
k1 <- shuffle_sequences(ArabidopsisPromoters, k = 1)
## -----------------------------------------------------------------------------
o.letter <- get_bkg(ArabidopsisPromoters, 1)
e.letter <- get_bkg(euler, 1)
m.letter <- get_bkg(markov, 1)
l.letter <- get_bkg(linear, 1)
data.frame(original=o.letter$count, euler=e.letter$count,
markov=m.letter$count, linear=l.letter$count, row.names = DNA_BASES)
o.counts <- get_bkg(ArabidopsisPromoters, 2)
e.counts <- get_bkg(euler, 2)
m.counts <- get_bkg(markov, 2)
l.counts <- get_bkg(linear, 2)
data.frame(original=o.counts$count, euler=e.counts$count,
markov=m.counts$count, linear=l.counts$count,
row.names = get_klets(DNA_BASES, 2))
## -----------------------------------------------------------------------------
library(universalmotif)
string <- "DASDSDDSASDSSA"
count_klets(string, 2)
shuffle_string(string, 2)
## -----------------------------------------------------------------------------
library(universalmotif)
get_klets(c("A", "S", "D"), 2)
## -----------------------------------------------------------------------------
library(universalmotif)
m1 <- create_motif("TATATATATA", nsites = 50, type = "PWM", pseudocount = 1)
m2 <- matrix(c(0.10,0.27,0.23,0.19,0.29,0.28,0.51,0.12,0.34,0.26,
0.36,0.29,0.51,0.38,0.23,0.16,0.17,0.21,0.23,0.36,
0.45,0.05,0.02,0.13,0.27,0.38,0.26,0.38,0.12,0.31,
0.09,0.40,0.24,0.30,0.21,0.19,0.05,0.30,0.31,0.08),
byrow = TRUE, nrow = 4)
m2 <- create_motif(m2, alphabet = "DNA", type = "PWM")
m1["motif"]
m2["motif"]
## -----------------------------------------------------------------------------
motif_pvalue(m2, pvalue = 0.001)
## -----------------------------------------------------------------------------
library(universalmotif)
library(Biostrings)
data(ArabidopsisPromoters)
## A 2-letter example:
motif.k2 <- create_motif("CWWWWCC", nsites = 6)
sequences.k2 <- DNAStringSet(rep(c("CAAAACC", "CTTTTCC"), 3))
motif.k2 <- add_multifreq(motif.k2, sequences.k2)
## -----------------------------------------------------------------------------
head(scan_sequences(motif.k2, ArabidopsisPromoters, RC = TRUE,
threshold = 0.9, threshold.type = "logodds"))
## -----------------------------------------------------------------------------
head(scan_sequences(motif.k2, ArabidopsisPromoters, use.freq = 2, RC = TRUE,
threshold = 0.9, threshold.type = "logodds"))
## -----------------------------------------------------------------------------
library(universalmotif)
library(Biostrings)
sequences <- DNAStringSet(rep(c("CAAAACC", "CTTTTCC"), 3))
motif <- create_motif(sequences, add.multifreq = 2:3)
## -----------------------------------------------------------------------------
library(universalmotif)
data(ArabidopsisMotif)
data(ArabidopsisPromoters)
enrich_motifs(ArabidopsisMotif, ArabidopsisPromoters, shuffle.k = 3,
threshold = 0.001, RC = TRUE)
## -----------------------------------------------------------------------------
library(universalmotif)
data(ArabidopsisPromoters)
m1 <- create_motif("TTTATAT", name = "PartA")
m2 <- create_motif("GGTTCGA", name = "PartB")
## -----------------------------------------------------------------------------
m <- cbind(m1, m2)
m <- add_gap(m, gaploc = ncol(m1), mingap = 4, maxgap = 6)
m
## -----------------------------------------------------------------------------
scan_sequences(m, ArabidopsisPromoters, threshold = 0.4, threshold.type = "logodds")
## -----------------------------------------------------------------------------
library(universalmotif)
data(ArabidopsisMotif)
data(ArabidopsisPromoters)
hits <- scan_sequences(ArabidopsisMotif, ArabidopsisPromoters, RC = FALSE)
res <- motif_peaks(hits$start,
seq.length = unique(width(ArabidopsisPromoters)),
seq.count = length(ArabidopsisPromoters))
## Significant peaks:
res$Peaks
## -----------------------------------------------------------------------------
res$Plot
## ----eval=FALSE---------------------------------------------------------------
# library(universalmotif)
#
# ## 1. Once per session: via `options()`
#
# options(meme.bin = "/path/to/meme/bin/meme")
#
# run_meme(...)
#
# ## 2. Once per run: via `run_meme()`
#
# run_meme(..., bin = "/path/to/meme/bin/meme")
## ----eval=FALSE---------------------------------------------------------------
# library(universalmotif)
# data(ArabidopsisPromoters)
#
# ## 1. Read sequences from disk (in fasta format):
#
# library(Biostrings)
#
# # The following `read*()` functions are available in Biostrings:
# # DNA: readDNAStringSet
# # DNA with quality scores: readQualityScaledDNAStringSet
# # RNA: readRNAStringSet
# # Amino acid: readAAStringSet
# # Any: readBStringSet
#
# sequences <- readDNAStringSet("/path/to/sequences.fasta")
#
# run_meme(sequences, ...)
#
# ## 2. Extract from a `BSgenome` object:
#
# library(GenomicFeatures)
# library(TxDb.Athaliana.BioMart.plantsmart28)
# library(BSgenome.Athaliana.TAIR.TAIR9)
#
# # Let us retrieve the same promoter sequences from ArabidopsisPromoters:
# gene.names <- names(ArabidopsisPromoters)
#
# # First get the transcript coordinates from the relevant `TxDb` object:
# transcripts <- transcriptsBy(TxDb.Athaliana.BioMart.plantsmart28,
# by = "gene")[gene.names]
#
# # There are multiple transcripts per gene, we only care for the first one
# # in each:
#
# transcripts <- lapply(transcripts, function(x) x[1])
# transcripts <- unlist(GRangesList(transcripts))
#
# # Then the actual sequences:
#
# # Unfortunately this is a case where the chromosome names do not match
# # between the two databases
#
# seqlevels(TxDb.Athaliana.BioMart.plantsmart28)
# #> [1] "1" "2" "3" "4" "5" "Mt" "Pt"
# seqlevels(BSgenome.Athaliana.TAIR.TAIR9)
# #> [1] "Chr1" "Chr2" "Chr3" "Chr4" "Chr5" "ChrM" "ChrC"
#
# # So we must first rename the chromosomes in `transcripts`:
# seqlevels(transcripts) <- seqlevels(BSgenome.Athaliana.TAIR.TAIR9)
#
# # Finally we can extract the sequences
# promoters <- getPromoterSeq(transcripts,
# BSgenome.Athaliana.TAIR.TAIR9,
# upstream = 1000, downstream = 0)
#
# run_meme(promoters, ...)
## ----eval=FALSE---------------------------------------------------------------
# run_meme(sequences, output = "/path/to/desired/output/folder")
## ---- eval=FALSE--------------------------------------------------------------
# motifs <- run_meme(sequences)
# motifs.k23 <- mapply(add_multifreq, motifs$motifs, motifs$sites)
## ----sessionInfo, echo=FALSE--------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.