Nothing
## ---- echo = FALSE------------------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
## ----code, echo = FALSE-------------------------------------------------------
code <- function(...) {
cat(paste(..., sep = "\n"))
}
date = "`r doc_date()`"
pkg = "`r pkg_ver('BiocStyle')`"
## ----global_options, echo=FALSE-----------------------------------------------
short=TRUE #if short==TRUE, do not echo code chunks
debug=FALSE
knitr::opts_chunk$set(echo=!short, warning=debug, message=debug, error=FALSE,
cache.path = "cache/",
fig.path = "figures/")
## ----PFMatrix, echo=TRUE, eval=TRUE-------------------------------------------
library(TFBSTools)
## PFMatrix construction; Not all of the slots need to be initialised.
pfm <- PFMatrix(ID="MA0004.1", name="Arnt",
matrixClass="Zipper-Type", strand="+",
bg=c(A=0.25, C=0.25, G=0.25, T=0.25),
tags=list(family="Helix-Loop-Helix", species="10090",
tax_group="vertebrates",medline="7592839",
type="SELEX",ACC="P53762", pazar_tf_id="TF0000003",
TFBSshape_ID="11", TFencyclopedia_ID="580"),
profileMatrix=matrix(c(4L, 19L, 0L, 0L, 0L, 0L,
16L, 0L, 20L, 0L, 0L, 0L,
0L, 1L, 0L, 20L, 0L, 20L,
0L, 0L, 0L, 0L, 20L, 0L),
byrow=TRUE, nrow=4,
dimnames=list(c("A", "C", "G", "T"))
)
)
pfm
## coerced to matrix
as.matrix(pfm)
## access the slots of pfm
ID(pfm)
name(pfm)
Matrix(pfm)
ncol(pfm)
length(pfm)
## convert a PFM to PWM, ICM
pwm <- toPWM(pfm, type="log2probratio", pseudocounts=0.8,
bg=c(A=0.25, C=0.25, G=0.25, T=0.25))
icm <- toICM(pfm, pseudocounts=sqrt(rowSums(pfm)[1]), schneider=FALSE,
bg=c(A=0.25, C=0.25, G=0.25, T=0.25))
## get the reverse complment matrix with all the same information except the strand.
pwmRevComp <- reverseComplement(pwm)
## ----PFMatrixList, echo=TRUE, eval=TRUE---------------------------------------
pfm2 <- pfm
pfmList <- PFMatrixList(pfm1=pfm, pfm2=pfm2, use.names=TRUE)
pfmList
names(pfmList)
## ----TFFMRead, echo=TRUE, eval=TRUE-------------------------------------------
xmlFirst <- file.path(system.file("extdata", package="TFBSTools"),
"tffm_first_order.xml")
tffmFirst <- readXMLTFFM(xmlFirst, type="First")
tffm <- getPosProb(tffmFirst)
xmlDetail <- file.path(system.file("extdata", package="TFBSTools"),
"tffm_detailed.xml")
tffmDetail <- readXMLTFFM(xmlDetail, type="Detail")
getPosProb(tffmDetail)
## ----searchDB, echo=TRUE, eval=TRUE-------------------------------------------
suppressMessages(library(JASPAR2014))
opts <- list()
opts[["species"]] <- 9606
opts[["name"]] <- "RUNX1"
opts[["type"]] <- "SELEX"
opts[["all_versions"]] <- TRUE
PFMatrixList <- getMatrixSet(JASPAR2014, opts)
PFMatrixList
opts2 <- list()
opts2[["type"]] <- "SELEX"
PFMatrixList2 <- getMatrixSet(JASPAR2014, opts2)
PFMatrixList2
## ----operateDb, echo=TRUE, eval=TRUE------------------------------------------
db <- "myMatrixDb.sqlite"
initializeJASPARDB(db, version="2014")
data("MA0043")
storeMatrix(db, MA0043)
deleteMatrixHavingID(db,"MA0043.1")
file.remove(db)
## ----PWMmatrixMethods, echo=TRUE, eval=TRUE-----------------------------------
pwm <- toPWM(pfm, pseudocounts=0.8)
pwm
## ----ICMmatrixMethods, echo=TRUE, eval=TRUE-----------------------------------
icm <- toICM(pfm, pseudocounts=0.8, schneider=TRUE)
icm
## ----seqLogo1, echo=TRUE, eval=TRUE, fig.width=6, fig.height=4----------------
seqLogo(icm)
## ----PFMSimi, echo=TRUE, eval=TRUE--------------------------------------------
## one to one comparison
data(MA0003.2)
data(MA0004.1)
pfmSubject <- MA0003.2
pfmQuery <- MA0004.1
PFMSimilarity(pfmSubject, pfmQuery)
## one to several comparsion
PFMSimilarity(pfmList, pfmQuery)
## align IUPAC string
IUPACString <- "ACGTMRWSYKVHDBN"
PFMSimilarity(pfmList, IUPACString)
## ----PWMSimilarity, echo=TRUE, eval=TRUE--------------------------------------
data(MA0003.2)
data(MA0004.1)
pwm1 <- toPWM(MA0003.2, type="prob")
pwm2 <- toPWM(MA0004.1, type="prob")
PWMSimilarity(pwm1, pwm2, method="Euclidean")
PWMSimilarity(pwm1, pwm2, method="Pearson")
PWMSimilarity(pwm1, pwm2, method="KL")
## ----permuteMatrix, echo=TRUE, eval=TRUE--------------------------------------
## Matrice permutation
permuteMatrix(pfmQuery)
permuteMatrix(pfmList, type="intra")
permuteMatrix(pfmList, type="inter")
## ----samplingMatrix, echo=TRUE, eval=FALSE------------------------------------
# ## Dirichlet model training
# data(MA0003.2)
# data(MA0004.1)
# pfmList <- PFMatrixList(pfm1=MA0003.2, pfm2=MA0004.1, use.names=TRUE)
# dmmParameters <- dmmEM(pfmList, K=6, alg="C")
# ## Matrice sampling from trained Dirichlet model
# pwmSampled <- rPWMDmm(MA0003.2, dmmParameters$alpha0, dmmParameters$pmix,
# N=1, W=6)
## ----TFFMFirstseqLogo, echo=TRUE, eval=TRUE, fig.width=6, fig.height=10-------
## sequence logo for First-order TFFM
seqLogo(tffmFirst)
## ----TFFMDetailseqLogo, echo=TRUE, eval=TRUE, fig.width=6, fig.height=10------
## sequence logo for detailed TFFM
seqLogo(tffmDetail)
## ----searchSeq, echo=TRUE, eval=TRUE------------------------------------------
library(Biostrings)
data(MA0003.2)
data(MA0004.1)
pwmList <- PWMatrixList(MA0003.2=toPWM(MA0003.2), MA0004.1=toPWM(MA0004.1),
use.names=TRUE)
subject <- DNAString("GAATTCTCTCTTGTTGTAGTCTCTTGACAAAATG")
siteset <- searchSeq(pwm, subject, seqname="seq1", min.score="60%", strand="*")
sitesetList <- searchSeq(pwmList, subject, seqname="seq1",
min.score="60%", strand="*")
## generate gff2 or gff3 style output
head(writeGFF3(siteset))
head(writeGFF3(sitesetList))
head(writeGFF2(siteset))
## get the relative scores
relScore(siteset)
relScore(sitesetList)
## calculate the empirical p-values of the scores
pvalues(siteset, type="TFMPvalue")
pvalues(siteset, type="sampling")
## ----searchAln, echo=TRUE, eval=TRUE------------------------------------------
library(Biostrings)
data(MA0003.2)
pwm <- toPWM(MA0003.2)
aln1 <- DNAString("ACTTCACCAGCTCCCTGGCGGTAAGTTGATC---AAAGG---AAACGCAAAGTTTTCAAG")
aln2 <- DNAString("GTTTCACTACTTCCTTTCGGGTAAGTAAATATATAAATATATAAAAATATAATTTTCATC")
sitePairSet <- searchAln(pwm, aln1, aln2, seqname1="seq1", seqname2="seq2",
min.score="50%", cutoff=0.5,
strand="*", type="any")
## generate gff style output
head(writeGFF3(sitePairSet))
head(writeGFF2(sitePairSet))
## search the Axt alignment
library(CNEr)
axtFilesHg19DanRer7 <- file.path(system.file("extdata", package="TFBSTools"),
"hg19.danRer7.net.axt")
axtHg19DanRer7 <- readAxt(axtFilesHg19DanRer7)
sitePairSet <- searchAln(pwm, axtHg19DanRer7, min.score="80%",
windowSize=51L, cutoff=0.7, strand="*",
type="any", conservation=NULL, mc.cores=1)
GRangesTFBS <- toGRangesList(sitePairSet, axtHg19DanRer7)
GRangesTFBS$targetTFBS
GRangesTFBS$queryTFBS
## ----searchBSgenome, echo=TRUE, eval=FALSE------------------------------------
# library(rtracklayer)
# library(JASPAR2014)
# library(BSgenome.Hsapiens.UCSC.hg19)
# library(BSgenome.Mmusculus.UCSC.mm10)
# pfm <- getMatrixByID(JASPAR2014, ID="MA0004.1")
# pwm <- toPWM(pfm)
# chain <- import.chain("Downloads/hg19ToMm10.over.chain")
# sitePairSet <- searchPairBSgenome(pwm, BSgenome.Hsapiens.UCSC.hg19,
# BSgenome.Mmusculus.UCSC.mm10,
# chr1="chr1", chr2="chr1",
# min.score="90%", strand="+", chain=chain)
## ----MEME-wrapper, echo=TRUE, eval=FALSE--------------------------------------
# motifSet <- runMEME(file.path(system.file("extdata",
# package="TFBSTools"), "crp0.s"),
# binary="meme",
# arguments=list("-nmotifs"=3)
# )
# ## Get the sites sequences and surrounding sequences
# sitesSeq(motifSet, type="all")
# ## Get the sites sequences only
# sitesSeq(motifSet, type="none")
# consensusMatrix(motifSet)
## ----sessionInfo, echo=FALSE--------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.