Nothing
## ----include=FALSE, cache=FALSE-----------------------------------------------
library(ggplot2)
library(knitr)
library(rTRM)
library(org.Mm.eg.db)
opts_chunk$set(fig.align = 'center', fig.width = 3, fig.height = 3, fig.show = 'hold', tidy = TRUE, comment = "", highlight = FALSE, prompt = TRUE)
#options(replace.assign=TRUE,width = 80)
knit_hooks$set(no.mar = function(before, options, envir) {
if (before) par(mar = rep(0,4)) # no margins.
})
## ----fig.cap = "Identification of a TRM 1from a test network (left). In the resulting TRM (right) dark blue indicates the target node light blue are query nodes and white nodes are bridge nodes", no.mar = TRUE----
# load the rTRM package
library(rTRM)
# load network example.
load(system.file(package = "rTRM", "extra/example.rda"))
# plot network
plot(g,vertex.size=20,vertex.label.cex=.8,layout=layout.graphopt)
# define target and query nodes:
target = "N6"
query = c("N7", "N12", "N28")
# find TRM:
s = findTRM(g, target = target, query = query, method = "nsa", max.bridge = 1)
# annotate nodes:
V(s)$color = "white"
V(s)[ name %in% query]$color = "steelblue2"
V(s)[ name %in% target]$color = "steelblue4"
# plot:
plot(s,vertex.size=20,vertex.label.cex=.8)
## -----------------------------------------------------------------------------
pwm = getMatrices()
head(pwm, 1)
## -----------------------------------------------------------------------------
ann = getAnnotations()
head(ann)
## -----------------------------------------------------------------------------
map = getMaps()
head(map)
## -----------------------------------------------------------------------------
o = getOrthologs(organism = "mouse")
head(o)
## -----------------------------------------------------------------------------
getOrthologFromMatrix("MA0009.1", organism="human")
getOrthologFromMatrix("MA0009.1", organism="mouse")
## -----------------------------------------------------------------------------
# check statistics about the network.
biogrid_mm()
# load mouse PPI network:
data(biogrid_mm)
## ----eval = FALSE-------------------------------------------------------------
# # obtain dataset.
# db = getBiogridData() # retrieves latest release.
# # db = getBiogridData("3.2.96") # to get a specific release.
#
# # check release:
# db$release
# db$data
#
# # process PPI data for different organisms (currently supported human and mouse):
# biogrid_hs = processBiogrid(db, org = "human")
# biogrid_mm = processBiogrid(db, org = "mouse")
## ----eval=FALSE---------------------------------------------------------------
# library(PSICQUIC)
# psicquic <- PSICQUIC()
# providers(psicquic)
#
# # obtain BioGrid human PPIs (as data.frame):
# tbl = interactions(psicquic, species="9606",provider="BioGrid")
#
# # the target and source node information needs to be polished (i.e. must be Entrez gene id only)
# biogrid_hs = data.frame(source=tbl$A,target=tbl$B)
# biogrid_hs$source = sub(".*locuslink:(.*)\\|BIOGRID:.*","\\1", biogrid_hs$source)
# biogrid_hs$target = sub(".*locuslink:(.*)\\|BIOGRID:.*","\\1", biogrid_hs$target)
#
# # create graph.
# library(igraph)
# biogrid_hs=graph.data.frame(biogrid_hs,directed=FALSE)
# biogrid_hs=simplify(biogrid_hs)
#
# # annotate with symbols.
# library(org.Hs.eg.db)
# V(biogrid_hs)$label=select(org.Hs.eg.db,keys=V(biogrid_hs)$name,columns=c("SYMBOL"))$SYMBOL
## -----------------------------------------------------------------------------
# read motif enrichment results.
motif_file = system.file("extra/sox2_motif_list.rda", package = "rTRM")
load(motif_file)
length(motif_list)
head(motif_list)
## -----------------------------------------------------------------------------
# get the corresponding gene.
tfs_list = getOrthologFromMatrix(motif_list, organism = "mouse")
tfs_list = unique(unlist(tfs_list, use.names = FALSE))
length(tfs_list)
head(tfs_list)
## -----------------------------------------------------------------------------
# load expression data.
eg_esc_file = system.file("extra/ESC-expressed.txt", package = "rTRM")
eg_esc = scan(eg_esc_file, what = "")
length(eg_esc)
head(eg_esc)
tfs_list_esc = tfs_list[tfs_list %in% eg_esc]
length(tfs_list_esc)
head(tfs_list_esc)
## -----------------------------------------------------------------------------
# load and process PPI data.
biogrid_mm()
data(biogrid_mm)
ppi = biogrid_mm
vcount(ppi)
ecount(ppi)
# remove outliers.
f = c("Ubc", "Sumo1", "Sumo2", "Sumo3")
f=select(org.Mm.eg.db,keys=f,columns="ENTREZID",keytype="SYMBOL")$ENTREZID
f
ppi = removeVertices(ppi, f)
vcount(ppi)
ecount(ppi)
# filter by expression.
ppi_esc = induced.subgraph(ppi, V(ppi)[ name %in% eg_esc ])
vcount(ppi_esc)
ecount(ppi_esc)
# ensure a single component.
ppi_esc = getLargestComp(ppi_esc)
vcount(ppi_esc)
ecount(ppi_esc)
## -----------------------------------------------------------------------------
# define target.
target = select(org.Mm.eg.db,keys="Sox2",columns="ENTREZID",keytype="SYMBOL")$ENTREZID
target
# find TRM.
s = findTRM(ppi_esc, target, tfs_list_esc, method = "nsa", max.bridge = 1)
vcount(s)
ecount(s)
## ----fig.cap = "Sox2 specific TRM in ESCs."-----------------------------------
# generate layout (order by cluster, then label)
cl = getConcentricList(s, target, tfs_list_esc)
l = layout.concentric(s, cl, order = "label")
# plot TRM.
plotTRM(s, layout = l, vertex.cex = 15, label.cex = .8)
plotTRMlegend(s, title = "ESC Sox2 TRM", cex = .8)
## -----------------------------------------------------------------------------
library(rTRM)
library(BSgenome.Mmusculus.UCSC.mm8.masked) # Sox2 peaks found against mm8
library(PWMEnrich)
registerCoresPWMEnrich(1) # register number of cores for parallelization in PWMEnrich
library(MotifDb)
# select mouse PWMs:
sel.mm = values(MotifDb)$organism %in% c("Mmusculus")
pwm.mm = MotifDb[sel.mm]
## -----------------------------------------------------------------------------
# generate logn background model of PWMs:
p = as.list(pwm.mm)
p = lapply(p, function(x) round(x * 100))
p = lapply(p, function(x) t(apply(x, 1, as.integer)))
## ----eval=FALSE---------------------------------------------------------------
# pwm_logn = makeBackground(p, Mmusculus, type = "logn")
## ----echo=FALSE---------------------------------------------------------------
load(system.file("extra/pwm_mm_logn.rda", package = "rTRM"))
## -----------------------------------------------------------------------------
sox2_bed = read.table(system.file("extra/ESC_Sox2_peaks.txt", package = "rTRM"))
colnames(sox2_bed) = c("chr", "start", "end")
sox2_seq = getSequencesFromGenome(sox2_bed, Mmusculus, append.id="Sox2")
# PWMEnrich throws an error if the sequences are shorter than the motifs so we filter those sequences.
min.width = max(sapply(p, ncol))
sox2_seq_filter = sox2_seq[width(sox2_seq) >= min.width]
## ----eval=FALSE---------------------------------------------------------------
# # find enrichment:
# sox2_enr = motifEnrichment(sox2_seq_filter, pwms=pwm_logn, group.only=TRUE)
## ----echo=FALSE---------------------------------------------------------------
load(system.file("extra/sox2_enr.rda", package = "rTRM"))
## ----fig.cap="Density of log2(raw.score) for group. The selected cutoff is indicated with a red line."----
res = groupReport(sox2_enr)
plot(density(res$raw.score),main="",log="x",xlab="log(raw.score)")
abline(v=log2(5),col="red")
mtext(text="log2(5)",at=log2(5),side=3,cex=.8,col="red")
res.gene = unique(values(MotifDb[res$id[res$raw.score > 5]])$geneId)
res.gene = unique(na.omit(res.gene))
## ----fig.cap="Sox2 TRM identified using PWMEnrich for the motif enrichment."----
data(biogrid_mm)
ppi = biogrid_mm
vcount(ppi)
ecount(ppi)
f = c("Ubc", "Sumo1", "Sumo2", "Sumo3")
f=select(org.Mm.eg.db,keys=f,columns="ENTREZID",keytype="SYMBOL")$ENTREZID
ppi = removeVertices(ppi, f)
vcount(ppi)
ecount(ppi)
# filter by expression.
eg_esc = scan(system.file("extra/ESC-expressed.txt", package = "rTRM"), what = "")
ppi_esc = induced.subgraph(ppi, V(ppi)[ name %in% eg_esc ])
vcount(ppi_esc)
ecount(ppi_esc)
# ensure a single component.
ppi_esc = getLargestComp(ppi_esc)
vcount(ppi_esc)
ecount(ppi_esc)
sox2.gene = select(org.Mm.eg.db,keys="Sox2",columns="ENTREZID",keytype="SYMBOL")$ENTREZID
sox2_trm = findTRM(ppi_esc, target=sox2.gene, query = res.gene)
cl = getConcentricList(sox2_trm, t=sox2.gene,e=res.gene)
l = layout.concentric(sox2_trm, concentric=cl, order="label")
plotTRM(sox2_trm, layout = l, vertex.cex = 15, label.cex = .8)
plotTRMlegend(sox2_trm, title = "ESC Sox2 TRM", cex = .8)
## ----fig.cap="Similarity between the TRMs predicted using motif enrichment information from PWMEnrich and HOMER."----
m=getSimilarityMatrix(list(PWMEnrich=sox2_trm,HOMER=s))
m
d=as.data.frame.table(m)
g = ggplot(d, aes(x = Var1, y = Var2, fill = Freq))
g + geom_tile() + scale_fill_gradient2(limit = c(0, 100), low = "white", mid = "darkblue", high = "orange", guide = guide_legend("similarity", reverse=TRUE), midpoint = 50) + xlab(NULL) + ylab(NULL) + theme(aspect.ratio = 1, axis.text.x=element_text(angle = 90, vjust = .5, hjust = 1))
## ----fig.width=2.5,fig.height=2.5,fig.cap="Sox2 TRM obtained with PWMEnrich workflow and layout.concentric is shown in the left. Same TRM with layout.arc is shown in the right."----
plotTRM(sox2_trm, layout = l, vertex.cex = 15, label.cex = .7)
l=layout.arc(sox2_trm,target=sox2.gene,query=res.gene)
plotTRM(sox2_trm, layout=l,vertex.cex=15,label.cex=.7)
## -----------------------------------------------------------------------------
citation(package="rTRM")
## -----------------------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.