tmod.R
In tmod: Feature Set Enrichment Analysis for Metabolomics and Transcriptomics

## ----setup, echo=FALSE, results="hide", include=FALSE-------------------------
library(knitr)
opts_chunk$set(cache=TRUE, autodep=TRUE, tidy=FALSE, fig.width=5, warning=FALSE, fig.height=5, width=60)
opts_knit$set(width=60)
is.internet <- FALSE

## ----eval=TRUE,echo=FALSE-----------------------------------------------------
library(ggplot2)
theme_set(theme_minimal())

## ----qsg1---------------------------------------------------------------------
library(tmod)
data(EgambiaResults)
tt <- EgambiaResults
tt <- tt[ order(tt$adj.P.Val), ]
l  <- tt$GENE_SYMBOL

## ----qsg2---------------------------------------------------------------------
res <- tmodCERNOtest(l)
head(res)

## ----qsg3---------------------------------------------------------------------
ggPanelplot(res)

## ----qsg4,eval=FALSE----------------------------------------------------------
#  ggEvidencePlot(l, "LI.M37.0", gene.labels=FALSE)
#  ggEvidencePlot(l, "DC.M4.2")
#  ggEvidencePlot(l, "DC.M3.4")
#  ggEvidencePlot(l, "DC.M1.2")

## ----fig.width=10,fig.height=10,echo=FALSE------------------------------------
library(cowplot)
g1 <- ggEvidencePlot(l, "LI.M37.0", gene.labels=FALSE) +
  ggtitle("LI.M37.0")
g2 <- ggEvidencePlot(l, "DC.M4.2") +
  ggtitle("DC.M4.2")
g3 <- ggEvidencePlot(l, "DC.M3.4") +
  ggtitle("DC.M3.4")
g4 <- ggEvidencePlot(l, "DC.M1.2") +
  ggtitle("DC.M1.2")
plot_grid(g1, g2, g3, g4)

## -----------------------------------------------------------------------------
getModuleMembers("DC.M1.2")

## ----twoA,eval=TRUE-----------------------------------------------------------
library(tmod)
data(Egambia)
E <- as.matrix(Egambia[,-c(1:3)])

## ----limma,eval=FALSE---------------------------------------------------------
#  library(limma)
#  design <- cbind(Intercept=rep(1, 30), TB=rep(c(0,1), each= 15))
#  fit <- eBayes(lmFit(E, design))
#  tt <- topTable(fit, coef=2, number=Inf,
#    genelist=Egambia[,1:3])

## ----twoA.2-------------------------------------------------------------------
data(EgambiaResults)
tt <- EgambiaResults

## ----twoA2,echo=FALSE, results="asis"-----------------------------------------
library(pander)
options(digits=3)
tmp <- tt[,c("GENE_SYMBOL", "GENE_NAME", "logFC", "adj.P.Val")]
rownames(tmp) <- NULL
pandoc.table(head(tmp), split.tables=Inf, justify="llrr")

## ----basic2, fig.width=4, fig.height=4----------------------------------------
group <- rep( c("CTRL", "TB"), each=15)
showGene(E["20799",], group,
  main=Egambia["20799", "GENE_SYMBOL"])

## ----fourB--------------------------------------------------------------------
l    <- tt$GENE_SYMBOL
resC <- tmodCERNOtest(l)
head(resC, 15)

## ----panelplot00,fig.width=14,fig.height=7------------------------------------
library(cowplot)

g1 <- ggPanelplot(list(Gambia=resC))

## calculate the number of significant genes
## per module
sgenes <- tmodDecideTests(g=tt$GENE_SYMBOL,
  lfc=tt$logFC,
  pval=tt$adj.P.Val)
names(sgenes) <- "Gambia"
g2 <- ggPanelplot(list(Gambia=resC), sgenes = sgenes)
plot_grid(g1, g2, labels=c("A", "B"))

## ----mod1-------------------------------------------------
fg <- tt$GENE_SYMBOL[tt$adj.P.Val < 0.05 & abs( tt$logFC ) > 1]
resHG <- tmodHGtest(fg=fg, bg=tt$GENE_SYMBOL)
options(width=60)
resHG

## ----mod2-------------------------------------------------
l    <- tt$GENE_SYMBOL
resU <- tmodUtest(l)
head(resU)
nrow(resU)

## ----mod2cerno--------------------------------------------
l    <- tt$GENE_SYMBOL
resCERNO <- tmodCERNOtest(l)
head(resCERNO)
nrow(resCERNO)

## ----plage------------------------------------------------
tmodPLAGEtest(Egambia$GENE_SYMBOL, Egambia[,-c(1:3)], group=group)

## ----heatmap0---------------------------------------------
m <- "LI.M75"
## getModuleMembers returns a list – you can choose to 
## select multiple modules
genes <- getModuleMembers(m)[[1]]
sel <- Egambia$GENE_SYMBOL %in% genes
x <- data.matrix(Egambia)[sel, -c(1:3)] # expression matrix

## ----heatmap1,fig.width=12,fig.height=7,echo=FALSE--------
x0 <- t(scale(t(x)))

cols <- colorRampPalette(c("blue", "white", "red"))(17) 
library(gplots)
heatmap.2(x0, trace="n", 
  labRow=Egambia$GENE_SYMBOL[sel], 
  scale="n", 
  dendrogram="r", Colv=F, 
  col=cols,
  breaks=seq(-2.5, 2.5, length.out=18))

## ----heatmap2,fig.width=10,fig.height=7,echo=FALSE--------
## per group means and standard deviations
ms <- apply(x, 1, function(xx) tapply(xx, group, mean))
sd <- apply(x, 1, function(xx) tapply(xx, group, sd))

library(plotwidgets)
plot(NULL, bty="n", ylim=range(x), xlim=c(0.5, 2.5), 
  xaxt="n", xlab="group", ylab="Expression")
axis(1, at=c(1,2), labels=unique(group))
errbar <- function(x, y0, y1, w=0.1, ...) {
  w <- w/2
  segments(x, y0, x, y1, ...)
  segments(x-w, y0, x+w, y0, ...)
  segments(x-w, y1, x+w, y1, ...)
}

cols <- plotPals("default")
n <- ncol(ms)
segments(1, ms[1,], 2, ms[2,], col=cols, lwd=2)
points(rep(1, n), ms[1,], pch=19, col=cols)
points(rep(2, n), ms[2,], pch=19, col=cols)
errbar(1, ms[1,]-sd[1,], ms[1,]+sd[1,], col=cols)
errbar(2, ms[2,]-sd[2,], ms[2,]+sd[2,], col=cols)

## ----eigengene,fig.width=8,fig.height=5-------------------
par(mfrow=c(1,2))
eig <- eigengene(Egambia[,-c(1:3)], Egambia$GENE_SYMBOL)
showGene(eig["LI.M75", ], group, 
  ylab="Eigengene",
  main="antiviral Interferon signature")
showGene(eig["LI.M16", ], group, 
  ylab="Eigengene",
  main="TLR and inflammatory signaling")

## ----five,fig=TRUE,fig.width=7,fig.height=4---------------
l    <- tt$GENE_SYMBOL
theme_set(theme_minimal())
ggEvidencePlot(l, "LI.M75") 

## ----fig.width=12-----------------------------------------
library(purrr)
sel <- c("LI.M67", "LI.M37.0")
plots <- map(sel, ~ ggEvidencePlot(l, .x, gene.labels=FALSE))
plot_grid(plotlist = plots, labels=sel)

## ---------------------------------------------------------
foo <- tmodCERNOtest(l) %>% dplyr::filter(ID %in% sel) 
foo %>% knitr::kable()

## ----fourC,size="tiny"------------------------------------
resAll <- list(CERNO=resC, U=resU, HG=resHG)
#head(tmodSummary(resAll))

## ----fourC2,results="asis",echo=FALSE,size="tiny"---------
tmp <- tmodSummary(resAll)
rownames(tmp) <- NULL
pandoc.table(head(tmp), split.tables=Inf, justify="llrrrrrr")

## ----panelplots, fig.width=8, fig.height=6----------------
resAll$HG$AUC <- log10(resAll$HG$E) - 0.5
ggPanelplot(resAll)

## ----panelplots2, fig.width=8, fig.height=5---------------
ggPanelplot(resAll, q_thr=1e-3)

## ----pie, fig.width=10,fig.height=6-----------------------
degs <- tmodDecideTests(g=tt$GENE_SYMBOL, lfc=tt$logFC,
                       pval=tt$adj.P.Val)[[1]]
degs <- list(CERNO=degs, HG=degs, U=degs)
ggPanelplot(resAll, sgenes = degs)

## ----ankrd22----------------------------------------------
x <- E[ match("ANKRD22", Egambia$GENE_SYMBOL), ]
cors <- t(cor(x, t(E)))
ord <- order(abs(cors), decreasing=TRUE)
head(tmodCERNOtest(Egambia$GENE_SYMBOL[ ord ]))

## ----m75--------------------------------------------------
g <- getGenes("LI.M75", genes=Egambia$GENE_SYMBOL, 
              as.list=TRUE)
x <- E[ match(g[[1]], Egambia$GENE_SYMBOL), ]

## calculating the "eigengene" (PC1)
pca <- prcomp(t(x), scale.=T)
eigen <- pca$x[,1]
cors <- t(cor(eigen, t(E)))

## order all genes by the correlation between the gene and the PC1
ord <- order(abs(cors), decreasing=TRUE)
head(tmodCERNOtest(Egambia$GENE_SYMBOL[ ord ]))

## ----six,fig=TRUE,fig.width=8,fig.height=5----------------
mypal <- c("#E69F00", "#56B4E9")
pca <- prcomp(t(E), scale.=TRUE)

col <- mypal[ factor(group) ]
par(mfrow=c(1, 2))
l<-pcaplot(pca, group=group, col=col)
 
legend("topleft", as.character(l$groups),
       pch=l$pch,
       col=l$colors, bty="n")
l<-pcaplot(pca, group=group, col=col, components=3:4)
legend("topleft", as.character(l$groups),
       pch=l$pch,
       col=l$colors, bty="n")

## ----seven------------------------------------------------
o <- order(abs(pca$rotation[,4]), decreasing=TRUE)
l <- Egambia$GENE_SYMBOL[o]
res <- tmodUtest(l)
head(res)

## ----pcasum-----------------------------------------------
# Calculate enrichment for each component
gs   <- Egambia$GENE_SYMBOL
# function calculating the enrichment of a PC
gn.f <- function(r) {
    tmodCERNOtest(gs[order(abs(r), decreasing=T)],
                qval=0.01)
}
x <- apply(pca$rotation, 2, gn.f)
tmodSummary(x, filter.empty=TRUE)[1:5,]

## ----pcsum2,fig=TRUE,fig.width=8,fig.height=5,results="hide"----
ggPanelplot(x)

## ----pcsum3,fig=TRUE,fig.width=8,fig.height=5-------------
qfnc <- function(r) quantile(r, 0.75)
qqs <- apply(pca$rotation[,1:10], 2, qfnc)
gloadings <- tmodDecideTests(gs, lfc=pca$rotation[,1:10], lfc.thr=qqs)
ggPanelplot(x[1:10], sgenes = gloadings) 

## ----fiveB------------------------------------------------
l    <- tt$GENE_SYMBOL
res2 <- tmodUtest(l, mset="LI")
head( res2 )

## ----tmodobj----------------------------------------------
data(tmod)
tmod

## ----tmodobj2---------------------------------------------
length(tmod)
sel <- grep("Interferon", tmod$gs$Title, ignore.case=TRUE)
ifn <- tmod[sel]
ifn
length(ifn)

## ----exmset-----------------------------------------------
mymset <- makeTmodGS(
  gs=data.frame(ID=c("A", "B"),
             Title=c("A title", 
                      "B title")),
  gs2gene=list(
    A=c("G1", "G2"),
    B=c("G3", "G4"))
)
mymset

## ----eval=FALSE-------------------------------------------
#  library(msigdbr)
#  msig <- msigdbr()
#  msig <- makeTmodFromDataFrame(df=msig,
#    feature_col="gene_symbol",
#    module_col="gs_id", title_col="gs_name",
#    extra_module_cols=c("gs_cat", "gs_subcat", "gs_url",
#                        "gs_exact_source", "gs_description"))

## ----msigdbparse1, eval=FALSE-----------------------------
#  msig <- tmodImportMSigDB("msigdb_v2022.1.Hs.xml")

## ----msigdb6,eval=FALSE-----------------------------------
#  res <- tmodCERNOtest(tt$GENE_SYMBOL, mset=msig)

## ----msigdb7,eval=FALSE-----------------------------------
#  sel <- msig$gs$gs_cat == "H"
#  tmodCERNOtest(tt$GENE_SYMBOL, mset=msig[sel] )

## ----biomart,eval=FALSE-----------------------------------
#  library(biomaRt)
#  mart <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
#  bm <- getBM(filters="hgnc_symbol",
#              values = Egambia$GENE_SYMBOL,
#              attributes = c( "hgnc_symbol", "entrezgene", "reactome", "go_id", "name_1006", "go_linkage_type"),
#              mart=mart)

## ----biomart2,eval=FALSE----------------------------------
#  m2g_r <- with(bm[ bm$reactome != "", ], split(hgnc_symbol, reactome))
#  m2g_g <- with(bm[ bm$go_id != "", ], split(hgnc_symbol, go_id))
#  
#  ll <- lengths(m2g_r)
#  m2g_r <- m2g_r[ ll >= 5 & ll <= 250 ]
#  ll <- lengths(m2g_g)
#  m2g_g <- m2g_g[ ll >= 5 & ll <= 250 ]
#  
#  m_r <- data.frame(ID=names(m2g_r), Title=names(m2g_r))
#  m_g <- data.frame(ID=names(m2g_g),
#    Title=bm$name_1006[ match(names(m2g_g), bm$go_id)])
#  
#  ensemblR  <- makeTmod(modules=m_r, modules2genes=m2g_r)
#  ensemblGO <- makeTmod(modules=m_g, modules2genes=m2g_g)
#  
#  ## these objects are no longer necessary
#  rm(bm, m_g, m_r, m2g_r, m2g_g)

## ----orghs,eval=FALSE-------------------------------------
#  library(org.Hs.eg.db)
#  mtab <- toTable(org.Hs.egGO)

## ----orghs2,eval=FALSE------------------------------------
#  mtab <- mtab[ mtab$Ontology == "BP", ]
#  m2g <- split(mtab$gene_id, mtab$go_id)
#  ## remove the rather large object
#  rm(mtab)
#  ll <- lengths(m2g)
#  m2g <- m2g[ ll >= 10 & ll <= 100 ]
#  length(m2g)

## ----orghs3,eval=FALSE------------------------------------
#  library(GO.db)
#  gt <- toTable(GOTERM)
#  m <- data.frame(ID=names(m2g))
#  m$Title <- gt$Term[ match(m$ID, gt$go_id) ]
#  
#  goset <- makeTmod(modules=m, modules2genes=m2g)
#  rm(gt, m2g, m)

## ----kegg, eval=FALSE-------------------------------------
#  library(KEGGREST)
#  pathways <- keggLink("pathway", "hsa")
#  
#  ## get pathway Names in addition to IDs
#  paths    <- sapply(unique(pathways), function(p) keggGet(p)[[1]]$NAME)
#  m <- data.frame(ID=unique(pathways), Title=paths)
#  
#  ## m2g is the mapping from modules (pathways) to genes
#  m2g <- split(names(pathways), pathways)
#  
#  ## kegg object can now be used with tmod
#  kegg <- makeTmod(modules=m, modules2genes=m2g)

## ----kegg2, eval=FALSE------------------------------------
#  eg <- paste0("hsa:", tt$EG)
#  tmodCERNOtest(eg, mset="kegg")

## ----msigdb1,eval=FALSE-----------------------------------
#  library(XML)
#  foo  <- xmlParse( "msigdb_v2022.1.Hs.xml" )
#  foo2 <- xmlToList(foo)

## ----msigdb2,eval=FALSE-----------------------------------
#  path1 <- foo2[[1]]
#  class(path1)

## ----msigdb2b,eval=FALSE----------------------------------
#  names(path1)

## ----msigdb3,eval=FALSE-----------------------------------
#  orgs <- sapply(foo2, function(x) x["ORGANISM"])
#  unique(orgs)
#  
#  foo3 <- foo2[ orgs == "Homo sapiens" ]
#  foo3 <- foo3[ ! sapply(foo3, is.null) ]

## ----msigdb4,eval=FALSE-----------------------------------
#  modules <- t(sapply(foo3,
#    function(x)
#      x[ c("SYSTEMATIC_NAME", "STANDARD_NAME", "CATEGORY_CODE", "SUBCATEGORY_CODE") ]))
#  colnames(modules) <- c( "ID", "Title", "Category", "Subcategory" )
#  modules <- data.frame(modules, stringsAsFactors=FALSE)
#  nrow(modules)

## ----msigdb5,eval=FALSE-----------------------------------
#  m2g <- lapply(foo3,
#    function(x) strsplit( x["MEMBERS_SYMBOLIZED"], "," )[[1]])
#  names(m2g) <- modules$ID
#  
#  mymsig <- makeTmod(modules=modules, modules2genes=m2g)
#  mymsig
Any scripts or data that you put into this service are public.
tmod documentation built on March 31, 2023, 9 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
tmod
Feature Set Enrichment Analysis for Metabolomics and Transcriptomics

inst/doc/tmod.R
In tmod: Feature Set Enrichment Analysis for Metabolomics and Transcriptomics

Try the tmod package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

tmod Feature Set Enrichment Analysis for Metabolomics and Transcriptomics

inst/doc/tmod.R In tmod: Feature Set Enrichment Analysis for Metabolomics and Transcriptomics

Try the tmod package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

tmod
Feature Set Enrichment Analysis for Metabolomics and Transcriptomics

inst/doc/tmod.R
In tmod: Feature Set Enrichment Analysis for Metabolomics and Transcriptomics