knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.path = "bioconductor/" )
This article collects notes on Bioconductor packages, made available here to faciliate their use and extensions.
pkgs <- c("AnnotationDbi", "AnnotationFilter", "ComplexHeatmap", "DESeq2", "EnsDb.Hsapiens.v86", "FlowSorted.DLPFC.450k", "GeneNet", "GenomicFeatures", "IlluminaHumanMethylation450kmanifest", "OUTRIDER","RColorBrewer", "RMariaDB", "Rgraphviz", "S4Vectors", "SummarizedExperiment", "TxDb.Hsapiens.UCSC.hg38.knownGene", "bladderbatch", "clusterProfiler", "corpcor", "doParallel", "ensembldb", "fdrtool", "graph", "graphite", "heatmaply", "minfi", "org.Hs.eg.db", "plyr", "quantro", "recount3", "sva") for (p in pkgs) if (length(grep(paste("^package:", p, "$", sep=""), search())) == 0) { if (!requireNamespace(p)) warning(paste0("This vignette needs package `", p, "'; please install")) } invisible(suppressMessages(lapply(pkgs, require, character.only = TRUE)))
See inst/turboman
in the source, https://github.com/jinghuazhao/pQTLtools/tree/master/inst/turboman, or turboman/
directory in the installed package.
This is the documentation example, based on Bioconductor 3.14.
data(bladderdata, package="bladderbatch") edat <- bladderEset[1:50] pheno <- Biobase::pData(edat) batch <- pheno$batch table(batch) quantro::matboxplot(edat,batch,cex.axis=0.6,notch=TRUE,pch=19,ylab="Expression") quantro::matdensity(edat,batch,xlab=" ",ylab="density") legend("topleft",legend=1:5,col=1:5,lty=1) # 1. parametric adjustment combat_edata1 <- sva::ComBat(dat=edat, batch=batch, par.prior=TRUE, prior.plots=TRUE) # 2. non-parametric adjustment, mean-only version combat_edata2 <- sva::ComBat(dat=edat, batch=batch, par.prior=FALSE, mean.only=TRUE) # 3. reference-batch version, with covariates mod <- model.matrix(~as.factor(cancer), data=pheno) combat_edata3 <- sva::ComBat(dat=edat, batch=batch, mod=mod, par.prior=TRUE, ref.batch=3, prior.plots=TRUE)
This is also adapted from the package vignette but with FlowSorted.DLPFC.450k
in place of FlowSorted
.
data(FlowSorted.DLPFC.450k,package="FlowSorted.DLPFC.450k") p <- getBeta(FlowSorted.DLPFC.450k,offset=100) pd <- Biobase::pData(FlowSorted.DLPFC.450k) quantro::matboxplot(p, groupFactor = pd$CellType, xaxt = "n", main = "Beta Values", pch=19) quantro::matdensity(p, groupFactor = pd$CellType, xlab = " ", ylab = "density", main = "Beta Values", brewer.n = 8, brewer.name = "Dark2") legend('top', c("NeuN_neg", "NeuN_pos"), col = c(1, 2), lty = 1, lwd = 3) qtest <- quantro::quantro(object = p, groupFactor = pd$CellType) if (FALSE) { doParallel::registerDoParallel(cores=10) qtestPerm <- quantro::quantro(p, groupFactor = pd$CellType, B = 1000) quantro::quantroPlot(qtestPerm) }
The following is adapted from package OUTRIDER,
ctsFile <- system.file('extdata', 'KremerNBaderSmall.tsv', package='OUTRIDER') ctsTable <- read.table(ctsFile, check.names=FALSE) ods <- OUTRIDER::OutriderDataSet(countData=ctsTable) ods <- OUTRIDER::filterExpression(ods, minCounts=TRUE, filterGenes=TRUE) ods <- OUTRIDER::OUTRIDER(ods) res <- OUTRIDER::results(ods) knitr::kable(res,caption="A check list of outliers") OUTRIDER::plotQQ(ods, res["geneID"],global=TRUE)
ex <- DESeq2::makeExampleDESeqDataSet(m=4) dds <- DESeq2::DESeq(ex) res <- DESeq2::results(dds, contrast=c("condition","B","A")) rld <- DESeq2::rlogTransformation(ex, blind=TRUE) dat <- DESeq2::plotPCA(rld, intgroup=c("condition"),returnData=TRUE) percentVar <- round(100 * attr(dat,"percentVar")) ggplot2::ggplot(dat, ggplot2::aes(PC1, PC2, color=condition, shape=condition)) + ggplot2::geom_point(size=3) + ggplot2::xlab(paste0("PC1:",percentVar[1],"% variance")) + ggplot2::ylab(paste0("PC2:",percentVar[2],"% variance")) ex$condition <- relevel(ex$condition, ref="B") dds2 <- DESeq2::DESeq(dds) res <- DESeq2::results(dds2) knitr::kable(head(as.data.frame(res)))
See the package in action from a snakemake workflow @koster21.
A simple network is furnished with the GeneNet
documentation example,
## A random network with 40 nodes # it contains 780=40*39/2 edges of which 5 percent (=39) are non-zero true.pcor <- GeneNet::ggm.simulate.pcor(40) # A data set with 40 observations m.sim <- GeneNet::ggm.simulate.data(40, true.pcor) # A simple estimate of partial correlations estimated.pcor <- corpcor::cor2pcor( cor(m.sim) ) # A comparison of estimated and true values sum((true.pcor-estimated.pcor)^2) # A slightly better estimate ... estimated.pcor.2 <- GeneNet::ggm.estimate.pcor(m.sim) sum((true.pcor-estimated.pcor.2)^2) ## ecoli data data(ecoli, package="GeneNet") # partial correlation matrix inferred.pcor <- GeneNet::ggm.estimate.pcor(ecoli) # p-values, q-values and posterior probabilities for each potential edge test.results <- GeneNet::network.test.edges(inferred.pcor) # best 20 edges (strongest correlation) test.results[1:20,] # network containing edges with prob > 0.9 (i.e. local fdr < 0.1) net <- GeneNet::extract.network(test.results, cutoff.ggm=0.9) net # significant based on FDR cutoff Q=0.05? num.significant.1 <- sum(test.results$qval <= 0.05) test.results[1:num.significant.1,] # significant based on "local fdr" cutoff (prob > 0.9)? num.significant.2 <- sum(test.results$prob > 0.9) test.results[test.results$prob > 0.9,] # parameters of the mixture distribution used to compute p-values etc. c <- fdrtool::fdrtool(corpcor::sm2vec(inferred.pcor), statistic="correlation") c$param ## A random network with 20 nodes and 10 percent (=19) edges true.pcor <- GeneNet::ggm.simulate.pcor(20, 0.1) # convert to edge list test.results <- GeneNet::ggm.list.edges(true.pcor)[1:19,] nlab <- LETTERS[1:20] # graphviz # network.make.dot(filename="test.dot", test.results, nlab, main = "A graph") # system("fdp -T svg -o test.svg test.dot") # Rgraphviz gr <- GeneNet::network.make.graph( test.results, nlab) gr num.nodes(gr) edge.info(gr) gr2 <- GeneNet::network.make.graph( test.results, nlab, drop.singles=TRUE) gr2 GeneNet::num.nodes(gr2) GeneNet::edge.info(gr2) # plot network plot(gr, "fdp") plot(gr2, "fdp")
A side-by-side heatmaps
set.seed(123454321) m <- matrix(runif(2500),50) r <- cor(m) g <- as.matrix(r>=0.7)+0 f1 <- ComplexHeatmap::Heatmap(r) f2 <- ComplexHeatmap::Heatmap(g) f <- f1+f2 ComplexHeatmap::draw(f) df <- heatmaply::normalize(mtcars) hm <- heatmaply::heatmaply(df,k_col=5,k_row=5, colors = grDevices::colorRampPalette(RColorBrewer::brewer.pal(3, "RdBu"))(256)) htmlwidgets::saveWidget(hm,file="heatmaply.html") htmltools::tags$iframe(src = "heatmaply.html", width = "100%", height = "550px")
so we have heatmaply.html and a module analysis with WGCNA,
pwr <- c(1:10, seq(from=12, to=30, by=2)) sft <- WGCNA::pickSoftThreshold(dat, powerVector=pwr, verbose=5) ADJ <- abs(cor(dat, method="pearson", use="pairwise.complete.obs"))^6 dissADJ <- 1-ADJ dissTOM <- WGCNA::TOMdist(ADJ) TOM <- WGCNA::TOMsimilarityFromExpr(dat) Tree <- hclust(as.dist(1-TOM), method="average") for(j in pwr) { pam_name <- paste0("pam",j) assign(pam_name, cluster::pam(as.dist(dissADJ),j)) pamTOM_name <- paste0("pamTOM",j) assign(pamTOM_name,cluster::pam(as.dist(dissTOM),j)) tc <- table(get(pam_name)$clustering,get(pamTOM_name)$clustering) print(tc) print(diag(tc)) } colorStaticTOM <- as.character(WGCNA::cutreeStaticColor(Tree,cutHeight=.99,minSize=5)) colorDynamicTOM <- WGCNA::labels2colors(cutreeDynamic(Tree,method="tree",minClusterSize=5)) Colors <- data.frame(pamTOM6$clustering,colorStaticTOM,colorDynamicTOM) WGCNA::plotDendroAndColors(Tree, Colors, dendroLabels=FALSE, hang=0.03, addGuide=TRUE, guideHang=0.05) meg <- WGCNA::moduleEigengenes(dat, color=1:ncol(dat), softPower=6)
This section is based on package recount3
.
hs <- recount3::available_projects() dim(subset(hs,file_source=="gtex")) recount3::annotation_options("human") blood_rse <- recount3::create_rse(subset(hs,project=="BLOOD")) S4Vectors::metadata(blood_rse) SummarizedExperiment::rowRanges(blood_rse) colnames(SummarizedExperiment::colData(blood_rse))[1:20] recount3::expand_sra_attributes(blood_rse)
reactome <- graphite::pathways("hsapiens", "reactome") kegg <- graphite::pathways("hsapiens","kegg") pharmgkb <- graphite::pathways("hsapiens","pharmgkb") nodes(kegg[[21]]) kegg_t2g <- ldply(lapply(kegg, nodes), data.frame) names(kegg_t2g) <- c("gs_name", "gene_symbol") VEGF <- subset(kegg_t2g,gs_name=="VEGF signaling pathway")[[2]] eKEGG <- clusterProfiler::enricher(gene=VEGF, TERM2GENE = kegg_t2g, universe=, pAdjustMethod = "BH", pvalueCutoff = 0.1, qvalueCutoff = 0.05, minGSSize = 10, maxGSSize = 500)
An overview of annotation is available @carlson16.
options(width=200) # columns(org.Hs.eg.db) # keyref <- keys(org.Hs.eg.db, keytype="ENTREZID") # symbol_uniprot <- select(org.Hs.eg.db,keys=keyref,columns = c("SYMBOL","UNIPROT")) # subset(symbol_uniprot,SYMBOL=="MC4R") x <- EnsDb.Hsapiens.v86 ensembldb::listColumns(x, "protein", skip.keys=TRUE) ensembldb::listGenebiotypes(x) ensembldb::listTxbiotypes(x) ensembldb::listTables(x) ensembldb::metadata(x) ensembldb::organism(x) ensembldb::returnFilterColumns(x) ensembldb::seqinfo(x) ensembldb::seqlevels(x) ensembldb::updateEnsDb(x) ensembldb::genes(x, columns=c("gene_name"), filter=list(SeqNameFilter("X"), AnnotationFilter::GeneBiotypeFilter("protein_coding"))) ensembldb ::transcripts(x, columns=ensembldb::listColumns(x, "tx"), filter = AnnotationFilter::AnnotationFilterList(), order.type = "asc", return.type = "GRanges") txdbEnsemblGRCh38 <- GenomicFeatures::makeTxDbFromEnsembl(organism="Homo sapiens", release=98) txdb <- as.list(txdbEnsemblGRCh38) lapply(txdb,head) txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene # liverExprs <- quantifyExpressionsFromBWs(txdb = txdb,BWfiles=,experimentalDesign=)
Web: https://support.bioconductor.org/
Package | Description
--------|------------
Bioconductor |
AnnotationDbi | AnnotationDb objects and their progeny, methods etc.
Biobase | Base functions for Bioconductor
clusterProfiler | Functional profiles for genes and gene clusters
ComplexHeatmap | Make complex heatmaps
DESSeq2 | Differential gene expression analysis based on the negative binomial distribution
edgeR | Empirical analysis of digital gene expression
EnsDb.Hsapiens.v86 | Exposes an annotation databases generated from Ensembl
ensembldb | Retrieve annotation data from an Ensembl based package
FlowSorted.DLPFC.450k | Illumina HumanMethylation data on sorted frontal cortex cell populations
graphite | GRAPH Interaction from pathway topological environment
IlluminaHumanMethylation450kmanifest | Annotation for Illumina's 450k methylation arrays
INSPEcT | Quantification of the intronic and exonic gene features and the post-transcriptional regulation analysis
org.Hs.eg.db | Conversion of Entrez ID -- gene symbols
OUTRIDER | OUTlier in RNA-Seq fInDER
Pi | Priority index, leveraging genetic evidence to prioritise drug targets at the gene and pathway level
quantro | A test for when to use quantile normalisation
recount3 | Interface to uniformly processed RNA-seq data
Rgraphiz | Interfaces R with the AT&T graphviz library for plotting R graph objects from the graph package
sva | Surrogate Variable Analysis
TxDb.Hsapiens.UCSC.hg38.knownGene | Annotation of the human genome
CRAN |
doParallel | Foreach Parallel Adaptor for the 'parallel' Package
GeneNet | Modeling and Inferring Gene Networks
ggplot2 | Data Visualisations Using the grammar of graphics
heatmaply | Interactive Cluster Heat Maps Using plotly
and ggplot2
pheatmap | results visualisation
plyr | Splitting, applying and combining data
RColorBrewer | ColorBrewer Palettes
WGCNA | Weighted correlation network analysis
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.