## Document-wide knitr chunk defaults: show the code, collapse source and
## output into a single block, suppress messages, and render figures at 70%
## of the available width and height.
knitr::opts_chunk$set(
    echo = TRUE,
    collapse = TRUE,
    message = FALSE,
    out.width = "70%",
    out.height = "70%"
)

References

Publication

Oh S, Geistlinger L, Ramos M, Blankenberg D, van den Beek M, Taroni JN, Carey VJ, Waldron L, Davis S. GenomicSuperSignature facilitates interpretation of RNA-seq experiments through robust, efficient comparison to public databases. Nature Communications 2022;13: 3695. doi: 10.1038/s41467-022-31411-3

Bioconductor Package

GenomicSuperSignature

Use cases and reproducible code

GenomicSuperSignaturePaper

Setup

Load packages

## Install GenomicSuperSignature from GitHub only when it is not already
## available, so that re-running this document does not trigger a reinstall
## (and the network access that comes with it) every time.
if (!requireNamespace("GenomicSuperSignature", quietly = TRUE)) {
    BiocManager::install("shbrief/GenomicSuperSignature")
}

## Attach the packages used below, silencing their startup banners.
## The attach order is kept as-is because it determines name masking.
suppressPackageStartupMessages(library(GenomicSuperSignature))
suppressPackageStartupMessages(library(bcellViper))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(AnVIL))

Load RAVmodels

## Retrieve the two RAVmodels used in the rest of this walkthrough.
## Each call is wrapped in system.time() so that its run time is printed.
system.time({
    RAVmodel <- getModel("PLIERpriors")
})
system.time({
    RAVmodel_C2 <- getModel("C2")
})

Database Search

## Inspect the model and the gene sets attached to it
RAVmodel
geneSets(RAVmodel)

## Input data
## (`dset` is presumably the object provided by the bcellViper data -- it is
## not created anywhere else in this document)
data("bcellViper")
dset

## Validate the input against the model (timed) and show the result as a
## heatmap table
system.time({
    val_all <- validate(dset, RAVmodel)
})
heatmapTable(val_all, RAVmodel)

## Indices of the validated signatures; the fifth one is examined below
val_ind <- validatedSignatures(val_all, RAVmodel, indexOnly = TRUE)
rav_ind <- val_ind[5]

## MeSH terms
# for (i in val_ind) {drawWordcloud(RAVmodel, ind = i)}
drawWordcloud(RAVmodel, rav_ind)

## GSEA
as.data.frame(subsetEnrichedPathways(RAVmodel, rav_ind))

## Relevant studies
findStudiesInCluster(RAVmodel, rav_ind, studyTitle = TRUE)

## Misc metadata
getRAVInfo(RAVmodel, rav_ind)
getStudyInfo(RAVmodel, "SRP095405")

[Slide 17] TCGA-BRCA

## The TCGA validation datasets are stored in a Google Cloud bucket and are
## streamed from there with the AnVIL package
dir <- "gs://genomic_super_signature"
fpath <- file.path(dir, "TCGA_validationDatasets.rda")

## Load the data into the workspace
## NOTE(review): this pipe relies on gsutil_pipe()'s default open mode, while
## the E-MTAB-2452 chunk passes open = "rb" -- confirm the default suits load().
load(gsutil_pipe(fpath))

## Panel B: validate the BRCA dataset against the C2 model (timed) and show
## the result as a heatmap table
brca <- TCGA_validationDatasets[["BRCA"]]
system.time({
    val_brca <- validate(brca, RAVmodel_C2)
})
heatmapTable(val_brca, RAVmodel_C2)

## Panels C-E all look at the same RAV index.
## NOTE(review): these panels query RAVmodel, whereas panel B validates
## against RAVmodel_C2 -- confirm that mixing the two models is intended.
brca_rav <- 221

## Panel C
drawWordcloud(RAVmodel, brca_rav)

## Panel D
findStudiesInCluster(RAVmodel, brca_rav, studyTitle = TRUE)

## Panel E
as.data.frame(subsetEnrichedPathways(RAVmodel, brca_rav, include_nes = TRUE))
# annotateRAV(RAVmodel, 221, n = 10)

[Slide 18] Annotated PCA plot

E-MTAB-2452 dataset

## The data file stored in Google Cloud Bucket using AnVIL package
dir <- "gs://genomic_super_signature"
fpath <- file.path(dir, "E-MTAB-2452_hugene11st_SCANfast_with_GeneSymbol.pcl")

## Open a binary pipe to the remote file. Because the connection is opened
## explicitly here, it also has to be closed explicitly once reading is done.
x <- gsutil_pipe(fpath, open = "rb")

## Load the data, closing the pipe afterwards even if reading fails
annot.dat <- tryCatch(
    as.data.frame(readr::read_tsv(x, show_col_types = FALSE)),
    finally = close(x)
)

## NOTE(review): row names are assigned from column 2 here and again from
## `GeneSymbol` below -- presumably the same column; confirm and drop one.
rownames(annot.dat) <- annot.dat[, 2]

## Expression matrix: every column from the third onwards, with the values of
## the GeneSymbol column as row names
dataset <- as.matrix(annot.dat[, 3:ncol(annot.dat)])
rownames(dataset) <- annot.dat$GeneSymbol
dataset[1:3, 1:3]

## Validate the dataset against the model (timed)
system.time(val_all <- validate(dataset, RAVmodel))

## Annotate PC2 without simplification, then PC1 to PC3 with a score cutoff of 0
annotatePC(2, val_all, RAVmodel, simplify = FALSE)
annotatePC(1:3, val_all, RAVmodel, scoreCutoff = 0)

Label each sample with its known cell type.

## Derive a cell-type label per sample: keep the part of each column name
## before the first underscore, then expand the marker into a descriptive label.
cell_labels <- c(
    CD4 = "CD4,T cell",
    CD14 = "CD14,monocyte",
    CD16 = "CD16,neutrophil"
)
cellType <- gsub("_.*$", "", colnames(dataset))
## Substitutions are applied one marker at a time, in the order listed above
for (marker in names(cell_labels)) {
    cellType <- gsub(marker, cell_labels[[marker]], cellType)
}
cellType <- setNames(cellType, colnames(dataset))

## PCA plot of PC2 vs PC3, colored by the labels built above
plotAnnotatedPCA(
    dataset, RAVmodel, c(2, 3), val_all,
    scoreCutoff = 0.3,
    color_by = cellType,
    color_lab = "Cell Type"
)

Session Info

## Record the R version and the attached/loaded packages for reproducibility
utils::sessionInfo()



shbrief/GenomicSuperSignaturePaper documentation built on Aug. 2, 2022, 2:04 p.m.