#' read species table from inst
#'
#' @export
speciesTable <- function(){
col_types <- readr::cols(
species = readr::col_character(),
database = readr::col_character(),
common = readr::col_character(),
clusterprofiler = readr::col_character(),
msigdbr = readr::col_character()
)
readr::read_csv(system.file("species.csv", package="deseq2analysis"), col_types=col_types, progress = FALSE) %>% dplyr::mutate(abase=paste("org.", database, sep=""))
}
#' reads species table and filters based on species name
#'
#' @param speciesName = Mm Hs At Dm ...
#' @param mapdb the map name to extract
#'
#' @export
species2map <- function(speciesName, mapdb){
m <- speciesTable() %>% dplyr::filter(species == speciesName) %>% dplyr::pull(!!rlang::sym(mapdb))
if(length(m) == 1){
m
} else {
NA
}
}
#' map of species to various database and species identifiers
#'
#' @param speciesName short name of species Mm Hs At Dm ..
#'
#' default: "org.speciesName.eg.db"
#'
#' @export
speciesIDTypeToLib <- function(speciesName){
abase <- species2map(speciesName, "abase")
clusterprof <- species2map(speciesName, "clusterprofiler")
msigdbr <- species2map(speciesName, "msigdbr")
if(is.na(abase)){
lib <- paste("org.", speciesName, ".eg.db",sep="")
}else{
lib <- paste(abase,".db",sep="")
}
list(lib=lib,clusterprofiler=clusterprof,msigdbr=msigdbr)
}
#' adds entrez id column to table based on geneid
#'
#' @param resultTable with geneid column
#' @param fromType one of the validIDtypes
#' @param orgdb the database e.g. org.MM.eg.db
#'
#' can return rows with duplicated geneids if multiple entrez for this gene id
#' can return NAs in entrez
#' can return rows with duplicated entrez if multiple geneids for entrez
#'
#' @export
getEntrezIds <- function(resultTable, fromType, libstr){
library(libstr, character.only = TRUE)
lib <- get(libstr)
entrez <- AnnotationDbi::select(lib, keys=resultTable$geneid, columns=c("ENTREZID","GENENAME","SYMBOL"), keytype=fromType)
tb <- entrez %>% dplyr::group_by_(fromType) %>%
dplyr::summarize(entrez=ENTREZID[1],entrezConcat=paste(unique(ENTREZID),sep=",",collapse=""),entrezCount=length(unique(ENTREZID)))
# symbol=SYMBOL[1],symbolConcat=paste(unique(SYMBOL),sep=","),symbolCount=length(unique(SYMBOL)),
# genename=GENENAME[1],genenameConcat=paste(unique(GENENAME),sep=","),symbolCount=length(unique(GENENAME)))
resultTable %>% dplyr::inner_join(tb, by=c(geneid=fromType))
}
#' filters gene universe by no NA entrez, no NA GO mapping
#' meanFilter of deseq2
#'
#' @param table the entrez annotatet table
#' @param libstr the annotation db eg org.Hs.eg.db
#'
#' @export
filterGenesGO <- function(table, libstr){
library(libstr, character.only = TRUE)
lib <- get(libstr)
ids <- table %>% dplyr::filter(!is.na(entrez)) %>% dplyr::pull(entrez)
wg <- AnnotationDbi::select(lib, keys=ids, columns="GO", keytype="ENTREZID")$ENTREZID
withGo <- table[table$entrez %in% as.character(wg),]
withMean <- withGo[!withGo$meanFilter,]
withMean
}
#' get collapsed GSEA pathways
#'
#' @param gseaL the complete gsea result
#'
#' @export
getGSEACollapsedPathways <- function(gseaL, pvalGo){
#LOG("called collapse")
gsea <- gseaL$gsea
filtered <- gsea %>% dplyr::filter(padj < pvalGo) %>% dplyr::arrange(pval)
#LOG("filtered collapse")
collapsed <- fgsea::collapsePathways(filtered, gseaL$pathways, gseaL$stats) #takes long
#LOG("collapsed")
collapsed$mainPathways
}
######## GOstats #######
#' tests for GO overrepresentation with GOStats
#'
#'
#' @export
testGO <- function(signifTable, entrezUniverse, ontology, libMap, pvalGo){
signifEntrez <- signifTable$entrez
if(length(signifEntrez) > 0){
tryCatch(
{
params <- new("GOHyperGParams", geneIds=signifEntrez, universeGeneIds=entrezUniverse, annotation=libMap$lib,
ontology=ontology, pvalueCutoff=pvalGo, conditional=TRUE, testDirection="over")
hgOver <- hyperGTest(params)
extractLong <- extract(hgOver, pvalGo, signifTable)
if(!is.na(extractLong)){
extractLong %>% dplyr::mutate(ontology=ontology)
} else {
NA
}
}, error = function(err){ LOG(paste("ERROR but continuing",err)); NA }
)
} else {
NA
}
}
#' tests for overrrepresentation in KEGG with GOstats
#'
#'
#' @export
testKEGG <- function(signifTable, entrezUniverse, libMap, pvalGo){
signifEntrez <- signifTable$entrez
params <- new("KEGGHyperGParams", geneIds=signifEntrez, universeGeneIds=entrezUniverse, annotation=libMap$lib,
pvalueCutoff=pvalGo, testDirection="over")
hgOver <- hyperGTest(params)
kegg <- summary(hgOver) # %>% dplyr::mutate(ontology=ontology)
kegg
}
#' tests for overrepresentation with ReactomePA
#'
#'
#' @export
testReactome <- function(signifTable, organism, pvalGo){
signifEntrez <- signifTable$entrez
ep <- ReactomePA::enrichPathway(gene=signifEntrez, organism=organism, pvalueCutoff=pvalGo, readable=T)
ep
}
#' tests for functional enrichment GO, kegg, react
#' saves result in RDS
#'
#' @export
testFunctionalEnrich <- function(signifTable, entrezUniverse, libMap, organism, pvalGo, regtype, outname){
LOG(paste("organism", organism, pvalGo, outname))
mf <- testGO(signifTable, entrezUniverse, "MF", libMap, pvalGo)
bp <- testGO(signifTable, entrezUniverse, "BP", libMap, pvalGo)
cc <- testGO(signifTable, entrezUniverse, "CC", libMap, pvalGo)
kegg <- NULL
react <- NULL
#TODO: detect PATH in keytypes and react ..
if(FALSE){ # keytypes(libMap$lib) contains PATH ...){
kegg <- testKEGG(signifTable, entrezUniverse, libMap, pvalGo)
}
if(FALSE){#! is.null(organism)){
react <- testReactome(signifTable, organism, pvalGo)
}
li <- list(regtype=regtype,mf=mf,bp=bp,cc=cc,kegg=kegg,react=react)
saveRDS(li, paste(outname,".RDS",sep=""))
invisible(li)
}
#' GSEA test
#'
#' saves result in RDS
#'
#' @export
testGSEA <- function(resultTable, organism, pvalGo, outname){
sortedTable <- resultTable[order(resultTable$log2FoldChange,decreasing = TRUE),]
geneList <- sortedTable$log2FoldChange
#LOG("got gene list")
names(geneList) <- sortedTable$entrez
geneList <- Filter(function(x){!is.na(x)}, geneList)
#LOG(paste("filtered gene list",organism))
geneSetsTable <- msigdbr::msigdbr(species = organism)
#LOG("extracted geneSets")
geneSets <- geneSetsTable %>% split(x = .$entrez_gene, f = .$gs_name)
geneSetsCat <- geneSetsTable %>%
dplyr::select(gs_name, gs_id, gs_cat) %>%
dplyr::group_by(gs_name, gs_id, gs_cat) %>%
dplyr::summarise(cat.size=dplyr::n()) %>%
dplyr::ungroup() ## gs_subcat
#LOG("got catalogs")
gsea <- fgsea::fgsea(pathways=geneSets, stats=geneList, minSize=15, maxSize=500, nperm=10000)
gseaj <- gsea %>% dplyr::inner_join(geneSetsCat,by=c(pathway="gs_name"))
#LOG("joined catalogs")
gseaL <- list(pathways=geneSets,stats=geneList,gsea=gseaj,pathway_table=geneSetsCat)
#LOG("before collapse")
collapsed <- getGSEACollapsedPathways(gseaL, pvalGo) ## takes long
gseaL$collapsed_pathways <- collapsed
gseaL$pvalGo <- pvalGo
saveRDS(gseaL, outname)
}
#' test functional enrichment with config parameters
#'
#' @param resultTable table of ID, etc..
#' @param outbasefun output base of functional annotation, should be valid path
#' @param deseqconfig deseq config
#'
#' @export
testfunctionalFromConfig <- function(resultTable, outbasefun, deseqconfig, comparisontitle){
functional <- deseqconfig$functional
organism <- functional$organism
idtype <- functional$idtype
pvalExp <- functional$adjp
pvalGo <- functional$pval
log2FCExp <- functional$log2fc
deseqconfig$comparisontitle <- comparisontitle #used for report
saveRDS(deseqconfig, paste(outbasefun, "_deseq2parameters.RDS", sep=""))
testFunctional(resultTable, organism, idtype, outbasefun, pvalExp, log2FCExp, pvalGo)
}
#' test functional enrichment
#'
#' @param resultTable table of ID, etc..
#' @param organism the organism abbreviated name: Mm, Hs, Dm, Ce, At
#' @param idtype type of id column in resultTable ENSEMBL etc..
#' @param outbasefun output base of functional annotation, should be valid path
#' @param pvalExp p-value cutoff for expression values
#' @param log2FCExp log2FC for expression values
#' @param pvalGo p-value cutoff for GO analysis
#'
#'
#' @return nothing
#'
#' saves multiple RDS files with GO etct..
#' @import GOstats
#' @import GO.db
#'
#' @export
testFunctional <- function(resultTable, organism, idtype, outbasefun, pvalExp, log2FCExp, pvalGo){
libMap <- speciesIDTypeToLib(organism)
libMapStr <- paste(libMap, collapse=" ", paste="")
#LOG(paste("species annotation map: ", libMapStr))
#LOG("testFunctional")
resultEntrez <- getEntrezIds(resultTable, idtype, libMap$lib)
filteredTable <- filterGenesGO( resultEntrez, libMap$lib )
entrezUniverse <- unique(filteredTable$entrez)
signifup <- subset(filteredTable, padj < pvalExp & log2FoldChange > log2FCExp )
signifdn <- subset(filteredTable, padj < pvalExp & log2FoldChange < -log2FCExp )
signifde <- subset(filteredTable, padj < pvalExp & abs(log2FoldChange) > log2FCExp )
testFunctionalEnrich(signifup, entrezUniverse, libMap, libMap$clusterprofiler, pvalGo, "upregulated", paste(outbasefun,"_up",sep=""))
testFunctionalEnrich(signifdn, entrezUniverse, libMap, libMap$clusterprofiler, pvalGo, "downregulated", paste(outbasefun,"_dn",sep=""))
testFunctionalEnrich(signifde, entrezUniverse, libMap, libMap$clusterprofiler, pvalGo, "deregulated", paste(outbasefun,"_de",sep=""))
#LOG("after signif filter"
if(FALSE){ #! is.null(libMap$msigdbr)){
testGSEA(resultEntrez, libMap$msigdbr, pvalGo, paste(outbasefun,"_gse.RDS",sep=""))
}
}
#' helper function for GO table
#' extracts the results in long format (i.e. one row per geneid that is in category)
#'
#'
#' @export
extract <- function(hyper, maxPval, signifTable, categorySize=NULL){
wanted <- getWantedResults(hyper, maxPval, categorySize)
pvals.all <- GOstats::pvalues(hyper)
ucounts.all <- GOstats::universeCounts(hyper)
if (!any(wanted)) {
NA
} else {
pvals <- pvals.all[wanted]
ucounts <- ucounts.all[wanted]
catIds <- names(pvals)
odds <- GOstats::oddsRatios(hyper)[wanted]
ecounts <- GOstats::expectedCounts(hyper)[wanted]
counts <- GOstats::geneCounts(hyper)[wanted]
catrows <- lapply(1:length(catIds), function(index){
goid <- catIds[index]
#goterm <- AnnotationDbi::Term(goid) ReactomePA hides this
goterm <- AnnotationDbi:::.GOid2go_termField(goid,"term")
nd <- graph::nodeData(GOstats::goDag(hyper))[[goid]]
geneIds <- nd$condGeneIds
fil <- subset(signifTable, entrez %in% geneIds)
entrez <- fil$entrez
gene.ids <- as.character(fil$geneid)
link <- paste0('<a target="_blank" href="http://amigo.geneontology.org/amigo/term/',goid,'">',goterm,'</a>',sep="")
tibble::tibble(go.id=goid, go.term=goterm, go.link=link, go.pval=nd$pvalue, go.oddsRatio=nd$oddsRatio,
go.expCount=nd$expCount, go.count=counts[index], go.size=ucounts[index],
go.gene.id=gene.ids,go.gene.ids=paste(gene.ids,collapse=","),go.result.index=index)
})
resultLong <- do.call("rbind", catrows)
resultLong
}
}
#' helper function for GO table
#'
#'
#' @export
getWantedResults <- function(result, pvalue, categorySize=NULL){
## Returns a logical vector with TRUE indicating selected
## results from those tested in the specified result instance.
pvals <- pvalues(result)
wanted <- is.finite(pvals) & pvals < pvalue
if (!is.null(categorySize)) {
ucounts <- universeCounts(result)
hasMinSize <- ucounts >= categorySize
wanted <- wanted & hasMinSize
}
wanted
}
#' plots top
#'
#'
#' @export
plotGSEAOverviewTop <- function(gseaL, maxPathway, pvalGo, minSize=20){
gsea <- gseaL$gsea
topPathwaysUp <- gsea %>% dplyr::filter(ES > 0 & size > minSize & padj < pvalGo) %>% dplyr::top_n(maxPathway, pval) %>% dplyr::pull(pathway)
topPathwaysDown <- gsea %>% dplyr::filter(ES < 0 & size > minSize & padj < pvalGo) %>% dplyr::top_n(maxPathway, pval) %>% dplyr::pull(pathway)
topPathways <- c(topPathwaysUp, rev(topPathwaysDown))
fgsea::plotGseaTable(gseaL$pathways[topPathways], gseaL$stats, gsea, gseaParam = 0.5)
}
#' extracts collapsed top
#'
#'
#' @export
extractGSEAOverviewCollapsedTop <- function(gseaL, maxPathway, categoryFilter, pvalGo, minSize=20){
filteredPathways <- gseaL$pathway_table %>% dplyr::filter(categoryFilter == gs_cat & gs_name %in% gseaL$collapsed_pathways) %>% dplyr::pull(gs_name)
gsea <- gseaL$gsea
filtered <- gsea %>% dplyr::filter(padj < pvalGo) %>% dplyr::arrange(pval)
topPathwaysUp <- gsea %>% dplyr::filter(ES > 0 & size > minSize & padj < pvalGo & pathway %in% filteredPathways) %>%
dplyr::top_n(maxPathway, pval) %>% dplyr::pull(pathway)
topPathwaysDown <- gsea %>% dplyr::filter(ES < 0 & size > minSize & padj < pvalGo & pathway %in% filteredPathways) %>%
dplyr::top_n(maxPathway, pval) %>% dplyr::pull(pathway)
topPathways <- c(topPathwaysUp, rev(topPathwaysDown))
topPathways
}
#' path of output table
#'
#'
#' @export
outTableFunctionalPath <- function(outtabledir, outbasefun, suffix){
paste(outtabledir, "/", outbasefun, "_", suffix, "_functional_analysis.tab",sep="")
}
#' scientific format for HTML tables
#'
#' @export
sfformat <- function(value){
formatC(value, "E", width=3, digits=2)
}
#' datatable for GO
#' data comes in in long format, which is why distinct is necessary
#'
#' also creates a text file
#'
#' @export
datatabGO <- function(dat, suffix, outtabledir, outbasefun){
tabvalshtml <- dat %>% dplyr::ungroup() %>% dplyr::select(-dplyr::one_of(c("go.result.index","go.oddsRatio","go.term","go.expCount","go.gene.id","go.gene.ids", "ontology"))) %>% dplyr::distinct()
if(! is.null(outtabledir) & ! is.null(outbasefun)){
tabvalstab <- dat %>% dplyr::ungroup() %>% dplyr::select(-dplyr::one_of(c("go.result.index", "go.link","go.gene.id"))) %>% dplyr::distinct()
readr::write_tsv(tabvalstab, outTableFunctionalPath(outtabledir, outbasefun, suffix))
}
tabvalshtml %>% dplyr::mutate(pvalue=sfformat(go.pval)) %>% dplyr::select(-go.pval) %>% DT::datatable(escape=FALSE)
}
#' datatable for KEGG
#'
#' also creates a text file
#'
#' @export
datatabKEGG <- function(dat, suffix, outtabledir, outbasefun){
if(! is.null(outtabledir) & ! is.null(outbasefun)){
readr::write_tsv(dat, outTableFunctionalPath(outtabledir, outbasefun, suffix))
}
dat %>% dplyr::mutate(pvalue=sfformat(Pvalue), `expected count`=sfformat(ExpCount)) %>% dplyr::select(-c(Pvalue, ExpCount)) %>% DT::datatable(escape=FALSE)
}
#' datatable for React
#'
#' also creates a text file
#'
#' @export
datatabReact <- function(dat, suffix, outtabledir, outbasefun){
if(! is.null(outtabledir) & ! is.null(outbasefun)){
readr::write_tsv(dat, outTableFunctionalPath(outtabledir, outbasefun, suffix))
}
dat %>% dplyr::mutate(p.adj=sfformat(p.adjust), qvalue=sfformat(qvalue), pvalue=sfformat(pvalue)) %>% dplyr::select(-p.adjust) %>% DT::datatable(escape=FALSE)
}
#' datatable for GSEA
#'
#' also creates a text file
#'
#' @export
datatabGSEA <- function(gsea, toppathways, suffix, outtabledir, outbasefun){
dat <- gsea %>% dplyr::filter(pathway %in% toppathways) %>% dplyr::select("pathway","padj","size","gs_id") #ES,NES
dats <- dat[match(toppathways, dat$pathway),] %>%
dplyr::mutate(pathwayl=paste0('<a target="_blank" href="http://software.broadinstitute.org/gsea/msigdb/geneset_page.jsp?geneSetName=',pathway,'">',pathway,'</a>',sep="")) %>%
dplyr::select(-pathway)
if(! is.null(outtabledir) & ! is.null(outbasefun)){
readr::write_tsv(dats, outTableFunctionalPath(outtabledir, outbasefun, suffix))
}
dat %>% dplyr::mutate(p.adj=sfformat(padj)) %>% dplyr::select(-padj) %>% DT::datatable(escape=FALSE)
}
#' calls function if item is not NA or NULL
#' otherwise returns NO DATA table
#'
#'
#' @export
emptyTable <- function(item, fun, ...){
if(is.object(item)){
fun(item, ...)
}else{
DT::datatable(data.frame(no=NA,data=NA), escape=FALSE)
}
}
#' plots react only if data frome has rows
#'
#' also creates a text file
#'
#' @export
plotEmpty <- function(react, pl){
empty <- nrow(as.data.frame(react)) == 0
if(empty){
ggplot(data.frame(text="no data"), aes(x=1,y=1,label=text)) + geom_text() + theme(axis.title=element_blank(),axis.ticks=element_blank(),axis.text=element_blank())
} else {
pl
}
}
#' describes the parameters for the functional analysis
#'
#' @export
goparameterdescription <- function(deseqconfig){
if(is.null(deseqconfig)){
""
}else{
functional <- deseqconfig$functional
organism <- functional$organism
idtype <- functional$idtype
pvalExp <- functional$adjp
pvalGo <- functional$pval
log2FCExp <- functional$log2fc
paste("For determining up, down and deregulated genes we used an adjusted p-value cutoff of ", pvalExp, " and a minimal log2 fold change of ", log2FCExp,". ",
"The p-value cutoff for the GO-Term and KEGG analysis was ",pvalGo,". ", sep="")
}
}
#' zips all files in the functional_analysis folder
#'
#'
#' @export
zipfunctabs <- function(outtabledir, knitdir){
if(!is.null(outtabledir)){
zipfile <- paste(knitdir, "/", outtabledir,".zip",sep="")
files2zip <- dir(paste(knitdir, "/", outtabledir, sep=""), full.names = TRUE)
zip::zipr(zipfile = zipfile, files = files2zip)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.