inst/app/modules/msigdb.R

runMSigDB <- function(DEtable, geneCol, species, category = NULL, subcategory = NULL) {
	##get species datdset
  species.msig = msigdbr::msigdbr(species = species, category = category, subcategory = subcategory)

	##subset columms of interest: gene-set name (gs_name) and gene symbols or enterez id
  msigTerm = species.msig %>% dplyr::select(gs_name, gene_symbol, entrez_gene, gs_cat, gs_subcat) %>% as.data.frame()

	##dedup table to remove multiple tests
	if (!is.null(DEtable$test)){
		DEtable <- DEtable[with(DEtable, order(p_val_adj, decreasing = F)),]
		DEtable <- DEtable[match(unique(DEtable[[geneCol]]), DEtable[[geneCol]]), ]
	}

	return_list = list()
	tryCatch({
		clusterTable <- DEtable

		if (nrow(clusterTable) > 0) {

			##Use the gene sets data frame for clusterProfiler (for genes as gene symbols)
			msig_enricher <- clusterProfiler::enricher(gene = clusterTable[[geneCol]], TERM2GENE = msigTerm)

			addSubset = paste('enricher_result', sep = '')
			return_list[[addSubset]] <- msig_enricher

			#.......................................
			##Use the gene sets data frame for fgsea.
			msig_geneSet = species.msig %>% split(x = .$gene_symbol, f = .$gs_name)

			##name the marker genes with their avgLogFC
			ranks <- clusterTable$avg_logFC
			ranks <- setNames(ranks, clusterTable[[geneCol]])

			set.seed(1234)
			fgsea_results <- fgsea::fgsea(
			  pathways = msig_geneSet,
			  stats = ranks,
			  minSize = 5,
			  maxSize = 600
			)

			topPathwaysUp <- fgsea_results[ES > 0][head(order(pval), n = 10), pathway]
			topPathwaysDown <- fgsea_results[ES < 0][head(order(pval), n = 10), pathway]
			topPathways <- c(topPathwaysUp, rev(topPathwaysDown))

			fgsea_gtable <- R.devices::suppressGraphics({fgsea::plotGseaTable(
				pathways = msig_geneSet[topPathways],
				stats = ranks,
				fgseaRes = fgsea_results,
				gseaParam = 0.5,
				render = FALSE,
				colwidths = c(5, 3, 0.8, 1.2, 1.2)
			)})

			return_list[['fgsea_result']] <- fgsea_results
			return_list[['fgsea_gtable']] <- fgsea_gtable
			return_list[['fgsea_ranks']] <- ranks
			return_list[['msig_geneSet']] <- msig_geneSet
		}
	}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})

	return(return_list)
}

msigdbModule <- function(session, input, output, envir, appDiskCache) {

	#NOTE: this should reset our tab whenever the input genes change
	observeEvent(list(envir$geneList), ignoreInit = F, {
		envir$msigdbRes <- NULL
		#envir$msigdbRes_fgsea <- NULL
		errEl <- NULL
		if (!is.null(errEl)) {shinyjs::hide(errEl)}
	})

  if (exists('gsvis_package')) {
    msigdbCategories <- read.table(file = system.file('app/intdata/msigdb_categories.txt', package = 'geneSetVis'), sep = '\t', header = T, stringsAsFactors = FALSE)
  } else {
    msigdbCategories <- read.table(file = 'intdata/msigdb_categories.txt', sep = '\t', header = T, stringsAsFactors = FALSE)
  }
  msigdbCategories <- msigdbCategories[!is.na(msigdbCategories$Subcategory) & msigdbCategories$Subcategory != '',]
	msigdbCategories$SubcategoryLabel <- paste0(msigdbCategories$Subcategory, ': ', msigdbCategories$SubcategoryLabel)

	observeEvent(input$msigdb_category_input, {
		req(input$msigdb_category_input)

		#NOTE: these do not appear to be specieis-specific, at least on the website.  This call introduces a lot of lag time
		#species.msig <- msigdbr::msigdbr(species = input$msigdbr_species_input)

		subcat <- msigdbCategories[msigdbCategories$Category == input$msigdb_category_input,]
		subcat <- subcat[c('Subcategory', 'SubcategoryLabel')]
		subcatValues <- subcat$Subcategory
		names(subcatValues) <- subcat$SubcategoryLabel
		subcatValues <- sort(subcatValues)

		updateSelectInput(session, "msigdb_subcategory_input",
			choices = c('', subcatValues),
			selected = ''
		)
	})

	observeEvent(input$runmsigdb_button, {
	  withBusyIndicatorServer("runmsigdb_button", {
			validate(need(!is.null(envir$geneList) && nrow(envir$geneList) > 0, "Please enter genes into Load Data tab"))
			validate(need(input$msigdb_selectGeneCol != '', "Please select gene column to use"))
			validate(need(input$msigdb_species_input != '', "Please enter species"))

			shinybusy::show_modal_spinner(text = 'Querying MSigDB. This might take some time.')

			category <- input$msigdb_category_input
			subcategory <- input$msigdb_subcategory_input

			if (category == '') {category <- NULL}
			if (subcategory == '') {subcategory <- NULL}

			cacheKey <- makeDiskCacheKey(list(envir$geneList, input$msigdb_selectGeneCol, input$msigdb_species_input, category, subcategory), 'msigdb')
			cacheVal <- appDiskCache$get(cacheKey)
			if (class(cacheVal) == 'key_missing') {
				print('missing cache key...')

				type <- ifelse(grepl('id', input$msigdb_selectGeneCol), 'ENSEMBL', 'SYMBOL')
				if (type != 'SYMBOL') {
					envir$msigdbRes$enricher_result <- NULL
					envir$msigdbRes$fgsea_result <- NULL
					stop('MsigDB only accepts gene columns of type SYMBOL')
				}

				msigdbRes <- runMSigDB(
					DEtable = envir$geneList,
					geneCol = input$msigdb_selectGeneCol,
					species = input$msigdb_species_input,
					category = category,
					subcategory = subcategory
				)
				appDiskCache$set(key = cacheKey, value = msigdbRes)
			} else {
				print('loading from cache...')
				msigdbRes <- cacheVal
			}

	    envir$msigdbRes <- msigdbRes
			remove_modal_progress()

			if (is.null(envir$msigdbRes$enricher_result) | nrow(envir$msigdbRes$enricher_result) == 0) {
	      envir$msigdbRes$enricher_result <- NULL
	      envir$msigdbRes$fgsea_result <- NULL
	      stop('No significant enrichment found.')
			}

	    if ( !is.null(envir$msigdbRes$fgsea_result) & nrow(envir$msigdbRes$fgsea_result) != 0 ) {
	      fgsea_geneIDCol <-
	        getEnrichResGeneID(
	          gseResult = msigdbRes$fgsea_result,
	          idCol = msigdbRes$fgsea_result$pathway,
	          gseGenes = msigdbRes$enricher_result@gene,
	          geneSet = msigdbRes$msig_geneSet,
	          idColName = 'pathway'
	        )

	      envir$msigdbRes$fgsea_result_asenrich <- as.enrichResult(
	        gseType = 'FGSEA',
	        gseResult = msigdbRes$fgsea_result,
	        gseGenes = msigdbRes$enricher_result@gene,
	        idCol = msigdbRes$fgsea_result$pathway,
	        padjCol = msigdbRes$fgsea_result$padj,
	        pvalCol = msigdbRes$fgsea_result$pval,
	        geneIDCol = fgsea_geneIDCol,
	        pvalueCutoff = 0.5,
	        #?size is not Count
	        #countCol = msigdbRes$fgsea_result$size,
	        countCol = lapply(stringr::str_split(fgsea_geneIDCol, pattern = '/'), length),
	        geneRatioCol = paste(
	          lapply(stringr::str_split(fgsea_geneIDCol, pattern = '/'), length),
	          '/',
	          length(msigdbRes$enricher_result@gene),
	          sep = ''
	        )
	      )
	    }
	  })
	})

	renderPlotSet(
	  output = output,
	  key = 'enricher',
	  enrichTypeResult = reactive(envir$msigdbRes$enricher_result),
	  datasetURL = 'https://www.gsea-msigdb.org/gsea/msigdb/geneset_page.jsp?geneSetName=',
	  datasetName = 'MSigDB',
	  namedGeneList = envir$namedGeneList
	)

	renderPlotSet(
	  output = output,
	  key = 'fgsea',
	  enrichTypeResult = reactive(envir$msigdbRes$fgsea_result_asenrich),
	  datasetURL = 'https://www.gsea-msigdb.org/gsea/msigdb/geneset_page.jsp?geneSetName=',
	  datasetName = 'MSigDB',
	  caption = 'Click on Term Description cell to view enrichment plot.',
	  namedGeneList = envir$namedGeneList
	)


	output$msigdb_map_stats <- renderText({
	  validate(need(!is.null(envir$msigdbRes$enricher_result), "No mapped genes."))
	  num_genes_mapped <- stringr::str_split(noquote(envir$msigdbRes$enricher_result@result$GeneRatio[1]), '/')[[1]][2]
	  HTML(
	    '<b>Mapped genes</b><br>',
	    paste0(num_genes_mapped, ' out of ', length(envir$geneList$gene), ' genes were mapped.')
	  )
	})



	output[["fgsea_table_PPI"]] <- renderPlot({
		info_list <- input$fgsea_table_cell_clicked

		cellVal <- strsplit(info_list[["value"]], perl = T, split = 'target=\"_blank\">')
		cellVal <- strsplit(cellVal[[1]][2], perl = T, split = "</a>")
		cellVal <- cellVal[[1]]

		fgsea::plotEnrichment(envir$msigdbRes$msig_geneSet[[cellVal]], envir$msigdbRes$fgsea_ranks) +
		ggplot2::labs(title = cellVal)
	})

	observeEvent(input$fgsea_table_cell_clicked, {
	  req(length(input$fgsea_table_cell_clicked) > 0)
		showModal(modalDialog(
			plotOutput("fgsea_table_PPI")
		))
	})

	observeEvent(input$msigdb_resource_info, {
		msigdb_resource_info <- list(
			title = "MSigDB Resource info",
			text = HTML(
			'<b>MSigDB</b><br>
				The Molecular Signatures Database (MSigDB) is a collection of gene sets
				originally created for use with the Gene Set Enrichment Analysis (GSEA) software.
				Gene homologs are provided by HUGO Gene Nomenclature Committee at the European Bioinformatics Institute
				which integrates the orthology assertions predicted for human genes by
				eggNOG, Ensembl Compara, HGNC, HomoloGene, Inparanoid, NCBI Gene Orthology, OMA, OrthoDB, OrthoMCL, Panther, PhylomeDB, TreeFam and ZFIN.
				For each human equivalent within each species, only the ortholog supported by the largest number of databases is used.
				<p>
				<li><a href="https://www.gsea-msigdb.org/gsea/msigdb/index.jsp"
				title="link to Official MSigDB"
				target="_blank"><b>Official MSigDB website</b></a></li>
				<p>
				<p>
				<b>enrichplot</b><br>
				The plots are produced using <a href=https://bioconductor.org/packages/release/bioc/html/enrichplot.html target="_blank"><b>enrichplot</b></a> by
				<a href=https://github.com/YuLab-SMU/enrichplot target="_blank"><b>Yu G (2019) </b></a>
				'
			)
		)

		showModal(modalDialog(
			msigdb_resource_info[["text"]],
			title = msigdb_resource_info[["title"]],
			easyClose = TRUE,
			footer = NULL
		))
	})
}
BimberLab/geneSetVis documentation built on Feb. 21, 2022, 4:21 p.m.