knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) # The following two lines should load the library of our package pkgName <- trimws(gsub("^Package:", "", readLines("../DESCRIPTION")[1])) library(pkgName, character.only = TRUE)
This Vignette explains and demonstrates how to use the function EnrichDepletTF()
of the package BCB420.2019.ESA
, and how to interprete its results.
This function uses an in-built helper function to find pairs of genes within the chosen system that their expression profiles significantly correlate (negatively or positively). Then, this function queries for enrichment or depletion in TF-binding-genes among the significantly correlated set of gene, with respect to the entire set of genes of the chosen system.
Finding specific transcription factors that are enriched or depleted for a system, implies on a regulatory relationship between the TF and the binding genes, that could be evaluated and defined in consecutive research.
As a first step, the user must choose a system to analyse.
Presenting the available systems:
names(SyDBgetRootSysIDs(fetchData("SysDB")))
Inserting one system's name into EnrichDepletTF()
For instance, typing this code into your console:
NLRIN <- EnrichDepletTF(sys = "NLRIN", nUpMost = 10) head(NLRIN)
This queries for enrichment and depletion of TFs among the 10 most positively correlated genes of the "NLRIN" system.
As inferred from the output, the transcription factors that are significantly enriched for this set of positively correlated genes are the TFs that their Enrichment_P_value
are smaller than either the corresponding BH_enrichment
cutoff value or than the Bonferroni
cutoff value. The choise of cutoff value is the user's.
Similarly, the transcription factors that are significantly depleted are the TFs that their Depletion_P_value
are smaller than either the corresponding BH_depletion
cutoff value or than the Bonferroni
cutoff value.
As shown by this example, only one of the queried transcription factors were found to be enriched for the 10 most positively correlated pairs of genes of the "NLRIN" system.
#For BH cutoff NLRIN[NLRIN$Enrichment_P_value < NLRIN$BH_enrichment, ] #For bonferroni cutoff NLRIN[NLRIN$Enrichment_P_value < NLRIN$Bonferroni, ]
Please notice that EnrichDepletTF()
has several parameters that are set to specific values as default, but can be modified by the user. The parameters are fully documented within the function documentation page.
Creating a small synthetic database of genes and transcription factors that relate to an hypothetical system, and using it as an input to the function for validation.
The original EnrichDepletTF()
function nests several helper functions that fetch and clean the required databases for the analysis. That way, users are only required to choose the system that they want to analyze by stating its name, and the parameters of the analysis they are interested of (e.g. whether the genes that they enrichment / depletion is evaluated are the positively correlated or negatively correlated genes, the value of alpha, etc...)
In order to analyze synthetic datasets for enrichment / depletion using the function, some code adjustments must be done.
For that reason, the simulation function EnrichDepSim()
was created. Although the part of which the databases are loaded into the function was altered, the enrichment / depletion analysis part of the code is identical to the original function EnrichDepletTF()
.
EnrichDepSim <- function(sysGenes, CorGenes, GenesToTfs, alpha = 0.05){ #Naming the databases according to the named variables in the original function GeneSym <- sysGenes upCorGenes <- CorGenes GTRDtf <- GenesToTfs TfSym <- names(GTRDtf) if (sum( ! is.na(upCorGenes)) >= 3) { #3 or more correlated genes. #calculating enrichment and depletion: numRow <- length(TfSym) corEnrichDep<- data.frame(TF = rep(NA, numRow), Enrichment = rep(NA, numRow), Enrichment_P_value = rep(NA, numRow), Depletion = rep(NA, numRow), Depletion_P_value = rep(NA, numRow), BH_enrichment = rep(NA, numRow), BH_depletion = rep(NA, numRow), Bonferroni = rep(NA, numRow), stringsAsFactors = F) corEnrichDep$TF <- TfSym for (i in seq_along(TfSym)){ #Calculating enrichment and p value GenesBindTf <- GTRDtf[[TfSym[i]]] CorGenesBindTf <- upCorGenes[which(upCorGenes %in% GenesBindTf)] a <- length(CorGenesBindTf) #Correlates and binds TF b <- length(GenesBindTf) - a #not correlates and binds TF c <- length(upCorGenes) - a #Correlates and don't bind d <- (length(GeneSym) - length(upCorGenes)) - b #Not correlates and not binds tmpFisher <- stats::fisher.test(matrix(c(a, b, c, d), nrow = 2), alternative = "greater") corEnrichDep$Enrichment[i] <- tmpFisher$estimate corEnrichDep$Enrichment_P_value[i] <- tmpFisher$p.value #Calculating depletion. and p value tmpFisher <- stats::fisher.test(matrix(c(c, d, a, b), nrow = 2), alternative = "greater") corEnrichDep$Depletion[i] <- tmpFisher$estimate corEnrichDep$Depletion_P_value[i] <- tmpFisher$p.value } #Calculating the cutoffs for each multi-test correction approach nExperiments <- length(TfSym) corEnrichDep <- corEnrichDep[order(corEnrichDep$Enrichment_P_value, decreasing = FALSE),] #order according to #increasing p values corEnrichDep$BH_enrichment <- (order(corEnrichDep$Enrichment_P_value, decreasing = FALSE) / nExperiments) * alpha corEnrichDep <- corEnrichDep[order(corEnrichDep$Depletion_P_value, decreasing = FALSE),] #order according to #increasing P values corEnrichDep$BH_depletion <- (order(corEnrichDep$Depletion_P_value, decreasing = FALSE) / nExperiments) * alpha corEnrichDep$Bonferroni <- alpha / nExperiments #Bonferroni cutoff value. return(corEnrichDep) } else { #Less then 3 correlated genes. return(message(NaN)) } }
#The genes of the system sysGenes <- c("AAA", "BBB", "CCC", "DDD", "EEE", "FFF", "GGG", "HHH", "III", "JJJ") #The genes of the system that were found to be correlated positively CorGenes <- c("AAA", "BBB", "EEE", "GGG", "III") #A list of transcription factors (ZZ, YY) and the genes of the system that they can bind to. GenesToTfs <- list(ZZ = c("AAA", "BBB", "EEE", "FFF", "GGG", "III"), YY = c("BBB", "CCC", "DDD", "FFF", "HHH"))
Presenting the contingency tables and fisher's exact test for both transcription factors, as a reference for the scores produced by the simulation function.
nCorAndbind <- sum(GenesToTfs$ZZ %in% CorGenes) #5 nNoCorAndbind <- sum( ! (GenesToTfs$ZZ %in% CorGenes)) #1 nCorNoBind <- sum( ! (CorGenes %in% GenesToTfs$ZZ)) #0 nNoCorNoBind <- sum(! (sysGenes[! (sysGenes %in% CorGenes)] %in% GenesToTfs$ZZ)) #4 matrix(data = c(nCorAndbind, nNoCorAndbind, nCorNoBind, nNoCorNoBind), nrow = 2, ncol = 2)
Enrichment
fisher.test(x = matrix(c(nCorAndbind, nNoCorAndbind, nCorNoBind, nNoCorNoBind), nrow = 2), alternative = "greater")
Depletion
fisher.test(x = matrix(c(nCorNoBind, nNoCorNoBind, nCorAndbind, nNoCorAndbind), nrow = 2), alternative = "greater")
nCorAndbind <- sum(GenesToTfs$YY %in% CorGenes) #1 nNoCorAndbind <- sum( ! (GenesToTfs$YY %in% CorGenes)) #4 nCorNoBind <- sum( ! (CorGenes %in% GenesToTfs$YY)) #4 nNoCorNoBind <- sum(! (sysGenes[! (sysGenes %in% CorGenes)] %in% GenesToTfs$YY)) #1 matrix(data = c(nCorAndbind, nNoCorAndbind, nCorNoBind, nNoCorNoBind), nrow = 2, ncol = 2)
Enrichment
fisher.test(x = matrix(c(nCorAndbind, nNoCorAndbind, nCorNoBind, nNoCorNoBind), nrow = 2), alternative = "greater")
Depletion
fisher.test(x = matrix(c(nCorNoBind, nNoCorNoBind, nCorAndbind, nNoCorAndbind), nrow = 2), alternative = "greater")
EnrichDepSim(sysGenes = sysGenes, CorGenes = CorGenes, GenesToTfs = GenesToTfs)
This release of the BCB420.2019.ESA
package was produced in the following context of supporting packages:
sessionInfo()
SyDBgetRootSysIDs()
: Written by Boris Steipe, published at https://github.com/hyginn/BCB420.2019.ESA/blob/master/R/SyDButils.RfetchData()
: Written by Boris Steipe, published at https://github.com/hyginn/BCB420.2019.ESA/blob/master/R/fetchData.Rcor.test()
: Used for calculating pearson's correlation.fisher.test()
: Used for computing fisher exact test and calculating the p values for depletion and enrichment.Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.