R/All-functions.R

Defines functions get_LFC_bar highlight_TF plot_RES plot_ES plot_CM GSEA_run GSEA_ESpermutations GSEA_EnrichmentScore rankTFs getCMstats contingency_matrix get_chip_index GeneID2entrez Select_genes preprocessInputData set_user_data matrixDB_to_listDB makeChIPGeneDB txt2GR

Documented in contingency_matrix GeneID2entrez get_chip_index getCMstats get_LFC_bar GSEA_EnrichmentScore GSEA_ESpermutations GSEA_run highlight_TF makeChIPGeneDB matrixDB_to_listDB plot_CM plot_ES plot_RES preprocessInputData rankTFs Select_genes set_user_data txt2GR

################### IMPORTS ###################
#' @importFrom GenomicFeatures genes
#' @importFrom GenomicRanges GRanges distanceToNearest mcols
#' @importFrom IRanges IRanges
#' @importFrom biomaRt select useMart getLDS getBM
#' @importFrom dplyr '%>%' arrange
#' @importFrom grDevices colorRamp
#' @importFrom stats fisher.test p.adjust
#' @importFrom utils data setTxtProgressBar txtProgressBar
#' @importFrom R.utils withTimeout
#' @importFrom methods is
#' @import org.Hs.eg.db
################### FUNCTIONS #################

##### Create a ChIP-Gene data base ####
txt2GR <- function( fileTable, format, fileMetaData, alpha = NULL ) {

    #' @title Function to filter a ChIP-Seq input.
    #' @description Function to filter a ChIP-Seq output (in .narrowpeak or
    #' MACS's peaks.bed formats) and then store the peak coordinates in a
    #' GenomicRanges object, associated to its metadata.
    #' @param fileTable data frame from a txt/tsv/bed file
    #' @param format 'narrowpeak', 'macs1.4' or 'macs2'.
    #' narrowPeak fields:
    #' 'chrom','chromStart','chromEnd','name','score','strand','signalValue',
    #' 'pValue','qValue','peak'
    #' macs1.4 fields:
    #' 'chrom','chromStart','chromEnd','name','-10*log10(p-value)'
    #' macs2 fields:
    #' 'chrom','chromStart','chromEnd','name','-log10(p-value)'
    #' @param fileMetaData Data frame/matrix/array contaning the following
    #' fields: 'Name','Accession','Cell','Cell Type','Treatment','Antibody',
    #' 'TF'.
    #' @param alpha max p-value to consider ChIPseq peaks as significant and
    #' include them in the database. By default alpha is 0.05 for narrow peak
    #' files and 1e-05 for MACS files
    #' @return The function returns a GR object generated from the ChIP-Seq
    #' dataset input.
    #' @export txt2GR
    #' @examples
    #' data('ARNT.peaks.bed','ARNT.metadata',package = 'TFEA.ChIP')
    #' ARNT.gr<-txt2GR(ARNT.peaks.bed,'macs1.4',ARNT.metadata)

    stopifnot(format %in% c("narrowpeak","narrowPeak","macs1.4",
                            "macs2", "MACS1.4", "MACS2"))
    stopifnot((is.data.frame(fileMetaData) || is.matrix(fileMetaData) ||
        is.array(fileMetaData)))

    # checking fileMetadata has all the fields needed and the correct
    # column names and column order.
    columnNames <- c("Name", "Accession", "Cell", "Cell.Type",
        "Treatment", "Antibody", "TF")


    fileMetaData <- as.data.frame( fileMetaData )

    if ( dim( fileMetaData )[2] == 7) {

        if (FALSE %in% (columnNames %in% colnames(fileMetaData))) {
            stop("fileMetaData format error: 'fileMetaData' must be a",
                " data frame/matrix/array with 7 atributes: 'Name',",
                "'Accession', 'Cell', 'Cell.Type','Treatment',",
                "'Antibody','TF'")
        } else {
            fileMetaData <- fileMetaData[columnNames]
        }
    } else {
        stop("fileMetaData format error: 'fileMetaData' must be a",
            " data frame/matrix/array with 7 atributes: 'Name',",
            "'Accession', 'Cell', 'Cell.Type','Treatment',",
            "'Antibody','TF'")
    }

    format <- tolower(format)

    if (format == "narrowpeak") {

        if (fileTable[1, 8] == -1 & fileTable[1, 9] == -1) {
            # If there's no p-value column
            warning("The ChIP-Seq input file ", fileMetaData$Name,
                " does not include p-value or Q-value for each peak.",
                " Please, make sure the peaks in the input file have been",
                " previously filtered according to their significance")
            fileTable <- fileTable[, 1:3]
            Stat <- "no score"
            fileTable$score = rep(NA, dim(fileTable)[1])
            colnames(fileTable)[1:3] <- c("chr", "start", "end")

        } else if (fileTable[1, 8] == -1) {
            # If the score table has a q-value column
            fileTable <- fileTable[, c(1, 2, 3, 9)]
            colnames(fileTable) <- c("chr", "start", "end", "score")
            Stat <- "log10(p-Value)"

            if (is.null(alpha)) {
                valLimit <- 1.3
            } else {
                valLimit <- (-log10(alpha))
            }
            fileTable <- fileTable[fileTable$score > valLimit, ]

        } else if (fileTable[1, 9] == -1) {
            # If the score table has a -log10(p-value) column
            fileTable <- fileTable[, c(1, 2, 3, 8)]
            colnames(fileTable) <- c("chr", "start", "end", "score")
            fileTable$score <- 10 ^ (-1 * (fileTable$score ) )
            fileTable$score <- p.adjust( fileTable$score, "fdr" ) # adjust p-values using Benjamini & Hochberg correction.
            Stat <- "corrected p-Value"
            if (is.null(alpha)) {
                valLimit <- 0.05
            } else {
                valLimit <- alpha
            }
            fileTable <- fileTable[fileTable$score < valLimit, ]

        } else {
            # If the dataset has both p-value and 10*-log(qval) columns.
            fileTable <- fileTable[, c(1, 2, 3, 9)]
            colnames(fileTable) <- c("chr", "start", "end", "score")
            Stat <- "corrected p-Value"

            if (is.null(alpha)) {
                valLimit <- 1.3
            } else {
                valLimit <- (-log10(alpha))
            }
            fileTable <- fileTable[fileTable$score > valLimit, ]
        }
        if( dim( fileTable)[1] > 0){
            fileMetaData <- c(fileMetaData, Stat)
            MDframe <- as.data.frame(lapply(fileMetaData, rep, dim(fileTable)[1]))
            colnames(MDframe) <- c("Name", "Accession", "Cell", "Cell Type",
                "Treatment", "Antibody", "TF", "Score Type")

            gr <- GenomicRanges::GRanges(seqnames = fileTable$chr,
                ranges = IRanges::IRanges(fileTable$start, end = fileTable$end),
                score = fileTable$score, mcols = MDframe)
            return(gr)
        } else {
            warning( "File ", fileMetaData$Accession,
                     " has no significant peaks after correcting p-values.")
            return(NULL)
        }

    } else if (format %in% c("macs1.4", "macs2") ) {

        if (is.null(alpha)) {
            valLimit <- 0.05
        } else {
            valLimit <- alpha
        }

        if (dim(fileTable)[2] == 5) {
            fileTable <- fileTable[, c(1, 2, 3, 5)]
            colnames(fileTable) <- c("chr", "start", "end", "score")

            if (format == "macs1.4") {
                fileTable$score <- 10 ^ (-1 * (fileTable$score / 10 ) ) # -10 * log10(p-value)
            } else if (format == "macs2"){
                fileTable$score <- 10 ^ (-1 * (fileTable$score ) ) # -log10(p-value)
            }
            fileTable$score <- p.adjust( fileTable$score, "fdr" ) # adjust p-values using Benjamini & Hochberg correction.

            fileTable <- fileTable[fileTable$score < valLimit, ]
            Stat <- "corrected p-Value"

        } else if (dim(fileTable)[2] == 4 & is.character(fileTable[1, 4])) {
            # if the 4th column consists of peak names
            warning("The ChIP-Seq input file does not include p-value or",
                " Q-value for each peak. Please, make sure the peaks",
                " in the input file have been previously filtered",
                " according to their significance")
            fileTable <- fileTable[, 1:3]
            Stat <- "no score"
            fileTable$score = rep(NA, dim(fileTable)[1])
            colnames(fileTable)[1:3] <- c("chr", "start", "end")

        } else if (dim(fileTable)[2] == 4 & !is.character(fileTable[1,
            4])) {
            # if the 4th column consists of p-values
            colnames(fileTable) <- c("chr", "start", "end", "score")

            if (format == "macs1.4") {
                fileTable$score <- 10 ^ (-1 * (fileTable$score / 10 ) ) # -10 * log10(p-value)
            } else if (format == "macs2"){
                fileTable$score <- 10 ^ (-1 * (fileTable$score ) ) # -log10(p-value)
            }

            fileTable$score <- p.adjust( fileTable$score, "fdr" ) # adjust p-values using Benjamini & Hochberg correction.
            fileTable <- fileTable[fileTable$score < valLimit, ]
            Stat <- "corrected p-Value"
        }

        if( dim( fileTable)[1] > 0){

            fileMetaData <- c(fileMetaData, Stat)
            MDframe <- as.data.frame(lapply(fileMetaData, rep, dim(fileTable)[1]))
            colnames(MDframe) <- c("Name", "Accession", "Cell", "Cell Type",
                "Treatment", "Antibody", "TF", "Score Type")

            gr <- GenomicRanges::GRanges(seqnames = fileTable$chr,
                ranges = IRanges::IRanges(fileTable$start, end = fileTable$end),
                score = fileTable$score, mcols = MDframe)
            return(gr)
        } else {
            warning( "File ", fileMetaData$Accession,
                     " has no significant peaks after correcting p-values.")
            return( NULL )
        }

    } else {
        stop("format error: variable 'format' must be",
             " either 'narrowpeak' or 'macs'. ")
    }
}

makeChIPGeneDB <- function( Ref.db, gr.list, distanceMargin = 10,
                            min.Targets = 10 ){
    #' @title Make a ChIP - target database
    #' @description makeChIPGeneDB generates a ChIP-seq - target database
    #' through the association of ChIP-Seq peak coordinates (provided
    #' as a GenomicRange object) to overlapping genes or gene-associated
    #' genomic regions (Ref.db).
    #' @param Ref.db GenomicRanges object containing a database of reference
    #' elements (either Genes or gene-associated regions) including a gene_id
    #' metacolumn
    #' @param gr.list List of GR objects containing ChIP-seq peak coordinates
    #' (output of txt2GR).
    #' @param distanceMargin Maximum distance allowed between a gene or 
    #' regulatory element to assign a gene to a ChIP-seq peak. Set to 10 bases
    #' by default.
    #' @param min.Targets Minimum number of putative targets per ChIP-seq in
    #' gr.list. ChIPs with fewer targets will be discarded.
    #' regulatory element to assign a gene to a ChIP-seq peak. Set to 10 bases
    #' by default.
    #' @return List containing two elements:
    #'    - Gene Keys: vector of gene IDs
    #'    - ChIP Targets: list of vectors, one per element in gr.list, 
    #'      containing the putative targets assigned. Each target is coded as
    #'      its position in the vector 'Gene Keys'.
    #' @export makeChIPGeneDB
    #' @examples
    #' data( 'DnaseHS_db','gr.list', package = 'TFEA.ChIP' )
    #' makeChIPGeneDB( DnaseHS_db, gr.list )
  
  
  if (!requireNamespace("S4Vectors", quietly = TRUE)) {
    stop("S4Vectors package needed for this function to work. ",
         "Please install it.", call. = FALSE)
  }
  
  referenceIDs <- sort(unique( Ref.db$gene_id ))
  
  ChIPtarget_list <- lapply(
    gr.list,
    function( gr.list, dm, Ref.db, IDs, min.Targets ,i ){
      
      nearest_index <- suppressWarnings(
        GenomicRanges::findOverlaps( i, Ref.db, maxgap = dm ))
      inSubject <- S4Vectors::subjectHits( nearest_index )
      
      assigned_genes <- unique( Ref.db[ inSubject ]$gene_id )
      
      # in case any ChIP-Seq dataset does not have enough targets
      if ( length( assigned_genes ) < min.Targets ) {
        NULL
      } else {
        sort( match( assigned_genes, IDs ) )
      }
    },
    gr.list = gr.list,
    dm = distanceMargin,
    Ref.db = Ref.db, 
    IDs = referenceIDs,
    min.Targets = min.Targets
  )
  
  # naming elements
  if( !is.null( names( gr.list ))){
    names( ChIPtarget_list ) <- names( gr.list )
  }else {
    # if gr.list is not named, search for an 'Accession' in GR metadata
    # or create names.
    ChIP_acc_col <- grep(
      "accession",
      tolower( colnames( mcols( gr.list[[ 1 ]] ) ) ) )[1]
    
    if ( length( ChIP_acc_col > 0 ) ){
      ChIP_acc_col <- ChIP_acc_col[ 1 ]
      cat("Using '", ChIP_acc_col, "' as ChIP-seq identifiers." )
      
      chipNames <- sapply(
        gr.list,
        function(i){
          mcols( i )@listData[[ ChIP_acc_col ]][ 1 ]
        }
      )
      names( ChIPtarget_list ) <- chipNames 
      
    } else {
      names( ChIPtarget_list ) <- paste0("ChIPseq_", 1:length(gr.list ) )
    }
  }
  
  ChIPtarget_list[ sapply( ChIPtarget_list, is.null ) ] <- NULL
  
  return( list(
    "Gene Keys" = referenceIDs,
    "ChIP Targets" = ChIPtarget_list
  ) )
  
}

matrixDB_to_listDB <- function( Mat01 ){
    #' @title Re-formatting ChIP-Gene database
    #' @description Function to transform a ChIP-gene data base from the former
    #' binary matrix to the current list-based format.
    #' @param Mat01 Matrix[n,m] which rows correspond to all the human
    #' genes that have been assigned an Entrez ID, and its columns, to every
    #' ChIP-Seq experiment in the database. The values are 1 – if the ChIP-Seq
    #' has a peak assigned to that gene – or 0 – if it hasn’t –.
    #' @return List containing two elements:
    #'    - Gene Keys: vector of gene IDs
    #'    - ChIP Targets: list of vectors, one per ChIP-seq experiment in the,
    #'      database, containing the putative targets assigned. Each target is
    #'      coded as its position in the vector 'Gene Keys'.
    #' @export matrixDB_to_listDB
    #' @examples
    #' Mat01 <- matrix( 
    #'     round( runif(9) ), nrow = 3,
    #'     dimnames= list( paste0("Gene ", 1:3), paste0("ChIPseq ", 1:3))  )
    #' matrixDB_to_listDB( Mat01 )
    
    ChIPDB <- list(
      "Gene Keys" = rownames( Mat01 )
    )
    ChIPDB <- c( ChIPDB, "ChIP Targets" = list( sapply(
        seq_len( ncol( Mat01 ) ),
        function(i){
            unname( which( Mat01[, i] == 1 ) )
        }
    )) )
    names( ChIPDB[["ChIP Targets"]] ) <- colnames( Mat01 )
    
    return( ChIPDB )
}

#### Prepare input data and options ####

set_user_data <- function( metadata, ChIPDB ) {
    #' @title Sets the data objects as default.
    #' @description Function to set the data objects provided by the user
    #' as default to the rest of the functions.
    #' @param metadata Data frame/matrix/array contaning the following fields:
    #' 'Name','Accession','Cell','Cell Type','Treatment','Antibody','TF'.
    #' @param ChIPDB List containing two elements:
    #'    - Gene Keys: vector of gene IDs
    #'    - ChIP Targets: list of vectors, one per ChIP-seq experiment in the,
    #'      database, containing the putative targets assigned. Each target is
    #'      coded as its position in the vector 'Gene Keys'.
    #' @return sets the user's metadata table and TFBS matrix as the variables
    #' 'MetaData' and 'ChIPDB', used by the rest of the package.
    #' @export set_user_data
    #' @examples
    #' data( 'MetaData', 'ChIPDB', package='TFEA.ChIP' )
    #' # For this example, we will usethe variables already included in the
    #' # package.
    #' set_user_data( MetaData, ChIPDB )

    pos <- 1
    envir = as.environment(pos)

    assign("MetaData", metadata, envir = envir)
    assign("ChIPDB", ChIPDB, envir = envir)
}

preprocessInputData <- function(inputData, mode = "h2h" ) {
    #' @title Extracts data from a DESeqResults object or a data frame.
    #' @description Function to extract Gene IDs, logFoldChange, and p-val
    #' values from a DESeqResults object or data frame. Gene IDs are
    #' translated to ENTREZ IDs, if possible, and the resultant data frame
    #' is sorted accordint to decreasing log2(Fold Change). Translating
    #' gene IDs from mouse to their equivalent human genes is avaible
    #' using the variable "mode".
    #' @param inputData DESeqResults object or data frame. In all cases
    #' must include gene IDs. Data frame inputs should include 'pvalue' and
    #' 'log2FoldChange' as well.
    #' @param  mode Specify the organism used: 'h2h' for homo sapiens gene IDs,
    #' 'm2m' for mouse gene IDs, or 'm2h' to get the corresponding human gene
    #' IDs from a mouse input.
    #' @return A table containing Entrez Gene IDs, LogFoldChange and p-val
    #' values (both raw p-value and fdr adjusted p-value), sorted by
    #' log2FoldChange.
    #' @export preprocessInputData
    #' @examples
    #' data('hypoxia_DESeq',package='TFEA.ChIP')
    #' preprocessInputData( hypoxia_DESeq )

    if ( methods::is(inputData, "DESeqResults") ){
        # Extracting data from a DESeqResults object
        if (!requireNamespace("DESeq2", quietly = TRUE)) {
            stop("DESeq2 package needed for this function to work. ",
                "Please install it.", call. = FALSE)
        }
        g <- inputData@rownames
        inputData <- as.data.frame( inputData@listData )
        rownames( inputData ) <- g
        rm(g)

        # check the gene ids and translate if needed
        if ( ! all( grepl("^\\d*$", rownames(inputData) ) ) | mode == "m2h" ) {
            genes <- suppressMessages( GeneID2entrez(
                gene.IDs = rownames( inputData ),
                mode,
                return.Matrix = TRUE))

            if ( mode %in% c("h2h","m2m") ){
                genes <- genes[!is.na(genes$ENTREZ.ID), ]
                inputData <- inputData[rownames(inputData) %in% genes$GENE.ID, ]
                genes <- genes$ENTREZ.ID
            } else {
                genes <- genes[!is.na(genes$human.gene.ID), ]
                inputData <- inputData[rownames(inputData) %in% genes$mouse.gene.ID, ]
                genes <- genes$human.gene.ID
            }

        } else {
            genes <- rownames(inputData)
        }
        # get the rest of the variables
        log2FoldChange <- inputData[["log2FoldChange"]]
        pvalue <- inputData[["pvalue"]]
        pval.adj <- inputData[["padj"]]

        Table <- data.frame(Genes = genes, log2FoldChange = log2FoldChange,
            pvalue = pvalue, pval.adj = pval.adj)
        Table$Genes <- as.character(Table$Genes)
        Table <- Table[!is.na(Table$log2FoldChange), ]
        Table <- Table[order(Table$log2FoldChange, decreasing = TRUE), ]
        rownames(Table) <- NULL
        return(Table)

    } else if ( methods::is(inputData, "data.frame") ) {
        # Extracting data from a data frame.
        # Checkig if all the necessary columns are present
        if (FALSE %in% (c("Genes", "pvalue", "log2FoldChange") %in%
            colnames(inputData)) & FALSE %in% (c("Genes", "pval.adj",
            "log2FoldChange") %in% colnames(inputData))) {
            stop("The necessary atributes can't be found in input data frame",
                ". Input data must include: 'Genes', 'log2FoldChange',and ",
                "'pvalue' or 'pval.adj'", call. = FALSE)
        }
        # If there's not an adjusted p-value column
        if (!("pval.adj" %in% colnames(inputData))) {
            inputData$pval.adj <- p.adjust(inputData$pvalue,
                "fdr")
        }
        # If Gene IDs aren't in Entrez Gene ID format or come from mouse genes.
        if ( ! all( grepl("^\\d*$", inputData$Genes)) | mode == "m2h" ) {
            if( mode== "h2h" ){ inputData$Genes <- toupper( inputData$Genes ) }
            inputData$Genes <- trimws( inputData$Genes )
            genes <- suppressMessages( GeneID2entrez(
                gene.IDs = inputData$Genes,
                mode,
                return.Matrix = TRUE))

            if (  mode %in% c("h2h","m2m")  ){
                genes <- genes[!is.na(genes$ENTREZ.ID), ]
                inputData <- inputData[ inputData$Genes %in% genes$GENE.ID, ]
                inputData$Genes <- genes$ENTREZ.ID
            }else{
                genes <- genes[!is.na(genes$human.gene.ID), ]
                inputData <- inputData[ inputData$Genes %in% genes$mouse.gene.ID, ]
                inputData$Genes <- genes$human.gene.ID
            }
        }
        # sorting according to log2(FoldChange)
        inputData <- inputData[order(inputData$log2FoldChange,
            decreasing = TRUE), ]
        return(inputData)

    } else {
        stop("preprocessInputData requires a DESeqResults object or ",
            "a data frame as input.", call. = FALSE)}
}

Select_genes <- function(GeneExpression_df, max_pval = 0.05,
                         min_pval = 0, max_LFC = Inf, min_LFC = -Inf) {
    #' @title Extracts genes according to logFoldChange and p-val limits
    #' @description Function to extract Gene IDs from a dataframe according
    #' to the established limits for log2(FoldChange) and p-value.
    #' If possible, the function will use the adjusted p-value column.
    #' @param GeneExpression_df A data frame with the folowing fields:
    #' 'Gene', 'pvalue' or 'pval.adj', 'log2FoldChange'.
    #' @param max_pval maximum p-value allowed, 0.05 by default.
    #' @param min_pval minimum p-value allowed, 0 by default.
    #' @param max_LFC maximum log2(FoldChange) allowed.
    #' @param min_LFC minimum log2(FoldChange) allowed.
    #' @return A vector of gene IDs.
    #' @export Select_genes
    #' @examples
    #' data('hypoxia',package='TFEA.ChIP')
    #' Select_genes(hypoxia)
  
  # Checking input variables
  if (max_pval < min_pval) {
    stop("'max_pval' has to be greater than 'min_pval'. ", call. = FALSE)
  }
  if (max_LFC < min_LFC) {
    stop("'max_LFC' has to be greater than 'min_LFC'.", call. = FALSE)
  }
  if( ! "log2FoldChange" %in% colnames( GeneExpression_df ) ) {
    stop("No 'log2FoldChange' field in the input data. ")
  } 
  if( ! any(c( "pvalue","pval.adj" ) %in%  colnames( GeneExpression_df ) ) ){
    stop("No 'pvalue' or 'pval.adj' field in the input data. ")
  }
  
  # Selecting by p-value
  if ("pval.adj" %in% colnames( GeneExpression_df ) ) {
    g_pv <- GeneExpression_df$pval.adj >= min_pval &
      GeneExpression_df$pval.adj <= max_pval
  } else if ("pvalue" %in% colnames(GeneExpression_df)) {
    g_pv <- GeneExpression_df$pvalue >= min_pval &
      GeneExpression_df$pvalue <= max_pval
  }
  
  # Selecting by log2(FoldChange) 
  g_lfc <- GeneExpression_df$log2FoldChange >= min_LFC &
    GeneExpression_df$log2FoldChange <= max_LFC
  
  GeneExpression_df$Gene[ g_pv & g_lfc ]
}

GeneID2entrez <- function(gene.IDs, return.Matrix = FALSE, mode = "h2h") {
    
    #' @title Translates gene IDs from Gene Symbol or Ensemble ID to Entrez ID.
    #' @description Translates mouse or human gene IDs from Gene Symbol or
    #' Ensemble Gene ID to Entrez Gene ID using the IDs approved by HGNC.
    #' When translating from Gene Symbol, keep in mind that many genes have
    #' been given more than one symbol through the years. This function will
    #' return the Entrez ID corresponding to the currently approved symbols
    #' if they exist, otherwise NA is returned. In addition some genes might
    #' map to more than one Entrez ID, in this case gene is assigned to the
    #' first match and a warning is displayed.
    #' @param gene.IDs Array of Gene Symbols or Ensemble Gene IDs.
    #' @param return.Matrix T/F. When TRUE, the function returns a matrix[n,2],
    #' one column with the gene symbols or Ensemble IDs, another with their
    #' respective Entrez IDs.
    #' @param mode Specify the organism used: 'h2h' for homo sapiens gene IDs,
    #' 'm2m' for mouse gene IDs, or 'm2h' to get the corresponding human gene
    #' IDs from a mouse input.
    #' @return Vector or matrix containing the Entrez IDs(or NA) corresponding
    #' to every element of the input.
    #' @export GeneID2entrez
    #' @examples
    #' GeneID2entrez(c('TNMD','DPM1','SCYL3','FGR','CFH','FUCA2','GCLC'))
    
    stopifnot( mode %in% c("h2h", "m2m", "m2h"))
    
    gene.IDs <- gene.IDs[ !is.na( gene.IDs ) ]
    gene.IDs <- trimws( gene.IDs ) # remone any possible white space
    
    if ( mode == 'h2h'){
        
        gene.IDs <- toupper(gene.IDs)  # in case any name is in lowercase.
        # suppressWarnings added to avoid 'select()' returned 1:many
        # mapping between keys and columns
        if ( all( grepl("^ENSG", gene.IDs, perl = TRUE ) ) ) {
            ID.type <- "ENSEMBL"
            suppressMessages(GeneNames <- biomaRt::select(
                org.Hs.eg.db,  gene.IDs,
                c("ENTREZID", "ENSEMBL"), keytype = "ENSEMBL" ))
        } else {
            ID.type <- "SYMBOL"
            suppressMessages(GeneNames <- biomaRt::select(
                org.Hs.eg.db,  gene.IDs,
                c("SYMBOL", "ENTREZID"), keytype = "ALIAS" ))
        }
        
        matched <- match( as.character(gene.IDs), GeneNames[, ID.type] )
        matched_2 <- match( GeneNames[, ID.type], as.character(gene.IDs) )
        
        if ( sum( duplicated( matched_2[ !is.na(matched_2) ] ) ) > 0) {
            warning("Some genes returned 1:many mapping to ENTREZ ID. ",
                    "Genes were assigned the first ENTREZ ID match found.\n",
                    call. = FALSE)
        }
        message("Done! ", sum( !is.na(matched) ), " genes of ",
                length( matched ), " successfully converted.\n")
        
        if ( return.Matrix == TRUE ) {
            if ( sum( is.na(matched) ) > 0 ) {
                message("Couldn't find Entrez IDs for ", sum( is.na(matched) ),
                        " genes (NAs returned instead).\n")
            }
            return( data.frame(
                GENE.ID = gene.IDs,
                ENTREZ.ID = GeneNames[ matched, "ENTREZID"],
                stringsAsFactors = FALSE))
        } else {
            if ( sum( is.na(matched) ) > 0) {
                message("Couldn't find Entrez IDs for ", sum( is.na(matched) ),
                        " genes.\n")
            }
            return( GeneNames[ matched[!is.na(matched)] , "ENTREZID"])
        }
        
    } else if( mode == "m2m" ) {
        
        biomart_test <- tryCatch(
            {R.utils::withTimeout( {tmp <- biomaRt::listMarts()},
                                   timeout = 3, onTimeout = "warning")},
            error = function(w) { return( 0 ) },
            warning = function(w){ return( 0 ) }
        )
        if ( all(biomart_test == 0) ){
            stop( paste0("We are having trouble reaching biomaRt.\n",
                         "Please, try again later."))
        }
        
        mouse <- biomaRt::useMart("ensembl", dataset = "mmusculus_gene_ensembl")
        GeneNames <- biomaRt::getBM(
            attributes = c("ensembl_gene_id", "mgi_symbol","entrezgene_id"),
            values = "*", mart = mouse)
        
        if ( all( grepl("^ENSM", gene.IDs, perl = TRUE ) ) ) {
            
            ids <- GeneNames[ match( gene.IDs, GeneNames$ensembl_gene_id), c(1,3) ]
            colnames(ids)<- c("GENE.ID", "ENTREZ.ID")
            
        } else {
            
            ids <- GeneNames[ match( gene.IDs, GeneNames$mgi_symbol), c(2,3) ]
            colnames(ids)<- c("GENE.ID", "ENTREZ.ID")
        }
        
        message("Done! ", sum( ! is.na( ids$ENTREZ.ID ) ) ,
                " genes of ", dim( ids )[1], " successfully converted.\n")
        
        if (return.Matrix == TRUE) {
            if ( sum( is.na( ids$ENTREZ.ID )) > 0) {
                message("Couldn't find Entrez IDs for ",
                        sum( is.na( ids$ENTREZ.ID )),
                        " genes (NAs returned instead).\n")
            }
            return( ids )
        } else {
            if ( sum( is.na( ids$ENTREZ.ID )) > 0) {
                message("Couldn't find human Entrez IDs for ",
                        sum( is.na( ids$ENTREZ.ID )), " genes.\n")
            }
            return( ids$ENTREZ.ID[ !is.na( ids$ENTREZ.ID ) ] )
        }
        
        
    } else if( mode == "m2h" ){
        
        biomart_test <- tryCatch(
            {R.utils::withTimeout( {tmp <- biomaRt::listMarts()},
                                   timeout = 3, onTimeout = "warning")},
            error = function(w) { return( 0 ) },
            warning = function(w){ return( 0 ) }
        )
        if( all(biomart_test == 0) ){
            stop( paste0("We are having trouble reaching biomaRt.\n",
                         "Please, try again later."))
        }
        
        
        human <- biomaRt::useMart("ensembl", dataset = "hsapiens_gene_ensembl")
        mouse <- biomaRt::useMart("ensembl", dataset = "mmusculus_gene_ensembl")
        
        if ( all( grepl("^ENSM", gene.IDs, perl = TRUE ) ) == TRUE) {
            hs_ids = biomaRt::getLDS(
                attributes = c("ensembl_gene_id"), filters = "ensembl_gene_id",
                values = gene.IDs , mart = mouse,
                attributesL = c("entrezgene_id"), martL = human,
                uniqueRows = TRUE )
            hs_ids <- data.frame(
                mouse.gene.ID=gene.IDs,
                human.gene.ID=hs_ids$NCBI.gene.ID[match( gene.IDs, hs_ids$Gene.stable.ID )  ],
                stringsAsFactors = F
            )
            
        } else if ( all( grepl("^\\d*$", gene.IDs, perl = TRUE) ) == TRUE ) {
            hs_ids = biomaRt::getLDS(
                attributes = c("entrezgene_id"), filters = "entrezgene_id",
                values = gene.IDs, mart = mouse,
                attributesL = c("entrezgene_id"), martL = human,
                uniqueRows = TRUE )
            hs_ids <- data.frame(
                mouse.gene.ID=gene.IDs,
                human.gene.ID=hs_ids$NCBI.gene.ID.1[match( gene.IDs, hs_ids$NCBI.gene.ID )  ],
                stringsAsFactors = F
            )
            
        } else {
            hs_ids = biomaRt::getLDS(
                attributes = c("mgi_symbol"), filters = "mgi_symbol",
                values = gene.IDs, mart = mouse,
                attributesL = c("entrezgene_id"), martL = human,
                uniqueRows = TRUE )
            hs_ids <- data.frame(
                mouse.gene.ID=gene.IDs,
                human.gene.ID=hs_ids$NCBI.gene.ID[match( gene.IDs, hs_ids$MGI.symbol )  ],
                stringsAsFactors = F
            )
        }
        
        message("Done! ", sum( ! is.na( hs_ids$human.gene.ID )),
                " genes of ", length( gene.IDs ), " successfully converted.\n")
        
        if (return.Matrix == TRUE) {
            if ( any( is.na( hs_ids$human.gene.ID )) ) {
                message("Couldn't find human Entrez IDs for ",
                        sum(is.na( hs_ids$human.gene.ID )),
                        " genes (NAs returned instead).\n")
            }
            return( hs_ids )
        } else {
            if ( any( is.na( hs_ids$human.gene.ID )) ) {
                message("Couldn't find human Entrez IDs for ",
                        sum(is.na( hs_ids$human.gene.ID )),  " genes.\n")
            }
            return( hs_ids$human.gene.ID[ !is.na( hs_ids$human.gene.ID ) ] )
        }
        
    }
}

get_chip_index <- function(encodeFilter = FALSE, TFfilter = NULL) {

    #' @title Creates df containing accessions of ChIP-Seq datasets and TF.
    #' @description Function to create a data frame containing the ChIP-Seq
    #' dataset accession IDs and the transcription factor tested in each ChIP.
    #' This index is used in functions like “contingency_matrix” and “GSEA_run”
    #' as a filter to select specific ChIPs or transcription factors to run an
    #' analysis.
    #' @param encodeFilter (Optional) If TRUE, only ENCODE ChIP-Seqs are
    #' included in the index.
    #' @param TFfilter (Optional) Transcription factors of interest.
    #' @return Data frame containig the accession ID and TF for every ChIP-Seq
    #' experiment included in the metadata files.
    #' @export get_chip_index
    #' @examples
    #' get_chip_index(encodeFilter = TRUE)
    #' get_chip_index(TFfilter=c('SMAD2','SMAD4'))

    if (!exists("MetaData")) {
        MetaData <- NULL
        data("MetaData", package = "TFEA.ChIP", envir = environment())
    }

    if (is.null(TFfilter)) {
        Index <- dplyr::select(MetaData, "Accession", "TF")
        if (encodeFilter == TRUE) {
            if (any(grepl("^wg.*", Index$Accession))){
                Index <- Index[grepl("^wg", Index$Accession), ]
            } else {
                Index <- Index[grepl("^ENC", Index$Accession), ]
            }
        }
    } else {
        Index <- dplyr::select(MetaData, "Accession", "TF")
        Index <- Index[Index$TF %in% TFfilter, ]
        if (encodeFilter == TRUE) {

            if (any(grepl("^wg.*", Index$Accession))){
                Index <- Index[grepl("^wg", Index$Accession), ]
            } else {
                Index <- Index[grepl("^ENC", Index$Accession), ]
            }
        }
    }

    if ( dim( Index )[1] == 0 ){
        stop("No ChIP-Seq dataset in the database follows ",
            "your conditions.")
    }else{
        return(Index)
    }
}

#### Run TFEA ####

contingency_matrix <- function(test_list, control_list,
                               chip_index = get_chip_index()) {
  
    #' @title Computes 2x2 contingency matrices
    #' @description Function to compute contingency 2x2 matrix by the partition
    #' of the two gene ID lists according to the presence or absence of the
    #' terms in these list in a ChIP-Seq binding database.
    #' @param test_list List of gene Entrez IDs
    #' @param control_list If not provided, all human genes not present in
    #' test_list will be used as control.
    #' @param chip_index Output of the function “get_chip_index”, a data frame
    #' containing accession IDs of ChIPs on the database and the TF each one
    #' tests. If not provided, the whole internal database will be used
    #' @return List of contingency matrices, one CM per element in chip_index
    #' (i.e. per ChIP-seq dataset).
    #' @export contingency_matrix
    #' @examples
    #' data('Genes.Upreg',package = 'TFEA.ChIP')
    #' CM_list_UP <- contingency_matrix(Genes.Upreg)
  
    if ( ! exists( "ChIPDB" )) {
        ChIPDB <- NULL
        data( "ChIPDB", package = "TFEA.ChIP", envir = environment() )
    }
    if( is.matrix( ChIPDB ) ){
        ChIPDB <- matrixDB_to_listDB( ChIPDB )
    }
    
    if ( missing( control_list ) ) {
        # Generating control gene list in case is not provided.
        control_list <- ChIPDB[["Gene Keys"]]  
    }
    control_list <- control_list[ !( control_list %in% test_list ) ]
    
    CM_list <- lapply(
        seq_along(chip_index$Accession),
        
        function( i, g1, g2, accs, ChIPDB ){
            
            acc <- which( names( ChIPDB[[ "ChIP Targets" ]] ) == accs[ i ] )
            chip.targets <- ChIPDB[["Gene Keys"]][
                ChIPDB[[ "ChIP Targets" ]][[ acc ]] ]
            
            pos1 <- sum( g1 %in% chip.targets )
            pos2 <- sum( g2 %in% chip.targets )
            neg1 <- sum( ! g1 %in% chip.targets )
            neg2 <- sum( ! g2 %in% chip.targets )
            
            contMatrix <- cbind(c(pos1, pos2), c(neg1, neg2))
            rownames(contMatrix) <- c("Test", "Control")
            colnames(contMatrix) <- c("Positive", "Negative")
            
            return(contMatrix)
        },
        g1 = test_list,
        g2 = control_list,
        accs = chip_index$Accession,
        ChIPDB = ChIPDB
    )
    
    names(CM_list) <- chip_index$Accession
    
    return(CM_list)
}

getCMstats <- function( CM_list, chip_index = get_chip_index()) {

    #' @title Generate statistical parameters from a contingency_matrix output
    #' @description From a list of contingency matrices, such as the output
    #' from “contingency_matrix”, this function computes a fisher's exact test
    #' for each matrix and generates a data frame that stores accession ID of a
    #' ChIP-Seq experiment, the TF tested in that experiment, the p-value and
    #' the odds ratio resulting from the test.
    #' @param CM_list Output of “contingency_matrix”, a list of
    #' contingency matrices.
    #' @param chip_index Output of the function “get_chip_index”, a data frame
    #' containing accession IDs of ChIPs on the database and the TF each one
    #' tests. If not provided, the whole internal database will be used
    #' @return Data frame containing accession ID of a ChIP-Seq experiment and
    #' its experimental conditions, the TF tested in that experiment, raw and
    #' adjusted p-values, odds-ratio, and euclidean distance.
    #' and FDR-adjusted p-values (-10*log10 adj.pvalue).
    #' @export getCMstats
    #' @examples
    #' data('Genes.Upreg',package = 'TFEA.ChIP')
    #' CM_list_UP <- contingency_matrix( Genes.Upreg )
    #' stats_mat_UP <- getCMstats( CM_list_UP )
    
    
    fishTests <- do.call( rbind, lapply(
        seq_along( CM_list ),
        function(CM_list,i) {
            data.frame( stats::fisher.test(
                CM_list[[ i ]] )[ c("estimate","p.value") ] )
        },
        CM_list = CM_list ) )
    rownames( fishTests ) <- names( CM_list )
    
    chip_index <- chip_index[ match( names(CM_list), chip_index$Accession ),]
    
    statMat <- data.frame(
        Accession = chip_index$Accession, TF = chip_index$TF,
        p.value = fishTests$p.value, OR = fishTests$estimate,
        stringsAsFactors = FALSE)
    
    statMat$log2.OR <- log2(statMat$OR)
    statMat$log2.OR[ abs( statMat$log2.OR ) == Inf ] <- NA

    statMat$adj.p.value <- stats::p.adjust( statMat$p.value, "fdr" )
    statMat$log10.adj.pVal <- ( -1 * ( log10( statMat$adj.p.value ) ) )
    statMat$log10.adj.pVal[ abs( statMat$log2.OR ) == Inf ] <- NA
    
    tmpOR <- statMat$OR
    tmpOR[ statMat$OR == Inf ] <-  max( statMat$OR, na.rm = TRUE )
    tmpOR[ statMat$OR == -Inf ] <- min( statMat$OR, na.rm = TRUE )

    statMat$distance <- sapply(
        seq_along( statMat$Accession ),
        function(i){
            if( statMat$OR[i] > 1 ){
                x1 <- c( 0, 1 )
                x2 <- c( statMat$log10.adj.pVal[ i ], tmpOR[ i ] )
                d <- sqrt( sum( (x2-x1)**2 ) )
                
            } else if ( statMat$OR[ i ] <= 1 & statMat$OR[ i ] > 0 ) {
                x1 <- c( 0, 1 )
                x2 <- c( statMat$log10.adj.pVal[ i ], 1 / tmpOR[ i ] )
                d <- -1 * sqrt( sum( (x2-x1)**2 ) )
            } else { d <- 0 }
            
            return(d)
        }
    )

    if (!exists("MetaData")) {
        MetaData <- NULL
        data("MetaData", package = "TFEA.ChIP", envir = environment())
    }
    statMat <- merge(
        MetaData[, c( "Accession", "Cell", "Treatment" ) ],
        statMat, by = "Accession" )
    statMat <- statMat[ order( statMat$distance, decreasing = TRUE, na.last = TRUE ), ]
    return( statMat )
}

rankTFs <- function( resultsTable,
                     rankMethod = "gsea", makePlot = FALSE,
                     plotTitle = "TF ranking"){

    #' @title Rank the TFs in the output from 'getCMstats'
    #' @description Rank the TFs in the output from 'getCMstats' using
    #' Wilcoxon rank-sum test or a GSEA-like approach.
    #' @param resultsTable Output from the function 'getCMstats'
    #' @param rankMethod "wilcoxon" or "gsea".
    #' @param makePlot (Optional) For rankMethod="gsea". If TRUE, generates a plot for
    #' TFs with a p-value < 0.05.
    #' @param plotTitle (Optional) Title for the plot.
    #' @return data frame containing:
    #' \itemize{
    #'   \item For Wilcoxon rank-sum test: rank, TF name, test statistic
    #'   ('wilc_W), p-value, Freeman's theta, epsilon-squared anf effect size
    #'   \item For GSEA-like ranking: TF name, enrichment score, argument,
    #'   p-value, number of ChIPs}
    #' @export rankTFs
    #' @examples
    #' data('Genes.Upreg',package = 'TFEA.ChIP')
    #' CM_list_UP <- contingency_matrix(Genes.Upreg)
    #' stats_mat_UP <- getCMstats(CM_list_UP)
    #' rankTFs( stats_mat_UP )


    #### Input format
    rankMethod <- tolower( rankMethod )
    stopifnot(rankMethod %in% c("wilcoxon", "gsea"))

    # check the input type ( TFEA.ChIP association analysis  )
    cols_needed <- c("Accession","TF","distance")
    if( any( ! cols_needed %in% colnames( resultsTable ) ) ){
        stop("Input error: resultsTable doesn't contain all",
             "the columns required ('Accession','TF','distance')")
    }

    #### gsea ranking
    if( rankMethod == "gsea" ){

        chipSets <- lapply(
            unique( resultsTable$TF ),
            function(i, tab ){
                return( tab$Accession[ tab$TF == i ])
            }, tab = resultsTable
        )
        names( chipSets ) <- unique( resultsTable$TF )

        TFrank <- lapply(
            chipSets,
            function(i, statMat ){

                tf <-  statMat$TF[ statMat$Accession == i[1] ]
                res <- GSEA_EnrichmentScore(
                    statMat$Accession, i, 1, statMat$distance )

                shuffled <- rep( list( statMat$Accession ), 100)
                shuffled <- lapply( shuffled, sample )

                shuffled.ES <-  sapply(
                    shuffled,
                    function( j, i, chipDist ) {
                        tmp.ES <- GSEA_EnrichmentScore( j , i, 1, chipDist )$ES
                        return(tmp.ES)
                    }, i=i, chipDist = statMat$distance )

                shuffled.ES <- unlist( shuffled.ES )
                shuffled.ES <- shuffled.ES[ !is.na(shuffled.ES) ]
                pVal <- sum( abs(shuffled.ES) > abs(res$ES) ) / length( shuffled )

                return( data.frame(
                    "TF" = tf,
                    "ES" = res$ES,
                    "arg.ES" = res$arg.ES,
                    "pVal" = pVal,
                    "numberOfChIPs" = length(i),
                    stringsAsFactors = FALSE
                ))
            }, statMat = resultsTable
        )
        TFrank <- do.call( rbind, TFrank )

        if( makePlot == TRUE ){
            plot_df <- TFrank[TFrank$pVal<0.05 & !is.na(TFrank$pVal),]
            sub1 <- subset( plot_df, plot_df$ES > 0)
            sub2 <- subset( plot_df, plot_df$ES < 0)

            if( any( resultsTable$distance == 0 ) ){
                mid <- max(which( resultsTable$distance ==0 )) -
                    min( which( resultsTable$distance ==0 ) )/2
            } else {
                mid <- sum( resultsTable$distance > 0 ) + 0.5
            }

            p <- ggplot2::ggplot( ) +
              ggplot2::geom_point(
                  ggplot2::aes( x = arg.ES, y = ES, color = TF),
                  data = plot_df ) +
              ggplot2::ylim( -1.5, 1.5 ) +
              ggrepel::geom_text_repel( 
                  ggplot2::aes( x = arg.ES, y = ES, label = TF,
                                  color = TF),
                  data = sub1, nudge_y = 1,
                  angle = 90, direction = "x" ) +
              ggrepel::geom_text_repel(
                  ggplot2::aes( x = arg.ES, y = ES, label = TF,
                                  color = TF),
                  data = sub2, nudge_y = -1,
                  angle = 90, direction = "x" ) +
              ggplot2::theme_minimal() +
              ggplot2::geom_hline( yintercept = 0 ) +
              ggplot2::geom_point( ggplot2::aes( x=mid, y=0 ), 
                                   color="black" ) +
              ggplot2::guides( color = FALSE ) +
              ggplot2::labs( title = plotTitle,
                  y = "Enrichment Score", x= "ChIP-seq ranking")
            p
            return( list( TF_ranking=TFrank, TFranking_plot = p ))
        } else{ return(TFrank) }

        #### Wilcoxon rank-sum test
    } else if( rankMethod == "wilcoxon" ){

        chip_ranking <- data.frame(
            acc = resultsTable$Accession,
            TF = resultsTable$TF,
            rank = seq_along( resultsTable$Accession  ),
            stringsAsFactors = FALSE
        )

        TF_wilcox <- lapply(
            unique( chip_ranking$TF ),
            function(i, chip_ranking){

                tmp <- chip_ranking[, c(2:3)]
                tmp$TF[ tmp$TF != i ] <- "other"
                tmp$TF <- as.factor(tmp$TF)

                wilTest <- stats::wilcox.test( rank ~ TF, data=tmp )

                return( data.frame(
                    TF = i,
                    wilc_W = wilTest[["statistic"]],
                    wilc_pval = wilTest[["p.value"]],
                    freeman_theta = rcompanion::freemanTheta(x = tmp$rank, g = tmp$TF ),
                    epsilon_squared = rcompanion::epsilonSquared( x = tmp$rank, g = tmp$TF ),
                    wilc_r = rcompanion::wilcoxonR( x = tmp$rank, g = tmp$TF ),
                    stringsAsFactors = FALSE
                ))
            }, chip_ranking = chip_ranking
        )
        TF_wilcox <- do.call( rbind, TF_wilcox)
        TF_wilcox$rank <- seq_len( nrow( TF_wilcox ) )
        rownames( TF_wilcox ) <- TF_wilcox$rank
        TF_wilcox <- TF_wilcox[, c(7,1:6)]

        return( TF_wilcox )
    }
}

GSEA_EnrichmentScore <- function(gene.list, gene.set,
                                 weighted.score.type = 0, correl.vector = NULL) {

    # Computes the weighted GSEA score of gene.set in gene.list. Developed by
    # The Broad Institute

    #' @title Computes the weighted GSEA score of gene.set in gene.list.
    #' @description Computes the weighted GSEA score of gene.set in gene.list.
    #' @param gene.list The ordered gene list
    #' @param gene.set A gene set, e.g. gene IDs corresponding to a ChIP-Seq
    #' experiment's peaks.
    #' @param weighted.score.type Type of score: weight:
    #' 0 (unweighted = Kolmogorov-Smirnov), 1 (weighted), and 2 (over-weighted)
    #' @param correl.vector A vector with the coorelations (such as signal to
    #' noise scores) corresponding to the genes in the gene list
    #' @return list of:
    #' ES: Enrichment score (real number between -1 and +1)
    #' arg.ES: Location in gene.list where the peak running enrichment occurs
    #' (peak of the 'mountain')
    #' RES: Numerical vector containing the running enrichment score for all
    #' locations in the gene list
    #' tag.indicator: Binary vector indicating the location of the gene sets
    #' (1's) in the gene list
    #' @export GSEA_EnrichmentScore
    #' @examples
    #' GSEA_EnrichmentScore(gene.list=c('3091','2034','405','55818'),
    #' gene.set=c('2034','112399','405'))

    tag.indicator <- sign(match(gene.list, gene.set, nomatch = 0))
    no.tag.indicator <- 1 - tag.indicator
    N <- length(gene.list)
    Nh <- sum( gene.list %in% gene.set )
    Nm <- N - Nh
    if (weighted.score.type == 0) {
        correl.vector <- rep(1, N)
    }
    alpha <- weighted.score.type
    correl.vector <- abs(correl.vector^alpha)
    sum.correl.tag <- sum(correl.vector[tag.indicator == 1])
    if (sum.correl.tag > 0) {
        norm.tag <- 1/sum.correl.tag
        norm.no.tag <- 1/Nm
        RES <- cumsum(tag.indicator * correl.vector * norm.tag -
            no.tag.indicator * norm.no.tag)
        max.ES <- max( RES, na.rm = TRUE )
        min.ES <- min( RES, na.rm = TRUE )
        if (abs(max.ES) > abs(min.ES)) {
            # ES <- max.ES
            ES <- signif(max.ES, digits = 5)
            arg.ES <- which.max(RES)
        } else {
            # ES <- min.ES
            ES <- signif(min.ES, digits = 5)
            arg.ES <- which.min(RES)
        }
        return(list(ES = ES, arg.ES = arg.ES, RES = RES, indicator = tag.indicator))

    } else {
        RES <- rep(NaN, length(gene.list))
        indicator <- rep(0, length(gene.list))
        ES <- NA
        arg.ES <- NA
        return(list(ES = ES, arg.ES = arg.ES, RES = RES, indicator = tag.indicator))
    }
}

GSEA_ESpermutations <- function(gene.list, gene.set, weighted.score.type = 0,
                                correl.vector = NULL, perms = 1000 ) {

    #' @title Calculate enrichment scores for a permutation test.
    #' @description Function to calculate enrichment scores over a randomly ordered
    #' gene list.
    #' @param gene.list Vector of gene Entrez IDs.
    #' @param gene.set A gene set, e.g. gene IDs corresponding to a ChIP-Seq
    #' experiment's peaks.
    #' @param weighted.score.type Type of score: weight:
    #' 0 (unweighted = Kolmogorov-Smirnov), 1 (weighted), and 2 (over-weighted)
    #' @param correl.vector A vector with the coorelations (such as signal to
    #' noise scores) corresponding to the genes in the gene list
    #' @param perms Number of permutations
    #' @return Vector of Enrichment Scores for a permutation test.
    # examples
    # GSEA_EnrichmentScore(gene.list=c('3091','2034','405','55818'),
    #' gene.set=c('2034','112399','405'), perms=10)

    tag.indicator <- sign(match(gene.list, gene.set, nomatch = 0))
    no.tag.indicator <- 1 - tag.indicator
    N <- length(gene.list)
    Nh <- sum( gene.list %in% gene.set )
    Nm <- N - Nh
    if (weighted.score.type == 0) {
        correl.vector <- rep(1, N)
    }
    alpha <- weighted.score.type
    correl.vector <- abs(correl.vector^alpha)
    sum.correl.tag <- sum(correl.vector[tag.indicator == 1])
    if (sum.correl.tag > 0) {
        norm.tag <- 1/sum.correl.tag
        norm.no.tag <- 1/Nm

        pES <- sapply(
            seq_len(perms),
            function( i, mInd, cv, mNorm, nmNorm){

                mInd <- sample( mInd )
                nmInd <- 1 - mInd
                sum.correl.tag <- sum(cv[mInd == 1])
                mNorm <- 1/sum.correl.tag

                RES <- cumsum(
                    mInd * cv * mNorm - nmInd * nmNorm)
                return( max( abs( RES ), na.rm = TRUE ) )
            },
            mInd=tag.indicator, cv=correl.vector,
            nmNorm=norm.no.tag
        )
        return( pES )

    } else {
        return( rep( NA, perms ) )
    }
}

GSEA_run <- function(gene.list, LFC, chip_index = get_chip_index(),
                     get.RES = FALSE, RES.filter = NULL, perms = 1000) {
  
    #' @title Function to run a GSEA analysis
    #' @description Function to run a GSEA to analyze the distribution of TFBS
    #' across a sorted list of genes.
    #' @param gene.list List of Entrez IDs ordered by their fold change.
    #' @param LFC Vector of log2( Fold Change ) values.
    #' @param chip_index Output of the function “get_chip_index”, a data frame
    #' containing accession IDs of ChIPs on the database and the TF each one
    #' tests. If not provided, the whole internal database will be used
    #' @param get.RES (Optional) boolean. If TRUE, the function stores Running
    #' Enrichment Scores of all/some TF.
    #' @param RES.filter (Optional) chr vector. When get.RES==TRUE, allows to
    #' choose which TF's Running Enrichment Score to store.
    #' @param perms (Optional) integer. Number of permutations for the enrichment test.
    #' @return a list of:
    #' Enrichment.table: data frame containing accession ID, Cell type, ChIP-Seq
    #' treatment, transcription factor tested, enrichment score, adjusted p-value,
    #' and argument of every ChIP-Seq experiment.
    #' RES (optional): list of running sums of every ChIP-Seq
    #' indicators (optional): list of 0/1 vectors that stores the matches (1)
    #' and mismatches (0) between the gene list and the gene set.
    #' @export GSEA_run
    #' @examples
    #' data( 'hypoxia', package = 'TFEA.ChIP' )
    #' hypoxia <- preprocessInputData( hypoxia )
    #' chip_index <- get_chip_index( TFfilter = c('HIF1A','EPAS1','ARNT' ) )
    #' GSEA.result <- GSEA_run( hypoxia$Genes, hypoxia$log2FoldChange, chip_index, get.RES = TRUE)
  
    if (!exists("ChIPDB")) {
        ChIPDB <- NULL
        data("ChIPDB", package = "TFEA.ChIP", envir = environment())
    }
    if( is.matrix( ChIPDB ) ){
        ChIPDB <- matrixDB_to_listDB( ChIPDB )
    }
    if (!exists("MetaData")) {
        MetaData <- NULL
        data("MetaData", package = "TFEA.ChIP", envir = environment())
    }
    
    enrichmentScore <- vector()
    pval <- vector()
    enrichmentArg <- vector()
    
    if (get.RES == TRUE) {
        res <- list()
        ind <- list()
    }
    # create progress bar
    pbar <- txtProgressBar(
        min = 0, max = length(chip_index$Accession), style = 3)
    
    for (i in seq_along(chip_index$Accession)) {
        
        acc <- chip_index$Accession[i] 
        targets <- ChIPDB[["Gene Keys"]][ ChIPDB[["ChIP Targets"]][[acc]] ]
        
        if ( sum( targets %in% gene.list ) > 10 ) {
            
            result <- GSEA_EnrichmentScore( gene.list, targets, 1, LFC )
            
            shuffled.ES <- GSEA_ESpermutations( 
                gene.list , targets, 1, LFC, perms )
            shuffled.ES <- shuffled.ES[ !is.na( shuffled.ES ) ]
            
            enrichmentScore <- c( enrichmentScore, result$ES )
            pval <- c( pval,
                       sum( shuffled.ES >= abs(result$ES))/ length(shuffled.ES))
            enrichmentArg <- c( enrichmentArg, result$arg.ES )
            
            if( get.RES ==TRUE ){
                
                if ( missing( RES.filter ) ) {
                    # Store running sums of all TFs.
                    res <- c( res, list( result$RES ) )
                    names( res )[ length( res )] <- acc
                    ind <- c( ind, list( result$indicator ) )
                    names( ind )[ length( ind ) ] <- acc
                    
                } else {
                    # Store running sums of TF in RES.filter
                    if ( acc %in% RES.filter) {
                        res <- c( res, list( result$RES ) )
                        names( res )[ length( res )] <- acc
                        ind <- c( ind, list( result$indicator ) )
                        names( ind )[ length( ind )] <- acc
                    }
                }
            }
            
        } else {
            chip_index <- chip_index[-i, ]
            i <- i-1
        }
        # update progress bar
        setTxtProgressBar(pbar, i)
    }
    close(pbar)
    pval.adj <- stats::p.adjust(pval, "fdr")  # Adjust pvalues
    
    enrichmentTable <- data.frame(
        Accession = chip_index$Accession, TF = chip_index$TF,
        ES = enrichmentScore, p.val = pval, pval.adj = pval.adj,
        Arg.ES = enrichmentArg, stringsAsFactors = F )
    
    enrichmentTable <- merge(
        MetaData[,c("Accession","Cell","Treatment")],
        enrichmentTable, by="Accession")
    
    enrichmentTable <- enrichmentTable[ !is.na( enrichmentTable$pval.adj ), ]
    
    if ( get.RES == TRUE) {
        GSEA_results <- list( enrichmentTable, res, ind )
        names( GSEA_results ) <- c("Enrichment.table", "RES", "indicators")
        return( GSEA_results )
    } else {
        return( enrichmentTable )
    }
}

#### Plotting functions ####

plot_CM <- function( CM.statMatrix, plot_title = NULL,
    specialTF = NULL, TF_colors = NULL ) {
    
    #' @title Makes an interactive html plot from an enrichment table.
    #' @description Function to generate an interactive html plot from a
    #' transcription factor enrichment table, output of the function
    #' 'getCMstats'.
    #' @param CM.statMatrix Output of the function 'getCMstats'.
    #' A data frame storing: Accession ID of every ChIP-Seq tested,
    #' Transcription Factor,Odds Ratio, p-value and adjusted p-value.
    #' @param plot_title The title for the plot.
    #' @param specialTF (Optional) Named vector of TF symbols -as written in
    #' the enrichment table- to be highlighted in the plot. The name of each
    #' element of the vector specifies its color group, i.e.: naming elements
    #' HIF1A and HIF1B as 'HIF' to represent them with the same color.
    #' @param TF_colors (Optional) Nolors to highlight TFs chosen in specialTF.
    #' @return plotly scatter plot.
    #' @export plot_CM
    #' @examples
    #' data('Genes.Upreg',package = 'TFEA.ChIP')
    #' CM_list_UP <- contingency_matrix( Genes.Upreg )
    #' stats_mat_UP <- getCMstats( CM_list_UP )
    #' plot_CM( stats_mat_UP )
    
    if (!requireNamespace("plotly", quietly = TRUE)) {
        stop("plotly package needed for this function to work. ",
             "Please install it.", call. = FALSE)
    }
    
    # Checking input variables
    if (is.null(plot_title)) {
        plot_title <- "Transcription Factor Enrichment"
    }
    if( is.null( specialTF ) ) {
        CM.statMatrix$highlight <- rep("Other", dim(CM.statMatrix)[1])
        markerColors <- c("azure4")
        names(markerColors) <- c("Other")
    }
    if( !is.null( specialTF ) & is.null( TF_colors ) ) {
        TF_colors <- c(
            "red", "blue", "green", "hotpink", "cyan", "greenyellow", "gold",
            "darkorchid", "chocolate1", "black", "lightpink", "seagreen")
        TF_colors <- TF_colors[ 1 : length( unique( names( specialTF ) ) ) ]
        color_list <- highlight_TF( CM.statMatrix, 4, specialTF, TF_colors ) 
        CM.statMatrix$highlight <- color_list[[ 1 ]]
        markerColors <- color_list[[ 2 ]]
    }
    if (!is.null(specialTF) & !is.null(TF_colors)) {
        color_list <- highlight_TF( CM.statMatrix, 4, specialTF, TF_colors )
        CM.statMatrix$highlight <- color_list[[ 1 ]]
        markerColors <- color_list[[ 2 ]]
    }
    if (!exists("MetaData")) {
        MetaData <- NULL
        data("MetaData", package = "TFEA.ChIP", envir = environment())
    }
    
    # Adding metadata for the plot
    MetaData <- MetaData[
        match( CM.statMatrix$Accession, MetaData$Accession), ]
    CM.statMatrix$Treatment <- MetaData$Treatment
    CM.statMatrix$Cell <- MetaData$Cell
    rm(MetaData)
    
    # Cheking if any plot variables have a Inf values.
    if ( sum( CM.statMatrix$OR == Inf ) > 0 ) {
        
        warn_number <- sum( CM.statMatrix$OR == Inf )
        
        # Substitute Inf OR values for the maximum finite value
        finMax <- max( CM.statMatrix$OR[ CM.statMatrix$OR != Inf ] )
        CM.statMatrix[CM.statMatrix$OR == Inf, ]$OR <- finMax
        
        warning(warn_number, " elements have an Odds Ratio of Inf.",
                " Maximum value for OR introduced instead.")
    }
    if ( sum( CM.statMatrix$OR == -Inf ) > 0) {
        
        warn_number <- sum( CM.statMatrix$OR == -Inf )
        
        # Substitute -Inf OR values for the minimum finite value
        finMin <- min( CM.statMatrix$OR[ CM.statMatrix$OR != -Inf ] )
        CM.statMatrix$OR[CM.statMatrix$OR == -Inf, ]$OR <- finMin
        
        warning(warn_number, " elements have an Odds Ratio of -Inf. Minimum",
                " value for OR introduced instead.")
    }
    if ( sum( CM.statMatrix$adj.p.value == 0 ) > 0) {
        warn_number <- sum( CM.statMatrix$adj.p.value == 0 )
        
        # Substitute Inf -log(pval) values for the maximum finite value
        finMax <- max( CM.statMatrix$log.adj.pVal[ CM.statMatrix$p.value != 0])
        CM.statMatrix$log.adj.pVal[ CM.statMatrix$p.value == 0] <- finMax
        
        warning(warn_number, " elements have a -log(p-Value) of Inf. ",
                "Maximum value for -log(p-Val) introduced instead.")
    }
    
    CM.statMatrix$pointText <- paste0(
        CM.statMatrix$Accession, ": ", CM.statMatrix$TF,
        "<br>Treatment: ", CM.statMatrix$Treatment,
        "<br>Cell: ", CM.statMatrix$Cell)
    
    if (length(markerColors) > 1) {
        colorPoints <- CM.statMatrix[ CM.statMatrix$highlight != "Other", ]
        bgPoints <- CM.statMatrix[ CM.statMatrix$highlight == "Other", ]
        
        p <- plotly::plot_ly( bgPoints, 
            x = ~log10.adj.pVal, y = ~log2.OR, type = "scatter",
            mode = "markers", text = ~pointText, color = ~highlight,
            colors = markerColors )
        
        p <- plotly::add_markers( p,
            x = colorPoints$log10.adj.pVal, y = colorPoints$log2.OR,
            type = "scatter", mode = "markers", text = colorPoints$pointText,
            color = colorPoints$highlight, colors = markerColors ) %>%
            plotly::layout( title = plot_title )
        
    } else {
        p <- plotly::plot_ly( CM.statMatrix, 
            x = ~log10.adj.pVal, y = ~log2.OR, type = "scatter", 
            mode = "markers", text = ~pointText, color = ~highlight,
            colors = markerColors ) %>%
            plotly::layout( title = plot_title )
    }
    p
    return(p)
}

plot_ES <- function( GSEA_result, LFC, plot_title = NULL, specialTF = NULL,
    TF_colors = NULL, Accession = NULL, TF = NULL ) {
    
    #' @title Plots Enrichment Score from the output of GSEA.run.
    #' @description Function to plot the Enrichment Score of every member of
    #' the ChIPseq binding database.
    #' @param GSEA_result Returned by GSEA_run
    #' @param LFC Vector with log2(Fold Change) of every gene that has an
    #' Entrez ID. Arranged from higher to lower.
    #' @param plot_title (Optional) Title for the plot
    #' @param specialTF (Optional) Named vector of TF symbols -as written in
    #' the enrichment table- to be highlighted in the plot. The name of each
    #' element specifies its color group, i.e.: naming elements HIF1A and HIF1B
    #' as 'HIF' to represent them with the same color.
    #' @param TF_colors (Optional) Colors to highlight TFs chosen in specialTF.
    #' @param Accession (Optional) restricts plot to the indicated list dataset
    #' IDs.
    #' @param TF (Optional) restricts plot to the indicated list transcription
    #' factor names.
    #' @return Plotly object with a scatter plot -Enrichment scores- and a
    #' heatmap -log2(fold change) bar-.
    #' @export plot_ES
    #' @examples
    #' data('GSEA.result','log2.FC',package = 'TFEA.ChIP')
    #' TF.hightlight <- c('E2F1' = 'E2F1')
    #' col <- c('red')
    #' plot_ES( GSEA.result, log2.FC, "Example", TF.hightlight, col )
    
    if (!requireNamespace("plotly", quietly = TRUE)) {
        stop("plotly package needed for this function to work. ",
             "Please install it.", call. = FALSE)
    }
    
    if ( is.data.frame( GSEA_result ) ) {
        enrichTab <- GSEA_result
    } else {
        enrichTab <- GSEA_result[["Enrichment.table"]]
    }
    
    # If plotting selected points only
    if ( ! is.null( Accession ) | ! is.null( TF ) ) {
        if ( is.null( Accession ) ) {
            Accession <- enrichTab$Accession[ enrichTab$TF %in% TF ]
        }
        if (is.null(TF)) {
            TF <- enrichTab$TF[ enrichTab$Accession %in% Accession ]
        }
        sel <- enrichTab$Accession %in% Accession & enrichTab$TF %in% TF
        enrichTab <- enrichTab[ which( sel ), ]
    }
    
    # Default options
    if ( is.null(plot_title) ) {
        plot_title <- "Transcription Factor Enrichment"
    }
    
    if ( is.null( specialTF ) ) {
        enrichTab$highlight <- "Other"
        markerColors <- c( "Other"="azure4")
    } else {
        if ( is.null( TF_colors ) ) {
            TF_colors <- c(
                "red", "blue", "green", "hotpink", "cyan", "greenyellow",
                "gold", "darkorchid", "chocolate1", "black", "lightpink",
                "seagreen" )
            TF_colors <- TF_colors[ 1 : length( unique( names( specialTF ) ) ) ]
            color_list <- highlight_TF( enrichTab, 4, specialTF, TF_colors )
            enrichTab$highlight <- color_list[[ 1 ]]
            markerColors <- color_list[[ 2 ]]
        } else {
            color_list <- highlight_TF( enrichTab, 4, specialTF, TF_colors )
            enrichTab$highlight <- color_list[[ 1 ]]
            markerColors <- color_list[[ 2 ]]
        }
    }

    enrichTab$symbol <- "pVal>0.05"
    enrichTab$symbol[ enrichTab$pval.adj <= 0.05 ] <- "pVal<=0.05"
    
    # Adding metadata
    if ( ! exists( "MetaData" ) ) {
        MetaData <- NULL
        data("MetaData", package = "TFEA.ChIP", envir = environment())
    }
    
    MetaData <- MetaData[ match( enrichTab$Accession, MetaData$Accession), ]
    enrichTab$Treatment <- MetaData$Treatment
    enrichTab$Cell <- MetaData$Cell
    rm(MetaData)
    
   
    enrichTab$pointText = paste0(
        enrichTab$Accession, ": ", enrichTab$TF, "<br>Adjusted p-value: ",
        round( enrichTab$pval.adj, 3 ), "<br>Treatment: ", enrichTab$Treatment,
        "<br>Cell: ", enrichTab$Cell )
    
    
    multicolor <- length(markerColors) > 1 &
        length( unique( enrichTab$TF ) ) > length( specialTF )
    
    if ( multicolor ) {
        colorPoints <- enrichTab[ enrichTab$highlight != "Other", ]
        bgPoints <- enrichTab[ enrichTab$highlight == "Other", ]
        
        p <- plotly::plot_ly( bgPoints,
            x = bgPoints$Arg.ES, y = bgPoints$ES, type = "scatter",
            mode = "markers", text = bgPoints$pointText,
            color = bgPoints$highlight, colors = markerColors,
            symbol = bgPoints$symbol, symbols = c( "x", "circle" ) )
        
        p <- plotly::add_markers( p, 
            x = colorPoints$Arg.ES, y = colorPoints$ES, type = "scatter",
            mode = "markers", text = colorPoints$pointText,
            color = colorPoints$highlight, colors = markerColors,
            symbol = colorPoints$symbol, symbols = c("x", "circle") ) %>%
            plotly::layout( title = plot_title,
                xaxis = list(title = "Argument"), yaxis = list(title = "ES") )
        
    } else {
        p <- plotly::plot_ly(enrichTab,
            x = enrichTab$Arg.ES, y = enrichTab$ES, type = "scatter", 
            mode = "markers", text = enrichTab$pointText, 
            color = enrichTab$highlight, colors = markerColors, 
            symbol = enrichTab$symbol, symbols = c( "x", "circle" ) ) %>%
            plotly::layout( title = plot_title,
                xaxis = list( title = "Argument"), yaxis = list(title = "ES"))
    }
    # Adding log2(Fold Change) bar to the plot
    LFCbar <- get_LFC_bar( LFC )
    graf <- plotly::subplot( p, 
        LFCbar, shareX = TRUE, nrows = 2, heights = c(0.95, 0.05),
        titleY = TRUE)
    graf
    return(graf)
}

plot_RES <- function( GSEA_result, LFC, plot_title = NULL, line.colors = NULL,
    line.styles = NULL, Accession = NULL, TF = NULL ) {

    #' @title Plots all the RES stored in a GSEA_run output.
    #' @description Function to plot all the RES stored in a GSEA_run output.
    #' @param GSEA_result Returned by GSEA_run
    #' @param LFC Vector with log2(Fold Change) of every gene that has an
    #' Entrez ID. Arranged from higher to lower.
    #' @param plot_title (Optional) Title for the plot.
    #' @param line.colors (Optional) Vector of colors for each line.
    #' @param line.styles (Optional) Vector of line styles for each line
    #' ('solid'/'dash'/'longdash').
    #' @param Accession (Optional) restricts plot to the indicated list dataset
    #' IDs.
    #' @param TF (Optional) restricts plot to the indicated list transcription
    #' factor names.
    #' @return Plotly object with a line plot -running sums- and a
    #' heatmap -log2(fold change) bar-.
    #' @export plot_RES
    #' @examples
    #' data('GSEA.result','log2.FC',package = 'TFEA.ChIP')
    #' plot_RES(GSEA.result, log2.FC, TF = c('E2F4',"E2F1"),
    #'     Accession=c('ENCSR000DYY.E2F4.GM12878',
    #'     'ENCSR000EVJ.E2F1.HeLa-S3'))

    if (!requireNamespace("plotly", quietly = TRUE)) {
        stop("plotly package needed for this function to work.",
            " Please install it.", call. = FALSE)
    }
    # Checking input variables
    if (!exists("MetaData")) {
        MetaData <- NULL
        data("MetaData", package = "TFEA.ChIP", envir = environment())
    }
    
    enrichTab <- GSEA_result[["Enrichment.table"]]
    rSums <- GSEA_result[["RES"]]
    rm( GSEA_result )
    
    # Accession or TF filters
    if ( !is.null( Accession ) | !is.null( TF ) ) {

        if( is.null( Accession ) ) { 
            Accession <- enrichTab$Accession[ enrichTab$TF %in% TF ] }
        if( is.null( TF ) ) { 
            TF <- enrichTab$TF[ enrichTab$Accession %in% Accession ] }

        enrichTab <- enrichTab[ match( Accession, enrichTab$Accession ), ]
        rSums <- rSums[ match( Accession, names( rSums ) ) ]

    } else { Accession <- enrichTab$Accession }
    
    # line parameters & plot title
    if( is.null( line.colors ) ) {
        line.colors <- c("red", "blue", "green", "hotpink", "cyan",
            "greenyellow", "gold", "darkorchid", "chocolate1",
            "black", "lightpink", "seagreen")
        line.colors <- line.colors[ 1 : length( Accession ) ]
    }
    if( is.null( line.styles ) ) {
        line.styles <- rep("solid", length( Accession ))
    }
    if( is.null( plot_title ) ) {
        plot_title <- "Transcription Factor Enrichment"
    }

    # Getting metadata for the plot
    MetaData <- MetaData[ match( Accession, MetaData$Accession ), ]

    plotTab <- data.frame(
    	Accession = MetaData$Accession,
    	Cell = MetaData$Cell,
    	Treatment = MetaData$Treatment,
    	TF = MetaData$TF,
    	stringsAsFactors = FALSE)

    plotTab$RES <- rSums
    rm( MetaData, rSums )
    
    plotTab$lineName <- paste0( plotTab$Accession, " - ", plotTab$TF )
    plotTab$lineText <- paste0(
        plotTab$Accession, " - ", plotTab$TF,
        "<br>Cell: ", plotTab$Cell, "<br>Treatment: ", plotTab$Treatment )

    if ( dim( plotTab )[ 1 ] > 1 ) { # plot multiple lines
        for (i in seq_along(Accession)) {
            if (i == 1) { # First line
                grafica <- plotly::plot_ly( plotTab,
                    x = c( 1 : length( plotTab$RES[[ 1 ]] ) ),
                    y = plotTab$RES[[ Accession[ 1 ] ]], 
                    type = "scatter", mode = "lines", 
                    line = list(color = line.colors[1], dash = line.styles[1]),
                    name = plotTab$lineName[ 1 ], text = plotTab$lineText[ 1 ])
                
            } else if ( i > 1 & i < length( Accession ) ) {
                grafica <- plotly::add_trace( grafica,
                    y = plotTab$RES[[ Accession[ i ] ]],
                    type = "scatter", mode = "lines",
                    line = list(color = line.colors[i], dash = line.styles[i]),
                    name = plotTab$lineName[ i ], text = plotTab$lineText[ i ])
                
            } else if (i == length(Accession)) { # Last line
                grafica <- plotly::add_trace( grafica,
                    y = plotTab$RES[[Accession[i]]],
                    type = "scatter", mode = "lines",
                    line = list(color = line.colors[i], dash = line.styles[i]),
                    name = plotTab$lineName[ i ],
                    text = plotTab$lineText[ i ] ) %>%
                plotly::layout(
                    title = plot_title, xaxis = list(title = "Argument"),
                    yaxis = list(title = "ES"))
            }
        }
    } else { # plot one line
        grafica <- plotly::plot_ly( plotTab, 
            x = c( 1 : length( plotTab$RES[[ 1 ]] ) ),
            y = plotTab$RES[[ Accession[ 1 ] ]], 
            type = "scatter", mode = "lines",
            line = list(color = line.colors[1], dash = line.styles[1]),
            name = plotTab$lineText ) %>%
        plotly::layout(title = plot_title,
            xaxis = list(title = "Argument"), yaxis = list(title = "ES"))
    }

    # Adding log2(Fold Change) bar to the plot
    LFC.bar <- get_LFC_bar( LFC )
    graf <- plotly::subplot( grafica, LFC.bar, shareX = TRUE,
        nrows = 2, heights = c(0.95, 0.05), titleY = TRUE )
    graf
    return( graf )
}

highlight_TF <- function(table, column, specialTF, markerColors) {

    #' @title Highlight certain transcription factors in a plotly graph.
    #' @description Function to highlight certain transcription factors using
    #' different colors in a plotly graph.
    #' @param table Enrichment matrix/data.frame.
    #' @param column Column # that stores the TF name in the matrix/df.
    #' @param specialTF Named vector containing TF names as they appear in the
    #' enrichment matrix/df and nicknames for their color group.
    #' Example:
    #'           specialTF<-c('HIF1A','EPAS1','ARNT','SIN3A')
    #'           names(specialTF)<-c('HIF','HIF','HIF','SIN3A')
    #' @param markerColors Vector specifying the shade for every color group.
    #' @return List of two objects:
    #' A vector to attach to the enrichment matrix/df pointing out the color
    #' group of every row.
    #' A named vector connecting each color group to the chosen color.
    # examples
    # highlight_TF( CM.statMatrix_UP, 4, specialTF, colors )

    highlight <- sapply( seq_along(table[,1]),
        function (table, column, TF, i) {
            if (table[i, column] %in% TF){
                color.group <- names( TF[ TF == table[i, column] ] )
            }else{
                color.group <- "Other"
            }
            return(color.group) },
        table = table, column = column, TF = specialTF
        )

    markerColors <- c( "azure4", markerColors )
    names( markerColors ) <- c( "Other", unique( names( specialTF ) ) )
    return( list( highlight, markerColors ) )
}

get_LFC_bar <- function( LFC ) {

    #' @title Plots a color bar from log2(Fold Change) values.
    #' @description Function to plot a color bar from log2(Fold Change)
    #' values from an expression experiment.
    #' @param LFC Vector of log2(fold change) values arranged from higher
    #' to lower. Use ony the values of genes that have an Entrez ID.
    #' @return Plotly heatmap plot -log2(fold change) bar-.
    # examples
    # get_LFC_bar( arranged.log2FC.array )

    if (!requireNamespace("scales", quietly = TRUE)) {
        stop("scales package needed for this function to work. ",
            "Please install it.", call. = FALSE)
    }
    if (!requireNamespace("plotly", quietly = TRUE)) {
        stop("plotly package needed for this function to work. ",
            "Please install it.", call. = FALSE)
    }

    # Rescaling log Fold Change values to get a -1 to 1 scale
    vals <- scales::rescale( LFC )
    o <- order( vals, decreasing = FALSE )

    posLen <- sum( LFC > 0 )

    # Building a red-blue scale adapted to the log Fold Change
    cols1 <- ( scales::col_numeric(
        grDevices::colorRamp( c( "mistyrose", "red3" ) ),
        domain = NULL) )( vals[ 1 : posLen ] )
    
    cols2 <- ( scales::col_numeric(
        grDevices::colorRamp(c("navy", "lightcyan")),
        domain = NULL)
        )( vals[ posLen+1 : length(LFC) ] )
    
    cols <- c(cols1, cols2)

    colorValues <- data.frame( vals[ o ], cols[ o ] )

    LFC.bar <- plotly::plot_ly(
        x = c( 1 : length( LFC ) ), y = rep( 1, length( LFC ) ), z = LFC,
        type = "heatmap", colorscale = colorValues, showscale = FALSE) %>%
    plotly::layout(yaxis = list(visible = FALSE))

    return( LFC.bar )
}
LauraPS1/TFEA.ChIP documentation built on April 21, 2023, 7:29 a.m.