R/All-functions.R

Defines functions get_LFC_bar highlight_TF plot_RES plot_ES plot_CM GSEA_run GSEA_ESpermutations GSEA_EnrichmentScore rankTFs getCMstats contingency_matrix get_chip_index GeneID2entrez Select_genes preprocessInputData set_user_data makeTFBSmatrix GR2tfbs_db txt2GR

Documented in contingency_matrix GeneID2entrez get_chip_index getCMstats get_LFC_bar GR2tfbs_db GSEA_EnrichmentScore GSEA_ESpermutations GSEA_run highlight_TF makeTFBSmatrix plot_CM plot_ES plot_RES preprocessInputData rankTFs Select_genes set_user_data txt2GR

################### IMPORTS ###################
#' @importFrom GenomicFeatures genes
#' @importFrom GenomicRanges GRanges distanceToNearest
#' @importFrom IRanges IRanges
#' @importFrom biomaRt select useMart getLDS getBM
#' @importFrom dplyr '%>%' arrange
#' @importFrom grDevices colorRamp
#' @importFrom stats fisher.test p.adjust
#' @importFrom utils data setTxtProgressBar txtProgressBar
#' @importFrom R.utils withTimeout
#' @importFrom methods is
#' @import org.Hs.eg.db
################### FUNCTIONS #################

txt2GR <- function(fileTable, format, fileMetaData, alpha = NULL) {

    #' @title Function to filter a ChIP-Seq input.
    #' @description Function to filter a ChIP-Seq output (in .narrowpeak or
    #' MACS's peaks.bed formats) and then store the peak coordinates in a
    #' GenomicRanges object, associated to its metadata.
    #' @param fileTable data frame from a txt/tsv/bed file
    #' @param format 'narrowpeak', 'macs1.4' or 'macs2'.
    #' narrowPeak fields:
    #' 'chrom','chromStart','chromEnd','name','score','strand','signalValue',
    #' 'pValue','qValue','peak'
    #' macs1.4 fields:
    #' 'chrom','chromStart','chromEnd','name','-10*log10(p-value)'
    #' macs2 fields:
    #' 'chrom','chromStart','chromEnd','name','-log10(p-value)'
    #' @param fileMetaData Data frame/matrix/array contaning the following
    #' fields: 'Name','Accession','Cell','Cell Type','Treatment','Antibody',
    #' 'TF'.
    #' @param alpha max p-value to consider ChIPseq peaks as significant and
    #' include them in the database. By default alpha is 0.05 for narrow peak
    #' files and 1e-05 for MACS files
    #' @return The function returns a GR object generated from the ChIP-Seq
    #' dataset input.
    #' @export txt2GR
    #' @examples
    #' data('ARNT.peaks.bed','ARNT.metadata',package = 'TFEA.ChIP')
    #' ARNT.gr<-txt2GR(ARNT.peaks.bed,'macs1.4',ARNT.metadata)

    stopifnot(format %in% c("narrowpeak","narrowPeak","macs1.4",
                            "macs2", "MACS1.4", "MACS2"))
    stopifnot((is.data.frame(fileMetaData) || is.matrix(fileMetaData) ||
        is.array(fileMetaData)))

    # checking fileMetadata has all the fields needed and the correct
    # column names and column order.
    columnNames <- c("Name", "Accession", "Cell", "Cell.Type",
        "Treatment", "Antibody", "TF")


    fileMetaData <- as.data.frame( fileMetaData )

    if ( dim( fileMetaData )[2] == 7) {

        if (FALSE %in% (columnNames %in% colnames(fileMetaData))) {
            stop("fileMetaData format error: 'fileMetaData' must be a",
                " data frame/matrix/array with 7 atributes: 'Name',",
                "'Accession', 'Cell', 'Cell.Type','Treatment',",
                "'Antibody','TF'")
        } else {
            fileMetaData <- fileMetaData[columnNames]
        }
    } else {
        stop("fileMetaData format error: 'fileMetaData' must be a",
            " data frame/matrix/array with 7 atributes: 'Name',",
            "'Accession', 'Cell', 'Cell.Type','Treatment',",
            "'Antibody','TF'")
    }

    format <- tolower(format)

    if (format == "narrowpeak") {

        if (fileTable[1, 8] == -1 & fileTable[1, 9] == -1) {
            # If there's no p-value column
            warning("The ChIP-Seq input file ", fileMetaData$Name,
                " does not include p-value or Q-value for each peak.",
                " Please, make sure the peaks in the input file have been",
                " previously filtered according to their significance")
            fileTable <- fileTable[, 1:3]
            Stat <- "no score"
            fileTable$score = rep(NA, dim(fileTable)[1])
            colnames(fileTable)[1:3] <- c("chr", "start", "end")

        } else if (fileTable[1, 8] == -1) {
            # If the score table has a q-value column
            fileTable <- fileTable[, c(1, 2, 3, 9)]
            colnames(fileTable) <- c("chr", "start", "end", "score")
            Stat <- "log10(p-Value)"

            if (is.null(alpha)) {
                valLimit <- 1.3
            } else {
                valLimit <- (-log10(alpha))
            }
            fileTable <- fileTable[fileTable$score > valLimit, ]

        } else if (fileTable[1, 9] == -1) {
            # If the score table has a -log10(p-value) column
            fileTable <- fileTable[, c(1, 2, 3, 8)]
            colnames(fileTable) <- c("chr", "start", "end", "score")
            fileTable$score <- 10 ^ (-1 * (fileTable$score ) )
            fileTable$score <- p.adjust( fileTable$score, "fdr" ) # adjust p-values using Benjamini & Hochberg correction.
            Stat <- "corrected p-Value"
            if (is.null(alpha)) {
                valLimit <- 0.05
            } else {
                valLimit <- alpha
            }
            fileTable <- fileTable[fileTable$score < valLimit, ]

        } else {
            # If the dataset has both p-value and 10*-log(qval) columns.
            fileTable <- fileTable[, c(1, 2, 3, 9)]
            colnames(fileTable) <- c("chr", "start", "end", "score")
            Stat <- "corrected p-Value"

            if (is.null(alpha)) {
                valLimit <- 1.3
            } else {
                valLimit <- (-log10(alpha))
            }
            fileTable <- fileTable[fileTable$score > valLimit, ]
        }
        if( dim( fileTable)[1] > 0){
            fileMetaData <- c(fileMetaData, Stat)
            MDframe <- as.data.frame(lapply(fileMetaData, rep, dim(fileTable)[1]))
            colnames(MDframe) <- c("Name", "Accession", "Cell", "Cell Type",
                "Treatment", "Antibody", "TF", "Score Type")

            gr <- GenomicRanges::GRanges(seqnames = fileTable$chr,
                ranges = IRanges::IRanges(fileTable$start, end = fileTable$end),
                score = fileTable$score, mcols = MDframe)
            return(gr)
        } else {
            warning( "File ", fileMetaData$Accession,
                     " has no significant peaks after correcting p-values.")
            return(NULL)
        }

    } else if (format %in% c("macs1.4", "macs2") ) {

        if (is.null(alpha)) {
            valLimit <- 0.05
        } else {
            valLimit <- alpha
        }

        if (dim(fileTable)[2] == 5) {
            fileTable <- fileTable[, c(1, 2, 3, 5)]
            colnames(fileTable) <- c("chr", "start", "end", "score")

            if (format == "macs1.4") {
                fileTable$score <- 10 ^ (-1 * (fileTable$score / 10 ) ) # -10 * log10(p-value)
            } else if (format == "macs2"){
                fileTable$score <- 10 ^ (-1 * (fileTable$score ) ) # -log10(p-value)
            }
            fileTable$score <- p.adjust( fileTable$score, "fdr" ) # adjust p-values using Benjamini & Hochberg correction.

            fileTable <- fileTable[fileTable$score < valLimit, ]
            Stat <- "corrected p-Value"

        } else if (dim(fileTable)[2] == 4 & is.character(fileTable[1, 4])) {
            # if the 4th column consists of peak names
            warning("The ChIP-Seq input file does not include p-value or",
                " Q-value for each peak. Please, make sure the peaks",
                " in the input file have been previously filtered",
                " according to their significance")
            fileTable <- fileTable[, 1:3]
            Stat <- "no score"
            fileTable$score = rep(NA, dim(fileTable)[1])
            colnames(fileTable)[1:3] <- c("chr", "start", "end")

        } else if (dim(fileTable)[2] == 4 & !is.character(fileTable[1,
            4])) {
            # if the 4th column consists of p-values
            colnames(fileTable) <- c("chr", "start", "end", "score")

            if (format == "macs1.4") {
                fileTable$score <- 10 ^ (-1 * (fileTable$score / 10 ) ) # -10 * log10(p-value)
            } else if (format == "macs2"){
                fileTable$score <- 10 ^ (-1 * (fileTable$score ) ) # -log10(p-value)
            }

            fileTable$score <- p.adjust( fileTable$score, "fdr" ) # adjust p-values using Benjamini & Hochberg correction.
            fileTable <- fileTable[fileTable$score < valLimit, ]
            Stat <- "corrected p-Value"
        }

        if( dim( fileTable)[1] > 0){

            fileMetaData <- c(fileMetaData, Stat)
            MDframe <- as.data.frame(lapply(fileMetaData, rep, dim(fileTable)[1]))
            colnames(MDframe) <- c("Name", "Accession", "Cell", "Cell Type",
                "Treatment", "Antibody", "TF", "Score Type")

            gr <- GenomicRanges::GRanges(seqnames = fileTable$chr,
                ranges = IRanges::IRanges(fileTable$start, end = fileTable$end),
                score = fileTable$score, mcols = MDframe)
            return(gr)
        } else {
            warning( "File ", fileMetaData$Accession,
                     " has no significant peaks after correcting p-values.")
            return( NULL )
        }

    } else {
        stop("format error: variable 'format' must be",
             " either 'narrowpeak' or 'macs'. ")
    }
}

GR2tfbs_db <- function(Ref.db, gr.list, distanceMargin = 10, outputAsVector = FALSE) {

    #' @title Makes a TFBS-gene binding database
    #' @description GR2tfbs_db generates a TFBS-gene binding database
    #' through the association of ChIP-Seq peak coordinates (provided
    #' as a GenomicRange object) to overlapping genes or gene-associated
    #' Dnase regions (Ref.db).
    #' @param Ref.db GenomicRanges object containing a database of reference
    #' elements (either Genes or gene-associate Dnase regions) including
    #' a gene_id metacolumn
    #' @param gr.list List of GR objects containing ChIP-seq peak coordinates
    #' (output of txt2GR).
    #' @param distanceMargin Maximum distance allowed between a gene or DHS to
    #' assign a gene to a ChIP-seq peak. Set to 10 bases by default.
    #' @param outputAsVector when 'TRUE', the output is a list of vectors
    #' instrad of a list of GeneSet objects
    #' @return List of GeneSe objetcs or vectors, one for every ChIP-Seq,
    #' storing the IDs of the genes to which the TF bound in the ChIP-Seq.
    #' @export GR2tfbs_db
    #' @examples
    #' data('DnaseHS_db','gr.list', package='TFEA.ChIP')
    #' GR2tfbs_db(DnaseHS_db, gr.list)

    if (!requireNamespace("S4Vectors", quietly = TRUE)) {
        stop("S4Vectors package needed for this function to work. ",
            "Please install it.", call. = FALSE)
    }
    if (!requireNamespace("GSEABase", quietly = TRUE)) {
        stop("GSEABase package needed for this function to work. ",
            "Please install it.", call. = FALSE)
    }

    TFgenes_list <- lapply(
        seq_along(gr.list),
        function( gr.list, dm, Ref.db, i ){

            gr <- gr.list[[i]]
            nearest_index <- suppressWarnings(
                GenomicRanges::findOverlaps(gr, Ref.db, maxgap = dm ))
            inSubject <- S4Vectors::subjectHits(nearest_index)

            # in case any ChIP-Seq dataset does not have any genes to be assigned
            if (length( Ref.db[unique(inSubject)]$gene_id ) < 10) {
                NULL
            } else {

                assigned_genes <- Ref.db[inSubject]$gene_id

                if (outputAsVector == TRUE) {
                    return(assigned_genes)

                } else {

                    gene_set <- GSEABase::GeneSet(unique(assigned_genes))
                    gene_set@geneIdType@type <- "Entrez Gene ID"
                    gene_set@setName <- as.character(
                        gr.list[[i]]@elementMetadata@listData[["mcols.Accession"]][1] )
                    gene_set@setIdentifier <- as.character(
                        gr.list[[i]]@elementMetadata@listData[["mcols.Accession"]][1] )
                    gene_set@organism <- "Homo sapiens"
                    gene_set@shortDescription <- "Genes assigned to ChIP-seq"
                    gene_set@longDescription <- paste0(
                        "Cell: ", gr.list[[i]]@elementMetadata@listData[["mcols.Cell"]][1],
                        ", Treatment: ", gr.list[[i]]@elementMetadata@listData[["mcols.Treatment"]][1],
                        ", TF: ", gr.list[[i]]@elementMetadata@listData[["mcols.TF"]][1])

                    return(gene_set)
                }
            }
        },
        gr.list = gr.list,
        dm = distanceMargin,
        Ref.db = Ref.db
        )
    # Removing list elements that didn't have any genes assigned
    TFgenes_list[!vapply(TFgenes_list, is.null, logical(1))]

    # Naming every GeneSet / array in the list with their accession ID
    if ( outputAsVector == FALSE ){
        if( !is.null( names( gr.list ))){
            names(TFgenes_list) <- names(gr.list)
        }else {
            list.names <- sapply(
                TFgenes_list,
                function( i ){
                    return( i@setName )
                }
            )
            names(TFgenes_list) <- list.names
        }
    } else {
        if( !is.null( names( gr.list ))){
            names(TFgenes_list) <- names(gr.list)
        }else {

            list.names <- sapply(
                gr.list,
                function(i){
                    tmp <- as.character( i@elementMetadata@listData[["mcols.Accession"]][1])
                    if (length(tmp) ==0){
                        tmp <- as.character( i@elementMetadata@listData[["Accession"]][1])
                    }
                    return( tmp )
                })
             names(TFgenes_list) <- list.names
        }
    }
    TFgenes_list[ sapply( TFgenes_list, is.null ) ] <- NULL
    return( TFgenes_list )
}

makeTFBSmatrix <- function(GeneList, id_db, geneSetAsInput = TRUE) {

    #' @title Function to search for a list of entrez gene IDs.
    #' @description Function to search for a list of entrez gene IDs
    #' in a  TF-gene binding data base.
    #' @param GeneList Array of gene Entrez IDs
    #' @param id_db TF - gene binding database.
    #' @param geneSetAsInput TRUE for lists of GeneSet objects,
    #' FALSE for lists of vectors.
    #' @return 1/0 matrix. Each row represents a gene, each column,
    #' a ChIP-Seq file.
    #' @export makeTFBSmatrix
    #' @examples
    #' data('tfbs.database','Entrez.gene.IDs',package = 'TFEA.ChIP')
    #' makeTFBSmatrix(Entrez.gene.IDs,tfbs.database)


    TF_matrix <- sapply(
        seq_along(id_db),
        function(id_db, GeneList, i){

            TF_vector <- rep(0, length(GeneList))
            names(TF_vector) <- GeneList

            if ( is.vector( id_db[[1]]) ){
                TF_vector[ names(TF_vector) %in% id_db[[i]] ] <- 1
            } else { # if id_db is a list of GeneSets
                TF_vector[ names(TF_vector) %in% id_db[[i]]@geneIds ] <- 1
            }

            return(TF_vector)
        },
        id_db = id_db,
        GeneList = GeneList
    )
    colnames(TF_matrix) <- names(id_db)
    rownames(TF_matrix) <- GeneList
    return(TF_matrix)
}

set_user_data <- function(metadata, binary_matrix) {
    #' @title Sets the data objects as default.
    #' @description Function to set the data objects provided by the user
    #' as default to the rest of the functions.
    #' @param metadata Data frame/matrix/array contaning the following fields:
    #' 'Name','Accession','Cell','Cell Type','Treatment','Antibody','TF'.
    #' @param binary_matrix Matrix[n,m] which rows correspond to all the human
    #' genes that have been assigned an Entrez ID, and its columns, to every
    #' ChIP-Seq experiment in the database. The values are 1 – if the ChIP-Seq
    #' has a peak assigned to that gene – or 0 – if it hasn’t –.
    #' @return sets the user's metadata table and TFBS matrix as the variables
    #' 'MetaData' and 'Mat01', used by the rest of the package.
    #' @export set_user_data
    #' @examples
    #' data('MetaData','Mat01',package='TFEA.ChIP')
    #' # For this example, we will usethe variables already included in the
    #' # package.
    #' set_user_data(MetaData,Mat01)

    pos <- 1
    envir = as.environment(pos)

    assign("MetaData", metadata, envir = envir)
    assign("Mat01", binary_matrix, envir = envir)
}

preprocessInputData <- function(inputData, mode = "h2h" ) {
    #' @title Extracts data from a DESeqResults object or a data frame.
    #' @description Function to extract Gene IDs, logFoldChange, and p-val
    #' values from a DESeqResults object or data frame. Gene IDs are
    #' translated to ENTREZ IDs, if possible, and the resultant data frame
    #' is sorted accordint to decreasing log2(Fold Change). Translating
    #' gene IDs from mouse to their equivalent human genes is avaible
    #' using the variable "mode".
    #' @param inputData DESeqResults object or data frame. In all cases
    #' must include gene IDs. Data frame inputs should include 'pvalue' and
    #' 'log2FoldChange' as well.
    #' @param  mode Specify the organism used: 'h2h' for homo sapiens gene IDs,
    #' 'm2m' for mouse gene IDs, or 'm2h' to get the corresponding human gene
    #' IDs from a mouse input.
    #' @return A table containing Entrez Gene IDs, LogFoldChange and p-val
    #' values (both raw p-value and fdr adjusted p-value), sorted by
    #' log2FoldChange.
    #' @export preprocessInputData
    #' @examples
    #' data('hypoxia_DESeq',package='TFEA.ChIP')
    #' preprocessInputData(hypoxia_DESeq)

    if ( methods::is(inputData, "DESeqResults") ){
        # Extracting data from a DESeqResults object
        if (!requireNamespace("DESeq2", quietly = TRUE)) {
            stop("DESeq2 package needed for this function to work. ",
                "Please install it.", call. = FALSE)
        }
        g <- inputData@rownames
        inputData <- as.data.frame( inputData@listData )
        rownames( inputData ) <- g
        rm(g)

        # check the gene ids and translate if needed
        if ( ! all( grepl("^\\d*$", rownames(inputData) ) ) | mode == "m2h" ) {
            genes <- suppressMessages( GeneID2entrez(
                gene.IDs = rownames( inputData ),
                mode,
                return.Matrix = TRUE))

            if ( mode %in% c("h2h","m2m") ){
                genes <- genes[!is.na(genes$ENTREZ.ID), ]
                inputData <- inputData[rownames(inputData) %in% genes$GENE.ID, ]
                genes <- genes$ENTREZ.ID
            } else {
                genes <- genes[!is.na(genes$human.gene.ID), ]
                inputData <- inputData[rownames(inputData) %in% genes$mouse.gene.ID, ]
                genes <- genes$human.gene.ID
            }

        } else {
            genes <- rownames(inputData)
        }
        # get the rest of the variables
        log2FoldChange <- inputData[["log2FoldChange"]]
        pvalue <- inputData[["pvalue"]]
        pval.adj <- inputData[["padj"]]

        Table <- data.frame(Genes = genes, log2FoldChange = log2FoldChange,
            pvalue = pvalue, pval.adj = pval.adj)
        Table$Genes <- as.character(Table$Genes)
        Table <- Table[!is.na(Table$log2FoldChange), ]
        Table <- Table[order(Table$log2FoldChange, decreasing = TRUE), ]
        rownames(Table) <- NULL
        return(Table)

    } else if ( methods::is(inputData, "data.frame") ) {
        # Extracting data from a data frame.
        # Checkig if all the necessary columns are present
        if (FALSE %in% (c("Genes", "pvalue", "log2FoldChange") %in%
            colnames(inputData)) & FALSE %in% (c("Genes", "pval.adj",
            "log2FoldChange") %in% colnames(inputData))) {
            stop("The necessary atributes can't be found in input data frame",
                ". Input data must include: 'Genes', 'log2FoldChange',and ",
                "'pvalue' or 'pval.adj'", call. = FALSE)
        }
        # If there's not an adjusted p-value column
        if (!("pval.adj" %in% colnames(inputData))) {
            inputData$pval.adj <- p.adjust(inputData$pvalue,
                "fdr")
        }
        # If Gene IDs aren't in Entrez Gene ID format or come from mouse genes.
        if ( ! all( grepl("^\\d*$", inputData$Genes)) | mode == "m2h" ) {
            if( mode== "h2h" ){ inputData$Genes <- toupper( inputData$Genes ) }
            inputData$Genes <- trimws( inputData$Genes )
            genes <- suppressMessages( GeneID2entrez(
                gene.IDs = inputData$Genes,
                mode,
                return.Matrix = TRUE))

            if (  mode %in% c("h2h","m2m")  ){
                genes <- genes[!is.na(genes$ENTREZ.ID), ]
                inputData <- inputData[ inputData$Genes %in% genes$GENE.ID, ]
                inputData$Genes <- genes$ENTREZ.ID
            }else{
                genes <- genes[!is.na(genes$human.gene.ID), ]
                inputData <- inputData[ inputData$Genes %in% genes$mouse.gene.ID, ]
                inputData$Genes <- genes$human.gene.ID
            }
        }
        # sorting according to log2(FoldChange)
        inputData <- inputData[order(inputData$log2FoldChange,
            decreasing = TRUE), ]
        return(inputData)

    } else {
        stop("preprocessInputData requires a DESeqResults object or ",
            "a data frame as input.", call. = FALSE)}
}

Select_genes <- function(GeneExpression_df, max_pval = 0.05,
                         min_pval = 0, max_LFC = NULL, min_LFC = NULL) {
    #' @title Extracts genes according to logFoldChange and p-val limits
    #' @description Function to extract Gene IDs from a dataframe according
    #' to the established limits for log2(FoldChange) and p-value.
    #' If possible, the function will use the adjusted p-value column.
    #' @param GeneExpression_df A data frame with the folowing fields:
    #' 'Gene', 'pvalue' or 'pval.adj', 'log2FoldChange'.
    #' @param max_pval maximum p-value allowed, 0.05 by default.
    #' @param min_pval minimum p-value allowed, 0 by default.
    #' @param max_LFC maximum log2(FoldChange) allowed.
    #' @param min_LFC minimum log2(FoldChange) allowed.
    #' @return A vector of gene IDs.
    #' @export Select_genes
    #' @examples
    #' data('hypoxia',package='TFEA.ChIP')
    #' Select_genes(hypoxia)

    # Checking input variables

    if (max_pval < min_pval) {
        stop("'max_pval' has to be greater than 'min_pval'. ",
            call. = FALSE)
    } else if (!(is.null(max_LFC)) & !(is.null(min_LFC))) {
        if (max_LFC < min_LFC) {
            stop("'max_LFC' has to be greater than 'min_LFC'.",
                call. = FALSE)
        }
    }
    # If only one log2(Fold Change) limit is defined
    if (is.null(max_LFC) & !is.null(min_LFC)) {

        max_LFC <- max(GeneExpression_df$log2FoldChange)

        if (min_LFC == 0) {
            min_LFC <- min(GeneExpression_df[
                GeneExpression_df$log2FoldChange > 0, ]$log2FoldChange)
        }

    } else if (is.null(min_LFC) & !is.null(max_LFC)) {

        min_LFC <- min(GeneExpression_df$log2FoldChange)

        if (max_LFC == 0) {
            max_LFC <- max(GeneExpression_df[
                GeneExpression_df$log2FoldChange < 0, ]$log2FoldChange)
        }
    }

    # Selecting by p-value
    if ("pval.adj" %in% colnames(GeneExpression_df)) {
        geneSelection <- GeneExpression_df[
            GeneExpression_df$pval.adj >= min_pval &
            GeneExpression_df$pval.adj <= max_pval, ]
    } else if ("pvalue" %in% colnames(GeneExpression_df)) {
        geneSelection <- GeneExpression_df[
            GeneExpression_df$pvalue >= min_pval &
            GeneExpression_df$pvalue <= max_pval, ]
    } else {
        stop("No 'pvalue' or 'pval.adj' field in the input data. ")
    }

    # Selecting by log2(FoldChange)
    if (!is.null(min_LFC) & !is.null(max_LFC)) {
        if( "log2FoldChange" %in% colnames(GeneExpression_df) ){
            geneSelection <- geneSelection[
                geneSelection$log2FoldChange >= min_LFC &
                geneSelection$log2FoldChange <= max_LFC, ]
        }else{
            stop("No 'log2FoldChange' field in the input data. ")
        }
    }

    geneSelection <- geneSelection$Genes
    return(geneSelection)
}

GeneID2entrez <- function(gene.IDs, return.Matrix = FALSE, mode = "h2h") {
    
    #' @title Translates gene IDs from Gene Symbol or Ensemble ID to Entrez ID.
    #' @description Translates mouse or human gene IDs from Gene Symbol or
    #' Ensemble Gene ID to Entrez Gene ID using the IDs approved by HGNC.
    #' When translating from Gene Symbol, keep in mind that many genes have
    #' been given more than one symbol through the years. This function will
    #' return the Entrez ID corresponding to the currently approved symbols
    #' if they exist, otherwise NA is returned. In addition some genes might
    #' map to more than one Entrez ID, in this case gene is assigned to the
    #' first match and a warning is displayed.
    #' @param gene.IDs Array of Gene Symbols or Ensemble Gene IDs.
    #' @param return.Matrix T/F. When TRUE, the function returns a matrix[n,2],
    #' one column with the gene symbols or Ensemble IDs, another with their
    #' respective Entrez IDs.
    #' @param mode Specify the organism used: 'h2h' for homo sapiens gene IDs,
    #' 'm2m' for mouse gene IDs, or 'm2h' to get the corresponding human gene
    #' IDs from a mouse input.
    #' @return Vector or matrix containing the Entrez IDs(or NA) corresponding
    #' to every element of the input.
    #' @export GeneID2entrez
    #' @examples
    #' GeneID2entrez(c('TNMD','DPM1','SCYL3','FGR','CFH','FUCA2','GCLC'))
    
    stopifnot( mode %in% c("h2h", "m2m", "m2h"))
    
    gene.IDs <- gene.IDs[ !is.na( gene.IDs ) ]
    gene.IDs <- trimws( gene.IDs ) # remone any possible white space
    
    if ( mode == 'h2h'){
        
        gene.IDs <- toupper(gene.IDs)  # in case any name is in lowercase.
        # suppressWarnings added to avoid 'select()' returned 1:many
        # mapping between keys and columns
        if ( all( grepl("^ENSG", gene.IDs, perl = TRUE ) ) ) {
            ID.type <- "ENSEMBL"
            suppressMessages(GeneNames <- biomaRt::select(
                org.Hs.eg.db,  gene.IDs,
                c("ENTREZID", "ENSEMBL"), keytype = "ENSEMBL" ))
        } else {
            ID.type <- "SYMBOL"
            suppressMessages(GeneNames <- biomaRt::select(
                org.Hs.eg.db,  gene.IDs,
                c("SYMBOL", "ENTREZID"), keytype = "ALIAS" ))
        }
        
        matched <- match( as.character(gene.IDs), GeneNames[, ID.type] )
        matched_2 <- match( GeneNames[, ID.type], as.character(gene.IDs) )
        
        if ( sum( duplicated( matched_2[ !is.na(matched_2) ] ) ) > 0) {
            warning("Some genes returned 1:many mapping to ENTREZ ID. ",
                    "Genes were assigned the first ENTREZ ID match found.\n",
                    call. = FALSE)
        }
        message("Done! ", sum( !is.na(matched) ), " genes of ",
                length( matched ), " successfully converted.\n")
        
        if ( return.Matrix == TRUE ) {
            if ( sum( is.na(matched) ) > 0 ) {
                message("Couldn't find Entrez IDs for ", sum( is.na(matched) ),
                        " genes (NAs returned instead).\n")
            }
            return( data.frame(
                GENE.ID = gene.IDs,
                ENTREZ.ID = GeneNames[ matched, "ENTREZID"],
                stringsAsFactors = FALSE))
        } else {
            if ( sum( is.na(matched) ) > 0) {
                message("Couldn't find Entrez IDs for ", sum( is.na(matched) ),
                        " genes.\n")
            }
            return( GeneNames[ matched[!is.na(matched)] , "ENTREZID"])
        }
        
    } else if( mode == "m2m" ) {
        
        biomart_test <- tryCatch(
            {R.utils::withTimeout( {tmp <- biomaRt::listMarts()},
                                   timeout = 3, onTimeout = "warning")},
            error = function(w) { return( 0 ) },
            warning = function(w){ return( 0 ) }
        )
        if ( all(biomart_test == 0) ){
            stop( paste0("We are having trouble reaching biomaRt.\n",
                         "Please, try again later."))
        }
        
        mouse <- biomaRt::useMart("ensembl", dataset = "mmusculus_gene_ensembl")
        GeneNames <- biomaRt::getBM(
            attributes = c("ensembl_gene_id", "mgi_symbol","entrezgene_id"),
            values = "*", mart = mouse)
        
        if ( all( grepl("^ENSM", gene.IDs, perl = TRUE ) ) ) {
            
            ids <- GeneNames[ match( gene.IDs, GeneNames$ensembl_gene_id), c(1,3) ]
            colnames(ids)<- c("GENE.ID", "ENTREZ.ID")
            
        } else {
            
            ids <- GeneNames[ match( gene.IDs, GeneNames$mgi_symbol), c(2,3) ]
            colnames(ids)<- c("GENE.ID", "ENTREZ.ID")
        }
        
        message("Done! ", sum( ! is.na( ids$ENTREZ.ID ) ) ,
                " genes of ", dim( ids )[1], " successfully converted.\n")
        
        if (return.Matrix == TRUE) {
            if ( sum( is.na( ids$ENTREZ.ID )) > 0) {
                message("Couldn't find Entrez IDs for ",
                        sum( is.na( ids$ENTREZ.ID )),
                        " genes (NAs returned instead).\n")
            }
            return( ids )
        } else {
            if ( sum( is.na( ids$ENTREZ.ID )) > 0) {
                message("Couldn't find human Entrez IDs for ",
                        sum( is.na( ids$ENTREZ.ID )), " genes.\n")
            }
            return( ids$ENTREZ.ID[ !is.na( ids$ENTREZ.ID ) ] )
        }
        
        
    } else if( mode == "m2h" ){
        
        biomart_test <- tryCatch(
            {R.utils::withTimeout( {tmp <- biomaRt::listMarts()},
                                   timeout = 3, onTimeout = "warning")},
            error = function(w) { return( 0 ) },
            warning = function(w){ return( 0 ) }
        )
        if( all(biomart_test == 0) ){
            stop( paste0("We are having trouble reaching biomaRt.\n",
                         "Please, try again later."))
        }
        
        
        human <- biomaRt::useMart("ensembl", dataset = "hsapiens_gene_ensembl")
        mouse <- biomaRt::useMart("ensembl", dataset = "mmusculus_gene_ensembl")
        
        if ( all( grepl("^ENSM", gene.IDs, perl = TRUE ) ) == TRUE) {
            hs_ids = biomaRt::getLDS(
                attributes = c("ensembl_gene_id"), filters = "ensembl_gene_id",
                values = gene.IDs , mart = mouse,
                attributesL = c("entrezgene_id"), martL = human,
                uniqueRows = TRUE )
            hs_ids <- data.frame(
                mouse.gene.ID=gene.IDs,
                human.gene.ID=hs_ids$NCBI.gene.ID[match( gene.IDs, hs_ids$Gene.stable.ID )  ],
                stringsAsFactors = F
            )
            
        } else if ( all( grepl("^\\d*$", gene.IDs, perl = TRUE) ) == TRUE ) {
            hs_ids = biomaRt::getLDS(
                attributes = c("entrezgene_id"), filters = "entrezgene_id",
                values = gene.IDs, mart = mouse,
                attributesL = c("entrezgene_id"), martL = human,
                uniqueRows = TRUE )
            hs_ids <- data.frame(
                mouse.gene.ID=gene.IDs,
                human.gene.ID=hs_ids$NCBI.gene.ID.1[match( gene.IDs, hs_ids$NCBI.gene.ID )  ],
                stringsAsFactors = F
            )
            
        } else {
            hs_ids = biomaRt::getLDS(
                attributes = c("mgi_symbol"), filters = "mgi_symbol",
                values = gene.IDs, mart = mouse,
                attributesL = c("entrezgene_id"), martL = human,
                uniqueRows = TRUE )
            hs_ids <- data.frame(
                mouse.gene.ID=gene.IDs,
                human.gene.ID=hs_ids$NCBI.gene.ID[match( gene.IDs, hs_ids$MGI.symbol )  ],
                stringsAsFactors = F
            )
        }
        
        message("Done! ", sum( ! is.na( hs_ids$human.gene.ID )),
                " genes of ", length( gene.IDs ), " successfully converted.\n")
        
        if (return.Matrix == TRUE) {
            if ( any( is.na( hs_ids$human.gene.ID )) ) {
                message("Couldn't find human Entrez IDs for ",
                        sum(is.na( hs_ids$human.gene.ID )),
                        " genes (NAs returned instead).\n")
            }
            return( hs_ids )
        } else {
            if ( any( is.na( hs_ids$human.gene.ID )) ) {
                message("Couldn't find human Entrez IDs for ",
                        sum(is.na( hs_ids$human.gene.ID )),  " genes.\n")
            }
            return( hs_ids$human.gene.ID[ !is.na( hs_ids$human.gene.ID ) ] )
        }
        
    }
}

get_chip_index <- function(encodeFilter = FALSE, TFfilter = NULL) {

    #' @title Creates df containing accessions of ChIP-Seq datasets and TF.
    #' @description Function to create a data frame containing the ChIP-Seq
    #' dataset accession IDs and the transcription factor tested in each ChIP.
    #' This index is used in functions like “contingency_matrix” and “GSEA_run”
    #' as a filter to select specific ChIPs or transcription factors to run an
    #' analysis.
    #' @param encodeFilter (Optional) If TRUE, only ENCODE ChIP-Seqs are
    #' included in the index.
    #' @param TFfilter (Optional) Transcription factors of interest.
    #' @return Data frame containig the accession ID and TF for every ChIP-Seq
    #' experiment included in the metadata files.
    #' @export get_chip_index
    #' @examples
    #' get_chip_index(encodeFilter = TRUE)
    #' get_chip_index(TFfilter=c('SMAD2','SMAD4'))

    if (!exists("MetaData")) {
        MetaData <- NULL
        data("MetaData", package = "TFEA.ChIP", envir = environment())
    }

    if (is.null(TFfilter)) {
        Index <- dplyr::select(MetaData, "Accession", "TF")
        if (encodeFilter == TRUE) {
            if (any(grepl("^wg.*", Index$Accession))){
                Index <- Index[grepl("^wg", Index$Accession), ]
            } else {
                Index <- Index[grepl("^ENC", Index$Accession), ]
            }
        }
    } else {
        Index <- dplyr::select(MetaData, "Accession", "TF")
        Index <- Index[Index$TF %in% TFfilter, ]
        if (encodeFilter == TRUE) {

            if (any(grepl("^wg.*", Index$Accession))){
                Index <- Index[grepl("^wg", Index$Accession), ]
            } else {
                Index <- Index[grepl("^ENC", Index$Accession), ]
            }
        }
    }

    if ( dim( Index )[1] == 0 ){
        stop("No ChIP-Seq dataset in the database follows ",
            "your conditions.")
    }else{
        return(Index)
    }
}

contingency_matrix <- function(test_list, control_list,
                               chip_index = get_chip_index()) {

    #' @title Computes 2x2 contingency matrices
    #' @description Function to compute contingency 2x2 matrix by the partition
    #' of the two gene ID lists according to the presence or absence of the
    #' terms in these list in a ChIP-Seq binding database.
    #' @param test_list List of gene Entrez IDs
    #' @param control_list If not provided, all human genes not present in
    #' test_list will be used as control.
    #' @param chip_index Output of the function “get_chip_index”, a data frame
    #' containing accession IDs of ChIPs on the database and the TF each one
    #' tests. If not provided, the whole internal database will be used
    #' @return List of contingency matrices, one CM per element in chip_index
    #' (i.e. per ChIP-seq dataset).
    #' @export contingency_matrix
    #' @examples
    #' data('Genes.Upreg',package = 'TFEA.ChIP')
    #' CM_list_UP <- contingency_matrix(Genes.Upreg)

    if (!exists("Mat01")) {
        Mat01 <- NULL
        data("Mat01", package = "TFEA.ChIP", envir = environment())
    }
    if (missing(control_list)) {
        # Generating control gene list in case is not provided.
        control_list <- rownames(Mat01)
    }

    control_list <- control_list[!(control_list %in% test_list)]

    Matrix1 <- Mat01[rownames(Mat01) %in% test_list, colnames(Mat01) %in%
        chip_index$Accession]
    Matrix2 <- Mat01[rownames(Mat01) %in% control_list, colnames(Mat01) %in%
        chip_index$Accession]

    contMatrix_list <- lapply(
        seq_along(chip_index$Accession),

        function(accs, m1, m2, i){
            chip.vector1 <- m1[, accs[i] ]
            chip.vector2 <- m2[, accs[i] ]

            pos1 <- sum( chip.vector1 == 1 )
            pos2 <- sum(chip.vector2 == 1 )
            neg1 <- sum( chip.vector1 == 0 )
            neg2 <- sum( chip.vector2 == 0 )

            contMatrix <- cbind(c(pos1, pos2), c(neg1, neg2))
            rownames(contMatrix) <- c("Test", "Control")
            colnames(contMatrix) <- c("Positive", "Negative")

            return(contMatrix)
        },
        accs = chip_index$Accession,
        m1 = Matrix1,
        m2 = Matrix2
    )
    names(contMatrix_list) <- as.character(chip_index$Accession)
    return(contMatrix_list)
}

getCMstats <- function(contMatrix_list, chip_index = get_chip_index()) {

  #' @title Generate statistical parameters from a contingency_matrix output
  #' @description From a list of contingency matrices, such as the output
  #' from “contingency_matrix”, this function computes a fisher's exact test
  #' for each matrix and generates a data frame that stores accession ID of a
  #' ChIP-Seq experiment, the TF tested in that experiment, the p-value and
  #' the odds ratio resulting from the test.
  #' @param contMatrix_list Output of “contingency_matrix”, a list of
  #' contingency matrix.
  #' @param chip_index Output of the function “get_chip_index”, a data frame
  #' containing accession IDs of ChIPs on the database and the TF each one
  #' tests. If not provided, the whole internal database will be used
  #' @return Data frame containing accession ID of a ChIP-Seq experiment and
  #' its experimental conditions, the TF tested in that experiment, raw and
  #' adjusted p-values, odds-ratio, and euclidean distance.
  #' and FDR-adjusted p-values (-10*log10 adj.pvalue).
  #' @export getCMstats
  #' @examples
  #' data('CM_list',package = 'TFEA.ChIP')
  #' stats_mat_UP <- getCMstats(CM_list)

  pvals <- sapply(seq_along(contMatrix_list),
                  function(lista,i) {
                    pval <- stats::fisher.test(lista[[names(lista)[i]]])[["p.value"]]
                    return(pval)
                  },
                  lista = contMatrix_list)

  oddsRatios <- sapply(seq_along(contMatrix_list),
                       function(lista,i) {
                         pval <- stats::fisher.test(lista[[names(lista)[i]]])[["estimate"]]
                         return(pval)
                       },
                       lista = contMatrix_list)

  chip_index <- chip_index[chip_index$Accession %in% names(contMatrix_list),]
  chip_index <- chip_index[ match(
    names(contMatrix_list), chip_index$Accession ),]

  statMat <- data.frame(Accession = chip_index$Accession, TF = chip_index$TF,
                        p.value = pvals, OR = oddsRatios, stringsAsFactors = FALSE)

  statMat$log2.OR <- log2(statMat$OR)
  statMat$log2.OR[which(!is.finite(statMat$log2.OR))]<-NA

  statMat$adj.p.value <- stats::p.adjust(statMat$p.value, "fdr")
  statMat$log10.adj.pVal <- (-1 * (log10(statMat$adj.p.value)))
  statMat$log10.adj.pVal[which(!is.finite(statMat$log10.adj.pVal))]<-NA

  maxOR <- max(statMat$OR[ statMat$OR != Inf ])
  minOR <- min(statMat$OR[ statMat$OR != -Inf ])

  statMat$tmpOR <- statMat$OR
  statMat$tmpOR[ statMat$OR == Inf ] <- maxOR
  statMat$tmpOR[ statMat$OR == -Inf ] <- minOR

  statMat$distance <- sapply(
    seq_along(statMat$Accession),
    function(i){
        if( statMat$OR[i] > 1 ){
            x1 <- c( 0, 1 )
            x2 <- c( statMat$log10.adj.pVal[i], statMat$tmpOR[i])
            d <- sqrt( sum( (x2-x1)**2 ) )

        } else if ( statMat$OR[i] <= 1 & statMat$OR[i] > 0 ) {
            x1 <- c( 0, 1 )
            x2 <- c( statMat$log10.adj.pVal[i], 1/statMat$tmpOR[i] )
            d <- -1 * sqrt( sum( (x2-x1)**2 ) )
        } else { d <- 0 }

        return(d)
    }
  )
  statMat <- statMat[, colnames(statMat)!="tmpOR" ]

  if (!exists("MetaData")) {
    MetaData <- NULL
    data("MetaData", package = "TFEA.ChIP", envir = environment())
  }
  statMat <- merge(MetaData[,c("Accession","Cell","Treatment")],statMat,by="Accession")
  statMat <- statMat[order(statMat$distance,decreasing = TRUE,na.last = TRUE),]
  return( statMat )
}

rankTFs <- function( resultsTable,
                     rankMethod = "gsea", makePlot = FALSE,
                     plotTitle = "TF ranking"){

    #' @title Rank the TFs in the output from 'getCMstats'
    #' @description Rank the TFs in the output from 'getCMstats' using
    #' Wilcoxon rank-sum test or a GSEA-like approach.
    #' @param resultsTable Output from the function 'getCMstats'
    #' @param rankMethod "wilcoxon" or "gsea".
    #' @param makePlot (Optional) For rankMethod="gsea". If TRUE, generates a plot for
    #' TFs with a p-value < 0.05.
    #' @param plotTitle (Optional) Title for the plot.
    #' @return data frame containing:
    #' \itemize{
    #'   \item For Wilcoxon rank-sum test: rank, TF name, test statistic
    #'   ('wilc_W), p-value, Freeman's theta, epsilon-squared anf effect size
    #'   \item For GSEA-like ranking: TF name, enrichment score, argument,
    #'   p-value, number of ChIPs}
    #' @export rankTFs
    #' @examples
    #' data('stat_mat',package = 'TFEA.ChIP')
    #' rankTFs( stat_mat )


    #### Input format
    rankMethod <- tolower( rankMethod )
    stopifnot(rankMethod %in% c("wilcoxon", "gsea"))

    # check the input type ( TFEA.ChIP association analysis  )
    cols_needed <- c("Accession","TF","distance")
    if( any( ! cols_needed %in% colnames( resultsTable ) ) ){
        stop("Input error: resultsTable doesn't contain all",
             "the columns required ('Accession','TF','distance')")
    }

    #### gsea ranking
    if( rankMethod == "gsea" ){

        chipSets <- lapply(
            unique( resultsTable$TF ),
            function(i, tab ){
                return( tab$Accession[ tab$TF == i ])
            }, tab = resultsTable
        )
        names( chipSets ) <- unique( resultsTable$TF )

        TFrank <- lapply(
            chipSets,
            function(i, statMat ){

                tf <-  statMat$TF[ statMat$Accession == i[1] ]
                res <- GSEA_EnrichmentScore(
                    statMat$Accession, i, 1, statMat$distance )

                shuffled <- rep( list( statMat$Accession ), 100)
                shuffled <- lapply( shuffled, sample )

                shuffled.ES <-  sapply(
                    shuffled,
                    function( j, i, chipDist ) {
                        tmp.ES <- GSEA_EnrichmentScore( j , i, 1, chipDist )$ES
                        return(tmp.ES)
                    }, i=i, chipDist = statMat$distance )

                shuffled.ES <- unlist( shuffled.ES )
                shuffled.ES <- shuffled.ES[ !is.na(shuffled.ES) ]
                pVal <- sum( abs(shuffled.ES) > abs(res$ES) ) / length( shuffled )

                return( data.frame(
                    "TF" = tf,
                    "ES" = res$ES,
                    "arg.ES" = res$arg.ES,
                    "pVal" = pVal,
                    "numberOfChIPs" = length(i),
                    stringsAsFactors = FALSE
                ))
            }, statMat = resultsTable
        )
        TFrank <- do.call( rbind, TFrank )

        if( makePlot == TRUE ){
            plot_df <- TFrank[TFrank$pVal<0.05 & !is.na(TFrank$pVal),]
            sub1 <- subset( plot_df, plot_df$ES > 0)
            sub2 <- subset( plot_df, plot_df$ES < 0)

            if( any( resultsTable$distance == 0 ) ){
                mid <- max(which( resultsTable$distance ==0 )) -
                    min( which( resultsTable$distance ==0 ) )/2
            } else {
                mid <- sum( resultsTable$distance > 0 ) + 0.5
            }

            p <- ggplot2::ggplot( ) +
                ggplot2::geom_point(
                    mapping=ggplot2::aes(x=plot_df$arg.ES, y=plot_df$ES, color=plot_df$TF) ) +
                ggplot2::ylim( -1.5, 1.5 ) +
                ggrepel::geom_text_repel(
                    ggplot2::aes( x=sub1$arg.ES, y=sub1$ES, label=sub1$TF,
                                  color=sub1$TF),
                    data = sub1, nudge_y = 1,
                    angle = 90, direction = "x" ) +
                ggrepel::geom_text_repel(
                    ggplot2::aes( x=sub2$arg.ES, y=sub2$ES, label=sub2$TF,
                                  color=sub2$TF),
                    data = sub2, nudge_y = -1,
                    angle = 90, direction = "x" ) +
                ggplot2::theme_minimal() +
                ggplot2::geom_hline( yintercept = 0 ) +
                ggplot2::geom_point( ggplot2::aes( x=mid, y=0 ),  color="black" ) +
                ggplot2::guides( color = FALSE ) +
                ggplot2::labs( title = plotTitle,
                    y = "Enrichment Score", x= "ChIP-seq ranking")
            p
            return( list( TF_ranking=TFrank, TFranking_plot = p ))
        } else{ return(TFrank) }

        #### Wilcoxon rank-sum test
    } else if( rankMethod == "wilcoxon" ){

        chip_ranking <- data.frame(
            acc = resultsTable$Accession,
            TF = resultsTable$TF,
            rank = seq_along( resultsTable$Accession  ),
            stringsAsFactors = FALSE
        )

        TF_wilcox <- lapply(
            unique( chip_ranking$TF ),
            function(i, chip_ranking){

                tmp <- chip_ranking[, c(2:3)]
                tmp$TF[ tmp$TF != i ] <- "other"
                tmp$TF <- as.factor(tmp$TF)

                wilTest <- stats::wilcox.test( rank ~ TF, data=tmp )

                return( data.frame(
                    TF = i,
                    wilc_W = wilTest[["statistic"]],
                    wilc_pval = wilTest[["p.value"]],
                    freeman_theta = rcompanion::freemanTheta(x = tmp$rank, g = tmp$TF ),
                    epsilon_squared = rcompanion::epsilonSquared( x = tmp$rank, g = tmp$TF ),
                    wilc_r = rcompanion::wilcoxonR( x = tmp$rank, g = tmp$TF ),
                    stringsAsFactors = FALSE
                ))
            }, chip_ranking = chip_ranking
        )
        TF_wilcox <- do.call( rbind, TF_wilcox)
        TF_wilcox$rank <- seq_len( nrow( TF_wilcox ) )
        rownames( TF_wilcox ) <- TF_wilcox$rank
        TF_wilcox <- TF_wilcox[, c(7,1:6)]

        return( TF_wilcox )
    }
}

GSEA_EnrichmentScore <- function(gene.list, gene.set,
                                 weighted.score.type = 0, correl.vector = NULL) {

    # Computes the weighted GSEA score of gene.set in gene.list. Developed by
    # The Broad Institute

    #' @title Computes the weighted GSEA score of gene.set in gene.list.
    #' @description Computes the weighted GSEA score of gene.set in gene.list.
    #' @param gene.list The ordered gene list
    #' @param gene.set A gene set, e.g. gene IDs corresponding to a ChIP-Seq
    #' experiment's peaks.
    #' @param weighted.score.type Type of score: weight:
    #' 0 (unweighted = Kolmogorov-Smirnov), 1 (weighted), and 2 (over-weighted)
    #' @param correl.vector A vector with the coorelations (such as signal to
    #' noise scores) corresponding to the genes in the gene list
    #' @return list of:
    #' ES: Enrichment score (real number between -1 and +1)
    #' arg.ES: Location in gene.list where the peak running enrichment occurs
    #' (peak of the 'mountain')
    #' RES: Numerical vector containing the running enrichment score for all
    #' locations in the gene list
    #' tag.indicator: Binary vector indicating the location of the gene sets
    #' (1's) in the gene list
    #' @export GSEA_EnrichmentScore
    #' @examples
    #' GSEA_EnrichmentScore(gene.list=c('3091','2034','405','55818'),
    #' gene.set=c('2034','112399','405'))

    tag.indicator <- sign(match(gene.list, gene.set, nomatch = 0))
    no.tag.indicator <- 1 - tag.indicator
    N <- length(gene.list)
    Nh <- sum( gene.list %in% gene.set )
    Nm <- N - Nh
    if (weighted.score.type == 0) {
        correl.vector <- rep(1, N)
    }
    alpha <- weighted.score.type
    correl.vector <- abs(correl.vector^alpha)
    sum.correl.tag <- sum(correl.vector[tag.indicator == 1])
    if (sum.correl.tag > 0) {
        norm.tag <- 1/sum.correl.tag
        norm.no.tag <- 1/Nm
        RES <- cumsum(tag.indicator * correl.vector * norm.tag -
            no.tag.indicator * norm.no.tag)
        max.ES <- max( RES, na.rm = TRUE )
        min.ES <- min( RES, na.rm = TRUE )
        if (abs(max.ES) > abs(min.ES)) {
            # ES <- max.ES
            ES <- signif(max.ES, digits = 5)
            arg.ES <- which.max(RES)
        } else {
            # ES <- min.ES
            ES <- signif(min.ES, digits = 5)
            arg.ES <- which.min(RES)
        }
        return(list(ES = ES, arg.ES = arg.ES, RES = RES, indicator = tag.indicator))

    } else {
        RES <- rep(NaN, length(gene.list))
        indicator <- rep(0, length(gene.list))
        ES <- NA
        arg.ES <- NA
        return(list(ES = ES, arg.ES = arg.ES, RES = RES, indicator = tag.indicator))
    }
}

GSEA_ESpermutations <- function(gene.list, gene.set, weighted.score.type = 0,
                                correl.vector = NULL, perms = 1000 ) {

    #' @title Calculate enrichment scores for a permutation test.
    #' @description Function to calculate enrichment scores over a randomly ordered
    #' gene list.
    #' @param gene.list Vector of gene Entrez IDs.
    #' @param gene.set A gene set, e.g. gene IDs corresponding to a ChIP-Seq
    #' experiment's peaks.
    #' @param weighted.score.type Type of score: weight:
    #' 0 (unweighted = Kolmogorov-Smirnov), 1 (weighted), and 2 (over-weighted)
    #' @param correl.vector A vector with the coorelations (such as signal to
    #' noise scores) corresponding to the genes in the gene list
    #' @param perms Number of permutations
    #' @return Vector of Enrichment Scores for a permutation test.
    # examples
    # GSEA_EnrichmentScore(gene.list=c('3091','2034','405','55818'),
    #' gene.set=c('2034','112399','405'), perms=10)

    tag.indicator <- sign(match(gene.list, gene.set, nomatch = 0))
    no.tag.indicator <- 1 - tag.indicator
    N <- length(gene.list)
    Nh <- sum( gene.list %in% gene.set )
    Nm <- N - Nh
    if (weighted.score.type == 0) {
        correl.vector <- rep(1, N)
    }
    alpha <- weighted.score.type
    correl.vector <- abs(correl.vector^alpha)
    sum.correl.tag <- sum(correl.vector[tag.indicator == 1])
    if (sum.correl.tag > 0) {
        norm.tag <- 1/sum.correl.tag
        norm.no.tag <- 1/Nm

        pES <- sapply(
            seq_len(perms),
            function( i, mInd, cv, mNorm, nmNorm){

                mInd <- sample( mInd )
                nmInd <- 1 - mInd
                sum.correl.tag <- sum(cv[mInd == 1])
                mNorm <- 1/sum.correl.tag

                RES <- cumsum(
                    mInd * cv * mNorm - nmInd * nmNorm)
                return( max( abs( RES ), na.rm = TRUE ) )
            },
            mInd=tag.indicator, cv=correl.vector,
            nmNorm=norm.no.tag
        )
        return( pES )

    } else {
        return( rep( NA, perms ) )
    }
}

GSEA_run <- function(gene.list, LFC, chip_index = get_chip_index(),
                     get.RES = FALSE, RES.filter = NULL, perms = 1000 ) {

    #' @title Function to run a GSEA analysis
    #' @description Function to run a GSEA to analyze the distribution of TFBS
    #' across a sorted list of genes.
    #' @param gene.list List of Entrez IDs ordered by their fold change.
    #' @param LFC Vector of log2( Fold Change ) values.
    #' @param chip_index Output of the function “get_chip_index”, a data frame
    #' containing accession IDs of ChIPs on the database and the TF each one
    #' tests. If not provided, the whole internal database will be used
    #' @param get.RES (Optional) boolean. If TRUE, the function stores Running
    #' Enrichment Scores of all/some TF.
    #' @param RES.filter (Optional) chr vector. When get.RES==TRUE, allows to
    #' choose which TF's Running Enrichment Score to store.
    #' @param perms Number of permutations for a permutation test.
    #' @return a list of:
    #' Enrichment.table: data frame containing accession ID, Cell type, ChIP-Seq
    #' treatment, transcription factor tested, enrichment score, raw and
    #' adjusted p-value, and argument of every ChIP-Seq experiment.
    #' RES (optional): list of running sums of every ChIP-Seq
    #' indicators (optional): list of 0/1 vectors that stores the matches (1)
    #' and mismatches (0) between the gene list and the gene set.
    #' @export GSEA_run
    #' @examples
    #' data('hypoxia',package = 'TFEA.ChIP')
    #' preprocessInputData(hypoxia)
    #' chip_index<-get_chip_index(TFfilter = c('HIF1A','EPAS1','ARNT'))
    #' GSEA.result <- GSEA_run( hypoxia$Genes, hypoxia$log2FoldChange, chip_index, get.RES = TRUE)

    if (!exists("Mat01")) {
        Mat01 <- NULL
        data("Mat01", package = "TFEA.ChIP", envir = environment())
    }
    if (!exists("MetaData")) {
        MetaData <- NULL
        data("MetaData", package = "TFEA.ChIP", envir = environment())
    }
    Mat01 <- Mat01[, colnames(Mat01) %in% chip_index$Accession]

    enrichmentScore <- vector()
    pval <- vector()
    enrichmentArg <- vector()

    if (get.RES == TRUE) {
        res <- list()
        ind <- list()
    }
    # create progress bar
    pbar <- txtProgressBar(min = 0, max = length(chip_index$Accession),
        style = 3)

    chip_index$done <- rep(FALSE, nrow(chip_index))

    for (i in seq_along(chip_index$Accession)) {

        if (is.matrix(Mat01)){
            chip.genes <- Mat01[, colnames(Mat01) == chip_index$Accession[i]]
        } else { chip.genes <- Mat01 }
        chip.genes <- names(chip.genes[chip.genes == 1])

        if (length(chip.genes) > 10) {

            result <- GSEA_EnrichmentScore(gene.list, chip.genes,
                weighted.score.type = 1, correl.vector =  LFC)

            shuffled.ES <- GSEA_ESpermutations( gene.list , chip.genes,
                weighted.score.type = 1, correl.vector =  LFC, perms )

            shuffled.ES <- shuffled.ES[ !is.na(shuffled.ES) ]

            enrichmentScore <- c(enrichmentScore, result$ES)
            pval <- c(pval, sum( shuffled.ES >= abs(result$ES))/ length(shuffled.ES))
            enrichmentArg <- c(enrichmentArg, result$arg.ES)
            chip_index$done[i] <- TRUE

            if (get.RES == TRUE & missing(RES.filter)) {
                # Store running sums of all TFs.
                res <- c(res, list(result$RES))
                names(res)[length(res)] <- chip_index$Accession[i]
                ind <- c(ind, list(result$indicator))
                names(ind)[length(ind)] <- chip_index$Accession[i]
            } else if (get.RES == TRUE & !missing(RES.filter) ) {
                # Store running sums of TF in RES.filter
                if (chip_index$TF[i] %in% RES.filter) {
                    res <- c(res, list(result$RES))
                    names(res)[length(res)] <- chip_index$Accession[i]
                    ind <- c(ind, list(result$indicator))
                    names(ind)[length(ind)] <- chip_index$Accession[i]
                }
            }
        }
        # update progress bar
        setTxtProgressBar(pbar, i)
    }
    chip_index <- chip_index[ chip_index$done == TRUE, ]

    close(pbar)
    pval.adj <- stats::p.adjust(pval, "fdr")  # Adjust pvalues

    enrichmentTable <- data.frame(
        Accession = chip_index$Accession, TF = chip_index$TF,
        ES = enrichmentScore, p.val = pval, pval.adj = pval.adj,
        Arg.ES = enrichmentArg, stringsAsFactors = FALSE )

    enrichmentTable <- enrichmentTable[!is.na(enrichmentTable$pval.adj), ]

    enrichmentTable <- merge(
        MetaData[,c("Accession","Cell","Treatment")],
        enrichmentTable, by="Accession")

    if (get.RES == TRUE) {
        GSEA_results <- list(enrichmentTable, res, ind)
        names(GSEA_results) <- c("Enrichment.table", "RES", "indicators")
        return(GSEA_results)
    } else {
        return(enrichmentTable)
    }
}

plot_CM <- function(CM.statMatrix, plot_title = NULL,
                    specialTF = NULL, TF_colors = NULL) {

    #' @title Makes an interactive html plot from an enrichment table.
    #' @description Function to generate an interactive html plot from a
    #' transcription factor enrichment table, output of the function
    #' 'getCMstats'.
    #' @param CM.statMatrix Output of the function 'getCMstats'.
    #' A data frame storing: Accession ID of every ChIP-Seq tested,
    #' Transcription Factor,Odds Ratio, p-value and adjusted p-value.
    #' @param plot_title The title for the plot.
    #' @param specialTF (Optional) Named vector of TF symbols -as written in
    #' the enrichment table- to be highlighted in the plot. The name of each
    #' element of the vector specifies its color group, i.e.: naming elements
    #' HIF1A and HIF1B as 'HIF' to represent them with the same color.
    #' @param TF_colors (Optional) Nolors to highlight TFs chosen in specialTF.
    #' @return plotly scatter plot.
    #' @export plot_CM
    #' @examples
    #' data('stat_mat',package = 'TFEA.ChIP')
    #' plot_CM(stat_mat)

    if (!requireNamespace("plotly", quietly = TRUE)) {
        stop("plotly package needed for this function to work. ",
            "Please install it.", call. = FALSE)
    }

    # Checking input variables
    if (is.null(plot_title)) {
        plot_title <- "Transcription Factor Enrichment"
    }
    if (is.null(specialTF)) {
        CM.statMatrix$highlight <- rep("Other", dim(CM.statMatrix)[1])
        markerColors <- c("azure4")
        names(markerColors) <- c("Other")
    }
    if (!is.null(specialTF) & is.null(TF_colors)) {
        TF_colors <- c("red", "blue", "green", "hotpink", "cyan",
            "greenyellow", "gold", "darkorchid", "chocolate1",
            "black", "lightpink", "seagreen")
        TF_colors <- TF_colors[1:length(unique(names(specialTF)))]
        highlight_list <- highlight_TF(CM.statMatrix, 4, specialTF,
            TF_colors)
        CM.statMatrix$highlight <- highlight_list[[1]]
        markerColors <- highlight_list[[2]]
    }
    if (!is.null(specialTF) & !is.null(TF_colors)) {
        highlight_list <- highlight_TF(CM.statMatrix, 4, specialTF,
            TF_colors)
        CM.statMatrix$highlight <- highlight_list[[1]]
        markerColors <- highlight_list[[2]]
    }
    if (!exists("MetaData")) {
        MetaData <- NULL
        data("MetaData", package = "TFEA.ChIP", envir = environment())
    }

    # Adding metadata for the plot
    MetaData <- MetaData[MetaData$Accession %in% CM.statMatrix$Accession, ]
    MetaData <- MetaData[
        match( CM.statMatrix$Accession, MetaData$Accession), ]
    CM.statMatrix$Treatment <- MetaData$Treatment
    CM.statMatrix$Cell <- MetaData$Cell
    rm(MetaData)

    # Cheking if any plot variables have an Inf value.
    if (length(CM.statMatrix[CM.statMatrix$OR == Inf, 1]) > 0) {

        warn_number <- length(CM.statMatrix[CM.statMatrix$OR == Inf, 1])

        # Substitute Inf OR values for the maximum finite value
        CM.statMatrix[CM.statMatrix$OR == Inf, ]$OR <-
            rep(max(CM.statMatrix[CM.statMatrix$OR != Inf, ]$OR),
                length(CM.statMatrix[CM.statMatrix$OR == Inf, 1]))

        warning(warn_number, " elements have an Odds Ratio of Inf.",
            " Maximum value for OR introduced instead.")
    }
    if (length(CM.statMatrix[CM.statMatrix$OR == -Inf, 1]) > 0) {

        warn_number <- dim(CM.statMatrix[CM.statMatrix$OR == -Inf,][1])

        # Substitute -Inf OR values for the minimum finite value
        CM.statMatrix[CM.statMatrix$OR == -Inf, ]$OR <-
            rep(min(CM.statMatrix[CM.statMatrix$OR != -Inf, ]$OR),
                dim(CM.statMatrix[CM.statMatrix$OR == -Inf, ])[1])

        warning(warn_number, " elements have an Odds Ratio of -Inf. Minimum",
            " value for OR introduced instead.")
    }
    if (dim(CM.statMatrix[CM.statMatrix$adj.p.value == 0,])[1] > 0) {
        warn_number <- dim(CM.statMatrix[CM.statMatrix$adj.p.value == 0,][1])

        # Substitute Inf -log(pval) values for the maximum finite value
        CM.statMatrix[CM.statMatrix$p.value == 0, "log.adj.pVal"] <-
            rep(max(CM.statMatrix[CM.statMatrix$adj.p.value != 0, "log.adj.pVal"]),
            dim(CM.statMatrix[CM.statMatrix$adj.p.value == 0,])[1])

        warning(warn_number, " elements have a -log(p-Value) of Inf. ",
            "Maximum value for -log(p-Val) introduced instead.")
    }

    if (length(markerColors) > 1) {
        CM.statMatrix_highlighted <- CM.statMatrix[CM.statMatrix$highlight !=
            "Other", ]
        CM.statMatrix_other <- CM.statMatrix[CM.statMatrix$highlight ==
            "Other", ]

        p <- plotly::plot_ly(CM.statMatrix_other, x = ~log10.adj.pVal,
            y = ~log2.OR, type = "scatter", mode = "markers",
            text = paste0(
                CM.statMatrix_other$Accession, ": ", CM.statMatrix_other$TF,
                "<br>Treatment: ", CM.statMatrix_other$Treatment,
                "<br>Cell: ", CM.statMatrix_other$Cell),
            color = ~highlight, colors = markerColors)

        p <- plotly::add_markers(p, x = CM.statMatrix_highlighted$log10.adj.pVal,
            y = CM.statMatrix_highlighted$log2.OR, type = "scatter", mode = "markers",
            text = paste0(
                CM.statMatrix_highlighted$Accession, ": ", CM.statMatrix_highlighted$TF,
                "<br>Treatment: ", CM.statMatrix_highlighted$Treatment,
                "<br>Cell: ", CM.statMatrix_highlighted$Cell),
            color = CM.statMatrix_highlighted$highlight,
            colors = markerColors) %>%
        plotly::layout(title = plot_title)

    } else {
        p <- plotly::plot_ly(CM.statMatrix, x = ~log10.adj.pVal,
            y = ~log2.OR, type = "scatter", mode = "markers",
            text = paste0(CM.statMatrix$Accession, ": ", CM.statMatrix$TF,
                "<br>Treatment: ", CM.statMatrix$Treatment,
                "<br>Cell: ", CM.statMatrix$Cell), color = ~highlight,
            colors = markerColors) %>%
        plotly::layout(title = plot_title)
    }
    p
    return(p)
}

plot_ES <- function(GSEA_result, LFC, plot_title = NULL, specialTF = NULL,
                    TF_colors = NULL, Accession = NULL, TF = NULL) {

    #' @title Plots Enrichment Score from the output of GSEA.run.
    #' @description Function to plot the Enrichment Score of every member of
    #' the ChIPseq binding database.
    #' @param GSEA_result Returned by GSEA_run
    #' @param LFC Vector with log2(Fold Change) of every gene that has an
    #' Entrez ID. Arranged from higher to lower.
    #' @param plot_title (Optional) Title for the plot
    #' @param specialTF (Optional) Named vector of TF symbols -as written in
    #' the enrichment table- to be highlighted in the plot. The name of each
    #' element specifies its color group, i.e.: naming elements HIF1A and HIF1B
    #' as 'HIF' to represent them with the same color.
    #' @param TF_colors (Optional) Colors to highlight TFs chosen in specialTF.
    #' @param Accession (Optional) restricts plot to the indicated list dataset
    #' IDs.
    #' @param TF (Optional) restricts plot to the indicated list transcription
    #' factor names.
    #' @return Plotly object with a scatter plot -Enrichment scores- and a
    #' heatmap -log2(fold change) bar-.
    #' @export plot_ES
    #' @examples
    #' data('GSEA.result','log2.FC',package = 'TFEA.ChIP')
    #' TF.hightlight<-c('EPAS1')
    #' names(TF.hightlight)<-c('EPAS1')
    #' col<- c('red')
    #' plot_ES(GSEA.result,log2.FC,specialTF = TF.hightlight,TF_colors = col)

    if (!requireNamespace("plotly", quietly = TRUE)) {
        stop("plotly package needed for this function to work. ",
            "Please install it.", call. = FALSE)
    }

    if (is.data.frame(GSEA_result) == TRUE) {
        enrichmentTable <- GSEA_result
    } else if (is.list(GSEA_result) == TRUE) {
        enrichmentTable <- GSEA_result[["Enrichment.table"]]
    }

    if (!is.null(Accession) | !is.null(TF)) {
        if (is.null(Accession)) {
            Accession <- enrichmentTable[ enrichmentTable$TF %in% TF,]$Accession
        }
        if (is.null(TF)) {
            TF <- enrichmentTable[ enrichmentTable$Accession %in% Accession,]$TF
        }
        SS <- ((enrichmentTable$Accession %in% Accession) &
            (enrichmentTable$TF %in% TF))
        enrichmentTable <- enrichmentTable[which(SS), ]
    }

    if (is.null(plot_title)) {
        plot_title <- "Transcription Factor Enrichment"
    }

    if (is.null(specialTF)) {
        highlight_list <- rep("Other", dim(enrichmentTable)[1])
        enrichmentTable$highlight <- highlight_list
        markerColors <- c("azure4")
        names(markerColors) <- c("Other")
    }

    if (!is.null(specialTF) & is.null(TF_colors)) {
        TF_colors <- c("red", "blue", "green", "hotpink", "cyan",
            "greenyellow", "gold", "darkorchid", "chocolate1",
            "black", "lightpink", "seagreen")
        TF_colors <- TF_colors[1:length(unique(names(specialTF)))]
        highlight_list <- highlight_TF(enrichmentTable, 4, specialTF,
            TF_colors)
        enrichmentTable$highlight <- highlight_list[[1]]
        markerColors <- highlight_list[[2]]
    }
    if (!is.null(specialTF) & !is.null(TF_colors)) {
        highlight_list <- highlight_TF(enrichmentTable, 4, specialTF,
            TF_colors)
        enrichmentTable$highlight <- highlight_list[[1]]
        markerColors <- highlight_list[[2]]
    }

    simbolo<-sapply(seq_along(enrichmentTable[,1]),
        function(EnrTable,i){
            if (EnrTable$pval.adj[i] <= 0.05) {
                sym<-"pVal<=0.05"
            }else{ sym<-"pVal>0.05" }
            return(sym)
        },
        EnrTable = enrichmentTable)
    enrichmentTable$symbol <- simbolo

    # Adding metadata
    if (!exists("MetaData")) {
        MetaData <- NULL
        data("MetaData", package = "TFEA.ChIP", envir = environment())
    }

    MetaData <- MetaData[MetaData$Accession %in% enrichmentTable$Accession, ]
    MetaData <- MetaData[
        match( enrichmentTable$Accession, MetaData$Accession), ]
    enrichmentTable$Treatment <- MetaData$Treatment
    enrichmentTable$Cell <- MetaData$Cell
    rm(MetaData)

    if (length(markerColors) > 1 &
        length(unique(enrichmentTable$TF)) > length( specialTF ) ) {
        enrichmentTable_highlighted <- enrichmentTable[enrichmentTable$highlight !=
            "Other", ]
        enrichmentTable_other <- enrichmentTable[enrichmentTable$highlight ==
            "Other", ]

        p <- plotly::plot_ly(enrichmentTable_other, x = enrichmentTable_other$Arg.ES,
            y = enrichmentTable_other$ES, type = "scatter", mode = "markers",
            text = paste0(
                enrichmentTable_other$Accession, ": ", enrichmentTable_other$TF,
                "<br>Adjusted p-value: ", round(enrichmentTable_other$pval.adj, 3),
                "<br>Treatment: ", enrichmentTable_other$Treatment,
                "<br>Cell: ", enrichmentTable_other$Cell),
            color = enrichmentTable_other$highlight,
            colors = markerColors, symbol = enrichmentTable_other$symbol,
            symbols = c("x", "circle"))

        p <- plotly::add_markers(p, x = enrichmentTable_highlighted$Arg.ES,
            y = enrichmentTable_highlighted$ES, type = "scatter", mode = "markers",
            text = paste0(
                enrichmentTable_highlighted$Accession, ": ", enrichmentTable_highlighted$TF,
                "<br>Pval: ", round(enrichmentTable_highlighted$pval.adj, 3),
                "<br>Treatment: ", enrichmentTable_highlighted$Treatment,
                "<br>Cell: ", enrichmentTable_highlighted$Cell),
            color = enrichmentTable_highlighted$highlight, colors = markerColors,
            symbol = enrichmentTable_highlighted$symbol, symbols = c("x",
                "circle")) %>%
        plotly::layout(title = plot_title,
            xaxis = list(title = "Argument"), yaxis = list(title = "ES"))

    } else {
        p <- plotly::plot_ly(enrichmentTable, x = enrichmentTable$Arg.ES,
            y = enrichmentTable$ES, type = "scatter", mode = "markers",
            text = paste0(
                enrichmentTable$Accession, ": ", enrichmentTable$TF,
                "<br>Pval: ", round(enrichmentTable$pval.adj, 3),
                "<br>Treatment: ", enrichmentTable$Treatment,
                "<br>Cell: ", enrichmentTable$Cell),
            color = enrichmentTable$highlight,
            colors = markerColors, symbol = enrichmentTable$symbol,
            symbols = c("x", "circle")) %>%
        plotly::layout(title = plot_title,
            xaxis = list(title = "Argument"), yaxis = list(title = "ES"))
    }
    # Adding log2(Fold Change) bar to the plot
    LFC.bar <- get_LFC_bar(LFC)
    graf <- plotly::subplot(p, LFC.bar, shareX = TRUE, nrows = 2,
        heights = c(0.95, 0.05), titleY = TRUE)
    graf
    return(graf)
}

plot_RES <- function(GSEA_result, LFC, plot_title = NULL, line.colors = NULL,
                     line.styles = NULL, Accession = NULL, TF = NULL) {

    #' @title Plots all the RES stored in a GSEA_run output.
    #' @description Function to plot all the RES stored in a GSEA_run output.
    #' @param GSEA_result Returned by GSEA_run
    #' @param LFC Vector with log2(Fold Change) of every gene that has an
    #' Entrez ID. Arranged from higher to lower.
    #' @param plot_title (Optional) Title for the plot.
    #' @param line.colors (Optional) Vector of colors for each line.
    #' @param line.styles (Optional) Vector of line styles for each line
    #' ('solid'/'dash'/'longdash').
    #' @param Accession (Optional) restricts plot to the indicated list dataset
    #' IDs.
    #' @param TF (Optional) restricts plot to the indicated list transcription
    #' factor names.
    #' @return Plotly object with a line plot -running sums- and a
    #' heatmap -log2(fold change) bar-.
    #' @export plot_RES
    #' @examples
    #' data('GSEA.result','log2.FC',package = 'TFEA.ChIP')
    #' plot_RES(GSEA.result,log2.FC,TF=c('STAT1'),
    #'     Accession=c('GSM2390642','wgEncodeEH002867'))

    if (!requireNamespace("plotly", quietly = TRUE)) {
        stop("plotly package needed for this function to work.",
            " Please install it.", call. = FALSE)
    }
    # Checking input variables
    if (!exists("MetaData")) {
        MetaData <- NULL
        data("MetaData", package = "TFEA.ChIP", envir = environment())
    }
        # Accession or TF filters
    if (!is.null(Accession) | !is.null(TF)) {

        tmp <- GSEA_result[["Enrichment.table"]]

        if (is.null(Accession)) { Accession <- tmp[tmp$TF %in% TF,]$Accession }
        if (is.null(TF)) { TF <- tmp[tmp$Accession %in% Accession,]$TF }

        GSEA_result[["Enrichment.table"]] <- tmp[
            tmp$Accession %in% Accession &
            tmp$TF %in% TF, ]

        GSEA_result[["RES"]] <- GSEA_result$RES[
            names(GSEA_result[["RES"]]) %in% Accession]

        GSEA_result[["indicators"]] <- GSEA_result[["indicators"]][
            names(GSEA_result[["indicators"]] %in% Accession)]

    } else {
        Accession <- GSEA_result[["Enrichment.table"]]$Accession
    }
        # line parameters & plot title
    if (is.null(line.colors)) {
        line.colors <- c("red", "blue", "green", "hotpink", "cyan",
            "greenyellow", "gold", "darkorchid", "chocolate1",
            "black", "lightpink", "seagreen")
        line.colors <- line.colors[1:length(names(GSEA_result[["RES"]]))]
    }
    if (is.null(line.styles)) {
        line.styles <- rep("solid", length(names(GSEA_result[["RES"]])))
    }
    if (is.null(plot_title)) {
        plot_title <- "Transcription Factor Enrichment"
    }

    GSEA.runningSum <- GSEA_result[["RES"]]

    # Getting metadata for the plot
    MetaData <- MetaData[ match(Accession, MetaData$Accession), ]
    RES <- lapply(
    	seq_along (Accession),
    	function(result, i){
    		tmp <- result[ names(result) %in% Accession[i] ][[1]]
    		return (tmp)
    	},
    	result = GSEA.runningSum
    )
    names( RES ) <- MetaData$Accession

    tabla <- data.frame(
    	Accession = MetaData$Accession,
    	Cell = MetaData$Cell,
    	Treatment = MetaData$Treatment,
    	TF = MetaData$TF,
    	stringsAsFactors = FALSE)

    tabla$RES <- RES

    rm (MetaData, RES)

    if (dim(tabla)[1] > 1) { # plot multiple lines
        for (i in seq_along(Accession)) {
            if (i == 1) {
                grafica <- plotly::plot_ly(tabla, x = c(1:length(tabla$RES[[1]])),
                    y = tabla$RES[[Accession[1]]], type = "scatter", mode = "lines",
                    line = list(color = line.colors[1], dash = line.styles[1]),
                    name = paste0( tabla$Accession[1], " - ", tabla$TF[1]),
                    text = paste0(
                        tabla$Accession[1], " - ", tabla$TF[1],
                        "<br>Cell: ", tabla$Cell[1],
                        "<br>Treatment: ", tabla$Treatment[1]))
            } else if (i > 1 & i < length(Accession)) {
                grafica <- plotly::add_trace(p = grafica, y = tabla$RES[[Accession[i]]],
                    type = "scatter", mode = "lines",
                    line = list(color = line.colors[i], dash = line.styles[i]),
                    name = paste0(tabla$Accession[i], " - ", tabla$TF[i]),
                    text = paste0(
                        tabla$Accession[i], " - ", tabla$TF[i],
                        "<br>Cell: ", tabla$Cell[i],
                        "<br>Treatment: ", tabla$Treatment[i]))
            } else if (i == length(Accession)) {
                grafica <- plotly::add_trace(p = grafica, y = tabla$RES[[Accession[i]]],
                    type = "scatter", mode = "lines",
                    line = list(color = line.colors[i], dash = line.styles[i]),
                    name = paste0(tabla$Accession[i], " - ", tabla$TF[i]),
                    text = paste0(
                        tabla$Accession[i], " - ", tabla$TF[i],
                        "<br>Cell: ", tabla$Cell[i],
                        "<br>Treatment: ", tabla$Treatment[i])) %>%
                plotly::layout(title = plot_title, xaxis = list(title = "Argument"),
                    yaxis = list(title = "ES"))
            }
        }
    } else { # plot one line
        grafica <- plotly::plot_ly(tabla, x = c(1:length(tabla$RES[[1]])),
            y = tabla$RES[[Accession[1]]], type = "scatter", mode = "lines",
            line = list(color = line.colors[1], dash = line.styles[1]),
            name = paste0(tabla$Accession[1],
                " - ", tabla$TF[1]), text = paste0(tabla$Accession[1],
                " - ", tabla$TF[1], "<br>Cell: ", tabla$Cell[1],
                "<br>Treatment: ", tabla$Treatment[1])) %>%
        plotly::layout(title = plot_title,
            xaxis = list(title = "Argument"), yaxis = list(title = "ES"))
    }

    # Adding log2(Fold Change) bar to the plot
    LFC.bar <- get_LFC_bar(LFC)
    graf <- plotly::subplot(grafica, LFC.bar, shareX = TRUE,
        nrows = 2, heights = c(0.95, 0.05), titleY = TRUE)
    graf
    return(graf)
}

highlight_TF <- function(table, column, specialTF, markerColors) {

    #' @title Highlight certain transcription factors in a plotly graph.
    #' @description Function to highlight certain transcription factors using
    #' different colors in a plotly graph.
    #' @param table Enrichment matrix/data.frame.
    #' @param column Column # that stores the TF name in the matrix/df.
    #' @param specialTF Named vector containing TF names as they appear in the
    #' enrichment matrix/df and nicknames for their color group.
    #' Example:
    #'           specialTF<-c('HIF1A','EPAS1','ARNT','SIN3A')
    #'           names(specialTF)<-c('HIF','HIF','HIF','SIN3A')
    #' @param markerColors Vector specifying the shade for every color group.
    #' @return List of two objects:
    #' A vector to attach to the enrichment matrix/df pointing out the color
    #' group of every row.
    #' A named vector connecting each color group to the chosen color.
    # examples
    # highlight_TF(CM.statMatrix_UP,4,specialTF,colors)

    highlight <- sapply( seq_along(table[,1]),
        function (tmp.table, tmp.column, tmp.TF, i) {
            if (table[i, tmp.column] %in% tmp.TF){
                color.group <- names(tmp.TF[tmp.TF == table[i, tmp.column] ] )
            }else{
                color.group <- "Other"
            }
            return(color.group) },
        tmp.table = table,
        tmp.colum = column,
        tmp.TF = specialTF
        )

    markerColors <- c("azure4", markerColors)
    names(markerColors) <- c("Other", unique(names(specialTF)))
    return(list(highlight, markerColors))
}

get_LFC_bar <- function(LFC) {

    #' @title Plots a color bar from log2(Fold Change) values.
    #' @description Function to plot a color bar from log2(Fold Change)
    #' values from an expression experiment.
    #' @param LFC Vector of log2(fold change) values arranged from higher
    #' to lower. Use ony the values of genes that have an Entrez ID.
    #' @return Plotly heatmap plot -log2(fold change) bar-.
    # examples
    # get_LFC_bar(arranged.log2FC.array)

    if (!requireNamespace("scales", quietly = TRUE)) {
        stop("scales package needed for this function to work. ",
            "Please install it.", call. = FALSE)
    }
    if (!requireNamespace("plotly", quietly = TRUE)) {
        stop("plotly package needed for this function to work. ",
            "Please install it.", call. = FALSE)
    }

    # Rescaling log Fold Change values to get a -1 to 1 scale
    vals <- scales::rescale(LFC)
    o <- order(vals, decreasing = FALSE)

    # Building a red-blue scale adapted to the log Fold Change
    cols1 <- (scales::col_numeric(grDevices::colorRamp(c("mistyrose",
        "red3")), domain = NULL))(vals[1:length(LFC[LFC > 0])])
    cols2 <- (scales::col_numeric(grDevices::colorRamp(c("navy",
        "lightcyan")), domain = NULL))(vals[length(LFC[LFC >
        0]) + 1:length(LFC)])
    cols <- c(cols1, cols2)

    colz <- data.frame(vals[o], cols[o])

    LFC.bar <- plotly::plot_ly(x = c(1:length(LFC)), y = rep(1,
        length(LFC)), z = LFC, type = "heatmap", colorscale = colz,
        showscale = FALSE) %>%
    plotly::layout(yaxis = list(visible = FALSE))

    return(LFC.bar)
}

Try the TFEA.ChIP package in your browser

Any scripts or data that you put into this service are public.

TFEA.ChIP documentation built on Nov. 8, 2020, 5:05 p.m.