TFEA.ChIP.R
In TFEA.ChIP: Analyze Transcription Factor Enrichment

## ----eval=TRUE,echo=TRUE,message=FALSE----------------------------------------
library(TFEA.ChIP)
data( "hypoxia_DESeq", "hypoxia", package="TFEA.ChIP" ) # load example datasets
hypoxia_table <- preprocessInputData( hypoxia_DESeq )
head( hypoxia_table )

head( hypoxia )
hypoxia_table <- preprocessInputData( hypoxia )
head( hypoxia_table )


## ----eval=TRUE,echo=TRUE------------------------------------------------------
#extract vector with names of upregulated genes
Genes.Upreg <- Select_genes( hypoxia_table, min_LFC = 1 )
#extract vector with names of non-responsive genes
Genes.Control <- Select_genes( hypoxia_table,
    min_pval = 0.5, max_pval = 1,
    min_LFC = -0.25, max_LFC = 0.25 )

## ----eval=TRUE,echo=TRUE,message=FALSE----------------------------------------
#Conversion of hgnc to ENTREZ IDs
GeneID2entrez( gene.IDs = c("EGLN3","NFYA","ALS2","MYC","ARNT" ) )
# To translate from mouse IDs:
# GeneID2entrez( gene.IDs = c( "Hmmr", "Tlx3", "Cpeb4" ), mode = "m2h" ) # To get the equivalent human gene IDs
# GeneID2entrez( gene.IDs = c( "Hmmr", "Tlx3", "Cpeb4" ), mode = "m2m" ) # To get mouse ENTREZ gene IDs

## ----eval=TRUE,echo=TRUE------------------------------------------------------
CM_list_UP <- contingency_matrix( Genes.Upreg, Genes.Control ) #generates list of contingency tables, one per dataset
pval_mat_UP <- getCMstats( CM_list_UP ) #generates list of p-values and OR from association test
head( pval_mat_UP )

## ----eval=TRUE,echo=TRUE------------------------------------------------------
chip_index <- get_chip_index( TFfilter = c( "HIF1A","EPAS1","ARNT" ) ) #restrict the analysis to datasets assaying these factors
chip_index <- get_chip_index( encodeFilter = TRUE ) # Or select ENCODE datasets only
CM_list_UPe <- contingency_matrix( Genes.Upreg, Genes.Control, chip_index ) #generates list of contingency tables
pval_mat_UPe <- getCMstats( CM_list_UPe, chip_index ) #generates list of p-values and ORs
head( pval_mat_UPe )

## ----eval=TRUE, echo=TRUE, fig.width=8, fig.height=4--------------------------
TF_ranking <- rankTFs( pval_mat_UP, rankMethod = "gsea", makePlot = TRUE )
TF_ranking[[ "TFranking_plot" ]]
head( TF_ranking[[ "TF_ranking" ]] )

## ----eval=FALSE,echo=TRUE-----------------------------------------------------
#  plot_CM( pval_mat_UP ) #plot p-values against ORs

## ----eval=FALSE,echo=TRUE-----------------------------------------------------
#  HIFs <- c( "EPAS1","HIF1A","ARNT" )
#  names(HIFs) <- c( "EPAS1","HIF1A","ARNT" )
#  col <- c( "red","blue","green" )
#  plot_CM( pval_mat_UP, specialTF = HIFs, TF_colors = col ) #plot p-values against ORs highlighting indicated TFs

## ----eval=TRUE,echo=TRUE------------------------------------------------------
chip_index <- get_chip_index( TFfilter = c( "HIF1A","EPAS1","ARNT" ) ) #restrict the analysis to datasets assaying these factors

## ----eval=TRUE,echo=TRUE,results='hide'---------------------------------------
GSEA.result <- GSEA_run( hypoxia_table$Genes, hypoxia_table$log2FoldChange, chip_index, get.RES = TRUE) #run GSEA analysis

## ----eval=TRUE,echo=TRUE------------------------------------------------------
head(GSEA.result[["Enrichment.table"]])
head(GSEA.result[["RES"]][["GSM2390642"]])
head(GSEA.result[["indicators"]][["GSM2390642"]])

## ----eval=FALSE,echo=TRUE-----------------------------------------------------
#  TF.hightlight <- c( "EPAS1","ARNT","HIF1A" )
#  names( TF.hightlight ) <- c( "EPAS1","ARNT","HIF1A" )
#  col <- c( "red","blue","green" )
#  plot_ES( GSEA.result, LFC = hypoxia_table$log2FoldChange, specialTF = TF.hightlight, TF_colors = col)

## ----eval=FALSE, echo=TRUE----------------------------------------------------
#  plot_RES(
#      GSEA_result = GSEA.result, LFC = hypoxia_table$log2FoldChange,
#      TF = c( "EPAS1" ), Accession = c(
#          "GSE43109.EPAS1.MACROPHAGE_HYPO_IL",
#          "GSE43109.EPAS1.MACROPHAGE_NORMO_IL" ) )

## ----eval=FALSE,echo=TRUE-----------------------------------------------------
#  folder <- "~/peak.files.folder"
#  File.list<-dir( folder )
#  format <- "macs"
#  
#  gr.list <- lapply(
#      seq_along( File.list ),
#      function( File.list, myMetaData, format ){
#  
#          tmp<-read.table( File.list[i], ..., stringsAsFactors = FALSE )
#  
#          file.metadata <- myMetaData[ myMetaData$Name == File.list[i], ]
#  
#          ChIP.dataset.gr<-txt2GR(tmp, format, file.metadata)
#  
#          return(ChIP.dataset.gr)
#      },
#      File.list = File.list,
#      myMetadata = myMetadata,
#      format = format
#  )

## ----eval=TRUE,echo=TRUE------------------------------------------------------
# As an example of the output
data( "ARNT.peaks.bed","ARNT.metadata", package = "TFEA.ChIP" ) # Loading example datasets for this function
ARNT.gr <- txt2GR( ARNT.peaks.bed, "macs1.4", ARNT.metadata )
head( ARNT.gr, n=2 )

## ----eval=FALSE,echo=TRUE-----------------------------------------------------
#  dnaseClusters<-read.table(
#      file="~/path.to.file.txt",
#      header = TRUE, sep="\t", stringsAsFactors = FALSE )
#  dnaseClusters<-makeGRangesFromDataFrame(
#      dnaseClusters, ignore.strand=TRUE,
#      seqnames.field="chrom", start.field="chromStart",
#      end.field="chromEnd" )

## ----eval=FALSE,echo=TRUE-----------------------------------------------------
#  library( TxDb.Hsapiens.UCSC.hg19.knownGene, quietly = TRUE )
#  txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
#  Genes <- genes( txdb )
#  
#  near.gene <- findOverlaps( dnaseClusters, Genes, maxgap = 1000 )
#  
#  dnase.sites.list <- queryHits( near.gene )
#  near.gene <- subjectHits( near.gene )
#  
#  gene_ids <- Genes[ near.gene ]$gene_id
#  DHS.database <- dnaseClusters[ dnase.sites.list ]
#  mcols(DHS.database)$gene_id <- gene_ids
#  

## ----eval=TRUE,echo=TRUE------------------------------------------------------
data( "DnaseHS_db", "gr.list", package="TFEA.ChIP" ) # Loading example datasets for this function
TF.gene.binding.db <- GR2tfbs_db( DnaseHS_db, gr.list ) 
str( TF.gene.binding.db )

## ----eval=TRUE,echo=TRUE------------------------------------------------------
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
data( "gr.list", package="TFEA.ChIP") # Loading example datasets for this function
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
Genes <- genes( txdb )
TF.gene.binding.db <- GR2tfbs_db( Genes, gr.list, distanceMargin = 0 )
str( TF.gene.binding.db )

## ----eval=TRUE,echo=TRUE------------------------------------------------------
data( "tfbs.database", package = "TFEA.ChIP" ) # Loading example datasets for this function
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
gen.list <- genes( txdb )$gene_id # selecting all the genes in knownGene

myTFBSmatrix <- makeTFBSmatrix( gen.list, tfbs.database )
myTFBSmatrix[ 2530:2533, 1:3 ] # The gene HMGN4 (Entrez ID 10473) has TFBS for this three ChIP-Seq datasets


## ----eval=FALSE,echo=TRUE-----------------------------------------------------
#  set_user_data( binary_matrix = myTFBSmatrix, metadata = myMetaData )