Gene Regulation Analysis

knitr::opts_chunk$set(echo = FALSE,message = FALSE,warning = FALSE)
library(enrichTF)
library(rtracklayer)
library(ggplot2)
library(ggpubr)
objname <- load("PrevSteps.Rdata")

Click here to see all original code for generating this report.

Regions Information

Basic Information

stepUnzipAndMergeBed <-prevSteps[["UnzipAndMergeBed"]]
bedInput <- input(stepUnzipAndMergeBed)$bedInput
bedOutput <- output(stepUnzipAndMergeBed)$bedOutput
genome <- "hg19" #property(stepUnzipAndMergeBed)[["genome"]]

outputGrange <- import.bed(bedOutput)

Total r length(outputGrange) regions from r length(bedInput) BED file(s) of r genome.

knitr::kable(data.frame(Directory=bedInput,Regions=sapply(bedInput, function(x){length(readLines(x))})))

Region sizes distribution

ggplot(data.frame(RegionSize = width(outputGrange)),aes(x=RegionSize)) + 
    geom_histogram(binwidth = 1) + geom_vline(xintercept = median(width(outputGrange))) + 
    annotate("text", x = median(width(outputGrange)), y = 50, 
             color="black",size=2 ,label = paste("median:",median(width(outputGrange)))) + 
    xlab("Region Size") + ylab("Counts")

Region chromesome distribution

outputdf <- as.data.frame(outputGrange)

library(ggplot2)
ggplot(outputdf ,aes(seqnames)) + geom_bar() + xlab("Chromosome") + ylab("Counts")

Open Specificity and Tissues

stepTissueOpennessSpecificity <-prevSteps[["TissueOpennessSpecificity"]]

Top 20 open Tissues / Cell Types are list below.

sampleTxtFile <- output(stepTissueOpennessSpecificity)$sampleTxtOutput
sampleTxt <- read.table(sampleTxtFile, header = TRUE, sep = "\t")

Complete table can be accessed from the directory of TissueOpennessSpecificity: r sampleTxtFile

showSampleTxt <- sampleTxt[1:20,]
rownames(showSampleTxt) <- NULL
colnames(showSampleTxt) <- c("Index", "Tissue/Cell Type", "ENCODE", "Median")

knitr::kable(showSampleTxt)

Top 8 open Tissues / Cell Types are list below.

All figures can be accessed from the directory of TissueOpennessSpecificity: r output(stepTissueOpennessSpecificity)$distPdfOutput

bedOutput <- read.table(output(stepTissueOpennessSpecificity)$bedOutput,sep = "\t", header = FALSE)

openValue <- bedOutput[,4:ncol(bedOutput)]

idx <- sampleTxt[1:8,1]
spname <- as.character(sampleTxt[1:8,2])
names(spname) <- idx

plt<-lapply(idx, function(x){
    v <- openValue[[x]]
    ggplot(data.frame(v=v),aes(x=v)) +
        geom_histogram(binwidth = 0.1) +
        geom_vline(xintercept = median(v)) +
        annotate("text", x = median(v),
                 y = 50, color="white",
                 size=2 ,label = paste("median:", median(v))) + xlab(spname[as.character(x)])
})        

plt[["nrow"]] <- ceiling(length(idx)/2)
plt[["ncol"]] <- 2


do.call(what = ggarrange,args = plt)

Heatmap of region and tissue

load(output(stepTissueOpennessSpecificity)$heatmapDataOutput)

library(heatmap3)
p<-pdf("heatmap.png")
heatmap3(heatmapData, useRaster = TRUE)
dev.off()
file.copy(from = output(stepTissueOpennessSpecificity)$distPdfOutput, to = "heatmap.pdf")

Click to Show Large Heatmap

Conservation

stepTissueOpennessConserve <-prevSteps[["TissueOpennessConserve"]]

conserve <- read.table(output(stepTissueOpennessConserve)[["bedOutput"]],header = FALSE,sep = "\t")
ggplot(conserve, aes(V5)) + geom_histogram(binwidth = 0.01) + xlab("Conserve") + ylab("Counts")

Regulation Target Gene

Target gene average score and count are shown in figure below.

The original region - target gene BED file can be downloaded here

stepRegionConnectTargetGene <-prevSteps[["RegionConnectTargetGene"]]

tg<-read.table(output(stepRegionConnectTargetGene)$outputForegroundBed,header=FALSE,sep = "\t")

genecounts <- table(tg$V6)

avg<-lapply(names(genecounts), function(x){
    return(mean(tg[tg$V6==x,5]))
})

df <- data.frame(score = unlist(avg), count = as.numeric(genecounts), gene = names(genecounts))

ggplot(df) + geom_text(aes(x=count,y=score,label=gene)) + xlab("Target gene count") + ylab("Average score")

Motif and Transcription Factor Enrichment

HOMER/motifmatchr Motif Enrichment Result

stepFindMotifsInRegions <- prevSteps[["FindMotifsInRegions"]]

motifcallingresult <- output(stepFindMotifsInRegions)[["outputRegionMotifBed"]]

Transcription Factor Enrichment Based on PECA model

Top 20 TF is shown in table below.

Click here to download full txt table.

stepTFsEnrichInRegions <- prevSteps[["TFsEnrichInRegions"]]
peca<-read.table(output(stepTFsEnrichInRegions)$outputTFsEnrichTxt,header=TRUE,sep = "\t")
peca[,2:5] <- format(peca[,2:5],scientific=TRUE)
knitr::kable(peca[1:20,])

Gene Ontology Enrichment

stepGeneOntology <- prevSteps[["GeneOntology"]]
df <- read.table(output(stepGeneOntology)$outputTxt,sep="\t",header = TRUE)
knitr::kable(df)


Try the enrichTF package in your browser

Any scripts or data that you put into this service are public.

enrichTF documentation built on Nov. 8, 2020, 6:15 p.m.