brand-models.R
In trena: Fit transcriptional regulatory networks using gene expression, priors, machine learning

library(igvR)
library(TrenaProjectErythropoiesis)
library(GenomicScores)
library(phastCons7way.UCSC.hg38); phast.7 <- phastCons7way.UCSC.hg38
library(trenaSGM)
library(rnaSeqNormalizer)
#------------------------------------------------------------------------------------------------------------------------
if(!exists("mtx.brand")){
   tp <- TrenaProjectErythropoiesis()
   file <- "~/github/TrenaProjectErythropoiesis/prep/import/rnaFromMarjorie/GSE118537_DESeq_Read_Counts.tsv"
   tbl <- read.table(file, sep="\t", as.is=TRUE, header=TRUE, nrow=-1)
   rownames(tbl) <- tbl$GeneName
   mtx.counts <- as.matrix(tbl[, -1])
   fivenum(mtx.counts)
   normalizer <- rnaSeqNormalizer.gtex(mtx.counts, algorithm="log+scale", duplicate.selection.statistic="median")
   suppressWarnings(
      mtx.brand <- getNormalizedMatrix(normalizer)
      )
   x <- colnames(mtx.brand)
   reps <- unlist(lapply(strsplit(x, ".", fixed=TRUE), "[", 1))
   dayNames <- unlist(lapply(strsplit(x, ".", fixed=TRUE), "[", 2))
   dayNames <- sub("RC", "", dayNames)
   dayNames <- paste(dayNames, reps, sep=".")
   dayNames <- sub("^G[0123]", "", dayNames)
   dayNames <- tolower(dayNames)
   colnames(mtx.brand) <- dayNames

   mtx.brand[is.nan(mtx.brand)] <- 0
   deleters <- as.numeric(which(rowSums(abs(mtx.brand)) == 0))
   if(length(deleters) > 0)
      mtx.brand <- mtx.brand[-deleters,]
   fivenum(mtx.brand)
   dim(mtx.brand)
   save(mtx.brand, file="~/github/TrenaProjectErythropoiesis/prep/import/rnaFromMarjorie/mtx.brand.logPlusScaleNormalized.RData")
   }

solverNames <- c("lasso",
                 "lassopv",
                 "pearson",
                 "randomForest",
                 "ridge",
                 "spearman",
                 "xgboost")

#------------------------------------------------------------------------------------------------------------------------
build.with.tfs.with.motifs <- function()
{
      # use the log+scale normalizer

   target.gene <- "NFE2"
   tfs.with.motifs <- sort(unique(mcols(query(MotifDb, c("sapiens"), c("jaspar2018", "hocomoco")))$geneSymbol))
   length(tfs.with.motifs)  # 780
   tfs <- intersect(tfs.with.motifs, rownames(mtx.brand))
   length(tfs)  # 558

   solver <- EnsembleSolver(mtx.brand, target.gene, tfs, geneCutoff=1.0, solverNames=solverNames)
   suppressWarnings(
      tbl <- run(solver)
      )
   dim(tbl)
   new.order <- order(abs(tbl$pearsonCoeff), decreasing=TRUE)
   tbl <- tbl[new.order,]
   rownames(tbl) <- NULL
   tbl.brand <- tbl # [1:100,]

   match(c("GATA1", "TAL1", "KLF1"), tbl.brand$gene)   # 8 3 6

   save(tbl.brand, file="trena.models.brand.noDNA.RData")

} # build.with.tfs.with.motifs
#------------------------------------------------------------------------------------------------------------------------
build.with.tfs.with.matching.fimo.motifs <- function()
{
      # use the log+scale normalizer

   fivenum(mtx.brand)
   target.gene <- "NFE2"

   load("../tbls.motifs.RData")
   fimo.score <- 1e-5
   phast.score <- 0.90

   tbl.fimo.strong <- subset(tbl.fimoMotifs, p.value <= fimo.score & phast7 >= phast.score)
   tfs <- sort(unique(tbl.fimo.strong$tf))
   length(tfs)  # 209

   tbl.fimo.weak <- subset(tbl.fimoMotifs, p.value <= 1e-4 & phast7 >= 0.5)


   solver <- EnsembleSolver(mtx.brand, target.gene, tfs, geneCutoff=1.0, solverNames=solverNames)
   tbl <- run(solver)
   dim(tbl)
   new.order <- order(abs(tbl$pearsonCoeff), decreasing=TRUE)
   tbl <- tbl[new.order,]
   rownames(tbl) <- NULL
   tbl.brand.fimo <- tbl

   tbl.tfbs.counts <- as.data.frame(sort(table(tbl.fimo.strong$tf)))
   bindingSiteCount <- merge(tbl.brand.fimo, tbl.tfbs.counts, by.x="gene", by.y="Var1")$Freq
   tbl.brand.fimo$tfbs.strong <- bindingSiteCount

   tbl.tfbs.counts <- as.data.frame(sort(table(tbl.fimo.weak$tf)))
   bindingSiteCount <- merge(tbl.brand.fimo, tbl.tfbs.counts, by.x="gene", by.y="Var1")$Freq
   tbl.brand.fimo$tfbs.weak <- bindingSiteCount

   match(c("GATA1", "TAL1", "KLF1"), tbl.brand.fimo$gene)   # 5 2 4

   save(tbl.brand.fimo, file="trena.model.fimo.phast7.RData")

} # build.with.tfs.with.matching.fimo.motifs
#------------------------------------------------------------------------------------------------------------------------



if(!exists("tbl.benchmark")){
   tbl.benchmark <- get(load(system.file(package="TrenaValidator", "extdata", "tbl.A.RData")))
   tbl.benchmark$pubmed.count <- unlist(lapply(strsplit(tbl.benchmark$pubmedID_from_curated_resources, ","), length))
   as.data.frame(t(subset(tbl.benchmark, TF=="GATA1" & target=="NFE2")))
   }

#  TF                                          GATA1
#  target                                       NFE2
#  effect                                          0
#  score                                           A
#  is_evidence_curateddatabase                  TRUE
#  is_evidence_chipSeq                         FALSE
#  is_evidence_TFbindingMotif                   TRUE
#  is_evidence_coexpression                     TRUE
#  which_curateddatabase               HTRIdb,trrust
#  which_chipSeq                                none
#  which_TFbindingMotif                 hocomoco_v11
#  which_coexpression                    ARACNe-GTEx
#  pubmedID_from_curated_resources 16648487,20564185
#  kegg_pathway                                    -
#  pubmed.count                                    2

#------------------------------------------------------------------------------------------------------------------------
# TF                                          GATA1
# target                                       NFE2
# effect                                          0
# score                                           A
# is_evidence_curateddatabase                  TRUE
# is_evidence_chipSeq                         FALSE
# is_evidence_TFbindingMotif                   TRUE
# is_evidence_coexpression                     TRUE
# which_curateddatabase               HTRIdb,trrust
# which_chipSeq                                none
# which_TFbindingMotif                 hocomoco_v11
# which_coexpression                    ARACNe-GTEx
# pubmedID_from_curated_resources 16648487,20564185
# kegg_pathway                                    -
# pubmed.count                                    2
#------------------------------------------------------------------------------------------------------------------------
# https://www.ncbi.nlm.nih.gov/pubmed/16648487
#
# Functional interaction of CP2 with GATA-1 in the regulation of erythroid promoters.   2006
# cp2 -> TFCP2
# Hsapiens-HOCOMOCOv10-TFCP2_HUMAN.H10MO.D
# Hsapiens-jaspar2018-TFCP2-MA0145.3

# We observed that binding sites for the ubiquitously expressed transcription factor CP2 [TFCP2] were
# present in regulatory regions of multiple erythroid genes. In these regions, the CP2 binding site
# was adjacent to a site for the erythroid factor GATA-1. Using three such regulatory regions (from
# genes encoding the transcription factors GATA-1, EKLF, and p45 NF-E2), we demonstrated the
# functional importance of the adjacent CP2/GATA-1 sites. In particular, CP2 binds to the GATA-1 HS2
# enhancer, generating a ternary complex with GATA-1 and DNA. Mutations in the CP2 consensus greatly
# impaired HS2 activity in transient transfection assays with K562 cells. Similar results were
# obtained by transfection of EKLF and p45 NF-E2 mutant constructs. Chromatin immunoprecipitation
# with K562 cells showed that CP2 binds in vivo to all three regulatory elements and that both
# GATA-1 and CP2 were present on the same GATA-1 and EKLF regulatory elements. Adjacent CP2/GATA-1
# sites may represent a novel module for erythroid expression of a number of genes. Additionally,
# coimmunoprecipitation and glutathione S-transferase pull-down experiments demonstrated a physical
# interaction between GATA-1 and CP2. This may contribute to the functional cooperation between
# these factors and provide an explanation for the important role of ubiquitous CP2 in the
# regulation of erythroid genes.
#
# 10778    chr12 54295889 54295904    16      + TFCP2 17.2360 1.56e-07 CCCTGCCTGGGCCAGA Hsapiens-HOCOMOCOv10-TFCP2_HUMAN.H10MO.D 0.95625
#
# lapply(query(MotifDb, c("sapiens", "TFCP2"), c("jaspar2018", "hocomoco")), consensusString)
# $`Hsapiens-jaspar2018-TFCP2-MA0145.3`
# [1] "AAACCGGTT?"
#
# $`Hsapiens-HOCOMOCOv10-TFCP2_HUMAN.H10MO.D`
# [1] "?CCTG??C??GCC?GA"   where the trailing GA is followed by TAAGA: perhaps the adjacency mentioned above
#
#------------------------------------------------------------------------------------------------------------------------
# https://www.ncbi.nlm.nih.gov/pubmed/20564185
#
# Over-expression of EDAG in the myeloid cell line 32D: induction of GATA-1 expression and erythroid/megakaryocytic
# phenotype.  (2010)
#
# Erythroid differentiation-associated gene (EDAG), a hematopoietic tissue-specific transcription
# regulator, plays a key role in maintaining the homeostasis of hematopoietic lineage
# commitment. However, the mechanism and genes regulated by EDAG remain unknown. In this study, we
# showed that overexpression of EDAG in a myeloid cell line 32D led to an erythroid phenotype with
# increased number of benzidine-positive cells and up-regulation of erythroid specific surface
# marker TER119. The megakaryocytic specific marker CD61 was also induced significantly. Using a
# genome-wide microarray analysis and a twofold change cutoff, we identified 332 genes with reduced
# expression and 288 genes with increased expression. Among up-regulation genes, transcription
# factor GATA-1 and its target genes including EKLF, NF-E2, Gfi-1b, hemogen, SCL, hemoglobin alpha,
# beta and megakaryocytic gene GPIX were increased. Silencing of EDAG by RNA interference in K562
# cells resulted in down-regulation of these genes. Taken together, EDAG functions as a positive
# regulator of erythroid/megakaryocytic differentiation in 32D cells associated with the induction
# of GATA-1 and its target genes.
#
#------------------------------------------------------------------------------------------------------------------------
# from google search: gata1 nf32
#
# Genetic Analysis of Hierarchical Regulation for Gata1 and NF-E2 p45 Gene Expression in Megakaryopoiesis
#  mouse, 2010. "a unique in vivo validation of the hierarchical relationship between GATA1 and p45 in megakaryocytes"
#
# Insight into GATA1 transcriptional activity through interrogation of cis elements disrupted in human erythroid disorders
# pnas (2016) very strong paper but seems only to discuss downstream effects of GATA1 and NFE2
#
# https://mcb.asm.org/content/25/4/1215
# minireview: GATA1 Function, a Paradigm for Transcription Factors in Hematopoiesis
# includes this suggestion that gata1 mutant causes significant decrease in nfe2 expression
# (p45 is an alias for nfe2.  nfe1 is an alias for gata1)

# (ii) MaFK and p45 NF-E2.In addition to the results seen with other megakaryocyte-specific genes,
# the expression of the transcription factors MafK and p45 NF-E2 is significantly decreased in
# megakaryocytes expressing an N-finger mutant of GATA1 (V205G) and in GATA1-deficient
# megakaryocytes (96, 116). p45 NF-E2 p45 and small Maf factors are critical for terminal
# differentiation of megakaryocytes (71, 100). This suggests that the attenuated expression of the
# essential transcription factors NF-E2 p45 and MafK is a major cause of the megakaryocytic
# phenotype of GATA1 mutations.


# Regulation of Mouse p45 NF-E2 Transcription by an Erythroid-specific GATA-dependent Intronic Alternative Promoter*
# mouse, 2000.   two alternate promoters.
#
# The erythroid-enriched transcription factor NF-E2 is composed of two subunits, p45 and p18, the
# former of which is mainly expressed in the hematopoietic system. We have isolated and
# characterized the mouse p45 NF-E2 gene; we show here that, similar to the human gene, the mouse
# gene has two alternative promoters, which are differentially active during development and in
# different hematopoietic cells. Transcripts from the distal promoter are present in both erythroid
# and myeloid cells; however, transcripts from an alternative proximal 1b promoter, lying in the
# first intron, are abundant in erythroid cells, but barely detectable in myeloid cells. During
# development, both transcripts are detectable in yolk sac, fetal liver, and bone
# marrow. Transfection experiments show that proximal promoter 1b has a strong activity in erythroid
# cells, which is completely dependent on the integrity of a palindromic GATA-1 binding site. In
# contrast, the distal promoter 1a is not active in this assay. When the promoter 1b is placed 3′ to
# the promoter 1a and reporter gene, in an arrangement that resembles the natural one, it acts as an
# enhancer to stimulate the activity of the upstream promoter la.

# previous two articles mentioned in plos 2011
#  Hierarchical Differentiation of Myeloid Progenitors Is Encoded in the Transcription Factor Network
#
# we examplarily discuss five such cases. (i) NF-E2 is regulated by GATA-1 and SCL (TAL1), but specifically
# important for megakaryocytic development [23]–[25].
#
if(!exists("igv")){
   igv <- igvR()
   setGenome(igv, "hg38")
   showGenomicRegion(igv, "NFE2")
   tp <- TrenaProjectErythropoiesis()
   setTargetGene(tp, "NFE2")
   tbl.gh <- getEnhancers(tp)
   tbl.gh$width <- with(tbl.gh, 1 + end - start)
   tbl.gh <- subset(tbl.gh, width < 5000)
   track <- DataFrameQuantitativeTrack("gh", tbl.gh[, c(1,2,3,11)], color="blue", autoscale=FALSE, min=0, max=50)
   displayTrack(igv, track)
   showGenomicRegion(igv, "chr12:54,289,590-54,311,542")
   showGenomicRegion(igv, "chr12:54,282,156-54,326,061")
   showGenomicRegion(igv, "chr12:54,290,497-54,315,164")
   }
#------------------------------------------------------------------------------------------------------------------------
round.numeric.columns.in.dataframe <- function(tbl, digits=2, pvalColumnNames="lassoPValue")
{
   tbl.pvals <- data.frame()
   tbl.main <- tbl

   if(!(all(is.na(pvalColumnNames)))){
     pval.cols <- grep(pvalColumnNames, colnames(tbl))
     stopifnot(length(pval.cols) == length(pvalColumnNames))
     tbl.pvals <- tbl[, pval.cols, drop=FALSE]
     tbl.main <- tbl[, -pval.cols, drop=FALSE]
     }
   numeric_columns <- sapply(tbl.main, mode) == 'numeric'
   tbl.main[numeric_columns] <-  round(tbl.main[numeric_columns], digits)
   if(ncol(tbl.pvals) > 0){
   tbl.pvals <- apply(tbl.pvals, 2, function(col) as.numeric(formatC(col, format = "e", digits = 2)))
   }

  tbl.out <- cbind(tbl.main, tbl.pvals)[, colnames(tbl)]
  tbl.out

   } # round.numeric.columns.in.dataframe

#------------------------------------------------------------------------------------------------------------------------
conservationTrack <- function()
{
   loc <- getGenomicRegion(igv)
   starts <- with(loc, seq(start, end, by=5))
   ends <- starts + 5
   count <- length(starts)
   tbl.blocks <- data.frame(chrom=rep(loc$chrom, count), start=starts, end=ends, stringsAsFactors=FALSE)
   tbl.cons7 <- as.data.frame(gscores(phast.7, GRanges(tbl.blocks)), stringsAsFactors=FALSE)
   tbl.cons7$chrom <- as.character(tbl.cons7$seqnames)
   tbl.cons7 <- tbl.cons7[, c("chrom", "start", "end", "default")]
   track <- DataFrameQuantitativeTrack("phast7", tbl.cons7, autoscale=TRUE, color="red")
   displayTrack(igv, track)

} # conservationTrack
#------------------------------------------------------------------------------------------------------------------------
fimoConservationTable <- function()
{
  source("~/github/fimoService/batchMode/fimoBatchTools.R")   # works on hagfish & khaleesi
  meme.file <- "jaspar2018-hocomoco.meme"
  motifs <- query(MotifDb, "hsapiens", c("jaspar2018", "hocomoco"))
  length(motifs)  # 1177
  export(motifs, con=meme.file, format="meme")

  roi <- getGenomicRegion(igv)
  tbl.regions <- with(roi, data.frame(chrom=chrom, start=start, end=end, stringsAsFactors=FALSE))
  fimo.threshold <- 1e-5
  tbl.match <- fimoBatch(tbl.regions, matchThreshold=fimo.threshold, genomeName="hg38", pwmFile=meme.file)
  dim(tbl.match)

  tbl.matchCons <- as.data.frame(gscores(phast.7, GRanges(tbl.match)), stringsAsFactors=FALSE)
  dim(tbl.matchCons)
  tbl.matchCons <- subset(tbl.matchCons, default > 0.95)
  dim(tbl.matchCons)
  return(tbl.matchCons)

} # fimoConservationTable
#------------------------------------------------------------------------------------------------------------------------
demo_NFE2_models <- function()
{
   library(TrenaProjectLymphocyte)
   library(org.Hs.eg.db)
   tp <- TrenaProjectLymphocyte();

   genome <- "hg38"
   targetGene <- "NFE2"
   setTargetGene(tp, targetGene)
   tbl.info <- getTranscriptsTable(tp)

   chromosome <- tbl.info$chrom
   tss <- tbl.info$tss
      # strand-aware start and end: atf1 is on the + strand
   start <- tss - 50000
   end   <- tss + 50000

   tbl.regions <- data.frame(chrom=chromosome, start=start, end=end, stringsAsFactors=FALSE)
   file <- system.file(package="TrenaProjectLymphocyte", "extdata", "expression","GTEX.wholeBlood.rna-seq-geneSymbols.22330x407.RData")
   mtx.blood <- get(load(file))

   file <- system.file(package="TrenaProjectLymphocyte", "extdata", "expression",
                       "GTEX.lymphocyte.rna-seq-geneSymbols.21415x130.RData")
   mtx.lymphocyte <- get(load(file))

   file <- system.file(package="TrenaProjectErythropoiesis", "extdata", "expression", "brandLabDifferentiationTimeCourse-27171x28.RData")
   mtx.marjorie <- get(load(file))

   file <- "~/github/TrenaProjectErythropoiesis/prep/import/buenrostro/GSE74246_RNAseq_All_Counts.txt"
   file.exists(file)
   tbl <- read.table(file, sep="\t", as.is=TRUE, header=TRUE,nrow=-1)
   rownames(tbl) <- tbl[, 1]
   tbl <- tbl[, -1]
   mtx.buenrosto <- asinh(as.matrix(tbl))
   fivenum(mtx.buenrosto)



   tbl.matchCons <- fimoConservationTable()
   candidate.tfs <- unique(tbl.matchCons$tf)
   length(candidate.tfs)  # 57

   noDNA.recipe <- list(title="noDNA.matchCons",
                        type="noDNA.tfsSupplied",
                        matrix=mtx.marjorie,
                        #matrix=mtx.blood,
                        #matrix=mtx.lymphocyte,
                        #matrix=mtx.buenrosto,
                        candidateTFs=candidate.tfs,
                        tfPool=allKnownTFs(),
                        tfPrefilterCorrelation=0.2,
                        annotationDbFile=dbfile(org.Hs.eg.db),
                        orderModelByColumn="rfScore",
                        solverNames=c("lasso", "lassopv", "pearson", "randomForest", "ridge", "spearman", "xgboost"),
                        quiet=TRUE)
   builder <- NoDnaModelBuilder(genome, targetGene,  noDNA.recipe, quiet=TRUE)
   x <- build(builder)
   tbl.model <- x$model[order(abs(x$model$pearsonCoeff), decreasing=TRUE),]
   tbl.tfbs.counts <- as.data.frame(sort(table(tbl.matchCons$tf)))
   bindingSiteCount <- merge(tbl.model, tbl.tfbs.counts, by.x="gene", by.y="Var1")$Freq
   tbl.model$bindingSites <- bindingSiteCount

   tbl.model.strong <- subset(tbl.model, abs(pearsonCoeff) > 0.5)
   displayBindingSites(tbl.model.strong, tbl.matchCons)
   mtx.model <- as.matrix(tbl.model.strong[, -1])
   rownames(mtx.model) <- tbl.model.strong$gene

   tbl.model.trimmed <- as.data.frame(round.numeric.columns.in.dataframe(mtx.model))
   save(tbl.model.trimmed, file="brand.tbl.model.trimmed")



   recipe.all <- noDNA.recipe
   recipe.all$candidateTFs <- allKnownTFs()
   builder <- NoDnaModelBuilder(genome, targetGene,  recipe.all, quiet=TRUE)
   x1 <- build(builder)



      #----------------------------------------------------------------------------------------------------
      # first, build a model with "placenta2", an early version of the placenta footprint database
      #----------------------------------------------------------------------------------------------------

   recipe <- list(title="NFE2",
                  type="footprint.database",
                  regions=tbl.regions,
                  geneSymbol=targetGene,
                  tss=tss,
                  matrix=mtx.blood,
                  db.host="khaleesi.systemsbiology.net",
                  db.port=5432,
                  databases=list("lymphoblast_hint_16", "lymphoblast_hint_20"),
                  annotationDbFile=dbfile(org.Hs.eg.db),
                  motifDiscovery="builtinFimo",
                  tfPool=allKnownTFs(),
                  tfMapping="MotifDB",
                  tfPrefilterCorrelation=0.1,
                  orderModelByColumn="rfScore",
                  solverNames=c("lasso", "lassopv", "pearson", "randomForest", "ridge", "spearman", "xgboost"))

   fpBuilder <- FootprintDatabaseModelBuilder(genome, targetGene,  recipe, quiet=FALSE)
   x <- build(fpBuilder)

   recipe$matrix <- mtx.marjorie
   fpBuilder <- FootprintDatabaseModelBuilder(genome, targetGene,  recipe, quiet=FALSE)
   x2 <- build(fpBuilder)


   file <- "~/github/TrenaProjectErythropoiesis/prep/import/buenrostro/GSE74246_RNAseq_All_Counts.txt"
   file.exists(file)
   tbl <- read.table(file, sep="\t", as.is=TRUE, header=TRUE,nrow=-1)
   rownames(tbl) <- tbl[, 1]
   tbl <- tbl[, -1]
   mtx <- asinh(as.matrix(tbl))
   fivenum(mtx)

   suppressWarnings(
      logTimingInfo("placenta2 db, +/- 5bk generic promoter on ATF1", system.time(x2 <- build(fpBuilder)))
      )

} # demo_NFE2_models
#------------------------------------------------------------------------------------------------------------------------
displayBindingSites <- function(tbl.model, tbl.matchCons)
{
   tfs <- subset(tbl.model, abs(pearsonCoeff) > 0.5)$gene

   for(one.tf in tfs){
      tbl.bs <- subset(tbl.matchCons, tf==one.tf)[, c("seqnames", "start", "end", "matched_sequence")]
      colnames(tbl.bs) <- c("chrom", "start", "end", "seq")
      tbl.bs$chrom <- as.character(tbl.bs$chrom)
      track <- DataFrameAnnotationTrack(one.tf, tbl.bs, color="random", trackHeight=25, displayMode="EXPANDED")
      displayTrack(igv, track)
      }

} # displayBindingSites
#------------------------------------------------------------------------------------------------------------------------
getATACseq <- function()
{
   roi <- getGenomicRegion(igv)
   chromosome <- roi$chrom
   start.loc <- roi$start
   end.loc <- roi$end

   directory <- "~/github/TrenaProjectErythropoiesis/prep/import/atacPeaks"
   files <- grep("narrowPeak$", list.files(directory), value=TRUE)
   result <- list()

   for(file in files){
      full.path <- file.path(directory, file)
      track.name <- sub("_hg38_macs2_.*$", "", sub("ATAC_Cord_", "", file))
      tbl.atac <- read.table(full.path, sep="\t", as.is=TRUE)
      colnames(tbl.atac) <- c("chrom", "start", "end", "name", "c5", "strand", "c7", "c8", "c9", "c10")
      tbl.atac.region <- subset(tbl.atac, chrom==chromosome & start >= start.loc & end <= end.loc)
      if(nrow(tbl.atac.region) > 0){
         tbl.atac.region$sample <- track.name
         result[[track.name]] <- tbl.atac.region
         }
      } # files

   tbl.out <- do.call(rbind, result)
   rownames(tbl.out) <- NULL

   tbl.out

} # getATACseq
#------------------------------------------------------------------------------------------------------------------------
displayATACseq <- function()
{
   library (RColorBrewer)
   totalColorCount <- 12
   colors <- brewer.pal(8, "Dark2")
   currentColorNumber <- 0

   tbl.all <- getATACseq()
   samples <- unique(tbl.all$sample)
   current.day.string <- ""
   color <- colors[1]

   for(current.sample in samples){
      this.day.string <- strsplit(current.sample, "_")[[1]][1]
      if(this.day.string != current.day.string){
         currentColorNumber <- (currentColorNumber %% totalColorCount) + 1
         color <- colors[currentColorNumber]
         current.day.string <- this.day.string
         }
      tbl.atac.sub <- subset(tbl.all, sample == current.sample)
      track.name <- current.sample
      track <- DataFrameQuantitativeTrack(track.name, tbl.atac.sub[, c("chrom", "start", "end", "c10")],
                                          color, autoscale=FALSE, min=0, max=430, trackHeight=30)
      displayTrack(igv, track)
      } # for samples

   tbl.regions.condensed <- as.data.frame(union(GRanges(tbl.all[, c("chrom", "start", "end")]),
                                                GRanges(tbl.all[, c("chrom", "start", "end")])))[, c("seqnames", "start", "end")]
   colnames(tbl.regions.condensed) <- c("chrom", "start", "end")
   tbl.regions.condensed$chrom <- as.character(tbl.regions.condensed$chrom)
   lapply(tbl.regions.condensed, class)

   #state$tbl.regions.condensed <- tbl.regions.condensed
   track <- DataFrameAnnotationTrack("atac combined", tbl.regions.condensed, color="black")
   displayTrack(igv, track)

} # displayATACseq
#------------------------------------------------------------------------------------------------------------------------
# oddly, no methylation data in the immediate vicinity of nfe2.
addMethylationTracks <- function()
{
  library(AnnotationHub)
  ah <- AnnotationHub()
  ah.human <- subset(ah, species == "Homo sapiens")
  histone.tracks <- query(ah.human, c("H3K4me3", "Gm12878", "Peak", "narrow"))  # 3 tracks
  descriptions <- histone.tracks$description
  titles <- histone.tracks$title
  colors <- rep(terrain.colors(6), 4)
  color.index <- 0

  tbl.roi <- as.data.frame(getGenomicRegion(igv), stringsAsFactors=FALSE)

  for(i in seq_len(length(histone.tracks))){
     name <- names(histone.tracks)[i]
     color.index <- color.index + 1
     gr <- histone.tracks[[name]]
     ov <- findOverlaps(gr, GRanges(tbl.roi))
     roi.histones <- gr[queryHits(ov)]
     track.histones <- GRangesQuantitativeTrack(titles[i], roi.histones[, "pValue"],
                                              color=colors[color.index], trackHeight=50,
                                              autoscale=TRUE)
     displayTrack(igv, track.histones)
     } # for track

} # addMethylationTracks
#------------------------------------------------------------------------------------------------------------------------
Any scripts or data that you put into this service are public.
trena documentation built on Nov. 15, 2020, 2:07 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
trena
Fit transcriptional regulatory networks using gene expression, priors, machine learning

misc/nfe2/brand/brand-models.R
In trena: Fit transcriptional regulatory networks using gene expression, priors, machine learning

Try the trena package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

trena Fit transcriptional regulatory networks using gene expression, priors, machine learning

misc/nfe2/brand/brand-models.R In trena: Fit transcriptional regulatory networks using gene expression, priors, machine learning

Try the trena package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

trena
Fit transcriptional regulatory networks using gene expression, priors, machine learning

misc/nfe2/brand/brand-models.R
In trena: Fit transcriptional regulatory networks using gene expression, priors, machine learning